Improve & unify docs for dirichlet priors #3125

Merged · 4 commits · May 5, 2021
34 changes: 20 additions & 14 deletions gensim/models/atmodel.py
@@ -188,12 +188,24 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, d
Controls how old documents are forgotten.
offset : float, optional
Controls down-weighting of iterations.
alpha : float, optional
Hyperparameters for author-topic model.Supports special values of 'asymmetric'
and 'auto': the former uses a fixed normalized asymmetric 1.0/topicno prior,
the latter learns an asymmetric prior directly from your data.
eta : float, optional
Hyperparameters for author-topic model.
alpha : {float, numpy.ndarray of float, list of float, str}, optional
A-priori belief on document-topic distribution, this can be:
* scalar for a symmetric prior over document-topic distribution,
* 1D array of length equal to num_topics to denote an asymmetric user defined prior for each topic.

Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'asymmetric': Uses a fixed normalized asymmetric prior of `1.0 / (topic_index + sqrt(num_topics))`,
* 'auto': Learns an asymmetric prior from the corpus (not available if `distributed==True`).
eta : {float, numpy.ndarray of float, list of float, str}, optional
A-priori belief on topic-word distribution, this can be:
* scalar for a symmetric prior over topic-word distribution,
* 1D array of length equal to num_words to denote an asymmetric user defined prior for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination.

Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'auto': Learns an asymmetric prior from the corpus.
update_every : int, optional
Make updates in topic probability for latest mini-batch.
eval_every : int, optional
@@ -279,23 +291,17 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, author2doc=None, d
self.init_empty_corpus()

self.alpha, self.optimize_alpha = self.init_dir_prior(alpha, 'alpha')

assert self.alpha.shape == (self.num_topics,), \
"Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics)

if isinstance(eta, str):
if eta == 'asymmetric':
raise ValueError("The 'asymmetric' option cannot be used for eta")

self.eta, self.optimize_eta = self.init_dir_prior(eta, 'eta')

self.random_state = utils.get_random_state(random_state)

assert (self.eta.shape == (self.num_terms,) or self.eta.shape == (self.num_topics, self.num_terms)), (
"Invalid eta shape. Got shape %s, but expected (%d, 1) or (%d, %d)" %
(str(self.eta.shape), self.num_terms, self.num_topics, self.num_terms)
)

self.random_state = utils.get_random_state(random_state)

# VB constants
self.iterations = iterations
self.gamma_threshold = gamma_threshold
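The accepted forms for `alpha` and `eta` enumerated in the unified docstring above can be checked mechanically. A minimal numpy-only sketch (the `validate_priors` helper is hypothetical, for illustration; it is not part of gensim's API):

```python
import numpy as np

def validate_priors(alpha, eta, num_topics, num_words):
    """Check that alpha/eta match the shapes described in the docstring."""
    alpha = np.asarray(alpha, dtype=float)
    # alpha: scalar (symmetric prior) or 1D array of length num_topics (asymmetric).
    if alpha.ndim == 0:
        alpha = np.full(num_topics, float(alpha))
    assert alpha.shape == (num_topics,), "alpha must be a scalar or have length num_topics"

    eta = np.asarray(eta, dtype=float)
    # eta: scalar, 1D array of length num_words, or a (num_topics, num_words) matrix.
    if eta.ndim == 0:
        eta = np.full(num_words, float(eta))
    assert eta.shape in ((num_words,), (num_topics, num_words)), \
        "eta must be a scalar, length num_words, or shape (num_topics, num_words)"
    return alpha, eta

a, e = validate_priors(0.1, np.ones((5, 200)), num_topics=5, num_words=200)
```

This mirrors the shape assertions that the diff moves directly after each `init_dir_prior` call in `__init__`.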
71 changes: 41 additions & 30 deletions gensim/models/ldamodel.py
@@ -374,21 +374,24 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
update_every : int, optional
Number of documents to be iterated through for each update.
Set to 0 for batch learning, > 1 for online iterative learning.
alpha : {numpy.ndarray, str}, optional
Can be set to an 1D array of length equal to the number of expected topics that expresses
our a-priori belief for each topics' probability.
Alternatively default prior selecting strategies can be employed by supplying a string:
alpha : {float, numpy.ndarray of float, list of float, str}, optional
A-priori belief on document-topic distribution, this can be:
* scalar for a symmetric prior over document-topic distribution,
* 1D array of length equal to num_topics to denote an asymmetric user defined prior for each topic.

* 'symmetric': Default; uses a fixed symmetric prior per topic,
Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'asymmetric': Uses a fixed normalized asymmetric prior of `1.0 / (topic_index + sqrt(num_topics))`,
* 'auto': Learns an asymmetric prior from the corpus (not available if `distributed==True`).
eta : {float, np.array, str}, optional
A-priori belief on word probability, this can be:
eta : {float, numpy.ndarray of float, list of float, str}, optional
A-priori belief on topic-word distribution, this can be:
* scalar for a symmetric prior over topic-word distribution,
* 1D array of length equal to num_words to denote an asymmetric user defined prior for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination.

* scalar for a symmetric prior over topic/word probability,
* vector of length num_words to denote an asymmetric user defined probability for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination,
* the string 'auto' to learn the asymmetric prior from the data.
Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'auto': Learns an asymmetric prior from the corpus.
decay : float, optional
A number between (0.5, 1] to weight what percentage of the previous lambda value is forgotten
when each new document is examined. Corresponds to Kappa from
@@ -409,7 +412,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
random_state : {np.random.RandomState, int}, optional
Either a randomState object or a seed to generate one. Useful for reproducibility.
ns_conf : dict of (str, object), optional
Key word parameters propagated to :func:`gensim.utils.getNS` to get a Pyro4 Nameserved.
Key word parameters propagated to :func:`gensim.utils.getNS` to get a Pyro4 nameserver.
Only used if `distributed` is set to True.
minimum_phi_value : float, optional
if `per_word_topics` is True, this represents a lower bound on the term probabilities.
@@ -459,22 +462,16 @@ def __init__(self, corpus=None, num_topics=100, id2word=None,
self.callbacks = callbacks

self.alpha, self.optimize_alpha = self.init_dir_prior(alpha, 'alpha')

assert self.alpha.shape == (self.num_topics,), \
"Invalid alpha shape. Got shape %s, but expected (%d, )" % (str(self.alpha.shape), self.num_topics)

if isinstance(eta, str):
if eta == 'asymmetric':
raise ValueError("The 'asymmetric' option cannot be used for eta")

self.eta, self.optimize_eta = self.init_dir_prior(eta, 'eta')

self.random_state = utils.get_random_state(random_state)

assert self.eta.shape == (self.num_terms,) or self.eta.shape == (self.num_topics, self.num_terms), (
"Invalid eta shape. Got shape %s, but expected (%d, 1) or (%d, %d)" %
(str(self.eta.shape), self.num_terms, self.num_topics, self.num_terms))

self.random_state = utils.get_random_state(random_state)

# VB constants
self.iterations = iterations
self.gamma_threshold = gamma_threshold
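The fixed `'asymmetric'` strategy documented above has a closed form, `1.0 / (topic_index + sqrt(num_topics))`, normalized to sum to one. A short numpy sketch of that documented formula (illustration only, not gensim's internal code):

```python
import numpy as np

num_topics = 10
# Fixed asymmetric prior: 1.0 / (topic_index + sqrt(num_topics)),
# then normalized so the entries sum to 1.
raw = np.array([1.0 / (i + np.sqrt(num_topics)) for i in range(num_topics)])
asym_prior = raw / raw.sum()
# Earlier topic indices receive more prior mass than later ones.
```

This is why the old `1.0 / topicno` wording in the docstrings was misleading: the denominator is offset by `sqrt(num_topics)`, not just the topic number.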
@@ -531,24 +528,36 @@ def init_dir_prior(self, prior, name):

Parameters
----------
prior : {str, list of float, numpy.ndarray of float, float}
A-priori belief on word probability. If `name` == 'eta' then the prior can be:
prior : {float, numpy.ndarray of float, list of float, str}
A-priori belief on document-topic distribution. If `name` == 'alpha', then the prior can be:
* scalar for a symmetric prior over document-topic distribution,
* 1D array of length equal to num_topics to denote an asymmetric user defined prior for each topic.

* scalar for a symmetric prior over topic/word probability,
* vector of length num_words to denote an asymmetric user defined probability for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination,
* the string 'auto' to learn the asymmetric prior from the data.
Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'asymmetric': Uses a fixed normalized asymmetric prior of `1.0 / (topic_index + sqrt(num_topics))`,
* 'auto': Learns an asymmetric prior from the corpus (not available if `distributed==True`).

If `name` == 'alpha', then the prior can be:
A-priori belief on topic-word distribution. If `name` == 'eta' then the prior can be:
* scalar for a symmetric prior over topic-word distribution,
* 1D array of length equal to num_words to denote an asymmetric user defined prior for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination.

* an 1D array of length equal to the number of expected topics,
* 'symmetric': Uses a fixed symmetric prior per topic,
* 'asymmetric': Uses a fixed normalized asymmetric prior of `1.0 / (topic_index + sqrt(num_topics))`,
Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'auto': Learns an asymmetric prior from the corpus.
name : {'alpha', 'eta'}
Whether the `prior` is parameterized by the alpha vector (1 parameter per topic)
or by the eta (1 parameter per unique term in the vocabulary).

Returns
-------
init_prior: numpy.ndarray
Initialized Dirichlet prior:
If 'alpha' was provided as `name` the shape is (self.num_topics, ).
If 'eta' was provided as `name` the shape is (len(self.id2word), ).
is_auto: bool
Flag that shows if hyperparameter optimization should be used or not.
"""
if prior is None:
prior = 'symmetric'
@@ -570,6 +579,8 @@ def init_dir_prior(self, prior, name):
dtype=self.dtype, count=prior_shape,
)
elif prior == 'asymmetric':
if name == 'eta':
raise ValueError("The 'asymmetric' option cannot be used for eta")
init_prior = np.fromiter(
(1.0 / (i + np.sqrt(prior_shape)) for i in range(prior_shape)),
dtype=self.dtype, count=prior_shape,
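Putting the docstring and the new `'asymmetric'` guard together, the branching in `init_dir_prior` can be sketched as a simplified standalone function (this is an illustration of the documented behaviour, not gensim's actual implementation):

```python
import numpy as np

def init_dir_prior_sketch(prior, name, num_topics, num_terms):
    """Simplified sketch of the documented init_dir_prior branching."""
    if prior is None:
        prior = 'symmetric'
    # alpha has one entry per topic, eta one entry per vocabulary term.
    shape = num_topics if name == 'alpha' else num_terms

    is_auto = False
    if isinstance(prior, str):
        if prior == 'symmetric':
            init_prior = np.full(shape, 1.0 / num_topics)
        elif prior == 'asymmetric':
            # The diff above adds this guard: eta has no 'asymmetric' option.
            if name == 'eta':
                raise ValueError("The 'asymmetric' option cannot be used for eta")
            raw = np.array([1.0 / (i + np.sqrt(shape)) for i in range(shape)])
            init_prior = raw / raw.sum()
        elif prior == 'auto':
            # Start symmetric; the prior is then optimized during training.
            is_auto = True
            init_prior = np.full(shape, 1.0 / num_topics)
        else:
            raise ValueError("Unable to determine proper %s value given '%s'" % (name, prior))
    else:
        init_prior = np.asarray(prior, dtype=float)
    return init_prior, is_auto

alpha, optimize_alpha = init_dir_prior_sketch('asymmetric', 'alpha', num_topics=4, num_terms=50)
```

Raising `ValueError` inside `init_dir_prior` itself, as the diff does, means both `LdaModel` and `AuthorTopicModel` reject `eta='asymmetric'` without duplicating the check.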
28 changes: 16 additions & 12 deletions gensim/models/ldamulticore.py
@@ -128,19 +128,23 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None,
Number of documents to be used in each training chunk.
passes : int, optional
Number of passes through the corpus during training.
alpha : {np.ndarray, str}, optional
Can be set to an 1D array of length equal to the number of expected topics that expresses
our a-priori belief for the each topics' probability.
Alternatively default prior selecting strategies can be employed by supplying a string:
alpha : {float, numpy.ndarray of float, list of float, str}, optional
A-priori belief on document-topic distribution, this can be:
* scalar for a symmetric prior over document-topic distribution,
* 1D array of length equal to num_topics to denote an asymmetric user defined prior for each topic.

* 'asymmetric': Uses a fixed normalized asymmetric prior of `1.0 / topicno`.
eta : {float, np.array, str}, optional
A-priori belief on word probability, this can be:
Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'asymmetric': Uses a fixed normalized asymmetric prior of `1.0 / (topic_index + sqrt(num_topics))`.
eta : {float, numpy.ndarray of float, list of float, str}, optional
A-priori belief on topic-word distribution, this can be:
* scalar for a symmetric prior over topic-word distribution,
* 1D array of length equal to num_words to denote an asymmetric user defined prior for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination.

* scalar for a symmetric prior over topic/word probability,
* vector of length num_words to denote an asymmetric user defined probability for each word,
* matrix of shape (num_topics, num_words) to assign a probability for each word-topic combination,
* the string 'auto' to learn the asymmetric prior from the data.
Alternatively default prior selecting strategies can be employed by supplying a string:
* 'symmetric': (default) Uses a fixed symmetric prior of `1.0 / num_topics`,
* 'auto': Learns an asymmetric prior from the corpus.
decay : float, optional
A number between (0.5, 1] to weight what percentage of the previous lambda value is forgotten
when each new document is examined. Corresponds to Kappa from
@@ -174,7 +178,7 @@ def __init__(self, corpus=None, num_topics=100, id2word=None, workers=None,
self.batch = batch

if isinstance(alpha, str) and alpha == 'auto':
raise NotImplementedError("auto-tuning alpha not implemented in multicore LDA; use plain LdaModel.")
raise NotImplementedError("auto-tuning alpha not implemented in LdaMulticore; use plain LdaModel.")

super(LdaMulticore, self).__init__(
corpus=corpus, num_topics=num_topics,
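The reworded `NotImplementedError` message reflects the restriction stated in the docstring: `LdaMulticore` accepts `'symmetric'` and `'asymmetric'` for `alpha`, but not `'auto'`. A minimal sketch of the same guard (hypothetical standalone function, not the `LdaMulticore` constructor itself):

```python
def check_multicore_alpha(alpha):
    """Mirror the documented restriction: LdaMulticore cannot auto-tune alpha."""
    if isinstance(alpha, str) and alpha == 'auto':
        raise NotImplementedError(
            "auto-tuning alpha not implemented in LdaMulticore; use plain LdaModel."
        )
    return alpha

check_multicore_alpha('symmetric')   # accepted
check_multicore_alpha(0.1)           # accepted
```

Callers who need a learned alpha prior should fall back to single-process `LdaModel`, where `alpha='auto'` is supported.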