piskvorky · menshikh-iv · May 25, 2017 · May 15, 2017 · May 16, 2017 · May 16, 2017
diff --git a/gensim/models/ldamodel.py b/gensim/models/ldamodel.py
@@ -1004,6 +1004,11 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, **
         """
         if self.state is not None:
             self.state.save(utils.smart_extension(fname, '.state'), *args, **kwargs)
+
+        # Save 'random_state' separately
+        if self.random_state is not None:
+            utils.pickle(self.random_state, utils.smart_extension(fname, '.random_state'))
+
         # Save the dictionary separately if not in 'ignore'.
         if 'id2word' not in ignore:
             utils.pickle(self.id2word, utils.smart_extension(fname, '.id2word'))
@@ -1023,9 +1028,9 @@ def save(self, fname, ignore=['state', 'dispatcher'], separately=None, *args, **
         separately_explicit = ['expElogbeta', 'sstats']
         # Also add 'alpha' and 'eta' to separately list if they are set 'auto' or some
         # array manually.
-        if (isinstance(self.alpha, six.string_types) and self.alpha == 'auto') or len(self.alpha.shape) != 1:
+        if (isinstance(self.alpha, six.string_types) and self.alpha == 'auto') or (isinstance(self.alpha, np.ndarray) and len(self.alpha.shape) != 1):
             separately_explicit.append('alpha')
-        if (isinstance(self.eta, six.string_types) and self.eta == 'auto') or len(self.eta.shape) != 1:
+        if (isinstance(self.eta, six.string_types) and self.eta == 'auto') or (isinstance(self.eta, np.ndarray) and len(self.eta.shape) != 1):
             separately_explicit.append('eta')
         # Merge separately_explicit with separately.
         if separately:
@@ -1054,13 +1059,22 @@ def load(cls, fname, *args, **kwargs):
             result.state = super(LdaModel, cls).load(state_fname, *args, **kwargs)
         except Exception as e:
             logging.warning("failed to load state from %s: %s", state_fname, e)
-        id2word_fname = utils.smart_extension(fname, '.id2word')
-        if (os.path.isfile(id2word_fname)):
-            try:
-                result.id2word = utils.unpickle(id2word_fname)
-            except Exception as e:
-                logging.warning("failed to load id2word dictionary from %s: %s", id2word_fname, e)
+
+        random_state_fname = utils.smart_extension(fname, '.random_state')
+        if (os.path.isfile(random_state_fname)):
+            result.random_state = utils.unpickle(random_state_fname)
         else:
-            result.id2word = None
+            logging.warning("random_state not stored on disk so using default value")
+            result.random_state = utils.get_random_state(None)
+
+        if not result.id2word:
+            id2word_fname = utils.smart_extension(fname, '.id2word')
+            if (os.path.isfile(id2word_fname)):
+                try:
+                    result.id2word = utils.unpickle(id2word_fname)
+                except Exception as e:
+                    logging.warning("failed to load id2word dictionary from %s: %s", id2word_fname, e)
+            else:
+                result.id2word = None
         return result
 # endclass LdaModel
diff --git a/gensim/test/test_data/pre_0_13_2_model b/gensim/test/test_data/pre_0_13_2_model
diff --git a/gensim/test/test_data/pre_0_13_2_model.state b/gensim/test/test_data/pre_0_13_2_model.state
diff --git a/gensim/test/test_ldamodel.py b/gensim/test/test_ldamodel.py
@@ -46,7 +46,7 @@
 
 def testfile(test_fname=''):
     # temporary data will be stored to this file
-    fname = 'gensim_models_' + test_fname + '.tst' 
+    fname = 'gensim_models_' + test_fname + '.tst'
     return os.path.join(tempfile.gettempdir(), fname)
 
 
@@ -247,9 +247,9 @@ def testGetDocumentTopics(self):
 
         #Test case to use the get_document_topic function for the corpus
         all_topics = model.get_document_topics(self.corpus, per_word_topics=True)
-        
+
         self.assertEqual(model.state.numdocs, len(corpus))
-        
+
         for topic in all_topics:
             self.assertTrue(isinstance(topic, tuple))
             for k, v in topic[0]: # list of doc_topics
@@ -269,9 +269,9 @@ def testGetDocumentTopics(self):
         word_phi_count_na = 0
 
         all_topics = model.get_document_topics(self.corpus, minimum_probability=0.8, minimum_phi_value=1.0, per_word_topics=True)
-        
+
         self.assertEqual(model.state.numdocs, len(corpus))
-        
+
         for topic in all_topics:
             self.assertTrue(isinstance(topic, tuple))
             for k, v in topic[0]: # list of doc_topics
@@ -470,6 +470,29 @@ def testLargeMmapCompressed(self):
         # test loading the large model arrays with mmap
         self.assertRaises(IOError, self.class_.load, fname, mmap='r')
 
+    def testId2WordBackwardCompatibility(self):
+        """Load a model saved by a pre-0.13.2 version of Gensim and check the types of its printed topics."""
+        legacy_model = self.class_.load(datapath('pre_0_13_2_model'))
+
+        printed_topics = legacy_model.print_topics(num_topics=3, num_words=3)
+
+        # each printed topic must be indexable as (integer topic id, string description)
+        for topic in printed_topics:
+            self.assertTrue(isinstance(topic[0], int))
+            self.assertTrue(isinstance(topic[1], six.string_types))
+
+    def testRandomStateBackwardCompatibility(self):
+        """Load a model saved by a pre-0.13.2 version of Gensim and print a subset of its topics."""
+        legacy_model = self.class_.load(datapath('pre_0_13_2_model'))
+
+        # ask for fewer topics than `legacy_model.num_topics` so that `legacy_model.random_state` is used
+        printed_topics = legacy_model.print_topics(num_topics=2, num_words=3)
+
+        # each printed topic must be indexable as (integer topic id, string description)
+        for topic in printed_topics:
+            self.assertTrue(isinstance(topic[0], int))
+            self.assertTrue(isinstance(topic[1], six.string_types))
+
 #endclass TestLdaModel