implement separate functions to load FT embeddings and models #2376

Merged: 7 commits, Mar 7, 2019 (diff shows changes from 2 commits)
67 changes: 22 additions & 45 deletions gensim/models/fasttext.py
@@ -171,27 +171,28 @@
.. sourcecode:: pycon

>>> cap_path = datapath("crime-and-punishment.bin")
>>> # Partial model: loads quickly, uses less RAM, but cannot continue training
>>> fb_partial = FastText.load_fasttext_format(cap_path, full_model=False)
>>> # Full model: loads slowly, consumes RAM, but can continue training (see below)
>>> fb_full = FastText.load_fasttext_format(cap_path, full_model=True)
>>> fb_model = FastText.load_fasttext_format(cap_path)

Once loaded, such models behave identically to those trained from scratch.
You may continue training them on new data:

.. sourcecode:: pycon

>>> 'computer' in fb_full.wv.vocab # New word, currently out of vocab
>>> 'computer' in fb_model.wv.vocab # New word, currently out of vocab
False
>>> old_computer = np.copy(fb_full.wv['computer']) # Calculate current vectors
>>> fb_full.build_vocab(new_sentences, update=True)
>>> fb_full.train(new_sentences, total_examples=len(new_sentences), epochs=model.epochs)
>>> new_computer = fb_full.wv['computer']
>>> old_computer = np.copy(fb_model.wv['computer'])  # Save the current vector for comparison
>>> fb_model.build_vocab(new_sentences, update=True)
>>> fb_model.train(new_sentences, total_examples=len(new_sentences), epochs=fb_model.epochs)
>>> new_computer = fb_model.wv['computer']
>>> np.allclose(old_computer, new_computer, atol=1e-4) # Vector has changed, model has learnt something
False
>>> 'computer' in fb_full.wv.vocab # New word is now in the vocabulary
>>> 'computer' in fb_model.wv.vocab # New word is now in the vocabulary
True

If you do not intend to continue training the model, consider using the
:meth:`gensim.models.KeyedVectors.load_fasttext_format` method instead.
That method loads only the word embeddings, consuming much less CPU and RAM.
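
For example, a minimal sketch of loading just the embeddings (reusing `cap_path` from above):

.. sourcecode:: pycon

>>> from gensim.models import KeyedVectors
>>> fbkv = KeyedVectors.load_fasttext_format(cap_path)
>>> 'landlady' in fbkv.vocab  # In-vocabulary lookup works as usual
True
>>> oov_vec = fbkv['landlord']  # Out-of-vocab lookup also works, via char n-grams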

Retrieve word vectors for in-vocabulary and out-of-vocabulary words:

.. sourcecode:: pycon
@@ -879,14 +880,6 @@ def train(self, sentences=None, corpus_file=None, total_examples=None, total_wor
>>> model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)

"""
cant_train = hasattr(self.trainables, 'syn1neg') and self.trainables.syn1neg is None
if cant_train:
raise ValueError(
'this model cannot be trained any further, '
'if this is a native model, try loading it with '
'FastText.load_fasttext_format(path, full_model=True)'
)

super(FastText, self).train(
sentences=sentences, corpus_file=corpus_file, total_examples=total_examples, total_words=total_words,
epochs=epochs, start_alpha=start_alpha, end_alpha=end_alpha, word_count=word_count,
@@ -937,13 +930,14 @@ def __contains__(self, word):
return self.wv.__contains__(word)

@classmethod
def load_fasttext_format(cls, model_file, encoding='utf8', full_model=True):
def load_fasttext_format(cls, model_file, encoding='utf8'):
"""Load the input-hidden weight matrix from Facebook's native fasttext `.bin` and `.vec` output files.

This function loads the full model. A full model allows
continuing training with more data, but also consumes more RAM and
takes longer to load. If you do not need to continue training and only
wish the work with the already-trained embeddings, use `full_model=False`
wish to work with the already-trained embeddings, use
:meth:`gensim.models.KeyedVectors.load_fasttext_format`
for faster loading and to save RAM.

Notes
@@ -963,9 +957,6 @@ def load_fasttext_format(cls, model_file, encoding='utf8', full_model=True):
as Gensim requires only the `.bin` file to load the entire fastText model.
encoding : str, optional
Specifies the file encoding.
full_model : boolean, optional
If False, skips loading the hidden output matrix. This saves a fair bit
of CPU time and RAM, but **prevents training continuation**.

Examples
--------
@@ -977,41 +968,27 @@ def load_fasttext_format(cls, model_file, encoding='utf8', full_model=True):
>>> from gensim.test.utils import datapath
>>>
>>> cap_path = datapath("crime-and-punishment.bin")
>>> fb_full = FastText.load_fasttext_format(cap_path, full_model=True)
>>> fb_model = FastText.load_fasttext_format(cap_path)
>>>
>>> 'landlord' in fb_full.wv.vocab # Word is out of vocabulary
>>> 'landlord' in fb_model.wv.vocab # Word is out of vocabulary
False
>>> oov_term = fb_full.wv['landlord']
>>> oov_term = fb_model.wv['landlord']
>>>
>>> 'landlady' in fb_full.wv.vocab # Word is in the vocabulary
>>> 'landlady' in fb_model.wv.vocab # Word is in the vocabulary
True
>>> iv_term = fb_full.wv['landlady']
>>> iv_term = fb_model.wv['landlady']
>>>
>>> new_sent = [['lord', 'of', 'the', 'rings'], ['lord', 'of', 'the', 'flies']]
>>> fb_full.build_vocab(new_sent, update=True)
>>> fb_full.train(sentences=new_sent, total_examples=len(new_sent), epochs=5)

Load quickly, infer (forego training continuation):

.. sourcecode:: pycon

>>> fb_partial = FastText.load_fasttext_format(cap_path, full_model=False)
>>>
>>> 'landlord' in fb_partial.wv.vocab # Word is out of vocabulary
False
>>> oov_term = fb_partial.wv['landlord']
>>>
>>> 'landlady' in fb_partial.wv.vocab # Word is in the vocabulary
True
>>> iv_term = fb_partial.wv['landlady']
>>> fb_model.build_vocab(new_sent, update=True)
>>> fb_model.train(sentences=new_sent, total_examples=len(new_sent), epochs=5)

Returns
-------
gensim.models.fasttext.FastText
The loaded model.

"""
return _load_fasttext_format(model_file, encoding=encoding, full_model=full_model)
return _load_fasttext_format(model_file, encoding=encoding, full_model=True)

def load_binary_data(self, encoding='utf8'):
"""Load data from a binary file created by Facebook's native FastText.
63 changes: 63 additions & 0 deletions gensim/models/keyedvectors.py
@@ -86,6 +86,23 @@
>>> wv_from_text = KeyedVectors.load_word2vec_format(datapath('word2vec_pre_kv_c'), binary=False) # C text format
>>> wv_from_bin = KeyedVectors.load_word2vec_format(datapath("euclidean_vectors.bin"), binary=True) # C bin format

You can also load vectors from Facebook's fastText binary format:

.. sourcecode:: pycon

>>> from gensim.test.utils import datapath
>>>
>>> cap_path = datapath("crime-and-punishment.bin")
>>> fbkv = KeyedVectors.load_fasttext_format(cap_path)
>>>
>>> 'landlord' in fbkv.vocab # Word is out of vocabulary
False
>>> oov_vector = fbkv['landlord']
>>>
>>> 'landlady' in fbkv.vocab # Word is in the vocabulary
True
>>> iv_vector = fbkv['landlady']

What can I do with word vectors?
================================

@@ -1475,6 +1492,52 @@ def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8',
cls, fname, fvocab=fvocab, binary=binary, encoding=encoding, unicode_errors=unicode_errors,
limit=limit, datatype=datatype)

@staticmethod
def load_fasttext_format(path, encoding='utf-8'):
"""Load word embeddings from a model saved in Facebook's native fasttext `.bin` format.

Parameters
----------
path : str
The location of the model file.
encoding : str, optional
Specifies the file encoding.

Returns
-------
gensim.models.keyedvectors.FastTextKeyedVectors
The word embeddings.

Examples
--------

Load and infer:

>>> from gensim.test.utils import datapath
>>>
>>> cap_path = datapath("crime-and-punishment.bin")
>>> fbkv = KeyedVectors.load_fasttext_format(cap_path)
>>>
>>> 'landlord' in fbkv.vocab # Word is out of vocabulary
False
>>> oov_vector = fbkv['landlord']
>>>
>>> 'landlady' in fbkv.vocab # Word is in the vocabulary
True
>>> iv_vector = fbkv['landlady']

See Also
--------

:meth:`gensim.models.fasttext.FastText.load_fasttext_format` loads
the full model, not just word embeddings, and enables you to continue
model training.

"""
from gensim.models.fasttext import _load_fasttext_format
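# full_model=False skips the hidden output matrix: loading is faster and uses
# less RAM, but the resulting embeddings cannot be trained further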
model_wrapper = _load_fasttext_format(path, full_model=False, encoding=encoding)
return model_wrapper.wv

def get_keras_embedding(self, train_embeddings=False):
"""Get a Keras 'Embedding' layer with weights set as the Word2Vec model's learned word embeddings.

15 changes: 0 additions & 15 deletions gensim/test/test_fasttext.py
@@ -59,21 +59,6 @@ def setUp(self):
self.test_model = FT_gensim.load_fasttext_format(self.test_model_file)
self.test_new_model_file = datapath('lee_fasttext_new')

def test_native_partial_model(self):
"""Can we skip loading the NN and still get a working model?"""
model = FT_gensim.load_fasttext_format(self.test_model_file, full_model=False)

#
# Training continuation should be impossible
#
self.assertIsNone(model.trainables.syn1neg)
self.assertRaises(ValueError, model.train, sentences,
total_examples=model.corpus_count, epochs=model.epochs)

model.wv['green']
model.wv['foobar']
model.wv['thisworddoesnotexist']

def test_training(self):
model = FT_gensim(size=10, min_count=1, hs=1, negative=0, seed=42, workers=1)
model.build_vocab(sentences)
11 changes: 11 additions & 0 deletions gensim/test/test_keyedvectors.py
@@ -306,6 +306,17 @@ def test(self):
self.assertTrue(np.allclose(m, norm))


class LoadFastTextFormatTest(unittest.TestCase):
def test(self):
cap_path = datapath("crime-and-punishment.bin")
fbkv = gensim.models.keyedvectors.KeyedVectors.load_fasttext_format(cap_path)
self.assertFalse('landlord' in fbkv.vocab)
self.assertTrue('landlady' in fbkv.vocab)
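# fastText composes OOV vectors from character n-grams, so the OOV
# vector should differ from the in-vocabulary one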
oov_vector = fbkv['landlord']
iv_vector = fbkv['landlady']
self.assertFalse(np.allclose(oov_vector, iv_vector))


if __name__ == '__main__':
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
unittest.main()