From e69fff895d309082400b97530c0821d31ffea734 Mon Sep 17 00:00:00 2001 From: Gordon Mohr Date: Tue, 17 Dec 2019 18:33:26 -0800 Subject: [PATCH] rm obsolete Vocab/Trainable/abstract/Wrapper classes, persistent callbacks (bug #2136), outdated tests/warnings; update usages --- docs/src/apiref.rst | 7 - docs/src/models/base_any2vec.rst | 10 - docs/src/models/deprecated/doc2vec.rst | 9 - docs/src/models/deprecated/fasttext.rst | 10 - .../models/deprecated/fasttext_wrapper.rst | 10 - docs/src/models/deprecated/keyedvectors.rst | 9 - docs/src/models/deprecated/word2vec.rst | 9 - docs/src/models/wrappers/fasttext.rst | 9 - gensim/models/__init__.py | 1 - gensim/models/base_any2vec.py | 1251 ----------- gensim/models/callbacks.py | 14 +- gensim/models/deprecated/__init__.py | 1 - gensim/models/deprecated/doc2vec.py | 1044 --------- gensim/models/deprecated/fasttext.py | 711 ------ gensim/models/deprecated/fasttext_wrapper.py | 461 ---- gensim/models/deprecated/keyedvectors.py | 1115 ---------- gensim/models/deprecated/old_saveload.py | 398 ---- gensim/models/deprecated/word2vec.py | 1907 ---------------- gensim/models/doc2vec.py | 335 ++- gensim/models/doc2vec_inner.pyx | 20 +- gensim/models/fasttext.py | 325 +-- gensim/models/fasttext_inner.pyx | 14 +- gensim/models/keyedvectors.py | 9 +- gensim/models/word2vec.py | 1909 ++++++++++++----- gensim/models/word2vec_inner.pyx | 16 +- gensim/models/wrappers/__init__.py | 1 - gensim/models/wrappers/fasttext.py | 40 - gensim/sklearn_api/d2vmodel.py | 19 +- gensim/sklearn_api/ftmodel.py | 22 +- gensim/sklearn_api/w2vmodel.py | 22 +- gensim/test/test_doc2vec.py | 74 +- gensim/test/test_fasttext.py | 119 +- gensim/test/test_fasttext_wrapper.py | 382 ---- gensim/test/test_keras_integration.py | 4 +- gensim/test/test_keyedvectors.py | 6 +- gensim/test/test_poincare.py | 8 +- gensim/test/test_sklearn_api.py | 48 +- gensim/test/test_translation_matrix.py | 27 +- gensim/test/test_word2vec.py | 150 +- 39 files changed, 1891 insertions(+), 8635 deletions(-) delete mode 100644 docs/src/models/base_any2vec.rst delete mode 100644 docs/src/models/deprecated/doc2vec.rst delete mode 100644 docs/src/models/deprecated/fasttext.rst delete mode 100644 docs/src/models/deprecated/fasttext_wrapper.rst delete mode 100644 docs/src/models/deprecated/keyedvectors.rst delete mode 100644 docs/src/models/deprecated/word2vec.rst delete mode 100644 docs/src/models/wrappers/fasttext.rst delete mode 100644 gensim/models/base_any2vec.py delete mode 100644 gensim/models/deprecated/__init__.py delete mode 100644 gensim/models/deprecated/doc2vec.py delete mode 100644 gensim/models/deprecated/fasttext.py delete mode 100644 gensim/models/deprecated/fasttext_wrapper.py delete mode 100644 gensim/models/deprecated/keyedvectors.py delete mode 100644 gensim/models/deprecated/old_saveload.py delete mode 100644 gensim/models/deprecated/word2vec.py delete mode 100644 gensim/models/wrappers/fasttext.py delete mode 100644 gensim/test/test_fasttext_wrapper.py diff --git a/docs/src/apiref.rst b/docs/src/apiref.rst index e20c1e2f1f..1e3e341487 100644 --- a/docs/src/apiref.rst +++ b/docs/src/apiref.rst @@ -61,13 +61,6 @@ Modules: models/wrappers/ldavowpalwabbit.rst models/wrappers/wordrank models/wrappers/varembed - models/wrappers/fasttext - models/deprecated/doc2vec - models/deprecated/fasttext - models/deprecated/word2vec - models/deprecated/keyedvectors - models/deprecated/fasttext_wrapper - models/base_any2vec similarities/docsim similarities/termsim similarities/index diff --git 
a/docs/src/models/base_any2vec.rst b/docs/src/models/base_any2vec.rst deleted file mode 100644 index e6685cda66..0000000000 --- a/docs/src/models/base_any2vec.rst +++ /dev/null @@ -1,10 +0,0 @@ -:mod:`models.base_any2vec` -- Base classes for any2vec models -============================================================= - -.. automodule:: gensim.models.base_any2vec - :synopsis: Base classes for any2vec models - :members: - :inherited-members: - :special-members: __getitem__ - :undoc-members: - :show-inheritance: diff --git a/docs/src/models/deprecated/doc2vec.rst b/docs/src/models/deprecated/doc2vec.rst deleted file mode 100644 index e8fb2d96b3..0000000000 --- a/docs/src/models/deprecated/doc2vec.rst +++ /dev/null @@ -1,9 +0,0 @@ -:mod:`models.deprecated.doc2vec` -- Deep learning with paragraph2vec -==================================================================== - -.. automodule:: gensim.models.deprecated.doc2vec - :synopsis: Deep learning with doc2vec - :members: - :inherited-members: - :undoc-members: - :show-inheritance: diff --git a/docs/src/models/deprecated/fasttext.rst b/docs/src/models/deprecated/fasttext.rst deleted file mode 100644 index 08de0234d2..0000000000 --- a/docs/src/models/deprecated/fasttext.rst +++ /dev/null @@ -1,10 +0,0 @@ -:mod:`models.deprecated.fasttext` -- FastText model -=================================================== - -.. automodule:: gensim.models.deprecated.fasttext - :synopsis: FastText model - :members: - :inherited-members: - :special-members: __getitem__ - :undoc-members: - :show-inheritance: diff --git a/docs/src/models/deprecated/fasttext_wrapper.rst b/docs/src/models/deprecated/fasttext_wrapper.rst deleted file mode 100644 index 020504de24..0000000000 --- a/docs/src/models/deprecated/fasttext_wrapper.rst +++ /dev/null @@ -1,10 +0,0 @@ -:mod:`models.deprecated.fasttext_wrapper` -- Wrapper for Facebook implementation of FastText model -================================================================================================== - -.. automodule:: gensim.models.deprecated.fasttext_wrapper - :synopsis: FastText model - :members: - :inherited-members: - :special-members: __getitem__ - :undoc-members: - :show-inheritance: diff --git a/docs/src/models/deprecated/keyedvectors.rst b/docs/src/models/deprecated/keyedvectors.rst deleted file mode 100644 index 7d55cbc798..0000000000 --- a/docs/src/models/deprecated/keyedvectors.rst +++ /dev/null @@ -1,9 +0,0 @@ -:mod:`models.deprecated.keyedvectors` -- Store and query word vectors -===================================================================== - -.. automodule:: gensim.models.deprecated.keyedvectors - :synopsis: Store and query word vectors - :members: - :inherited-members: - :undoc-members: - :show-inheritance: diff --git a/docs/src/models/deprecated/word2vec.rst b/docs/src/models/deprecated/word2vec.rst deleted file mode 100644 index 3b80aaf196..0000000000 --- a/docs/src/models/deprecated/word2vec.rst +++ /dev/null @@ -1,9 +0,0 @@ -:mod:`models.deprecated.word2vec` -- Deep learning with word2vec -================================================================ - -.. 
automodule:: gensim.models.deprecated.word2vec - :synopsis: Deep learning with word2vec - :members: - :inherited-members: - :undoc-members: - :show-inheritance: diff --git a/docs/src/models/wrappers/fasttext.rst b/docs/src/models/wrappers/fasttext.rst deleted file mode 100644 index 4476cc7b43..0000000000 --- a/docs/src/models/wrappers/fasttext.rst +++ /dev/null @@ -1,9 +0,0 @@ -:mod:`models.wrappers.fasttext` -- Wrapper for FastText implementation from Facebook -==================================================================================== - -.. automodule:: gensim.models.wrappers.fasttext - :synopsis: FastText - :members: - :inherited-members: - :undoc-members: - :show-inheritance: diff --git a/gensim/models/__init__.py b/gensim/models/__init__.py index 96ca698b27..ee054b167d 100644 --- a/gensim/models/__init__.py +++ b/gensim/models/__init__.py @@ -23,7 +23,6 @@ from .translation_matrix import TranslationMatrix, BackMappingTranslationMatrix # noqa:F401 from . import wrappers # noqa:F401 -from . import deprecated # noqa:F401 from gensim import interfaces, utils diff --git a/gensim/models/base_any2vec.py b/gensim/models/base_any2vec.py deleted file mode 100644 index f0a33ba7ff..0000000000 --- a/gensim/models/base_any2vec.py +++ /dev/null @@ -1,1251 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Author: Shiva Manne -# Copyright (C) 2018 RaRe Technologies s.r.o. -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - -r"""This module contains base classes required for implementing \*2vec algorithms. - -The class hierarchy is designed to facilitate adding more concrete implementations for creating embeddings. -In the most general case, the purpose of this class is to transform an arbitrary representation to a numerical vector -(embedding). This is represented by the base :class:`~gensim.models.base_any2vec.BaseAny2VecModel`. The input space in -most cases (in the NLP field at least) is plain text. For this reason, we enrich the class hierarchy with the abstract -:class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` to be used as a base for models where the input -space is text. - -Notes ------ -Even though this is the usual case, not all embeddings transform text, such as the -:class:`~gensim.models.poincare.PoincareModel` that embeds graphs. - -See Also --------- -:class:`~gensim.models.word2vec.Word2Vec`. - Word2Vec model - embeddings for words. -:class:`~gensim.models.fasttext.FastText`. - FastText model - embeddings for words (ngram-based). -:class:`~gensim.models.doc2vec.Doc2Vec`. - Doc2Vec model - embeddings for documents. -:class:`~gensim.models.poincare.PoincareModel` - Poincare model - embeddings for graphs. - -""" - -from gensim import utils -import logging -from timeit import default_timer -import threading -from six.moves import range -from six import itervalues, string_types -from gensim import matutils -from numpy import float32 as REAL, ones, random, dtype -from types import GeneratorType -import os -import copy - - -try: - from queue import Queue -except ImportError: - from Queue import Queue - -logger = logging.getLogger(__name__) - - -class BaseAny2VecModel(utils.SaveLoad): - def __init__(self, workers=3, vector_size=100, epochs=5, callbacks=(), batch_words=10000): - r"""Base class for training, using and evaluating \*2vec model. - - Contains implementation for multi-threaded training. 
The purpose of this class is to provide a - reference interface for concrete embedding implementations, whether the input space is a corpus - of words, documents or anything else. At the same time, functionality that we expect to be common - for those implementations is provided here to avoid code duplication. - - In the special but usual case where the input space consists of words, a more specialized layer - is provided, consider inheriting from :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` - - Parameters - ---------- - workers : int, optional - Number of working threads, used for multithreading. - vector_size : int, optional - Dimensionality of the feature vectors. - epochs : int, optional - Number of iterations (epochs) of training through the corpus. - callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional - List of callbacks that need to be executed/run at specific stages during training. - batch_words : int, optional - Number of words to be processed by a single job. - - Notes - ----- - A subclass should initialize the following attributes: - - * self.kv - keyed vectors in model (see :class:`~gensim.models.keyedvectors.Word2VecKeyedVectors` as example) - * self.vocabulary - vocabulary (see :class:`~gensim.models.word2vec.Word2VecVocab` as example) - * self.trainables - internal matrices (see :class:`~gensim.models.word2vec.Word2VecTrainables` as example) - - """ - self.vector_size = int(vector_size) - self.workers = int(workers) - self.epochs = epochs - self.train_count = 0 - self.total_train_time = 0 - self.batch_words = batch_words - self.model_trimmed_post_training = False - self.callbacks = callbacks - - def _get_job_params(self, cur_epoch): - """Get job parameters required for each batch.""" - raise NotImplementedError() - - def _set_train_params(self, **kwargs): - """Set model parameters required for training.""" - raise NotImplementedError() - - def _update_job_params(self, job_params, epoch_progress, cur_epoch): - """Get updated job parameters based on the epoch_progress and cur_epoch.""" - raise NotImplementedError() - - def _get_thread_working_mem(self): - """Get private working memory per thread.""" - raise NotImplementedError() - - def _raw_word_count(self, job): - """Get the number of words in a given job.""" - raise NotImplementedError() - - def _clear_post_train(self): - """Resets certain properties of the model post training. eg. `keyedvectors.vectors_norm`.""" - raise NotImplementedError() - - def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch, - total_examples=None, total_words=None, **kwargs): - raise NotImplementedError() - - def _do_train_job(self, data_iterable, job_parameters, thread_private_mem): - """Train a single batch. Return 2-tuple `(effective word count, total word count)`.""" - raise NotImplementedError() - - def _check_training_sanity(self, epochs=None, total_examples=None, total_words=None, **kwargs): - """Check that the training parameters provided make sense. e.g. 
raise error if `epochs` not provided.""" - raise NotImplementedError() - - def _check_input_data_sanity(self, data_iterable=None, corpus_file=None): - """Check that only one argument is None.""" - if not (data_iterable is None) ^ (corpus_file is None): - raise ValueError("You must provide only one of singlestream or corpus_file arguments.") - - def _worker_loop_corpusfile(self, corpus_file, thread_id, offset, cython_vocab, progress_queue, cur_epoch=0, - total_examples=None, total_words=None, **kwargs): - """Train the model on a `corpus_file` in LineSentence format. - - This function will be called in parallel by multiple workers (threads or processes) to make - optimal use of multicore machines. - - Parameters - ---------- - corpus_file : str - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. - thread_id : int - Thread index starting from 0 to `number of workers - 1`. - offset : int - Offset (in bytes) in the `corpus_file` for particular worker. - cython_vocab : :class:`~gensim.models.word2vec_inner.CythonVocab` - Copy of the vocabulary in order to access it without GIL. - progress_queue : Queue of (int, int, int) - A queue of progress reports. Each report is represented as a tuple of these 3 elements: - * Size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. - **kwargs : object - Additional key word parameters for the specific model inheriting from this class. - - """ - thread_private_mem = self._get_thread_working_mem() - - examples, tally, raw_tally = self._do_train_epoch( - corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch, - total_examples=total_examples, total_words=total_words, **kwargs) - - progress_queue.put((examples, tally, raw_tally)) - progress_queue.put(None) - - def _worker_loop(self, job_queue, progress_queue): - """Train the model, lifting batches of data from the queue. - - This function will be called in parallel by multiple workers (threads or processes) to make - optimal use of multicore machines. - - Parameters - ---------- - job_queue : Queue of (list of objects, (str, int)) - A queue of jobs still to be processed. The worker will take up jobs from this queue. - Each job is represented by a tuple where the first element is the corpus chunk to be processed and - the second is the dictionary of parameters. - progress_queue : Queue of (int, int, int) - A queue of progress reports. Each report is represented as a tuple of these 3 elements: - * Size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. 
- - """ - thread_private_mem = self._get_thread_working_mem() - jobs_processed = 0 - while True: - job = job_queue.get() - if job is None: - progress_queue.put(None) - break # no more jobs => quit this worker - data_iterable, job_parameters = job - - for callback in self.callbacks: - callback.on_batch_begin(self) - - tally, raw_tally = self._do_train_job(data_iterable, job_parameters, thread_private_mem) - - for callback in self.callbacks: - callback.on_batch_end(self) - - progress_queue.put((len(data_iterable), tally, raw_tally)) # report back progress - jobs_processed += 1 - logger.debug("worker exiting, processed %i jobs", jobs_processed) - - def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None): - """Fill the jobs queue using the data found in the input stream. - - Each job is represented by a tuple where the first element is the corpus chunk to be processed and - the second is a dictionary of parameters. - - Parameters - ---------- - data_iterator : iterable of list of objects - The input dataset. This will be split in chunks and these chunks will be pushed to the queue. - job_queue : Queue of (list of object, dict of (str, int)) - A queue of jobs still to be processed. The worker will take up jobs from this queue. - Each job is represented by a tuple where the first element is the corpus chunk to be processed and - the second is the dictionary of parameters. - cur_epoch : int, optional - The current training epoch, needed to compute the training parameters for each job. - For example in many implementations the learning rate would be dropping with the number of epochs. - total_examples : int, optional - Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences - in a corpus. Used to log progress. - total_words : int, optional - Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw - words in a corpus. Used to log progress. - - """ - job_batch, batch_size = [], 0 - pushed_words, pushed_examples = 0, 0 - next_job_params = self._get_job_params(cur_epoch) - job_no = 0 - - for data_idx, data in enumerate(data_iterator): - data_length = self._raw_word_count([data]) - - # can we fit this sentence into the existing job batch? - if batch_size + data_length <= self.batch_words: - # yes => add it to the current job - job_batch.append(data) - batch_size += data_length - else: - job_no += 1 - job_queue.put((job_batch, next_job_params)) - - # update the learning rate for the next job - if total_examples: - # examples-based decay - pushed_examples += len(job_batch) - epoch_progress = 1.0 * pushed_examples / total_examples - else: - # words-based decay - pushed_words += self._raw_word_count(job_batch) - epoch_progress = 1.0 * pushed_words / total_words - next_job_params = self._update_job_params(next_job_params, epoch_progress, cur_epoch) - - # add the sentence that didn't fit as the first item of a new job - job_batch, batch_size = [data], data_length - # add the last job too (may be significantly smaller than batch_words) - if job_batch: - job_no += 1 - job_queue.put((job_batch, next_job_params)) - - if job_no == 0 and self.train_count == 0: - logger.warning( - "train() called with an empty iterator (if not intended, " - "be sure to provide a corpus that offers restartable iteration = an iterable)." - ) - - # give the workers heads up that they can finish -- no more work! 
- for _ in range(self.workers): - job_queue.put(None) - logger.debug("job loop exiting, total %i jobs", job_no) - - def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, total_examples, - raw_word_count, total_words, trained_word_count, elapsed): - raise NotImplementedError() - - def _log_epoch_end(self, cur_epoch, example_count, total_examples, raw_word_count, total_words, - trained_word_count, elapsed, is_corpus_file_mode): - raise NotImplementedError() - - def _log_train_end(self, raw_word_count, trained_word_count, total_elapsed, job_tally): - raise NotImplementedError() - - def _log_epoch_progress(self, progress_queue=None, job_queue=None, cur_epoch=0, total_examples=None, - total_words=None, report_delay=1.0, is_corpus_file_mode=None): - """Get the progress report for a single training epoch. - - Parameters - ---------- - progress_queue : Queue of (int, int, int) - A queue of progress reports. Each report is represented as a tuple of these 3 elements: - * size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. - job_queue : Queue of (list of object, dict of (str, int)) - A queue of jobs still to be processed. The worker will take up jobs from this queue. - Each job is represented by a tuple where the first element is the corpus chunk to be processed and - the second is the dictionary of parameters. - cur_epoch : int, optional - The current training epoch, needed to compute the training parameters for each job. - For example in many implementations the learning rate would be dropping with the number of epochs. - total_examples : int, optional - Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences - in a corpus. Used to log progress. - total_words : int, optional - Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw - words in a corpus. Used to log progress. - report_delay : float, optional - Number of seconds between two consecutive progress report messages in the logger. - is_corpus_file_mode : bool, optional - Whether training is file-based (corpus_file argument) or not. - - Returns - ------- - (int, int, int) - The epoch report consisting of three elements: - * size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. 
- - """ - example_count, trained_word_count, raw_word_count = 0, 0, 0 - start, next_report = default_timer() - 0.00001, 1.0 - job_tally = 0 - unfinished_worker_count = self.workers - - while unfinished_worker_count > 0: - report = progress_queue.get() # blocks if workers too slow - if report is None: # a thread reporting that it finished - unfinished_worker_count -= 1 - logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count) - continue - examples, trained_words, raw_words = report - job_tally += 1 - - # update progress stats - example_count += examples - trained_word_count += trained_words # only words in vocab & sampled - raw_word_count += raw_words - - # log progress once every report_delay seconds - elapsed = default_timer() - start - if elapsed >= next_report: - self._log_progress( - job_queue, progress_queue, cur_epoch, example_count, total_examples, - raw_word_count, total_words, trained_word_count, elapsed) - next_report = elapsed + report_delay - # all done; report the final stats - elapsed = default_timer() - start - self._log_epoch_end( - cur_epoch, example_count, total_examples, raw_word_count, total_words, - trained_word_count, elapsed, is_corpus_file_mode) - self.total_train_time += elapsed - return trained_word_count, raw_word_count, job_tally - - def _train_epoch_corpusfile(self, corpus_file, cur_epoch=0, total_examples=None, total_words=None, **kwargs): - """Train the model for a single epoch. - - Parameters - ---------- - corpus_file : str - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. - cur_epoch : int, optional - The current training epoch, needed to compute the training parameters for each job. - For example in many implementations the learning rate would be dropping with the number of epochs. - total_examples : int, optional - Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences - in a corpus, used to log progress. - total_words : int - Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw - words in a corpus, used to log progress. Must be provided in order to seek in `corpus_file`. - **kwargs : object - Additional key word parameters for the specific model inheriting from this class. - - Returns - ------- - (int, int, int) - The training report for this epoch consisting of three elements: - * Size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. 
- - """ - if not total_words: - raise ValueError("total_words must be provided alongside corpus_file argument.") - - from gensim.models.word2vec_corpusfile import CythonVocab - from gensim.models.fasttext import FastText - cython_vocab = CythonVocab(self.wv, hs=self.hs, fasttext=isinstance(self, FastText)) - - progress_queue = Queue() - - corpus_file_size = os.path.getsize(corpus_file) - - thread_kwargs = copy.copy(kwargs) - thread_kwargs['cur_epoch'] = cur_epoch - thread_kwargs['total_examples'] = total_examples - thread_kwargs['total_words'] = total_words - workers = [ - threading.Thread( - target=self._worker_loop_corpusfile, - args=( - corpus_file, thread_id, corpus_file_size / self.workers * thread_id, cython_vocab, progress_queue - ), - kwargs=thread_kwargs - ) for thread_id in range(self.workers) - ] - - for thread in workers: - thread.daemon = True - thread.start() - - trained_word_count, raw_word_count, job_tally = self._log_epoch_progress( - progress_queue=progress_queue, job_queue=None, cur_epoch=cur_epoch, - total_examples=total_examples, total_words=total_words, is_corpus_file_mode=True) - - return trained_word_count, raw_word_count, job_tally - - def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_words=None, - queue_factor=2, report_delay=1.0): - """Train the model for a single epoch. - - Parameters - ---------- - data_iterable : iterable of list of object - The input corpus. This will be split in chunks and these chunks will be pushed to the queue. - cur_epoch : int, optional - The current training epoch, needed to compute the training parameters for each job. - For example in many implementations the learning rate would be dropping with the number of epochs. - total_examples : int, optional - Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences - in a corpus, used to log progress. - total_words : int, optional - Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw - words in a corpus, used to log progress. - queue_factor : int, optional - Multiplier for size of queue -> size = number of workers * queue_factor. - report_delay : float, optional - Number of seconds between two consecutive progress report messages in the logger. - - Returns - ------- - (int, int, int) - The training report for this epoch consisting of three elements: - * Size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. 
- - """ - job_queue = Queue(maxsize=queue_factor * self.workers) - progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) - - workers = [ - threading.Thread( - target=self._worker_loop, - args=(job_queue, progress_queue,)) - for _ in range(self.workers) - ] - - workers.append(threading.Thread( - target=self._job_producer, - args=(data_iterable, job_queue), - kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words})) - - for thread in workers: - thread.daemon = True # make interrupting the process with ctrl+c easier - thread.start() - - trained_word_count, raw_word_count, job_tally = self._log_epoch_progress( - progress_queue, job_queue, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words, - report_delay=report_delay, is_corpus_file_mode=False) - - return trained_word_count, raw_word_count, job_tally - - def train(self, data_iterable=None, corpus_file=None, epochs=None, total_examples=None, - total_words=None, queue_factor=2, report_delay=1.0, callbacks=(), **kwargs): - """Train the model for multiple epochs using multiple workers. - - Parameters - ---------- - data_iterable : iterable of list of object - The input corpus. This will be split in chunks and these chunks will be pushed to the queue. - corpus_file : str, optional - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. - If you use this argument instead of `data_iterable`, you must provide `total_words` argument as well. - epochs : int, optional - Number of epochs (training iterations over the whole input) of training. - total_examples : int, optional - Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences - in a corpus, used to log progress. - total_words : int, optional - Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw - words in a corpus, used to log progress. - queue_factor : int, optional - Multiplier for size of queue -> size = number of workers * queue_factor. - report_delay : float, optional - Number of seconds between two consecutive progress report messages in the logger. - callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional - List of callbacks to execute at specific stages during training. - **kwargs : object - Additional key word parameters for the specific model inheriting from this class. - - Returns - ------- - (int, int) - The total training report consisting of two elements: - * size of total data processed, for example number of sentences in the whole corpus. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). 
- - """ - self._set_train_params(**kwargs) - if callbacks: - self.callbacks = callbacks - self.epochs = epochs - self._check_training_sanity( - epochs=epochs, - total_examples=total_examples, - total_words=total_words, **kwargs) - - for callback in self.callbacks: - callback.on_train_begin(self) - - trained_word_count = 0 - raw_word_count = 0 - start = default_timer() - 0.00001 - job_tally = 0 - - for cur_epoch in range(self.epochs): - for callback in self.callbacks: - callback.on_epoch_begin(self) - - if data_iterable is not None: - trained_word_count_epoch, raw_word_count_epoch, job_tally_epoch = self._train_epoch( - data_iterable, cur_epoch=cur_epoch, total_examples=total_examples, - total_words=total_words, queue_factor=queue_factor, report_delay=report_delay) - else: - trained_word_count_epoch, raw_word_count_epoch, job_tally_epoch = self._train_epoch_corpusfile( - corpus_file, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words, **kwargs) - - trained_word_count += trained_word_count_epoch - raw_word_count += raw_word_count_epoch - job_tally += job_tally_epoch - - for callback in self.callbacks: - callback.on_epoch_end(self) - - # Log overall time - total_elapsed = default_timer() - start - self._log_train_end(raw_word_count, trained_word_count, total_elapsed, job_tally) - - self.train_count += 1 # number of times train() has been called - self._clear_post_train() - - for callback in self.callbacks: - callback.on_train_end(self) - return trained_word_count, raw_word_count - - @classmethod - def load(cls, fname_or_handle, **kwargs): - """Load a previously saved object (using :meth:`gensim.models.base_any2vec.BaseAny2VecModel.save`) from a file. - - Parameters - ---------- - fname_or_handle : {str, file-like object} - Path to file that contains needed object or handle to an open file. - **kwargs : object - Keyword arguments propagated to :meth:`~gensim.utils.SaveLoad.load`. - - See Also - -------- - :meth:`~gensim.models.base_any2vec.BaseAny2VecModel.save` - Method for save a model. - - Returns - ------- - object - Object loaded from `fname_or_handle`. - - Raises - ------ - IOError - When methods are called on an instance (should be called on a class, this is a class method). - - """ - return super(BaseAny2VecModel, cls).load(fname_or_handle, **kwargs) - - def save(self, fname_or_handle, **kwargs): - """Save the object to file. - - Parameters - ---------- - fname_or_handle : {str, file-like object} - Path to file where the model will be persisted. - **kwargs : object - Key word arguments propagated to :meth:`~gensim.utils.SaveLoad.save`. - - See Also - -------- - :meth:`~gensim.models.base_any2vec.BaseAny2VecModel.load` - Method for load model after current method. - - """ - super(BaseAny2VecModel, self).save(fname_or_handle, **kwargs) - - -class BaseWordEmbeddingsModel(BaseAny2VecModel): - def __init__(self, sentences=None, corpus_file=None, workers=3, vector_size=100, epochs=5, callbacks=(), - batch_words=10000, trim_rule=None, sg=0, alpha=0.025, window=5, seed=1, hs=0, negative=5, - ns_exponent=0.75, cbow_mean=1, min_alpha=0.0001, compute_loss=False, **kwargs): - """Base class containing common methods for training, using & evaluating word embeddings learning models. - - Parameters - ---------- - sentences : iterable of list of str, optional - Can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. 
- See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` for such examples. - corpus_file : str, optional - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. - You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or - `corpus_file` arguments need to be passed (or none of them, in that case, the model is left uninitialized). - workers : int, optional - Number of working threads, used for multiprocessing. - vector_size : int, optional - Dimensionality of the feature vectors. - epochs : int, optional - Number of iterations (epochs) of training through the corpus. - callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional - List of callbacks that need to be executed/run at specific stages during training. - batch_words : int, optional - Number of words to be processed by a single job. - trim_rule : function, optional - Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, - be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), - or a callable that accepts parameters (word, count, min_count) and returns either - :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - The rule, if given, is only used to prune vocabulary during current method call and is not stored as part - of the model. - - The input parameters are of the following types: - * `word` (str) - the word we are examining - * `count` (int) - the word's frequency count in the corpus - * `min_count` (int) - the minimum count threshold. - - sg : {1, 0}, optional - Defines the training algorithm. If 1, skip-gram is used, otherwise, CBOW is employed. - alpha : float, optional - The beginning learning rate. This will linearly reduce with iterations until it reaches `min_alpha`. - window : int, optional - The maximum distance between the current and predicted word within a sentence. - seed : int, optional - Seed for the random number generator. Initial vectors for each word are seeded with a hash of - the concatenation of word + `str(seed)`. - Note that for a fully deterministically-reproducible run, you must also limit the model to a single worker - thread (`workers=1`), to eliminate ordering jitter from OS thread scheduling. - In Python 3, reproducibility between interpreter launches also requires use of the `PYTHONHASHSEED` - environment variable to control hash randomization. - hs : {1,0}, optional - If 1, hierarchical softmax will be used for model training. - If set to 0, and `negative` is non-zero, negative sampling will be used. - negative : int, optional - If > 0, negative sampling will be used, the int for negative specifies how many "noise words" - should be drawn (usually between 5-20). - If set to 0, no negative sampling is used. - cbow_mean : {1,0}, optional - If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. - min_alpha : float, optional - Final learning rate. Drops linearly with the number of iterations from `alpha`. - compute_loss : bool, optional - If True, loss will be computed while training the Word2Vec model and stored in - :attr:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.running_training_loss` attribute. 
- **kwargs : object - Key word arguments needed to allow children classes to accept more arguments. - - See Also - -------- - :class:`~gensim.models.word2vec.Word2Vec`. - Word2Vec model - embeddings for words. - :class:`~gensim.models.fasttext.FastText`. - FastText model - embeddings for words (ngram-based). - :class:`~gensim.models.doc2vec.Doc2Vec`. - Doc2Vec model - embeddings for documents. - :class:`~gensim.models.poincare.PoincareModel` - Poincare model - embeddings for graphs. - - """ - self.sg = int(sg) - if vector_size % 4 != 0: - logger.warning("consider setting layer size to a multiple of 4 for greater performance") - self.alpha = float(alpha) - self.window = int(window) - self.random = random.RandomState(seed) - self.min_alpha = float(min_alpha) - self.hs = int(hs) - self.negative = int(negative) - self.ns_exponent = ns_exponent - self.cbow_mean = int(cbow_mean) - self.compute_loss = bool(compute_loss) - self.running_training_loss = 0 - self.min_alpha_yet_reached = float(alpha) - self.corpus_count = 0 - self.corpus_total_words = 0 - - super(BaseWordEmbeddingsModel, self).__init__( - workers=workers, vector_size=vector_size, epochs=epochs, callbacks=callbacks, batch_words=batch_words) - - if sentences is not None or corpus_file is not None: - self._check_input_data_sanity(data_iterable=sentences, corpus_file=corpus_file) - if corpus_file is not None and not isinstance(corpus_file, string_types): - raise TypeError("You must pass string as the corpus_file argument.") - elif isinstance(sentences, GeneratorType): - raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.") - - self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule) - self.train( - sentences=sentences, corpus_file=corpus_file, total_examples=self.corpus_count, - total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha, - end_alpha=self.min_alpha, compute_loss=compute_loss) - else: - if trim_rule is not None: - logger.warning( - "The rule, if given, is only used to prune vocabulary during build_vocab() " - "and is not stored as part of the model. Model initialized without sentences. " - "trim_rule provided, if any, will be ignored.") - - def _clear_post_train(self): - raise NotImplementedError() - - def _do_train_job(self, data_iterable, job_parameters, thread_private_mem): - raise NotImplementedError() - - def _set_train_params(self, **kwargs): - raise NotImplementedError() - - def __str__(self): - """Get a human readable representation of the object. - - Returns - ------- - str - A human readable string containing the class name, as well as the size of dictionary, number of - features and starting learning rate used by the object. - - """ - return "%s(vocab=%s, size=%s, alpha=%s)" % ( - self.__class__.__name__, len(self.wv.index2word), self.vector_size, self.alpha - ) - - def build_vocab(self, sentences=None, corpus_file=None, update=False, progress_per=10000, - keep_raw_vocab=False, trim_rule=None, **kwargs): - """Build vocabulary from a sequence of sentences (can be a once-only generator stream). - - Parameters - ---------- - sentences : iterable of list of str - Can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` module for such examples. 
- corpus_file : str, optional - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. - You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or - `corpus_file` arguments need to be passed (not both of them). - update : bool - If true, the new words in `sentences` will be added to model's vocab. - progress_per : int, optional - Indicates how many words to process before showing/updating the progress. - keep_raw_vocab : bool, optional - If False, the raw vocabulary will be deleted after the scaling is done to free up RAM. - trim_rule : function, optional - Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, - be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), - or a callable that accepts parameters (word, count, min_count) and returns either - :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - The rule, if given, is only used to prune vocabulary during current method call and is not stored as part - of the model. - - The input parameters are of the following types: - * `word` (str) - the word we are examining - * `count` (int) - the word's frequency count in the corpus - * `min_count` (int) - the minimum count threshold. - - **kwargs : object - Key word arguments propagated to `self.vocabulary.prepare_vocab` - - """ - total_words, corpus_count = self.vocabulary.scan_vocab( - sentences=sentences, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule) - self.corpus_count = corpus_count - self.corpus_total_words = total_words - report_values = self.vocabulary.prepare_vocab( - self.hs, self.negative, self.wv, update=update, keep_raw_vocab=keep_raw_vocab, - trim_rule=trim_rule, **kwargs) - report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words']) - self.trainables.prepare_weights(self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary) - - def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False): - """Build vocabulary from a dictionary of word frequencies. - - Parameters - ---------- - word_freq : dict of (str, int) - A mapping from a word in the vocabulary to its frequency count. - keep_raw_vocab : bool, optional - If False, delete the raw vocabulary after the scaling is done to free up RAM. - corpus_count : int, optional - Even if no corpus is provided, this argument can set corpus_count explicitly. - trim_rule : function, optional - Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, - be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), - or a callable that accepts parameters (word, count, min_count) and returns either - :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - The rule, if given, is only used to prune vocabulary during current method call and is not stored as part - of the model. - - The input parameters are of the following types: - * `word` (str) - the word we are examining - * `count` (int) - the word's frequency count in the corpus - * `min_count` (int) - the minimum count threshold. 
- - update : bool, optional - If true, the new provided words in `word_freq` dict will be added to model's vocab. - - """ - logger.info("Processing provided word frequencies") - # Instead of scanning text, this will assign provided word frequencies dictionary(word_freq) - # to be directly the raw vocab - raw_vocab = word_freq - logger.info( - "collected %i different raw word, with total frequency of %i", - len(raw_vocab), sum(itervalues(raw_vocab)) - ) - - # Since no sentences are provided, this is to control the corpus_count. - self.corpus_count = corpus_count or 0 - self.vocabulary.raw_vocab = raw_vocab - - # trim by min_count & precalculate downsampling - report_values = self.vocabulary.prepare_vocab( - self.hs, self.negative, self.wv, keep_raw_vocab=keep_raw_vocab, - trim_rule=trim_rule, update=update) - report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words']) - self.trainables.prepare_weights( - self.hs, self.negative, self.wv, update=update, vocabulary=self.vocabulary) # build tables & arrays - - def estimate_memory(self, vocab_size=None, report=None): - """Estimate required memory for a model using current settings and provided vocabulary size. - - Parameters - ---------- - vocab_size : int, optional - Number of unique tokens in the vocabulary - report : dict of (str, int), optional - A dictionary from string representations of the model's memory consuming members to their size in bytes. - - Returns - ------- - dict of (str, int) - A dictionary from string representations of the model's memory consuming members to their size in bytes. - - """ - vocab_size = vocab_size or len(self.wv.vocab) - report = report or {} - report['vocab'] = vocab_size * (700 if self.hs else 500) - report['vectors'] = vocab_size * self.vector_size * dtype(REAL).itemsize - if self.hs: - report['syn1'] = vocab_size * self.trainables.layer1_size * dtype(REAL).itemsize - if self.negative: - report['syn1neg'] = vocab_size * self.trainables.layer1_size * dtype(REAL).itemsize - report['total'] = sum(report.values()) - logger.info( - "estimated required memory for %i words and %i dimensions: %i bytes", - vocab_size, self.vector_size, report['total'] - ) - return report - - def train(self, sentences=None, corpus_file=None, total_examples=None, total_words=None, - epochs=None, start_alpha=None, end_alpha=None, word_count=0, - queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(), **kwargs): - """Train the model. If the hyper-parameters are passed, they override the ones set in the constructor. - - Parameters - ---------- - sentences : iterable of list of str - Can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` module for such examples. - corpus_file : str, optional - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. - You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or - `corpus_file` arguments need to be passed (not both of them). - total_examples : int, optional - Count of sentences. - total_words : int, optional - Count of raw words in sentences. - epochs : int, optional - Number of iterations (epochs) over the corpus. - start_alpha : float, optional - Initial learning rate. - end_alpha : float, optional - Final learning rate. 
Drops linearly with the number of iterations from `start_alpha`. - word_count : int, optional - Count of words already trained. Leave this to 0 for the usual case of training on all words in sentences. - queue_factor : int, optional - Multiplier for size of queue -> size = number of workers * queue_factor. - report_delay : float, optional - Seconds to wait before reporting progress. - compute_loss : bool, optional - If True, loss will be computed while training the Word2Vec model and stored in - :attr:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.running_training_loss`. - callbacks : list of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional - List of callbacks that need to be executed/run at specific stages during training. - **kwargs : object - Additional key word parameters for the specific model inheriting from this class. - - Returns - ------- - (int, int) - Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count). - - """ - - self.alpha = start_alpha or self.alpha - self.min_alpha = end_alpha or self.min_alpha - self.compute_loss = compute_loss - self.running_training_loss = 0.0 - return super(BaseWordEmbeddingsModel, self).train( - data_iterable=sentences, corpus_file=corpus_file, total_examples=total_examples, - total_words=total_words, epochs=epochs, start_alpha=start_alpha, end_alpha=end_alpha, word_count=word_count, - queue_factor=queue_factor, report_delay=report_delay, compute_loss=compute_loss, callbacks=callbacks, - **kwargs) - - def _get_job_params(self, cur_epoch): - """Get the learning rate used in the current epoch. - - Parameters - ---------- - cur_epoch : int - Current iteration through the corpus - - Returns - ------- - float - The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`). - - """ - alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs) - return alpha - - def _update_job_params(self, job_params, epoch_progress, cur_epoch): - """Get the correct learning rate for the next iteration. - - Parameters - ---------- - job_params : dict of (str, obj) - UNUSED. - epoch_progress : float - Ratio of finished work in the current epoch. - cur_epoch : int - Number of current iteration. - - Returns - ------- - float - The learning rate to be used in the next training epoch. - - """ - start_alpha = self.alpha - end_alpha = self.min_alpha - progress = (cur_epoch + epoch_progress) / self.epochs - next_alpha = start_alpha - (start_alpha - end_alpha) * progress - next_alpha = max(end_alpha, next_alpha) - self.min_alpha_yet_reached = next_alpha - return next_alpha - - def _get_thread_working_mem(self): - """Computes the memory used per worker thread. - - Returns - ------- - (np.ndarray, np.ndarray) - Each worker threads private work memory. - - """ - work = matutils.zeros_aligned(self.trainables.layer1_size, dtype=REAL) # per-thread private work memory - neu1 = matutils.zeros_aligned(self.trainables.layer1_size, dtype=REAL) - return work, neu1 - - def _raw_word_count(self, job): - """Get the number of words in a given job. - - Parameters - ---------- - job: iterable of list of str - The corpus chunk processed in a single batch. - - Returns - ------- - int - Number of raw words in the corpus chunk. - - """ - return sum(len(sentence) for sentence in job) - - def _check_training_sanity(self, epochs=None, total_examples=None, total_words=None, **kwargs): - """Checks whether the training parameters make sense. 
- - Called right before training starts in :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.train` - and raises warning or errors depending on the severity of the issue in case an inconsistent parameter - combination is detected. - - Parameters - ---------- - epochs : int, optional - Number of training epochs. Must have a (non None) value. - total_examples : int, optional - Number of documents in the corpus. Either `total_examples` or `total_words` **must** be supplied. - total_words : int, optional - Number of words in the corpus. Either `total_examples` or `total_words` **must** be supplied. - **kwargs : object - Unused. Present to preserve signature among base and inherited implementations. - - Raises - ------ - RuntimeError - If one of the required training pre/post processing steps have not been performed. - ValueError - If the combination of input parameters is inconsistent. - - """ - if self.alpha > self.min_alpha_yet_reached: - logger.warning("Effective 'alpha' higher than previous training cycles") - if self.model_trimmed_post_training: - raise RuntimeError("Parameters for training were discarded using model_trimmed_post_training method") - - if not self.wv.vocab: # should be set by `build_vocab` - raise RuntimeError("you must first build vocabulary before training the model") - if not len(self.wv.vectors): - raise RuntimeError("you must initialize vectors before training the model") - - if not hasattr(self, 'corpus_count'): - raise ValueError( - "The number of examples in the training corpus is missing. " - "Please make sure this is set inside `build_vocab` function." - "Call the `build_vocab` function before calling `train`." - ) - - if total_words is None and total_examples is None: - raise ValueError( - "You must specify either total_examples or total_words, for proper job parameters updation" - "and progress calculations. " - "The usual value is total_examples=model.corpus_count." - ) - if epochs is None: - raise ValueError("You must specify an explict epochs count. The usual value is epochs=model.epochs.") - logger.info( - "training model with %i workers on %i vocabulary and %i features, " - "using sg=%s hs=%s sample=%s negative=%s window=%s", - self.workers, len(self.wv.vocab), self.trainables.layer1_size, self.sg, - self.hs, self.vocabulary.sample, self.negative, self.window - ) - - @classmethod - def load(cls, *args, **kwargs): - """Load a previously saved object (using :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.save`) from file. - - Also initializes extra instance attributes in case the loaded model does not include them. - `*args` or `**kwargs` **MUST** include the fname argument (path to saved file). - See :meth:`~gensim.utils.SaveLoad.load`. - - Parameters - ---------- - *args : object - Positional arguments passed to :meth:`~gensim.utils.SaveLoad.load`. - **kwargs : object - Key word arguments passed to :meth:`~gensim.utils.SaveLoad.load`. - - See Also - -------- - :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.save` - Method for save a model. - - Returns - ------- - :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` - Model loaded from disk. - - Raises - ------ - IOError - When methods are called on instance (should be called from class). 
- - """ - model = super(BaseWordEmbeddingsModel, cls).load(*args, **kwargs) - if not hasattr(model, 'ns_exponent'): - model.ns_exponent = 0.75 - if not hasattr(model.vocabulary, 'ns_exponent'): - model.vocabulary.ns_exponent = 0.75 - if model.negative and hasattr(model.wv, 'index2word'): - model.vocabulary.make_cum_table(model.wv) # rebuild cum_table from vocabulary - if not hasattr(model, 'corpus_count'): - model.corpus_count = None - if not hasattr(model, 'corpus_total_words'): - model.corpus_total_words = None - if not hasattr(model.trainables, 'vectors_lockf') and hasattr(model.wv, 'vectors'): - model.trainables.vectors_lockf = ones(len(model.wv.vectors), dtype=REAL) - if not hasattr(model, 'random'): - model.random = random.RandomState(model.trainables.seed) - if not hasattr(model, 'train_count'): - model.train_count = 0 - model.total_train_time = 0 - return model - - def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, total_examples, - raw_word_count, total_words, trained_word_count, elapsed): - """Callback used to log progress for long running jobs. - - Parameters - ---------- - job_queue : Queue of (list of object, dict of (str, float)) - The queue of jobs still to be performed by workers. Each job is represented as a tuple containing - the batch of data to be processed and the parameters to be used for the processing as a dict. - progress_queue : Queue of (int, int, int) - A queue of progress reports. Each report is represented as a tuple of these 3 elements: - * size of data chunk processed, for example number of sentences in the corpus chunk. - * Effective word count used in training (after ignoring unknown words and trimming the sentence length). - * Total word count used in training. - cur_epoch : int - The current training iteration through the corpus. - example_count : int - Number of examples (could be sentences for example) processed until now. - total_examples : int - Number of all examples present in the input corpus. - raw_word_count : int - Number of words used in training until now. - total_words : int - Number of all words in the input corpus. - trained_word_count : int - Number of effective words used in training until now (after ignoring unknown words and trimming - the sentence length). - elapsed : int - Elapsed time since the beginning of training in seconds. - - Notes - ----- - If you train the model via `corpus_file` argument, there is no job_queue, so reported job_queue size will - always be equal to -1. - - """ - if total_examples: - # examples-based progress % - logger.info( - "EPOCH %i - PROGRESS: at %.2f%% examples, %.0f words/s, in_qsize %i, out_qsize %i", - cur_epoch + 1, 100.0 * example_count / total_examples, trained_word_count / elapsed, - -1 if job_queue is None else utils.qsize(job_queue), utils.qsize(progress_queue) - ) - else: - # words-based progress % - logger.info( - "EPOCH %i - PROGRESS: at %.2f%% words, %.0f words/s, in_qsize %i, out_qsize %i", - cur_epoch + 1, 100.0 * raw_word_count / total_words, trained_word_count / elapsed, - -1 if job_queue is None else utils.qsize(job_queue), utils.qsize(progress_queue) - ) - - def _log_epoch_end(self, cur_epoch, example_count, total_examples, raw_word_count, total_words, - trained_word_count, elapsed, is_corpus_file_mode): - """Callback used to log the end of a training epoch. - - Parameters - ---------- - cur_epoch : int - The current training iteration through the corpus. - example_count : int - Number of examples (could be sentences for example) processed until now. 
- total_examples : int - Number of all examples present in the input corpus. - raw_word_count : int - Number of words used in training until now. - total_words : int - Number of all words in the input corpus. - trained_word_count : int - Number of effective words used in training until now (after ignoring unknown words and trimming - the sentence length). - elapsed : int - Elapsed time since the beginning of training in seconds. - is_corpus_file_mode : bool - Whether training is file-based (corpus_file argument) or not. - - Warnings - -------- - In case the corpus is changed while the epoch was running. - - """ - logger.info( - "EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s", - cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed - ) - - # don't warn if training in file-based mode, because it's expected behavior - if is_corpus_file_mode: - return - - # check that the input corpus hasn't changed during iteration - if total_examples and total_examples != example_count: - logger.warning( - "EPOCH - %i : supplied example count (%i) did not equal expected count (%i)", cur_epoch + 1, - example_count, total_examples - ) - if total_words and total_words != raw_word_count: - logger.warning( - "EPOCH - %i : supplied raw word count (%i) did not equal expected count (%i)", cur_epoch + 1, - raw_word_count, total_words - ) - - def _log_train_end(self, raw_word_count, trained_word_count, total_elapsed, job_tally): - """Callback to log the end of training. - - Parameters - ---------- - raw_word_count : int - Number of words used in the whole training. - trained_word_count : int - Number of effective words used in training (after ignoring unknown words and trimming the sentence length). - total_elapsed : int - Total time spent during training in seconds. - job_tally : int - Total number of jobs processed during training. - - """ - logger.info( - "training on a %i raw words (%i effective words) took %.1fs, %.0f effective words/s", - raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed - ) diff --git a/gensim/models/callbacks.py b/gensim/models/callbacks.py index cfa29d1998..27dbca4dce 100644 --- a/gensim/models/callbacks.py +++ b/gensim/models/callbacks.py @@ -569,7 +569,7 @@ def on_epoch_end(self, epoch, topics=None): class CallbackAny2Vec(object): - """Base class to build callbacks for :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel`. + """Base class to build callbacks for :class:`~gensim.models.word2vec.Word2Vec` & subclasses. Callbacks are used to apply custom functions over the model at specific points during training (epoch start, batch end etc.). This is a base class and its purpose is to be inherited by @@ -584,7 +584,7 @@ def on_epoch_begin(self, model): Parameters ---------- - model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + model : :class:`~gensim.models.word2vec.Word2Vec` or subclass Current model. """ @@ -595,7 +595,7 @@ def on_epoch_end(self, model): Parameters ---------- - model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + model : :class:`~gensim.models.word2vec.Word2Vec` or subclass Current model. """ @@ -606,7 +606,7 @@ def on_batch_begin(self, model): Parameters ---------- - model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + model : :class:`~gensim.models.word2vec.Word2Vec` or subclass Current model. 
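To make the callback hooks documented here concrete, a minimal sketch of a `CallbackAny2Vec` subclass that counts finished epochs; the corpus name `sentences` is assumed for illustration only:

.. sourcecode:: pycon

    >>> from gensim.models import Word2Vec
    >>> from gensim.models.callbacks import CallbackAny2Vec
    >>>
    >>> class EpochCounter(CallbackAny2Vec):
    ...     def __init__(self):
    ...         self.epochs_done = 0
    ...     def on_epoch_end(self, model):
    ...         self.epochs_done += 1
    >>>
    >>> counter = EpochCounter()
    >>> model = Word2Vec(sentences, min_count=1, callbacks=[counter])  # counter.epochs_done == model.epochs afterwards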
""" @@ -617,7 +617,7 @@ def on_batch_end(self, model): Parameters ---------- - model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + model : :class:`~gensim.models.word2vec.Word2Vec` or subclass Current model. """ @@ -628,7 +628,7 @@ def on_train_begin(self, model): Parameters ---------- - model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + model : :class:`~gensim.models.word2vec.Word2Vec` or subclass Current model. """ @@ -639,7 +639,7 @@ def on_train_end(self, model): Parameters ---------- - model : :class:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel` + model : :class:`~gensim.models.word2vec.Word2Vec` or subclass Current model. """ diff --git a/gensim/models/deprecated/__init__.py b/gensim/models/deprecated/__init__.py deleted file mode 100644 index cfa71654f5..0000000000 --- a/gensim/models/deprecated/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""This package contains some deprecated implementations of algorithm, will be removed soon.""" diff --git a/gensim/models/deprecated/doc2vec.py b/gensim/models/deprecated/doc2vec.py deleted file mode 100644 index 41f74fdc6b..0000000000 --- a/gensim/models/deprecated/doc2vec.py +++ /dev/null @@ -1,1044 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - - -""" -Warnings --------- -.. deprecated:: 3.3.0 - Use :mod:`gensim.models.doc2vec` instead. - - - -Deep learning via the distributed memory and distributed bag of words models from -[1]_, using either hierarchical softmax or negative sampling [2]_ [3]_. See [#tutorial]_ - -**Make sure you have a C compiler before installing gensim, to use optimized (compiled) -doc2vec training** (70x speedup [blog]_). - -Initialize a model with e.g.:: - -.. sourcecode:: pycon - - >>> model = Doc2Vec(documents, size=100, window=8, min_count=5, workers=4) - -Persist a model to disk with:: - -.. sourcecode:: pycon - - >>> model.save(fname) - >>> model = Doc2Vec.load(fname) # you can continue training with the loaded model! - -If you're finished training a model (=no more updates, only querying), you can do - -.. sourcecode:: pycon - - >>> model.delete_temporary_training_data(keep_doctags_vectors=True, keep_inference=True): - -to trim unneeded model memory = use (much) less RAM. - - - -.. [1] Quoc Le and Tomas Mikolov. Distributed Representations of Sentences and Documents. - http://arxiv.org/pdf/1405.4053v2.pdf -.. [2] Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. - Efficient Estimation of Word Representations in Vector Space. In Proceedings of Workshop at ICLR, 2013. -.. [3] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean. - Distributed Representations of Words and Phrases and their Compositionality. In Proceedings of NIPS, 2013. -.. [blog] Optimizing word2vec in gensim, http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/ - -.. 
[#tutorial] Doc2vec in gensim tutorial, - https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/doc2vec-lee.ipynb - - - -""" - -import logging -import os - -try: - from queue import Queue -except ImportError: - from Queue import Queue # noqa:F401 - -from collections import namedtuple, defaultdict -from timeit import default_timer - -from numpy import zeros, sum as np_sum, add as np_add, concatenate, \ - repeat as np_repeat, array, float32 as REAL, empty, ones, memmap as np_memmap, \ - sqrt, newaxis, ndarray, dot, vstack, dtype, divide as np_divide, integer - -from gensim import utils -from gensim.utils import call_on_class_only, deprecated -from gensim.models.deprecated.word2vec import Word2Vec, train_cbow_pair, train_sg_pair, train_batch_sg,\ - MAX_WORDS_IN_BATCH -from gensim.models.deprecated.keyedvectors import KeyedVectors -from gensim.models.doc2vec import Doc2Vec as NewDoc2Vec -from gensim.models.deprecated.old_saveload import SaveLoad - -from gensim import matutils # utility fnc for pickling, common scipy operations etc -from six.moves import zip, range -from six import string_types, integer_types - -logger = logging.getLogger(__name__) - - -def load_old_doc2vec(*args, **kwargs): - old_model = Doc2Vec.load(*args, **kwargs) - params = { - 'dm_mean': old_model.__dict__.get('dm_mean', None), - 'dm': old_model.dm, - 'dbow_words': old_model.dbow_words, - 'dm_concat': old_model.dm_concat, - 'dm_tag_count': old_model.dm_tag_count, - 'docvecs_mapfile': old_model.__dict__.get('docvecs_mapfile', None), - 'comment': old_model.__dict__.get('comment', None), - 'vector_size': old_model.vector_size, - 'alpha': old_model.alpha, - 'window': old_model.window, - 'min_count': old_model.min_count, - 'max_vocab_size': old_model.__dict__.get('max_vocab_size', None), - 'sample': old_model.sample, - 'seed': old_model.seed, - 'workers': old_model.workers, - 'min_alpha': old_model.min_alpha, - 'hs': old_model.hs, - 'negative': old_model.negative, - 'cbow_mean': old_model.cbow_mean, - 'hashfxn': old_model.hashfxn, - 'epochs': old_model.iter, - 'sorted_vocab': old_model.__dict__.get('sorted_vocab', 1), - 'batch_words': old_model.__dict__.get('batch_words', MAX_WORDS_IN_BATCH), - 'compute_loss': old_model.__dict__.get('compute_loss', None) - } - new_model = NewDoc2Vec(**params) - # set word2vec trainables attributes - new_model.wv.vectors = old_model.wv.syn0 - if hasattr(old_model.wv, 'syn0norm'): - new_model.docvecs.vectors_norm = old_model.wv.syn0norm - if hasattr(old_model, 'syn1'): - new_model.trainables.syn1 = old_model.syn1 - if hasattr(old_model, 'syn1neg'): - new_model.trainables.syn1neg = old_model.syn1neg - if hasattr(old_model, 'syn0_lockf'): - new_model.trainables.vectors_lockf = old_model.syn0_lockf - - # set doc2vec trainables attributes - new_model.docvecs.vectors_docs = old_model.docvecs.doctag_syn0 - if hasattr(old_model.docvecs, 'doctag_syn0norm'): - new_model.docvecs.vectors_docs_norm = old_model.docvecs.doctag_syn0norm - if hasattr(old_model.docvecs, 'doctag_syn0_lockf'): - new_model.trainables.vectors_docs_lockf = old_model.docvecs.doctag_syn0_lockf - if hasattr(old_model.docvecs, 'mapfile_path'): - new_model.docvecs.mapfile_path = old_model.docvecs.mapfile_path - - # set word2vec vocabulary attributes - new_model.wv.vocab = old_model.wv.vocab - new_model.wv.index2word = old_model.wv.index2word - new_model.vocabulary.cum_table = old_model.cum_table - - # set doc2vec vocabulary attributes - new_model.docvecs.doctags = old_model.docvecs.doctags - new_model.docvecs.count = 
old_model.docvecs.count - if hasattr(old_model.docvecs, 'max_rawint'): # `doc2vec` models before `0.12.3` do not have these 2 attributes - new_model.docvecs.max_rawint = old_model.docvecs.__dict__.get('max_rawint') - new_model.docvecs.offset2doctag = old_model.docvecs.__dict__.get('offset2doctag') - else: - # Doc2Vec models before Gensim version 0.12.3 did not have `max_rawint` and `offset2doctag` as they did not - # mixing of string and int tags. This implies the new attribute `offset2doctag` equals the old `index2doctag` - # (which was only filled if the documents had string tags). - # This also implies that the new attribute, `max_rawint`(highest rawint-indexed doctag) would either be equal - # to the initial value -1, in case only string tags are used or would be equal to `count` if only int indexing - # was used. - new_model.docvecs.max_rawint = -1 if old_model.docvecs.index2doctag else old_model.docvecs.count - 1 - new_model.docvecs.offset2doctag = old_model.docvecs.index2doctag - # now upconvert that to gensim-4.0.0+ - new_model.docvecs._upconvert_old_d2vkv() - - new_model.train_count = old_model.__dict__.get('train_count', None) - new_model.corpus_count = old_model.__dict__.get('corpus_count', None) - new_model.corpus_total_words = old_model.__dict__.get('corpus_total_words', None) - new_model.running_training_loss = old_model.__dict__.get('running_training_loss', 0) - new_model.total_train_time = old_model.__dict__.get('total_train_time', None) - new_model.min_alpha_yet_reached = old_model.__dict__.get('min_alpha_yet_reached', old_model.alpha) - new_model.model_trimmed_post_training = old_model.__dict__.get('model_trimmed_post_training', None) - - return new_model - - -def train_document_dbow(model, doc_words, doctag_indexes, alpha, work=None, - train_words=False, learn_doctags=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - """ - Update distributed bag of words model ("PV-DBOW") by training on a single document. - - Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - - The document is provided as `doc_words`, a list of word tokens which are looked up - in the model's vocab dictionary, and `doctag_indexes`, which provide indexes - into the doctag_vectors array. - - If `train_words` is True, simultaneously train word-to-word (not just doc-to-word) - examples, exactly as per Word2Vec skip-gram training. (Without this option, - word vectors are neither consulted nor updated during DBOW doc vector training.) - - Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to - prevent learning-updates to those respective model weights, as if using the - (partially-)frozen model to infer other compatible vectors. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from doc2vec_inner instead. 
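As a usage-level sketch of the PV-DBOW mode this helper implements (with `dbow_words=1` playing the role of `train_words=True`, i.e. word vectors are trained alongside doc vectors); the two-document corpus is illustrative only:

.. sourcecode:: pycon

    >>> from gensim.models.doc2vec import Doc2Vec, TaggedDocument
    >>> docs = [TaggedDocument(['machine', 'learning', 'rocks'], [0]),
    ...         TaggedDocument(['deep', 'learning', 'rocks'], [1])]
    >>> model = Doc2Vec(docs, dm=0, dbow_words=1, vector_size=50, min_count=1, epochs=10)
    >>> vec = model.infer_vector(['machine', 'learning'])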
- - """ - if doctag_vectors is None: - doctag_vectors = model.docvecs.doctag_syn0 - if doctag_locks is None: - doctag_locks = model.docvecs.doctag_syn0_lockf - - if train_words and learn_words: - train_batch_sg(model, [doc_words], alpha, work) - for doctag_index in doctag_indexes: - for word in doc_words: - train_sg_pair( - model, word, doctag_index, alpha, learn_vectors=learn_doctags, learn_hidden=learn_hidden, - context_vectors=doctag_vectors, context_locks=doctag_locks - ) - - return len(doc_words) - - -def train_document_dm(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, - learn_doctags=True, learn_words=True, learn_hidden=True, - word_vectors=None, word_locks=None, doctag_vectors=None, doctag_locks=None): - """ - Update distributed memory model ("PV-DM") by training on a single document. - - Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. This - method implements the DM model with a projection (input) layer that is - either the sum or mean of the context vectors, depending on the model's - `dm_mean` configuration field. See `train_document_dm_concat()` for the DM - model with a concatenated input layer. - - The document is provided as `doc_words`, a list of word tokens which are looked up - in the model's vocab dictionary, and `doctag_indexes`, which provide indexes - into the doctag_vectors array. - - Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to - prevent learning-updates to those respective model weights, as if using the - (partially-)frozen model to infer other compatible vectors. - - This is the non-optimized, Python version. If you have a C compiler, gensim - will use the optimized version from doc2vec_inner instead. - - """ - if word_vectors is None: - word_vectors = model.wv.syn0 - if word_locks is None: - word_locks = model.syn0_lockf - if doctag_vectors is None: - doctag_vectors = model.docvecs.doctag_syn0 - if doctag_locks is None: - doctag_locks = model.docvecs.doctag_syn0_lockf - - word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab - and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) # `b` in the original doc2vec code - start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) - word2_indexes = [word2.index for pos2, word2 in window_pos if pos2 != pos] - l1 = np_sum(word_vectors[word2_indexes], axis=0) + np_sum(doctag_vectors[doctag_indexes], axis=0) - count = len(word2_indexes) + len(doctag_indexes) - if model.cbow_mean and count > 1: - l1 /= count - neu1e = train_cbow_pair(model, word, word2_indexes, l1, alpha, - learn_vectors=False, learn_hidden=learn_hidden) - if not model.cbow_mean and count > 1: - neu1e /= count - if learn_doctags: - for i in doctag_indexes: - doctag_vectors[i] += neu1e * doctag_locks[i] - if learn_words: - for i in word2_indexes: - word_vectors[i] += neu1e * word_locks[i] - - return len(word_vocabs) - - -def train_document_dm_concat(model, doc_words, doctag_indexes, alpha, work=None, neu1=None, learn_doctags=True, - learn_words=True, learn_hidden=True, word_vectors=None, word_locks=None, - doctag_vectors=None, doctag_locks=None): - """ - Update distributed memory model ("PV-DM") by training on a single document, using a - concatenation of the context window word vectors (rather than a sum or average). 
- - Called internally from `Doc2Vec.train()` and `Doc2Vec.infer_vector()`. - - The document is provided as `doc_words`, a list of word tokens which are looked up - in the model's vocab dictionary, and `doctag_indexes`, which provide indexes - into the doctag_vectors array. - - Any of `learn_doctags', `learn_words`, and `learn_hidden` may be set False to - prevent learning-updates to those respective model weights, as if using the - (partially-)frozen model to infer other compatible vectors. - - This is the non-optimized, Python version. If you have a C compiler, gensim - will use the optimized version from doc2vec_inner instead. - - """ - if word_vectors is None: - word_vectors = model.wv.syn0 - if word_locks is None: - word_locks = model.syn0_lockf - if doctag_vectors is None: - doctag_vectors = model.docvecs.doctag_syn0 - if doctag_locks is None: - doctag_locks = model.docvecs.doctag_syn0_lockf - - word_vocabs = [model.wv.vocab[w] for w in doc_words if w in model.wv.vocab - and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - doctag_len = len(doctag_indexes) - if doctag_len != model.dm_tag_count: - return 0 # skip doc without expected number of doctag(s) (TODO: warn/pad?) - - null_word = model.wv.vocab['\0'] - pre_pad_count = model.window - post_pad_count = model.window - padded_document_indexes = ( - (pre_pad_count * [null_word.index]) # pre-padding - + [word.index for word in word_vocabs if word is not None] # elide out-of-Vocabulary words - + (post_pad_count * [null_word.index]) # post-padding - ) - - for pos in range(pre_pad_count, len(padded_document_indexes) - post_pad_count): - word_context_indexes = ( - padded_document_indexes[(pos - pre_pad_count): pos] # preceding words - + padded_document_indexes[(pos + 1):(pos + 1 + post_pad_count)] # following words - ) - predict_word = model.wv.vocab[model.wv.index2word[padded_document_indexes[pos]]] - # numpy advanced-indexing copies; concatenate, flatten to 1d - l1 = concatenate((doctag_vectors[doctag_indexes], word_vectors[word_context_indexes])).ravel() - neu1e = train_cbow_pair(model, predict_word, None, l1, alpha, - learn_hidden=learn_hidden, learn_vectors=False) - - # filter by locks and shape for addition to source vectors - e_locks = concatenate((doctag_locks[doctag_indexes], word_locks[word_context_indexes])) - neu1e_r = (neu1e.reshape(-1, model.vector_size) - * np_repeat(e_locks, model.vector_size).reshape(-1, model.vector_size)) - - if learn_doctags: - np_add.at(doctag_vectors, doctag_indexes, neu1e_r[:doctag_len]) - if learn_words: - np_add.at(word_vectors, word_context_indexes, neu1e_r[doctag_len:]) - - return len(padded_document_indexes) - pre_pad_count - post_pad_count - - -class TaggedDocument(namedtuple('TaggedDocument', 'words tags')): - """ - A single document, made up of `words` (a list of unicode string tokens) - and `tags` (a list of tokens). Tags may be one or more unicode string - tokens, but typical practice (which will also be most memory-efficient) is - for the tags list to include a unique integer id as the only tag. - - Replaces "sentence as a list of words" from Word2Vec. - - """ - - def __str__(self): - return '%s(%s, %s)' % (self.__class__.__name__, self.words, self.tags) - - -# for compatibility -@deprecated("Class will be removed in 4.0.0, use TaggedDocument instead") -class LabeledSentence(TaggedDocument): - pass - - -class DocvecsArray(SaveLoad): - """ - Default storage of doc vectors during/after training, in a numpy array. 
- - As the 'docvecs' property of a Doc2Vec model, allows access and - comparison of document vectors. - - .. sourcecode:: pycon - - >>> docvec = d2v_model.docvecs[99] - >>> docvec = d2v_model.docvecs['SENT_99'] # if string tag used in training - >>> sims = d2v_model.docvecs.most_similar(99) - >>> sims = d2v_model.docvecs.most_similar('SENT_99') - >>> sims = d2v_model.docvecs.most_similar(docvec) - - If only plain int tags are presented during training, the dict (of - string tag -> index) and list (of index -> string tag) stay empty, - saving memory. - - Supplying a mapfile_path (as by initializing a Doc2Vec model with a - 'docvecs_mapfile' value) will use a pair of memory-mapped - files as the array backing for doctag_syn0/doctag_syn0_lockf values. - - The Doc2Vec model automatically uses this class, but a future alternative - implementation, based on another persistence mechanism like LMDB, LevelDB, - or SQLite, should also be possible. - """ - - def __init__(self, mapfile_path=None): - self.doctags = {} # string -> Doctag (only filled if necessary) - self.max_rawint = -1 # highest rawint-indexed doctag - self.offset2doctag = [] # int offset-past-(max_rawint+1) -> String (only filled if necessary) - self.count = 0 - self.mapfile_path = mapfile_path - - def note_doctag(self, key, document_no, document_length): - """Note a document tag during initial corpus scan, for structure sizing.""" - if isinstance(key, integer_types + (integer,)): - self.max_rawint = max(self.max_rawint, key) - else: - if key in self.doctags: - self.doctags[key] = self.doctags[key].repeat(document_length) - else: - self.doctags[key] = Doctag(len(self.offset2doctag), document_length, 1) - self.offset2doctag.append(key) - self.count = self.max_rawint + 1 + len(self.offset2doctag) - - def indexed_doctags(self, doctag_tokens): - """Return indexes and backing-arrays used in training examples.""" - return ([self._int_index(index) for index in doctag_tokens if index in self], - self.doctag_syn0, self.doctag_syn0_lockf, doctag_tokens) - - def trained_item(self, indexed_tuple): - """Persist any changes made to the given indexes (matching tuple previously - returned by indexed_doctags()); a no-op for this implementation""" - pass - - def _int_index(self, index): - """Return int index for either string or int index""" - if isinstance(index, integer_types + (integer,)): - return index - else: - return self.max_rawint + 1 + self.doctags[index].offset - - @deprecated("Method will be removed in 4.0.0, use self.index_to_doctag instead") - def _key_index(self, i_index, missing=None): - """Return string index for given int index, if available""" - return self.index_to_doctag(i_index) - - def index_to_doctag(self, i_index): - """Return string key for given i_index, if available. Otherwise return raw int doctag (same int).""" - candidate_offset = i_index - self.max_rawint - 1 - if 0 <= candidate_offset < len(self.offset2doctag): - return self.offset2doctag[candidate_offset] - else: - return i_index - - def __getitem__(self, index): - """ - Accept a single key (int or string tag) or list of keys as input. - - If a single string or int, return designated tag's vector - representation, as a 1D numpy array. - - If a list, return designated tags' vector representations as a - 2D numpy array: #tags x #vector_size. 
- """ - if isinstance(index, string_types + integer_types + (integer,)): - return self.doctag_syn0[self._int_index(index)] - - return vstack([self[i] for i in index]) - - def __len__(self): - return self.count - - def __contains__(self, index): - if isinstance(index, integer_types + (integer,)): - return index < self.count - else: - return index in self.doctags - - def save(self, *args, **kwargs): - # don't bother storing the cached normalized vectors - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) - super(DocvecsArray, self).save(*args, **kwargs) - - def borrow_from(self, other_docvecs): - self.count = other_docvecs.count - self.doctags = other_docvecs.doctags - self.offset2doctag = other_docvecs.offset2doctag - - def clear_sims(self): - self.doctag_syn0norm = None - - def estimated_lookup_memory(self): - """Estimated memory for tag lookup; 0 if using pure int tags.""" - return 60 * len(self.offset2doctag) + 140 * len(self.doctags) - - def reset_weights(self, model): - length = max(len(self.doctags), self.count) - if self.mapfile_path: - self.doctag_syn0 = np_memmap( - self.mapfile_path + '.doctag_syn0', dtype=REAL, mode='w+', shape=(length, model.vector_size) - ) - self.doctag_syn0_lockf = np_memmap( - self.mapfile_path + '.doctag_syn0_lockf', dtype=REAL, mode='w+', shape=(length,) - ) - self.doctag_syn0_lockf.fill(1.0) - else: - self.doctag_syn0 = empty((length, model.vector_size), dtype=REAL) - self.doctag_syn0_lockf = ones((length,), dtype=REAL) # zeros suppress learning - - for i in range(length): - # construct deterministic seed from index AND model seed - seed = "%d %s" % (model.seed, self.index_to_doctag(i)) - self.doctag_syn0[i] = model.seeded_vector(seed) - - def init_sims(self, replace=False): - """ - Precompute L2-normalized vectors. - - If `replace` is set, forget the original vectors and only keep the normalized - ones = saves lots of memory! - - Note that you **cannot continue training or inference** after doing a replace. - The model becomes effectively read-only = you can call `most_similar`, `similarity` - etc., but not `train` or `infer_vector`. - - """ - if getattr(self, 'doctag_syn0norm', None) is None or replace: - logger.info("precomputing L2-norms of doc weight vectors") - if replace: - for i in range(self.doctag_syn0.shape[0]): - self.doctag_syn0[i, :] /= sqrt((self.doctag_syn0[i, :] ** 2).sum(-1)) - self.doctag_syn0norm = self.doctag_syn0 - else: - if self.mapfile_path: - self.doctag_syn0norm = np_memmap( - self.mapfile_path + '.doctag_syn0norm', dtype=REAL, - mode='w+', shape=self.doctag_syn0.shape) - else: - self.doctag_syn0norm = empty(self.doctag_syn0.shape, dtype=REAL) - np_divide(self.doctag_syn0, sqrt((self.doctag_syn0 ** 2).sum(-1))[..., newaxis], self.doctag_syn0norm) - - def most_similar(self, positive=None, negative=None, topn=10, clip_start=0, clip_end=None, indexer=None): - """ - Find the top-N most similar docvecs known from training. Positive docs contribute - positively towards the similarity, negative docs negatively. - - This method computes cosine similarity between a simple mean of the projection - weight vectors of the given docs. Docs may be specified as vectors, integer indexes - of trained docvecs, or if the documents were originally presented with string tags, - by the corresponding tags. - - The 'clip_start' and 'clip_end' allow limiting results to a particular contiguous - range of the underlying doctag_syn0norm vectors. 
(This may be useful if the ordering - there was chosen to be significant, such as more popular tag IDs in lower indexes.) - """ - if positive is None: - positive = [] - if negative is None: - negative = [] - - self.init_sims() - clip_end = clip_end or len(self.doctag_syn0norm) - - if isinstance(positive, string_types + integer_types + (integer,)) and not negative: - # allow calls like most_similar('dog'), as a shorthand for most_similar(['dog']) - positive = [positive] - - # add weights for each doc, if not already present; default to 1.0 for positive and -1.0 for negative docs - positive = [ - (doc, 1.0) if isinstance(doc, string_types + integer_types + (ndarray, integer)) - else doc for doc in positive - ] - negative = [ - (doc, -1.0) if isinstance(doc, string_types + integer_types + (ndarray, integer)) - else doc for doc in negative - ] - - # compute the weighted average of all docs - all_docs, mean = set(), [] - for doc, weight in positive + negative: - if isinstance(doc, ndarray): - mean.append(weight * doc) - elif doc in self.doctags or doc < self.count: - mean.append(weight * self.doctag_syn0norm[self._int_index(doc)]) - all_docs.add(self._int_index(doc)) - else: - raise KeyError("doc '%s' not in trained set" % doc) - if not mean: - raise ValueError("cannot compute similarity with no input") - mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL) - - if indexer is not None: - return indexer.most_similar(mean, topn) - - dists = dot(self.doctag_syn0norm[clip_start:clip_end], mean) - if not topn: - return dists - best = matutils.argsort(dists, topn=topn + len(all_docs), reverse=True) - # ignore (don't return) docs from the input - result = [ - (self.index_to_doctag(sim + clip_start), float(dists[sim])) - for sim in best - if (sim + clip_start) not in all_docs - ] - return result[:topn] - - def doesnt_match(self, docs): - """ - Which doc from the given list doesn't go with the others? - - (TODO: Accept vectors of out-of-training-set docs, as if from inference.) - - """ - self.init_sims() - - docs = [doc for doc in docs if doc in self.doctags or 0 <= doc < self.count] # filter out unknowns - logger.debug("using docs %s", docs) - if not docs: - raise ValueError("cannot select a doc from an empty list") - vectors = vstack(self.doctag_syn0norm[self._int_index(doc)] for doc in docs).astype(REAL) - mean = matutils.unitvec(vectors.mean(axis=0)).astype(REAL) - dists = dot(vectors, mean) - return sorted(zip(dists, docs))[0][1] - - def similarity(self, d1, d2): - """ - Compute cosine similarity between two docvecs in the trained set, specified by int index or - string tag. (TODO: Accept vectors of out-of-training-set docs, as if from inference.) - - """ - return dot(matutils.unitvec(self[d1]), matutils.unitvec(self[d2])) - - def n_similarity(self, ds1, ds2): - """ - Compute cosine similarity between two sets of docvecs from the trained set, specified by int - index or string tag. (TODO: Accept vectors of out-of-training-set docs, as if from inference.) - - """ - v1 = [self[doc] for doc in ds1] - v2 = [self[doc] for doc in ds2] - return dot(matutils.unitvec(array(v1).mean(axis=0)), matutils.unitvec(array(v2).mean(axis=0))) - - def similarity_unseen_docs(self, model, doc_words1, doc_words2, alpha=0.1, min_alpha=0.0001, steps=5): - """ - Compute cosine similarity between two post-bulk out of training documents. - - Document should be a list of (word) tokens. 
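A sketch of how this helper is typically invoked on two unseen token lists, mirroring the (deprecated) signature shown here; `model` is an assumed, already-trained Doc2Vec instance:

.. sourcecode:: pycon

    >>> tokens_1 = ['machine', 'learning', 'is', 'fun']
    >>> tokens_2 = ['deep', 'learning', 'is', 'fun']
    >>> sim = model.docvecs.similarity_unseen_docs(model, tokens_1, tokens_2, alpha=0.1, min_alpha=0.0001, steps=5)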
- """ - d1 = model.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, steps=steps) - d2 = model.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, steps=steps) - return dot(matutils.unitvec(d1), matutils.unitvec(d2)) - - -class Doctag(namedtuple('Doctag', 'offset, word_count, doc_count')): - """A string document tag discovered during the initial vocabulary - scan. (The document-vector equivalent of a Vocab object.) - - Will not be used if all presented document tags are ints. - - The offset is only the true index into the doctags_syn0/doctags_syn0_lockf - if-and-only-if no raw-int tags were used. If any raw-int tags were used, - string Doctag vectors begin at index (max_rawint + 1), so the true index is - (rawint_index + 1 + offset). See also DocvecsArray.index_to_doctag(). - """ - __slots__ = () - - def repeat(self, word_count): - return self._replace(word_count=self.word_count + word_count, doc_count=self.doc_count + 1) - - -class Doc2Vec(Word2Vec): - """Class for training, using and evaluating neural networks described in http://arxiv.org/pdf/1405.4053v2.pdf""" - - def __init__(self, documents=None, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, - docvecs=None, docvecs_mapfile=None, comment=None, trim_rule=None, **kwargs): - """ - Initialize the model from an iterable of `documents`. Each document is a - TaggedDocument object that will be used for training. - - The `documents` iterable can be simply a list of TaggedDocument elements, but for larger corpora, - consider an iterable that streams the documents directly from disk/network. - - If you don't supply `documents`, the model is left uninitialized -- use if - you plan to initialize it in some other way. - - `dm` defines the training algorithm. By default (`dm=1`), 'distributed memory' (PV-DM) is used. - Otherwise, `distributed bag of words` (PV-DBOW) is employed. - - `size` is the dimensionality of the feature vectors. - - `window` is the maximum distance between the predicted word and context words used for prediction - within a document. - - `alpha` is the initial learning rate (will linearly drop to `min_alpha` as training progresses). - - `seed` = for the random number generator. - Note that for a fully deterministically-reproducible run, you must also limit the model to - a single worker thread, to eliminate ordering jitter from OS thread scheduling. (In Python - 3, reproducibility between interpreter launches also requires use of the PYTHONHASHSEED - environment variable to control hash randomization.) - - `min_count` = ignore all words with total frequency lower than this. - - `max_vocab_size` = limit RAM during vocabulary building; if there are more unique - words than this, then prune the infrequent ones. Every 10 million word types - need about 1GB of RAM. Set to `None` for no limit (default). - - `sample` = threshold for configuring which higher-frequency words are randomly downsampled; - default is 1e-3, values of 1e-5 (or lower) may also be useful, set to 0.0 to disable downsampling. - - `workers` = use this many worker threads to train the model (=faster training with multicore machines). - - `iter` = number of iterations (epochs) over the corpus. The default inherited from Word2Vec is 5, - but values of 10 or 20 are common in published 'Paragraph Vector' experiments. - - `hs` = if 1, hierarchical softmax will be used for model training. - If set to 0 (default), and `negative` is non-zero, negative sampling will be used. 
- - `negative` = if > 0, negative sampling will be used, the int for negative - specifies how many "noise words" should be drawn (usually between 5-20). - Default is 5. If set to 0, no negative samping is used. - - `dm_mean` = if 0 (default), use the sum of the context word vectors. If 1, use the mean. - Only applies when dm is used in non-concatenative mode. - - `dm_concat` = if 1, use concatenation of context vectors rather than sum/average; - default is 0 (off). Note concatenation results in a much-larger model, as the input - is no longer the size of one (sampled or arithmetically combined) word vector, but the - size of the tag(s) and all words in the context strung together. - - `dm_tag_count` = expected constant number of document tags per document, when using - dm_concat mode; default is 1. - - `dbow_words` if set to 1 trains word-vectors (in skip-gram fashion) simultaneous with DBOW - doc-vector training; default is 0 (faster training of doc-vectors only). - - `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain - in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and - returns either util.RULE_DISCARD, util.RULE_KEEP or util.RULE_DEFAULT. - Note: The rule, if given, is only used prune vocabulary during build_vocab() and is not stored as part - of the model. - """ - - if 'sentences' in kwargs: - raise DeprecationWarning( - "Parameter 'sentences' was renamed to 'documents', and will be removed in 4.0.0, " - "use 'documents' instead." - ) - - super(Doc2Vec, self).__init__( - sg=(1 + dm) % 2, - null_word=dm_concat, - **kwargs) - - self.load = call_on_class_only - - if dm_mean is not None: - self.cbow_mean = dm_mean - - self.dbow_words = dbow_words - self.dm_concat = dm_concat - self.dm_tag_count = dm_tag_count - if self.dm and self.dm_concat: - self.layer1_size = (self.dm_tag_count + (2 * self.window)) * self.vector_size - - self.docvecs = docvecs or DocvecsArray(docvecs_mapfile) - self.comment = comment - if documents is not None: - self.build_vocab(documents, trim_rule=trim_rule) - self.train(documents, total_examples=self.corpus_count, epochs=self.iter) - - @property - def dm(self): - return not self.sg # opposite of SG - - @property - def dbow(self): - return self.sg # same as SG - - def clear_sims(self): - super(Doc2Vec, self).clear_sims() - self.docvecs.clear_sims() - - def reset_weights(self): - if self.dm and self.dm_concat: - # expand l1 size to match concatenated tags+words length - self.layer1_size = (self.dm_tag_count + (2 * self.window)) * self.vector_size - logger.info("using concatenative %d-dimensional layer1", self.layer1_size) - super(Doc2Vec, self).reset_weights() - self.docvecs.reset_weights(self) - - def reset_from(self, other_model): - """Reuse shareable structures from other_model.""" - self.docvecs.borrow_from(other_model.docvecs) - super(Doc2Vec, self).reset_from(other_model) - - def scan_vocab(self, documents, progress_per=10000, trim_rule=None, update=False): - logger.info("collecting all words and their counts") - document_no = -1 - total_words = 0 - min_reduce = 1 - interval_start = default_timer() - 0.00001 # guard against next sample being identical - interval_count = 0 - checked_string_types = 0 - vocab = defaultdict(int) - for document_no, document in enumerate(documents): - if not checked_string_types: - if isinstance(document.words, string_types): - logger.warning( 
- "Each 'words' should be a list of words (usually unicode strings). " - "First 'words' here is instead plain %s.", - type(document.words) - ) - checked_string_types += 1 - if document_no % progress_per == 0: - interval_rate = (total_words - interval_count) / (default_timer() - interval_start) - logger.info( - "PROGRESS: at example #%i, processed %i words (%i/s), %i word types, %i tags", - document_no, total_words, interval_rate, len(vocab), len(self.docvecs) - ) - interval_start = default_timer() - interval_count = total_words - document_length = len(document.words) - - for tag in document.tags: - self.docvecs.note_doctag(tag, document_no, document_length) - - for word in document.words: - vocab[word] += 1 - total_words += len(document.words) - - if self.max_vocab_size and len(vocab) > self.max_vocab_size: - utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) - min_reduce += 1 - - logger.info( - "collected %i word types and %i unique tags from a corpus of %i examples and %i words", - len(vocab), len(self.docvecs), document_no + 1, total_words - ) - self.corpus_count = document_no + 1 - self.raw_vocab = vocab - - def _do_train_job(self, job, alpha, inits): - work, neu1 = inits - tally = 0 - for doc in job: - indexed_doctags = self.docvecs.indexed_doctags(doc.tags) - doctag_indexes, doctag_vectors, doctag_locks, ignored = indexed_doctags - if self.sg: - tally += train_document_dbow( - self, doc.words, doctag_indexes, alpha, work, train_words=self.dbow_words, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks - ) - elif self.dm_concat: - tally += train_document_dm_concat( - self, doc.words, doctag_indexes, alpha, work, neu1, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks - ) - else: - tally += train_document_dm( - self, doc.words, doctag_indexes, alpha, work, neu1, - doctag_vectors=doctag_vectors, doctag_locks=doctag_locks - ) - self.docvecs.trained_item(indexed_doctags) - return tally, self._raw_word_count(job) - - def _raw_word_count(self, job): - """Return the number of words in a given job.""" - return sum(len(sentence.words) for sentence in job) - - def infer_vector(self, doc_words, alpha=0.1, min_alpha=0.0001, steps=5): - """ - Infer a vector for given post-bulk training document. - - Document should be a list of (word) tokens. 
- """ - doctag_vectors = empty((1, self.vector_size), dtype=REAL) - doctag_vectors[0] = self.seeded_vector(' '.join(doc_words)) - doctag_locks = ones(1, dtype=REAL) - doctag_indexes = [0] - - work = zeros(self.layer1_size, dtype=REAL) - if not self.sg: - neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) - - for i in range(steps): - if self.sg: - train_document_dbow( - self, doc_words, doctag_indexes, alpha, work, - learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks - ) - elif self.dm_concat: - train_document_dm_concat( - self, doc_words, doctag_indexes, alpha, work, neu1, - learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks - ) - else: - train_document_dm( - self, doc_words, doctag_indexes, alpha, work, neu1, - learn_words=False, learn_hidden=False, doctag_vectors=doctag_vectors, doctag_locks=doctag_locks - ) - alpha = ((alpha - min_alpha) / (steps - i)) + min_alpha - - return doctag_vectors[0] - - def estimate_memory(self, vocab_size=None, report=None): - """Estimate required memory for a model using current settings.""" - report = report or {} - report['doctag_lookup'] = self.docvecs.estimated_lookup_memory() - report['doctag_syn0'] = self.docvecs.count * self.vector_size * dtype(REAL).itemsize - return super(Doc2Vec, self).estimate_memory(vocab_size, report=report) - - def __str__(self): - """Abbreviated name reflecting major configuration paramaters.""" - segments = [] - if self.comment: - segments.append('"%s"' % self.comment) - if self.sg: - if self.dbow_words: - segments.append('dbow+w') # also training words - else: - segments.append('dbow') # PV-DBOW (skip-gram-style) - - else: # PV-DM... - if self.dm_concat: - segments.append('dm/c') # ...with concatenative context layer - else: - if self.cbow_mean: - segments.append('dm/m') - else: - segments.append('dm/s') - segments.append('d%d' % self.vector_size) # dimensions - if self.negative: - segments.append('n%d' % self.negative) # negative samples - if self.hs: - segments.append('hs') - if not self.sg or (self.sg and self.dbow_words): - segments.append('w%d' % self.window) # window size, when relevant - if self.min_count > 1: - segments.append('mc%d' % self.min_count) - if self.sample > 0: - segments.append('s%g' % self.sample) - if self.workers > 1: - segments.append('t%d' % self.workers) - return '%s(%s)' % (self.__class__.__name__, ','.join(segments)) - - def delete_temporary_training_data(self, keep_doctags_vectors=True, keep_inference=True): - """ - Discard parameters that are used in training and score. Use if you're sure you're done training a model. - Set `keep_doctags_vectors` to False if you don't want to save doctags vectors, - in this case you can't to use docvecs's most_similar, similarity etc. methods. - Set `keep_inference` to False if you don't want to store parameters that is used for infer_vector method - """ - if not keep_inference: - self._minimize_model(False, False, False) - if self.docvecs and hasattr(self.docvecs, 'doctag_syn0') and not keep_doctags_vectors: - del self.docvecs.doctag_syn0 - if self.docvecs and hasattr(self.docvecs, 'doctag_syn0_lockf'): - del self.docvecs.doctag_syn0_lockf - - def save_word2vec_format(self, fname, doctag_vec=False, word_vec=True, prefix='*dt_', fvocab=None, binary=False): - """ - Store the input-hidden weight matrix. 
- - `fname` is the file used to save the vectors in - `doctag_vec` is an optional boolean indicating whether to store document vectors - `word_vec` is an optional boolean indicating whether to store word vectors - (if both doctag_vec and word_vec are True, then both vectors are stored in the same file) - `prefix` to uniquely identify doctags from word vocab, and avoid collision - in case of repeated string in doctag and word vocab - `fvocab` is an optional file used to save the vocabulary - `binary` is an optional boolean indicating whether the data is to be saved - in binary word2vec format (default: False) - - """ - total_vec = len(self.wv.vocab) + len(self.docvecs) - # save word vectors - if word_vec: - if not doctag_vec: - total_vec = len(self.wv.vocab) - KeyedVectors.save_word2vec_format(self.wv, fname, fvocab, binary, total_vec) - # save document vectors - if doctag_vec: - with utils.open(fname, 'ab') as fout: - if not word_vec: - total_vec = len(self.docvecs) - logger.info("storing %sx%s projection weights into %s", total_vec, self.vector_size, fname) - fout.write(utils.to_utf8("%s %s\n" % (total_vec, self.vector_size))) - # store as in input order - for i in range(len(self.docvecs)): - doctag = u"%s%s" % (prefix, self.docvecs.index_to_doctag(i)) - row = self.docvecs.doctag_syn0[i] - if binary: - fout.write(utils.to_utf8(doctag) + b" " + row.tostring()) - else: - fout.write(utils.to_utf8("%s %s\n" % (doctag, ' '.join("%f" % val for val in row)))) - - -class TaggedBrownCorpus(object): - """Iterate over documents from the Brown corpus (part of NLTK data), yielding - each document out as a TaggedDocument object.""" - - def __init__(self, dirname): - self.dirname = dirname - - def __iter__(self): - for fname in os.listdir(self.dirname): - fname = os.path.join(self.dirname, fname) - if not os.path.isfile(fname): - continue - with utils.open(fname, 'rb') as f: - for item_no, line in enumerate(f): - line = utils.to_unicode(line) - # each file line is a single document in the Brown corpus - # each token is WORD/POS_TAG - token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] - # ignore words with non-alphabetic tags like ",", "!" etc (punctuation, weird stuff) - words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty documents - continue - yield TaggedDocument(words, ['%s_SENT_%s' % (fname, item_no)]) - - -class TaggedLineDocument(object): - """Simple format: one document = one line = one TaggedDocument object. - - Words are expected to be already preprocessed and separated by whitespace, - tags are constructed automatically from the document line number.""" - - def __init__(self, source): - """ - `source` can be either a string (filename) or a file object. 
- - Example:: - - documents = TaggedLineDocument('myfile.txt') - - Or for compressed files:: - - documents = TaggedLineDocument('compressed_text.txt.bz2') - documents = TaggedLineDocument('compressed_text.txt.gz') - - """ - self.source = source - - def __iter__(self): - """Iterate through the lines in the source.""" - try: - # Assume it is a file-like object and try treating it as such - # Things that don't have seek will trigger an exception - self.source.seek(0) - for item_no, line in enumerate(self.source): - yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) - except AttributeError: - # If it didn't work like a file, use it as a string filename - with utils.open(self.source, 'rb') as fin: - for item_no, line in enumerate(fin): - yield TaggedDocument(utils.to_unicode(line).split(), [item_no]) diff --git a/gensim/models/deprecated/fasttext.py b/gensim/models/deprecated/fasttext.py deleted file mode 100644 index 0d46b6f1cc..0000000000 --- a/gensim/models/deprecated/fasttext.py +++ /dev/null @@ -1,711 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# Authors: Chinmaya Pancholi , Shiva Manne -# Copyright (C) 2017 RaRe Technologies s.r.o. - -""" -Warnings --------- -.. deprecated:: 3.3.0 - Use :mod:`gensim.models.fasttext` instead. - - -Learn word representations via fasttext's "skip-gram and CBOW models", using either -hierarchical softmax or negative sampling [1]_. - -Notes ------ -There are more ways to get word vectors in Gensim than just FastText. -See wrappers for VarEmbed and WordRank or Word2Vec - -This module allows training a word embedding from a training corpus with the additional ability -to obtain word vectors for out-of-vocabulary words. - -For a tutorial on gensim's native fasttext, refer to the noteboook -- [2]_ - -**Make sure you have a C compiler before installing gensim, to use optimized (compiled) fasttext training** - -.. [1] P. Bojanowski, E. Grave, A. Joulin, T. Mikolov - Enriching Word Vectors with Subword Information. In arXiv preprint arXiv:1607.04606. - https://arxiv.org/abs/1607.04606 - -.. 
[2] https://github.com/RaRe-Technologies/gensim/blob/develop/docs/notebooks/FastText_Tutorial.ipynb - -""" - -import logging - -import numpy as np -from numpy import zeros, ones, vstack, sum as np_sum, empty, float32 as REAL - -from gensim.models.deprecated.word2vec import Word2Vec, train_sg_pair, train_cbow_pair -from gensim.models.deprecated.fasttext_wrapper import FastTextKeyedVectors -from gensim.models.deprecated.fasttext_wrapper import FastText as Ft_Wrapper, compute_ngrams, ft_hash -from gensim.models.fasttext import FastText as NewFastText - -logger = logging.getLogger(__name__) - -MAX_WORDS_IN_BATCH = 10000 - - -def load_old_fasttext(*args, **kwargs): - old_model = FastText.load(*args, **kwargs) - params = { - 'size': old_model.vector_size, - 'alpha': old_model.alpha, - 'window': old_model.window, - 'min_count': old_model.min_count, - 'max_vocab_size': old_model.__dict__.get('max_vocab_size', None), - 'sample': old_model.sample, - 'seed': old_model.seed, - 'workers': old_model.workers, - 'min_alpha': old_model.min_alpha, - 'sg': old_model.sg, - 'hs': old_model.hs, - 'negative': old_model.negative, - 'cbow_mean': old_model.cbow_mean, - 'hashfxn': old_model.hashfxn, - 'iter': old_model.iter, - 'null_word': old_model.null_word, - 'sorted_vocab': old_model.sorted_vocab, - 'batch_words': old_model.batch_words, - 'min_n': old_model.min_n, - 'max_n': old_model.max_n, - 'word_ngrams': old_model.word_ngrams, - 'bucket': old_model.bucket - } - new_model = NewFastText(**params) - # set trainables attributes - new_model.wv.vectors = old_model.wv.syn0 - new_model.wv.vectors_vocab = old_model.wv.syn0_vocab - new_model.wv.vectors_ngrams = old_model.wv.syn0_ngrams - if hasattr(old_model.wv, 'syn0norm'): - new_model.wv.vectors_norm = old_model.wv.syn0norm - if hasattr(old_model, 'syn1'): - new_model.trainables.syn1 = old_model.syn1 - if hasattr(old_model, 'syn1neg'): - new_model.trainables.syn1neg = old_model.syn1neg - if hasattr(old_model, 'syn0_lockf'): - new_model.trainables.vectors_lockf = old_model.syn0_lockf - - if hasattr(old_model, 'syn0_vocab_lockf'): - new_model.trainables.vectors_vocab_lockf = old_model.syn0_vocab_lockf - if hasattr(old_model, 'syn0_ngrams_lockf'): - new_model.trainables.vectors_ngrams_lockf = old_model.syn0_ngrams_lockf - if hasattr(old_model.wv, 'syn0_vocab_norm'): - new_model.trainables.vectors_vocab_norm = old_model.wv.syn0_vocab_norm - if hasattr(old_model.wv, 'syn0_ngrams_norm'): - new_model.trainables.vectors_ngrams_norm = old_model.wv.syn0_ngrams_norm - - # set vocabulary attributes - new_model.wv.vocab = old_model.wv.vocab - new_model.wv.index2word = old_model.wv.index2word - new_model.vocabulary.cum_table = old_model.cum_table - - new_model.wv.hash2index = old_model.wv.hash2index - - new_model.train_count = old_model.train_count - new_model.corpus_count = old_model.corpus_count - new_model.corpus_total_words = old_model.corpus_total_words - new_model.running_training_loss = old_model.running_training_loss - new_model.total_train_time = old_model.total_train_time - new_model.min_alpha_yet_reached = old_model.min_alpha_yet_reached - new_model.model_trimmed_post_training = old_model.model_trimmed_post_training - - new_model.trainables.num_ngram_vectors = old_model.num_ngram_vectors - - return new_model - - -def train_batch_cbow(model, sentences, alpha, work=None, neu1=None): - """Update CBOW model by training on a sequence of sentences. - - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. 
Called internally from :meth:`gensim.models.fasttext.FastText.train()`. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from fasttext_inner instead. - - Parameters - ---------- - model : :class:`~gensim.models.fasttext.FastText` - `FastText` instance. - sentences : iterable of iterables - Iterable of the sentences directly from disk/network. - alpha : float - Learning rate. - work : :class:`numpy.ndarray` - Private working memory for each worker. - neu1 : :class:`numpy.ndarray` - Private working memory for each worker. - - Returns - ------- - int - Effective number of words trained. - - """ - result = 0 - for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab - and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) - start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) - word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - - word2_subwords = [] - vocab_subwords_indices = [] - ngrams_subwords_indices = [] - - for index in word2_indices: - vocab_subwords_indices += [index] - word2_subwords += model.wv.ngrams_word[model.wv.index2word[index]] - - for subword in word2_subwords: - ngrams_subwords_indices.append(model.wv.ngrams[subword]) - - l1_vocab = np_sum(model.wv.syn0_vocab[vocab_subwords_indices], axis=0) # 1 x vector_size - l1_ngrams = np_sum(model.wv.syn0_ngrams[ngrams_subwords_indices], axis=0) # 1 x vector_size - - l1 = np_sum([l1_vocab, l1_ngrams], axis=0) - subwords_indices = [vocab_subwords_indices] + [ngrams_subwords_indices] - if (subwords_indices[0] or subwords_indices[1]) and model.cbow_mean: - l1 /= (len(subwords_indices[0]) + len(subwords_indices[1])) - - # train on the sliding window for target word - train_cbow_pair(model, word, subwords_indices, l1, alpha, is_ft=True) - result += len(word_vocabs) - return result - - -def train_batch_sg(model, sentences, alpha, work=None, neu1=None): - """Update skip-gram model by training on a sequence of sentences. - - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from :meth:`gensim.models.fasttext.FastText.train()`. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from fasttext_inner instead. - - Parameters - ---------- - model : :class:`~gensim.models.fasttext.FastText` - `FastText` instance. - sentences : iterable of iterables - Iterable of the sentences directly from disk/network. - alpha : float - Learning rate. - work : :class:`numpy.ndarray` - Private working memory for each worker. - neu1 : :class:`numpy.ndarray` - Private working memory for each worker. - - Returns - ------- - int - Effective number of words trained. 
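At the user level, the subword machinery this batch routine relies on is what lets FastText return vectors for out-of-vocabulary words; a small sketch (the corpus is illustrative, and the lookup goes through `model.wv` as elsewhere in this patch):

.. sourcecode:: pycon

    >>> from gensim.models import FastText
    >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
    >>> model = FastText(sentences, sg=1, min_count=1, min_n=3, max_n=6)
    >>> oov_vector = model.wv['saying']  # not in the vocab; assembled from char n-grams shared with 'say'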
- - """ - result = 0 - for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab - and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) # `b` in the original word2vec code - # now go over all words from the (reduced) window, predicting each one in turn - start = max(0, pos - model.window + reduced_window) - - subwords_indices = [word.index] - word2_subwords = model.wv.ngrams_word[model.wv.index2word[word.index]] - - for subword in word2_subwords: - subwords_indices.append(model.wv.ngrams[subword]) - - for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): - if pos2 != pos: # don't train on the `word` itself - train_sg_pair(model, model.wv.index2word[word2.index], subwords_indices, alpha, is_ft=True) - - result += len(word_vocabs) - return result - - -class FastText(Word2Vec): - """Class for training, using and evaluating word representations learned using method - described in [1]_ aka Fasttext. - - The model can be stored/loaded via its :meth:`~gensim.models.fasttext.FastText.save()` and - :meth:`~gensim.models.fasttext.FastText.load()` methods, or loaded in a format compatible with the original - fasttext implementation via :meth:`~gensim.models.fasttext.FastText.load_fasttext_format()`. - - """ - def __init__( - self, sentences=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, word_ngrams=1, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, - bucket=2000000, trim_rule=None, batch_words=MAX_WORDS_IN_BATCH): - """Initialize the model from an iterable of `sentences`. Each sentence is a - list of words (unicode strings) that will be used for training. - - Parameters - ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - If you don't supply `sentences`, the model is left uninitialized -- use if you plan to initialize it - in some other way. - sg : int {1, 0} - Defines the training algorithm. If 1, skip-gram is used, otherwise, CBOW is employed. - size : int - Dimensionality of the feature vectors. - window : int - The maximum distance between the current and predicted word within a sentence. - alpha : float - The initial learning rate. - min_alpha : float - Learning rate will linearly drop to `min_alpha` as training progresses. - seed : int - Seed for the random number generator. Initial vectors for each word are seeded with a hash of - the concatenation of word + `str(seed)`. Note that for a fully deterministically-reproducible run, - you must also limit the model to a single worker thread (`workers=1`), to eliminate ordering jitter - from OS thread scheduling. (In Python 3, reproducibility between interpreter launches also requires - use of the `PYTHONHASHSEED` environment variable to control hash randomization). - min_count : int - Ignores all words with total frequency lower than this. 
- max_vocab_size : int - Limits the RAM during vocabulary building; if there are more unique - words than this, then prune the infrequent ones. Every 10 million word types need about 1GB of RAM. - Set to `None` for no limit. - sample : float - The threshold for configuring which higher-frequency words are randomly downsampled, - useful range is (0, 1e-5). - workers : int - Use these many worker threads to train the model (=faster training with multicore machines). - hs : int {1,0} - If 1, hierarchical softmax will be used for model training. - If set to 0, and `negative` is non-zero, negative sampling will be used. - negative : int - If > 0, negative sampling will be used, the int for negative specifies how many "noise words" - should be drawn (usually between 5-20). - If set to 0, no negative sampling is used. - cbow_mean : int {1,0} - If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. - hashfxn : function - Hash function to use to randomly initialize weights, for increased training reproducibility. - iter : int - Number of iterations (epochs) over the corpus. - trim_rule : function - Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, - be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), - or a callable that accepts parameters (word, count, min_count) and returns either - :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - sorted_vocab : int {1,0} - If 1, sort the vocabulary by descending frequency before assigning word indexes. - batch_words : int - Target size (in words) for batches of examples passed to worker threads (and - thus cython routines).(Larger batches will be passed if individual - texts are longer than 10000 words, but the standard cython code truncates to that maximum.) - min_n : int - Min length of char ngrams to be used for training word representations. - max_n : int - Max length of char ngrams to be used for training word representations. Set `max_n` to be - lesser than `min_n` to avoid char ngrams being used. - word_ngrams : int {1,0} - If 1, uses enriches word vectors with subword(ngrams) information. - If 0, this is equivalent to word2vec. - bucket : int - Character ngrams are hashed into a fixed number of buckets, in order to limit the - memory usage of the model. This option specifies the number of buckets used by the model. - - Examples - -------- - Initialize and train a `FastText` model - - .. 
sourcecode:: pycon - - >>> from gensim.models import FastText - >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] - >>> - >>> model = FastText(sentences, min_count=1) - >>> say_vector = model['say'] # get vector for word - >>> of_vector = model['of'] # get vector for out-of-vocab word - - """ - # fastText specific params - self.bucket = bucket - self.word_ngrams = word_ngrams - self.min_n = min_n - self.max_n = max_n - if self.word_ngrams <= 1 and self.max_n == 0: - self.bucket = 0 - - super(FastText, self).__init__( - sentences=sentences, size=size, alpha=alpha, window=window, min_count=min_count, - max_vocab_size=max_vocab_size, sample=sample, seed=seed, workers=workers, min_alpha=min_alpha, - sg=sg, hs=hs, negative=negative, cbow_mean=cbow_mean, hashfxn=hashfxn, iter=iter, null_word=null_word, - trim_rule=trim_rule, sorted_vocab=sorted_vocab, batch_words=batch_words) - - def initialize_word_vectors(self): - """Initializes FastTextKeyedVectors instance to store all vocab/ngram vectors for the model.""" - self.wv = FastTextKeyedVectors() - self.wv.min_n = self.min_n - self.wv.max_n = self.max_n - - def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): - """Build vocabulary from a sequence of sentences (can be a once-only generator stream). - Each sentence must be a list of unicode strings. - - Parameters - ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - keep_raw_vocab : bool - If not true, delete the raw vocabulary after the scaling is done and free up RAM. - trim_rule : function - Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, - be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), - or a callable that accepts parameters (word, count, min_count) and returns either - :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - progress_per : int - Indicates how many words to process before showing/updating the progress. - update: bool - If true, the new words in `sentences` will be added to model's vocab. - - Example - ------- - Train a model and update vocab for online training - - .. sourcecode:: pycon - - >>> from gensim.models import FastText - >>> sentences_1 = [["cat", "say", "meow"], ["dog", "say", "woof"]] - >>> sentences_2 = [["dude", "say", "wazzup!"]] - >>> - >>> model = FastText(min_count=1) - >>> model.build_vocab(sentences_1) - >>> model.train(sentences_1, total_examples=model.corpus_count, epochs=model.iter) - >>> model.build_vocab(sentences_2, update=True) - >>> model.train(sentences_2, total_examples=model.corpus_count, epochs=model.iter) - - """ - if update: - if not len(self.wv.vocab): - raise RuntimeError( - "You cannot do an online vocabulary-update of a model which has no prior vocabulary. 
" - "First build the vocabulary of your model with a corpus " - "before doing an online update.") - self.old_vocab_len = len(self.wv.vocab) - self.old_hash2index_len = len(self.wv.hash2index) - - super(FastText, self).build_vocab( - sentences, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, progress_per=progress_per, update=update) - self.init_ngrams(update=update) - - def init_ngrams(self, update=False): - """Compute ngrams of all words present in vocabulary and stores vectors for only those ngrams. - Vectors for other ngrams are initialized with a random uniform distribution in FastText. - - Parameters - ---------- - update : bool - If True, the new vocab words and their new ngrams word vectors are initialized - with random uniform distribution and updated/added to the existing vocab word and ngram vectors. - - """ - if not update: - self.wv.ngrams = {} - self.wv.syn0_vocab = empty((len(self.wv.vocab), self.vector_size), dtype=REAL) - self.syn0_vocab_lockf = ones((len(self.wv.vocab), self.vector_size), dtype=REAL) - - self.wv.syn0_ngrams = empty((self.bucket, self.vector_size), dtype=REAL) - self.syn0_ngrams_lockf = ones((self.bucket, self.vector_size), dtype=REAL) - - all_ngrams = [] - for w, v in self.wv.vocab.items(): - self.wv.ngrams_word[w] = compute_ngrams(w, self.min_n, self.max_n) - all_ngrams += self.wv.ngrams_word[w] - - all_ngrams = list(set(all_ngrams)) - self.num_ngram_vectors = len(all_ngrams) - logger.info("Total number of ngrams is %d", len(all_ngrams)) - - self.wv.hash2index = {} - ngram_indices = [] - new_hash_count = 0 - for i, ngram in enumerate(all_ngrams): - ngram_hash = ft_hash(ngram) % self.bucket - if ngram_hash in self.wv.hash2index: - self.wv.ngrams[ngram] = self.wv.hash2index[ngram_hash] - else: - ngram_indices.append(ngram_hash % self.bucket) - self.wv.hash2index[ngram_hash] = new_hash_count - self.wv.ngrams[ngram] = self.wv.hash2index[ngram_hash] - new_hash_count = new_hash_count + 1 - - self.wv.syn0_ngrams = self.wv.syn0_ngrams.take(ngram_indices, axis=0) - self.syn0_ngrams_lockf = self.syn0_ngrams_lockf.take(ngram_indices, axis=0) - self.reset_ngram_weights() - else: - new_ngrams = [] - for w, v in self.wv.vocab.items(): - self.wv.ngrams_word[w] = compute_ngrams(w, self.min_n, self.max_n) - new_ngrams += [ng for ng in self.wv.ngrams_word[w] if ng not in self.wv.ngrams] - - new_ngrams = list(set(new_ngrams)) - logger.info("Number of new ngrams is %d", len(new_ngrams)) - new_hash_count = 0 - for i, ngram in enumerate(new_ngrams): - ngram_hash = ft_hash(ngram) % self.bucket - if ngram_hash not in self.wv.hash2index: - self.wv.hash2index[ngram_hash] = new_hash_count + self.old_hash2index_len - self.wv.ngrams[ngram] = self.wv.hash2index[ngram_hash] - new_hash_count = new_hash_count + 1 - else: - self.wv.ngrams[ngram] = self.wv.hash2index[ngram_hash] - - rand_obj = np.random - rand_obj.seed(self.seed) - new_vocab_rows = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, - (len(self.wv.vocab) - self.old_vocab_len, self.vector_size) - ).astype(REAL) - new_vocab_lockf_rows = ones((len(self.wv.vocab) - self.old_vocab_len, self.vector_size), dtype=REAL) - new_ngram_rows = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, - (len(self.wv.hash2index) - self.old_hash2index_len, self.vector_size) - ).astype(REAL) - new_ngram_lockf_rows = ones( - (len(self.wv.hash2index) - self.old_hash2index_len, - self.vector_size), - dtype=REAL) - - self.wv.syn0_vocab = vstack([self.wv.syn0_vocab, new_vocab_rows]) - self.syn0_vocab_lockf = 
vstack([self.syn0_vocab_lockf, new_vocab_lockf_rows]) - self.wv.syn0_ngrams = vstack([self.wv.syn0_ngrams, new_ngram_rows]) - self.syn0_ngrams_lockf = vstack([self.syn0_ngrams_lockf, new_ngram_lockf_rows]) - - def reset_ngram_weights(self): - """Reset all projection weights to an initial (untrained) state, - but keep the existing vocabulary and their ngrams. - - """ - rand_obj = np.random - rand_obj.seed(self.seed) - for index in range(len(self.wv.vocab)): - self.wv.syn0_vocab[index] = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size - ).astype(REAL) - for index in range(len(self.wv.hash2index)): - self.wv.syn0_ngrams[index] = rand_obj.uniform( - -1.0 / self.vector_size, 1.0 / self.vector_size, self.vector_size - ).astype(REAL) - - def _do_train_job(self, sentences, alpha, inits): - """Train a single batch of sentences. Return 2-tuple `(effective word count after - ignoring unknown words and sentence length trimming, total word count)`. - - Parameters - ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - alpha : float - The current learning rate. - inits : (:class:`numpy.ndarray`, :class:`numpy.ndarray`) - Each worker's private work memory. - - Returns - ------- - (int, int) - Tuple of (effective word count after ignoring unknown words and sentence length trimming, total word count) - - """ - work, neu1 = inits - tally = 0 - if self.sg: - tally += train_batch_sg(self, sentences, alpha, work, neu1) - else: - tally += train_batch_cbow(self, sentences, alpha, work, neu1) - - return tally, self._raw_word_count(sentences) - - def train(self, sentences, total_examples=None, total_words=None, - epochs=None, start_alpha=None, end_alpha=None, - word_count=0, queue_factor=2, report_delay=1.0): - """Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). - For FastText, each sentence must be a list of unicode strings. (Subclasses may accept other examples.) - - To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate - progress-percentage logging, either total_examples (count of sentences) or total_words (count of - raw words in sentences) **MUST** be provided (if the corpus is the same as was provided to - :meth:`~gensim.models.fasttext.FastText.build_vocab()`, the count of examples in that corpus - will be available in the model's :attr:`corpus_count` property). - - To avoid common mistakes around the model's ability to do multiple training passes itself, an - explicit `epochs` argument **MUST** be provided. In the common and recommended case, - where :meth:`~gensim.models.fasttext.FastText.train()` is only called once, - the model's cached `iter` value should be supplied as `epochs` value. - - Parameters - ---------- - sentences : iterable of iterables - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. 
- See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - total_examples : int - Count of sentences. - total_words : int - Count of raw words in sentences. - epochs : int - Number of iterations (epochs) over the corpus. - start_alpha : float - Initial learning rate. - end_alpha : float - Final learning rate. Drops linearly from `start_alpha`. - word_count : int - Count of words already trained. Set this to 0 for the usual - case of training on all words in sentences. - queue_factor : int - Multiplier for size of queue (number of workers * queue_factor). - report_delay : float - Seconds to wait before reporting progress. - - Examples - -------- - - .. sourcecode:: pycon - - >>> from gensim.models import FastText - >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] - >>> - >>> model = FastText(min_count=1) - >>> model.build_vocab(sentences) - >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) - - """ - self.neg_labels = [] - if self.negative > 0: - # precompute negative labels optimization for pure-python training - self.neg_labels = zeros(self.negative + 1) - self.neg_labels[0] = 1. - - Word2Vec.train( - self, sentences, total_examples=self.corpus_count, epochs=self.iter, - start_alpha=self.alpha, end_alpha=self.min_alpha) - self.get_vocab_word_vecs() - - def __getitem__(self, word): - """Get `word` representations in vector space, as a 1D numpy array. - - Parameters - ---------- - word : str - A single word whose vector needs to be returned. - - Returns - ------- - :class:`numpy.ndarray` - The word's representations in vector space, as a 1D numpy array. - - Raises - ------ - KeyError - For words with all ngrams absent, a KeyError is raised. - - Example - ------- - .. sourcecode:: pycon - - >>> from gensim.models import FastText - >>> from gensim.test.utils import datapath - >>> - >>> trained_model = FastText.load_fasttext_format(datapath('lee_fasttext')) - >>> meow_vector = trained_model['hello'] # get vector for word - - """ - return self.word_vec(word) - - def get_vocab_word_vecs(self): - """Calculate vectors for words in vocabulary and stores them in `wv.syn0`.""" - for w, v in self.wv.vocab.items(): - word_vec = np.copy(self.wv.syn0_vocab[v.index]) - ngrams = self.wv.ngrams_word[w] - ngram_weights = self.wv.syn0_ngrams - for ngram in ngrams: - word_vec += ngram_weights[self.wv.ngrams[ngram]] - word_vec /= (len(ngrams) + 1) - self.wv.syn0[v.index] = word_vec - - def word_vec(self, word, use_norm=False): - """Get the word's representations in vector space, as a 1D numpy array. - - Parameters - ---------- - word : str - A single word whose vector needs to be returned. - use_norm : bool - If True, returns normalized vector. - - Returns - ------- - :class:`numpy.ndarray` - The word's representations in vector space, as a 1D numpy array. - - Raises - ------ - KeyError - For words with all ngrams absent, a KeyError is raised. - - Example - ------- - .. 
sourcecode:: pycon - - >>> from gensim.models import FastText - >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] - >>> - >>> model = FastText(sentences, min_count=1) - >>> meow_vector = model.word_vec('meow') # get vector for word - - """ - return FastTextKeyedVectors.word_vec(self.wv, word, use_norm=use_norm) - - @classmethod - def load_fasttext_format(cls, *args, **kwargs): - """Load a :class:`~gensim.models.fasttext.FastText` model from a format compatible with - the original fasttext implementation. - - Parameters - ---------- - fname : str - Path to the file. - - """ - return Ft_Wrapper.load_fasttext_format(*args, **kwargs) - - def save(self, *args, **kwargs): - """Save the model. This saved model can be loaded again using :func:`~gensim.models.fasttext.FastText.load`, - which supports online training and getting vectors for out-of-vocabulary words. - - Parameters - ---------- - fname : str - Path to the file. - - """ - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm']) - super(FastText, self).save(*args, **kwargs) diff --git a/gensim/models/deprecated/fasttext_wrapper.py b/gensim/models/deprecated/fasttext_wrapper.py deleted file mode 100644 index 727db0e1e0..0000000000 --- a/gensim/models/deprecated/fasttext_wrapper.py +++ /dev/null @@ -1,461 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Author: Jayant Jain -# Copyright (C) 2017 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - - -""" -Warnings --------- -.. deprecated:: 3.2.0 - Use :mod:`gensim.models.fasttext` instead. - - -Python wrapper around word representation learning from FastText, a library for efficient learning -of word representations and sentence classification [1]. - -This module allows training a word embedding from a training corpus with the additional ability -to obtain word vectors for out-of-vocabulary words, using the fastText C implementation. - -The wrapped model can NOT be updated with new documents for online training -- use gensim's -`Word2Vec` for that. - -Example: -.. sourcecode:: pycon - - >>> from gensim.models.wrappers import FastText - >>> model = FastText.train('/Users/kofola/fastText/fasttext', corpus_file='text8') - >>> print model['forests'] # prints vector for given out-of-vocabulary word - -.. [1] https://github.com/facebookresearch/fastText#enriching-word-vectors-with-subword-information - - - -""" - - -import logging -import tempfile -import os -import struct - -import numpy as np -from numpy import float32 as REAL, sqrt, newaxis -from gensim import utils -from gensim.models.deprecated.keyedvectors import KeyedVectors, Vocab -from gensim.models.deprecated.word2vec import Word2Vec - -logger = logging.getLogger(__name__) - -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError - -FASTTEXT_FILEFORMAT_MAGIC = 793712314 - - -class FastTextKeyedVectors(KeyedVectors): - """ - Class to contain vectors, vocab and ngrams for the FastText training class and other methods not directly - involved in training such as most_similar(). 
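`get_vocab_word_vecs` above composes each in-vocabulary word vector from the word's own `syn0_vocab` row plus the rows of all of its ngrams, divided by the number of ngrams plus one. A toy numpy sketch of just that arithmetic (array names and values are illustrative, not from a real model):

.. sourcecode:: pycon

    >>> import numpy as np
    >>> vocab_row = np.array([1., 1., 1.], dtype=np.float32)        # the word's own syn0_vocab row
    >>> ngram_rows = np.array([[2., 2., 2.],                        # rows for the word's two ngrams
    ...                        [3., 3., 3.]], dtype=np.float32)
    >>> (vocab_row + ngram_rows.sum(axis=0)) / (len(ngram_rows) + 1)
    array([2., 2., 2.], dtype=float32)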
- Subclasses KeyedVectors to implement oov lookups, storing ngrams and other FastText specific methods - - """ - - def __init__(self): - super(FastTextKeyedVectors, self).__init__() - self.syn0_vocab = None - self.syn0_vocab_norm = None - self.syn0_ngrams = None - self.syn0_ngrams_norm = None - self.ngrams = {} - self.hash2index = {} - self.ngrams_word = {} - self.min_n = 0 - self.max_n = 0 - - def save(self, *args, **kwargs): - # don't bother storing the cached normalized vectors - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm']) - super(FastTextKeyedVectors, self).save(*args, **kwargs) - - def word_vec(self, word, use_norm=False): - """ - Accept a single word as input. - Returns the word's representations in vector space, as a 1D numpy array. - - The word can be out-of-vocabulary as long as ngrams for the word are present. - For words with all ngrams absent, a KeyError is raised. - - Example: - - .. sourcecode:: pycon - - >>> trained_model['office'] - array([ -1.40128313e-02, ...]) - - """ - if word in self.vocab: - return super(FastTextKeyedVectors, self).word_vec(word, use_norm) - else: - word_vec = np.zeros(self.syn0_ngrams.shape[1], dtype=np.float32) - ngrams = compute_ngrams(word, self.min_n, self.max_n) - ngrams = [ng for ng in ngrams if ng in self.ngrams] - if use_norm: - ngram_weights = self.syn0_ngrams_norm - else: - ngram_weights = self.syn0_ngrams - for ngram in ngrams: - word_vec += ngram_weights[self.ngrams[ngram]] - if word_vec.any(): - return word_vec / len(ngrams) - else: # No ngrams of the word are present in self.ngrams - raise KeyError('all ngrams for word %s absent from model' % word) - - def init_sims(self, replace=False): - """ - Precompute L2-normalized vectors. - - If `replace` is set, forget the original vectors and only keep the normalized - ones = saves lots of memory! - - Note that you **cannot continue training** after doing a replace. The model becomes - effectively read-only = you can only call `most_similar`, `similarity` etc. - - """ - super(FastTextKeyedVectors, self).init_sims(replace) - if getattr(self, 'syn0_ngrams_norm', None) is None or replace: - logger.info("precomputing L2-norms of ngram weight vectors") - if replace: - for i in range(self.syn0_ngrams.shape[0]): - self.syn0_ngrams[i, :] /= sqrt((self.syn0_ngrams[i, :] ** 2).sum(-1)) - self.syn0_ngrams_norm = self.syn0_ngrams - else: - self.syn0_ngrams_norm = \ - (self.syn0_ngrams / sqrt((self.syn0_ngrams ** 2).sum(-1))[..., newaxis]).astype(REAL) - - def __contains__(self, word): - """ - Check if `word` or any character ngrams in `word` are present in the vocabulary. - A vector for the word is guaranteed to exist if `__contains__` returns True. - """ - if word in self.vocab: - return True - else: - char_ngrams = compute_ngrams(word, self.min_n, self.max_n) - return any(ng in self.ngrams for ng in char_ngrams) - - @classmethod - def load_word2vec_format(cls, *args, **kwargs): - """Not suppported. Use gensim.models.KeyedVectors.load_word2vec_format instead.""" - raise NotImplementedError("Not supported. Use gensim.models.KeyedVectors.load_word2vec_format instead.") - - -class FastText(Word2Vec): - """ - Class for word vector training using FastText. Communication between FastText and Python - takes place by working with data files on disk and calling the FastText binary with - subprocess.call(). 
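The out-of-vocabulary branch of `word_vec` above sums the rows of whichever ngrams of the query word are known and divides by their count. A standalone numpy sketch of that averaging, with a toy ngram table whose contents are purely illustrative:

.. sourcecode:: pycon

    >>> import numpy as np
    >>> syn0_ngrams = np.arange(12, dtype=np.float32).reshape(4, 3)   # four ngram vectors of size 3
    >>> ngram_index = {'<wo': 0, 'woo': 1, 'oof': 2}                  # ngram -> row, like wv.ngrams
    >>> known = ['<wo', 'woo']           # ngrams of the OOV query word that are present in the table
    >>> vec = np.zeros(3, dtype=np.float32)
    >>> for ng in known:
    ...     vec += syn0_ngrams[ngram_index[ng]]
    >>> vec / len(known)                 # the vector returned for the OOV word
    array([1.5, 2.5, 3.5], dtype=float32)

When none of the word's ngrams are known, the real method raises `KeyError` instead of returning a vector.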
- Implements functionality similar to [fasttext.py](https://github.com/salestock/fastText.py), - improving speed and scope of functionality like `most_similar`, `similarity` by extracting vectors - into numpy matrix. - - Warnings - -------- - .. deprecated:: 3.2.0 - Use :class:`gensim.models.fasttext.FastText` instead of :class:`gensim.models.wrappers.fasttext.FastText`. - - - """ - - def initialize_word_vectors(self): - self.wv = FastTextKeyedVectors() - - @classmethod - def train(cls, ft_path, corpus_file, output_file=None, model='cbow', size=100, alpha=0.025, window=5, min_count=5, - word_ngrams=1, loss='ns', sample=1e-3, negative=5, iter=5, min_n=3, max_n=6, sorted_vocab=1, threads=12): - """ - `ft_path` is the path to the FastText executable, e.g. `/home/kofola/fastText/fasttext`. - - `corpus_file` is the filename of the text file to be used for training the FastText model. - Expects file to contain utf-8 encoded text. - - `model` defines the training algorithm. By default, cbow is used. Accepted values are - 'cbow', 'skipgram'. - - `size` is the dimensionality of the feature vectors. - - `window` is the maximum distance between the current and predicted word within a sentence. - - `alpha` is the initial learning rate. - - `min_count` = ignore all words with total occurrences lower than this. - - `word_ngram` = max length of word ngram - - `loss` = defines training objective. Allowed values are `hs` (hierarchical softmax), - `ns` (negative sampling) and `softmax`. Defaults to `ns` - - `sample` = threshold for configuring which higher-frequency words are randomly downsampled; - default is 1e-3, useful range is (0, 1e-5). - - `negative` = the value for negative specifies how many "noise words" should be drawn - (usually between 5-20). Default is 5. If set to 0, no negative samping is used. - Only relevant when `loss` is set to `ns` - - `iter` = number of iterations (epochs) over the corpus. Default is 5. - - `min_n` = min length of char ngrams to be used for training word representations. Default is 3. - - `max_n` = max length of char ngrams to be used for training word representations. Set `max_n` to be - lesser than `min_n` to avoid char ngrams being used. Default is 6. - - `sorted_vocab` = if 1 (default), sort the vocabulary by descending frequency before - assigning word indexes. - - `threads` = number of threads to use. Default is 12. - - """ - ft_path = ft_path - output_file = output_file or os.path.join(tempfile.gettempdir(), 'ft_model') - ft_args = { - 'input': corpus_file, - 'output': output_file, - 'lr': alpha, - 'dim': size, - 'ws': window, - 'epoch': iter, - 'minCount': min_count, - 'wordNgrams': word_ngrams, - 'neg': negative, - 'loss': loss, - 'minn': min_n, - 'maxn': max_n, - 'thread': threads, - 't': sample - } - cmd = [ft_path, model] - for option, value in ft_args.items(): - cmd.append("-%s" % option) - cmd.append(str(value)) - - utils.check_output(args=cmd) - model = cls.load_fasttext_format(output_file) - cls.delete_training_files(output_file) - return model - - def save(self, *args, **kwargs): - # don't bother storing the cached normalized vectors - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'syn0_vocab_norm', 'syn0_ngrams_norm']) - super(FastText, self).save(*args, **kwargs) - - @classmethod - def load_fasttext_format(cls, model_file, encoding='utf8'): - """ - Load the input-hidden weight matrix from the fast text output files. 
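The `train` classmethod above launches the fastText binary with a flat `-option value` argument list built from `ft_args`. A standalone sketch of that command assembly; the binary path and parameter values are illustrative only:

.. sourcecode:: pycon

    >>> ft_args = {'input': 'text8', 'output': '/tmp/ft_model', 'dim': 100, 'ws': 5, 'epoch': 5}
    >>> cmd = ['/home/user/fastText/fasttext', 'skipgram']
    >>> for option, value in ft_args.items():
    ...     cmd.extend(['-%s' % option, str(value)])
    >>> ' '.join(cmd)
    '/home/user/fastText/fasttext skipgram -input text8 -output /tmp/ft_model -dim 100 -ws 5 -epoch 5'

The real method then runs the command through `utils.check_output` and loads the resulting `.bin` file via `load_fasttext_format`.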
- - Note that due to limitations in the FastText API, you cannot continue training - with a model loaded this way, though you can query for word similarity etc. - - `model_file` is the path to the FastText output files. - FastText outputs two model files - `/path/to/model.vec` and `/path/to/model.bin` - - Expected value for this example: `/path/to/model` or `/path/to/model.bin`, - as gensim requires only `.bin` file to load entire fastText model. - - """ - model = cls() - if not model_file.endswith('.bin'): - model_file += '.bin' - model.file_name = model_file - model.load_binary_data(encoding=encoding) - return model - - @classmethod - def load(cls, *args, **kwargs): - model = super(FastText, cls).load(*args, **kwargs) - if hasattr(model.wv, 'syn0_all'): - setattr(model.wv, 'syn0_ngrams', model.wv.syn0_all) - delattr(model.wv, 'syn0_all') - return model - - @classmethod - def delete_training_files(cls, model_file): - """Deletes the files created by FastText training""" - try: - os.remove('%s.vec' % model_file) - os.remove('%s.bin' % model_file) - except FileNotFoundError: - logger.debug('Training files %s not found when attempting to delete', model_file) - pass - - def load_binary_data(self, encoding='utf8'): - """Loads data from the output binary file created by FastText training""" - - # TODO use smart_open again when https://github.com/RaRe-Technologies/smart_open/issues/207 will be fixed - with open(self.file_name, 'rb') as f: - self.load_model_params(f) - self.load_dict(f, encoding=encoding) - self.load_vectors(f) - - def load_model_params(self, file_handle): - magic, version = self.struct_unpack(file_handle, '@2i') - if magic == FASTTEXT_FILEFORMAT_MAGIC: # newer format - self.new_format = True - dim, ws, epoch, min_count, neg, _, loss, model, bucket, minn, maxn, _, t = \ - self.struct_unpack(file_handle, '@12i1d') - else: # older format - self.new_format = False - dim = magic - ws = version - epoch, min_count, neg, _, loss, model, bucket, minn, maxn, _, t = self.struct_unpack(file_handle, '@10i1d') - # Parameters stored by [Args::save](https://github.com/facebookresearch/fastText/blob/master/src/args.cc) - self.vector_size = dim - self.window = ws - self.iter = epoch - self.min_count = min_count - self.negative = neg - self.hs = loss == 1 - self.sg = model == 2 - self.bucket = bucket - self.wv.min_n = minn - self.wv.max_n = maxn - self.sample = t - - def load_dict(self, file_handle, encoding='utf8'): - vocab_size, nwords, nlabels = self.struct_unpack(file_handle, '@3i') - # Vocab stored by [Dictionary::save](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) - if nlabels > 0: - raise NotImplementedError("Supervised fastText models are not supported") - logger.info("loading %s words for fastText model from %s", vocab_size, self.file_name) - - self.struct_unpack(file_handle, '@1q') # number of tokens - if self.new_format: - pruneidx_size, = self.struct_unpack(file_handle, '@q') - for i in range(vocab_size): - word_bytes = b'' - char_byte = file_handle.read(1) - # Read vocab word - while char_byte != b'\x00': - word_bytes += char_byte - char_byte = file_handle.read(1) - word = word_bytes.decode(encoding) - count, _ = self.struct_unpack(file_handle, '@qb') - - self.wv.vocab[word] = Vocab(index=i, count=count) - self.wv.index2word.append(word) - - assert len(self.wv.vocab) == nwords, ( - 'mismatch between final vocab size ({} words), ' - 'and expected number of words ({} words)'.format(len(self.wv.vocab), nwords)) - if len(self.wv.vocab) != vocab_size: - # 
expecting to log this warning only for pretrained french vector, wiki.fr - logger.warning( - "mismatch between final vocab size (%s words), and expected vocab size (%s words)", - len(self.wv.vocab), vocab_size - ) - - if self.new_format: - for j in range(pruneidx_size): - self.struct_unpack(file_handle, '@2i') - - def load_vectors(self, file_handle): - if self.new_format: - self.struct_unpack(file_handle, '@?') # bool quant_input in fasttext.cc - num_vectors, dim = self.struct_unpack(file_handle, '@2q') - # Vectors stored by [Matrix::save](https://github.com/facebookresearch/fastText/blob/master/src/matrix.cc) - assert self.vector_size == dim, ( - 'mismatch between vector size in model params ({}) and model vectors ({})' - .format(self.vector_size, dim) - ) - float_size = struct.calcsize('@f') - if float_size == 4: - dtype = np.dtype(np.float32) - elif float_size == 8: - dtype = np.dtype(np.float64) - - self.num_original_vectors = num_vectors - self.wv.syn0_ngrams = np.fromfile(file_handle, dtype=dtype, count=num_vectors * dim) - self.wv.syn0_ngrams = self.wv.syn0_ngrams.reshape((num_vectors, dim)) - assert self.wv.syn0_ngrams.shape == (self.bucket + len(self.wv.vocab), self.vector_size), \ - 'mismatch between actual weight matrix shape {} and expected shape {}'\ - .format( - self.wv.syn0_ngrams.shape, (self.bucket + len(self.wv.vocab), self.vector_size) - ) - - self.init_ngrams() - - def struct_unpack(self, file_handle, fmt): - num_bytes = struct.calcsize(fmt) - return struct.unpack(fmt, file_handle.read(num_bytes)) - - def init_ngrams(self): - """ - Computes ngrams of all words present in vocabulary and stores vectors for only those ngrams. - Vectors for other ngrams are initialized with a random uniform distribution in FastText. These - vectors are discarded here to save space. - - """ - self.wv.ngrams = {} - all_ngrams = [] - self.wv.syn0 = np.zeros((len(self.wv.vocab), self.vector_size), dtype=REAL) - - for w, vocab in self.wv.vocab.items(): - all_ngrams += compute_ngrams(w, self.wv.min_n, self.wv.max_n) - self.wv.syn0[vocab.index] += np.array(self.wv.syn0_ngrams[vocab.index]) - - all_ngrams = set(all_ngrams) - self.num_ngram_vectors = len(all_ngrams) - ngram_indices = [] - for i, ngram in enumerate(all_ngrams): - ngram_hash = ft_hash(ngram) - ngram_indices.append(len(self.wv.vocab) + ngram_hash % self.bucket) - self.wv.ngrams[ngram] = i - self.wv.syn0_ngrams = self.wv.syn0_ngrams.take(ngram_indices, axis=0) - - ngram_weights = self.wv.syn0_ngrams - - logger.info( - "loading weights for %s words for fastText model from %s", - len(self.wv.vocab), self.file_name - ) - - for w, vocab in self.wv.vocab.items(): - word_ngrams = compute_ngrams(w, self.wv.min_n, self.wv.max_n) - for word_ngram in word_ngrams: - self.wv.syn0[vocab.index] += np.array(ngram_weights[self.wv.ngrams[word_ngram]]) - - self.wv.syn0[vocab.index] /= (len(word_ngrams) + 1) - logger.info( - "loaded %s weight matrix for fastText model from %s", - self.wv.syn0.shape, self.file_name - ) - - -def compute_ngrams(word, min_n, max_n): - BOW, EOW = ('<', '>') # Used by FastText to attach to all words as prefix and suffix - extended_word = BOW + word + EOW - ngrams = [] - for ngram_length in range(min_n, min(len(extended_word), max_n) + 1): - for i in range(0, len(extended_word) - ngram_length + 1): - ngrams.append(extended_word[i:i + ngram_length]) - return ngrams - - -def ft_hash(string): - """ - Reproduces [hash method](https://github.com/facebookresearch/fastText/blob/master/src/dictionary.cc) - used in fastText. 
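`compute_ngrams` above is self-contained enough to replay by hand; a sketch that restates the same logic and shows the ngrams produced for a short word with the default `min_n=3, max_n=6`:

.. sourcecode:: pycon

    >>> def compute_ngrams(word, min_n, max_n):      # same logic as the helper above
    ...     extended_word = '<' + word + '>'         # fastText pads every word with BOW/EOW markers
    ...     ngrams = []
    ...     for ngram_length in range(min_n, min(len(extended_word), max_n) + 1):
    ...         for i in range(0, len(extended_word) - ngram_length + 1):
    ...             ngrams.append(extended_word[i:i + ngram_length])
    ...     return ngrams
    ...
    >>> compute_ngrams('cat', 3, 6)
    ['<ca', 'cat', 'at>', '<cat', 'cat>', '<cat>']

Each of these strings is then passed through `ft_hash` and taken modulo `bucket` to select the ngram's vector row.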
- - """ - # Runtime warnings for integer overflow are raised, this is expected behaviour. These warnings are suppressed. - old_settings = np.seterr(all='ignore') - h = np.uint32(2166136261) - for c in string: - h = h ^ np.uint32(ord(c)) - h = h * np.uint32(16777619) - np.seterr(**old_settings) - return h diff --git a/gensim/models/deprecated/keyedvectors.py b/gensim/models/deprecated/keyedvectors.py deleted file mode 100644 index a8983909d0..0000000000 --- a/gensim/models/deprecated/keyedvectors.py +++ /dev/null @@ -1,1115 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2016 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - -""" -Warnings --------- -.. deprecated:: 3.3.0 - Use :mod:`gensim.models.keyedvectors` instead. - - -Word vector storage and similarity look-ups. -Common code independent of the way the vectors are trained(Word2Vec, FastText, WordRank, VarEmbed etc) - -The word vectors are considered read-only in this class. - -Initialize the vectors by training e.g. Word2Vec: - -.. sourcecode:: pycon - - >>> model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4) - >>> word_vectors = model.wv - -Persist the word vectors to disk with: - -.. sourcecode:: pycon - - >>> word_vectors.save(fname) - >>> word_vectors = KeyedVectors.load(fname) - -The vectors can also be instantiated from an existing file on disk -in the original Google's word2vec C format as a KeyedVectors instance: - -.. sourcecode:: pycon - - >>> from gensim.models.keyedvectors import KeyedVectors - >>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.txt', binary=False) # C text format - >>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.bin', binary=True) # C binary format - -You can perform various syntactic/semantic NLP word tasks with the vectors. Some of them -are already built-in: - -.. sourcecode:: pycon - - >>> word_vectors.most_similar(positive=['woman', 'king'], negative=['man']) - [('queen', 0.50882536), ...] - - >>> word_vectors.most_similar_cosmul(positive=['woman', 'king'], negative=['man']) - [('queen', 0.71382287), ...] - - >>> word_vectors.doesnt_match("breakfast cereal dinner lunch".split()) - 'cereal' - - >>> word_vectors.similarity('woman', 'man') - 0.73723527 - -Correlation with human opinion on word similarity: - -.. sourcecode:: pycon - - >>> word_vectors.evaluate_word_pairs(os.path.join(module_path, 'test_data','wordsim353.tsv')) - 0.51, 0.62, 0.13 - -And on analogies: - -.. sourcecode:: pycon - - >>> word_vectors.accuracy(os.path.join(module_path, 'test_data', 'questions-words.txt')) - -and so on. - -""" -from __future__ import division # py3 "true division" - -import logging - -try: - from queue import Queue, Empty -except ImportError: - from Queue import Queue, Empty # noqa:F401 - -# If pyemd C extension is available, import it. 
-# If pyemd is attempted to be used, but isn't installed, ImportError will be raised in wmdistance -try: - from pyemd import emd - PYEMD_EXT = True -except (ImportError, ValueError): - PYEMD_EXT = False - -from numpy import dot, zeros, dtype, float32 as REAL,\ - double, array, vstack, fromstring, sqrt, newaxis,\ - ndarray, sum as np_sum, prod, ascontiguousarray,\ - argmax -import numpy as np - -from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc -from gensim.corpora.dictionary import Dictionary -from six import string_types, iteritems -from six.moves import range -from scipy import stats - - -logger = logging.getLogger(__name__) - - -class Vocab(object): - """ - A single vocabulary item, used internally for collecting per-word frequency/sampling info, - and for constructing binary trees (incl. both word leaves and inner nodes). - - """ - - def __init__(self, **kwargs): - self.count = 0 - self.__dict__.update(kwargs) - - def __lt__(self, other): # used for sorting in a priority queue - return self.count < other.count - - def __str__(self): - vals = ['%s:%r' % (key, self.__dict__[key]) for key in sorted(self.__dict__) if not key.startswith('_')] - return "%s(%s)" % (self.__class__.__name__, ', '.join(vals)) - - -class KeyedVectorsBase(utils.SaveLoad): - """ - Base class to contain vectors and vocab for any set of vectors which are each associated with a key. - - """ - - def __init__(self): - self.syn0 = [] - self.vocab = {} - self.index2word = [] - self.vector_size = None - - def save_word2vec_format(self, fname, fvocab=None, binary=False, total_vec=None): - """ - Store the input-hidden weight matrix in the same format used by the original - C word2vec-tool, for compatibility. - - `fname` is the file used to save the vectors in - `fvocab` is an optional file used to save the vocabulary - `binary` is an optional boolean indicating whether the data is to be saved - in binary word2vec format (default: False) - `total_vec` is an optional parameter to explicitly specify total no. of vectors - (in case word vectors are appended with document vectors afterwards) - - """ - if total_vec is None: - total_vec = len(self.vocab) - vector_size = self.syn0.shape[1] - if fvocab is not None: - logger.info("storing vocabulary in %s", fvocab) - with utils.open(fvocab, 'wb') as vout: - for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): - vout.write(utils.to_utf8("%s %s\n" % (word, vocab.count))) - logger.info("storing %sx%s projection weights into %s", total_vec, vector_size, fname) - assert (len(self.vocab), vector_size) == self.syn0.shape - with utils.open(fname, 'wb') as fout: - fout.write(utils.to_utf8("%s %s\n" % (total_vec, vector_size))) - # store in sorted order: most frequent words at the top - for word, vocab in sorted(iteritems(self.vocab), key=lambda item: -item[1].count): - row = self.syn0[vocab.index] - if binary: - fout.write(utils.to_utf8(word) + b" " + row.tostring()) - else: - fout.write(utils.to_utf8("%s %s\n" % (word, ' '.join("%f" % val for val in row)))) - - @classmethod - def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict', - limit=None, datatype=REAL): - """ - Load the input-hidden weight matrix from the original C word2vec-tool format. - - Note that the information stored in the file is incomplete (the binary tree is missing), - so while you can query for word similarity etc., you cannot continue training - with a model loaded this way. 
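`Vocab.__lt__` above orders entries by `count`, which is what allows them to sit directly in a priority queue, as the inline comment notes. A small self-contained sketch of that ordering:

.. sourcecode:: pycon

    >>> import heapq
    >>> class Vocab(object):                 # same tiny attribute bag as defined above
    ...     def __init__(self, **kwargs):
    ...         self.count = 0
    ...         self.__dict__.update(kwargs)
    ...     def __lt__(self, other):         # used for sorting in a priority queue
    ...         return self.count < other.count
    ...
    >>> heap = [Vocab(word='cat', count=10), Vocab(word='dog', count=3), Vocab(word='say', count=7)]
    >>> heapq.heapify(heap)
    >>> heapq.heappop(heap).word             # the least frequent entry comes out first
    'dog'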
- - `binary` is a boolean indicating whether the data is in binary word2vec format. - `norm_only` is a boolean indicating whether to only store normalised word2vec vectors in memory. - Word counts are read from `fvocab` filename, if set (this is the file generated - by `-save-vocab` flag of the original C tool). - - If you trained the C model using non-utf8 encoding for words, specify that - encoding in `encoding`. - - `unicode_errors`, default 'strict', is a string suitable to be passed as the `errors` - argument to the unicode() (Python 2.x) or str() (Python 3.x) function. If your source - file may include word tokens truncated in the middle of a multibyte unicode character - (as is common from the original word2vec.c tool), 'ignore' or 'replace' may help. - - `limit` sets a maximum number of word-vectors to read from the file. The default, - None, means read all. - - `datatype` (experimental) can coerce dimensions to a non-default float type (such - as np.float16) to save memory. (Such types may result in much slower bulk operations - or incompatibility with optimized routines.) - - """ - counts = None - if fvocab is not None: - logger.info("loading word counts from %s", fvocab) - counts = {} - with utils.open(fvocab, 'rb') as fin: - for line in fin: - word, count = utils.to_unicode(line).strip().split() - counts[word] = int(count) - - logger.info("loading projection weights from %s", fname) - with utils.open(fname, 'rb') as fin: - header = utils.to_unicode(fin.readline(), encoding=encoding) - vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format - if limit: - vocab_size = min(vocab_size, limit) - result = cls() - result.vector_size = vector_size - result.syn0 = zeros((vocab_size, vector_size), dtype=datatype) - - def add_word(word, weights): - word_id = len(result.vocab) - if word in result.vocab: - logger.warning("duplicate word '%s' in %s, ignoring all but first", word, fname) - return - if counts is None: - # most common scenario: no vocab file given. just make up some bogus counts, in descending order - result.vocab[word] = Vocab(index=word_id, count=vocab_size - word_id) - elif word in counts: - # use count from the vocab file - result.vocab[word] = Vocab(index=word_id, count=counts[word]) - else: - # vocab file given, but word is missing -- set count to None (TODO: or raise?) 
- logger.warning("vocabulary file is incomplete: '%s' is missing", word) - result.vocab[word] = Vocab(index=word_id, count=None) - result.syn0[word_id] = weights - result.index2word.append(word) - - if binary: - binary_len = dtype(REAL).itemsize * vector_size - for _ in range(vocab_size): - # mixed text and binary: read text first, then binary - word = [] - while True: - ch = fin.read(1) - if ch == b' ': - break - if ch == b'': - raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?") - if ch != b'\n': # ignore newlines in front of words (some binary files have) - word.append(ch) - word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors) - weights = fromstring(fin.read(binary_len), dtype=REAL) - add_word(word, weights) - else: - for line_no in range(vocab_size): - line = fin.readline() - if line == b'': - raise EOFError("unexpected end of input; is count incorrect or file otherwise damaged?") - parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") - if len(parts) != vector_size + 1: - raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no) - word, weights = parts[0], [REAL(x) for x in parts[1:]] - add_word(word, weights) - if result.syn0.shape[0] != len(result.vocab): - logger.info( - "duplicate words detected, shrinking matrix size from %i to %i", - result.syn0.shape[0], len(result.vocab) - ) - result.syn0 = ascontiguousarray(result.syn0[: len(result.vocab)]) - assert (len(result.vocab), vector_size) == result.syn0.shape - - logger.info("loaded %s matrix from %s", result.syn0.shape, fname) - return result - - def similarity(self, w1, w2): - """ - Compute similarity between vectors of two input words. - To be implemented by child class. - - """ - raise NotImplementedError - - def distance(self, w1, w2): - """ - Compute distance between vectors of two input words. - To be implemented by child class. - - """ - raise NotImplementedError - - def distances(self, word_or_vector, other_words=()): - """ - Compute distances from given word or vector to all words in `other_words`. - If `other_words` is empty, return distance between `word_or_vectors` and all words in vocab. - To be implemented by child class. - - """ - raise NotImplementedError - - def word_vec(self, word): - """ - Accept a single word as input. - Returns the word's representations in vector space, as a 1D numpy array. - - Example: - - .. sourcecode:: pycon - - >>> trained_model.word_vec('office') - array([ -1.40128313e-02, ...]) - - """ - if word in self.vocab: - result = self.syn0[self.vocab[word].index] - result.setflags(write=False) - return result - else: - raise KeyError("word '%s' not in vocabulary" % word) - - def __getitem__(self, words): - """ - Accept a single word or a list of words as input. - - If a single word: returns the word's representations in vector space, as - a 1D numpy array. - - Multiple words: return the words' representations in vector space, as a - 2d numpy array: #words x #vector_size. Matrix rows are in the same order - as in input. - - Example: - - .. sourcecode:: pycon - - >>> trained_model['office'] - array([ -1.40128313e-02, ...]) - - >>> trained_model[['office', 'products']] - array([ -1.40128313e-02, ...] - [ -1.70425311e-03, ...] - ...) 
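The text variant of the C word2vec format parsed above is easy to produce by hand: a header line with the vocabulary size and vector size, then one whitespace-separated entry per word. A hedged sketch that writes a two-word file and reads it back through the non-deprecated `gensim.models.KeyedVectors` loader, which accepts the same format (the temporary path is illustrative):

.. sourcecode:: pycon

    >>> from gensim.models import KeyedVectors
    >>> lines = ['2 3',                      # header: <vocab size> <vector size>
    ...          'cat 0.1 0.2 0.3',          # one entry per line: word, then space-separated floats
    ...          'dog 0.4 0.5 0.6']
    >>> with open('/tmp/tiny_vectors.txt', 'w') as fout:
    ...     _ = fout.write('\n'.join(lines) + '\n')
    ...
    >>> kv = KeyedVectors.load_word2vec_format('/tmp/tiny_vectors.txt', binary=False)
    >>> kv['cat'].shape
    (3,)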
- - """ - if isinstance(words, string_types): - # allow calls like trained_model['office'], as a shorthand for trained_model[['office']] - return self.word_vec(words) - - return vstack([self.word_vec(word) for word in words]) - - def __contains__(self, word): - return word in self.vocab - - def most_similar_to_given(self, w1, word_list): - """Return the word from word_list most similar to w1. - - Args: - w1 (str): a word - word_list (list): list of words containing a word most similar to w1 - - Returns: - the word in word_list with the highest similarity to w1 - - Raises: - KeyError: If w1 or any word in word_list is not in the vocabulary - - Example: - - .. sourcecode:: pycon - - >>> trained_model.most_similar_to_given('music', ['water', 'sound', 'backpack', 'mouse']) - 'sound' - - >>> trained_model.most_similar_to_given('snake', ['food', 'pencil', 'animal', 'phone']) - 'animal' - - """ - return word_list[argmax([self.similarity(w1, word) for word in word_list])] - - def words_closer_than(self, w1, w2): - """ - Returns all words that are closer to `w1` than `w2` is to `w1`. - - Parameters - ---------- - w1 : str - Input word. - w2 : str - Input word. - - Returns - ------- - list (str) - List of words that are closer to `w1` than `w2` is to `w1`. - - Examples - -------- - - .. sourcecode:: pycon - - >>> model.words_closer_than('carnivore.n.01', 'mammal.n.01') - ['dog.n.01', 'canine.n.02'] - - """ - all_distances = self.distances(w1) - w1_index = self.vocab[w1].index - w2_index = self.vocab[w2].index - closer_node_indices = np.where(all_distances < all_distances[w2_index])[0] - return [self.index2word[index] for index in closer_node_indices if index != w1_index] - - def rank(self, w1, w2): - """ - Rank of the distance of `w2` from `w1`, in relation to distances of all words from `w1`. - - Parameters - ---------- - w1 : str - Input word. - w2 : str - Input word. - - Returns - ------- - int - Rank of `w2` from `w1` in relation to all other nodes. - - Examples - -------- - - .. sourcecode:: pycon - - >>> model.rank('mammal.n.01', 'carnivore.n.01') - 3 - - """ - return len(self.words_closer_than(w1, w2)) + 1 - - -class EuclideanKeyedVectors(KeyedVectorsBase): - """ - Class to contain vectors and vocab for the Word2Vec training class and other w2v methods not directly - involved in training such as most_similar() - """ - - def __init__(self): - super(EuclideanKeyedVectors, self).__init__() - self.syn0norm = None - - @property - def wv(self): - return self - - def save(self, *args, **kwargs): - # don't bother storing the cached normalized vectors - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm']) - super(EuclideanKeyedVectors, self).save(*args, **kwargs) - - def word_vec(self, word, use_norm=False): - """ - Accept a single word as input. - Returns the word's representations in vector space, as a 1D numpy array. - - If `use_norm` is True, returns the normalized word vector. - - Example: - - .. sourcecode:: pycon - - >>> trained_model['office'] - array([ -1.40128313e-02, ...]) - - """ - if word in self.vocab: - if use_norm: - result = self.syn0norm[self.vocab[word].index] - else: - result = self.syn0[self.vocab[word].index] - - result.setflags(write=False) - return result - else: - raise KeyError("word '%s' not in vocabulary" % word) - - def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): - """ - Find the top-N most similar words. Positive words contribute positively towards the - similarity, negative words negatively. 
- - This method computes cosine similarity between a simple mean of the projection - weight vectors of the given words and the vectors for each word in the model. - The method corresponds to the `word-analogy` and `distance` scripts in the original - word2vec implementation. - - If topn is False, most_similar returns the vector of similarity scores. - - `restrict_vocab` is an optional integer which limits the range of vectors which - are searched for most-similar values. For example, restrict_vocab=10000 would - only check the first 10000 word vectors in the vocabulary order. (This may be - meaningful if you've sorted the vocabulary by descending frequency.) - - Example: - - .. sourcecode:: pycon - - >>> trained_model.most_similar(positive=['woman', 'king'], negative=['man']) - [('queen', 0.50882536), ...] - - """ - if positive is None: - positive = [] - if negative is None: - negative = [] - - self.init_sims() - - if isinstance(positive, string_types) and not negative: - # allow calls like most_similar('dog'), as a shorthand for most_similar(['dog']) - positive = [positive] - - # add weights for each word, if not already present; default to 1.0 for positive and -1.0 for negative words - positive = [ - (word, 1.0) if isinstance(word, string_types + (ndarray,)) else word - for word in positive - ] - negative = [ - (word, -1.0) if isinstance(word, string_types + (ndarray,)) else word - for word in negative - ] - - # compute the weighted average of all words - all_words, mean = set(), [] - for word, weight in positive + negative: - if isinstance(word, ndarray): - mean.append(weight * word) - else: - mean.append(weight * self.word_vec(word, use_norm=True)) - if word in self.vocab: - all_words.add(self.vocab[word].index) - if not mean: - raise ValueError("cannot compute similarity with no input") - mean = matutils.unitvec(array(mean).mean(axis=0)).astype(REAL) - - if indexer is not None: - return indexer.most_similar(mean, topn) - - limited = self.syn0norm if restrict_vocab is None else self.syn0norm[:restrict_vocab] - dists = dot(limited, mean) - if not topn: - return dists - best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True) - # ignore (don't return) words from the input - result = [(self.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words] - return result[:topn] - - def similar_by_word(self, word, topn=10, restrict_vocab=None): - """ - Find the top-N most similar words. - - If topn is False, similar_by_word returns the vector of similarity scores. - - `restrict_vocab` is an optional integer which limits the range of vectors which - are searched for most-similar values. For example, restrict_vocab=10000 would - only check the first 10000 word vectors in the vocabulary order. (This may be - meaningful if you've sorted the vocabulary by descending frequency.) - - Example: - - .. sourcecode:: pycon - - >>> trained_model.similar_by_word('graph') - [('user', 0.9999163150787354), ...] - - """ - return self.most_similar(positive=[word], topn=topn, restrict_vocab=restrict_vocab) - - def similar_by_vector(self, vector, topn=10, restrict_vocab=None): - """ - Find the top-N most similar words by vector. - - If topn is False, similar_by_vector returns the vector of similarity scores. - - `restrict_vocab` is an optional integer which limits the range of vectors which - are searched for most-similar values. For example, restrict_vocab=10000 would - only check the first 10000 word vectors in the vocabulary order. 
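`most_similar` above reduces to: average the (already normalized) query vectors, re-normalize, take dot products against every row of `syn0norm`, and argsort. A standalone numpy sketch of that pipeline, with random unit vectors standing in for a trained model:

.. sourcecode:: pycon

    >>> import numpy as np
    >>> rng = np.random.RandomState(0)
    >>> syn0norm = rng.randn(500, 20)
    >>> syn0norm /= np.linalg.norm(syn0norm, axis=1, keepdims=True)   # unit-length rows, as after init_sims()
    >>> mean = syn0norm[[7, 42]].mean(axis=0)     # average of the (normalized) query word vectors
    >>> mean /= np.linalg.norm(mean)              # matutils.unitvec equivalent
    >>> dists = syn0norm.dot(mean)                # cosine similarity against every vocabulary vector
    >>> best = np.argsort(-dists)[:10]            # indices of the ten nearest rows

The real method additionally drops the input words from the returned ranking.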
(This may be - meaningful if you've sorted the vocabulary by descending frequency.) - - Example:: - - >>> trained_model.similar_by_vector([1,2]) - [('survey', 0.9942699074745178), ...] - - """ - return self.most_similar(positive=[vector], topn=topn, restrict_vocab=restrict_vocab) - - def wmdistance(self, document1, document2): - """ - Compute the Word Mover's Distance between two documents. When using this - code, please consider citing the following papers: - - .. Ofir Pele and Michael Werman, "A linear time histogram metric for improved SIFT matching". - .. Ofir Pele and Michael Werman, "Fast and robust earth mover's distances". - .. Matt Kusner et al. "From Word Embeddings To Document Distances". - - Note that if one of the documents have no words that exist in the - Word2Vec vocab, `float('inf')` (i.e. infinity) will be returned. - - This method only works if `pyemd` is installed (can be installed via pip, but requires a C compiler). - - Example: - - .. sourcecode:: pycon - - >>> # Train word2vec model. - >>> model = Word2Vec(sentences) - - >>> # Some sentences to test. - >>> sentence_obama = 'Obama speaks to the media in Illinois'.lower().split() - >>> sentence_president = 'The president greets the press in Chicago'.lower().split() - - >>> # Remove their stopwords. - >>> from nltk.corpus import stopwords - >>> stopwords = nltk.corpus.stopwords.words('english') - >>> sentence_obama = [w for w in sentence_obama if w not in stopwords] - >>> sentence_president = [w for w in sentence_president if w not in stopwords] - - >>> # Compute WMD. - >>> distance = model.wmdistance(sentence_obama, sentence_president) - """ - - if not PYEMD_EXT: - raise ImportError("Please install pyemd Python package to compute WMD.") - - # Remove out-of-vocabulary words. - len_pre_oov1 = len(document1) - len_pre_oov2 = len(document2) - document1 = [token for token in document1 if token in self] - document2 = [token for token in document2 if token in self] - diff1 = len_pre_oov1 - len(document1) - diff2 = len_pre_oov2 - len(document2) - if diff1 > 0 or diff2 > 0: - logger.info('Removed %d and %d OOV words from document 1 and 2 (respectively).', diff1, diff2) - - if len(document1) == 0 or len(document2) == 0: - logger.info( - "At least one of the documents had no words that werein the vocabulary. " - "Aborting (returning inf)." - ) - return float('inf') - - dictionary = Dictionary(documents=[document1, document2]) - vocab_len = len(dictionary) - - if vocab_len == 1: - # Both documents are composed by a single unique token - return 0.0 - - # Sets for faster look-up. - docset1 = set(document1) - docset2 = set(document2) - - # Compute distance matrix. - distance_matrix = zeros((vocab_len, vocab_len), dtype=double) - for i, t1 in dictionary.items(): - for j, t2 in dictionary.items(): - if t1 not in docset1 or t2 not in docset2: - continue - # Compute Euclidean distance between word vectors. - distance_matrix[i, j] = sqrt(np_sum((self[t1] - self[t2])**2)) - - if np_sum(distance_matrix) == 0.0: - # `emd` gets stuck if the distance matrix contains only zeros. - logger.info('The distance matrix is all zeros. Aborting (returning inf).') - return float('inf') - - def nbow(document): - d = zeros(vocab_len, dtype=double) - nbow = dictionary.doc2bow(document) # Word frequencies. - doc_len = len(document) - for idx, freq in nbow: - d[idx] = freq / float(doc_len) # Normalized word frequencies. - return d - - # Compute nBOW representation of documents. - d1 = nbow(document1) - d2 = nbow(document2) - - # Compute WMD. 
- return emd(d1, d2, distance_matrix) - - def most_similar_cosmul(self, positive=None, negative=None, topn=10): - """ - Find the top-N most similar words, using the multiplicative combination objective - proposed by Omer Levy and Yoav Goldberg in [4]_. Positive words still contribute - positively towards the similarity, negative words negatively, but with less - susceptibility to one large distance dominating the calculation. - - In the common analogy-solving case, of two positive and one negative examples, - this method is equivalent to the "3CosMul" objective (equation (4)) of Levy and Goldberg. - - Additional positive or negative examples contribute to the numerator or denominator, - respectively – a potentially sensible but untested extension of the method. (With - a single positive example, rankings will be the same as in the default most_similar.) - - Example: - - .. sourcecode:: pycon - - >>> trained_model.most_similar_cosmul(positive=['baghdad', 'england'], negative=['london']) - [(u'iraq', 0.8488819003105164), ...] - - .. [4] Omer Levy and Yoav Goldberg. Linguistic Regularities in Sparse and Explicit Word Representations, 2014. - - """ - if positive is None: - positive = [] - if negative is None: - negative = [] - - self.init_sims() - - if isinstance(positive, string_types) and not negative: - # allow calls like most_similar_cosmul('dog'), as a shorthand for most_similar_cosmul(['dog']) - positive = [positive] - - all_words = { - self.vocab[word].index for word in positive + negative - if not isinstance(word, ndarray) and word in self.vocab - } - - positive = [ - self.word_vec(word, use_norm=True) if isinstance(word, string_types) else word - for word in positive - ] - negative = [ - self.word_vec(word, use_norm=True) if isinstance(word, string_types) else word - for word in negative - ] - - if not positive: - raise ValueError("cannot compute similarity with no input") - - # equation (4) of Levy & Goldberg "Linguistic Regularities...", - # with distances shifted to [0,1] per footnote (7) - pos_dists = [((1 + dot(self.syn0norm, term)) / 2) for term in positive] - neg_dists = [((1 + dot(self.syn0norm, term)) / 2) for term in negative] - dists = prod(pos_dists, axis=0) / (prod(neg_dists, axis=0) + 0.000001) - - if not topn: - return dists - best = matutils.argsort(dists, topn=topn + len(all_words), reverse=True) - # ignore (don't return) words from the input - result = [(self.index2word[sim], float(dists[sim])) for sim in best if sim not in all_words] - return result[:topn] - - def doesnt_match(self, words): - """ - Which word from the given list doesn't go with the others? - - Example:: - - >>> trained_model.doesnt_match("breakfast cereal dinner lunch".split()) - 'cereal' - - """ - self.init_sims() - - used_words = [word for word in words if word in self] - if len(used_words) != len(words): - ignored_words = set(words) - set(used_words) - logger.warning("vectors for words %s are not present in the model, ignoring these words", ignored_words) - if not used_words: - raise ValueError("cannot select a word from an empty list") - vectors = vstack(self.word_vec(word, use_norm=True) for word in used_words).astype(REAL) - mean = matutils.unitvec(vectors.mean(axis=0)).astype(REAL) - dists = dot(vectors, mean) - return sorted(zip(dists, used_words))[0][1] - - @staticmethod - def cosine_similarities(vector_1, vectors_all): - """ - Return cosine similarities between one vector and a set of other vectors. 
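`most_similar_cosmul` above shifts each cosine similarity into [0, 1] and combines the positive and negative terms multiplicatively (the 3CosMul objective). A standalone numpy sketch of the same arithmetic, again with random unit vectors in place of `syn0norm`:

.. sourcecode:: pycon

    >>> import numpy as np
    >>> rng = np.random.RandomState(0)
    >>> syn0norm = rng.randn(1000, 20)
    >>> syn0norm /= np.linalg.norm(syn0norm, axis=1, keepdims=True)
    >>> positive = [syn0norm[1], syn0norm[2]]     # e.g. 'baghdad', 'england'
    >>> negative = [syn0norm[3]]                  # e.g. 'london'
    >>> pos_dists = [(1 + syn0norm.dot(term)) / 2 for term in positive]   # cosines shifted into [0, 1]
    >>> neg_dists = [(1 + syn0norm.dot(term)) / 2 for term in negative]
    >>> dists = np.prod(pos_dists, axis=0) / (np.prod(neg_dists, axis=0) + 0.000001)
    >>> best = np.argsort(-dists)[:10]            # highest 3CosMul scores (input rows included here)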
- - Parameters - ---------- - vector_1 : numpy.array - vector from which similarities are to be computed. - expected shape (dim,) - vectors_all : numpy.array - for each row in vectors_all, distance from vector_1 is computed. - expected shape (num_vectors, dim) - - Returns - ------- - numpy.array - Contains cosine distance between vector_1 and each row in vectors_all. - shape (num_vectors,) - - """ - norm = np.linalg.norm(vector_1) - all_norms = np.linalg.norm(vectors_all, axis=1) - dot_products = dot(vectors_all, vector_1) - similarities = dot_products / (norm * all_norms) - return similarities - - def distances(self, word_or_vector, other_words=()): - """ - Compute cosine distances from given word or vector to all words in `other_words`. - If `other_words` is empty, return distance between `word_or_vectors` and all words in vocab. - - Parameters - ---------- - word_or_vector : str or numpy.array - Word or vector from which distances are to be computed. - - other_words : iterable(str) or None - For each word in `other_words` distance from `word_or_vector` is computed. - If None or empty, distance of `word_or_vector` from all words in vocab is computed (including itself). - - Returns - ------- - numpy.array - Array containing distances to all words in `other_words` from input `word_or_vector`, - in the same order as `other_words`. - - Notes - ----- - Raises KeyError if either `word_or_vector` or any word in `other_words` is absent from vocab. - - """ - if isinstance(word_or_vector, string_types): - input_vector = self.word_vec(word_or_vector) - else: - input_vector = word_or_vector - if not other_words: - other_vectors = self.syn0 - else: - other_indices = [self.vocab[word].index for word in other_words] - other_vectors = self.syn0[other_indices] - return 1 - self.cosine_similarities(input_vector, other_vectors) - - def distance(self, w1, w2): - """ - Compute cosine distance between two words. - - Example: - - .. sourcecode:: pycon - - >>> trained_model.distance('woman', 'man') - 0.34 - - >>> trained_model.distance('woman', 'woman') - 0.0 - - """ - return 1 - self.similarity(w1, w2) - - def similarity(self, w1, w2): - """ - Compute cosine similarity between two words. - - Example: - - .. sourcecode:: pycon - - >>> trained_model.similarity('woman', 'man') - 0.73723527 - - >>> trained_model.similarity('woman', 'woman') - 1.0 - - """ - return dot(matutils.unitvec(self[w1]), matutils.unitvec(self[w2])) - - def n_similarity(self, ws1, ws2): - """ - Compute cosine similarity between two sets of words. - - Example: - - .. 
sourcecode:: pycon - - >>> trained_model.n_similarity(['sushi', 'shop'], ['japanese', 'restaurant']) - 0.61540466561049689 - - >>> trained_model.n_similarity(['restaurant', 'japanese'], ['japanese', 'restaurant']) - 1.0000000000000004 - - >>> trained_model.n_similarity(['sushi'], ['restaurant']) == trained_model.similarity('sushi', 'restaurant') - True - - """ - if not(len(ws1) and len(ws2)): - raise ZeroDivisionError('At least one of the passed list is empty.') - v1 = [self[word] for word in ws1] - v2 = [self[word] for word in ws2] - return dot(matutils.unitvec(array(v1).mean(axis=0)), matutils.unitvec(array(v2).mean(axis=0))) - - @staticmethod - def log_accuracy(section): - correct, incorrect = len(section['correct']), len(section['incorrect']) - if correct + incorrect > 0: - logger.info( - "%s: %.1f%% (%i/%i)", - section['section'], 100.0 * correct / (correct + incorrect), correct, correct + incorrect - ) - - def accuracy(self, questions, restrict_vocab=30000, most_similar=most_similar, case_insensitive=True): - """ - Compute accuracy of the model. `questions` is a filename where lines are - 4-tuples of words, split into sections by ": SECTION NAME" lines. - See questions-words.txt in - https://storage.googleapis.com/google-code-archive-source/v2/code.google.com/word2vec/source-archive.zip - for an example. - - The accuracy is reported (=printed to log and returned as a list) for each - section separately, plus there's one aggregate summary at the end. - - Use `restrict_vocab` to ignore all questions containing a word not in the first `restrict_vocab` - words (default 30,000). This may be meaningful if you've sorted the vocabulary by descending frequency. - In case `case_insensitive` is True, the first `restrict_vocab` words are taken first, and then - case normalization is performed. - - Use `case_insensitive` to convert all words in questions and vocab to their uppercase form before - evaluating the accuracy (default True). Useful in case of case-mismatch between training tokens - and question words. In case of multiple case variants of a single word, the vector for the first - occurrence (also the most frequent if vocabulary is sorted) is taken. - - This method corresponds to the `compute-accuracy` script of the original C word2vec. 
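The prediction step inside `accuracy` reduces to ranking all unit-normalized vectors against an additive combination of the question words; a toy NumPy sketch, with random vectors standing in for a trained model:

.. sourcecode:: pycon

    >>> import numpy as np
    >>> rng = np.random.default_rng(1)                 # toy setup only
    >>> M = rng.normal(size=(500, 20))
    >>> M /= np.linalg.norm(M, axis=1, keepdims=True)
    >>> a, b, c = M[10], M[11], M[12]                  # question "a is to b as c is to ?"
    >>> query = b - a + c
    >>> query /= np.linalg.norm(query)
    >>> sims = M.dot(query)
    >>> sims[[10, 11, 12]] = -np.inf                   # ignore the input words, as the loop below does
    >>> predicted = int(np.argmax(sims))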
- - """ - ok_vocab = [(w, self.vocab[w]) for w in self.index2word[:restrict_vocab]] - ok_vocab = {w.upper(): v for w, v in reversed(ok_vocab)} if case_insensitive else dict(ok_vocab) - - sections, section = [], None - with utils.open(questions, 'rb') as f: - for line_no, line in enumerate(f): - # TODO: use level3 BLAS (=evaluate multiple questions at once), for speed - line = utils.to_unicode(line) - if line.startswith(': '): - # a new section starts => store the old section - if section: - sections.append(section) - self.log_accuracy(section) - section = {'section': line.lstrip(': ').strip(), 'correct': [], 'incorrect': []} - else: - if not section: - raise ValueError("missing section header before line #%i in %s" % (line_no, questions)) - try: - if case_insensitive: - a, b, c, expected = [word.upper() for word in line.split()] - else: - a, b, c, expected = [word for word in line.split()] - except ValueError: - logger.info("skipping invalid line #%i in %s", line_no, questions) - continue - if a not in ok_vocab or b not in ok_vocab or c not in ok_vocab or expected not in ok_vocab: - logger.debug("skipping line #%i with OOV words: %s", line_no, line.strip()) - continue - - original_vocab = self.vocab - self.vocab = ok_vocab - ignore = {a, b, c} # input words to be ignored - predicted = None - # find the most likely prediction, ignoring OOV words and input words - sims = most_similar(self, positive=[b, c], negative=[a], topn=False, restrict_vocab=restrict_vocab) - self.vocab = original_vocab - for index in matutils.argsort(sims, reverse=True): - predicted = self.index2word[index].upper() if case_insensitive else self.index2word[index] - if predicted in ok_vocab and predicted not in ignore: - if predicted != expected: - logger.debug("%s: expected %s, predicted %s", line.strip(), expected, predicted) - break - if predicted == expected: - section['correct'].append((a, b, c, expected)) - else: - section['incorrect'].append((a, b, c, expected)) - if section: - # store the last section, too - sections.append(section) - self.log_accuracy(section) - - total = { - 'section': 'total', - 'correct': sum((s['correct'] for s in sections), []), - 'incorrect': sum((s['incorrect'] for s in sections), []), - } - self.log_accuracy(total) - sections.append(total) - return sections - - @staticmethod - def log_evaluate_word_pairs(pearson, spearman, oov, pairs): - logger.info('Pearson correlation coefficient against %s: %.4f', pairs, pearson[0]) - logger.info('Spearman rank-order correlation coefficient against %s: %.4f', pairs, spearman[0]) - logger.info('Pairs with unknown words ratio: %.1f%%', oov) - - def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, - case_insensitive=True, dummy4unknown=False): - """ - Compute correlation of the model with human similarity judgments. `pairs` is a filename of a dataset where - lines are 3-tuples, each consisting of a word pair and a similarity value, separated by `delimiter`. - An example dataset is included in Gensim (test/test_data/wordsim353.tsv). More datasets can be found at - http://technion.ac.il/~ira.leviant/MultilingualVSMdata.html or https://www.cl.cam.ac.uk/~fh295/simlex.html. - - The model is evaluated using Pearson correlation coefficient and Spearman rank-order correlation coefficient - between the similarities from the dataset and the similarities produced by the model itself. - The results are printed to log and returned as a triple (pearson, spearman, ratio of pairs with unknown words). 
- - Use `restrict_vocab` to ignore all word pairs containing a word not in the first `restrict_vocab` - words (default 300,000). This may be meaningful if you've sorted the vocabulary by descending frequency. - If `case_insensitive` is True, the first `restrict_vocab` words are taken, and then case normalization - is performed. - - Use `case_insensitive` to convert all words in the pairs and vocab to their uppercase form before - evaluating the model (default True). Useful when you expect case-mismatch between training tokens - and words pairs in the dataset. If there are multiple case variants of a single word, the vector for the first - occurrence (also the most frequent if vocabulary is sorted) is taken. - - Use `dummy4unknown=True` to produce zero-valued similarities for pairs with out-of-vocabulary words. - Otherwise (default False), these pairs are skipped entirely. - """ - ok_vocab = [(w, self.vocab[w]) for w in self.index2word[:restrict_vocab]] - ok_vocab = {w.upper(): v for w, v in reversed(ok_vocab)} if case_insensitive else dict(ok_vocab) - - similarity_gold = [] - similarity_model = [] - oov = 0 - - original_vocab = self.vocab - self.vocab = ok_vocab - - with utils.open(pairs, 'rb') as f: - for line_no, line in enumerate(f): - line = utils.to_unicode(line) - if line.startswith('#'): - # May be a comment - continue - else: - try: - if case_insensitive: - a, b, sim = [word.upper() for word in line.split(delimiter)] - else: - a, b, sim = [word for word in line.split(delimiter)] - sim = float(sim) - except (ValueError, TypeError): - logger.info('skipping invalid line #%d in %s', line_no, pairs) - continue - if a not in ok_vocab or b not in ok_vocab: - oov += 1 - if dummy4unknown: - similarity_model.append(0.0) - similarity_gold.append(sim) - continue - else: - logger.debug('skipping line #%d with OOV words: %s', line_no, line.strip()) - continue - similarity_gold.append(sim) # Similarity from the dataset - similarity_model.append(self.similarity(a, b)) # Similarity from the model - self.vocab = original_vocab - spearman = stats.spearmanr(similarity_gold, similarity_model) - pearson = stats.pearsonr(similarity_gold, similarity_model) - oov_ratio = float(oov) / (len(similarity_gold) + oov) * 100 - - logger.debug('Pearson correlation coefficient against %s: %f with p-value %f', pairs, pearson[0], pearson[1]) - logger.debug( - 'Spearman rank-order correlation coefficient against %s: %f with p-value %f', - pairs, spearman[0], spearman[1] - ) - logger.debug('Pairs with unknown words: %d', oov) - self.log_evaluate_word_pairs(pearson, spearman, oov_ratio, pairs) - return pearson, spearman, oov_ratio - - def init_sims(self, replace=False): - """ - Precompute L2-normalized vectors. - - If `replace` is set, forget the original vectors and only keep the normalized - ones = saves lots of memory! - - Note that you **cannot continue training** after doing a replace. The model becomes - effectively read-only = you can call `most_similar`, `similarity` etc., but not `train`. 
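The precomputation described in the `init_sims` docstring above is a plain row-wise L2 normalization; an equivalent standalone NumPy sketch, with a random matrix standing in for `syn0`:

.. sourcecode:: pycon

    >>> import numpy as np
    >>> syn0 = np.random.rand(1000, 100).astype(np.float32)   # stand-in for the model's word vectors
    >>> norms = np.sqrt((syn0 ** 2).sum(-1))[..., np.newaxis]
    >>> syn0norm = (syn0 / norms).astype(np.float32)           # replace=False: keep raw and normed copies
    >>> syn0 /= norms                                          # replace=True: normalize in place, saving memory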
- - """ - if getattr(self, 'syn0norm', None) is None or replace: - logger.info("precomputing L2-norms of word weight vectors") - if replace: - for i in range(self.syn0.shape[0]): - self.syn0[i, :] /= sqrt((self.syn0[i, :] ** 2).sum(-1)) - self.syn0norm = self.syn0 - else: - self.syn0norm = (self.syn0 / sqrt((self.syn0 ** 2).sum(-1))[..., newaxis]).astype(REAL) - - def get_keras_embedding(self, train_embeddings=False): - """ - Return a Keras 'Embedding' layer with weights set as the Word2Vec model's learned word embeddings - """ - try: - from keras.layers import Embedding - except ImportError: - raise ImportError("Please install Keras to use this function") - weights = self.syn0 - - # set `trainable` as `False` to use the pretrained word embedding - # No extra mem usage here as `Embedding` layer doesn't create any new matrix for weights - layer = Embedding( - input_dim=weights.shape[0], output_dim=weights.shape[1], - weights=[weights], trainable=train_embeddings - ) - return layer - - -# For backward compatibility -KeyedVectors = EuclideanKeyedVectors diff --git a/gensim/models/deprecated/old_saveload.py b/gensim/models/deprecated/old_saveload.py deleted file mode 100644 index 750d83ed44..0000000000 --- a/gensim/models/deprecated/old_saveload.py +++ /dev/null @@ -1,398 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2018 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html -""" -Warnings --------- -.. deprecated:: 3.3.0 - Use :mod:`gensim.utils` instead. - - -Class containing the old SaveLoad class with modeified `unpickle` function is support loading models saved using -an older gensim version. - -""" -from __future__ import with_statement - -import logging - -try: - import cPickle as _pickle -except ImportError: - import pickle as _pickle - -import re -import sys - -import numpy as np -import scipy.sparse - -from six import iteritems - -from gensim import utils - -if sys.version_info[0] >= 3: - unicode = str - -logger = logging.getLogger(__name__) - - -PAT_ALPHABETIC = re.compile(r'(((?![\d])\w)+)', re.UNICODE) -RE_HTML_ENTITY = re.compile(r'&(#?)([xX]?)(\w{1,8});', re.UNICODE) - - -class SaveLoad(object): - """Class which inherit from this class have save/load functions, which un/pickle them to disk. - - Warnings - -------- - This uses pickle for de/serializing, so objects must not contain unpicklable attributes, - such as lambda functions etc. - - """ - @classmethod - def load(cls, fname, mmap=None): - """Load a previously saved object (using :meth:`~gensim.utils.SaveLoad.save`) from file. - - Parameters - ---------- - fname : str - Path to file that contains needed object. - mmap : str, optional - Memory-map option. If the object was saved with large arrays stored separately, you can load these arrays - via mmap (shared memory) using `mmap='r'. - If the file being loaded is compressed (either '.gz' or '.bz2'), then `mmap=None` **must be** set. - - See Also - -------- - :meth:`~gensim.utils.SaveLoad.save` - - Returns - ------- - object - Object loaded from `fname`. - - Raises - ------ - IOError - When methods are called on instance (should be called from class). 
- - """ - logger.info("loading %s object from %s", cls.__name__, fname) - - compress, subname = SaveLoad._adapt_by_suffix(fname) - - obj = unpickle(fname) - obj._load_specials(fname, mmap, compress, subname) - logger.info("loaded %s", fname) - return obj - - def _load_specials(self, fname, mmap, compress, subname): - """Loads any attributes that were stored specially, and gives the same opportunity - to recursively included :class:`~gensim.utils.SaveLoad` instances. - - Parameters - ---------- - fname : str - Path to file that contains needed object. - mmap : str - Memory-map option. - compress : bool - Set to True if file is compressed. - subname : str - ... - - - """ - def mmap_error(obj, filename): - return IOError( - 'Cannot mmap compressed object %s in file %s. ' % (obj, filename) - + 'Use `load(fname, mmap=None)` or uncompress files manually.' - ) - - for attrib in getattr(self, '__recursive_saveloads', []): - cfname = '.'.join((fname, attrib)) - logger.info("loading %s recursively from %s.* with mmap=%s", attrib, cfname, mmap) - getattr(self, attrib)._load_specials(cfname, mmap, compress, subname) - - for attrib in getattr(self, '__numpys', []): - logger.info("loading %s from %s with mmap=%s", attrib, subname(fname, attrib), mmap) - - if compress: - if mmap: - raise mmap_error(attrib, subname(fname, attrib)) - - val = np.load(subname(fname, attrib))['val'] - else: - val = np.load(subname(fname, attrib), mmap_mode=mmap) - - setattr(self, attrib, val) - - for attrib in getattr(self, '__scipys', []): - logger.info("loading %s from %s with mmap=%s", attrib, subname(fname, attrib), mmap) - sparse = unpickle(subname(fname, attrib)) - if compress: - if mmap: - raise mmap_error(attrib, subname(fname, attrib)) - - with np.load(subname(fname, attrib, 'sparse')) as f: - sparse.data = f['data'] - sparse.indptr = f['indptr'] - sparse.indices = f['indices'] - else: - sparse.data = np.load(subname(fname, attrib, 'data'), mmap_mode=mmap) - sparse.indptr = np.load(subname(fname, attrib, 'indptr'), mmap_mode=mmap) - sparse.indices = np.load(subname(fname, attrib, 'indices'), mmap_mode=mmap) - - setattr(self, attrib, sparse) - - for attrib in getattr(self, '__ignoreds', []): - logger.info("setting ignored attribute %s to None", attrib) - setattr(self, attrib, None) - - @staticmethod - def _adapt_by_suffix(fname): - """Give appropriate compress setting and filename formula. - - Parameters - ---------- - fname : str - Input filename. - - Returns - ------- - (bool, function) - First argument will be True if `fname` compressed. - - """ - compress, suffix = (True, 'npz') if fname.endswith('.gz') or fname.endswith('.bz2') else (False, 'npy') - return compress, lambda *args: '.'.join(args + (suffix,)) - - def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2): - """Save the object to file. - - Parameters - ---------- - fname : str - Path to file. - separately : list, optional - Iterable of attributes than need to store distinctly. - sep_limit : int, optional - Limit for separation. - ignore : frozenset, optional - Attributes that shouldn't be store. - pickle_protocol : int, optional - Protocol number for pickle. - - Notes - ----- - If `separately` is None, automatically detect large - numpy/scipy.sparse arrays in the object being stored, and store - them into separate files. This avoids pickle memory errors and - allows mmap'ing large arrays back on load efficiently. 
- - You can also set `separately` manually, in which case it must be - a list of attribute names to be stored in separate files. The - automatic check is not performed in this case. - - See Also - -------- - :meth:`~gensim.utils.SaveLoad.load` - - """ - logger.info("saving %s object under %s, separately %s", self.__class__.__name__, fname, separately) - - compress, subname = SaveLoad._adapt_by_suffix(fname) - - restores = self._save_specials(fname, separately, sep_limit, ignore, pickle_protocol, - compress, subname) - try: - pickle(self, fname, protocol=pickle_protocol) - finally: - # restore attribs handled specially - for obj, asides in restores: - for attrib, val in iteritems(asides): - setattr(obj, attrib, val) - logger.info("saved %s", fname) - - def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, compress, subname): - """Save aside any attributes that need to be handled separately, including - by recursion any attributes that are themselves :class:`~gensim.utils.SaveLoad` instances. - - Parameters - ---------- - fname : str - Output filename. - separately : list or None - Iterable of attributes than need to store distinctly - sep_limit : int - Limit for separation. - ignore : iterable of str - Attributes that shouldn't be store. - pickle_protocol : int - Protocol number for pickle. - compress : bool - If True - compress output with :func:`numpy.savez_compressed`. - subname : function - Produced by :meth:`~gensim.utils.SaveLoad._adapt_by_suffix` - - Returns - ------- - list of (obj, {attrib: value, ...}) - Settings that the caller should use to restore each object's attributes that were set aside - during the default :func:`~gensim.utils.pickle`. - - """ - asides = {} - sparse_matrices = (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix) - if separately is None: - separately = [] - for attrib, val in iteritems(self.__dict__): - if isinstance(val, np.ndarray) and val.size >= sep_limit: - separately.append(attrib) - elif isinstance(val, sparse_matrices) and val.nnz >= sep_limit: - separately.append(attrib) - - # whatever's in `separately` or `ignore` at this point won't get pickled - for attrib in separately + list(ignore): - if hasattr(self, attrib): - asides[attrib] = getattr(self, attrib) - delattr(self, attrib) - - recursive_saveloads = [] - restores = [] - for attrib, val in iteritems(self.__dict__): - if hasattr(val, '_save_specials'): # better than 'isinstance(val, SaveLoad)' if IPython reloading - recursive_saveloads.append(attrib) - cfname = '.'.join((fname, attrib)) - restores.extend(val._save_specials(cfname, None, sep_limit, ignore, pickle_protocol, compress, subname)) - - try: - numpys, scipys, ignoreds = [], [], [] - for attrib, val in iteritems(asides): - if isinstance(val, np.ndarray) and attrib not in ignore: - numpys.append(attrib) - logger.info("storing np array '%s' to %s", attrib, subname(fname, attrib)) - - if compress: - np.savez_compressed(subname(fname, attrib), val=np.ascontiguousarray(val)) - else: - np.save(subname(fname, attrib), np.ascontiguousarray(val)) - - elif isinstance(val, (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)) and attrib not in ignore: - scipys.append(attrib) - logger.info("storing scipy.sparse array '%s' under %s", attrib, subname(fname, attrib)) - - if compress: - np.savez_compressed( - subname(fname, attrib, 'sparse'), - data=val.data, - indptr=val.indptr, - indices=val.indices - ) - else: - np.save(subname(fname, attrib, 'data'), val.data) - np.save(subname(fname, attrib, 'indptr'), val.indptr) - 
np.save(subname(fname, attrib, 'indices'), val.indices) - - data, indptr, indices = val.data, val.indptr, val.indices - val.data, val.indptr, val.indices = None, None, None - - try: - # store array-less object - pickle(val, subname(fname, attrib), protocol=pickle_protocol) - finally: - val.data, val.indptr, val.indices = data, indptr, indices - else: - logger.info("not storing attribute %s", attrib) - ignoreds.append(attrib) - - self.__dict__['__numpys'] = numpys - self.__dict__['__scipys'] = scipys - self.__dict__['__ignoreds'] = ignoreds - self.__dict__['__recursive_saveloads'] = recursive_saveloads - except Exception: - # restore the attributes if exception-interrupted - for attrib, val in iteritems(asides): - setattr(self, attrib, val) - raise - return restores + [(self, asides)] - - def save(self, fname_or_handle, separately=None, sep_limit=10 * 1024**2, ignore=frozenset(), pickle_protocol=2): - """Save the object to file. - - Parameters - ---------- - fname_or_handle : str or file-like - Path to output file or already opened file-like object. If the object is a file handle, - no special array handling will be performed, all attributes will be saved to the same file. - separately : list of str or None, optional - If None - automatically detect large numpy/scipy.sparse arrays in the object being stored, and store - them into separate files. This avoids pickle memory errors and allows mmap'ing large arrays - back on load efficiently. - If list of str - this attributes will be stored in separate files, the automatic check - is not performed in this case. - sep_limit : int - Limit for automatic separation. - ignore : frozenset of str - Attributes that shouldn't be serialize/store. - pickle_protocol : int - Protocol number for pickle. - - See Also - -------- - :meth:`~gensim.utils.SaveLoad.load` - - """ - try: - _pickle.dump(self, fname_or_handle, protocol=pickle_protocol) - logger.info("saved %s object", self.__class__.__name__) - except TypeError: # `fname_or_handle` does not have write attribute - self._smart_save(fname_or_handle, separately, sep_limit, ignore, pickle_protocol=pickle_protocol) - - -def unpickle(fname): - """Load object from `fname`. - - Parameters - ---------- - fname : str - Path to pickle file. - - Returns - ------- - object - Python object loaded from `fname`. - - """ - with utils.open(fname, 'rb') as f: - file_bytes = f.read() - file_bytes = file_bytes.replace(b'gensim.models.word2vec', b'gensim.models.deprecated.word2vec') - file_bytes = file_bytes.replace(b'gensim.models.keyedvectors', b'gensim.models.deprecated.keyedvectors') - file_bytes = file_bytes.replace(b'gensim.models.doc2vec', b'gensim.models.deprecated.doc2vec') - file_bytes = file_bytes.replace(b'gensim.models.fasttext', b'gensim.models.deprecated.fasttext') - file_bytes = file_bytes.replace( - b'gensim.models.wrappers.fasttext', b'gensim.models.deprecated.fasttext_wrapper') - if sys.version_info > (3, 0): - return _pickle.loads(file_bytes, encoding='latin1') - else: - return _pickle.loads(file_bytes) - - -def pickle(obj, fname, protocol=2): - """Pickle object `obj` to file `fname`. - - Parameters - ---------- - obj : object - Any python object. - fname : str - Path to pickle file. - protocol : int, optional - Pickle protocol number, default is 2 to support compatible across python 2.x and 3.x. 
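The module-path rewriting inside `unpickle` above can be illustrated on its own: the raw pickle byte stream is patched so classes saved under an old module path resolve against the relocated module, then loaded with a latin1 fallback for Python 2 pickles. The filename below is hypothetical; the byte replacement mirrors the deleted code.

.. sourcecode:: pycon

    >>> import pickle
    >>> with open('old_model.pkl', 'rb') as f:            # hypothetical model saved by an older gensim
    ...     payload = f.read()
    ...
    >>> payload = payload.replace(b'gensim.models.word2vec', b'gensim.models.deprecated.word2vec')
    >>> model = pickle.loads(payload, encoding='latin1')  # latin1 lets Python 3 read Python 2 numpy pickles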
- - """ - with utils.open(fname, 'wb') as fout: # 'b' for binary, needed on Windows - _pickle.dump(obj, fout, protocol=protocol) diff --git a/gensim/models/deprecated/word2vec.py b/gensim/models/deprecated/word2vec.py deleted file mode 100644 index d57a902c55..0000000000 --- a/gensim/models/deprecated/word2vec.py +++ /dev/null @@ -1,1907 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2013 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - - -""" -Warnings --------- -.. deprecated:: 3.3.0 - Use :mod:`gensim.models.word2vec` instead. - - -Produce word vectors with deep learning via word2vec's "skip-gram and CBOW models", using either -hierarchical softmax or negative sampling [1]_ [2]_. - -NOTE: There are more ways to get word vectors in Gensim than just Word2Vec. -See wrappers for FastText, VarEmbed and WordRank. - -The training algorithms were originally ported from the C package https://code.google.com/p/word2vec/ -and extended with additional functionality. - -For a blog tutorial on gensim word2vec, with an interactive web app trained on GoogleNews, -visit http://radimrehurek.com/2014/02/word2vec-tutorial/ - -**Make sure you have a C compiler before installing gensim, to use optimized (compiled) word2vec training** -(70x speedup compared to plain NumPy implementation [3]_). - -Initialize a model with e.g.: - -.. sourcecode:: pycon - - >>> model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4) - -Persist a model to disk with: - -.. sourcecode:: pycon - - >>> model.save(fname) - >>> model = Word2Vec.load(fname) # you can continue training with the loaded model! - -The word vectors are stored in a KeyedVectors instance in model.wv. -This separates the read-only word vector lookup operations in KeyedVectors from the training code in Word2Vec: - -.. sourcecode:: pycon - - >>> model.wv['computer'] # numpy vector of a word - array([-0.00449447, -0.00310097, 0.02421786, ...], dtype=float32) - -The word vectors can also be instantiated from an existing file on disk in the word2vec C format -as a KeyedVectors instance:: - - NOTE: It is impossible to continue training the vectors loaded from the C format because hidden weights, - vocabulary frequency and the binary tree is missing: - - .. sourcecode:: pycon - - >>> from gensim.models.keyedvectors import KeyedVectors - >>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.txt', binary=False) # C text format - >>> word_vectors = KeyedVectors.load_word2vec_format('/tmp/vectors.bin', binary=True) # C binary format - - -You can perform various NLP word tasks with the model. Some of them -are already built-in: - -.. sourcecode:: pycon - - >>> model.wv.most_similar(positive=['woman', 'king'], negative=['man']) - [('queen', 0.50882536), ...] - - >>> model.wv.most_similar_cosmul(positive=['woman', 'king'], negative=['man']) - [('queen', 0.71382287), ...] - - >>> model.wv.doesnt_match("breakfast cereal dinner lunch".split()) - 'cereal' - - >>> model.wv.similarity('woman', 'man') - 0.73723527 - -Probability of a text under the model: - -.. sourcecode:: pycon - - >>> model.score(["The fox jumped over a lazy dog".split()]) - 0.2158356 - -Correlation with human opinion on word similarity: - -.. sourcecode:: pycon - - >>> model.wv.evaluate_word_pairs(os.path.join(module_path, 'test_data','wordsim353.tsv')) - 0.51, 0.62, 0.13 - -And on analogies: - -.. 
sourcecode:: pycon - - >>> model.wv.accuracy(os.path.join(module_path, 'test_data', 'questions-words.txt')) - -and so on. - -If you're finished training a model (i.e. no more updates, only querying), -then switch to the :mod:`gensim.models.KeyedVectors` instance in wv - -.. sourcecode:: pycon - - >>> word_vectors = model.wv - >>> del model - -to trim unneeded model memory = use much less RAM. - -Note that there is a :mod:`gensim.models.phrases` module which lets you automatically -detect phrases longer than one word. Using phrases, you can learn a word2vec model -where "words" are actually multiword expressions, such as `new_york_times` or `financial_crisis`: - -.. sourcecode:: pycon - - >>> bigram_transformer = gensim.models.Phrases(sentences) - >>> model = Word2Vec(bigram_transformer[sentences], size=100, ...) - -.. [1] Tomas Mikolov, Kai Chen, Greg Corrado, and Jeffrey Dean. - Efficient Estimation of Word Representations in Vector Space. In Proceedings of Workshop at ICLR, 2013. -.. [2] Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado, and Jeffrey Dean. - Distributed Representations of Words and Phrases and their Compositionality. In Proceedings of NIPS, 2013. -.. [3] Optimizing word2vec in gensim, http://radimrehurek.com/2013/09/word2vec-in-python-part-two-optimizing/ -""" -from __future__ import division # py3 "true division" - -import logging -import sys -import os -import heapq -from timeit import default_timer -from copy import deepcopy -from collections import defaultdict -import threading -import itertools -import warnings - -from gensim.utils import keep_vocab_item, call_on_class_only -from gensim.models.deprecated.keyedvectors import KeyedVectors, Vocab -from gensim.models.word2vec import Word2Vec as NewWord2Vec -from gensim.models.deprecated.old_saveload import SaveLoad - -try: - from queue import Queue, Empty -except ImportError: - from Queue import Queue, Empty - -from numpy import exp, log, dot, zeros, outer, random, dtype, float32 as REAL,\ - uint32, seterr, array, uint8, vstack, fromstring, sqrt,\ - empty, sum as np_sum, ones, logaddexp - -from scipy.special import expit - -from gensim import utils -from gensim import matutils # utility fnc for pickling, common scipy operations etc -from six import iteritems, itervalues, string_types -from six.moves import range -from types import GeneratorType - -logger = logging.getLogger(__name__) - -MAX_WORDS_IN_BATCH = 10000 - - -def load_old_word2vec(*args, **kwargs): - old_model = Word2Vec.load(*args, **kwargs) - vector_size = getattr(old_model, 'vector_size', old_model.layer1_size) - params = { - 'size': vector_size, - 'alpha': old_model.alpha, - 'window': old_model.window, - 'min_count': old_model.min_count, - 'max_vocab_size': old_model.__dict__.get('max_vocab_size', None), - 'sample': old_model.__dict__.get('sample', 1e-3), - 'seed': old_model.seed, - 'workers': old_model.workers, - 'min_alpha': old_model.min_alpha, - 'sg': old_model.sg, - 'hs': old_model.hs, - 'negative': old_model.negative, - 'cbow_mean': old_model.cbow_mean, - 'hashfxn': old_model.__dict__.get('hashfxn', hash), - 'iter': old_model.__dict__.get('iter', 5), - 'null_word': old_model.__dict__.get('null_word', 0), - 'sorted_vocab': old_model.__dict__.get('sorted_vocab', 1), - 'batch_words': old_model.__dict__.get('batch_words', MAX_WORDS_IN_BATCH), - 'compute_loss': old_model.__dict__.get('compute_loss', None) - } - new_model = NewWord2Vec(**params) - # set trainables attributes - new_model.wv.vectors = old_model.wv.syn0 - if hasattr(old_model.wv, 
'syn0norm'): - new_model.wv.vectors_norm = old_model.wv.syn0norm - if hasattr(old_model, 'syn1'): - new_model.trainables.syn1 = old_model.syn1 - if hasattr(old_model, 'syn1neg'): - new_model.trainables.syn1neg = old_model.syn1neg - if hasattr(old_model, 'syn0_lockf'): - new_model.trainables.vectors_lockf = old_model.syn0_lockf - # set vocabulary attributes - new_model.wv.vocab = old_model.wv.vocab - new_model.wv.index2word = old_model.wv.index2word - new_model.vocabulary.cum_table = old_model.__dict__.get('cum_table', None) - - new_model.train_count = old_model.__dict__.get('train_count', None) - new_model.corpus_count = old_model.__dict__.get('corpus_count', None) - new_model.corpus_total_words = old_model.__dict__.get('corpus_total_words', None) - new_model.running_training_loss = old_model.__dict__.get('running_training_loss', 0) - new_model.total_train_time = old_model.__dict__.get('total_train_time', None) - new_model.min_alpha_yet_reached = old_model.__dict__.get('min_alpha_yet_reached', old_model.alpha) - new_model.model_trimmed_post_training = old_model.__dict__.get('model_trimmed_post_training', None) - - return new_model - - -def train_batch_sg(model, sentences, alpha, work=None, compute_loss=False): - """ - Update skip-gram model by training on a sequence of sentences. - - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from `Word2Vec.train()`. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. - - """ - result = 0 - for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab - and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) # `b` in the original word2vec code - - # now go over all words from the (reduced) window, predicting each one in turn - start = max(0, pos - model.window + reduced_window) - for pos2, word2 in enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start): - # don't train on the `word` itself - if pos2 != pos: - train_sg_pair( - model, model.wv.index2word[word.index], word2.index, alpha, compute_loss=compute_loss - ) - - result += len(word_vocabs) - return result - - -def train_batch_cbow(model, sentences, alpha, work=None, neu1=None, compute_loss=False): - """ - Update CBOW model by training on a sequence of sentences. - - Each sentence is a list of string tokens, which are looked up in the model's - vocab dictionary. Called internally from `Word2Vec.train()`. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. 
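A pure-Python sketch of the (center, context) pair generation performed by `train_batch_sg` above, using the same randomly reduced window (`b` in the original C code); vocabulary lookup and subsampling are left out for brevity.

.. sourcecode:: pycon

    >>> import random
    >>> def sg_pairs(sentence, window=5):
    ...     pairs = []
    ...     for pos, word in enumerate(sentence):
    ...         reduced = random.randint(0, window - 1)        # randomly shrink the effective window
    ...         start = max(0, pos - window + reduced)
    ...         for pos2, word2 in enumerate(sentence[start:pos + window + 1 - reduced], start):
    ...             if pos2 != pos:                            # never pair the center word with itself
    ...                 pairs.append((word, word2))
    ...     return pairs
    ...
    >>> pairs = sg_pairs('the quick brown fox jumps over the lazy dog'.split(), window=2)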
- - """ - result = 0 - for sentence in sentences: - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab - and model.wv.vocab[w].sample_int > model.random.rand() * 2**32] - for pos, word in enumerate(word_vocabs): - reduced_window = model.random.randint(model.window) # `b` in the original word2vec code - start = max(0, pos - model.window + reduced_window) - window_pos = enumerate(word_vocabs[start:(pos + model.window + 1 - reduced_window)], start) - word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.wv.syn0[word2_indices], axis=0) # 1 x vector_size - if word2_indices and model.cbow_mean: - l1 /= len(word2_indices) - train_cbow_pair(model, word, word2_indices, l1, alpha, compute_loss=compute_loss) - result += len(word_vocabs) - return result - - -def score_sentence_sg(model, sentence, work=None): - """ - Obtain likelihood score for a single sentence in a fitted skip-gram representaion. - - The sentence is a list of Vocab objects (or None, when the corresponding - word is not in the vocabulary). Called internally from `Word2Vec.score()`. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. - - """ - log_prob_sentence = 0.0 - if model.negative: - raise RuntimeError("scoring is only available for HS=True") - - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab] - for pos, word in enumerate(word_vocabs): - if word is None: - continue # OOV word in the input sentence => skip - - # now go over all words from the window, predicting each one in turn - start = max(0, pos - model.window) - for pos2, word2 in enumerate(word_vocabs[start: pos + model.window + 1], start): - # don't train on OOV words and on the `word` itself - if word2 is not None and pos2 != pos: - log_prob_sentence += score_sg_pair(model, word, word2) - - return log_prob_sentence - - -def score_sentence_cbow(model, sentence, work=None, neu1=None): - """ - Obtain likelihood score for a single sentence in a fitted CBOW representaion. - - The sentence is a list of Vocab objects (or None, where the corresponding - word is not in the vocabulary. Called internally from `Word2Vec.score()`. - - This is the non-optimized, Python version. If you have cython installed, gensim - will use the optimized version from word2vec_inner instead. 
- - """ - log_prob_sentence = 0.0 - if model.negative: - raise RuntimeError("scoring is only available for HS=True") - - word_vocabs = [model.wv.vocab[w] for w in sentence if w in model.wv.vocab] - for pos, word in enumerate(word_vocabs): - if word is None: - continue # OOV word in the input sentence => skip - - start = max(0, pos - model.window) - window_pos = enumerate(word_vocabs[start:(pos + model.window + 1)], start) - word2_indices = [word2.index for pos2, word2 in window_pos if (word2 is not None and pos2 != pos)] - l1 = np_sum(model.wv.syn0[word2_indices], axis=0) # 1 x layer1_size - if word2_indices and model.cbow_mean: - l1 /= len(word2_indices) - log_prob_sentence += score_cbow_pair(model, word, l1) - - return log_prob_sentence - - -def train_sg_pair(model, word, context_index, alpha, learn_vectors=True, learn_hidden=True, - context_vectors=None, context_locks=None, compute_loss=False, is_ft=False): - if context_vectors is None: - if is_ft: - context_vectors_vocab = model.wv.syn0_vocab - context_vectors_ngrams = model.wv.syn0_ngrams - else: - context_vectors = model.wv.syn0 - if context_locks is None: - if is_ft: - context_locks_vocab = model.syn0_vocab_lockf - context_locks_ngrams = model.syn0_ngrams_lockf - else: - context_locks = model.syn0_lockf - - if word not in model.wv.vocab: - return - predict_word = model.wv.vocab[word] # target word (NN output) - - if is_ft: - l1_vocab = context_vectors_vocab[context_index[0]] - l1_ngrams = np_sum(context_vectors_ngrams[context_index[1:]], axis=0) - if context_index: - l1 = np_sum([l1_vocab, l1_ngrams], axis=0) / len(context_index) - else: - l1 = context_vectors[context_index] # input word (NN input/projection layer) - lock_factor = context_locks[context_index] - - neu1e = zeros(l1.shape) - - if model.hs: - # work on the entire tree at once, to push as much work into numpy's C routines as possible (performance) - l2a = deepcopy(model.syn1[predict_word.point]) # 2d matrix, codelen x layer1_size - prod_term = dot(l1, l2a.T) - fa = expit(prod_term) # propagate hidden -> output - ga = (1 - predict_word.code - fa) * alpha # vector of error gradients multiplied by the learning rate - if learn_hidden: - model.syn1[predict_word.point] += outer(ga, l1) # learn hidden -> output - neu1e += dot(ga, l2a) # save error - - # loss component corresponding to hierarchical softmax - if compute_loss: - sgn = (-1.0)**predict_word.code # `ch` function, 0 -> 1, 1 -> -1 - lprob = -log(expit(-sgn * prod_term)) - model.running_training_loss += sum(lprob) - - if model.negative: - # use this word (label = 1) + `negative` other random words not from this sentence (label = 0) - word_indices = [predict_word.index] - while len(word_indices) < model.negative + 1: - w = model.cum_table.searchsorted(model.random.randint(model.cum_table[-1])) - if w != predict_word.index: - word_indices.append(w) - l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size - prod_term = dot(l1, l2b.T) - fb = expit(prod_term) # propagate hidden -> output - gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate - if learn_hidden: - model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output - neu1e += dot(gb, l2b) # save error - - # loss component corresponding to negative sampling - if compute_loss: - model.running_training_loss -= sum(log(expit(-1 * prod_term[1:]))) # for the sampled words - model.running_training_loss -= log(expit(prod_term[0])) # for the output word - - if learn_vectors: - if is_ft: - 
model.wv.syn0_vocab[context_index[0]] += neu1e * context_locks_vocab[context_index[0]] - for i in context_index[1:]: - model.wv.syn0_ngrams[i] += neu1e * context_locks_ngrams[i] - else: - l1 += neu1e * lock_factor # learn input -> hidden (mutates model.wv.syn0[word2.index], if that is l1) - return neu1e - - -def train_cbow_pair(model, word, input_word_indices, l1, alpha, learn_vectors=True, learn_hidden=True, - compute_loss=False, context_vectors=None, context_locks=None, is_ft=False): - if context_vectors is None: - if is_ft: - context_vectors_vocab = model.wv.syn0_vocab - context_vectors_ngrams = model.wv.syn0_ngrams - else: - context_vectors = model.wv.syn0 - if context_locks is None: - if is_ft: - context_locks_vocab = model.syn0_vocab_lockf - context_locks_ngrams = model.syn0_ngrams_lockf - else: - context_locks = model.syn0_lockf - - neu1e = zeros(l1.shape) - - if model.hs: - l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size - prod_term = dot(l1, l2a.T) - fa = expit(prod_term) # propagate hidden -> output - ga = (1. - word.code - fa) * alpha # vector of error gradients multiplied by the learning rate - if learn_hidden: - model.syn1[word.point] += outer(ga, l1) # learn hidden -> output - neu1e += dot(ga, l2a) # save error - - # loss component corresponding to hierarchical softmax - if compute_loss: - sgn = (-1.0)**word.code # ch function, 0-> 1, 1 -> -1 - model.running_training_loss += sum(-log(expit(-sgn * prod_term))) - - if model.negative: - # use this word (label = 1) + `negative` other random words not from this sentence (label = 0) - word_indices = [word.index] - while len(word_indices) < model.negative + 1: - w = model.cum_table.searchsorted(model.random.randint(model.cum_table[-1])) - if w != word.index: - word_indices.append(w) - l2b = model.syn1neg[word_indices] # 2d matrix, k+1 x layer1_size - prod_term = dot(l1, l2b.T) - fb = expit(prod_term) # propagate hidden -> output - gb = (model.neg_labels - fb) * alpha # vector of error gradients multiplied by the learning rate - if learn_hidden: - model.syn1neg[word_indices] += outer(gb, l1) # learn hidden -> output - neu1e += dot(gb, l2b) # save error - - # loss component corresponding to negative sampling - if compute_loss: - model.running_training_loss -= sum(log(expit(-1 * prod_term[1:]))) # for the sampled words - model.running_training_loss -= log(expit(prod_term[0])) # for the output word - - if learn_vectors: - # learn input -> hidden, here for all words in the window separately - if is_ft: - if not model.cbow_mean and input_word_indices: - neu1e /= (len(input_word_indices[0]) + len(input_word_indices[1])) - for i in input_word_indices[0]: - context_vectors_vocab[i] += neu1e * context_locks_vocab[i] - for i in input_word_indices[1]: - context_vectors_ngrams[i] += neu1e * context_locks_ngrams[i] - else: - if not model.cbow_mean and input_word_indices: - neu1e /= len(input_word_indices) - for i in input_word_indices: - context_vectors[i] += neu1e * context_locks[i] - - return neu1e - - -def score_sg_pair(model, word, word2): - l1 = model.wv.syn0[word2.index] - l2a = deepcopy(model.syn1[word.point]) # 2d matrix, codelen x layer1_size - sgn = (-1.0)**word.code # ch function, 0-> 1, 1 -> -1 - lprob = -logaddexp(0, -sgn * dot(l1, l2a.T)) - return sum(lprob) - - -def score_cbow_pair(model, word, l1): - l2a = model.syn1[word.point] # 2d matrix, codelen x layer1_size - sgn = (-1.0)**word.code # ch function, 0-> 1, 1 -> -1 - lprob = -logaddexp(0, -sgn * dot(l1, l2a.T)) - return sum(lprob) - - -class Word2Vec(SaveLoad): 
- """ - Class for training, using and evaluating neural networks described in https://code.google.com/p/word2vec/ - - If you're finished training a model (=no more updates, only querying) - then switch to the :mod:`gensim.models.KeyedVectors` instance in wv - - The model can be stored/loaded via its `save()` and `load()` methods, or stored/loaded in a format - compatible with the original word2vec implementation via `wv.save_word2vec_format()` - and `KeyedVectors.load_word2vec_format()`. - - """ - - def __init__(self, sentences=None, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, - trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False): - """ - Initialize the model from an iterable of `sentences`. Each sentence is a - list of words (unicode strings) that will be used for training. - - The `sentences` iterable can be simply a list, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`BrownCorpus`, :class:`Text8Corpus` or :class:`LineSentence` in - this module for such examples. - - If you don't supply `sentences`, the model is left uninitialized -- use if - you plan to initialize it in some other way. - - `sg` defines the training algorithm. By default (`sg=0`), CBOW is used. - Otherwise (`sg=1`), skip-gram is employed. - - `size` is the dimensionality of the feature vectors. - - `window` is the maximum distance between the current and predicted word within a sentence. - - `alpha` is the initial learning rate (will linearly drop to `min_alpha` as training progresses). - - `seed` = for the random number generator. Initial vectors for each - word are seeded with a hash of the concatenation of word + str(seed). - Note that for a fully deterministically-reproducible run, you must also limit the model to - a single worker thread, to eliminate ordering jitter from OS thread scheduling. (In Python - 3, reproducibility between interpreter launches also requires use of the PYTHONHASHSEED - environment variable to control hash randomization.) - - `min_count` = ignore all words with total frequency lower than this. - - `max_vocab_size` = limit RAM during vocabulary building; if there are more unique - words than this, then prune the infrequent ones. Every 10 million word types - need about 1GB of RAM. Set to `None` for no limit (default). - - `sample` = threshold for configuring which higher-frequency words are randomly downsampled; - default is 1e-3, useful range is (0, 1e-5). - - `workers` = use this many worker threads to train the model (=faster training with multicore machines). - - `hs` = if 1, hierarchical softmax will be used for model training. - If set to 0 (default), and `negative` is non-zero, negative sampling will be used. - - `negative` = if > 0, negative sampling will be used, the int for negative - specifies how many "noise words" should be drawn (usually between 5-20). - Default is 5. If set to 0, no negative samping is used. - - `cbow_mean` = if 0, use the sum of the context word vectors. If 1 (default), use the mean. - Only applies when cbow is used. - - `hashfxn` = hash function to use to randomly initialize weights, for increased - training reproducibility. Default is Python's rudimentary built in hash function. - - `iter` = number of iterations (epochs) over the corpus. Default is 5. 
- - `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain - in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and - returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. - Note: The rule, if given, is only used to prune vocabulary during build_vocab() and is not stored as part - of the model. - - `sorted_vocab` = if 1 (default), sort the vocabulary by descending frequency before - assigning word indexes. - - `batch_words` = target size (in words) for batches of examples passed to worker threads (and - thus cython routines). Default is 10000. (Larger batches will be passed if individual - texts are longer than 10000 words, but the standard cython code truncates to that maximum.) - - """ - - self.load = call_on_class_only - - self.initialize_word_vectors() - self.sg = int(sg) - self.cum_table = None # for negative sampling - self.vector_size = int(size) - self.layer1_size = int(size) - if size % 4 != 0: - logger.warning("consider setting layer size to a multiple of 4 for greater performance") - self.alpha = float(alpha) - self.min_alpha_yet_reached = float(alpha) # To warn user if alpha increases - self.window = int(window) - self.max_vocab_size = max_vocab_size - self.seed = seed - self.random = random.RandomState(seed) - self.min_count = min_count - self.sample = sample - self.workers = int(workers) - self.min_alpha = float(min_alpha) - self.hs = hs - self.negative = negative - self.cbow_mean = int(cbow_mean) - self.hashfxn = hashfxn - self.iter = iter - self.null_word = null_word - self.train_count = 0 - self.total_train_time = 0 - self.sorted_vocab = sorted_vocab - self.batch_words = batch_words - self.model_trimmed_post_training = False - self.compute_loss = compute_loss - self.running_training_loss = 0 - if sentences is not None: - if isinstance(sentences, GeneratorType): - raise TypeError("You can't pass a generator as the sentences argument. Try an iterator.") - self.build_vocab(sentences, trim_rule=trim_rule) - self.train( - sentences, total_examples=self.corpus_count, epochs=self.iter, - start_alpha=self.alpha, end_alpha=self.min_alpha - ) - else: - if trim_rule is not None: - logger.warning( - "The rule, if given, is only used to prune vocabulary during build_vocab() " - "and is not stored as part of the model. Model initialized without sentences. " - "trim_rule provided, if any, will be ignored." - ) - - def initialize_word_vectors(self): - self.wv = KeyedVectors() - - def make_cum_table(self, power=0.75, domain=2**31 - 1): - """ - Create a cumulative-distribution table using stored vocabulary word counts for - drawing random words in the negative-sampling training routines. - - To draw a word index, choose a random integer up to the maximum value in the - table (cum_table[-1]), then finding that integer's sorted insertion point - (as if by bisect_left or ndarray.searchsorted()). That insertion point is the - drawn index, coming up in proportion equal to the increment at that slot. - - Called internally from 'build_vocab()'. 
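A NumPy sketch of the cumulative-distribution table described above and of the `searchsorted` draw it enables, with toy counts; the 0.75 power and the 2**31 - 1 domain mirror the implementation that follows.

.. sourcecode:: pycon

    >>> import numpy as np
    >>> counts = np.array([50, 20, 10, 5], dtype=np.float64)    # toy word counts, most frequent first
    >>> powered = counts ** 0.75                                 # smoothing power from the word2vec paper
    >>> domain = 2**31 - 1
    >>> cum_table = np.round(np.cumsum(powered) / powered.sum() * domain).astype(np.uint32)
    >>> rng = np.random.default_rng(0)
    >>> drawn = int(cum_table.searchsorted(rng.integers(cum_table[-1])))  # index drawn in proportion to count**0.75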
- """ - vocab_size = len(self.wv.index2word) - self.cum_table = zeros(vocab_size, dtype=uint32) - # compute sum of all power (Z in paper) - train_words_pow = 0.0 - for word_index in range(vocab_size): - train_words_pow += self.wv.vocab[self.wv.index2word[word_index]].count**power - cumulative = 0.0 - for word_index in range(vocab_size): - cumulative += self.wv.vocab[self.wv.index2word[word_index]].count**power - self.cum_table[word_index] = round(cumulative / train_words_pow * domain) - if len(self.cum_table) > 0: - assert self.cum_table[-1] == domain - - def create_binary_tree(self): - """ - Create a binary Huffman tree using stored vocabulary word counts. Frequent words - will have shorter binary codes. Called internally from `build_vocab()`. - - """ - logger.info("constructing a huffman tree from %i words", len(self.wv.vocab)) - - # build the huffman tree - heap = list(itervalues(self.wv.vocab)) - heapq.heapify(heap) - for i in range(len(self.wv.vocab) - 1): - min1, min2 = heapq.heappop(heap), heapq.heappop(heap) - heapq.heappush( - heap, Vocab(count=min1.count + min2.count, index=i + len(self.wv.vocab), left=min1, right=min2) - ) - - # recurse over the tree, assigning a binary code to each vocabulary word - if heap: - max_depth, stack = 0, [(heap[0], [], [])] - while stack: - node, codes, points = stack.pop() - if node.index < len(self.wv.vocab): - # leaf node => store its path from the root - node.code, node.point = codes, points - max_depth = max(len(codes), max_depth) - else: - # inner node => continue recursion - points = array(list(points) + [node.index - len(self.wv.vocab)], dtype=uint32) - stack.append((node.left, array(list(codes) + [0], dtype=uint8), points)) - stack.append((node.right, array(list(codes) + [1], dtype=uint8), points)) - - logger.info("built huffman tree with maximum node depth %i", max_depth) - - def build_vocab(self, sentences, keep_raw_vocab=False, trim_rule=None, progress_per=10000, update=False): - """ - Build vocabulary from a sequence of sentences (can be a once-only generator stream). - Each sentence must be a list of unicode strings. - """ - self.scan_vocab(sentences, progress_per=progress_per, trim_rule=trim_rule) # initial survey - # trim by min_count & precalculate downsampling - self.scale_vocab(keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, update=update) - self.finalize_vocab(update=update) # build tables & arrays - - def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False): - """ - Build vocabulary from a dictionary of word frequencies. - Build model vocabulary from a passed dictionary that contains (word,word count). - Words must be of type unicode strings. - - Parameters - ---------- - `word_freq` : dict - Word,Word_Count dictionary. - `keep_raw_vocab` : bool - If not true, delete the raw vocabulary after the scaling is done and free up RAM. - `corpus_count`: int - Even if no corpus is provided, this argument can set corpus_count explicitly. - `trim_rule` = vocabulary trimming rule, specifies whether certain words should remain - in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). - Can be None (min_count will be used), or a callable that accepts parameters (word, count, min_count) and - returns either `utils.RULE_DISCARD`, `utils.RULE_KEEP` or `utils.RULE_DEFAULT`. - `update`: bool - If true, the new provided words in `word_freq` dict will be added to model's vocab. - - Returns - -------- - None - - Examples - -------- - - .. 
sourcecode:: pycon - - >>> from gensim.models.word2vec import Word2Vec - >>> model = Word2Vec() - >>> model.build_vocab_from_freq({"Word1": 15, "Word2": 20}) - - """ - logger.info("Processing provided word frequencies") - # Instead of scanning text, this will assign provided word frequencies dictionary(word_freq) - # to be directly the raw vocab - raw_vocab = word_freq - logger.info( - "collected %i different raw word, with total frequency of %i", - len(raw_vocab), sum(itervalues(raw_vocab)) - ) - - # Since no sentences are provided, this is to control the corpus_count - self.corpus_count = corpus_count if corpus_count else 0 - self.raw_vocab = raw_vocab - - # trim by min_count & precalculate downsampling - self.scale_vocab(keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, update=update) - self.finalize_vocab(update=update) # build tables & arrays - - def scan_vocab(self, sentences, progress_per=10000, trim_rule=None): - """Do an initial scan of all words appearing in sentences.""" - logger.info("collecting all words and their counts") - sentence_no = -1 - total_words = 0 - min_reduce = 1 - vocab = defaultdict(int) - checked_string_types = 0 - for sentence_no, sentence in enumerate(sentences): - if not checked_string_types: - if isinstance(sentence, string_types): - logger.warning( - "Each 'sentences' item should be a list of words (usually unicode strings). " - "First item here is instead plain %s.", - type(sentence) - ) - checked_string_types += 1 - if sentence_no % progress_per == 0: - logger.info( - "PROGRESS: at sentence #%i, processed %i words, keeping %i word types", - sentence_no, total_words, len(vocab) - ) - for word in sentence: - vocab[word] += 1 - total_words += len(sentence) - - if self.max_vocab_size and len(vocab) > self.max_vocab_size: - utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) - min_reduce += 1 - - logger.info( - "collected %i word types from a corpus of %i raw words and %i sentences", - len(vocab), total_words, sentence_no + 1 - ) - self.corpus_count = sentence_no + 1 - self.raw_vocab = vocab - return total_words - - def scale_vocab(self, min_count=None, sample=None, dry_run=False, - keep_raw_vocab=False, trim_rule=None, update=False): - """ - Apply vocabulary settings for `min_count` (discarding less-frequent words) - and `sample` (controlling the downsampling of more-frequent words). - - Calling with `dry_run=True` will only simulate the provided settings and - report the size of the retained vocabulary, effective corpus length, and - estimated memory requirements. Results are both printed via logging and - returned as a dict. - - Delete the raw vocabulary after the scaling is done to free up RAM, - unless `keep_raw_vocab` is set. 
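The downsampling decision described above reduces to a per-word keep probability; a sketch of the formula applied in the loop below, with toy counts and `sample=1e-3`:

.. sourcecode:: pycon

    >>> from math import sqrt
    >>> sample, retain_total = 1e-3, 1000000                 # toy corpus: one million retained words
    >>> threshold_count = sample * retain_total
    >>> def keep_probability(count):
    ...     p = (sqrt(count / threshold_count) + 1) * (threshold_count / count)
    ...     return min(p, 1.0)                               # frequent words get p < 1 and are randomly dropped
    ...
    >>> p_frequent, p_rare = keep_probability(50000), keep_probability(500)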
- - """ - min_count = min_count or self.min_count - sample = sample or self.sample - drop_total = drop_unique = 0 - - if not update: - logger.info("Loading a fresh vocabulary") - retain_total, retain_words = 0, [] - # Discard words less-frequent than min_count - if not dry_run: - self.wv.index2word = [] - # make stored settings match these applied settings - self.min_count = min_count - self.sample = sample - self.wv.vocab = {} - - for word, v in iteritems(self.raw_vocab): - if keep_vocab_item(word, v, min_count, trim_rule=trim_rule): - retain_words.append(word) - retain_total += v - if not dry_run: - self.wv.vocab[word] = Vocab(count=v, index=len(self.wv.index2word)) - self.wv.index2word.append(word) - else: - drop_unique += 1 - drop_total += v - original_unique_total = len(retain_words) + drop_unique - retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1) - logger.info( - "min_count=%d retains %i unique words (%i%% of original %i, drops %i)", - min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique - ) - original_total = retain_total + drop_total - retain_pct = retain_total * 100 / max(original_total, 1) - logger.info( - "min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)", - min_count, retain_total, retain_pct, original_total, drop_total - ) - else: - logger.info("Updating model with new vocabulary") - new_total = pre_exist_total = 0 - new_words = pre_exist_words = [] - for word, v in iteritems(self.raw_vocab): - if keep_vocab_item(word, v, min_count, trim_rule=trim_rule): - if word in self.wv.vocab: - pre_exist_words.append(word) - pre_exist_total += v - if not dry_run: - self.wv.vocab[word].count += v - else: - new_words.append(word) - new_total += v - if not dry_run: - self.wv.vocab[word] = Vocab(count=v, index=len(self.wv.index2word)) - self.wv.index2word.append(word) - else: - drop_unique += 1 - drop_total += v - original_unique_total = len(pre_exist_words) + len(new_words) + drop_unique - pre_exist_unique_pct = len(pre_exist_words) * 100 / max(original_unique_total, 1) - new_unique_pct = len(new_words) * 100 / max(original_unique_total, 1) - logger.info( - "New added %i unique words (%i%% of original %i) " - "and increased the count of %i pre-existing words (%i%% of original %i)", - len(new_words), new_unique_pct, original_unique_total, len(pre_exist_words), - pre_exist_unique_pct, original_unique_total - ) - retain_words = new_words + pre_exist_words - retain_total = new_total + pre_exist_total - - # Precalculate each vocabulary item's threshold for sampling - if not sample: - # no words downsampled - threshold_count = retain_total - elif sample < 1.0: - # traditional meaning: set parameter as proportion of total - threshold_count = sample * retain_total - else: - # new shorthand: sample >= 1 means downsample all words with higher count than sample - threshold_count = int(sample * (3 + sqrt(5)) / 2) - - downsample_total, downsample_unique = 0, 0 - for w in retain_words: - v = self.raw_vocab[w] - word_probability = (sqrt(v / threshold_count) + 1) * (threshold_count / v) - if word_probability < 1.0: - downsample_unique += 1 - downsample_total += word_probability * v - else: - word_probability = 1.0 - downsample_total += v - if not dry_run: - self.wv.vocab[w].sample_int = int(round(word_probability * 2**32)) - - if not dry_run and not keep_raw_vocab: - logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab)) - self.raw_vocab = defaultdict(int) - - logger.info("sample=%g downsamples %i 
most-common words", sample, downsample_unique) - logger.info( - "downsampling leaves estimated %i word corpus (%.1f%% of prior %i)", - downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total - ) - - # return from each step: words-affected, resulting-corpus-size, extra memory estimates - report_values = { - 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique, - 'downsample_total': int(downsample_total), 'memory': self.estimate_memory(vocab_size=len(retain_words)) - } - - return report_values - - def finalize_vocab(self, update=False): - """Build tables and model weights based on final vocabulary settings.""" - if not self.wv.index2word: - self.scale_vocab() - if self.sorted_vocab and not update: - self.sort_vocab() - if self.hs: - # add info about each word's Huffman encoding - self.create_binary_tree() - if self.negative: - # build the table for drawing random words (for negative sampling) - self.make_cum_table() - if self.null_word: - # create null pseudo-word for padding when using concatenative L1 (run-of-words) - # this word is only ever input – never predicted – so count, huffman-point, etc doesn't matter - word, v = '\0', Vocab(count=1, sample_int=0) - v.index = len(self.wv.vocab) - self.wv.index2word.append(word) - self.wv.vocab[word] = v - # set initial input/projection and hidden weights - if not update: - self.reset_weights() - else: - self.update_weights() - - def sort_vocab(self): - """Sort the vocabulary so the most frequent words have the lowest indexes.""" - if len(self.wv.syn0): - raise RuntimeError("cannot sort vocabulary after model weights already initialized.") - self.wv.index2word.sort(key=lambda word: self.wv.vocab[word].count, reverse=True) - for i, word in enumerate(self.wv.index2word): - self.wv.vocab[word].index = i - - def reset_from(self, other_model): - """ - Borrow shareable pre-built structures (like vocab) from the other_model. Useful - if testing multiple models in parallel on the same corpus. - """ - self.wv.vocab = other_model.wv.vocab - self.wv.index2word = other_model.wv.index2word - self.cum_table = other_model.cum_table - self.corpus_count = other_model.corpus_count - self.reset_weights() - - def _do_train_job(self, sentences, alpha, inits): - """ - Train a single batch of sentences. Return 2-tuple `(effective word count after - ignoring unknown words and sentence length trimming, total word count)`. - """ - work, neu1 = inits - tally = 0 - if self.sg: - tally += train_batch_sg(self, sentences, alpha, work, self.compute_loss) - else: - tally += train_batch_cbow(self, sentences, alpha, work, neu1, self.compute_loss) - return tally, self._raw_word_count(sentences) - - def _raw_word_count(self, job): - """Return the number of words in a given job.""" - return sum(len(sentence) for sentence in job) - - def train(self, sentences, total_examples=None, total_words=None, - epochs=None, start_alpha=None, end_alpha=None, word_count=0, - queue_factor=2, report_delay=1.0, compute_loss=None): - """ - Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). - For Word2Vec, each sentence must be a list of unicode strings. (Subclasses may accept other examples.) - - To support linear learning-rate decay from (initial) alpha to min_alpha, and accurate - progres-percentage logging, either total_examples (count of sentences) or total_words (count of - raw words in sentences) MUST be provided. 
(If the corpus is the same as was provided to - `build_vocab()`, the count of examples in that corpus will be available in the model's - `corpus_count` property.) - - To avoid common mistakes around the model's ability to do multiple training passes itself, an - explicit `epochs` argument MUST be provided. In the common and recommended case, where `train()` - is only called once, the model's cached `iter` value should be supplied as `epochs` value. - """ - if self.model_trimmed_post_training: - raise RuntimeError("Parameters for training were discarded using model_trimmed_post_training method") - - if compute_loss: - self.compute_loss = compute_loss - self.running_training_loss = 0 - - logger.info( - "training model with %i workers on %i vocabulary and %i features, " - "using sg=%s hs=%s sample=%s negative=%s window=%s", - self.workers, len(self.wv.vocab), self.layer1_size, self.sg, - self.hs, self.sample, self.negative, self.window - ) - - if not self.wv.vocab: - raise RuntimeError("you must first build vocabulary before training the model") - if not len(self.wv.syn0): - raise RuntimeError("you must first finalize vocabulary before training the model") - - if not hasattr(self, 'corpus_count'): - raise ValueError( - "The number of sentences in the training corpus is missing. " - "Did you load the model via KeyedVectors.load_word2vec_format?" - "Models loaded via load_word2vec_format don't support further training. " - "Instead start with a blank model, scan_vocab on the new corpus, " - "intersect_word2vec_format with the old model, then train." - ) - - if total_words is None and total_examples is None: - raise ValueError( - "You must specify either total_examples or total_words, for proper alpha and progress calculations. " - "The usual value is total_examples=model.corpus_count." - ) - if epochs is None: - raise ValueError("You must specify an explict epochs count. The usual value is epochs=model.iter.") - start_alpha = start_alpha or self.alpha - end_alpha = end_alpha or self.min_alpha - - job_tally = 0 - - if epochs > 1: - sentences = utils.RepeatCorpusNTimes(sentences, epochs) - total_words = total_words and total_words * epochs - total_examples = total_examples and total_examples * epochs - - def worker_loop(): - """Train the model, lifting lists of sentences from the job_queue.""" - work = matutils.zeros_aligned(self.layer1_size, dtype=REAL) # per-thread private work memory - neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) - jobs_processed = 0 - while True: - job = job_queue.get() - if job is None: - progress_queue.put(None) - break # no more jobs => quit this worker - sentences, alpha = job - tally, raw_tally = self._do_train_job(sentences, alpha, (work, neu1)) - progress_queue.put((len(sentences), tally, raw_tally)) # report back progress - jobs_processed += 1 - logger.debug("worker exiting, processed %i jobs", jobs_processed) - - def job_producer(): - """Fill jobs queue using the input `sentences` iterator.""" - job_batch, batch_size = [], 0 - pushed_words, pushed_examples = 0, 0 - next_alpha = start_alpha - if next_alpha > self.min_alpha_yet_reached: - logger.warning("Effective 'alpha' higher than previous training cycles") - self.min_alpha_yet_reached = next_alpha - job_no = 0 - - for sent_idx, sentence in enumerate(sentences): - sentence_length = self._raw_word_count([sentence]) - - # can we fit this sentence into the existing job batch? 
- if batch_size + sentence_length <= self.batch_words: - # yes => add it to the current job - job_batch.append(sentence) - batch_size += sentence_length - else: - # no => submit the existing job - logger.debug( - "queueing job #%i (%i words, %i sentences) at alpha %.05f", - job_no, batch_size, len(job_batch), next_alpha - ) - job_no += 1 - job_queue.put((job_batch, next_alpha)) - - # update the learning rate for the next job - if end_alpha < next_alpha: - if total_examples: - # examples-based decay - pushed_examples += len(job_batch) - progress = 1.0 * pushed_examples / total_examples - else: - # words-based decay - pushed_words += self._raw_word_count(job_batch) - progress = 1.0 * pushed_words / total_words - next_alpha = start_alpha - (start_alpha - end_alpha) * progress - next_alpha = max(end_alpha, next_alpha) - - # add the sentence that didn't fit as the first item of a new job - job_batch, batch_size = [sentence], sentence_length - - # add the last job too (may be significantly smaller than batch_words) - if job_batch: - logger.debug( - "queueing job #%i (%i words, %i sentences) at alpha %.05f", - job_no, batch_size, len(job_batch), next_alpha - ) - job_no += 1 - job_queue.put((job_batch, next_alpha)) - - if job_no == 0 and self.train_count == 0: - logger.warning( - "train() called with an empty iterator (if not intended, " - "be sure to provide a corpus that offers restartable iteration = an iterable)." - ) - - # give the workers heads up that they can finish -- no more work! - for _ in range(self.workers): - job_queue.put(None) - logger.debug("job loop exiting, total %i jobs", job_no) - - # buffer ahead only a limited number of jobs.. this is the reason we can't simply use ThreadPool :( - job_queue = Queue(maxsize=queue_factor * self.workers) - progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) - - workers = [threading.Thread(target=worker_loop) for _ in range(self.workers)] - unfinished_worker_count = len(workers) - workers.append(threading.Thread(target=job_producer)) - - for thread in workers: - thread.daemon = True # make interrupting the process with ctrl+c easier - thread.start() - - example_count, trained_word_count, raw_word_count = 0, 0, word_count - start, next_report = default_timer() - 0.00001, 1.0 - - while unfinished_worker_count > 0: - report = progress_queue.get() # blocks if workers too slow - if report is None: # a thread reporting that it finished - unfinished_worker_count -= 1 - logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count) - continue - examples, trained_words, raw_words = report - job_tally += 1 - - # update progress stats - example_count += examples - trained_word_count += trained_words # only words in vocab & sampled - raw_word_count += raw_words - - # log progress once every report_delay seconds - elapsed = default_timer() - start - if elapsed >= next_report: - if total_examples: - # examples-based progress % - logger.info( - "PROGRESS: at %.2f%% examples, %.0f words/s, in_qsize %i, out_qsize %i", - 100.0 * example_count / total_examples, trained_word_count / elapsed, - utils.qsize(job_queue), utils.qsize(progress_queue) - ) - else: - # words-based progress % - logger.info( - "PROGRESS: at %.2f%% words, %.0f words/s, in_qsize %i, out_qsize %i", - 100.0 * raw_word_count / total_words, trained_word_count / elapsed, - utils.qsize(job_queue), utils.qsize(progress_queue) - ) - next_report = elapsed + report_delay - - # all done; report the final stats - elapsed = default_timer() - start - 
logger.info( - "training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s", - raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed - ) - if job_tally < 10 * self.workers: - logger.warning( - "under 10 jobs per worker: consider setting a smaller `batch_words' for smoother alpha decay" - ) - - # check that the input corpus hasn't changed during iteration - if total_examples and total_examples != example_count: - logger.warning( - "supplied example count (%i) did not equal expected count (%i)", example_count, total_examples - ) - if total_words and total_words != raw_word_count: - logger.warning( - "supplied raw word count (%i) did not equal expected count (%i)", raw_word_count, total_words - ) - - self.train_count += 1 # number of times train() has been called - self.total_train_time += elapsed - self.clear_sims() - return trained_word_count - - # basics copied from the train() function - def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1): - """ - Score the log probability for a sequence of sentences (can be a once-only generator stream). - Each sentence must be a list of unicode strings. - This does not change the fitted model in any way (see Word2Vec.train() for that). - - We have currently only implemented score for the hierarchical softmax scheme, - so you need to have run word2vec with hs=1 and negative=0 for this to work. - - Note that you should specify total_sentences; we'll run into problems if you ask to - score more than this number of sentences but it is inefficient to set the value too high. - - See the article by [#taddy]_ and the gensim demo at [#deepir]_ for examples of - how to use such scores in document classification. - - .. [#taddy] Taddy, Matt. Document Classification by Inversion of Distributed Language Representations, - in Proceedings of the 2015 Conference of the Association of Computational Linguistics. - .. [#deepir] https://github.com/piskvorky/gensim/blob/develop/docs/notebooks/deepir.ipynb - - """ - logger.info( - "scoring sentences with %i workers on %i vocabulary and %i features, " - "using sg=%s hs=%s sample=%s and negative=%s", - self.workers, len(self.wv.vocab), self.layer1_size, self.sg, self.hs, self.sample, self.negative - ) - - if not self.wv.vocab: - raise RuntimeError("you must first build vocabulary before scoring new data") - - if not self.hs: - raise RuntimeError( - "We have currently only implemented score for the hierarchical softmax scheme, " - "so you need to have run word2vec with hs=1 and negative=0 for this to work." - ) - - def worker_loop(): - """Compute log probability for each sentence, lifting lists of sentences from the jobs queue.""" - work = zeros(1, dtype=REAL) # for sg hs, we actually only need one memory loc (running sum) - neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) - while True: - job = job_queue.get() - if job is None: # signal to finish - break - ns = 0 - for sentence_id, sentence in job: - if sentence_id >= total_sentences: - break - if self.sg: - score = score_sentence_sg(self, sentence, work) - else: - score = score_sentence_cbow(self, sentence, work, neu1) - sentence_scores[sentence_id] = score - ns += 1 - progress_queue.put(ns) # report progress - - start, next_report = default_timer(), 1.0 - # buffer ahead only a limited number of jobs.. 
this is the reason we can't simply use ThreadPool :( - job_queue = Queue(maxsize=queue_factor * self.workers) - progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) - - workers = [threading.Thread(target=worker_loop) for _ in range(self.workers)] - for thread in workers: - thread.daemon = True # make interrupting the process with ctrl+c easier - thread.start() - - sentence_count = 0 - sentence_scores = matutils.zeros_aligned(total_sentences, dtype=REAL) - - push_done = False - done_jobs = 0 - jobs_source = enumerate(utils.grouper(enumerate(sentences), chunksize)) - - # fill jobs queue with (id, sentence) job items - while True: - try: - job_no, items = next(jobs_source) - if (job_no - 1) * chunksize > total_sentences: - logger.warning( - "terminating after %i sentences (set higher total_sentences if you want more).", - total_sentences - ) - job_no -= 1 - raise StopIteration() - logger.debug("putting job #%i in the queue", job_no) - job_queue.put(items) - except StopIteration: - logger.info("reached end of input; waiting to finish %i outstanding jobs", job_no - done_jobs + 1) - for _ in range(self.workers): - job_queue.put(None) # give the workers heads up that they can finish -- no more work! - push_done = True - try: - while done_jobs < (job_no + 1) or not push_done: - ns = progress_queue.get(push_done) # only block after all jobs pushed - sentence_count += ns - done_jobs += 1 - elapsed = default_timer() - start - if elapsed >= next_report: - logger.info( - "PROGRESS: at %.2f%% sentences, %.0f sentences/s", - 100.0 * sentence_count, sentence_count / elapsed - ) - next_report = elapsed + report_delay # don't flood log, wait report_delay seconds - else: - # loop ended by job count; really done - break - except Empty: - pass # already out of loop; continue to next push - - elapsed = default_timer() - start - self.clear_sims() - logger.info( - "scoring %i sentences took %.1fs, %.0f sentences/s", - sentence_count, elapsed, sentence_count / elapsed - ) - return sentence_scores[:sentence_count] - - def clear_sims(self): - """ - Removes all L2-normalized vectors for words from the model. - You will have to recompute them using init_sims method. - """ - - self.wv.syn0norm = None - - def update_weights(self): - """ - Copy all the existing weights, and reset the weights for the newly - added vocabulary. - """ - logger.info("updating layer weights") - gained_vocab = len(self.wv.vocab) - len(self.wv.syn0) - newsyn0 = empty((gained_vocab, self.vector_size), dtype=REAL) - - # randomize the remaining words - for i in range(len(self.wv.syn0), len(self.wv.vocab)): - # construct deterministic seed from word AND seed argument - newsyn0[i - len(self.wv.syn0)] = self.seeded_vector(self.wv.index2word[i] + str(self.seed)) - - # Raise an error if an online update is run before initial training on a corpus - if not len(self.wv.syn0): - raise RuntimeError( - "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " - "First build the vocabulary of your model with a corpus before doing an online update." 
- ) - - self.wv.syn0 = vstack([self.wv.syn0, newsyn0]) - - if self.hs: - self.syn1 = vstack([self.syn1, zeros((gained_vocab, self.layer1_size), dtype=REAL)]) - if self.negative: - self.syn1neg = vstack([self.syn1neg, zeros((gained_vocab, self.layer1_size), dtype=REAL)]) - self.wv.syn0norm = None - - # do not suppress learning for already learned words - self.syn0_lockf = ones(len(self.wv.vocab), dtype=REAL) # zeros suppress learning - - def reset_weights(self): - """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary.""" - logger.info("resetting layer weights") - self.wv.syn0 = empty((len(self.wv.vocab), self.vector_size), dtype=REAL) - # randomize weights vector by vector, rather than materializing a huge random matrix in RAM at once - for i in range(len(self.wv.vocab)): - # construct deterministic seed from word AND seed argument - self.wv.syn0[i] = self.seeded_vector(self.wv.index2word[i] + str(self.seed)) - if self.hs: - self.syn1 = zeros((len(self.wv.vocab), self.layer1_size), dtype=REAL) - if self.negative: - self.syn1neg = zeros((len(self.wv.vocab), self.layer1_size), dtype=REAL) - self.wv.syn0norm = None - - self.syn0_lockf = ones(len(self.wv.vocab), dtype=REAL) # zeros suppress learning - - def seeded_vector(self, seed_string): - """Create one 'random' vector (but deterministic by seed_string)""" - # Note: built-in hash() may vary by Python version or even (in Py3.x) per launch - once = random.RandomState(self.hashfxn(seed_string) & 0xffffffff) - return (once.rand(self.vector_size) - 0.5) / self.vector_size - - def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='utf8', unicode_errors='strict'): - """ - Merge the input-hidden weight matrix from the original C word2vec-tool format - given, where it intersects with the current vocabulary. (No words are added to the - existing vocabulary, but intersecting words adopt the file's weights, and - non-intersecting words are left alone.) - - `binary` is a boolean indicating whether the data is in binary word2vec format. - - `lockf` is a lock-factor value to be set for any imported word-vectors; the - default value of 0.0 prevents further updating of the vector during subsequent - training. Use 1.0 to allow further training updates of merged vectors. - """ - overlap_count = 0 - logger.info("loading projection weights from %s", fname) - with utils.open(fname, 'rb') as fin: - header = utils.to_unicode(fin.readline(), encoding=encoding) - vocab_size, vector_size = (int(x) for x in header.split()) # throws for invalid file format - if not vector_size == self.vector_size: - raise ValueError("incompatible vector size %d in file %s" % (vector_size, fname)) - # TOCONSIDER: maybe mismatched vectors still useful enough to merge (truncating/padding)? 
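        # The branch below handles the two on-disk word2vec formats. In the binary format,
        # each entry is the word's text terminated by a single space and followed by
        # `vector_size` float32 values (stray newlines before a word are skipped); in the
        # text format, each line is the word followed by `vector_size` ASCII floats.
        # In both cases only words already present in self.wv.vocab are overwritten, and
        # their lock-factor is set to `lockf` (e.g. lockf=1.0 keeps the merged vectors
        # trainable in subsequent passes, while the default 0.0 freezes them).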
- if binary: - binary_len = dtype(REAL).itemsize * vector_size - for _ in range(vocab_size): - # mixed text and binary: read text first, then binary - word = [] - while True: - ch = fin.read(1) - if ch == b' ': - break - if ch != b'\n': # ignore newlines in front of words (some binary files have) - word.append(ch) - word = utils.to_unicode(b''.join(word), encoding=encoding, errors=unicode_errors) - weights = fromstring(fin.read(binary_len), dtype=REAL) - if word in self.wv.vocab: - overlap_count += 1 - self.wv.syn0[self.wv.vocab[word].index] = weights - self.syn0_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0 stops further changes - else: - for line_no, line in enumerate(fin): - parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") - if len(parts) != vector_size + 1: - raise ValueError("invalid vector on line %s (is this really the text format?)" % line_no) - word, weights = parts[0], [REAL(x) for x in parts[1:]] - if word in self.wv.vocab: - overlap_count += 1 - self.wv.syn0[self.wv.vocab[word].index] = weights - self.syn0_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0 stops further changes - logger.info("merged %d vectors into %s matrix from %s", overlap_count, self.wv.syn0.shape, fname) - - def most_similar(self, positive=None, negative=None, topn=10, restrict_vocab=None, indexer=None): - """ - Deprecated. Use self.wv.most_similar() instead. - Refer to the documentation for `gensim.models.KeyedVectors.most_similar` - """ - return self.wv.most_similar(positive, negative, topn, restrict_vocab, indexer) - - def wmdistance(self, document1, document2): - """ - Deprecated. Use self.wv.wmdistance() instead. - Refer to the documentation for `gensim.models.KeyedVectors.wmdistance` - """ - return self.wv.wmdistance(document1, document2) - - def most_similar_cosmul(self, positive=None, negative=None, topn=10): - """ - Deprecated. Use self.wv.most_similar_cosmul() instead. - Refer to the documentation for `gensim.models.KeyedVectors.most_similar_cosmul` - """ - return self.wv.most_similar_cosmul(positive, negative, topn) - - def similar_by_word(self, word, topn=10, restrict_vocab=None): - """ - Deprecated. Use self.wv.similar_by_word() instead. - Refer to the documentation for `gensim.models.KeyedVectors.similar_by_word` - """ - return self.wv.similar_by_word(word, topn, restrict_vocab) - - def similar_by_vector(self, vector, topn=10, restrict_vocab=None): - """ - Deprecated. Use self.wv.similar_by_vector() instead. - Refer to the documentation for `gensim.models.KeyedVectors.similar_by_vector` - """ - return self.wv.similar_by_vector(vector, topn, restrict_vocab) - - def doesnt_match(self, words): - """ - Deprecated. Use self.wv.doesnt_match() instead. - Refer to the documentation for `gensim.models.KeyedVectors.doesnt_match` - """ - return self.wv.doesnt_match(words) - - def __getitem__(self, words): - """ - Deprecated. Use self.wv.__getitem__() instead. - Refer to the documentation for `gensim.models.KeyedVectors.__getitem__` - """ - return self.wv.__getitem__(words) - - def __contains__(self, word): - """ - Deprecated. Use self.wv.__contains__() instead. - Refer to the documentation for `gensim.models.KeyedVectors.__contains__` - """ - return self.wv.__contains__(word) - - def similarity(self, w1, w2): - """ - Deprecated. Use self.wv.similarity() instead. - Refer to the documentation for `gensim.models.KeyedVectors.similarity` - """ - return self.wv.similarity(w1, w2) - - def n_similarity(self, ws1, ws2): - """ - Deprecated. 
Use self.wv.n_similarity() instead. - Refer to the documentation for `gensim.models.KeyedVectors.n_similarity` - """ - return self.wv.n_similarity(ws1, ws2) - - def predict_output_word(self, context_words_list, topn=10): - """Report the probability distribution of the center word given the context words - as input to the trained model.""" - if not self.negative: - raise RuntimeError( - "We have currently only implemented predict_output_word for the negative sampling scheme, " - "so you need to have run word2vec with negative > 0 for this to work." - ) - - if not hasattr(self.wv, 'syn0') or not hasattr(self, 'syn1neg'): - raise RuntimeError("Parameters required for predicting the output words not found.") - - word_vocabs = [self.wv.vocab[w] for w in context_words_list if w in self.wv.vocab] - if not word_vocabs: - warnings.warn("All the input context words are out-of-vocabulary for the current model.") - return None - - word2_indices = [word.index for word in word_vocabs] - - l1 = np_sum(self.wv.syn0[word2_indices], axis=0) - if word2_indices and self.cbow_mean: - l1 /= len(word2_indices) - - prob_values = exp(dot(l1, self.syn1neg.T)) # propagate hidden -> output and take softmax to get probabilities - prob_values /= sum(prob_values) - top_indices = matutils.argsort(prob_values, topn=topn, reverse=True) - # returning the most probable output words with their probabilities - return [(self.wv.index2word[index1], prob_values[index1]) for index1 in top_indices] - - def init_sims(self, replace=False): - """ - init_sims() resides in KeyedVectors because it deals with syn0 mainly, but because syn1 is not an attribute - of KeyedVectors, it has to be deleted in this class, and the normalizing of syn0 happens inside of KeyedVectors - """ - if replace and hasattr(self, 'syn1'): - del self.syn1 - return self.wv.init_sims(replace) - - def estimate_memory(self, vocab_size=None, report=None): - """Estimate required memory for a model using current settings and provided vocabulary size.""" - vocab_size = vocab_size or len(self.wv.vocab) - report = report or {} - report['vocab'] = vocab_size * (700 if self.hs else 500) - report['syn0'] = vocab_size * self.vector_size * dtype(REAL).itemsize - if self.hs: - report['syn1'] = vocab_size * self.layer1_size * dtype(REAL).itemsize - if self.negative: - report['syn1neg'] = vocab_size * self.layer1_size * dtype(REAL).itemsize - report['total'] = sum(report.values()) - logger.info( - "estimated required memory for %i words and %i dimensions: %i bytes", - vocab_size, self.vector_size, report['total'] - ) - return report - - @staticmethod - def log_accuracy(section): - return KeyedVectors.log_accuracy(section) - - def accuracy(self, questions, restrict_vocab=30000, most_similar=None, case_insensitive=True): - most_similar = most_similar or KeyedVectors.most_similar - return self.wv.accuracy(questions, restrict_vocab, most_similar, case_insensitive) - - @staticmethod - def log_evaluate_word_pairs(pearson, spearman, oov, pairs): - """ - Deprecated. Use self.wv.log_evaluate_word_pairs() instead. - Refer to the documentation for `gensim.models.KeyedVectors.log_evaluate_word_pairs` - """ - return KeyedVectors.log_evaluate_word_pairs(pearson, spearman, oov, pairs) - - def evaluate_word_pairs(self, pairs, delimiter='\t', restrict_vocab=300000, - case_insensitive=True, dummy4unknown=False): - """ - Deprecated. Use self.wv.evaluate_word_pairs() instead. 
- Refer to the documentation for `gensim.models.KeyedVectors.evaluate_word_pairs` - """ - return self.wv.evaluate_word_pairs(pairs, delimiter, restrict_vocab, case_insensitive, dummy4unknown) - - def __str__(self): - return "%s(vocab=%s, size=%s, alpha=%s)" % ( - self.__class__.__name__, len(self.wv.index2word), self.vector_size, self.alpha - ) - - def _minimize_model(self, save_syn1=False, save_syn1neg=False, save_syn0_lockf=False): - warnings.warn( - "This method would be deprecated in the future. " - "Keep just_word_vectors = model.wv to retain just the KeyedVectors instance " - "for read-only querying of word vectors." - ) - if save_syn1 and save_syn1neg and save_syn0_lockf: - return - if hasattr(self, 'syn1') and not save_syn1: - del self.syn1 - if hasattr(self, 'syn1neg') and not save_syn1neg: - del self.syn1neg - if hasattr(self, 'syn0_lockf') and not save_syn0_lockf: - del self.syn0_lockf - self.model_trimmed_post_training = True - - def delete_temporary_training_data(self, replace_word_vectors_with_normalized=False): - """ - Discard parameters that are used in training and score. Use if you're sure you're done training a model. - If `replace_word_vectors_with_normalized` is set, forget the original vectors and only keep the normalized - ones = saves lots of memory! - """ - if replace_word_vectors_with_normalized: - self.init_sims(replace=True) - self._minimize_model() - - def save(self, *args, **kwargs): - # don't bother storing the cached normalized vectors, recalculable table - kwargs['ignore'] = kwargs.get('ignore', ['syn0norm', 'table', 'cum_table']) - - super(Word2Vec, self).save(*args, **kwargs) - - save.__doc__ = SaveLoad.save.__doc__ - - @classmethod - def load(cls, *args, **kwargs): - model = super(Word2Vec, cls).load(*args, **kwargs) - # update older models - if hasattr(model, 'table'): - delattr(model, 'table') # discard in favor of cum_table - if model.negative and hasattr(model.wv, 'index2word'): - model.make_cum_table() # rebuild cum_table from vocabulary - if not hasattr(model, 'corpus_count'): - model.corpus_count = None - if not hasattr(model, 'corpus_total_words'): - model.corpus_total_words = None - for v in model.wv.vocab.values(): - if hasattr(v, 'sample_int'): - break # already 0.12.0+ style int probabilities - elif hasattr(v, 'sample_probability'): - v.sample_int = int(round(v.sample_probability * 2**32)) - del v.sample_probability - if not hasattr(model, 'syn0_lockf') and hasattr(model, 'syn0'): - model.syn0_lockf = ones(len(model.wv.syn0), dtype=REAL) - if not hasattr(model, 'random'): - model.random = random.RandomState(model.seed) - if not hasattr(model, 'train_count'): - model.train_count = 0 - model.total_train_time = 0 - return model - - def _load_specials(self, *args, **kwargs): - super(Word2Vec, self)._load_specials(*args, **kwargs) - # loading from a pre-KeyedVectors word2vec model - if not hasattr(self, 'wv'): - wv = KeyedVectors() - wv.syn0 = self.__dict__.get('syn0', []) - wv.syn0norm = self.__dict__.get('syn0norm', None) - wv.vocab = self.__dict__.get('vocab', {}) - wv.index2word = self.__dict__.get('index2word', []) - self.wv = wv - - @classmethod - def load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8', unicode_errors='strict', - limit=None, datatype=REAL): - """Deprecated. Use gensim.models.KeyedVectors.load_word2vec_format instead.""" - raise DeprecationWarning("Deprecated. 
Use gensim.models.KeyedVectors.load_word2vec_format instead.") - - def save_word2vec_format(self, fname, fvocab=None, binary=False): - """Deprecated. Use model.wv.save_word2vec_format instead.""" - raise DeprecationWarning("Deprecated. Use model.wv.save_word2vec_format instead.") - - def get_latest_training_loss(self): - return self.running_training_loss - - -class BrownCorpus(object): - """Iterate over sentences from the Brown corpus (part of NLTK data).""" - - def __init__(self, dirname): - self.dirname = dirname - - def __iter__(self): - for fname in os.listdir(self.dirname): - fname = os.path.join(self.dirname, fname) - if not os.path.isfile(fname): - continue - with utils.open(fname, 'rb') as fin: - for line in fin: - line = utils.to_unicode(line) - # each file line is a single sentence in the Brown corpus - # each token is WORD/POS_TAG - token_tags = [t.split('/') for t in line.split() if len(t.split('/')) == 2] - # ignore words with non-alphabetic tags like ",", "!" etc (punctuation, weird stuff) - words = ["%s/%s" % (token.lower(), tag[:2]) for token, tag in token_tags if tag[:2].isalpha()] - if not words: # don't bother sending out empty sentences - continue - yield words - - -class Text8Corpus(object): - """Iterate over sentences from the "text8" corpus, unzipped from http://mattmahoney.net/dc/text8.zip .""" - - def __init__(self, fname, max_sentence_length=MAX_WORDS_IN_BATCH): - self.fname = fname - self.max_sentence_length = max_sentence_length - - def __iter__(self): - # the entire corpus is one gigantic line -- there are no sentence marks at all - # so just split the sequence of tokens arbitrarily: 1 sentence = 1000 tokens - sentence, rest = [], b'' - with utils.open(self.fname, 'rb') as fin: - while True: - text = rest + fin.read(8192) # avoid loading the entire file (=1 line) into RAM - if text == rest: # EOF - words = utils.to_unicode(text).split() - sentence.extend(words) # return the last chunk of words, too (may be shorter/longer) - if sentence: - yield sentence - break - last_token = text.rfind(b' ') # last token may have been split in two... keep for next iteration - words, rest = (utils.to_unicode(text[:last_token]).split(), - text[last_token:].strip()) if last_token >= 0 else ([], text) - sentence.extend(words) - while len(sentence) >= self.max_sentence_length: - yield sentence[:self.max_sentence_length] - sentence = sentence[self.max_sentence_length:] - - -class LineSentence(object): - """ - Simple format: one sentence = one line; words already preprocessed and separated by whitespace. - """ - - def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None): - """ - `source` can be either a string or a file object. Clip the file to the first - `limit` lines (or not clipped if limit is None, the default). 
- - Example:: - - sentences = LineSentence('myfile.txt') - - Or for compressed files:: - - sentences = LineSentence('compressed_text.txt.bz2') - sentences = LineSentence('compressed_text.txt.gz') - - """ - self.source = source - self.max_sentence_length = max_sentence_length - self.limit = limit - - def __iter__(self): - """Iterate through the lines in the source.""" - try: - # Assume it is a file-like object and try treating it as such - # Things that don't have seek will trigger an exception - self.source.seek(0) - for line in itertools.islice(self.source, self.limit): - line = utils.to_unicode(line).split() - i = 0 - while i < len(line): - yield line[i: i + self.max_sentence_length] - i += self.max_sentence_length - except AttributeError: - # If it didn't work like a file, use it as a string filename - with utils.open(self.source, 'rb') as fin: - for line in itertools.islice(fin, self.limit): - line = utils.to_unicode(line).split() - i = 0 - while i < len(line): - yield line[i: i + self.max_sentence_length] - i += self.max_sentence_length - - -class PathLineSentences(object): - """ - - Works like word2vec.LineSentence, but will process all files in a directory in alphabetical order by filename. - The directory can only contain files that can be read by LineSentence: .bz2, .gz, and text files. - Any file not ending with .bz2 or .gz is assumed to be a text file. Does not work with subdirectories. - - The format of files (either text, or compressed text files) in the path is one sentence = one line, - with words already preprocessed and separated by whitespace. - - """ - - def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None): - """ - `source` should be a path to a directory (as a string) where all files can be opened by the - LineSentence class. Each file will be read up to `limit` lines (or not clipped if limit is None, the default). - - Example:: - - sentences = PathLineSentences(os.getcwd() + '\\corpus\\') - - The files in the directory should be either text files, .bz2 files, or .gz files. 
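        For instance, a minimal end-to-end sketch (the directory name is illustrative, and the
        legacy `size`/`iter` parameter spellings match the rest of this module)::

            from gensim.models.word2vec import Word2Vec, PathLineSentences

            sentences = PathLineSentences('corpus_dir/')   # streams files in filename order
            model = Word2Vec(sentences, size=100, window=5, min_count=5, workers=4, iter=5)
            model.wv.save_word2vec_format('corpus_vectors.txt', binary=False)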
- - """ - self.source = source - self.max_sentence_length = max_sentence_length - self.limit = limit - - if os.path.isfile(self.source): - logger.debug('single file given as source, rather than a directory of files') - logger.debug('consider using models.word2vec.LineSentence for a single file') - self.input_files = [self.source] # force code compatibility with list of files - elif os.path.isdir(self.source): - self.source = os.path.join(self.source, '') # ensures os-specific slash at end of path - logger.info('reading directory %s', self.source) - self.input_files = os.listdir(self.source) - self.input_files = [self.source + filename for filename in self.input_files] # make full paths - self.input_files.sort() # makes sure it happens in filename order - else: # not a file or a directory, then we can't do anything with it - raise ValueError('input is neither a file nor a path') - logger.info('files read into PathLineSentences:%s', '\n'.join(self.input_files)) - - def __iter__(self): - """iterate through the files""" - for file_name in self.input_files: - logger.info('reading file %s', file_name) - with utils.open(file_name, 'rb') as fin: - for line in itertools.islice(fin, self.limit): - line = utils.to_unicode(line).split() - i = 0 - while i < len(line): - yield line[i:i + self.max_sentence_length] - i += self.max_sentence_length - - -# Example: ./word2vec.py -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 \ -# -negative 5 -hs 0 -binary 0 -cbow 1 -iter 3 -if __name__ == "__main__": - import argparse - logging.basicConfig( - format='%(asctime)s : %(threadName)s : %(levelname)s : %(message)s', - level=logging.INFO - ) - logger.info("running %s", " ".join(sys.argv)) - - # check and process cmdline input - program = os.path.basename(sys.argv[0]) - if len(sys.argv) < 2: - print(globals()['__doc__'] % locals()) - sys.exit(1) - - from gensim.models.word2vec import Word2Vec # noqa:F811 avoid referencing __main__ in pickle - - seterr(all='raise') # don't ignore numpy errors - - parser = argparse.ArgumentParser() - parser.add_argument("-train", help="Use text data from file TRAIN to train the model", required=True) - parser.add_argument("-output", help="Use file OUTPUT to save the resulting word vectors") - parser.add_argument("-window", help="Set max skip length WINDOW between words; default is 5", type=int, default=5) - parser.add_argument("-size", help="Set size of word vectors; default is 100", type=int, default=100) - parser.add_argument( - "-sample", - help="Set threshold for occurrence of words. 
" - "Those that appear with higher frequency in the training data will be randomly down-sampled;" - " default is 1e-3, useful range is (0, 1e-5)", - type=float, default=1e-3 - ) - parser.add_argument( - "-hs", help="Use Hierarchical Softmax; default is 0 (not used)", - type=int, default=0, choices=[0, 1] - ) - parser.add_argument( - "-negative", help="Number of negative examples; default is 5, common values are 3 - 10 (0 = not used)", - type=int, default=5 - ) - parser.add_argument("-threads", help="Use THREADS threads (default 12)", type=int, default=12) - parser.add_argument("-iter", help="Run more training iterations (default 5)", type=int, default=5) - parser.add_argument( - "-min_count", help="This will discard words that appear less than MIN_COUNT times; default is 5", - type=int, default=5 - ) - parser.add_argument( - "-cbow", help="Use the continuous bag of words model; default is 1 (use 0 for skip-gram model)", - type=int, default=1, choices=[0, 1] - ) - parser.add_argument( - "-binary", help="Save the resulting vectors in binary mode; default is 0 (off)", - type=int, default=0, choices=[0, 1] - ) - parser.add_argument("-accuracy", help="Use questions from file ACCURACY to evaluate the model") - - args = parser.parse_args() - - if args.cbow == 0: - skipgram = 1 - else: - skipgram = 0 - - corpus = LineSentence(args.train) - - model = Word2Vec( - corpus, size=args.size, min_count=args.min_count, workers=args.threads, - window=args.window, sample=args.sample, sg=skipgram, hs=args.hs, - negative=args.negative, cbow_mean=1, iter=args.iter - ) - - if args.output: - outfile = args.output - model.wv.save_word2vec_format(outfile, binary=args.binary) - else: - outfile = args.train - model.save(outfile + '.model') - if args.binary == 1: - model.wv.save_word2vec_format(outfile + '.model.bin', binary=True) - else: - model.wv.save_word2vec_format(outfile + '.model.txt', binary=False) - - if args.accuracy: - model.accuracy(args.accuracy) - - logger.info("finished running %s", program) diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py index 624875bf18..4a2a1761ac 100644 --- a/gensim/models/doc2vec.py +++ b/gensim/models/doc2vec.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Author: Shiva Manne +# Author: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. 
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html @@ -69,7 +69,8 @@ except ImportError: from Queue import Queue # noqa:F401 -from collections import namedtuple, defaultdict, Iterable +from collections import namedtuple, defaultdict +from collections.abc import Iterable from timeit import default_timer from dataclasses import dataclass @@ -77,14 +78,11 @@ memmap as np_memmap, vstack, integer, dtype import numpy as np -from gensim.utils import call_on_class_only from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc -from gensim.models.word2vec import Word2VecVocab, Word2VecTrainables +from gensim.models import Word2Vec from six.moves import range from six import string_types, integer_types, itervalues -from gensim.models.base_any2vec import BaseWordEmbeddingsModel from gensim.models.keyedvectors import KeyedVectors, ConcatList, pseudorandom_weak_vector -from types import GeneratorType logger = logging.getLogger(__name__) @@ -170,10 +168,10 @@ def count(self, new_val): Doctag = DoctagVocab -class Doc2Vec(BaseWordEmbeddingsModel): - def __init__(self, documents=None, corpus_file=None, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, +class Doc2Vec(Word2Vec): + def __init__(self, documents=None, corpus_file=None, vector_size=100, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, docvecs=None, docvecs_mapfile=None, comment=None, trim_rule=None, callbacks=(), - **kwargs): + window=5, epochs=10, **kwargs): """Class for training, using and evaluating neural networks described in `Distributed Representations of Sentences and Documents `_. @@ -219,7 +217,7 @@ def __init__(self, documents=None, corpus_file=None, dm_mean=None, dm=1, dbow_wo workers : int, optional Use these many worker threads to train the model (=faster training with multicore machines). epochs : int, optional - Number of iterations (epochs) over the corpus. + Number of iterations (epochs) over the corpus. Defaults to 10 for Doc2Vec. hs : {1,0}, optional If 1, hierarchical softmax will be used for model training. If set to 0, and `negative` is non-zero, negative sampling will be used. @@ -280,28 +278,8 @@ def __init__(self, documents=None, corpus_file=None, dm_mean=None, dm=1, dbow_wo .. sourcecode:: pycon >>> model.docvecs['doc003'] - - vocabulary : :class:`~gensim.models.doc2vec.Doc2VecVocab` - This object represents the vocabulary (sometimes called Dictionary in gensim) of the model. - Besides keeping track of all unique words, this object provides extra functionality, such as - sorting words by frequency, or discarding extremely rare words. - - trainables : :class:`~gensim.models.doc2vec.Doc2VecTrainables` - This object represents the inner shallow neural network used to train the embeddings. The semantics - of the network differ slightly in the two available training modes (CBOW or SG) but you can think - of it as a NN with a single projection and hidden layer which we train on the corpus. The weights are - then used as our embeddings. The only addition to the underlying NN used in - :class:`~gensim.models.word2vec.Word2Vec` is that the input includes not only the word vectors of - each word in the context, but also the paragraph vector. 
- """ - super(Doc2Vec, self).__init__( - sg=(1 + dm) % 2, - null_word=dm_concat, - callbacks=callbacks, - **kwargs) - - self.load = call_on_class_only + corpus_iterable = documents if dm_mean is not None: self.cbow_mean = dm_mean @@ -309,34 +287,23 @@ def __init__(self, documents=None, corpus_file=None, dm_mean=None, dm=1, dbow_wo self.dbow_words = int(dbow_words) self.dm_concat = int(dm_concat) self.dm_tag_count = int(dm_tag_count) + if dm and dm_concat: + self.layer1_size = (dm_tag_count + (2 * window)) * vector_size + logger.info("using concatenative %d-dimensional layer1", self.layer1_size) - kwargs['null_word'] = dm_concat - vocabulary_keys = ['max_vocab_size', 'min_count', 'sample', 'sorted_vocab', 'null_word', 'ns_exponent'] - vocabulary_kwargs = dict((k, kwargs[k]) for k in vocabulary_keys if k in kwargs) - self.vocabulary = Doc2VecVocab(**vocabulary_kwargs) - - trainables_keys = ['seed', 'hashfxn', 'window'] - trainables_kwargs = dict((k, kwargs[k]) for k in trainables_keys if k in kwargs) - self.trainables = Doc2VecTrainables( - dm=dm, dm_concat=dm_concat, dm_tag_count=dm_tag_count, - vector_size=self.vector_size, **trainables_kwargs) - - self.wv = KeyedVectors(self.vector_size) + self.vector_size = vector_size self.docvecs = docvecs or KeyedVectors(self.vector_size, mapfile_path=docvecs_mapfile) - self.comment = comment - - if documents is not None or corpus_file is not None: - self._check_input_data_sanity(data_iterable=documents, corpus_file=corpus_file) - if corpus_file is not None and not isinstance(corpus_file, string_types): - raise TypeError("You must pass string as the corpus_file argument.") - elif isinstance(documents, GeneratorType): - raise TypeError("You can't pass a generator as the documents argument. Try a sequence.") - self.build_vocab(documents=documents, corpus_file=corpus_file, trim_rule=trim_rule) - self.train( - documents=documents, corpus_file=corpus_file, total_examples=self.corpus_count, - total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha, - end_alpha=self.min_alpha, callbacks=callbacks) + super(Doc2Vec, self).__init__( + sentences=corpus_iterable, + corpus_file=corpus_file, + vector_size=self.vector_size, + sg=(1 + dm) % 2, + null_word=self.dm_concat, + callbacks=callbacks, + window=window, + epochs=epochs, + **kwargs) @property def dm(self): @@ -354,9 +321,6 @@ def dbow(self): """ return self.sg # same as SG - def _set_train_params(self, **kwargs): - pass - def _clear_post_train(self): """Alias for :meth:`~gensim.models.doc2vec.Doc2Vec.clear_sims`.""" self.clear_sims() @@ -366,6 +330,18 @@ def clear_sims(self): self.wv.vectors_norm = None self.docvecs.vectors_norm = None + def reset_weights(self): + super(Doc2Vec, self).reset_weights() + self.docvecs.resize_vectors() + self.docvecs.randomly_initialize_vectors() + if self.docvecs.mapfile_path: + self.docvecs.vectors_lockf = np_memmap( + self.docvecs.mapfile_path + '.vectors_lockf', dtype=REAL, mode='w+', shape=(len(self.docvecs.vectors),) + ) + self.docvecs.vectors_lockf.fill(1.0) + else: + self.docvecs.vectors_lockf = ones((len(self.docvecs.vectors),), dtype=REAL) # zeros suppress learning + def reset_from(self, other_model): """Copy shareable data structures from another (possibly pre-trained) model. 
@@ -377,17 +353,17 @@ def reset_from(self, other_model): """ self.wv.vocab = other_model.wv.vocab self.wv.index2key = other_model.wv.index2key - self.vocabulary.cum_table = other_model.vocabulary.cum_table + self.cum_table = other_model.cum_table self.corpus_count = other_model.corpus_count self.docvecs.vocab = other_model.docvecs.vocab self.docvecs.index2key = other_model.docvecs.index2key - self.trainables.reset_weights(self.hs, self.negative, self.wv, self.docvecs) + self.reset_weights() def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch, total_examples=None, total_words=None, offsets=None, start_doctags=None, **kwargs): work, neu1 = thread_private_mem doctag_vectors = self.docvecs.vectors - doctag_locks = self.trainables.vectors_docs_lockf + doctag_locks = self.docvecs.vectors_lockf offset = offsets[thread_id] start_doctag = start_doctags[thread_id] @@ -433,7 +409,7 @@ def _do_train_job(self, job, alpha, inits): for doc in job: doctag_indexes = [self.docvecs.get_index(tag) for tag in doc.tags if tag in self.docvecs] doctag_vectors = self.docvecs.vectors - doctag_locks = self.trainables.vectors_docs_lockf + doctag_locks = self.docvecs.vectors_lockf if self.sg: tally += train_document_dbow( self, doc.words, doctag_indexes, alpha, work, train_words=self.dbow_words, @@ -451,9 +427,10 @@ def _do_train_job(self, job, alpha, inits): ) return tally, self._raw_word_count(job) - def train(self, documents=None, corpus_file=None, total_examples=None, total_words=None, + def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, - word_count=0, queue_factor=2, report_delay=1.0, callbacks=()): + word_count=0, queue_factor=2, report_delay=1.0, callbacks=(), + **kwargs): """Update the model's neural weights. To support linear learning-rate decay from (initial) `alpha` to `min_alpha`, and accurate @@ -469,7 +446,7 @@ def train(self, documents=None, corpus_file=None, total_examples=None, total_wor Parameters ---------- - documents : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`, optional + corpus_iterable : iterable of list of :class:`~gensim.models.doc2vec.TaggedDocument`, optional Can be simply a list of elements, but for larger corpora,consider an iterable that streams the documents directly from disk/network. If you don't supply `documents` (or `corpus_file`), the model is left uninitialized -- use if you plan to initialize it in some other way. @@ -506,19 +483,17 @@ def train(self, documents=None, corpus_file=None, total_examples=None, total_wor List of callbacks that need to be executed/run at specific stages during training. 
""" - kwargs = {} - - if corpus_file is None and documents is None: + if corpus_file is None and corpus_iterable is None: raise TypeError("Either one of corpus_file or documents value must be provided") - if corpus_file is not None and documents is not None: - raise TypeError("Both corpus_file and documents must not be provided at the same time") + if corpus_file is not None and corpus_iterable is not None: + raise TypeError("Both corpus_file and corpus_iterable must not be provided at the same time") - if documents is None and not os.path.isfile(corpus_file): + if corpus_iterable is None and not os.path.isfile(corpus_file): raise TypeError("Parameter corpus_file must be a valid path to a file, got %r instead" % corpus_file) - if documents is not None and not isinstance(documents, Iterable): - raise TypeError("documents must be an iterable of list, got %r instead" % documents) + if corpus_iterable is not None and not isinstance(corpus_iterable, Iterable): + raise TypeError("corpus_iterable must be an iterable of TaggedDocument, got %r instead" % corpus_iterable) if corpus_file is not None: # Calculate offsets for each worker along with initial doctags (doctag ~ document/line number in a file) @@ -527,7 +502,8 @@ def train(self, documents=None, corpus_file=None, total_examples=None, total_wor kwargs['start_doctags'] = start_doctags super(Doc2Vec, self).train( - sentences=documents, corpus_file=corpus_file, total_examples=total_examples, total_words=total_words, + corpus_iterable=corpus_iterable, corpus_file=corpus_file, + total_examples=total_examples, total_words=total_words, epochs=epochs, start_alpha=start_alpha, end_alpha=end_alpha, word_count=word_count, queue_factor=queue_factor, report_delay=report_delay, callbacks=callbacks, **kwargs) @@ -642,9 +618,9 @@ def infer_vector(self, doc_words, alpha=None, min_alpha=None, epochs=None, steps doctag_locks = np.ones(1, dtype=REAL) doctag_indexes = [0] - work = zeros(self.trainables.layer1_size, dtype=REAL) + work = zeros(self.layer1_size, dtype=REAL) if not self.sg: - neu1 = matutils.zeros_aligned(self.trainables.layer1_size, dtype=REAL) + neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) alpha_delta = (alpha - min_alpha) / max(epochs - 1, 1) @@ -721,10 +697,10 @@ def __str__(self): segments.append('hs') if not self.sg or (self.sg and self.dbow_words): segments.append('w%d' % self.window) # window size, when relevant - if self.vocabulary.min_count > 1: - segments.append('mc%d' % self.vocabulary.min_count) - if self.vocabulary.sample > 0: - segments.append('s%g' % self.vocabulary.sample) + if self.min_count > 1: + segments.append('mc%d' % self.min_count) + if self.sample > 0: + segments.append('s%g' % self.sample) if self.workers > 1: segments.append('t%d' % self.workers) return '%s(%s)' % (self.__class__.__name__, ','.join(segments)) @@ -788,9 +764,9 @@ def load(cls, *args, **kwargs): fname : str Path to the saved file. *args : object - Additional arguments, see `~gensim.models.base_any2vec.BaseWordEmbeddingsModel.load`. + Additional arguments, see `~gensim.models.word2vec.Word2Vec.load`. **kwargs : object - Additional arguments, see `~gensim.models.base_any2vec.BaseWordEmbeddingsModel.load`. + Additional arguments, see `~gensim.models.word2vec.Word2Vec.load`. See Also -------- @@ -804,11 +780,13 @@ def load(cls, *args, **kwargs): """ try: - return super(Doc2Vec, cls).load(*args, **kwargs) - except AttributeError: - logger.info('Model saved using code from earlier Gensim Version. 
Re-loading old model in a compatible way.') - from gensim.models.deprecated.doc2vec import load_old_doc2vec - return load_old_doc2vec(*args, **kwargs) + return super(Doc2Vec, cls).load(*args, rethrow=True, **kwargs) + except AttributeError as ae: + logger.error( + "Model load error. Was model saved using code from an older Gensim Version? " + "Try loading older model using gensim-3.8.1, then re-saving, to restore " + "compatibility with current code.") + raise ae def estimate_memory(self, vocab_size=None, report=None): """Estimate required memory for a model using current settings. @@ -834,8 +812,8 @@ def estimate_memory(self, vocab_size=None, report=None): report['doctag_syn0'] = len(self.docvecs) * self.vector_size * dtype(REAL).itemsize return super(Doc2Vec, self).estimate_memory(vocab_size, report=report) - def build_vocab(self, documents=None, corpus_file=None, update=False, progress_per=10000, keep_raw_vocab=False, - trim_rule=None, **kwargs): + def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000, + keep_raw_vocab=False, trim_rule=None, **kwargs): """Build vocabulary from a sequence of documents (can be a once-only generator stream). Parameters @@ -873,19 +851,16 @@ def build_vocab(self, documents=None, corpus_file=None, update=False, progress_p Additional key word arguments passed to the internal vocabulary construction. """ - total_words, corpus_count = self.vocabulary.scan_vocab( - documents=documents, corpus_file=corpus_file, docvecs=self.docvecs, + total_words, corpus_count = self.scan_vocab( + corpus_iterable=corpus_iterable, corpus_file=corpus_file, docvecs=self.docvecs, progress_per=progress_per, trim_rule=trim_rule ) self.corpus_count = corpus_count self.corpus_total_words = total_words - report_values = self.vocabulary.prepare_vocab( - self.hs, self.negative, self.wv, update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, - **kwargs) + report_values = self.prepare_vocab(update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs) report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words']) - self.trainables.prepare_weights( - self.hs, self.negative, self.wv, self.docvecs, update=update) + self.prepare_weights(update=update) def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False): """Build vocabulary from a dictionary of word frequencies. @@ -930,80 +905,14 @@ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=No # Since no documents are provided, this is to control the corpus_count self.corpus_count = corpus_count or 0 - self.vocabulary.raw_vocab = raw_vocab + self.raw_vocab = raw_vocab # trim by min_count & precalculate downsampling - report_values = self.vocabulary.prepare_vocab( - self.hs, self.negative, self.wv, keep_raw_vocab=keep_raw_vocab, - trim_rule=trim_rule, update=update) + report_values = self.prepare_vocab(keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, update=update) report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words']) - self.trainables.prepare_weights( - self.hs, self.negative, self.wv, self.docvecs, update=update) - - def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=None, steps=None): - """Compute cosine similarity between two post-bulk out of training documents. - - Parameters - ---------- - model : :class:`~gensim.models.doc2vec.Doc2Vec` - An instance of a trained `Doc2Vec` model. 
- doc_words1 : list of str - Input document. - doc_words2 : list of str - Input document. - alpha : float, optional - The initial learning rate. - min_alpha : float, optional - Learning rate will linearly drop to `min_alpha` as training progresses. - steps : int, optional - Number of epoch to train the new document. - - Returns - ------- - float - The cosine similarity between `doc_words1` and `doc_words2`. - - """ - d1 = self.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, steps=steps) - d2 = self.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, steps=steps) - return np.dot(matutils.unitvec(d1), matutils.unitvec(d2)) - + self.prepare_weights(update=update) -class Doc2VecVocab(Word2VecVocab): - def __init__(self, max_vocab_size=None, min_count=5, sample=1e-3, sorted_vocab=True, null_word=0, ns_exponent=0.75): - """Vocabulary used by :class:`~gensim.models.doc2vec.Doc2Vec`. - - This includes a mapping from words found in the corpus to their total frequency count. - - Parameters - ---------- - max_vocab_size : int, optional - Maximum number of words in the Vocabulary. Used to limit the RAM during vocabulary building; - if there are more unique words than this, then prune the infrequent ones. - Every 10 million word types need about 1GB of RAM, set to `None` for no limit. - min_count : int - Words with frequency lower than this limit will be discarded from the vocabulary. - sample : float, optional - The threshold for configuring which higher-frequency words are randomly downsampled, - useful range is (0, 1e-5). - sorted_vocab : bool - If True, sort the vocabulary by descending frequency before assigning word indexes. - null_word : {0, 1} - If True, a null pseudo-word will be created for padding when using concatenative L1 (run-of-words). - This word is only ever input – never predicted – so count, huffman-point, etc doesn't matter. - ns_exponent : float, optional - The exponent used to shape the negative sampling distribution. A value of 1.0 samples exactly in proportion - to the frequencies, 0.0 samples all words equally, while a negative value samples low-frequency words more - than high-frequency words. The popular default value of 0.75 was chosen by the original Word2Vec paper. - More recently, in https://arxiv.org/abs/1804.04212, Caselles-Dupré, Lesaint, & Royo-Letelier suggest that - other values may perform better for recommendation applications. 
- - """ - super(Doc2VecVocab, self).__init__( - max_vocab_size=max_vocab_size, min_count=min_count, sample=sample, - sorted_vocab=sorted_vocab, null_word=null_word, ns_exponent=ns_exponent) - - def _scan_vocab(self, documents, docvecs, progress_per, trim_rule): + def _scan_vocab(self, corpus_iterable, progress_per, trim_rule): document_no = -1 total_words = 0 min_reduce = 1 @@ -1014,7 +923,7 @@ def _scan_vocab(self, documents, docvecs, progress_per, trim_rule): max_rawint = -1 # highest raw int tag seen (-1 for none) doctags_lookup = {} doctags_list = [] - for document_no, document in enumerate(documents): + for document_no, document in enumerate(corpus_iterable): if not checked_string_types: if isinstance(document.words, string_types): logger.warning( @@ -1027,7 +936,7 @@ def _scan_vocab(self, documents, docvecs, progress_per, trim_rule): interval_rate = (total_words - interval_count) / (default_timer() - interval_start) logger.info( "PROGRESS: at example #%i, processed %i words (%i/s), %i word types, %i tags", - document_no, total_words, interval_rate, len(vocab), len(docvecs) + document_no, total_words, interval_rate, len(vocab), len(doctags_list) ) interval_start = default_timer() interval_count = total_words @@ -1053,21 +962,26 @@ def _scan_vocab(self, documents, docvecs, progress_per, trim_rule): utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) min_reduce += 1 + corpus_count = document_no + 1 + if len(doctags_list) > corpus_count: + logger.warning("More unique tags (%i) than documents (%i).", len(doctags_list), corpus_count) + if max_rawint > corpus_count: + logger.warning( + "Highest int doctag (%i) larger than count of documents (%i). This means " + "at least %i excess, unused slots (%i bytes) will be allocated for vectors.", + max_rawint, corpus_count, ((max_rawint - corpus_count) * self.vector_size * 4)) if max_rawint > -1: # adjust indexes/list to account for range of pure-int keyed doctags for key in doctags_list: doctags_lookup[key].index = doctags_lookup[key].index + max_rawint + 1 doctags_list = ConcatList([range(0, max_rawint + 1), doctags_list]) - docvecs.vocab = doctags_lookup - docvecs.index2key = doctags_list - corpus_count = document_no + 1 - if len(doctags_list) > corpus_count: - logger.warn("More unique tags (%i) than documents (%i).", len(doctags_list), corpus_count) + self.docvecs.map = doctags_lookup + self.docvecs.index2key = doctags_list self.raw_vocab = vocab return total_words, corpus_count - def scan_vocab(self, documents=None, corpus_file=None, docvecs=None, progress_per=10000, trim_rule=None): + def scan_vocab(self, corpus_iterable=None, corpus_file=None, docvecs=None, progress_per=10000, trim_rule=None): """Create the models Vocabulary: A mapping from unique words in the corpus to their frequency count. 
Parameters @@ -1104,49 +1018,54 @@ def scan_vocab(self, documents=None, corpus_file=None, docvecs=None, progress_pe """ logger.info("collecting all words and their counts") if corpus_file is not None: - documents = TaggedLineDocument(corpus_file) + corpus_iterable = TaggedLineDocument(corpus_file) - total_words, corpus_count = self._scan_vocab(documents, docvecs, progress_per, trim_rule) + total_words, corpus_count = self._scan_vocab(corpus_iterable, progress_per, trim_rule) logger.info( "collected %i word types and %i unique tags from a corpus of %i examples and %i words", - len(self.raw_vocab), len(docvecs), corpus_count, total_words + len(self.raw_vocab), len(self.docvecs), corpus_count, total_words ) return total_words, corpus_count + def similarity_unseen_docs(self, doc_words1, doc_words2, alpha=None, min_alpha=None, steps=None): + """Compute cosine similarity between two post-bulk out of training documents. -class Doc2VecTrainables(Word2VecTrainables): - def __init__(self, dm=1, dm_concat=0, dm_tag_count=1, vector_size=100, seed=1, hashfxn=hash, window=5): - """Represents the inner shallow neural network used to train :class:`~gensim.models.doc2vec.Doc2Vec`.""" - super(Doc2VecTrainables, self).__init__( - vector_size=vector_size, seed=seed, hashfxn=hashfxn) - if dm and dm_concat: - self.layer1_size = (dm_tag_count + (2 * window)) * vector_size - logger.info("using concatenative %d-dimensional layer1", self.layer1_size) + Parameters + ---------- + model : :class:`~gensim.models.doc2vec.Doc2Vec` + An instance of a trained `Doc2Vec` model. + doc_words1 : list of str + Input document. + doc_words2 : list of str + Input document. + alpha : float, optional + The initial learning rate. + min_alpha : float, optional + Learning rate will linearly drop to `min_alpha` as training progresses. + steps : int, optional + Number of epoch to train the new document. - def prepare_weights(self, hs, negative, wv, docvecs, update=False): - """Build tables and model weights based on final vocabulary settings.""" - # set initial input/projection and hidden weights - if not update: - self.reset_weights(hs, negative, wv, docvecs) - else: - self.update_weights(hs, negative, wv) - - def reset_weights(self, hs, negative, wv, docvecs, vocabulary=None): - super(Doc2VecTrainables, self).reset_weights(hs, negative, wv) - self.reset_doc_weights(docvecs) - - def reset_doc_weights(self, docvecs): - docvecs.resize_vectors() - docvecs.randomly_initialize_vectors() - if docvecs.mapfile_path: - self.vectors_docs_lockf = np_memmap( - docvecs.mapfile_path + '.vectors_docs_lockf', dtype=REAL, mode='w+', shape=(len(docvecs.vectors),) - ) - self.vectors_docs_lockf.fill(1.0) - else: - self.vectors_docs_lockf = ones((len(docvecs.vectors),), dtype=REAL) # zeros suppress learning + Returns + ------- + float + The cosine similarity between `doc_words1` and `doc_words2`. 
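A minimal usage sketch for `similarity_unseen_docs` as it appears in this patch, assuming a trained Doc2Vec `model` (the token lists and `steps` value are illustrative only):

.. sourcecode:: pycon

    >>> words1 = ['machine', 'learning', 'is', 'fun']
    >>> words2 = ['deep', 'learning', 'is', 'fun']
    >>> model.similarity_unseen_docs(words1, words2, steps=20)  # infers a vector per list, then cosine similarity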
+ + """ + d1 = self.infer_vector(doc_words=doc_words1, alpha=alpha, min_alpha=min_alpha, steps=steps) + d2 = self.infer_vector(doc_words=doc_words2, alpha=alpha, min_alpha=min_alpha, steps=steps) + return np.dot(matutils.unitvec(d1), matutils.unitvec(d2)) + + +class Doc2VecVocab(utils.SaveLoad): + """Obsolete class retained for now as load-compatibility state capture""" + pass + + +class Doc2VecTrainables(utils.SaveLoad): + """Obsolete class retained for now as load-compatibility state capture""" + pass class TaggedBrownCorpus(object): diff --git a/gensim/models/doc2vec_inner.pyx b/gensim/models/doc2vec_inner.pyx index 8d9ca4862f..e06aa00a35 100644 --- a/gensim/models/doc2vec_inner.pyx +++ b/gensim/models/doc2vec_inner.pyx @@ -225,14 +225,14 @@ cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, doctag_locks=None, docvecs_count=0): c[0].hs = model.hs c[0].negative = model.negative - c[0].sample = (model.vocabulary.sample != 0) + c[0].sample = (model.sample != 0) c[0].cbow_mean = model.cbow_mean c[0].train_words = train_words c[0].learn_doctags = learn_doctags c[0].learn_words = learn_words c[0].learn_hidden = learn_hidden c[0].alpha = alpha - c[0].layer1_size = model.trainables.layer1_size + c[0].layer1_size = model.layer1_size c[0].vector_size = model.docvecs.vector_size c[0].workers = model.workers c[0].docvecs_count = docvecs_count @@ -251,28 +251,28 @@ cdef init_d2v_config(Doc2VecConfig *c, model, alpha, learn_doctags, learn_words, doctag_vectors = model.docvecs.vectors_docs c[0].doctag_vectors = (np.PyArray_DATA(doctag_vectors)) if word_locks is None: - word_locks = model.trainables.vectors_lockf + word_locks = model.wv.vectors_lockf c[0].word_locks = (np.PyArray_DATA(word_locks)) if doctag_locks is None: - doctag_locks = model.trainables.vectors_docs_lockf + doctag_locks = model.docvecs.vectors_lockf c[0].doctag_locks = (np.PyArray_DATA(doctag_locks)) if c[0].hs: - c[0].syn1 = (np.PyArray_DATA(model.trainables.syn1)) + c[0].syn1 = (np.PyArray_DATA(model.syn1)) if c[0].negative: - c[0].syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) - c[0].cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) - c[0].cum_table_len = len(model.vocabulary.cum_table) + c[0].syn1neg = (np.PyArray_DATA(model.syn1neg)) + c[0].cum_table = (np.PyArray_DATA(model.cum_table)) + c[0].cum_table_len = len(model.cum_table) if c[0].negative or c[0].sample: c[0].next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) # convert Python structures to primitive types, so we can release the GIL if work is None: - work = zeros(model.trainables.layer1_size, dtype=REAL) + work = zeros(model.layer1_size, dtype=REAL) c[0].work = np.PyArray_DATA(work) if neu1 is None: - neu1 = zeros(model.trainables.layer1_size, dtype=REAL) + neu1 = zeros(model.layer1_size, dtype=REAL) c[0].neu1 = np.PyArray_DATA(neu1) diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py index 7f0c482362..dd299ec964 100644 --- a/gensim/models/fasttext.py +++ b/gensim/models/fasttext.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Authors: Shiva Manne , Chinmaya Pancholi +# Authors: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. 
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html @@ -37,7 +37,7 @@ ['human', 'interface', 'computer'] >>> print(len(common_texts)) 9 - >>> model = FastText(size=4, window=3, min_count=1) # instantiate + >>> model = FastText(vector_size=4, window=3, min_count=1) # instantiate >>> model.build_vocab(sentences=common_texts) >>> model.train(sentences=common_texts, total_examples=len(common_texts), epochs=10) # train @@ -50,7 +50,7 @@ .. sourcecode:: pycon - >>> model2 = FastText(size=4, window=3, min_count=1, sentences=common_texts, iter=10) + >>> model2 = FastText(vector_size=4, window=3, min_count=1, sentences=common_texts, iter=10) .. Important:: This style of initialize-and-train in a single line is **deprecated**. We include it here @@ -84,7 +84,7 @@ >>> from gensim.test.utils import datapath >>> >>> corpus_file = datapath('lee_background.cor') # absolute path to corpus - >>> model3 = FastText(size=4, window=3, min_count=1) + >>> model3 = FastText(vector_size=4, window=3, min_count=1) >>> model3.build_vocab(corpus_file=corpus_file) # scan over corpus to build the vocabulary >>> >>> total_words = model3.corpus_total_words # number of words in the corpus @@ -116,7 +116,7 @@ ... yield list(tokenize(line)) >>> >>> - >>> model4 = FastText(size=4, window=3, min_count=1) + >>> model4 = FastText(vector_size=4, window=3, min_count=1) >>> model4.build_vocab(sentences=MyIter()) >>> total_examples = model4.corpus_count >>> model4.train(sentences=MyIter(), total_examples=total_examples, epochs=5) @@ -258,10 +258,7 @@ - :mod:`gensim.models.fasttext`: This module. Contains FastText-specific functionality only. - :mod:`gensim.models.keyedvectors`: Implements generic functionality. -- :mod:`gensim.models.word2vec`: Contains implementations for the vocabulary - and the trainables for FastText. -- :mod:`gensim.models.base_any2vec`: Contains implementations for the base. - classes, including functionality such as callbacks, logging. +- :mod:`gensim.models.word2vec`: Provides much of the basic scan & train framework. - :mod:`gensim.utils`: Implements model I/O (loading and saving). Our implementation relies heavily on inheritance. 
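As a quick orientation to the flattened attribute layout this patch introduces: values that previously lived on the intermediate `model.vocabulary` and `model.trainables` objects are now reached directly on the model or on its `wv`, mirroring the substitutions made throughout the hunks below. A sketch, assuming a trained FastText `model` with negative sampling enabled:

.. sourcecode:: pycon

    >>> model.min_count, model.sample        # formerly model.vocabulary.min_count / model.vocabulary.sample
    >>> model.syn1neg.shape                  # formerly model.trainables.syn1neg
    >>> model.wv.vectors_ngrams_lockf[:5]    # formerly model.trainables.vectors_ngrams_lockf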
@@ -288,9 +285,8 @@ import gensim.models._fasttext_bin -from gensim.models.word2vec import Word2VecVocab, Word2VecTrainables +from gensim.models.word2vec import Word2Vec from gensim.models.keyedvectors import KeyedVectors -from gensim.models.base_any2vec import BaseWordEmbeddingsModel from gensim.utils import deprecated, call_on_class_only, open, NO_CYTHON @@ -312,10 +308,11 @@ raise NO_CYTHON -class FastText(BaseWordEmbeddingsModel): - def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, +class FastText(Word2Vec): + def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, vector_size=100, alpha=0.025, + window=5, min_count=5, max_vocab_size=None, word_ngrams=1, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, max_n=6, + negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, bucket=2000000, trim_rule=None, batch_words=MAX_WORDS_IN_BATCH, callbacks=(), compatible_hash=True): """Train, use and evaluate word representations learned using the method @@ -470,27 +467,62 @@ def __init__(self, sentences=None, corpus_file=None, sg=0, hs=0, size=100, alpha if self.word_ngrams <= 1 and max_n == 0: bucket = 0 - self.wv = FastTextKeyedVectors(size, min_n, max_n, bucket, compatible_hash) - self.vocabulary = FastTextVocab( - max_vocab_size=max_vocab_size, min_count=min_count, sample=sample, - sorted_vocab=bool(sorted_vocab), null_word=null_word, ns_exponent=ns_exponent) - self.trainables = FastTextTrainables(vector_size=size, seed=seed, bucket=bucket, hashfxn=hashfxn) - self.trainables.prepare_weights(hs, negative, self.wv, update=False, vocabulary=self.vocabulary) - self.wv.bucket = self.trainables.bucket + self.wv = FastTextKeyedVectors(vector_size, min_n, max_n, bucket, compatible_hash) + self.bucket = bucket + self.wv.bucket = bucket super(FastText, self).__init__( - sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter, + sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=vector_size, epochs=epochs, callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window, + max_vocab_size=max_vocab_size, min_count=min_count, sample=sample, sorted_vocab=sorted_vocab, + null_word=null_word, ns_exponent=ns_exponent, hashfxn=hashfxn, seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha) - def build_vocab(self, sentences=None, corpus_file=None, update=False, progress_per=10000, keep_raw_vocab=False, - trim_rule=None, **kwargs): + def prepare_weights(self, update=False): + """In addition to superclass allocations, compute ngrams of all words present in vocabulary. + + Parameters + ---------- + update : bool + If True, the new vocab words and their new ngrams word vectors are initialized + with random uniform distribution and updated/added to the existing vocab word and ngram vectors. 
+ """ + super(FastText, self).prepare_weights(update=update) + if not update: + self.wv.init_ngrams_weights(self.seed) + self.wv.vectors_vocab_lockf = ones(len(self.wv.vectors_vocab), dtype=REAL) + self.wv.vectors_ngrams_lockf = ones(len(self.wv.vectors_ngrams), dtype=REAL) + else: + self.wv.update_ngrams_weights(self.seed, self.old_vocab_len) + self.wv.vectors_vocab_lockf = _pad_ones(self.wv.vectors_vocab_lockf, len(self.wv.vectors_vocab)) + self.wv.vectors_ngrams_lockf = _pad_ones(self.wv.vectors_ngrams_lockf, len(self.wv.vectors_ngrams)) + + def init_post_load(self, hidden_output): + num_vectors = len(self.wv.vectors) + vocab_size = len(self.wv.vocab) + vector_size = self.wv.vector_size + + assert num_vectors > 0, 'expected num_vectors to be initialized already' + assert vocab_size > 0, 'expected vocab_size to be initialized already' + + self.wv.vectors_ngrams_lockf = ones(len(self.wv.vectors_ngrams), dtype=REAL) + self.wv.vectors_vocab_lockf = ones(len(self.wv.vectors_vocab.shape), dtype=REAL) + + if self.hs: + self.syn1 = hidden_output + if self.negative: + self.syn1neg = hidden_output + + self.layer1_size = vector_size + + def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000, + keep_raw_vocab=False, trim_rule=None, **kwargs): """Build vocabulary from a sequence of sentences (can be a once-only generator stream). Each sentence must be a list of unicode strings. Parameters ---------- - sentences : iterable of list of str, optional + corpus_iterable : iterable of list of str, optional Can be simply a list of lists of tokens, but for larger corpora, consider an iterable that streams the sentences directly from disk/network. See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` @@ -521,7 +553,7 @@ def build_vocab(self, sentences=None, corpus_file=None, update=False, progress_p **kwargs Additional key word parameters passed to - :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.build_vocab`. + :meth:`~gensim.models.word2vec.Word2Vec.build_vocab`. Examples -------- @@ -542,7 +574,7 @@ def build_vocab(self, sentences=None, corpus_file=None, update=False, progress_p """ if not update: - self.wv.init_ngrams_weights(self.trainables.seed) + self.wv.init_ngrams_weights(self.seed) elif not len(self.wv.vocab): raise RuntimeError( "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " @@ -551,43 +583,30 @@ def build_vocab(self, sentences=None, corpus_file=None, update=False, progress_p "before doing an online update." ) else: - self.vocabulary.old_vocab_len = len(self.wv.vocab) + self.old_vocab_len = len(self.wv.vocab) retval = super(FastText, self).build_vocab( - sentences=sentences, corpus_file=corpus_file, update=update, progress_per=progress_per, + corpus_iterable=corpus_iterable, corpus_file=corpus_file, update=update, progress_per=progress_per, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs) if update: - self.wv.update_ngrams_weights(self.trainables.seed, self.vocabulary.old_vocab_len) + self.wv.update_ngrams_weights(self.seed, self.old_vocab_len) return retval - def _set_train_params(self, **kwargs): - # - # We need the wv.buckets_word member to be initialized in order to - # continue training. The _clear_post_train method destroys this - # variable, so we reinitialize it here, if needed. - # - # The .old_vocab_len member is set only to keep the init_ngrams_weights method happy. 
- # - if self.wv.buckets_word is None: - self.vocabulary.old_vocab_len = len(self.wv.vocab) - self.trainables.init_ngrams_weights(self.wv, update=True, vocabulary=self.vocabulary) - def _clear_post_train(self): """Clear the model's internal structures after training has finished to free up RAM.""" self.wv.vectors_norm = None - self.wv.buckets_word = None self.wv.adjust_vectors() # ensure composite-word vecs reflect latest training def estimate_memory(self, vocab_size=None, report=None): vocab_size = vocab_size or len(self.wv.vocab) vec_size = self.vector_size * np.dtype(np.float32).itemsize - l1_size = self.trainables.layer1_size * np.dtype(np.float32).itemsize + l1_size = self.layer1_size * np.dtype(np.float32).itemsize report = report or {} report['vocab'] = len(self.wv.vocab) * (700 if self.hs else 500) report['syn0_vocab'] = len(self.wv.vocab) * vec_size - num_buckets = self.trainables.bucket + num_buckets = self.bucket if self.hs: report['syn1'] = len(self.wv.vocab) * l1_size if self.negative: @@ -595,7 +614,7 @@ def estimate_memory(self, vocab_size=None, report=None): if self.word_ngrams > 0 and self.wv.vocab: num_buckets = num_ngrams = 0 - if self.trainables.bucket: + if self.bucket: buckets = set() num_ngrams = 0 for word in self.wv.vocab: @@ -603,7 +622,7 @@ def estimate_memory(self, vocab_size=None, report=None): word, self.wv.min_n, self.wv.max_n, - self.trainables.bucket, + self.bucket, self.wv.compatible_hash ) num_ngrams += len(hashes) @@ -669,7 +688,7 @@ def _do_train_job(self, sentences, alpha, inits): return tally, self._raw_word_count(sentences) - def train(self, sentences=None, corpus_file=None, total_examples=None, total_words=None, + def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, word_count=0, queue_factor=2, report_delay=1.0, callbacks=(), **kwargs): """Update the model's neural weights from a sequence of sentences (can be a once-only generator stream). 
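Under the renamed keyword arguments in this patch, the two-step FastText flow looks roughly as follows (a sketch reusing the `common_texts` toy corpus from the examples above; parameter values are illustrative):

.. sourcecode:: pycon

    >>> from gensim.test.utils import common_texts
    >>> from gensim.models import FastText
    >>>
    >>> model = FastText(vector_size=4, window=3, min_count=1)
    >>> model.build_vocab(corpus_iterable=common_texts)
    >>> model.train(corpus_iterable=common_texts, total_examples=len(common_texts), epochs=10)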
@@ -736,20 +755,26 @@ def train(self, sentences=None, corpus_file=None, total_examples=None, total_wor """ - if corpus_file is None and sentences is None: - raise TypeError("Either one of corpus_file or sentences value must be provided") + if corpus_file is None and corpus_iterable is None: + raise TypeError("Either one of corpus_file or corpus_iterable value must be provided") - if corpus_file is not None and sentences is not None: - raise TypeError("Both corpus_file and sentences must not be provided at the same time") + if corpus_file is not None and corpus_iterable is not None: + raise TypeError("Both corpus_file and corpus_iterable must not be provided at the same time") - if sentences is None and not os.path.isfile(corpus_file): + if corpus_iterable is None and not os.path.isfile(corpus_file): raise TypeError("Parameter corpus_file must be a valid path to a file, got %r instead" % corpus_file) - if sentences is not None and not isinstance(sentences, Iterable): - raise TypeError("sentences must be an iterable of list, got %r instead" % sentences) + if corpus_iterable is not None and not isinstance(corpus_iterable, Iterable): + raise TypeError("sentences must be an iterable of list, got %r instead" % corpus_iterable) + + if self.wv.buckets_word is None: + logger.warn("self.wv.buckets_word was None; fixing.") + self.old_vocab_len = len(self.wv.vocab) + self.wv.init_ngrams_weights(seed=self.seed) super(FastText, self).train( - sentences=sentences, corpus_file=corpus_file, total_examples=total_examples, total_words=total_words, + corpus_iterable=corpus_iterable, corpus_file=corpus_file, + total_examples=total_examples, total_words=total_words, epochs=epochs, start_alpha=start_alpha, end_alpha=end_alpha, word_count=word_count, queue_factor=queue_factor, report_delay=report_delay, callbacks=callbacks) self.wv.adjust_vectors() @@ -767,8 +792,8 @@ def init_sims(self, replace=False): # init_sims() resides in KeyedVectors because it deals with input layer mainly, but because the # hidden layer is not an attribute of KeyedVectors, it has to be deleted in this class. # The normalizing of input layer happens inside of KeyedVectors. - if replace and hasattr(self.trainables, 'syn1'): - del self.trainables.syn1 + if replace and hasattr(self, 'syn1'): + del self.syn1 self.wv.init_sims(replace) def clear_sims(self): @@ -850,141 +875,36 @@ def load(cls, *args, **kwargs): Save :class:`~gensim.models.fasttext.FastText` model. """ - try: - model = super(FastText, cls).load(*args, **kwargs) - - if not hasattr(model.trainables, 'vectors_vocab_lockf') and hasattr(model.wv, 'vectors_vocab'): - model.trainables.vectors_vocab_lockf = ones(len(model.wv.vectors_vocab), dtype=REAL) - if not hasattr(model.trainables, 'vectors_ngrams_lockf') and hasattr(model.wv, 'vectors_ngrams'): - model.trainables.vectors_ngrams_lockf = ones(len(model.wv.vectors_ngrams), dtype=REAL) - - # fixup mistakenly overdimensioned gensim-3.x lockf arrays - if len(model.trainables.vectors_vocab_lockf.shape) > 1: - model.trainables.vectors_vocab_lockf = model.trainables.vectors_vocab_lockf[:, 0] - if len(model.trainables.vectors_ngrams_lockf.shape) > 1: - model.trainables.vectors_ngrams_lockf = model.trainables.vectors_ngrams_lockf[:, 0] - - if not hasattr(model.wv, 'bucket'): - model.wv.bucket = model.trainables.bucket - except AttributeError: - logger.info('Model saved using code from earlier Gensim Version. 
Re-loading old model in a compatible way.') - from gensim.models.deprecated.fasttext import load_old_fasttext - model = load_old_fasttext(*args, **kwargs) + model = super(FastText, cls).load(*args, rethrow=True, **kwargs) + + if not hasattr(model.wv, 'vectors_vocab_lockf') and hasattr(model.wv, 'vectors_vocab'): + # TODO: try trainables-location + model.wv.vectors_vocab_lockf = ones(len(model.wv.vectors_vocab), dtype=REAL) + if not hasattr(model, 'vectors_ngrams_lockf') and hasattr(model.wv, 'vectors_ngrams'): + # TODO: try trainables-location + model.wv.vectors_ngrams_lockf = ones(len(model.wv.vectors_ngrams), dtype=REAL) + # fixup mistakenly overdimensioned gensim-3.x lockf arrays + if len(model.wv.vectors_vocab_lockf.shape) > 1: + model.wv.vectors_vocab_lockf = model.wv.vectors_vocab_lockf[:, 0] + if len(model.wv.vectors_ngrams_lockf.shape) > 1: + model.wv.vectors_ngrams_lockf = model.wv.vectors_ngrams_lockf[:, 0] + if not hasattr(model, 'bucket'): + model.bucket = model.wv.bucket _try_upgrade(model.wv) return model -class FastTextVocab(Word2VecVocab): +class FastTextVocab(utils.SaveLoad): """This is a redundant class. It exists only to maintain backwards compatibility with older gensim versions.""" pass -class FastTextTrainables(Word2VecTrainables): - """Represents the inner shallow neural network used to train :class:`~gensim.models.fasttext.FastText`. - - Mostly inherits from its parent (:class:`~gensim.models.word2vec.Word2VecTrainables`). - Adds logic for calculating and maintaining ngram weights. - - Attributes - ---------- - hashfxn : function - Used for randomly initializing weights. Defaults to the built-in hash() - layer1_size : int - The size of the inner layer of the NN. Equal to the vector dimensionality. - Set in the :class:`~gensim.models.word2vec.Word2VecTrainables` constructor. - seed : float - The random generator seed used in reset_weights and update_weights. - syn1 : numpy.array - The inner layer of the NN. Each row corresponds to a term in the vocabulary. - Columns correspond to weights of the inner layer. - There are layer1_size such weights. - Set in the reset_weights and update_weights methods, only if hierarchical sampling is used. - syn1neg : numpy.array - Similar to syn1, but only set if negative sampling is used. - vectors_lockf : numpy.array - A one-dimensional array with one element for each term in the vocab. Set in reset_weights to an array of ones. - vectors_vocab_lockf : numpy.array - Similar to vectors_vocab_lockf, ones(len(model.trainables.vectors), dtype=REAL) - vectors_ngrams_lockf : numpy.array - np.ones((self.bucket, wv.vector_size), dtype=REAL) - - """ - def __init__(self, vector_size=100, seed=1, hashfxn=hash, bucket=2000000): - super(FastTextTrainables, self).__init__( - vector_size=vector_size, seed=seed, hashfxn=hashfxn) - self.bucket = int(bucket) - - # - # There are also two "hidden" attributes that get initialized outside - # this constructor: - # - # 1. vectors_vocab_lockf - # 2. vectors_ngrams_lockf - # - # These are both 1D matrices of shapes equal to the lengths of - # wv.vectors_vocab and wv.vectors_ngrams. So, each row corresponds to - # a vector. - # - # Lockf stands for "lock factor": zero values suppress learning, one - # values enable it. The vectors_vocab_lockf and vectors_ngrams_lockf - # are used only by the Cython code in fasttext_inner.pyx. - # - # The word2vec implementation also uses vectors_lockf: in that case, - # it's a 1D array, with a real number for each vector. 
The FastText - # implementation inherits this vectors_lockf attribute but doesn't - # appear to use it. - # - - def prepare_weights(self, hs, negative, wv, update=False, vocabulary=None): - super(FastTextTrainables, self).prepare_weights(hs, negative, wv, update=update, vocabulary=vocabulary) - self.init_ngrams_weights(wv, update=update, vocabulary=vocabulary) - - def init_ngrams_weights(self, wv, update=False, vocabulary=None): - """Compute ngrams of all words present in vocabulary and stores vectors for only those ngrams. - Vectors for other ngrams are initialized with a random uniform distribution in FastText. - - Parameters - ---------- - wv : :class:`~gensim.models.fasttext.FastTextKeyedVectors` - Contains the mapping between the words and embeddings. - The vectors for the computed ngrams will go here. - update : bool - If True, the new vocab words and their new ngrams word vectors are initialized - with random uniform distribution and updated/added to the existing vocab word and ngram vectors. - vocabulary : :class:`~gensim.models.fasttext.FastTextVocab` - This object represents the vocabulary of the model. - If update is True, then vocabulary may not be None. - - """ - if not update: - wv.init_ngrams_weights(self.seed) - self.vectors_vocab_lockf = ones(len(wv.vectors_vocab), dtype=REAL) - self.vectors_ngrams_lockf = ones(len(wv.vectors_ngrams), dtype=REAL) - else: - wv.update_ngrams_weights(self.seed, vocabulary.old_vocab_len) - self.vectors_vocab_lockf = _pad_ones(self.vectors_vocab_lockf, len(wv.vectors_vocab)) - self.vectors_ngrams_lockf = _pad_ones(self.vectors_ngrams_lockf, len(wv.vectors_ngrams)) - - def init_post_load(self, model, hidden_output): - num_vectors = len(model.wv.vectors) - vocab_size = len(model.wv.vocab) - vector_size = model.wv.vector_size - - assert num_vectors > 0, 'expected num_vectors to be initialized already' - assert vocab_size > 0, 'expected vocab_size to be initialized already' - - self.vectors_ngrams_lockf = ones(len(model.wv.vectors_ngrams), dtype=REAL) - self.vectors_vocab_lockf = ones(len(model.wv.vectors_vocab.shape), dtype=REAL) - - if model.hs: - self.syn1 = hidden_output - if model.negative: - self.syn1neg = hidden_output - - self.layer1_size = vector_size +class FastTextTrainables(utils.SaveLoad): + """Obsolete class retained for backward-compatible load()s""" + pass def _pad_ones(m, new_len): @@ -1113,8 +1033,8 @@ def load_facebook_vectors(path, encoding='utf-8'): model training. """ - model_wrapper = _load_fasttext_format(path, encoding=encoding, full_model=False) - return model_wrapper.wv + full_model = _load_fasttext_format(path, encoding=encoding, full_model=False) + return full_model.wv def _load_fasttext_format(model_file, encoding='utf-8', full_model=True): @@ -1140,9 +1060,9 @@ def _load_fasttext_format(model_file, encoding='utf-8', full_model=True): m = gensim.models._fasttext_bin.load(fin, encoding=encoding, full_model=full_model) model = FastText( - size=m.dim, + vector_size=m.dim, window=m.ws, - iter=m.epoch, + epochs=m.epoch, negative=m.neg, hs=int(m.loss == 1), sg=int(m.model == 2), @@ -1153,9 +1073,9 @@ def _load_fasttext_format(model_file, encoding='utf-8', full_model=True): max_n=m.maxn, ) model.corpus_total_words = m.ntokens - model.vocabulary.raw_vocab = m.raw_vocab - model.vocabulary.nwords = m.nwords - model.vocabulary.vocab_size = m.vocab_size + model.raw_vocab = m.raw_vocab + model.nwords = m.nwords + model.vocab_size = m.vocab_size # # This is here to fix https://github.com/RaRe-Technologies/gensim/pull/2373. 
@@ -1169,15 +1089,13 @@ def _load_fasttext_format(model_file, encoding='utf-8', full_model=True): # Native models trained _without_ pretrained vectors already contain the # trimmed raw_vocab, so this change does not affect them. # - model.vocabulary.prepare_vocab( - model.hs, model.negative, model.wv, - update=True, min_count=1, - ) + model.prepare_vocab(update=True, min_count=1) model.num_original_vectors = m.vectors_ngrams.shape[0] model.wv.init_post_load(m.vectors_ngrams) - model.trainables.init_post_load(model, m.hidden_output) + model.init_post_load(m.hidden_output) + _check_model(model) logger.info("loaded %s weight matrix for fastText model from %s", m.vectors_ngrams.shape, fin.name) @@ -1192,28 +1110,22 @@ def _check_model(m): 'mismatch between vector size in model params ({}) and model vectors ({})' .format(m.wv.vector_size, m.wv.vectors_ngrams) ) - - try: - syn1neg = m.trainables.syn1neg - except AttributeError: - syn1neg = None - - if syn1neg is not None: - assert m.wv.vector_size == m.trainables.syn1neg.shape[1], ( + if m.syn1neg is not None: + assert m.wv.vector_size == m.syn1neg.shape[1], ( 'mismatch between vector size in model params ({}) and trainables ({})' .format(m.wv.vector_size, m.wv.vectors_ngrams) ) - assert len(m.wv.vocab) == m.vocabulary.nwords, ( + assert len(m.wv.vocab) == m.nwords, ( 'mismatch between final vocab size ({} words), ' - 'and expected number of words ({} words)'.format(len(m.wv.vocab), m.vocabulary.nwords) + 'and expected number of words ({} words)'.format(len(m.wv.vocab), m.nwords) ) - if len(m.wv.vocab) != m.vocabulary.vocab_size: + if len(m.wv.vocab) != m.vocab_size: # expecting to log this warning only for pretrained french vector, wiki.fr logger.warning( "mismatch between final vocab size (%s words), and expected vocab size (%s words)", - len(m.wv.vocab), m.vocabulary.vocab_size + len(m.wv.vocab), m.vocab_size ) @@ -1524,7 +1436,6 @@ def init_post_load(self, fb_vectors): self.vectors_vocab = np.array(fb_vectors[:vocab_words, :]) self.vectors_ngrams = np.array(fb_vectors[vocab_words:, :]) self.buckets_word = None # This can get initialized later - self.adjust_vectors() # calculate composite full-word vectors def adjust_vectors(self): diff --git a/gensim/models/fasttext_inner.pyx b/gensim/models/fasttext_inner.pyx index c2794d7d11..0702729c90 100644 --- a/gensim/models/fasttext_inner.pyx +++ b/gensim/models/fasttext_inner.pyx @@ -454,26 +454,26 @@ cdef void init_ft_config(FastTextConfig *c, model, alpha, _work, _neu1): """ c.hs = model.hs c.negative = model.negative - c.sample = (model.vocabulary.sample != 0) + c.sample = (model.sample != 0) c.cbow_mean = model.cbow_mean c.window = model.window c.workers = model.workers c.syn0_vocab = (np.PyArray_DATA(model.wv.vectors_vocab)) - c.word_locks_vocab = (np.PyArray_DATA(model.trainables.vectors_vocab_lockf)) + c.word_locks_vocab = (np.PyArray_DATA(model.wv.vectors_vocab_lockf)) c.syn0_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams)) - c.word_locks_ngrams = (np.PyArray_DATA(model.trainables.vectors_ngrams_lockf)) + c.word_locks_ngrams = (np.PyArray_DATA(model.wv.vectors_ngrams_lockf)) c.alpha = alpha c.size = model.wv.vector_size if c.hs: - c.syn1 = (np.PyArray_DATA(model.trainables.syn1)) + c.syn1 = (np.PyArray_DATA(model.syn1)) if c.negative: - c.syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) - c.cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) - c.cum_table_len = len(model.vocabulary.cum_table) + c.syn1neg = (np.PyArray_DATA(model.syn1neg)) + c.cum_table = 
(np.PyArray_DATA(model.cum_table)) + c.cum_table_len = len(model.cum_table) if c.negative or c.sample: c.next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py index d4882778d0..68fc62858e 100644 --- a/gensim/models/keyedvectors.py +++ b/gensim/models/keyedvectors.py @@ -187,9 +187,6 @@ from six.moves import zip, range from scipy import stats -# For backwards compatibility, see https://github.com/RaRe-Technologies/gensim/issues/2201 -# -from gensim.models.deprecated.keyedvectors import EuclideanKeyedVectors # noqa logger = logging.getLogger(__name__) @@ -220,6 +217,11 @@ def _load_specials(self, *args, **kwargs): # fixup rename/consolidation into index2key of older index2word, index2entity if not hasattr(self, 'index2key'): self.index2key = self.__dict__.pop('index2word', self.__dict__.pop('index2word', None)) + # fixup rename into vectors of older syn0 + if not hasattr(self, 'vectors'): + self.vectors = self.__dict__.pop('syn0', None) + self.vectors_norm = None + self.vector_size = self.vectors.shape[1] # fixup rename of vocab into map if 'map' not in self.__dict__: self.map = self.__dict__.pop('vocab', None) @@ -1383,6 +1385,7 @@ def similarity_unseen_docs(self, *args, **kwargs): # to help 3.8.1 & older pickles load properly Word2VecKeyedVectors = KeyedVectors Doc2VecKeyedVectors = KeyedVectors +EuclideanKeyedVectors = KeyedVectors def _l2_norm(m, replace=False): diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py index b6a6c8c2d6..5432059ec4 100755 --- a/gensim/models/word2vec.py +++ b/gensim/models/word2vec.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# Author: Shiva Manne +# Author: Gensim Contributors # Copyright (C) 2018 RaRe Technologies s.r.o. 
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html @@ -129,13 +129,13 @@ from collections import defaultdict, namedtuple from dataclasses import dataclass from typing import List +from types import GeneratorType import threading import itertools -import warnings +import copy from gensim.utils import keep_vocab_item, call_on_class_only, deprecated from gensim.models.keyedvectors import KeyedVectors, pseudorandom_weak_vector -from gensim.models.base_any2vec import BaseWordEmbeddingsModel try: from queue import Queue, Empty @@ -145,6 +145,7 @@ from numpy import exp, dot, zeros, dtype, float32 as REAL,\ uint32, seterr, array, uint8, vstack, fromstring, sqrt,\ sum as np_sum, ones, logaddexp +import numpy as np from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc from six import iteritems, itervalues, string_types @@ -228,12 +229,12 @@ def score_cbow_pair(model, word, l1): return sum(lprob) -class Word2Vec(BaseWordEmbeddingsModel): - def __init__(self, sentences=None, corpus_file=None, size=100, alpha=0.025, window=5, min_count=5, +class Word2Vec(utils.SaveLoad): + def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, - sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, + sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0, trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(), - max_final_vocab=None): + comment=None, max_final_vocab=None): """Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/. Once you're finished training a model (=no more updates, only querying) @@ -262,7 +263,7 @@ def __init__(self, sentences=None, corpus_file=None, size=100, alpha=0.025, wind Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or `corpus_file` arguments need to be passed (or none of them, in that case, the model is left uninitialized). - size : int, optional + vector_size : int, optional Dimensionality of the word vectors. window : int, optional Maximum distance between the current and predicted word within a sentence. @@ -310,8 +311,8 @@ def __init__(self, sentences=None, corpus_file=None, size=100, alpha=0.025, wind useful range is (0, 1e-5). hashfxn : function, optional Hash function to use to randomly initialize weights, for increased training reproducibility. - iter : int, optional - Number of iterations (epochs) over the corpus. + epochs : int, optional + Number of iterations (epochs) over the corpus. (Formerly: `iter`) trim_rule : function, optional Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, be trimmed away, or handled using the default (discard if word count < min_count). @@ -342,48 +343,516 @@ def __init__(self, sentences=None, corpus_file=None, size=100, alpha=0.025, wind -------- Initialize and train a :class:`~gensim.models.word2vec.Word2Vec` model - .. sourcecode:: pycon + .. 
sourcecode:: pycon + + >>> from gensim.models import Word2Vec + >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] + >>> model = Word2Vec(sentences, min_count=1) + + Attributes + ---------- + wv : :class:`~gensim.models.keyedvectors.KeyedVectors` + This object essentially contains the mapping between words and embeddings. After training, it can be used + directly to query those embeddings in various ways. See the module level docstring for examples. + + """ + corpus_iterable = sentences + + self.vector_size = int(vector_size) + self.workers = int(workers) + self.epochs = epochs + self.train_count = 0 + self.total_train_time = 0 + self.batch_words = batch_words + + self.sg = int(sg) + self.alpha = float(alpha) + self.min_alpha = float(min_alpha) + + self.window = int(window) + self.random = np.random.RandomState(seed) + + self.hs = int(hs) + self.negative = int(negative) + self.ns_exponent = ns_exponent + self.cbow_mean = int(cbow_mean) + self.compute_loss = bool(compute_loss) + self.running_training_loss = 0 + self.min_alpha_yet_reached = float(alpha) + self.corpus_count = 0 + self.corpus_total_words = 0 + + self.max_final_vocab = max_final_vocab + self.max_vocab_size = max_vocab_size + self.min_count = min_count + self.sample = sample + self.sorted_vocab = sorted_vocab + self.null_word = null_word + self.cum_table = None # for negative sampling + self.raw_vocab = None + + if not hasattr(self, 'wv'): # set unless subclass already set (eg: FastText) + self.wv = KeyedVectors(vector_size) + + self.hashfxn = hashfxn + self.seed = seed + if not hasattr(self, 'layer1_size'): # set unless subclass already set (as for Doc2Vec dm_concat mode) + self.layer1_size = vector_size + + self.comment = comment + + self.load = call_on_class_only + + if corpus_iterable is not None or corpus_file is not None: + self.build_vocab_and_train(corpus_iterable=corpus_iterable, corpus_file=corpus_file, + trim_rule=trim_rule, callbacks=callbacks) + else: + if trim_rule is not None: + logger.warning( + "The rule, if given, is only used to prune vocabulary during build_vocab() " + "and is not stored as part of the model. Model initialized without sentences. " + "trim_rule provided, if any, will be ignored.") + if callbacks: + logger.warning( + "Callbacks are no longer retained by the model, so must be provided whenever " + "training is triggered, as in initialization with a corpus or calling `train()`. " + "The callbacks provided in this initialization without triggering train will " + "be ignored.") + + def build_vocab_and_train(self, corpus_iterable=None, corpus_file=None, trim_rule=None, callbacks=None): + if not (corpus_iterable is None) ^ (corpus_file is None): + raise ValueError("You must provide only one of corpus_iterable or corpus_file arguments.") + if corpus_file is not None and not isinstance(corpus_file, string_types): + raise TypeError("You must pass string as the corpus_file argument.") + elif isinstance(corpus_iterable, GeneratorType): + raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.") + # TODO: test for restartable? 
+ self.build_vocab(corpus_iterable=corpus_iterable, corpus_file=corpus_file, trim_rule=trim_rule) + self.train( + corpus_iterable=corpus_iterable, corpus_file=corpus_file, total_examples=self.corpus_count, + total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha, + end_alpha=self.min_alpha, compute_loss=self.compute_loss, callbacks=callbacks) + + def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000, + keep_raw_vocab=False, trim_rule=None, **kwargs): + """Build vocabulary from a sequence of sentences (can be a once-only generator stream). + + Parameters + ---------- + corpus_iterable : iterable of list of str + Can be simply a list of lists of tokens, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` module for such examples. + corpus_file : str, optional + Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. + You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or + `corpus_file` arguments need to be passed (not both of them). + update : bool + If true, the new words in `sentences` will be added to model's vocab. + progress_per : int, optional + Indicates how many words to process before showing/updating the progress. + keep_raw_vocab : bool, optional + If False, the raw vocabulary will be deleted after the scaling is done to free up RAM. + trim_rule : function, optional + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. + The rule, if given, is only used to prune vocabulary during current method call and is not stored as part + of the model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + **kwargs : object + Key word arguments propagated to `self.prepare_vocab` + + """ + total_words, corpus_count = self.scan_vocab( + corpus_iterable=corpus_iterable, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule) + self.corpus_count = corpus_count + self.corpus_total_words = total_words + report_values = self.prepare_vocab(update=update, keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, **kwargs) + report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words']) + self.prepare_weights(update=update) + + def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False): + """Build vocabulary from a dictionary of word frequencies. + + Parameters + ---------- + word_freq : dict of (str, int) + A mapping from a word in the vocabulary to its frequency count. + keep_raw_vocab : bool, optional + If False, delete the raw vocabulary after the scaling is done to free up RAM. + corpus_count : int, optional + Even if no corpus is provided, this argument can set corpus_count explicitly. 
+ trim_rule : function, optional + Vocabulary trimming rule, specifies whether certain words should remain in the vocabulary, + be trimmed away, or handled using the default (discard if word count < min_count). + Can be None (min_count will be used, look to :func:`~gensim.utils.keep_vocab_item`), + or a callable that accepts parameters (word, count, min_count) and returns either + :attr:`gensim.utils.RULE_DISCARD`, :attr:`gensim.utils.RULE_KEEP` or :attr:`gensim.utils.RULE_DEFAULT`. + The rule, if given, is only used to prune vocabulary during current method call and is not stored as part + of the model. + + The input parameters are of the following types: + * `word` (str) - the word we are examining + * `count` (int) - the word's frequency count in the corpus + * `min_count` (int) - the minimum count threshold. + + update : bool, optional + If true, the new provided words in `word_freq` dict will be added to model's vocab. + + """ + logger.info("Processing provided word frequencies") + # Instead of scanning text, this will assign provided word frequencies dictionary(word_freq) + # to be directly the raw vocab + raw_vocab = word_freq + logger.info( + "collected %i different raw word, with total frequency of %i", + len(raw_vocab), sum(itervalues(raw_vocab)) + ) + + # Since no sentences are provided, this is to control the corpus_count. + self.corpus_count = corpus_count or 0 + self.raw_vocab = raw_vocab + + # trim by min_count & precalculate downsampling + report_values = self.prepare_vocab(keep_raw_vocab=keep_raw_vocab, trim_rule=trim_rule, update=update) + report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words']) + self.prepare_weights(update=update) # build tables & arrays + + def _scan_vocab(self, sentences, progress_per, trim_rule): + sentence_no = -1 + total_words = 0 + min_reduce = 1 + vocab = defaultdict(int) + checked_string_types = 0 + for sentence_no, sentence in enumerate(sentences): + if not checked_string_types: + if isinstance(sentence, string_types): + logger.warning( + "Each 'sentences' item should be a list of words (usually unicode strings). 
" + "First item here is instead plain %s.", + type(sentence) + ) + checked_string_types += 1 + if sentence_no % progress_per == 0: + logger.info( + "PROGRESS: at sentence #%i, processed %i words, keeping %i word types", + sentence_no, total_words, len(vocab) + ) + for word in sentence: + vocab[word] += 1 + total_words += len(sentence) + + if self.max_vocab_size and len(vocab) > self.max_vocab_size: + utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) + min_reduce += 1 + + corpus_count = sentence_no + 1 + self.raw_vocab = vocab + return total_words, corpus_count + + def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10000, workers=None, trim_rule=None): + logger.info("collecting all words and their counts") + if corpus_file: + corpus_iterable = LineSentence(corpus_file) + + total_words, corpus_count = self._scan_vocab(corpus_iterable, progress_per, trim_rule) + + logger.info( + "collected %i word types from a corpus of %i raw words and %i sentences", + len(self.raw_vocab), total_words, corpus_count + ) + + return total_words, corpus_count + + def sort_vocab(self): + """Sort the vocabulary so the most frequent words have the lowest indexes.""" + if len(self.wv.vectors): + raise RuntimeError("cannot sort vocabulary after model weights already initialized.") + self.wv.index2key.sort(key=lambda word: self.wv.vocab[word].count, reverse=True) + for i, word in enumerate(self.wv.index2key): + self.wv.vocab[word].index = i + + def prepare_vocab( + self, update=False, keep_raw_vocab=False, trim_rule=None, + min_count=None, sample=None, dry_run=False): + """Apply vocabulary settings for `min_count` (discarding less-frequent words) + and `sample` (controlling the downsampling of more-frequent words). + + Calling with `dry_run=True` will only simulate the provided settings and + report the size of the retained vocabulary, effective corpus length, and + estimated memory requirements. Results are both printed via logging and + returned as a dict. + + Delete the raw vocabulary after the scaling is done to free up RAM, + unless `keep_raw_vocab` is set. 
+ + """ + min_count = min_count or self.min_count + sample = sample or self.sample + drop_total = drop_unique = 0 + + # set effective_min_count to min_count in case max_final_vocab isn't set + self.effective_min_count = min_count + + # if max_final_vocab is specified instead of min_count + # pick a min_count which satisfies max_final_vocab as well as possible + if self.max_final_vocab is not None: + sorted_vocab = sorted(self.raw_vocab.keys(), key=lambda word: self.raw_vocab[word], reverse=True) + calc_min_count = 1 + + if self.max_final_vocab < len(sorted_vocab): + calc_min_count = self.raw_vocab[sorted_vocab[self.max_final_vocab]] + 1 + + self.effective_min_count = max(calc_min_count, min_count) + logger.info( + "max_final_vocab=%d and min_count=%d resulted in calc_min_count=%d, effective_min_count=%d", + self.max_final_vocab, min_count, calc_min_count, self.effective_min_count + ) + + if not update: + logger.info("Loading a fresh vocabulary") + retain_total, retain_words = 0, [] + # Discard words less-frequent than min_count + if not dry_run: + self.wv.index2key = [] + # make stored settings match these applied settings + self.min_count = min_count + self.sample = sample + self.wv.vocab = {} + + for word, v in iteritems(self.raw_vocab): + if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule): + retain_words.append(word) + retain_total += v + if not dry_run: + self.wv.vocab[word] = W2VVocab(count=v, index=len(self.wv.index2key)) + self.wv.index2key.append(word) + else: + drop_unique += 1 + drop_total += v + original_unique_total = len(retain_words) + drop_unique + retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1) + logger.info( + "effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)", + self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique + ) + original_total = retain_total + drop_total + retain_pct = retain_total * 100 / max(original_total, 1) + logger.info( + "effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)", + self.effective_min_count, retain_total, retain_pct, original_total, drop_total + ) + else: + logger.info("Updating model with new vocabulary") + new_total = pre_exist_total = 0 + new_words = pre_exist_words = [] + for word, v in iteritems(self.raw_vocab): + if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule): + if word in self.wv.vocab: + pre_exist_words.append(word) + pre_exist_total += v + if not dry_run: + self.wv.vocab[word].count += v + else: + new_words.append(word) + new_total += v + if not dry_run: + self.wv.vocab[word] = W2VVocab(count=v, index=len(self.wv.index2key)) + self.wv.index2key.append(word) + else: + drop_unique += 1 + drop_total += v + original_unique_total = len(pre_exist_words) + len(new_words) + drop_unique + pre_exist_unique_pct = len(pre_exist_words) * 100 / max(original_unique_total, 1) + new_unique_pct = len(new_words) * 100 / max(original_unique_total, 1) + logger.info( + "New added %i unique words (%i%% of original %i) " + "and increased the count of %i pre-existing words (%i%% of original %i)", + len(new_words), new_unique_pct, original_unique_total, len(pre_exist_words), + pre_exist_unique_pct, original_unique_total + ) + retain_words = new_words + pre_exist_words + retain_total = new_total + pre_exist_total + + # Precalculate each vocabulary item's threshold for sampling + if not sample: + # no words downsampled + threshold_count = retain_total + elif sample < 1.0: + # 
traditional meaning: set parameter as proportion of total + threshold_count = sample * retain_total + else: + # new shorthand: sample >= 1 means downsample all words with higher count than sample + threshold_count = int(sample * (3 + sqrt(5)) / 2) + + downsample_total, downsample_unique = 0, 0 + for w in retain_words: + v = self.raw_vocab[w] + word_probability = (sqrt(v / threshold_count) + 1) * (threshold_count / v) + if word_probability < 1.0: + downsample_unique += 1 + downsample_total += word_probability * v + else: + word_probability = 1.0 + downsample_total += v + if not dry_run: + self.wv.vocab[w].sample_int = int(round(word_probability * 2**32)) + + if not dry_run and not keep_raw_vocab: + logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab)) + self.raw_vocab = defaultdict(int) + + logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique) + logger.info( + "downsampling leaves estimated %i word corpus (%.1f%% of prior %i)", + downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total + ) + + # return from each step: words-affected, resulting-corpus-size, extra memory estimates + report_values = { + 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique, + 'downsample_total': int(downsample_total), 'num_retained_words': len(retain_words) + } + + if self.null_word: + # create null pseudo-word for padding when using concatenative L1 (run-of-words) + # this word is only ever input – never predicted – so count, huffman-point, etc doesn't matter + self.add_null_word() + + if self.sorted_vocab and not update: + self.sort_vocab() + if self.hs: + # add info about each word's Huffman encoding + self.create_binary_tree() + if self.negative: + # build the table for drawing random words (for negative sampling) + self.make_cum_table() + + return report_values + + def estimate_memory(self, vocab_size=None, report=None): + """Estimate required memory for a model using current settings and provided vocabulary size. + + Parameters + ---------- + vocab_size : int, optional + Number of unique tokens in the vocabulary + report : dict of (str, int), optional + A dictionary from string representations of the model's memory consuming members to their size in bytes. + + Returns + ------- + dict of (str, int) + A dictionary from string representations of the model's memory consuming members to their size in bytes. + + """ + vocab_size = vocab_size or len(self.wv.vocab) + report = report or {} + report['vocab'] = vocab_size * (700 if self.hs else 500) + report['vectors'] = vocab_size * self.vector_size * dtype(REAL).itemsize + if self.hs: + report['syn1'] = vocab_size * self.layer1_size * dtype(REAL).itemsize + if self.negative: + report['syn1neg'] = vocab_size * self.layer1_size * dtype(REAL).itemsize + report['total'] = sum(report.values()) + logger.info( + "estimated required memory for %i words and %i dimensions: %i bytes", + vocab_size, self.vector_size, report['total'] + ) + return report + + def add_null_word(self): + word, v = '\0', W2VVocab(count=1, sample_int=0) + v.index = len(self.wv.vocab) + self.wv.index2key.append(word) + self.wv.vocab[word] = v + + def create_binary_tree(self): + """Create a `binary Huffman tree `_ using stored vocabulary + word counts. Frequent words will have shorter binary codes. + Called internally from :meth:`~gensim.models.word2vec.Word2VecVocab.build_vocab`. 
+ + """ + _assign_binary_codes(self.wv.vocab) + + def make_cum_table(self, domain=2**31 - 1): + """Create a cumulative-distribution table using stored vocabulary word counts for + drawing random words in the negative-sampling training routines. + + To draw a word index, choose a random integer up to the maximum value in the table (cum_table[-1]), + then finding that integer's sorted insertion point (as if by `bisect_left` or `ndarray.searchsorted()`). + That insertion point is the drawn index, coming up in proportion equal to the increment at that slot. - >>> from gensim.models import Word2Vec - >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] - >>> model = Word2Vec(sentences, min_count=1) + """ + vocab_size = len(self.wv.index2key) + self.cum_table = zeros(vocab_size, dtype=uint32) + # compute sum of all power (Z in paper) + train_words_pow = 0.0 + for word_index in range(vocab_size): + train_words_pow += self.wv.vocab[self.wv.index2key[word_index]].count**self.ns_exponent + cumulative = 0.0 + for word_index in range(vocab_size): + cumulative += self.wv.vocab[self.wv.index2key[word_index]].count**self.ns_exponent + self.cum_table[word_index] = round(cumulative / train_words_pow * domain) + if len(self.cum_table) > 0: + assert self.cum_table[-1] == domain - Some important attributes are the following: + def prepare_weights(self, update=False): + """Build tables and model weights based on final vocabulary settings.""" + # set initial input/projection and hidden weights + if not update: + self.reset_weights() + else: + self.update_weights() - Attributes - ---------- - wv : :class:`~gensim.models.keyedvectors.KeyedVectors` - This object essentially contains the mapping between words and embeddings. After training, it can be used - directly to query those embeddings in various ways. See the module level docstring for examples. + @deprecated("Use gensim.models.keyedvectors.pseudorandom_weak_vector() directly") + def seeded_vector(self, seed_string, vector_size): + return pseudorandom_weak_vector(vector_size, seed_string=seed_string, hashfxn=self.hashfxn) - vocabulary : :class:`~gensim.models.word2vec.Word2VecVocab` - This object represents the vocabulary (sometimes called Dictionary in gensim) of the model. - Besides keeping track of all unique words, this object provides extra functionality, such as - constructing a huffman tree (frequent words are closer to the root), or discarding extremely rare words. + def reset_weights(self): + """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary.""" + logger.info("resetting layer weights") + self.wv.resize_vectors() + self.wv.randomly_initialize_vectors(seed=self.seed) + if self.hs: + self.syn1 = zeros((len(self.wv.vocab), self.layer1_size), dtype=REAL) + if self.negative: + self.syn1neg = zeros((len(self.wv.vocab), self.layer1_size), dtype=REAL) - trainables : :class:`~gensim.models.word2vec.Word2VecTrainables` - This object represents the inner shallow neural network used to train the embeddings. The semantics - of the network differ slightly in the two available training modes (CBOW or SG) but you can think of it - as a NN with single projection and hidden layer which we train on the corpus. The weights are then used - as our embeddings (which means that the size of the hidden layer is equal to the number of features - `self.size`). 
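+        # vectors_lockf holds one lock-factor per word: 1.0 allows normal training updates, while 0.0
+        # freezes a word's vector (intersect_word2vec_format() sets these factors for imported vectors)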
+ self.wv.vectors_lockf = ones(len(self.wv.vocab), dtype=REAL) # zeros suppress learning - """ - self.max_final_vocab = max_final_vocab + def update_weights(self): + """Copy all the existing weights, and reset the weights for the newly added vocabulary.""" + logger.info("updating layer weights") + new_range = self.wv.resize_vectors() + gained_vocab = len(new_range) + self.wv.randomly_initialize_vectors(indexes=new_range) - self.callbacks = callbacks - self.load = call_on_class_only + # Raise an error if an online update is run before initial training on a corpus + if not len(self.wv.vectors): + raise RuntimeError( + "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " + "First build the vocabulary of your model with a corpus before doing an online update." + ) - self.wv = KeyedVectors(size) - self.vocabulary = Word2VecVocab( - max_vocab_size=max_vocab_size, min_count=min_count, sample=sample, sorted_vocab=bool(sorted_vocab), - null_word=null_word, max_final_vocab=max_final_vocab, ns_exponent=ns_exponent) - self.trainables = Word2VecTrainables(seed=seed, vector_size=size, hashfxn=hashfxn) + if self.hs: + self.syn1 = vstack([self.syn1, zeros((gained_vocab, self.layer1_size), dtype=REAL)]) + if self.negative: + pad = zeros((gained_vocab, self.layer1_size), dtype=REAL) + self.syn1neg = vstack([self.syn1neg, pad]) + self.wv.vectors_norm = None - super(Word2Vec, self).__init__( - sentences=sentences, corpus_file=corpus_file, workers=workers, vector_size=size, epochs=iter, - callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window, - seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss) + # do not suppress learning for already learned words + self.wv.vectors_lockf = ones(len(self.wv.vocab), dtype=REAL) # zeros suppress learning def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch, total_examples=None, total_words=None, **kwargs): @@ -428,14 +897,10 @@ def _clear_post_train(self): """Remove all L2-normalized word vectors from the model.""" self.wv.vectors_norm = None - def _set_train_params(self, **kwargs): - if 'compute_loss' in kwargs: - self.compute_loss = kwargs['compute_loss'] - self.running_training_loss = 0 - - def train(self, sentences=None, corpus_file=None, total_examples=None, total_words=None, + def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None, epochs=None, start_alpha=None, end_alpha=None, word_count=0, - queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=()): + queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(), + **kwargs): """Update the model's neural weights from a sequence of sentences. Notes @@ -454,63 +919,699 @@ def train(self, sentences=None, corpus_file=None, total_examples=None, total_wor Parameters ---------- - sentences : iterable of list of str - The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, - consider an iterable that streams the sentences directly from disk/network. - See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` - or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. - See also the `tutorial on data streaming in Python - `_. - corpus_file : str, optional - Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. 
- You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or - `corpus_file` arguments need to be passed (not both of them). + corpus_iterable : iterable of list of str + The `sentences` iterable can be simply a list of lists of tokens, but for larger corpora, + consider an iterable that streams the sentences directly from disk/network. + See :class:`~gensim.models.word2vec.BrownCorpus`, :class:`~gensim.models.word2vec.Text8Corpus` + or :class:`~gensim.models.word2vec.LineSentence` in :mod:`~gensim.models.word2vec` module for such examples. + See also the `tutorial on data streaming in Python + `_. + corpus_file : str, optional + Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. + You may use this argument instead of `sentences` to get performance boost. Only one of `sentences` or + `corpus_file` arguments need to be passed (not both of them). + total_examples : int + Count of sentences. + total_words : int + Count of raw words in sentences. + epochs : int + Number of iterations (epochs) over the corpus. + start_alpha : float, optional + Initial learning rate. If supplied, replaces the starting `alpha` from the constructor, + for this one call to`train()`. + Use only if making multiple calls to `train()`, when you want to manage the alpha learning-rate yourself + (not recommended). + end_alpha : float, optional + Final learning rate. Drops linearly from `start_alpha`. + If supplied, this replaces the final `min_alpha` from the constructor, for this one call to `train()`. + Use only if making multiple calls to `train()`, when you want to manage the alpha learning-rate yourself + (not recommended). + word_count : int, optional + Count of words already trained. Set this to 0 for the usual + case of training on all words in sentences. + queue_factor : int, optional + Multiplier for size of queue (number of workers * queue_factor). + report_delay : float, optional + Seconds to wait before reporting progress. + compute_loss: bool, optional + If True, computes and stores loss value which can be retrieved using + :meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`. + callbacks : iterable of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional + Sequence of callbacks to be executed at specific stages during training. + + Examples + -------- + .. 
sourcecode:: pycon
+
+            >>> from gensim.models import Word2Vec
+            >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
+            >>>
+            >>> model = Word2Vec(min_count=1)
+            >>> model.build_vocab(sentences)  # prepare the model vocabulary
+            >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs)  # train word vectors
+            (1, 30)
+
+        """
+        self.alpha = start_alpha or self.alpha
+        self.min_alpha = end_alpha or self.min_alpha
+        self.epochs = epochs
+
+        self._check_training_sanity(
+            epochs=epochs,
+            total_examples=total_examples,
+            total_words=total_words)
+
+        self.compute_loss = compute_loss
+        self.running_training_loss = 0.0
+
+        for callback in callbacks:
+            callback.on_train_begin(self)
+
+        trained_word_count = 0
+        raw_word_count = 0
+        start = default_timer() - 0.00001
+        job_tally = 0
+
+        for cur_epoch in range(self.epochs):
+            for callback in callbacks:
+                callback.on_epoch_begin(self)
+
+            if corpus_iterable is not None:
+                trained_word_count_epoch, raw_word_count_epoch, job_tally_epoch = self._train_epoch(
+                    corpus_iterable, cur_epoch=cur_epoch, total_examples=total_examples,
+                    total_words=total_words, queue_factor=queue_factor, report_delay=report_delay,
+                    callbacks=callbacks, **kwargs)
+            else:
+                trained_word_count_epoch, raw_word_count_epoch, job_tally_epoch = self._train_epoch_corpusfile(
+                    corpus_file, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words,
+                    callbacks=callbacks, **kwargs)
+
+            trained_word_count += trained_word_count_epoch
+            raw_word_count += raw_word_count_epoch
+            job_tally += job_tally_epoch
+
+            for callback in callbacks:
+                callback.on_epoch_end(self)
+
+        # Log overall time
+        total_elapsed = default_timer() - start
+        self._log_train_end(raw_word_count, trained_word_count, total_elapsed, job_tally)
+
+        self.train_count += 1  # number of times train() has been called
+        self._clear_post_train()
+
+        for callback in callbacks:
+            callback.on_train_end(self)
+        return trained_word_count, raw_word_count
+
+    def _worker_loop_corpusfile(self, corpus_file, thread_id, offset, cython_vocab, progress_queue, cur_epoch=0,
+                                total_examples=None, total_words=None, **kwargs):
+        """Train the model on a `corpus_file` in LineSentence format.
+
+        This function will be called in parallel by multiple workers (threads or processes) to make
+        optimal use of multicore machines.
+
+        Parameters
+        ----------
+        corpus_file : str
+            Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format.
+        thread_id : int
+            Thread index starting from 0 to `number of workers - 1`.
+        offset : int
+            Offset (in bytes) in the `corpus_file` for a particular worker.
+        cython_vocab : :class:`~gensim.models.word2vec_inner.CythonVocab`
+            Copy of the vocabulary in order to access it without GIL.
+        progress_queue : Queue of (int, int, int)
+            A queue of progress reports. Each report is represented as a tuple of these 3 elements:
+                * Size of data chunk processed, for example number of sentences in the corpus chunk.
+                * Effective word count used in training (after ignoring unknown words and trimming the sentence length).
+                * Total word count used in training.
+        **kwargs : object
+            Additional keyword parameters for the specific model inheriting from this class.
+ + """ + thread_private_mem = self._get_thread_working_mem() + + examples, tally, raw_tally = self._do_train_epoch( + corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch, + total_examples=total_examples, total_words=total_words, **kwargs) + + progress_queue.put((examples, tally, raw_tally)) + progress_queue.put(None) + + def _worker_loop(self, job_queue, progress_queue): + """Train the model, lifting batches of data from the queue. + + This function will be called in parallel by multiple workers (threads or processes) to make + optimal use of multicore machines. + + Parameters + ---------- + job_queue : Queue of (list of objects, (str, int)) + A queue of jobs still to be processed. The worker will take up jobs from this queue. + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + progress_queue : Queue of (int, int, int) + A queue of progress reports. Each report is represented as a tuple of these 3 elements: + * Size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + + """ + thread_private_mem = self._get_thread_working_mem() + jobs_processed = 0 + callbacks = progress_queue.callbacks + while True: + job = job_queue.get() + if job is None: + progress_queue.put(None) + break # no more jobs => quit this worker + data_iterable, job_parameters = job + + for callback in callbacks: + callback.on_batch_begin(self) + + tally, raw_tally = self._do_train_job(data_iterable, job_parameters, thread_private_mem) + + for callback in callbacks: + callback.on_batch_end(self) + + progress_queue.put((len(data_iterable), tally, raw_tally)) # report back progress + jobs_processed += 1 + logger.debug("worker exiting, processed %i jobs", jobs_processed) + + def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=None, total_words=None): + """Fill the jobs queue using the data found in the input stream. + + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is a dictionary of parameters. + + Parameters + ---------- + data_iterator : iterable of list of objects + The input dataset. This will be split in chunks and these chunks will be pushed to the queue. + job_queue : Queue of (list of object, dict of (str, int)) + A queue of jobs still to be processed. The worker will take up jobs from this queue. + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences + in a corpus. Used to log progress. + total_words : int, optional + Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw + words in a corpus. Used to log progress. 
+ + """ + job_batch, batch_size = [], 0 + pushed_words, pushed_examples = 0, 0 + next_job_params = self._get_job_params(cur_epoch) + job_no = 0 + + for data_idx, data in enumerate(data_iterator): + data_length = self._raw_word_count([data]) + + # can we fit this sentence into the existing job batch? + if batch_size + data_length <= self.batch_words: + # yes => add it to the current job + job_batch.append(data) + batch_size += data_length + else: + job_no += 1 + job_queue.put((job_batch, next_job_params)) + + # update the learning rate for the next job + if total_examples: + # examples-based decay + pushed_examples += len(job_batch) + epoch_progress = 1.0 * pushed_examples / total_examples + else: + # words-based decay + pushed_words += self._raw_word_count(job_batch) + epoch_progress = 1.0 * pushed_words / total_words + next_job_params = self._update_job_params(next_job_params, epoch_progress, cur_epoch) + + # add the sentence that didn't fit as the first item of a new job + job_batch, batch_size = [data], data_length + # add the last job too (may be significantly smaller than batch_words) + if job_batch: + job_no += 1 + job_queue.put((job_batch, next_job_params)) + + if job_no == 0 and self.train_count == 0: + logger.warning( + "train() called with an empty iterator (if not intended, " + "be sure to provide a corpus that offers restartable iteration = an iterable)." + ) + + # give the workers heads up that they can finish -- no more work! + for _ in range(self.workers): + job_queue.put(None) + logger.debug("job loop exiting, total %i jobs", job_no) + + def _log_epoch_progress(self, progress_queue=None, job_queue=None, cur_epoch=0, total_examples=None, + total_words=None, report_delay=1.0, is_corpus_file_mode=None): + """Get the progress report for a single training epoch. + + Parameters + ---------- + progress_queue : Queue of (int, int, int) + A queue of progress reports. Each report is represented as a tuple of these 3 elements: + * size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + job_queue : Queue of (list of object, dict of (str, int)) + A queue of jobs still to be processed. The worker will take up jobs from this queue. + Each job is represented by a tuple where the first element is the corpus chunk to be processed and + the second is the dictionary of parameters. + cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences + in a corpus. Used to log progress. + total_words : int, optional + Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw + words in a corpus. Used to log progress. + report_delay : float, optional + Number of seconds between two consecutive progress report messages in the logger. + is_corpus_file_mode : bool, optional + Whether training is file-based (corpus_file argument) or not. + + Returns + ------- + (int, int, int) + The epoch report consisting of three elements: + * size of data chunk processed, for example number of sentences in the corpus chunk. 
+ * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. + + """ + example_count, trained_word_count, raw_word_count = 0, 0, 0 + start, next_report = default_timer() - 0.00001, 1.0 + job_tally = 0 + unfinished_worker_count = self.workers + + while unfinished_worker_count > 0: + report = progress_queue.get() # blocks if workers too slow + if report is None: # a thread reporting that it finished + unfinished_worker_count -= 1 + logger.info("worker thread finished; awaiting finish of %i more threads", unfinished_worker_count) + continue + examples, trained_words, raw_words = report + job_tally += 1 + + # update progress stats + example_count += examples + trained_word_count += trained_words # only words in vocab & sampled + raw_word_count += raw_words + + # log progress once every report_delay seconds + elapsed = default_timer() - start + if elapsed >= next_report: + self._log_progress( + job_queue, progress_queue, cur_epoch, example_count, total_examples, + raw_word_count, total_words, trained_word_count, elapsed) + next_report = elapsed + report_delay + # all done; report the final stats + elapsed = default_timer() - start + self._log_epoch_end( + cur_epoch, example_count, total_examples, raw_word_count, total_words, + trained_word_count, elapsed, is_corpus_file_mode) + self.total_train_time += elapsed + return trained_word_count, raw_word_count, job_tally + + def _train_epoch_corpusfile( + self, corpus_file, cur_epoch=0, total_examples=None, total_words=None, callbacks=(), **kwargs): + """Train the model for a single epoch. + + Parameters + ---------- + corpus_file : str + Path to a corpus file in :class:`~gensim.models.word2vec.LineSentence` format. + cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences + in a corpus, used to log progress. + total_words : int + Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw + words in a corpus, used to log progress. Must be provided in order to seek in `corpus_file`. + **kwargs : object + Additional key word parameters for the specific model inheriting from this class. + + Returns + ------- + (int, int, int) + The training report for this epoch consisting of three elements: + * Size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. 
+ + """ + if not total_words: + raise ValueError("total_words must be provided alongside corpus_file argument.") + + from gensim.models.word2vec_corpusfile import CythonVocab + from gensim.models.fasttext import FastText + cython_vocab = CythonVocab(self.wv, hs=self.hs, fasttext=isinstance(self, FastText)) + + progress_queue = Queue() + + corpus_file_size = os.path.getsize(corpus_file) + + thread_kwargs = copy.copy(kwargs) + thread_kwargs['cur_epoch'] = cur_epoch + thread_kwargs['total_examples'] = total_examples + thread_kwargs['total_words'] = total_words + workers = [ + threading.Thread( + target=self._worker_loop_corpusfile, + args=( + corpus_file, thread_id, corpus_file_size / self.workers * thread_id, cython_vocab, progress_queue + ), + kwargs=thread_kwargs + ) for thread_id in range(self.workers) + ] + + for thread in workers: + thread.daemon = True + thread.start() + + trained_word_count, raw_word_count, job_tally = self._log_epoch_progress( + progress_queue=progress_queue, job_queue=None, cur_epoch=cur_epoch, + total_examples=total_examples, total_words=total_words, is_corpus_file_mode=True) + + return trained_word_count, raw_word_count, job_tally + + def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_words=None, + queue_factor=2, report_delay=1.0, callbacks=()): + """Train the model for a single epoch. + + Parameters + ---------- + data_iterable : iterable of list of object + The input corpus. This will be split in chunks and these chunks will be pushed to the queue. + cur_epoch : int, optional + The current training epoch, needed to compute the training parameters for each job. + For example in many implementations the learning rate would be dropping with the number of epochs. + total_examples : int, optional + Count of objects in the `data_iterator`. In the usual case this would correspond to the number of sentences + in a corpus, used to log progress. + total_words : int, optional + Count of total objects in `data_iterator`. In the usual case this would correspond to the number of raw + words in a corpus, used to log progress. + queue_factor : int, optional + Multiplier for size of queue -> size = number of workers * queue_factor. + report_delay : float, optional + Number of seconds between two consecutive progress report messages in the logger. + + Returns + ------- + (int, int, int) + The training report for this epoch consisting of three elements: + * Size of data chunk processed, for example number of sentences in the corpus chunk. + * Effective word count used in training (after ignoring unknown words and trimming the sentence length). + * Total word count used in training. 
+ + """ + job_queue = Queue(maxsize=queue_factor * self.workers) + progress_queue = Queue(maxsize=(queue_factor + 1) * self.workers) + progress_queue.callbacks = callbacks # messy way to pass along for just this session + + workers = [ + threading.Thread( + target=self._worker_loop, + args=(job_queue, progress_queue,)) + for _ in range(self.workers) + ] + + workers.append(threading.Thread( + target=self._job_producer, + args=(data_iterable, job_queue), + kwargs={'cur_epoch': cur_epoch, 'total_examples': total_examples, 'total_words': total_words})) + + for thread in workers: + thread.daemon = True # make interrupting the process with ctrl+c easier + thread.start() + + trained_word_count, raw_word_count, job_tally = self._log_epoch_progress( + progress_queue, job_queue, cur_epoch=cur_epoch, total_examples=total_examples, total_words=total_words, + report_delay=report_delay, is_corpus_file_mode=False) + + return trained_word_count, raw_word_count, job_tally + + def _get_job_params(self, cur_epoch): + """Get the learning rate used in the current epoch. + + Parameters + ---------- + cur_epoch : int + Current iteration through the corpus + + Returns + ------- + float + The learning rate for this epoch (it is linearly reduced with epochs from `self.alpha` to `self.min_alpha`). + + """ + alpha = self.alpha - ((self.alpha - self.min_alpha) * float(cur_epoch) / self.epochs) + return alpha + + def _update_job_params(self, job_params, epoch_progress, cur_epoch): + """Get the correct learning rate for the next iteration. + + Parameters + ---------- + job_params : dict of (str, obj) + UNUSED. + epoch_progress : float + Ratio of finished work in the current epoch. + cur_epoch : int + Number of current iteration. + + Returns + ------- + float + The learning rate to be used in the next training epoch. + + """ + start_alpha = self.alpha + end_alpha = self.min_alpha + progress = (cur_epoch + epoch_progress) / self.epochs + next_alpha = start_alpha - (start_alpha - end_alpha) * progress + next_alpha = max(end_alpha, next_alpha) + self.min_alpha_yet_reached = next_alpha + return next_alpha + + def _get_thread_working_mem(self): + """Computes the memory used per worker thread. + + Returns + ------- + (np.ndarray, np.ndarray) + Each worker threads private work memory. + + """ + work = matutils.zeros_aligned(self.layer1_size, dtype=REAL) # per-thread private work memory + neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) + return work, neu1 + + def _raw_word_count(self, job): + """Get the number of words in a given job. + + Parameters + ---------- + job: iterable of list of str + The corpus chunk processed in a single batch. + + Returns + ------- + int + Number of raw words in the corpus chunk. + + """ + return sum(len(sentence) for sentence in job) + + def _check_training_sanity(self, epochs=None, total_examples=None, total_words=None, **kwargs): + """Checks whether the training parameters make sense. + + Called right before training starts in :meth:`~gensim.models.base_any2vec.BaseWordEmbeddingsModel.train` + and raises warning or errors depending on the severity of the issue in case an inconsistent parameter + combination is detected. + + Parameters + ---------- + epochs : int, optional + Number of training epochs. Must have a (non None) value. + total_examples : int, optional + Number of documents in the corpus. Either `total_examples` or `total_words` **must** be supplied. + total_words : int, optional + Number of words in the corpus. Either `total_examples` or `total_words` **must** be supplied. 
+        **kwargs : object
+            Unused. Present to preserve signature among base and inherited implementations.
+
+        Raises
+        ------
+        RuntimeError
+            If one of the required training pre/post processing steps has not been performed.
+        ValueError
+            If the combination of input parameters is inconsistent.
+
+        """
+        if self.alpha > self.min_alpha_yet_reached:
+            logger.warning("Effective 'alpha' higher than previous training cycles")
+
+        if not self.wv.vocab:  # should be set by `build_vocab`
+            raise RuntimeError("you must first build vocabulary before training the model")
+        if not len(self.wv.vectors):
+            raise RuntimeError("you must initialize vectors before training the model")
+
+        if not hasattr(self, 'corpus_count'):
+            raise ValueError(
+                "The number of examples in the training corpus is missing. "
+                "Please make sure this is set inside `build_vocab` function. "
+                "Call the `build_vocab` function before calling `train`."
+            )
+
+        if total_words is None and total_examples is None:
+            raise ValueError(
+                "You must specify either total_examples or total_words, for proper learning-rate "
+                "and progress calculations. "
+                "The usual value is total_examples=model.corpus_count."
+            )
+        if epochs is None:
+            raise ValueError("You must specify an explicit epochs count. The usual value is epochs=model.epochs.")
+        logger.info(
+            "training model with %i workers on %i vocabulary and %i features, "
+            "using sg=%s hs=%s sample=%s negative=%s window=%s",
+            self.workers, len(self.wv.vocab), self.layer1_size, self.sg,
+            self.hs, self.sample, self.negative, self.window
+        )
+
+    def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, total_examples,
+                      raw_word_count, total_words, trained_word_count, elapsed):
+        """Callback used to log progress for long-running jobs.
+
+        Parameters
+        ----------
+        job_queue : Queue of (list of object, dict of (str, float))
+            The queue of jobs still to be performed by workers. Each job is represented as a tuple containing
+            the batch of data to be processed and the parameters to be used for the processing as a dict.
+        progress_queue : Queue of (int, int, int)
+            A queue of progress reports. Each report is represented as a tuple of these 3 elements:
+                * Size of data chunk processed, for example number of sentences in the corpus chunk.
+                * Effective word count used in training (after ignoring unknown words and trimming the sentence length).
+                * Total word count used in training.
+        cur_epoch : int
+            The current training iteration through the corpus.
+        example_count : int
+            Number of examples (could be sentences, for example) processed until now.
+        total_examples : int
+            Number of all examples present in the input corpus.
+        raw_word_count : int
+            Number of words used in training until now.
+        total_words : int
+            Number of all words in the input corpus.
+        trained_word_count : int
+            Number of effective words used in training until now (after ignoring unknown words and trimming
+            the sentence length).
+        elapsed : int
+            Elapsed time since the beginning of training in seconds.
+
+        Notes
+        -----
+        If you train the model via the `corpus_file` argument, there is no job_queue, so the reported job_queue
+        size will always be equal to -1.
+ + """ + if total_examples: + # examples-based progress % + logger.info( + "EPOCH %i - PROGRESS: at %.2f%% examples, %.0f words/s, in_qsize %i, out_qsize %i", + cur_epoch + 1, 100.0 * example_count / total_examples, trained_word_count / elapsed, + -1 if job_queue is None else utils.qsize(job_queue), utils.qsize(progress_queue) + ) + else: + # words-based progress % + logger.info( + "EPOCH %i - PROGRESS: at %.2f%% words, %.0f words/s, in_qsize %i, out_qsize %i", + cur_epoch + 1, 100.0 * raw_word_count / total_words, trained_word_count / elapsed, + -1 if job_queue is None else utils.qsize(job_queue), utils.qsize(progress_queue) + ) + + def _log_epoch_end(self, cur_epoch, example_count, total_examples, raw_word_count, total_words, + trained_word_count, elapsed, is_corpus_file_mode): + """Callback used to log the end of a training epoch. + + Parameters + ---------- + cur_epoch : int + The current training iteration through the corpus. + example_count : int + Number of examples (could be sentences for example) processed until now. total_examples : int - Count of sentences. + Number of all examples present in the input corpus. + raw_word_count : int + Number of words used in training until now. total_words : int - Count of raw words in sentences. - epochs : int - Number of iterations (epochs) over the corpus. - start_alpha : float, optional - Initial learning rate. If supplied, replaces the starting `alpha` from the constructor, - for this one call to`train()`. - Use only if making multiple calls to `train()`, when you want to manage the alpha learning-rate yourself - (not recommended). - end_alpha : float, optional - Final learning rate. Drops linearly from `start_alpha`. - If supplied, this replaces the final `min_alpha` from the constructor, for this one call to `train()`. - Use only if making multiple calls to `train()`, when you want to manage the alpha learning-rate yourself - (not recommended). - word_count : int, optional - Count of words already trained. Set this to 0 for the usual - case of training on all words in sentences. - queue_factor : int, optional - Multiplier for size of queue (number of workers * queue_factor). - report_delay : float, optional - Seconds to wait before reporting progress. - compute_loss: bool, optional - If True, computes and stores loss value which can be retrieved using - :meth:`~gensim.models.word2vec.Word2Vec.get_latest_training_loss`. - callbacks : iterable of :class:`~gensim.models.callbacks.CallbackAny2Vec`, optional - Sequence of callbacks to be executed at specific stages during training. + Number of all words in the input corpus. + trained_word_count : int + Number of effective words used in training until now (after ignoring unknown words and trimming + the sentence length). + elapsed : int + Elapsed time since the beginning of training in seconds. + is_corpus_file_mode : bool + Whether training is file-based (corpus_file argument) or not. - Examples + Warnings -------- - .. sourcecode:: pycon + In case the corpus is changed while the epoch was running. 
- >>> from gensim.models import Word2Vec - >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]] - >>> - >>> model = Word2Vec(min_count=1) - >>> model.build_vocab(sentences) # prepare the model vocabulary - >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) # train word vectors - (1, 30) + """ + logger.info( + "EPOCH - %i : training on %i raw words (%i effective words) took %.1fs, %.0f effective words/s", + cur_epoch + 1, raw_word_count, trained_word_count, elapsed, trained_word_count / elapsed + ) + + # don't warn if training in file-based mode, because it's expected behavior + if is_corpus_file_mode: + return + + # check that the input corpus hasn't changed during iteration + if total_examples and total_examples != example_count: + logger.warning( + "EPOCH - %i : supplied example count (%i) did not equal expected count (%i)", cur_epoch + 1, + example_count, total_examples + ) + if total_words and total_words != raw_word_count: + logger.warning( + "EPOCH - %i : supplied raw word count (%i) did not equal expected count (%i)", cur_epoch + 1, + raw_word_count, total_words + ) + + def _log_train_end(self, raw_word_count, trained_word_count, total_elapsed, job_tally): + """Callback to log the end of training. + + Parameters + ---------- + raw_word_count : int + Number of words used in the whole training. + trained_word_count : int + Number of effective words used in training (after ignoring unknown words and trimming the sentence length). + total_elapsed : int + Total time spent during training in seconds. + job_tally : int + Total number of jobs processed during training. """ - return super(Word2Vec, self).train( - sentences=sentences, corpus_file=corpus_file, total_examples=total_examples, total_words=total_words, - epochs=epochs, start_alpha=start_alpha, end_alpha=end_alpha, word_count=word_count, - queue_factor=queue_factor, report_delay=report_delay, compute_loss=compute_loss, callbacks=callbacks) + logger.info( + "training on a %i raw words (%i effective words) took %.1fs, %.0f effective words/s", + raw_word_count, trained_word_count, total_elapsed, trained_word_count / total_elapsed + ) def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor=2, report_delay=1): """Score the log probability for a sequence of sentences. 
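The training loop in the hunk above fires per-stage callback hooks (`on_train_begin`, `on_epoch_begin`/`on_epoch_end`, `on_batch_begin`/`on_batch_end`, `on_train_end`). A minimal sketch of hooking into them, assuming gensim's :class:`~gensim.models.callbacks.CallbackAny2Vec` base class (the `EpochLogger` name and the toy corpus are illustrative, not part of this patch):

.. sourcecode:: pycon

    >>> from gensim.models import Word2Vec
    >>> from gensim.models.callbacks import CallbackAny2Vec
    >>>
    >>> class EpochLogger(CallbackAny2Vec):
    ...     """Count and report finished epochs."""
    ...     def __init__(self):
    ...         self.epoch = 0
    ...     def on_epoch_end(self, model):
    ...         self.epoch += 1
    ...         print("finished epoch %d" % self.epoch)
    >>>
    >>> sentences = [["cat", "say", "meow"], ["dog", "say", "woof"]]
    >>> model = Word2Vec(min_count=1)
    >>> model.build_vocab(sentences)
    >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs, callbacks=[EpochLogger()])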
@@ -547,8 +1648,8 @@ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor logger.info( "scoring sentences with %i workers on %i vocabulary and %i features, " "using sg=%s hs=%s sample=%s and negative=%s", - self.workers, len(self.wv.vocab), self.trainables.layer1_size, self.sg, self.hs, - self.vocabulary.sample, self.negative + self.workers, len(self.wv.vocab), self.layer1_size, self.sg, self.hs, + self.sample, self.negative ) if not self.wv.vocab: @@ -563,7 +1664,7 @@ def score(self, sentences, total_sentences=int(1e6), chunksize=100, queue_factor def worker_loop(): """Compute log probability for each sentence, lifting lists of sentences from the jobs queue.""" work = zeros(1, dtype=REAL) # for sg hs, we actually only need one memory loc (running sum) - neu1 = matutils.zeros_aligned(self.trainables.layer1_size, dtype=REAL) + neu1 = matutils.zeros_aligned(self.layer1_size, dtype=REAL) while True: job = job_queue.get() if job is None: # signal to finish @@ -696,7 +1797,7 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut if word in self.wv.vocab: overlap_count += 1 self.wv.vectors[self.wv.vocab[word].index] = weights - self.trainables.vectors_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0=no changes + self.wv.vectors_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0=no changes else: for line_no, line in enumerate(fin): parts = utils.to_unicode(line.rstrip(), encoding=encoding, errors=unicode_errors).split(" ") @@ -706,7 +1807,7 @@ def intersect_word2vec_format(self, fname, lockf=0.0, binary=False, encoding='ut if word in self.wv.vocab: overlap_count += 1 self.wv.vectors[self.wv.vocab[word].index] = weights - self.trainables.vectors_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0=no changes + self.wv.vectors_lockf[self.wv.vocab[word].index] = lockf # lock-factor: 0.0=no changes logger.info("merged %d vectors into %s matrix from %s", overlap_count, self.wv.vectors.shape, fname) def predict_output_word(self, context_words_list, topn=10): @@ -731,12 +1832,12 @@ def predict_output_word(self, context_words_list, topn=10): "so you need to have run word2vec with negative > 0 for this to work." 
) - if not hasattr(self.wv, 'vectors') or not hasattr(self.trainables, 'syn1neg'): + if not hasattr(self.wv, 'vectors') or not hasattr(self, 'syn1neg'): raise RuntimeError("Parameters required for predicting the output words not found.") word_vocabs = [self.wv.vocab[w] for w in context_words_list if w in self.wv.vocab] if not word_vocabs: - warnings.warn("All the input context words are out-of-vocabulary for the current model.") + logger.warning("All the input context words are out-of-vocabulary for the current model.") return None word2_indices = [word.index for word in word_vocabs] @@ -746,7 +1847,7 @@ def predict_output_word(self, context_words_list, topn=10): l1 /= len(word2_indices) # propagate hidden -> output and take softmax to get probabilities - prob_values = exp(dot(l1, self.trainables.syn1neg.T)) + prob_values = exp(dot(l1, self.syn1neg.T)) prob_values /= sum(prob_values) top_indices = matutils.argsort(prob_values, topn=topn, reverse=True) # returning the most probable output words with their probabilities @@ -771,9 +1872,9 @@ def reset_from(self, other_model): """ self.wv.vocab = other_model.wv.vocab self.wv.index2key = other_model.wv.index2key - self.vocabulary.cum_table = other_model.vocabulary.cum_table + self.cum_table = other_model.cum_table self.corpus_count = other_model.corpus_count - self.trainables.reset_weights(self.hs, self.negative, self.wv) + self.reset_weights() def __str__(self): """Human readable representation of the model's state. @@ -816,7 +1917,7 @@ def get_latest_training_loss(self): return self.running_training_loss @classmethod - def load(cls, *args, **kwargs): + def load(cls, *args, rethrow=False, **kwargs): """Load a previously saved :class:`~gensim.models.word2vec.Word2Vec` model. See Also @@ -837,17 +1938,51 @@ def load(cls, *args, **kwargs): """ try: model = super(Word2Vec, cls).load(*args, **kwargs) - - # for backward compatibility for `max_final_vocab` feature + if not isinstance(model, Word2Vec): + rethrow = True + raise AttributeError("Model of type %s can't be loaded by %s" % (type(model), str(cls))) + # for backward compatibility + if not hasattr(model, 'ns_exponent'): + model.ns_exponent = 0.75 + if model.negative and hasattr(model.wv, 'index2word'): + model.make_cum_table() # rebuild cum_table from vocabulary ## TODO: ??? 
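+            # models saved by older gensim versions may lack corpus statistics; default the missing attributes to None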
+ if not hasattr(model, 'corpus_count'): + model.corpus_count = None + if not hasattr(model, 'corpus_total_words'): + model.corpus_total_words = None + if not hasattr(model.wv, 'vectors_lockf') and hasattr(model.wv, 'vectors'): + model.wv.vectors_lockf = getattr(model, 'vectors_lockf', ones(len(model.wv.vectors), dtype=REAL)) + if not hasattr(model, 'random'): + model.random = np.random.RandomState(model.seed) + if not hasattr(model, 'train_count'): + model.train_count = 0 + model.total_train_time = 0 + if not hasattr(model, 'epochs'): + model.epochs = model.iter + del model.iter if not hasattr(model, 'max_final_vocab'): model.max_final_vocab = None - model.vocabulary.max_final_vocab = None - + if hasattr(model, 'vocabulary'): # re-integrate state that had been moved + for a in ('max_vocab_size', 'min_count', 'sample', 'sorted_vocab', 'null_word', 'raw_vocab'): + setattr(model, a, getattr(model.vocabulary, a)) + del model.vocabulary + if hasattr(model, 'trainables'): # re-integrate state that had been moved + for a in ('hashfxn', 'layer1_size', 'seed', 'syn1neg', 'syn1'): + if hasattr(model.trainables, a): + setattr(model, a, getattr(model.trainables, a)) + if hasattr(model, 'syn1'): + model.syn1 = model.syn1 + del model.syn1 + del model.trainables return model - except AttributeError: - logger.info('Model saved using code from earlier Gensim Version. Re-loading old model in a compatible way.') - from gensim.models.deprecated.word2vec import load_old_word2vec - return load_old_word2vec(*args, **kwargs) + except AttributeError as ae: + if rethrow: + raise ae + logger.error( + "Model load error. Was model saved using code from an older Gensim Version? " + "Try loading older model using gensim-3.8.1, then re-saving, to restore " + "compatibility with current code.") + raise ae class BrownCorpus(object): @@ -934,412 +2069,122 @@ def __iter__(self): """Iterate through the lines in the source.""" try: # Assume it is a file-like object and try treating it as such - # Things that don't have seek will trigger an exception - self.source.seek(0) - for line in itertools.islice(self.source, self.limit): - line = utils.to_unicode(line).split() - i = 0 - while i < len(line): - yield line[i: i + self.max_sentence_length] - i += self.max_sentence_length - except AttributeError: - # If it didn't work like a file, use it as a string filename - with utils.open(self.source, 'rb') as fin: - for line in itertools.islice(fin, self.limit): - line = utils.to_unicode(line).split() - i = 0 - while i < len(line): - yield line[i: i + self.max_sentence_length] - i += self.max_sentence_length - - -class PathLineSentences(object): - def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None): - """Like :class:`~gensim.models.word2vec.LineSentence`, but process all files in a directory - in alphabetical order by filename. - - The directory must only contain files that can be read by :class:`gensim.models.word2vec.LineSentence`: - .bz2, .gz, and text files. Any file not ending with .bz2 or .gz is assumed to be a text file. - - The format of files (either text, or compressed text files) in the path is one sentence = one line, - with words already preprocessed and separated by whitespace. - - Warnings - -------- - Does **not recurse** into subdirectories. - - Parameters - ---------- - source : str - Path to the directory. - limit : int or None - Read only the first `limit` lines from each file. Read all if limit is None (the default). 
- - """ - self.source = source - self.max_sentence_length = max_sentence_length - self.limit = limit - - if os.path.isfile(self.source): - logger.debug('single file given as source, rather than a directory of files') - logger.debug('consider using models.word2vec.LineSentence for a single file') - self.input_files = [self.source] # force code compatibility with list of files - elif os.path.isdir(self.source): - self.source = os.path.join(self.source, '') # ensures os-specific slash at end of path - logger.info('reading directory %s', self.source) - self.input_files = os.listdir(self.source) - self.input_files = [self.source + filename for filename in self.input_files] # make full paths - self.input_files.sort() # makes sure it happens in filename order - else: # not a file or a directory, then we can't do anything with it - raise ValueError('input is neither a file nor a path') - logger.info('files read into PathLineSentences:%s', '\n'.join(self.input_files)) - - def __iter__(self): - """iterate through the files""" - for file_name in self.input_files: - logger.info('reading file %s', file_name) - with utils.open(file_name, 'rb') as fin: - for line in itertools.islice(fin, self.limit): - line = utils.to_unicode(line).split() - i = 0 - while i < len(line): - yield line[i:i + self.max_sentence_length] - i += self.max_sentence_length - - -def _scan_vocab_worker(stream, progress_queue, max_vocab_size=None, trim_rule=None): - """Do an initial scan of all words appearing in stream. - - Note: This function can not be Word2VecVocab's method because - of multiprocessing synchronization specifics in Python. - """ - min_reduce = 1 - vocab = defaultdict(int) - checked_string_types = 0 - sentence_no = -1 - total_words = 0 - for sentence_no, sentence in enumerate(stream): - if not checked_string_types: - if isinstance(sentence, string_types): - log_msg = "Each 'sentences' item should be a list of words (usually unicode strings). " \ - "First item here is instead plain %s." 
% type(sentence) - progress_queue.put(log_msg) - - checked_string_types += 1 - - for word in sentence: - vocab[word] += 1 - - if max_vocab_size and len(vocab) > max_vocab_size: - utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) - min_reduce += 1 - - total_words += len(sentence) - - progress_queue.put((total_words, sentence_no + 1)) - progress_queue.put(None) - return vocab - - -@dataclass -class W2VVocab: - """A dataclass shape-compatible with keyedvectors.SimpleVocab, extended with the - `sample_int` property needed by `Word2Vec` models.""" - __slots__ = ('count', 'index', 'sample_int') - count: int - index: int - sample_int: int - - def __init__(self, count=0, index=0, sample_int=2**32): - self.count, self.index, self.sample_int = count, index, sample_int - - def __lt__(self, other): - return self.count < other.count - - -@dataclass -class W2VHSVocab: - """A dataclass shape-compatible with W2VVocab, extended with the `code` and - `point` properties needed by hierarchical-sampling (`hs=1`) `Word2Vec` models.""" - __slots__ = ('count', 'index', 'sample_int', 'code', 'point') - count: int - index: int - sample_int: int - code: List[int] - point: List[int] - - def __init__(self, count=0, index=0, sample_int=2**32, code=None, point=None): - self.count, self.index, self.sample_int, self.code, self.point = \ - count, index, sample_int, code, point - - def __lt__(self, other): - return self.count < other.count - - -class Word2VecVocab(utils.SaveLoad): - def __init__( - self, max_vocab_size=None, min_count=5, sample=1e-3, sorted_vocab=True, null_word=0, - max_final_vocab=None, ns_exponent=0.75): - """Vocabulary used by :class:`~gensim.models.word2vec.Word2Vec`.""" - self.max_vocab_size = max_vocab_size - self.min_count = min_count - self.sample = sample - self.sorted_vocab = sorted_vocab - self.null_word = null_word - self.cum_table = None # for negative sampling - self.raw_vocab = None - self.max_final_vocab = max_final_vocab - self.ns_exponent = ns_exponent - - def _scan_vocab(self, sentences, progress_per, trim_rule): - sentence_no = -1 - total_words = 0 - min_reduce = 1 - vocab = defaultdict(int) - checked_string_types = 0 - for sentence_no, sentence in enumerate(sentences): - if not checked_string_types: - if isinstance(sentence, string_types): - logger.warning( - "Each 'sentences' item should be a list of words (usually unicode strings). 
" - "First item here is instead plain %s.", - type(sentence) - ) - checked_string_types += 1 - if sentence_no % progress_per == 0: - logger.info( - "PROGRESS: at sentence #%i, processed %i words, keeping %i word types", - sentence_no, total_words, len(vocab) - ) - for word in sentence: - vocab[word] += 1 - total_words += len(sentence) - - if self.max_vocab_size and len(vocab) > self.max_vocab_size: - utils.prune_vocab(vocab, min_reduce, trim_rule=trim_rule) - min_reduce += 1 - - corpus_count = sentence_no + 1 - self.raw_vocab = vocab - return total_words, corpus_count - - def scan_vocab(self, sentences=None, corpus_file=None, progress_per=10000, workers=None, trim_rule=None): - logger.info("collecting all words and their counts") - if corpus_file: - sentences = LineSentence(corpus_file) - - total_words, corpus_count = self._scan_vocab(sentences, progress_per, trim_rule) - - logger.info( - "collected %i word types from a corpus of %i raw words and %i sentences", - len(self.raw_vocab), total_words, corpus_count - ) - - return total_words, corpus_count - - def sort_vocab(self, wv): - """Sort the vocabulary so the most frequent words have the lowest indexes.""" - if len(wv.vectors): - raise RuntimeError("cannot sort vocabulary after model weights already initialized.") - wv.index2key.sort(key=lambda word: wv.vocab[word].count, reverse=True) - for i, word in enumerate(wv.index2key): - wv.vocab[word].index = i - - def prepare_vocab( - self, hs, negative, wv, update=False, keep_raw_vocab=False, trim_rule=None, - min_count=None, sample=None, dry_run=False): - """Apply vocabulary settings for `min_count` (discarding less-frequent words) - and `sample` (controlling the downsampling of more-frequent words). - - Calling with `dry_run=True` will only simulate the provided settings and - report the size of the retained vocabulary, effective corpus length, and - estimated memory requirements. Results are both printed via logging and - returned as a dict. - - Delete the raw vocabulary after the scaling is done to free up RAM, - unless `keep_raw_vocab` is set. - - """ - min_count = min_count or self.min_count - sample = sample or self.sample - drop_total = drop_unique = 0 + # Things that don't have seek will trigger an exception + self.source.seek(0) + for line in itertools.islice(self.source, self.limit): + line = utils.to_unicode(line).split() + i = 0 + while i < len(line): + yield line[i: i + self.max_sentence_length] + i += self.max_sentence_length + except AttributeError: + # If it didn't work like a file, use it as a string filename + with utils.open(self.source, 'rb') as fin: + for line in itertools.islice(fin, self.limit): + line = utils.to_unicode(line).split() + i = 0 + while i < len(line): + yield line[i: i + self.max_sentence_length] + i += self.max_sentence_length - # set effective_min_count to min_count in case max_final_vocab isn't set - self.effective_min_count = min_count - # if max_final_vocab is specified instead of min_count - # pick a min_count which satisfies max_final_vocab as well as possible - if self.max_final_vocab is not None: - sorted_vocab = sorted(self.raw_vocab.keys(), key=lambda word: self.raw_vocab[word], reverse=True) - calc_min_count = 1 +class PathLineSentences(object): + def __init__(self, source, max_sentence_length=MAX_WORDS_IN_BATCH, limit=None): + """Like :class:`~gensim.models.word2vec.LineSentence`, but process all files in a directory + in alphabetical order by filename. 
- if self.max_final_vocab < len(sorted_vocab): - calc_min_count = self.raw_vocab[sorted_vocab[self.max_final_vocab]] + 1 + The directory must only contain files that can be read by :class:`gensim.models.word2vec.LineSentence`: + .bz2, .gz, and text files. Any file not ending with .bz2 or .gz is assumed to be a text file. - self.effective_min_count = max(calc_min_count, min_count) - logger.info( - "max_final_vocab=%d and min_count=%d resulted in calc_min_count=%d, effective_min_count=%d", - self.max_final_vocab, min_count, calc_min_count, self.effective_min_count - ) + The format of files (either text, or compressed text files) in the path is one sentence = one line, + with words already preprocessed and separated by whitespace. - if not update: - logger.info("Loading a fresh vocabulary") - retain_total, retain_words = 0, [] - # Discard words less-frequent than min_count - if not dry_run: - wv.index2key = [] - # make stored settings match these applied settings - self.min_count = min_count - self.sample = sample - wv.vocab = {} + Warnings + -------- + Does **not recurse** into subdirectories. - for word, v in iteritems(self.raw_vocab): - if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule): - retain_words.append(word) - retain_total += v - if not dry_run: - wv.vocab[word] = W2VVocab(count=v, index=len(wv.index2key)) - wv.index2key.append(word) - else: - drop_unique += 1 - drop_total += v - original_unique_total = len(retain_words) + drop_unique - retain_unique_pct = len(retain_words) * 100 / max(original_unique_total, 1) - logger.info( - "effective_min_count=%d retains %i unique words (%i%% of original %i, drops %i)", - self.effective_min_count, len(retain_words), retain_unique_pct, original_unique_total, drop_unique - ) - original_total = retain_total + drop_total - retain_pct = retain_total * 100 / max(original_total, 1) - logger.info( - "effective_min_count=%d leaves %i word corpus (%i%% of original %i, drops %i)", - self.effective_min_count, retain_total, retain_pct, original_total, drop_total - ) - else: - logger.info("Updating model with new vocabulary") - new_total = pre_exist_total = 0 - new_words = pre_exist_words = [] - for word, v in iteritems(self.raw_vocab): - if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule): - if word in wv.vocab: - pre_exist_words.append(word) - pre_exist_total += v - if not dry_run: - wv.vocab[word].count += v - else: - new_words.append(word) - new_total += v - if not dry_run: - wv.vocab[word] = W2VVocab(count=v, index=len(wv.index2key)) - wv.index2key.append(word) - else: - drop_unique += 1 - drop_total += v - original_unique_total = len(pre_exist_words) + len(new_words) + drop_unique - pre_exist_unique_pct = len(pre_exist_words) * 100 / max(original_unique_total, 1) - new_unique_pct = len(new_words) * 100 / max(original_unique_total, 1) - logger.info( - "New added %i unique words (%i%% of original %i) " - "and increased the count of %i pre-existing words (%i%% of original %i)", - len(new_words), new_unique_pct, original_unique_total, len(pre_exist_words), - pre_exist_unique_pct, original_unique_total - ) - retain_words = new_words + pre_exist_words - retain_total = new_total + pre_exist_total + Parameters + ---------- + source : str + Path to the directory. + limit : int or None + Read only the first `limit` lines from each file. Read all if limit is None (the default). 
- # Precalculate each vocabulary item's threshold for sampling - if not sample: - # no words downsampled - threshold_count = retain_total - elif sample < 1.0: - # traditional meaning: set parameter as proportion of total - threshold_count = sample * retain_total - else: - # new shorthand: sample >= 1 means downsample all words with higher count than sample - threshold_count = int(sample * (3 + sqrt(5)) / 2) + """ + self.source = source + self.max_sentence_length = max_sentence_length + self.limit = limit - downsample_total, downsample_unique = 0, 0 - for w in retain_words: - v = self.raw_vocab[w] - word_probability = (sqrt(v / threshold_count) + 1) * (threshold_count / v) - if word_probability < 1.0: - downsample_unique += 1 - downsample_total += word_probability * v - else: - word_probability = 1.0 - downsample_total += v - if not dry_run: - wv.vocab[w].sample_int = int(round(word_probability * 2**32)) + if os.path.isfile(self.source): + logger.debug('single file given as source, rather than a directory of files') + logger.debug('consider using models.word2vec.LineSentence for a single file') + self.input_files = [self.source] # force code compatibility with list of files + elif os.path.isdir(self.source): + self.source = os.path.join(self.source, '') # ensures os-specific slash at end of path + logger.info('reading directory %s', self.source) + self.input_files = os.listdir(self.source) + self.input_files = [self.source + filename for filename in self.input_files] # make full paths + self.input_files.sort() # makes sure it happens in filename order + else: # not a file or a directory, then we can't do anything with it + raise ValueError('input is neither a file nor a path') + logger.info('files read into PathLineSentences:%s', '\n'.join(self.input_files)) - if not dry_run and not keep_raw_vocab: - logger.info("deleting the raw counts dictionary of %i items", len(self.raw_vocab)) - self.raw_vocab = defaultdict(int) + def __iter__(self): + """iterate through the files""" + for file_name in self.input_files: + logger.info('reading file %s', file_name) + with utils.open(file_name, 'rb') as fin: + for line in itertools.islice(fin, self.limit): + line = utils.to_unicode(line).split() + i = 0 + while i < len(line): + yield line[i:i + self.max_sentence_length] + i += self.max_sentence_length - logger.info("sample=%g downsamples %i most-common words", sample, downsample_unique) - logger.info( - "downsampling leaves estimated %i word corpus (%.1f%% of prior %i)", - downsample_total, downsample_total * 100.0 / max(retain_total, 1), retain_total - ) - # return from each step: words-affected, resulting-corpus-size, extra memory estimates - report_values = { - 'drop_unique': drop_unique, 'retain_total': retain_total, 'downsample_unique': downsample_unique, - 'downsample_total': int(downsample_total), 'num_retained_words': len(retain_words) - } +@dataclass +class W2VVocab: + """A dataclass shape-compatible with keyedvectors.SimpleVocab, extended with the + `sample_int` property needed by `Word2Vec` models.""" + __slots__ = ('count', 'index', 'sample_int') + count: int + index: int + sample_int: int - if self.null_word: - # create null pseudo-word for padding when using concatenative L1 (run-of-words) - # this word is only ever input – never predicted – so count, huffman-point, etc doesn't matter - self.add_null_word(wv) + def __init__(self, count=0, index=0, sample_int=2**32): + self.count, self.index, self.sample_int = count, index, sample_int - if self.sorted_vocab and not update: - 
self.sort_vocab(wv) - if hs: - # add info about each word's Huffman encoding - self.create_binary_tree(wv) - if negative: - # build the table for drawing random words (for negative sampling) - self.make_cum_table(wv) + def __lt__(self, other): + return self.count < other.count - return report_values - def add_null_word(self, wv): - word, v = '\0', W2VVocab(count=1, sample_int=0) - v.index = len(wv.vocab) - wv.index2key.append(word) - wv.vocab[word] = v +@dataclass +class W2VHSVocab: + """A dataclass shape-compatible with W2VVocab, extended with the `code` and + `point` properties needed by hierarchical-softmax (`hs=1`) `Word2Vec` models.""" + __slots__ = ('count', 'index', 'sample_int', 'code', 'point') + count: int + index: int + sample_int: int + code: List[int] + point: List[int] - def create_binary_tree(self, wv): - """Create a `binary Huffman tree `_ using stored vocabulary - word counts. Frequent words will have shorter binary codes. - Called internally from :meth:`~gensim.models.word2vec.Word2VecVocab.build_vocab`. + def __init__(self, count=0, index=0, sample_int=2**32, code=None, point=None): + self.count, self.index, self.sample_int, self.code, self.point = \ + count, index, sample_int, code, point - """ - _assign_binary_codes(wv.vocab) - def make_cum_table(self, wv, domain=2**31 - 1): - """Create a cumulative-distribution table using stored vocabulary word counts for - drawing random words in the negative-sampling training routines. - To draw a word index, choose a random integer up to the maximum value in the table (cum_table[-1]), - then finding that integer's sorted insertion point (as if by `bisect_left` or `ndarray.searchsorted()`). - That insertion point is the drawn index, coming up in proportion equal to the increment at that slot. +class Word2VecVocab(utils.SaveLoad): + """Obsolete class retained for now as load-compatibility state capture""" + pass - Called internally from :meth:`~gensim.models.word2vec.Word2VecVocab.build_vocab`.
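To make the cumulative-table draw described in the removed docstring above concrete, here is a hedged sketch with a hypothetical three-word table; `numpy.searchsorted` plays the role of `bisect_left`, mirroring how the negative-sampling routines consume `cum_table`:

.. sourcecode:: pycon

    >>> import numpy as np
    >>>
    >>> cum_table = np.array([600, 900, 1000], dtype=np.uint32)  # hypothetical counts**ns_exponent, cumulated and scaled to `domain`
    >>> draw = np.random.randint(cum_table[-1])             # random integer up to the table maximum
    >>> negative_index = int(cum_table.searchsorted(draw))  # insertion point = drawn word index, in proportion to its increment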
- """ - vocab_size = len(wv.index2key) - self.cum_table = zeros(vocab_size, dtype=uint32) - # compute sum of all power (Z in paper) - train_words_pow = 0.0 - for word_index in range(vocab_size): - train_words_pow += wv.vocab[wv.index2key[word_index]].count**self.ns_exponent - cumulative = 0.0 - for word_index in range(vocab_size): - cumulative += wv.vocab[wv.index2key[word_index]].count**self.ns_exponent - self.cum_table[word_index] = round(cumulative / train_words_pow * domain) - if len(self.cum_table) > 0: - assert self.cum_table[-1] == domain +class Word2VecTrainables(utils.SaveLoad): + """Obsolete class retained for now as load-compatibility state capture""" + pass class Heapitem(namedtuple('Heapitem', 'count, index, left, right')): @@ -1409,62 +2254,6 @@ def _assign_binary_codes(vocab): logger.info("built huffman tree with maximum node depth %i", max_depth) -class Word2VecTrainables(utils.SaveLoad): - def __init__(self, vector_size=100, seed=1, hashfxn=hash): - """Represents the inner shallow neural network used to train :class:`~gensim.models.word2vec.Word2Vec`.""" - self.hashfxn = hashfxn - self.layer1_size = vector_size - self.seed = seed - - def prepare_weights(self, hs, negative, wv, update=False, vocabulary=None): - """Build tables and model weights based on final vocabulary settings.""" - # set initial input/projection and hidden weights - if not update: - self.reset_weights(hs, negative, wv) - else: - self.update_weights(hs, negative, wv) - - @deprecated("Use gensim.models.keyedvectors.pseudorandom_weak_vector() directly") - def seeded_vector(self, seed_string, vector_size): - return pseudorandom_weak_vector(vector_size, seed_string=seed_string, hashfxn=self.hashfxn) - - def reset_weights(self, hs, negative, wv): - """Reset all projection weights to an initial (untrained) state, but keep the existing vocabulary.""" - logger.info("resetting layer weights") - wv.resize_vectors() - wv.randomly_initialize_vectors(seed=self.seed) - if hs: - self.syn1 = zeros((len(wv.vocab), self.layer1_size), dtype=REAL) - if negative: - self.syn1neg = zeros((len(wv.vocab), self.layer1_size), dtype=REAL) - - self.vectors_lockf = ones(len(wv.vocab), dtype=REAL) # zeros suppress learning - - def update_weights(self, hs, negative, wv): - """Copy all the existing weights, and reset the weights for the newly added vocabulary.""" - logger.info("updating layer weights") - new_range = wv.resize_vectors() - gained_vocab = len(new_range) - wv.randomly_initialize_vectors(indexes=new_range) - - # Raise an error if an online update is run before initial training on a corpus - if not len(wv.vectors): - raise RuntimeError( - "You cannot do an online vocabulary-update of a model which has no prior vocabulary. " - "First build the vocabulary of your model with a corpus before doing an online update." 
- ) - - if hs: - self.syn1 = vstack([self.syn1, zeros((gained_vocab, self.layer1_size), dtype=REAL)]) - if negative: - pad = zeros((gained_vocab, self.layer1_size), dtype=REAL) - self.syn1neg = vstack([self.syn1neg, pad]) - wv.vectors_norm = None - - # do not suppress learning for already learned words - self.vectors_lockf = ones(len(wv.vocab), dtype=REAL) # zeros suppress learning - - # Example: ./word2vec.py -train data.txt -output vec.txt -size 200 -window 5 -sample 1e-4 \ # -negative 5 -hs 0 -binary 0 -cbow 1 -iter 3 if __name__ == "__main__": diff --git a/gensim/models/word2vec_inner.pyx b/gensim/models/word2vec_inner.pyx index 0576773bd5..076ff54b1c 100755 --- a/gensim/models/word2vec_inner.pyx +++ b/gensim/models/word2vec_inner.pyx @@ -467,7 +467,7 @@ cdef unsigned long long w2v_fast_sentence_cbow_neg( cdef init_w2v_config(Word2VecConfig *c, model, alpha, compute_loss, _work, _neu1=None): c[0].hs = model.hs c[0].negative = model.negative - c[0].sample = (model.vocabulary.sample != 0) + c[0].sample = (model.sample != 0) c[0].cbow_mean = model.cbow_mean c[0].window = model.window c[0].workers = model.workers @@ -476,17 +476,17 @@ cdef init_w2v_config(Word2VecConfig *c, model, alpha, compute_loss, _work, _neu1 c[0].running_training_loss = model.running_training_loss c[0].syn0 = (np.PyArray_DATA(model.wv.vectors)) - c[0].word_locks = (np.PyArray_DATA(model.trainables.vectors_lockf)) + c[0].word_locks = (np.PyArray_DATA(model.wv.vectors_lockf)) c[0].alpha = alpha c[0].size = model.wv.vector_size if c[0].hs: - c[0].syn1 = (np.PyArray_DATA(model.trainables.syn1)) + c[0].syn1 = (np.PyArray_DATA(model.syn1)) if c[0].negative: - c[0].syn1neg = (np.PyArray_DATA(model.trainables.syn1neg)) - c[0].cum_table = (np.PyArray_DATA(model.vocabulary.cum_table)) - c[0].cum_table_len = len(model.vocabulary.cum_table) + c[0].syn1neg = (np.PyArray_DATA(model.syn1neg)) + c[0].cum_table = (np.PyArray_DATA(model.cum_table)) + c[0].cum_table_len = len(model.cum_table) if c[0].negative or c[0].sample: c[0].next_random = (2**24) * model.random.randint(0, 2**24) + model.random.randint(0, 2**24) @@ -709,7 +709,7 @@ def score_sentence_sg(model, sentence, _work): cdef long result = 0 cdef int sentence_len - c.syn1 = (np.PyArray_DATA(model.trainables.syn1)) + c.syn1 = (np.PyArray_DATA(model.syn1)) # convert Python structures to primitive types, so we can release the GIL c.work = np.PyArray_DATA(_work) @@ -804,7 +804,7 @@ def score_sentence_cbow(model, sentence, _work, _neu1): cdef int i, j, k cdef long result = 0 - c.syn1 = (np.PyArray_DATA(model.trainables.syn1)) + c.syn1 = (np.PyArray_DATA(model.syn1)) # convert Python structures to primitive types, so we can release the GIL c.work = np.PyArray_DATA(_work) diff --git a/gensim/models/wrappers/__init__.py b/gensim/models/wrappers/__init__.py index 9cd14ea8e7..330abce500 100644 --- a/gensim/models/wrappers/__init__.py +++ b/gensim/models/wrappers/__init__.py @@ -5,6 +5,5 @@ from .ldamallet import LdaMallet # noqa:F401 from .dtmmodel import DtmModel # noqa:F401 from .ldavowpalwabbit import LdaVowpalWabbit # noqa:F401 -from .fasttext import FastText # noqa:F401 from .wordrank import Wordrank # noqa:F401 from .varembed import VarEmbed # noqa:F401 diff --git a/gensim/models/wrappers/fasttext.py b/gensim/models/wrappers/fasttext.py deleted file mode 100644 index bca36c7cb9..0000000000 --- a/gensim/models/wrappers/fasttext.py +++ /dev/null @@ -1,40 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Author: Jayant Jain -# Copyright (C) 2017 Radim Rehurek -# 
Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - - -""" -Warnings --------- -.. deprecated:: 3.2.0 - Use :mod:`gensim.models.fasttext` instead. - - - -Python wrapper around word representation learning from FastText, a library for efficient learning -of word representations and sentence classification [1]. - -This module allows training a word embedding from a training corpus with the additional ability -to obtain word vectors for out-of-vocabulary words, using the fastText C implementation. - -The wrapped model can NOT be updated with new documents for online training -- use gensim's -`Word2Vec` for that. - -Example: - -.. sourcecode:: pycon - - >>> from gensim.models.wrappers import FastText - >>> model = FastText.train('/Users/kofola/fastText/fasttext', corpus_file='text8') - >>> print(model['forests']) # prints vector for given out-of-vocabulary word - -.. [1] https://github.com/facebookresearch/fastText#enriching-word-vectors-with-subword-information - - - -""" -from gensim.models.deprecated.fasttext_wrapper import FastText, FastTextKeyedVectors # noqa:F401 -from gensim.models.deprecated.fasttext_wrapper import ft_hash, compute_ngrams # noqa:F401 diff --git a/gensim/sklearn_api/d2vmodel.py b/gensim/sklearn_api/d2vmodel.py index fa154a2497..c49d1b2baf 100644 --- a/gensim/sklearn_api/d2vmodel.py +++ b/gensim/sklearn_api/d2vmodel.py @@ -36,9 +36,10 @@ class D2VTransformer(TransformerMixin, BaseEstimator): """ def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1, docvecs=None, - docvecs_mapfile=None, comment=None, trim_rule=None, size=100, alpha=0.025, window=5, min_count=5, - max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, hs=0, negative=5, cbow_mean=1, - hashfxn=hash, iter=5, sorted_vocab=1, batch_words=10000): + docvecs_mapfile=None, comment=None, trim_rule=None, vector_size=100, alpha=0.025, window=5, + min_count=5, max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, + hs=0, negative=5, cbow_mean=1, + hashfxn=hash, epochs=5, sorted_vocab=1, batch_words=10000): """ Parameters @@ -72,7 +73,7 @@ def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1 be trimmed away (:attr:`gensim.utils.RULE_DISCARD`), or handled using the default (:attr:`gensim.utils.RULE_DEFAULT`). If None, then :func:`gensim.utils.keep_vocab_item` will be used. - size : int, optional + vector_size : int, optional Dimensionality of the feature vectors. alpha : float, optional The initial learning rate. @@ -108,7 +109,7 @@ def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1 Same as `dm_mean`, **unused**. hashfxn : function (object -> int), optional A hashing function. Used to create an initial random reproducible vector by hashing the random seed. - iter : int, optional + epochs : int, optional Number of epochs to iterate through the corpus. sorted_vocab : bool, optional Whether the vocabulary should be sorted internally. 
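As with the `FTTransformer` and `W2VTransformer` examples elsewhere in this patch, a hedged sketch of the renamed `vector_size`/`epochs` keywords in use; `common_texts` is the toy corpus from `gensim.test.utils`, and the resulting vectors are not asserted here:

.. sourcecode:: pycon

    >>> from gensim.test.utils import common_texts
    >>> from gensim.sklearn_api import D2VTransformer
    >>>
    >>> model = D2VTransformer(vector_size=10, min_count=1, epochs=5, seed=1)
    >>> docvecs = model.fit(common_texts).transform(common_texts)  # 2D array of shape (len(common_texts), vector_size)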
@@ -128,7 +129,7 @@ def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1 self.trim_rule = trim_rule # attributes associated with gensim.models.Word2Vec - self.size = size + self.vector_size = vector_size self.alpha = alpha self.window = window self.min_count = min_count @@ -141,7 +142,7 @@ def __init__(self, dm_mean=None, dm=1, dbow_words=0, dm_concat=0, dm_tag_count=1 self.negative = negative self.cbow_mean = int(cbow_mean) self.hashfxn = hashfxn - self.iter = iter + self.epochs = epochs self.sorted_vocab = sorted_vocab self.batch_words = batch_words @@ -167,11 +168,11 @@ def fit(self, X, y=None): documents=d2v_sentences, dm_mean=self.dm_mean, dm=self.dm, dbow_words=self.dbow_words, dm_concat=self.dm_concat, dm_tag_count=self.dm_tag_count, docvecs=self.docvecs, docvecs_mapfile=self.docvecs_mapfile, comment=self.comment, - trim_rule=self.trim_rule, vector_size=self.size, alpha=self.alpha, window=self.window, + trim_rule=self.trim_rule, vector_size=self.vector_size, alpha=self.alpha, window=self.window, min_count=self.min_count, max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed, workers=self.workers, min_alpha=self.min_alpha, hs=self.hs, negative=self.negative, cbow_mean=self.cbow_mean, hashfxn=self.hashfxn, - epochs=self.iter, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words + epochs=self.epochs, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words ) return self diff --git a/gensim/sklearn_api/ftmodel.py b/gensim/sklearn_api/ftmodel.py index a1edd6c338..7acd22cfc2 100644 --- a/gensim/sklearn_api/ftmodel.py +++ b/gensim/sklearn_api/ftmodel.py @@ -18,7 +18,7 @@ >>> from gensim.sklearn_api import FTTransformer >>> >>> # Create a model to represent each word by a 10 dimensional vector. - >>> model = FTTransformer(size=10, min_count=1, seed=1) + >>> model = FTTransformer(vector_size=10, min_count=1, seed=1) >>> >>> # What is the vector representations of the word 'graph' and 'system'? >>> wordvecs = model.fit(common_texts).transform(['graph', 'system']) @@ -56,10 +56,10 @@ class FTTransformer(TransformerMixin, BaseEstimator): Information `_. """ - def __init__(self, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, + def __init__(self, sg=0, hs=0, vector_size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, word_ngrams=1, sample=1e-3, seed=1, workers=3, min_alpha=0.0001, negative=5, ns_exponent=0.75, - cbow_mean=1, hashfxn=hash, iter=5, null_word=0, min_n=3, + cbow_mean=1, hashfxn=hash, epochs=5, null_word=0, min_n=3, max_n=6, sorted_vocab=1, bucket=2000000, trim_rule=None, batch_words=10000): """ @@ -71,7 +71,7 @@ def __init__(self, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, hs : {1,0}, optional If 1, hierarchical softmax will be used for model training. If set to 0, and `negative` is non-zero, negative sampling will be used. - size : int, optional + vector_size : int, optional Dimensionality of the word vectors. alpha : float, optional The initial learning rate. @@ -113,7 +113,7 @@ def __init__(self, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. hashfxn : function, optional Hash function to use to randomly initialize weights, for increased training reproducibility. - iter : int, optional + epochs : int, optional Number of iterations (epochs) over the corpus. min_n : int, optional Minimum length of char n-grams to be used for training word representations. 
@@ -148,7 +148,7 @@ def __init__(self, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, self.gensim_model = None self.sg = sg self.hs = hs - self.size = size + self.vector_size = vector_size self.alpha = alpha self.window = window self.min_count = min_count @@ -162,7 +162,7 @@ def __init__(self, sg=0, hs=0, size=100, alpha=0.025, window=5, min_count=5, self.ns_exponent = ns_exponent self.cbow_mean = cbow_mean self.hashfxn = hashfxn - self.iter = iter + self.epochs = epochs self.null_word = null_word self.min_n = min_n self.max_n = max_n @@ -189,13 +189,13 @@ def fit(self, X, y=None): """ self.gensim_model = models.FastText( - sentences=X, sg=self.sg, hs=self.hs, size=self.size, + sentences=X, sg=self.sg, hs=self.hs, vector_size=self.vector_size, alpha=self.alpha, window=self.window, min_count=self.min_count, max_vocab_size=self.max_vocab_size, word_ngrams=self.word_ngrams, sample=self.sample, seed=self.seed, workers=self.workers, min_alpha=self.min_alpha, negative=self.negative, ns_exponent=self.ns_exponent, cbow_mean=self.cbow_mean, - hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word, + hashfxn=self.hashfxn, epochs=self.epochs, null_word=self.null_word, min_n=self.min_n, max_n=self.max_n, sorted_vocab=self.sorted_vocab, bucket=self.bucket, trim_rule=self.trim_rule, batch_words=self.batch_words @@ -212,7 +212,7 @@ def transform(self, words): Returns ------- - np.ndarray of shape [`len(words)`, `size`] + np.ndarray of shape [`len(words)`, `vector_size`] A 2D array where each row is the vector of one word. """ @@ -225,4 +225,4 @@ def transform(self, words): if isinstance(words, six.string_types): words = [words] vectors = [self.gensim_model.wv[word] for word in words] - return np.reshape(np.array(vectors), (len(words), self.size)) + return np.reshape(np.array(vectors), (len(words), self.vector_size)) diff --git a/gensim/sklearn_api/w2vmodel.py b/gensim/sklearn_api/w2vmodel.py index 07091c2dde..ae64b56e3e 100644 --- a/gensim/sklearn_api/w2vmodel.py +++ b/gensim/sklearn_api/w2vmodel.py @@ -18,7 +18,7 @@ >>> from gensim.sklearn_api import W2VTransformer >>> >>> # Create a model to represent each word by a 10 dimensional vector. - >>> model = W2VTransformer(size=10, min_count=1, seed=1) + >>> model = W2VTransformer(vector_size=10, min_count=1, seed=1) >>> >>> # What is the vector representation of the word 'graph'? >>> wordvecs = model.fit(common_texts).transform(['graph', 'system']) @@ -40,14 +40,14 @@ class W2VTransformer(TransformerMixin, BaseEstimator): Estimation of Word Representations in Vector Space" `_. """ - def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=1e-3, seed=1, - workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, iter=5, null_word=0, + def __init__(self, vector_size=100, alpha=0.025, window=5, min_count=5, max_vocab_size=None, sample=1e-3, seed=1, + workers=3, min_alpha=0.0001, sg=0, hs=0, negative=5, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0, trim_rule=None, sorted_vocab=1, batch_words=10000): """ Parameters ---------- - size : int + vector_size : int Dimensionality of the feature vectors. alpha : float The initial learning rate. @@ -85,7 +85,7 @@ def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size= If 0, use the sum of the context word vectors. If 1, use the mean, only applies when cbow is used. hashfxn : callable (object -> int), optional A hashing function. Used to create an initial random reproducible vector by hashing the random seed. 
- iter : int + epochs : int Number of iterations (epochs) over the corpus. null_word : int {1, 0} If 1, a null pseudo-word will be created for padding when using concatenative L1 (run-of-words) @@ -106,7 +106,7 @@ def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size= """ self.gensim_model = None - self.size = size + self.vector_size = vector_size self.alpha = alpha self.window = window self.min_count = min_count @@ -120,7 +120,7 @@ def __init__(self, size=100, alpha=0.025, window=5, min_count=5, max_vocab_size= self.negative = negative self.cbow_mean = int(cbow_mean) self.hashfxn = hashfxn - self.iter = iter + self.epochs = epochs self.null_word = null_word self.trim_rule = trim_rule self.sorted_vocab = sorted_vocab @@ -144,11 +144,11 @@ def fit(self, X, y=None): """ self.gensim_model = models.Word2Vec( - sentences=X, size=self.size, alpha=self.alpha, + sentences=X, vector_size=self.vector_size, alpha=self.alpha, window=self.window, min_count=self.min_count, max_vocab_size=self.max_vocab_size, sample=self.sample, seed=self.seed, workers=self.workers, min_alpha=self.min_alpha, sg=self.sg, hs=self.hs, negative=self.negative, cbow_mean=self.cbow_mean, - hashfxn=self.hashfxn, iter=self.iter, null_word=self.null_word, trim_rule=self.trim_rule, + hashfxn=self.hashfxn, epochs=self.epochs, null_word=self.null_word, trim_rule=self.trim_rule, sorted_vocab=self.sorted_vocab, batch_words=self.batch_words ) return self @@ -163,7 +163,7 @@ def transform(self, words): Returns ------- - np.ndarray of shape [`len(words)`, `size`] + np.ndarray of shape [`len(words)`, `vector_size`] A 2D array where each row is the vector of one word. """ @@ -176,7 +176,7 @@ def transform(self, words): if isinstance(words, six.string_types): words = [words] vectors = [self.gensim_model.wv[word] for word in words] - return np.reshape(np.array(vectors), (len(words), self.size)) + return np.reshape(np.array(vectors), (len(words), self.vector_size)) def partial_fit(self, X): raise NotImplementedError( diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py index 0dbd70e5a4..6eb09671de 100644 --- a/gensim/test/test_doc2vec.py +++ b/gensim/test/test_doc2vec.py @@ -75,7 +75,7 @@ def test_persistence(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_persistence_fromfile(self): """Test storing/loading the entire model.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) tmpf = get_tmpfile('gensim_doc2vec.tst') @@ -102,7 +102,7 @@ def testPersistenceWord2VecFormat(self): binary_model_dv = keyedvectors.KeyedVectors.load_word2vec_format(test_word, binary=True) self.assertEqual(len(model.wv.vocab), len(binary_model_dv.vocab)) - def testLoadOldModel(self): + def obsolete_testLoadOldModel(self): """Test loading an old doc2vec model from indeterminate version""" model_file = 'doc2vec_old' # which version?!? 
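The attribute relocations applied to word2vec_inner.pyx earlier in this patch, and asserted throughout the updated tests that follow, put the former `vocabulary`/`trainables` state directly on the model and its KeyedVectors. A hedged end-to-end sketch on the toy corpus, with hypothetical hyperparameters:

.. sourcecode:: pycon

    >>> from gensim.models import Word2Vec
    >>> from gensim.test.utils import common_texts
    >>>
    >>> model = Word2Vec(common_texts, vector_size=12, min_count=1, hs=1, negative=5, epochs=2, seed=42, workers=1)
    >>> model.min_count, model.sample           # formerly model.vocabulary.min_count / .sample
    >>> model.cum_table.shape                   # formerly model.vocabulary.cum_table
    >>> model.syn1.shape, model.syn1neg.shape   # formerly model.trainables.syn1 / .syn1neg
    >>> model.wv.vectors_lockf.shape            # formerly model.trainables.vectors_lockf
    >>> model.train(corpus_iterable=common_texts, total_examples=model.corpus_count, epochs=model.epochs)  # 'sentences' kwarg is now 'corpus_iterable'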
@@ -111,17 +111,17 @@ def testLoadOldModel(self): self.assertTrue(len(model.wv.vocab) == 3955) self.assertTrue(len(model.wv.index2word) == 3955) self.assertIsNone(model.corpus_total_words) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) - self.assertTrue(model.trainables.vectors_lockf.shape == (3955, )) - self.assertTrue(model.vocabulary.cum_table.shape == (3955, )) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) + self.assertTrue(model.wv.vectors_lockf.shape == (3955, )) + self.assertTrue(model.cum_table.shape == (3955, )) self.assertTrue(model.docvecs.vectors.shape == (300, 100)) - self.assertTrue(model.trainables.vectors_docs_lockf.shape == (300, )) + self.assertTrue(model.docvecs.vectors_lockf.shape == (300, )) self.assertTrue(len(model.docvecs) == 300) self.model_sanity(model) - def testLoadOldModelSeparates(self): + def obsolete_testLoadOldModelSeparates(self): """Test loading an old doc2vec model from indeterminate version""" # Model stored in multiple files @@ -131,16 +131,16 @@ def testLoadOldModelSeparates(self): self.assertTrue(len(model.wv.vocab) == 3955) self.assertTrue(len(model.wv.index2word) == 3955) self.assertIsNone(model.corpus_total_words) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) - self.assertTrue(model.trainables.vectors_lockf.shape == (3955, )) - self.assertTrue(model.vocabulary.cum_table.shape == (3955, )) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) + self.assertTrue(model.wv.vectors_lockf.shape == (3955, )) + self.assertTrue(model.cum_table.shape == (3955, )) self.assertTrue(model.docvecs.vectors.shape == (300, 100)) - self.assertTrue(model.trainables.vectors_docs_lockf.shape == (300, )) + self.assertTrue(model.docvecs.vectors_lockf.shape == (300, )) self.assertTrue(len(model.docvecs) == 300) self.model_sanity(model) - def test_load_old_models_pre_1_0(self): + def obsolete_test_load_old_models_pre_1_0(self): """Test loading pre-1.0 models""" model_file = 'd2v-lee-v0.13.0' model = doc2vec.Doc2Vec.load(datapath(model_file)) @@ -153,7 +153,7 @@ def test_load_old_models_pre_1_0(self): for old_version in old_versions: self._check_old_version(old_version) - def test_load_old_models_1_x(self): + def obsolete_test_load_old_models_1_x(self): """Test loading 1.x models""" old_versions = [ '1.0.0', '1.0.1', @@ -161,7 +161,7 @@ def test_load_old_models_1_x(self): for old_version in old_versions: self._check_old_version(old_version) - def test_load_old_models_2_x(self): + def obsolete_test_load_old_models_2_x(self): """Test loading 2.x models""" old_versions = [ '2.0.0', '2.1.0', '2.2.0', '2.3.0', @@ -169,10 +169,18 @@ def test_load_old_models_2_x(self): for old_version in old_versions: self._check_old_version(old_version) - def test_load_old_models_3_x(self): + def obsolete_test_load_old_models_pre_3_3(self): """Test loading 3.x models""" old_versions = [ - '3.0.0', '3.1.0', '3.2.0', '3.3.0', '3.4.0' + '3.2.0', '3.1.0', '3.0.0' + ] + for old_version in old_versions: + self._check_old_version(old_version) + + def obsolete_test_load_old_models_post_3_2(self): + """Test loading 3.x models""" + old_versions = [ + '3.4.0', '3.3.0', ] for old_version in old_versions: self._check_old_version(old_version) @@ -201,12 +209,12 @@ def _check_old_version(self, old_version): def testDoc2vecTrainParameters(self): model = doc2vec.Doc2Vec(vector_size=50) - model.build_vocab(documents=list_corpus) + 
model.build_vocab(corpus_iterable=list_corpus) self.assertRaises(TypeError, model.train, corpus_file=11111) - self.assertRaises(TypeError, model.train, documents=11111) - self.assertRaises(TypeError, model.train, documents=sentences, corpus_file='test') - self.assertRaises(TypeError, model.train, documents=None, corpus_file=None) + self.assertRaises(TypeError, model.train, corpus_iterable=11111) + self.assertRaises(TypeError, model.train, corpus_iterable=sentences, corpus_file='test') + self.assertRaises(TypeError, model.train, corpus_iterable=None, corpus_file=None) self.assertRaises(TypeError, model.train, corpus_file=sentences) @unittest.skipIf(os.name == 'nt', "See another test for Windows below") @@ -418,10 +426,10 @@ def model_sanity(self, model, keep_training=True): # keep training after save if keep_training: - tmpf = get_tmpfile('gensim_doc2vec.tst') + tmpf = get_tmpfile('gensim_doc2vec_resave.tst') model.save(tmpf) loaded = doc2vec.Doc2Vec.load(tmpf) - loaded.train(documents=sentences, total_examples=loaded.corpus_count, epochs=loaded.epochs) + loaded.train(corpus_iterable=sentences, total_examples=loaded.corpus_count, epochs=loaded.epochs) def test_training(self): """Test doc2vec training.""" @@ -440,7 +448,7 @@ def test_training(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_training_fromfile(self): """Test doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec(vector_size=100, min_count=2, epochs=20, workers=1) @@ -461,7 +469,7 @@ def test_dbow_hs(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dbow_hs_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec(corpus_file=corpus_file, dm=0, hs=1, negative=0, min_count=2, epochs=20) self.model_sanity(model) @@ -477,7 +485,7 @@ def test_dmm_hs(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dmm_hs_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec( list_corpus, dm=1, dm_mean=1, vector_size=24, window=4, @@ -496,7 +504,7 @@ def test_dms_hs(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dms_hs_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec( list_corpus, dm=1, dm_mean=0, vector_size=24, window=4, hs=1, @@ -515,7 +523,7 @@ def test_dmc_hs(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dmc_hs_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with 
temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec( list_corpus, dm=1, dm_concat=1, vector_size=24, window=4, @@ -531,7 +539,7 @@ def test_dbow_neg(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dbow_neg_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec(list_corpus, dm=0, hs=0, negative=10, min_count=2, epochs=20) self.model_sanity(model) @@ -547,7 +555,7 @@ def test_dmm_neg(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dmm_neg_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec( list_corpus, dm=1, dm_mean=1, vector_size=24, window=4, hs=0, @@ -566,7 +574,7 @@ def test_dms_neg(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dms_neg_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec( list_corpus, dm=1, dm_mean=0, vector_size=24, window=4, hs=0, @@ -585,7 +593,7 @@ def test_dmc_neg(self): @unittest.skipIf(os.name == 'nt' and six.PY2, "corpus_file training is not supported on Windows + Py27") def test_dmc_neg_fromfile(self): """Test DBOW doc2vec training.""" - with temporary_file(get_tmpfile('gensim_word2vec.tst')) as corpus_file: + with temporary_file(get_tmpfile('gensim_doc2vec.tst')) as corpus_file: save_lee_corpus_as_line_sentence(corpus_file) model = doc2vec.Doc2Vec( list_corpus, dm=1, dm_concat=1, vector_size=24, window=4, hs=0, @@ -641,9 +649,9 @@ def models_equal(self, model, model2): self.assertEqual(len(model.wv.vocab), len(model2.wv.vocab)) self.assertTrue(np.allclose(model.wv.vectors, model2.wv.vectors)) if model.hs: - self.assertTrue(np.allclose(model.trainables.syn1, model2.trainables.syn1)) + self.assertTrue(np.allclose(model.syn1, model2.syn1)) if model.negative: - self.assertTrue(np.allclose(model.trainables.syn1neg, model2.trainables.syn1neg)) + self.assertTrue(np.allclose(model.syn1neg, model2.syn1neg)) # check docvecs self.assertEqual(len(model.docvecs.map), len(model2.docvecs.map)) self.assertEqual(len(model.docvecs.index2key), len(model2.docvecs.index2key)) diff --git a/gensim/test/test_fasttext.py b/gensim/test/test_fasttext.py index 791386eb8d..1e4b431e88 100644 --- a/gensim/test/test_fasttext.py +++ b/gensim/test/test_fasttext.py @@ -16,14 +16,11 @@ from gensim import utils from gensim.models.word2vec import LineSentence -from gensim.models.fasttext import FastText as FT_gensim, _unpack, _unpack_copy -from gensim.models.wrappers.fasttext import FastTextKeyedVectors -from gensim.models.wrappers.fasttext import FastText as FT_wrapper +from gensim.models.fasttext import FastText as FT_gensim, FastTextKeyedVectors, _unpack, _unpack_copy from gensim.models.keyedvectors import KeyedVectors from gensim.test.utils import datapath, 
get_tmpfile, temporary_file, common_texts as sentences import gensim.models._fasttext_bin from gensim.models.fasttext_inner import compute_ngrams, compute_ngrams_bytes, ft_hash_broken, ft_hash_bytes -from gensim.models.fasttext import _unpack, _unpack_copy import gensim.models.fasttext @@ -70,7 +67,7 @@ def setUp(self): self.test_new_model_file = datapath('lee_fasttext_new.bin') def test_training(self): - model = FT_gensim(size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) + model = FT_gensim(vector_size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) model.build_vocab(sentences) self.model_sanity(model) @@ -90,7 +87,7 @@ def test_training(self): self.assertEqual(sims, sims2) # build vocab and train in one step; must be the same as above - model2 = FT_gensim(sentences, size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) + model2 = FT_gensim(sentences, vector_size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) self.models_equal(model, model2) # verify oov-word vector retrieval @@ -102,20 +99,20 @@ def test_training(self): def testFastTextTrainParameters(self): - model = FT_gensim(size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) - model.build_vocab(sentences=sentences) + model = FT_gensim(vector_size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) + model.build_vocab(corpus_iterable=sentences) self.assertRaises(TypeError, model.train, corpus_file=11111) - self.assertRaises(TypeError, model.train, sentences=11111) - self.assertRaises(TypeError, model.train, sentences=sentences, corpus_file='test') - self.assertRaises(TypeError, model.train, sentences=None, corpus_file=None) + self.assertRaises(TypeError, model.train, corpus_iterable=11111) + self.assertRaises(TypeError, model.train, corpus_iterable=sentences, corpus_file='test') + self.assertRaises(TypeError, model.train, corpus_iterable=None, corpus_file=None) self.assertRaises(TypeError, model.train, corpus_file=sentences) def test_training_fromfile(self): with temporary_file(get_tmpfile('gensim_fasttext.tst')) as corpus_file: utils.save_as_line_sentence(sentences, corpus_file) - model = FT_gensim(size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) + model = FT_gensim(vector_size=12, min_count=1, hs=1, negative=0, seed=42, workers=1) model.build_vocab(corpus_file=corpus_file) self.model_sanity(model) @@ -148,9 +145,9 @@ def models_equal(self, model, model2): self.assertTrue(np.allclose(model.wv.vectors_ngrams, model2.wv.vectors_ngrams)) self.assertTrue(np.allclose(model.wv.vectors, model2.wv.vectors)) if model.hs: - self.assertTrue(np.allclose(model.trainables.syn1, model2.trainables.syn1)) + self.assertTrue(np.allclose(model.syn1, model2.syn1)) if model.negative: - self.assertTrue(np.allclose(model.trainables.syn1neg, model2.trainables.syn1neg)) + self.assertTrue(np.allclose(model.syn1neg, model2.syn1neg)) most_common_word = max(model.wv.vocab.items(), key=lambda item: item[1].count)[0] self.assertTrue(np.allclose(model.wv[most_common_word], model2.wv[most_common_word])) @@ -243,12 +240,12 @@ def test_load_fasttext_format(self): actual_vec_oov = model.wv["rejection"] self.assertTrue(np.allclose(actual_vec_oov, expected_vec_oov, atol=1e-4)) - self.assertEqual(model.vocabulary.min_count, 5) + self.assertEqual(model.min_count, 5) self.assertEqual(model.window, 5) self.assertEqual(model.epochs, 5) self.assertEqual(model.negative, 5) - self.assertEqual(model.vocabulary.sample, 0.0001) - self.assertEqual(model.trainables.bucket, 1000) + self.assertEqual(model.sample, 0.0001) + 
self.assertEqual(model.bucket, 1000) self.assertEqual(model.wv.max_n, 6) self.assertEqual(model.wv.min_n, 3) self.assertEqual(model.wv.vectors.shape, (len(model.wv.vocab), model.vector_size)) @@ -296,12 +293,12 @@ def test_load_fasttext_new_format(self): actual_vec_oov = new_model.wv["rejection"] self.assertTrue(np.allclose(actual_vec_oov, expected_vec_oov, atol=1e-4)) - self.assertEqual(new_model.vocabulary.min_count, 5) + self.assertEqual(new_model.min_count, 5) self.assertEqual(new_model.window, 5) self.assertEqual(new_model.epochs, 5) self.assertEqual(new_model.negative, 5) - self.assertEqual(new_model.vocabulary.sample, 0.0001) - self.assertEqual(new_model.trainables.bucket, 1000) + self.assertEqual(new_model.sample, 0.0001) + self.assertEqual(new_model.bucket, 1000) self.assertEqual(new_model.wv.max_n, 6) self.assertEqual(new_model.wv.min_n, 3) self.assertEqual(new_model.wv.vectors.shape, (len(new_model.wv.vocab), new_model.vector_size)) @@ -396,8 +393,8 @@ def test_wm_distance(self): def test_cbow_hs_training(self): model_gensim = FT_gensim( - size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -425,8 +422,8 @@ def test_cbow_hs_training(self): def test_cbow_hs_training_fromfile(self): with temporary_file(get_tmpfile('gensim_fasttext.tst')) as corpus_file: model_gensim = FT_gensim( - size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -458,8 +455,8 @@ def test_cbow_hs_training_fromfile(self): def test_sg_hs_training(self): model_gensim = FT_gensim( - size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=1, negative=0, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=1, negative=0, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -487,8 +484,8 @@ def test_sg_hs_training(self): def test_sg_hs_training_fromfile(self): with temporary_file(get_tmpfile('gensim_fasttext.tst')) as corpus_file: model_gensim = FT_gensim( - size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=1, negative=0, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=1, negative=0, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -520,8 +517,8 @@ def test_sg_hs_training_fromfile(self): def test_cbow_neg_training(self): model_gensim = FT_gensim( - size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=5, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=0, 
cbow_mean=1, alpha=0.05, window=5, hs=0, negative=5, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -549,8 +546,8 @@ def test_cbow_neg_training(self): def test_cbow_neg_training_fromfile(self): with temporary_file(get_tmpfile('gensim_fasttext.tst')) as corpus_file: model_gensim = FT_gensim( - size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=5, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=5, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -582,8 +579,8 @@ def test_cbow_neg_training_fromfile(self): def test_sg_neg_training(self): model_gensim = FT_gensim( - size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=0, negative=5, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=0, negative=5, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -611,8 +608,8 @@ def test_sg_neg_training(self): def test_sg_neg_training_fromfile(self): with temporary_file(get_tmpfile('gensim_fasttext.tst')) as corpus_file: model_gensim = FT_gensim( - size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=0, negative=5, - min_count=5, iter=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, + vector_size=48, sg=1, cbow_mean=1, alpha=0.025, window=5, hs=0, negative=5, + min_count=5, epochs=5, batch_words=1000, word_ngrams=1, sample=1e-3, min_n=3, max_n=6, sorted_vocab=1, workers=1, min_alpha=0.0) lee_data = LineSentence(datapath('lee_background.cor')) @@ -642,7 +639,7 @@ def test_sg_neg_training_fromfile(self): self.assertGreaterEqual(overlap_count, 2) def test_online_learning(self): - model_hs = FT_gensim(sentences, size=12, min_count=1, seed=42, hs=1, negative=0) + model_hs = FT_gensim(sentences, vector_size=12, min_count=1, seed=42, hs=1, negative=0) self.assertTrue(len(model_hs.wv.vocab), 12) self.assertTrue(model_hs.wv.vocab['graph'].count, 3) model_hs.build_vocab(new_sentences, update=True) # update vocab @@ -656,7 +653,7 @@ def test_online_learning_fromfile(self): utils.save_as_line_sentence(sentences, corpus_file) utils.save_as_line_sentence(new_sentences, new_corpus_file) - model_hs = FT_gensim(corpus_file=corpus_file, size=12, min_count=1, seed=42, hs=1, negative=0) + model_hs = FT_gensim(corpus_file=corpus_file, vector_size=12, min_count=1, seed=42, hs=1, negative=0) self.assertTrue(len(model_hs.wv.vocab), 12) self.assertTrue(model_hs.wv.vocab['graph'].count, 3) model_hs.build_vocab(corpus_file=new_corpus_file, update=True) # update vocab @@ -666,7 +663,7 @@ def test_online_learning_fromfile(self): def test_online_learning_after_save(self): tmpf = get_tmpfile('gensim_fasttext.tst') - model_neg = FT_gensim(sentences, size=12, min_count=0, seed=42, hs=0, negative=5) + model_neg = FT_gensim(sentences, vector_size=12, min_count=0, seed=42, hs=0, negative=5) model_neg.save(tmpf) model_neg = FT_gensim.load(tmpf) self.assertTrue(len(model_neg.wv.vocab), 12) @@ -681,7 +678,7 @@ def test_online_learning_after_save_fromfile(self): 
utils.save_as_line_sentence(new_sentences, new_corpus_file) tmpf = get_tmpfile('gensim_fasttext.tst') - model_neg = FT_gensim(corpus_file=corpus_file, size=12, min_count=0, seed=42, hs=0, negative=5) + model_neg = FT_gensim(corpus_file=corpus_file, vector_size=12, min_count=0, seed=42, hs=0, negative=5) model_neg.save(tmpf) model_neg = FT_gensim.load(tmpf) self.assertTrue(len(model_neg.wv.vocab), 12) @@ -717,18 +714,18 @@ def online_sanity(self, model): @unittest.skipIf(IS_WIN32, "avoid memory error with Appveyor x32") def test_sg_hs_online(self): - model = FT_gensim(sg=1, window=2, hs=1, negative=0, min_count=3, iter=1, seed=42, workers=1) + model = FT_gensim(sg=1, window=2, hs=1, negative=0, min_count=3, epochs=1, seed=42, workers=1) self.online_sanity(model) @unittest.skipIf(IS_WIN32, "avoid memory error with Appveyor x32") def test_sg_neg_online(self): - model = FT_gensim(sg=1, window=2, hs=0, negative=5, min_count=3, iter=1, seed=42, workers=1) + model = FT_gensim(sg=1, window=2, hs=0, negative=5, min_count=3, epochs=1, seed=42, workers=1) self.online_sanity(model) @unittest.skipIf(IS_WIN32, "avoid memory error with Appveyor x32") def test_cbow_hs_online(self): model = FT_gensim( - sg=0, cbow_mean=1, alpha=0.05, window=2, hs=1, negative=0, min_count=3, iter=1, seed=42, workers=1 + sg=0, cbow_mean=1, alpha=0.05, window=2, hs=1, negative=0, min_count=3, epochs=1, seed=42, workers=1 ) self.online_sanity(model) @@ -736,12 +733,12 @@ def test_cbow_hs_online(self): def test_cbow_neg_online(self): model = FT_gensim( sg=0, cbow_mean=1, alpha=0.05, window=2, hs=0, negative=5, - min_count=5, iter=1, seed=42, workers=1, sample=0 + min_count=5, epochs=1, seed=42, workers=1, sample=0 ) self.online_sanity(model) def test_get_vocab_word_vecs(self): - model = FT_gensim(size=12, min_count=1, seed=42) + model = FT_gensim(vector_size=12, min_count=1, seed=42) model.build_vocab(sentences) original_syn0_vocab = np.copy(model.wv.vectors_vocab) model.wv.adjust_vectors() @@ -750,21 +747,21 @@ def test_get_vocab_word_vecs(self): def test_persistence_word2vec_format(self): """Test storing/loading the model in word2vec format.""" tmpf = get_tmpfile('gensim_fasttext_w2v_format.tst') - model = FT_gensim(sentences, min_count=1, size=12) + model = FT_gensim(sentences, min_count=1, vector_size=12) model.wv.save_word2vec_format(tmpf, binary=True) loaded_model_kv = KeyedVectors.load_word2vec_format(tmpf, binary=True) self.assertEqual(len(model.wv.vocab), len(loaded_model_kv.vocab)) self.assertTrue(np.allclose(model.wv['human'], loaded_model_kv['human'])) def test_bucket_ngrams(self): - model = FT_gensim(size=12, min_count=1, bucket=20) + model = FT_gensim(vector_size=12, min_count=1, bucket=20) model.build_vocab(sentences) self.assertEqual(model.wv.vectors_ngrams.shape, (20, 12)) model.build_vocab(new_sentences, update=True) self.assertEqual(model.wv.vectors_ngrams.shape, (20, 12)) def test_estimate_memory(self): - model = FT_gensim(sg=1, hs=1, size=12, negative=5, min_count=3) + model = FT_gensim(sg=1, hs=1, vector_size=12, negative=5, min_count=3) model.build_vocab(sentences) report = model.estimate_memory() self.assertEqual(report['vocab'], 2800) @@ -775,7 +772,7 @@ def test_estimate_memory(self): self.assertEqual(report['buckets_word'], 640) self.assertEqual(report['total'], 6704) - def testLoadOldModel(self): + def obsolete_testLoadOldModel(self): """Test loading fasttext models from previous version""" model_file = 'fasttext_old' @@ -784,9 +781,9 @@ def testLoadOldModel(self): self.assertTrue(len(model.wv.vocab) 
== 12) self.assertTrue(len(model.wv.index2word) == 12) self.assertIsNone(model.corpus_total_words) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) - self.assertTrue(model.trainables.vectors_lockf.shape == (12, )) - self.assertTrue(model.vocabulary.cum_table.shape == (12, )) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) + self.assertTrue(model.wv.vectors_lockf.shape == (12, )) + self.assertTrue(model.cum_table.shape == (12, )) self.assertEqual(model.wv.vectors_vocab.shape, (12, 100)) self.assertEqual(model.wv.vectors_ngrams.shape, (2000000, 100)) @@ -798,9 +795,9 @@ def testLoadOldModel(self): self.assertTrue(len(model.wv.vocab) == 12) self.assertTrue(len(model.wv.index2word) == 12) self.assertIsNone(model.corpus_total_words) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) - self.assertTrue(model.trainables.vectors_lockf.shape == (12, )) - self.assertTrue(model.vocabulary.cum_table.shape == (12, )) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) + self.assertTrue(model.wv.vectors_lockf.shape == (12, )) + self.assertTrue(model.cum_table.shape == (12, )) self.assertEqual(model.wv.vectors_vocab.shape, (12, 100)) self.assertEqual(model.wv.vectors_ngrams.shape, (2000000, 100)) @@ -869,7 +866,7 @@ def train_gensim(bucket=100, min_count=5): # # Set parameters to match those in the load_native function # - model = FT_gensim(bucket=bucket, size=5, alpha=0.05, workers=1, sample=0.0001, min_count=min_count) + model = FT_gensim(bucket=bucket, vector_size=5, alpha=0.05, workers=1, sample=0.0001, min_count=min_count) model.build_vocab(TOY_SENTENCES) model.train(TOY_SENTENCES, total_examples=len(TOY_SENTENCES), epochs=model.epochs) return model @@ -1025,8 +1022,8 @@ def test_sanity(self): # self.assertEqual(trained.bucket, native.bucket) compare_wv(trained.wv, native.wv, self) - compare_vocabulary(trained.vocabulary, native.vocabulary, self) - compare_nn(trained.trainables, native.trainables, self) + compare_vocabulary(trained, native, self) + compare_nn(trained, native, self) def test_continuation_native(self): """Ensure that training has had a measurable effect.""" @@ -1149,7 +1146,7 @@ class HashCompatibilityTest(unittest.TestCase): def test_compatibility_true(self): m = FT_gensim.load(datapath('compatible-hash-true.model')) self.assertTrue(m.wv.compatible_hash) - self.assertEqual(m.trainables.bucket, m.wv.bucket) + self.assertEqual(m.bucket, m.wv.bucket) def test_compatibility_false(self): # @@ -1157,12 +1154,12 @@ def test_compatibility_false(self): # m = FT_gensim.load(datapath('compatible-hash-false.model')) self.assertFalse(m.wv.compatible_hash) - self.assertEqual(m.trainables.bucket, m.wv.bucket) + self.assertEqual(m.bucket, m.wv.bucket) def test_hash_native(self): m = load_native() self.assertTrue(m.wv.compatible_hash) - self.assertEqual(m.trainables.bucket, m.wv.bucket) + self.assertEqual(m.bucket, m.wv.bucket) class FTHashResultsTest(unittest.TestCase): diff --git a/gensim/test/test_fasttext_wrapper.py b/gensim/test/test_fasttext_wrapper.py deleted file mode 100644 index 66dd7b47c5..0000000000 --- a/gensim/test/test_fasttext_wrapper.py +++ /dev/null @@ -1,382 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# Copyright (C) 2010 Radim Rehurek -# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl.html - -""" -Automated tests for checking transformation algorithms (the models package). 
-""" - -import logging -import unittest -import os - -import numpy - -from gensim.models.wrappers import fasttext -from gensim.models import keyedvectors -from gensim.test.utils import datapath, get_tmpfile - - -try: - from pyemd import emd # noqa:F401 - PYEMD_EXT = True -except (ImportError, ValueError): - PYEMD_EXT = False - - -logger = logging.getLogger(__name__) - - -class TestFastText(unittest.TestCase): - def setUp(self): - ft_home = os.environ.get('FT_HOME', None) - self.ft_path = os.path.join(ft_home, 'fasttext') if ft_home else None - self.corpus_file = datapath('lee_background.cor') - self.test_model_file = datapath('lee_fasttext') - self.test_new_model_file = datapath('lee_fasttext_new') - # Load pre-trained model to perform tests in case FastText binary isn't available in test environment - self.test_model = fasttext.FastText.load_fasttext_format(self.test_model_file) - - def model_sanity(self, model): - """Even tiny models trained on any corpus should pass these sanity checks""" - self.assertEqual(model.wv.syn0.shape, (len(model.wv.vocab), model.vector_size)) - self.assertEqual(model.wv.syn0_ngrams.shape, (model.num_ngram_vectors, model.vector_size)) - - def models_equal(self, model1, model2): - self.assertEqual(len(model1.wv.vocab), len(model2.wv.vocab)) - self.assertEqual(set(model1.wv.vocab.keys()), set(model2.wv.vocab.keys())) - self.assertTrue(numpy.allclose(model1.wv.syn0, model2.wv.syn0)) - self.assertTrue(numpy.allclose(model1.wv.syn0_ngrams, model2.wv.syn0_ngrams)) - - def testTraining(self): - """Test self.test_model successfully trained, parameters and weights correctly loaded""" - if self.ft_path is None: - logger.info("FT_HOME env variable not set, skipping test") - return # Use self.skipTest once python < 2.7 is no longer supported - vocab_size, model_size = 1763, 10 - tmpf = get_tmpfile('gensim_fasttext_wrapper.tst') - trained_model = fasttext.FastText.train( - self.ft_path, self.corpus_file, size=model_size, output_file=tmpf - ) - - self.assertEqual(trained_model.wv.syn0.shape, (vocab_size, model_size)) - self.assertEqual(len(trained_model.wv.vocab), vocab_size) - self.assertEqual(trained_model.wv.syn0_ngrams.shape[1], model_size) - self.model_sanity(trained_model) - - # Tests temporary training files deleted - self.assertFalse(os.path.exists('%s.bin' % tmpf)) - - def testMinCount(self): - """Tests words with frequency less than `min_count` absent from vocab""" - if self.ft_path is None: - logger.info("FT_HOME env variable not set, skipping test") - return # Use self.skipTest once python < 2.7 is no longer supported - tmpf = get_tmpfile('gensim_fasttext_wrapper.tst') - test_model_min_count_5 = fasttext.FastText.train( - self.ft_path, self.corpus_file, output_file=tmpf, size=10, min_count=5 - ) - self.assertTrue('forests' not in test_model_min_count_5.wv.vocab) - - test_model_min_count_1 = fasttext.FastText.train( - self.ft_path, self.corpus_file, output_file=tmpf, size=10, min_count=1 - ) - self.assertTrue('forests' in test_model_min_count_1.wv.vocab) - - def testModelSize(self): - """Tests output vector dimensions are the same as the value for `size` param""" - if self.ft_path is None: - logger.info("FT_HOME env variable not set, skipping test") - return # Use self.skipTest once python < 2.7 is no longer supported - tmpf = get_tmpfile('gensim_fasttext_wrapper.tst') - test_model_size_20 = fasttext.FastText.train( - self.ft_path, self.corpus_file, output_file=tmpf, size=20 - ) - self.assertEqual(test_model_size_20.vector_size, 20) - 
self.assertEqual(test_model_size_20.wv.syn0.shape[1], 20) - self.assertEqual(test_model_size_20.wv.syn0_ngrams.shape[1], 20) - - def testPersistence(self): - """Test storing/loading the entire model.""" - tmpf = get_tmpfile('gensim_fasttext_wrapper.tst') - self.test_model.save(tmpf) - loaded = fasttext.FastText.load(tmpf) - self.models_equal(self.test_model, loaded) - - self.test_model.save(tmpf, sep_limit=0) - self.models_equal(self.test_model, fasttext.FastText.load(tmpf)) - - def testNormalizedVectorsNotSaved(self): - """Test syn0norm/syn0_ngrams_norm aren't saved in model file""" - tmpf = get_tmpfile('gensim_fasttext_wrapper.tst') - self.test_model.init_sims() - self.test_model.save(tmpf) - loaded = fasttext.FastText.load(tmpf) - self.assertTrue(loaded.wv.syn0norm is None) - self.assertTrue(loaded.wv.syn0_ngrams_norm is None) - - wv = self.test_model.wv - wv.save(tmpf) - loaded_kv = keyedvectors.KeyedVectors.load(tmpf) - self.assertTrue(loaded_kv.syn0norm is None) - self.assertTrue(loaded_kv.syn0_ngrams_norm is None) - - def testLoadFastTextFormat(self): - """Test model successfully loaded from fastText .bin file""" - try: - model = fasttext.FastText.load_fasttext_format(self.test_model_file) - except Exception as exc: - self.fail('Unable to load FastText model from file %s: %s' % (self.test_model_file, exc)) - vocab_size, model_size = 1762, 10 - self.assertEqual(model.wv.syn0.shape, (vocab_size, model_size)) - self.assertEqual(len(model.wv.vocab), vocab_size, model_size) - self.assertEqual(model.wv.syn0_ngrams.shape, (model.num_ngram_vectors, model_size)) - - expected_vec = [ - -0.57144, - -0.0085561, - 0.15748, - -0.67855, - -0.25459, - -0.58077, - -0.09913, - 1.1447, - 0.23418, - 0.060007 - ] # obtained using ./fasttext print-word-vectors lee_fasttext_new.bin - self.assertTrue(numpy.allclose(model["hundred"], expected_vec, atol=1e-4)) - - # vector for oov words are slightly different from original FastText due to discarding unused ngrams - # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin - expected_vec_oov = [ - -0.23825, - -0.58482, - -0.22276, - -0.41215, - 0.91015, - -1.6786, - -0.26724, - 0.58818, - 0.57828, - 0.75801 - ] - self.assertTrue(numpy.allclose(model["rejection"], expected_vec_oov, atol=1e-4)) - - self.assertEqual(model.min_count, 5) - self.assertEqual(model.window, 5) - self.assertEqual(model.iter, 5) - self.assertEqual(model.negative, 5) - self.assertEqual(model.sample, 0.0001) - self.assertEqual(model.bucket, 1000) - self.assertEqual(model.wv.max_n, 6) - self.assertEqual(model.wv.min_n, 3) - self.model_sanity(model) - - def testLoadFastTextNewFormat(self): - """ Test model successfully loaded from fastText (new format) .bin file """ - try: - new_model = fasttext.FastText.load_fasttext_format(self.test_new_model_file) - except Exception as exc: - self.fail('Unable to load FastText model from file %s: %s' % (self.test_new_model_file, exc)) - vocab_size, model_size = 1763, 10 - self.assertEqual(new_model.wv.syn0.shape, (vocab_size, model_size)) - self.assertEqual(len(new_model.wv.vocab), vocab_size, model_size) - self.assertEqual(new_model.wv.syn0_ngrams.shape, (new_model.num_ngram_vectors, model_size)) - - expected_vec = [ - -0.025627, - -0.11448, - 0.18116, - -0.96779, - 0.2532, - -0.93224, - 0.3929, - 0.12679, - -0.19685, - -0.13179 - ] # obtained using ./fasttext print-word-vectors lee_fasttext_new.bin - self.assertTrue(numpy.allclose(new_model["hundred"], expected_vec, atol=1e-4)) - - # vector for oov words are slightly 
different from original FastText due to discarding unused ngrams - # obtained using a modified version of ./fasttext print-word-vectors lee_fasttext_new.bin - expected_vec_oov = [ - -0.53378, - -0.19, - 0.013482, - -0.86767, - -0.21684, - -0.89928, - 0.45124, - 0.18025, - -0.14128, - 0.22508 - ] - self.assertTrue(numpy.allclose(new_model["rejection"], expected_vec_oov, atol=1e-4)) - - self.assertEqual(new_model.min_count, 5) - self.assertEqual(new_model.window, 5) - self.assertEqual(new_model.iter, 5) - self.assertEqual(new_model.negative, 5) - self.assertEqual(new_model.sample, 0.0001) - self.assertEqual(new_model.bucket, 1000) - self.assertEqual(new_model.wv.max_n, 6) - self.assertEqual(new_model.wv.min_n, 3) - self.model_sanity(new_model) - - def testLoadFileName(self): - """ Test model accepts input as both `/path/to/model` or `/path/to/model.bin` """ - self.assertTrue(fasttext.FastText.load_fasttext_format(datapath('lee_fasttext_new'))) - self.assertTrue(fasttext.FastText.load_fasttext_format(datapath('lee_fasttext_new.bin'))) - - def testLoadModelSupervised(self): - """Test loading model with supervised learning labels""" - with self.assertRaises(NotImplementedError): - fasttext.FastText.load_fasttext_format(datapath('pang_lee_polarity_fasttext')) - - def testLoadModelWithNonAsciiVocab(self): - """Test loading model with non-ascii words in vocab""" - model = fasttext.FastText.load_fasttext_format(datapath('non_ascii_fasttext')) - self.assertTrue(u'který' in model) - try: - vector = model[u'který'] # noqa:F841 - except UnicodeDecodeError: - self.fail('Unable to access vector for utf8 encoded non-ascii word') - - def testLoadModelNonUtf8Encoding(self): - """Test loading model with words in user-specified encoding""" - model = fasttext.FastText.load_fasttext_format(datapath('cp852_fasttext'), encoding='cp852') - self.assertTrue(u'který' in model) - try: - vector = model[u'který'] # noqa:F841 - except KeyError: - self.fail('Unable to access vector for cp-852 word') - - def testNSimilarity(self): - """Test n_similarity for in-vocab and out-of-vocab words""" - # In vocab, sanity check - self.assertTrue(numpy.allclose(self.test_model.n_similarity(['the', 'and'], ['and', 'the']), 1.0)) - self.assertEqual(self.test_model.n_similarity(['the'], ['and']), self.test_model.n_similarity(['and'], ['the'])) - # Out of vocab check - self.assertTrue(numpy.allclose(self.test_model.n_similarity(['night', 'nights'], ['nights', 'night']), 1.0)) - self.assertEqual( - self.test_model.n_similarity(['night'], ['nights']), - self.test_model.n_similarity(['nights'], ['night']) - ) - - def testSimilarity(self): - """Test similarity for in-vocab and out-of-vocab words""" - # In vocab, sanity check - self.assertTrue(numpy.allclose(self.test_model.similarity('the', 'the'), 1.0)) - self.assertEqual(self.test_model.similarity('the', 'and'), self.test_model.similarity('and', 'the')) - # Out of vocab check - self.assertTrue(numpy.allclose(self.test_model.similarity('nights', 'nights'), 1.0)) - self.assertEqual(self.test_model.similarity('night', 'nights'), self.test_model.similarity('nights', 'night')) - - def testMostSimilar(self): - """Test most_similar for in-vocab and out-of-vocab words""" - # In vocab, sanity check - self.assertEqual(len(self.test_model.most_similar(positive=['the', 'and'], topn=5)), 5) - self.assertEqual(self.test_model.most_similar('the'), self.test_model.most_similar(positive=['the'])) - # Out of vocab check - self.assertEqual(len(self.test_model.most_similar(['night', 'nights'], topn=5)), 5) - 
self.assertEqual(self.test_model.most_similar('nights'), self.test_model.most_similar(positive=['nights'])) - - def testMostSimilarCosmul(self): - """Test most_similar_cosmul for in-vocab and out-of-vocab words""" - # In vocab, sanity check - self.assertEqual(len(self.test_model.most_similar_cosmul(positive=['the', 'and'], topn=5)), 5) - self.assertEqual( - self.test_model.most_similar_cosmul('the'), - self.test_model.most_similar_cosmul(positive=['the'])) - # Out of vocab check - self.assertEqual(len(self.test_model.most_similar_cosmul(['night', 'nights'], topn=5)), 5) - self.assertEqual( - self.test_model.most_similar_cosmul('nights'), - self.test_model.most_similar_cosmul(positive=['nights'])) - - def testLookup(self): - """Tests word vector lookup for in-vocab and out-of-vocab words""" - # In vocab, sanity check - self.assertTrue('night' in self.test_model.wv.vocab) - self.assertTrue(numpy.allclose(self.test_model['night'], self.test_model[['night']])) - # Out of vocab check - self.assertFalse('nights' in self.test_model.wv.vocab) - self.assertTrue(numpy.allclose(self.test_model['nights'], self.test_model[['nights']])) - # Word with no ngrams in model - self.assertRaises(KeyError, lambda: self.test_model['a!@']) - - def testContains(self): - """Tests __contains__ for in-vocab and out-of-vocab words""" - # In vocab, sanity check - self.assertTrue('night' in self.test_model.wv.vocab) - self.assertTrue('night' in self.test_model) - # Out of vocab check - self.assertFalse('nights' in self.test_model.wv.vocab) - self.assertTrue('nights' in self.test_model) - # Word with no ngrams in model - self.assertFalse('a!@' in self.test_model.wv.vocab) - self.assertFalse('a!@' in self.test_model) - - @unittest.skipIf(PYEMD_EXT is False, "pyemd not installed or have some issues") - def testWmdistance(self): - """Tests wmdistance for docs with in-vocab and out-of-vocab words""" - doc = ['night', 'payment'] - oov_doc = ['nights', 'forests', 'payments'] - ngrams_absent_doc = ['a!@', 'b#$'] - - dist = self.test_model.wmdistance(doc, oov_doc) - self.assertNotEqual(float('inf'), dist) - dist = self.test_model.wmdistance(doc, ngrams_absent_doc) - self.assertEqual(float('inf'), dist) - - def testDoesntMatch(self): - """Tests doesnt_match for list of out-of-vocab words""" - oov_words = ['nights', 'forests', 'payments'] - # Out of vocab check - for word in oov_words: - self.assertFalse(word in self.test_model.wv.vocab) - try: - self.test_model.doesnt_match(oov_words) - except Exception: - self.fail('model.doesnt_match raises exception for oov words') - - def testHash(self): - # Tests FastText.ft_hash method return values to those obtained from original C implementation - ft_hash = fasttext.ft_hash('test') - self.assertEqual(ft_hash, 2949673445) - ft_hash = fasttext.ft_hash('word') - self.assertEqual(ft_hash, 1788406269) - - def testConsistentDtype(self): - """Test that the same dtype is returned for OOV words as for words in the vocabulary""" - vocab_word = 'night' - oov_word = 'wordnotpresentinvocabulary' - self.assertIn(vocab_word, self.test_model.wv.vocab) - self.assertNotIn(oov_word, self.test_model.wv.vocab) - - vocab_embedding = self.test_model[vocab_word] - oov_embedding = self.test_model[oov_word] - self.assertEqual(vocab_embedding.dtype, oov_embedding.dtype) - - def testPersistenceForOldVersions(self): - """Test backward compatibility for models saved with versions < 3.0.0""" - old_model_path = datapath('ft_model_2.3.0') - loaded_model = fasttext.FastText.load(old_model_path) - 
self.assertEqual(loaded_model.vector_size, 10) - self.assertEqual(loaded_model.wv.syn0.shape[1], 10) - self.assertEqual(loaded_model.wv.syn0_ngrams.shape[1], 10) - # in-vocab word - in_expected_vec = numpy.array([-2.44566941, -1.54802394, -2.61103821, -1.88549316, 1.02860415, - 1.19031894, 2.01627707, 1.98942184, -1.39095843, -0.65036952]) - self.assertTrue(numpy.allclose(loaded_model["the"], in_expected_vec, atol=1e-4)) - # out-of-vocab word - out_expected_vec = numpy.array([-1.34948218, -0.8686831, -1.51483142, -1.0164026, 0.56272298, - 0.66228276, 1.06477463, 1.1355902, -0.80972326, -0.39845538]) - self.assertTrue(numpy.allclose(loaded_model["random_word"], out_expected_vec, atol=1e-4)) - - -if __name__ == '__main__': - logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG) - unittest.main() diff --git a/gensim/test/test_keras_integration.py b/gensim/test/test_keras_integration.py index bad0bb8b95..3eb2841f58 100644 --- a/gensim/test/test_keras_integration.py +++ b/gensim/test/test_keras_integration.py @@ -25,7 +25,7 @@ class TestKerasWord2VecWrapper(unittest.TestCase): def setUp(self): - self.model_cos_sim = word2vec.Word2Vec(common_texts, size=100, min_count=1, hs=1) + self.model_cos_sim = word2vec.Word2Vec(common_texts, vector_size=100, min_count=1, hs=1) self.model_twenty_ng = word2vec.Word2Vec(min_count=1) def testWord2VecTraining(self): @@ -34,7 +34,7 @@ def testWord2VecTraining(self): """ model = self.model_cos_sim self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), 100)) - self.assertTrue(model.trainables.syn1.shape == (len(model.wv.vocab), 100)) + self.assertTrue(model.syn1.shape == (len(model.wv.vocab), 100)) sims = model.wv.most_similar('graph', topn=10) # self.assertTrue(sims[0][0] == 'trees', sims) # most similar diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py index 0b46afec5e..ba759fde5f 100644 --- a/gensim/test/test_keyedvectors.py +++ b/gensim/test/test_keyedvectors.py @@ -142,11 +142,11 @@ def test_similarity(self): self.assertTrue(np.allclose(self.vectors.similarity('war', 'war'), 1)) self.assertTrue(np.allclose(self.vectors.similarity('war', 'conflict'), 0.93305397)) - def test_words_closer_than(self): + def test_closer_than(self): """Test words_closer_than returns expected value for distinct and identical nodes.""" - self.assertEqual(self.vectors.words_closer_than('war', 'war'), []) + self.assertEqual(self.vectors.closer_than('war', 'war'), []) expected = set(['conflict', 'administration']) - self.assertEqual(set(self.vectors.words_closer_than('war', 'terrorism')), expected) + self.assertEqual(set(self.vectors.closer_than('war', 'terrorism')), expected) def test_rank(self): """Test rank returns expected value for distinct and identical nodes.""" diff --git a/gensim/test/test_poincare.py b/gensim/test/test_poincare.py index c4fe8af433..f0520d0a7f 100644 --- a/gensim/test/test_poincare.py +++ b/gensim/test/test_poincare.py @@ -383,11 +383,11 @@ def test_difference_in_hierarchy(self): self.assertTrue(np.allclose(self.vectors.difference_in_hierarchy('mammal.n.01', 'dog.n.01'), 0.9384287)) self.assertTrue(np.allclose(self.vectors.difference_in_hierarchy('dog.n.01', 'mammal.n.01'), -0.9384287)) - def test_words_closer_than(self): - """Test words_closer_than returns expected value for distinct and identical nodes.""" - self.assertEqual(self.vectors.words_closer_than('dog.n.01', 'dog.n.01'), []) + def test_closer_than(self): + """Test closer_than returns expected value for distinct and 
identical nodes.""" + self.assertEqual(self.vectors.closer_than('dog.n.01', 'dog.n.01'), []) expected = set(['canine.n.02', 'hunting_dog.n.01']) - self.assertEqual(set(self.vectors.words_closer_than('dog.n.01', 'carnivore.n.01')), expected) + self.assertEqual(set(self.vectors.closer_than('dog.n.01', 'carnivore.n.01')), expected) def test_rank(self): """Test rank returns expected value for distinct and identical nodes.""" diff --git a/gensim/test/test_sklearn_api.py b/gensim/test/test_sklearn_api.py index e325910b48..a8d9e3e6eb 100644 --- a/gensim/test/test_sklearn_api.py +++ b/gensim/test/test_sklearn_api.py @@ -655,7 +655,7 @@ def testModelNotFitted(self): class TestWord2VecWrapper(unittest.TestCase): def setUp(self): numpy.random.seed(0) - self.model = W2VTransformer(size=10, min_count=0, seed=42) + self.model = W2VTransformer(vector_size=10, min_count=0, seed=42) self.model.fit(texts) def testTransform(self): @@ -664,21 +664,21 @@ def testTransform(self): words = words + texts[0] matrix = self.model.transform(words) self.assertEqual(matrix.shape[0], 3) - self.assertEqual(matrix.shape[1], self.model.size) + self.assertEqual(matrix.shape[1], self.model.vector_size) # tranform one word word = texts[0][0] matrix = self.model.transform(word) self.assertEqual(matrix.shape[0], 1) - self.assertEqual(matrix.shape[1], self.model.size) + self.assertEqual(matrix.shape[1], self.model.vector_size) def testConsistencyWithGensimModel(self): # training a W2VTransformer - self.model = W2VTransformer(size=10, min_count=0, seed=42) + self.model = W2VTransformer(vector_size=10, min_count=0, seed=42) self.model.fit(texts) # training a Gensim Word2Vec model with the same params - gensim_w2vmodel = models.Word2Vec(texts, size=10, min_count=0, seed=42) + gensim_w2vmodel = models.Word2Vec(texts, vector_size=10, min_count=0, seed=42) word = texts[0][0] vec_transformer_api = self.model.transform(word) # vector returned by W2VTransformer @@ -688,7 +688,7 @@ def testConsistencyWithGensimModel(self): def testPipeline(self): numpy.random.seed(0) # set fixed seed to get similar values everytime - model = W2VTransformer(size=10, min_count=1) + model = W2VTransformer(vector_size=10, min_count=1) model.fit(w2v_texts) class_dict = {'mathematics': 1, 'physics': 0} @@ -725,7 +725,7 @@ def testPersistence(self): # sanity check for transformation operation self.assertEqual(loaded_transformed_vecs.shape[0], 1) - self.assertEqual(loaded_transformed_vecs.shape[1], model_load.size) + self.assertEqual(loaded_transformed_vecs.shape[1], model_load.vector_size) # comparing the original and loaded models original_transformed_vecs = self.model.transform(word) @@ -733,7 +733,7 @@ def testPersistence(self): self.assertTrue(passed) def testModelNotFitted(self): - w2vmodel_wrapper = W2VTransformer(size=10, min_count=0, seed=42) + w2vmodel_wrapper = W2VTransformer(vector_size=10, min_count=0, seed=42) word = texts[0][0] self.assertRaises(NotFittedError, w2vmodel_wrapper.transform, word) @@ -832,13 +832,13 @@ def testTransform(self): docs = [w2v_texts[0], w2v_texts[1], w2v_texts[2]] matrix = self.model.transform(docs) self.assertEqual(matrix.shape[0], 3) - self.assertEqual(matrix.shape[1], self.model.size) + self.assertEqual(matrix.shape[1], self.model.vector_size) # tranform one document doc = w2v_texts[0] matrix = self.model.transform(doc) self.assertEqual(matrix.shape[0], 1) - self.assertEqual(matrix.shape[1], self.model.size) + self.assertEqual(matrix.shape[1], self.model.vector_size) def testFitTransform(self): model = 
D2VTransformer(min_count=1) @@ -847,13 +847,13 @@ def testFitTransform(self): docs = [w2v_texts[0], w2v_texts[1], w2v_texts[2]] matrix = model.fit_transform(docs) self.assertEqual(matrix.shape[0], 3) - self.assertEqual(matrix.shape[1], model.size) + self.assertEqual(matrix.shape[1], model.vector_size) # fit and transform one document doc = w2v_texts[0] matrix = model.fit_transform(doc) self.assertEqual(matrix.shape[0], 1) - self.assertEqual(matrix.shape[1], model.size) + self.assertEqual(matrix.shape[1], model.vector_size) def testSetGetParams(self): # updating only one param @@ -893,7 +893,7 @@ def testPersistence(self): # sanity check for transformation operation self.assertEqual(loaded_transformed_vecs.shape[0], 1) - self.assertEqual(loaded_transformed_vecs.shape[1], model_load.size) + self.assertEqual(loaded_transformed_vecs.shape[1], model_load.vector_size) # comparing the original and loaded models original_transformed_vecs = self.model.transform(doc) @@ -1297,9 +1297,9 @@ def testModelNotFitted(self): self.assertRaises(NotFittedError, phrases_transformer.transform, phrases_sentences[0]) -class TestFastTextWrapper(unittest.TestCase): +class TestFTTransformer(unittest.TestCase): def setUp(self): - self.model = FTTransformer(size=10, min_count=0, seed=42) + self.model = FTTransformer(vector_size=10, min_count=0, seed=42) self.model.fit(texts) def testTransform(self): @@ -1308,30 +1308,30 @@ def testTransform(self): words = words + texts[0] matrix = self.model.transform(words) self.assertEqual(matrix.shape[0], 3) - self.assertEqual(matrix.shape[1], self.model.size) + self.assertEqual(matrix.shape[1], self.model.vector_size) # tranform one word word = texts[0][0] matrix = self.model.transform(word) self.assertEqual(matrix.shape[0], 1) - self.assertEqual(matrix.shape[1], self.model.size) + self.assertEqual(matrix.shape[1], self.model.vector_size) # verify oov-word vector retrieval invocab_vec = self.model.transform("computer") # invocab word self.assertEqual(invocab_vec.shape[0], 1) - self.assertEqual(invocab_vec.shape[1], self.model.size) + self.assertEqual(invocab_vec.shape[1], self.model.vector_size) oov_vec = self.model.transform('compute') # oov word self.assertEqual(oov_vec.shape[0], 1) - self.assertEqual(oov_vec.shape[1], self.model.size) + self.assertEqual(oov_vec.shape[1], self.model.vector_size) def testConsistencyWithGensimModel(self): # training a FTTransformer - self.model = FTTransformer(size=10, min_count=0, seed=42, workers=1) + self.model = FTTransformer(vector_size=10, min_count=0, seed=42, workers=1) self.model.fit(texts) # training a Gensim FastText model with the same params - gensim_ftmodel = models.FastText(texts, size=10, min_count=0, seed=42, + gensim_ftmodel = models.FastText(texts, vector_size=10, min_count=0, seed=42, workers=1) # vectors returned by FTTransformer @@ -1350,7 +1350,7 @@ def testConsistencyWithGensimModel(self): self.assertTrue(passed) def testPipeline(self): - model = FTTransformer(size=10, min_count=1) + model = FTTransformer(vector_size=10, min_count=1) model.fit(w2v_texts) class_dict = {'mathematics': 1, 'physics': 0} @@ -1388,7 +1388,7 @@ def testPersistence(self): # sanity check for transformation operation self.assertEqual(loaded_transformed_vecs.shape[0], len(words)) - self.assertEqual(loaded_transformed_vecs.shape[1], model_load.size) + self.assertEqual(loaded_transformed_vecs.shape[1], model_load.vector_size) # comparing the original and loaded models original_transformed_vecs = self.model.transform(words) @@ -1396,7 +1396,7 @@ def 
testPersistence(self): self.assertTrue(passed) def testModelNotFitted(self): - ftmodel_wrapper = FTTransformer(size=10, min_count=0, seed=42) + ftmodel_wrapper = FTTransformer(vector_size=10, min_count=0, seed=42) word = texts[0][0] self.assertRaises(NotFittedError, ftmodel_wrapper.transform, word) diff --git a/gensim/test/test_translation_matrix.py b/gensim/test/test_translation_matrix.py index f6798ac9cc..2841845e6c 100644 --- a/gensim/test/test_translation_matrix.py +++ b/gensim/test/test_translation_matrix.py @@ -2,7 +2,6 @@ # encoding: utf-8 from collections import namedtuple import unittest -import math import logging import numpy as np @@ -92,31 +91,33 @@ def setUp(self): filename = datapath("alldata-id-10.txt") train_docs = read_sentiment_docs(filename) self.train_docs = train_docs - self.source_doc_vec_file = datapath("small_tag_doc_5_iter50") - self.target_doc_vec_file = datapath("large_tag_doc_10_iter50") - - self.source_doc_vec = Doc2Vec.load(self.source_doc_vec_file) - self.target_doc_vec = Doc2Vec.load(self.target_doc_vec_file) + self.source_doc_vec = Doc2Vec(documents=train_docs[:5], vector_size=8, epochs=50, seed=1) + self.target_doc_vec = Doc2Vec(documents=train_docs, vector_size=8, epochs=50, seed=2) def test_translation_matrix(self): model = translation_matrix.BackMappingTranslationMatrix( self.source_doc_vec, self.target_doc_vec, self.train_docs[:5] ) transmat = model.train(self.train_docs[:5]) - self.assertEqual(transmat.shape, (100, 100)) + self.assertEqual(transmat.shape, (8, 8)) def test_infer_vector(self): + """Test that translation gives similar results to traditional inference. + + This may not be completely sensible/salient with such tiny data, but + replaces a nonsensical test. + """ model = translation_matrix.BackMappingTranslationMatrix( self.source_doc_vec, self.target_doc_vec, self.train_docs[:5] ) model.train(self.train_docs[:5]) - infered_vec = model.infer_vector(self.target_doc_vec.docvecs[self.train_docs[5].tags]) - self.assertEqual(infered_vec.shape, (100, )) + backmapped_vec = model.infer_vector(self.target_doc_vec.docvecs[self.train_docs[5].tags]) + self.assertEqual(backmapped_vec.shape, (8, )) + + d2v_inferred_vector = self.source_doc_vec.infer_vector(self.train_docs[5].words) - expected = 0.6453547135 - eps = 1e-6 - caculated = cosine(self.target_doc_vec.docvecs[self.train_docs[5].tags], infered_vec) - self.assertLessEqual(math.fabs(caculated - expected), eps) + distance = cosine(backmapped_vec, d2v_inferred_vector) + self.assertLessEqual(distance, 0.1) if __name__ == '__main__': diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py index b610047a84..9e0c83c946 100644 --- a/gensim/test/test_word2vec.py +++ b/gensim/test/test_word2vec.py @@ -75,8 +75,8 @@ def testBuildVocabFromFreq(self): 'survey': 2, 'user': 3, 'human': 2, 'time': 2, 'interface': 2, 'response': 2 } - model_hs = word2vec.Word2Vec(size=10, min_count=0, seed=42, hs=1, negative=0) - model_neg = word2vec.Word2Vec(size=10, min_count=0, seed=42, hs=0, negative=5) + model_hs = word2vec.Word2Vec(vector_size=10, min_count=0, seed=42, hs=1, negative=0) + model_neg = word2vec.Word2Vec(vector_size=10, min_count=0, seed=42, hs=0, negative=5) model_hs.build_vocab_from_freq(freq_dict) model_neg.build_vocab_from_freq(freq_dict) self.assertEqual(len(model_hs.wv.vocab), 12) @@ -123,7 +123,7 @@ def testPruneVocab(self): ["system", "eps"], ["graph", "system"] ] - model = word2vec.Word2Vec(sentences, size=10, min_count=0, max_vocab_size=2, seed=42, hs=1, negative=0) + model = 
word2vec.Word2Vec(sentences, vector_size=10, min_count=0, max_vocab_size=2, seed=42, hs=1, negative=0) self.assertEqual(len(model.wv.vocab), 2) self.assertEqual(model.wv.vocab['graph'].count, 3) self.assertEqual(model.wv.vocab['system'].count, 4) @@ -135,43 +135,43 @@ def testPruneVocab(self): ["graph", "system"], ["minors", "survey", "minors", "survey", "minors"] ] - model = word2vec.Word2Vec(sentences, size=10, min_count=0, max_vocab_size=2, seed=42, hs=1, negative=0) + model = word2vec.Word2Vec(sentences, vector_size=10, min_count=0, max_vocab_size=2, seed=42, hs=1, negative=0) self.assertEqual(len(model.wv.vocab), 3) self.assertEqual(model.wv.vocab['graph'].count, 3) self.assertEqual(model.wv.vocab['minors'].count, 3) self.assertEqual(model.wv.vocab['system'].count, 4) def testTotalWordCount(self): - model = word2vec.Word2Vec(size=10, min_count=0, seed=42) - total_words = model.vocabulary.scan_vocab(sentences)[0] + model = word2vec.Word2Vec(vector_size=10, min_count=0, seed=42) + total_words = model.scan_vocab(sentences)[0] self.assertEqual(total_words, 29) def testMaxFinalVocab(self): # Test for less restricting effect of max_final_vocab # max_final_vocab is specified but has no effect - model = word2vec.Word2Vec(size=10, max_final_vocab=4, min_count=4, sample=0) - model.vocabulary.scan_vocab(sentences) - reported_values = model.vocabulary.prepare_vocab(wv=model.wv, hs=0, negative=0) + model = word2vec.Word2Vec(vector_size=10, max_final_vocab=4, min_count=4, sample=0) + model.scan_vocab(sentences) + reported_values = model.prepare_vocab() self.assertEqual(reported_values['drop_unique'], 11) self.assertEqual(reported_values['retain_total'], 4) self.assertEqual(reported_values['num_retained_words'], 1) - self.assertEqual(model.vocabulary.effective_min_count, 4) + self.assertEqual(model.effective_min_count, 4) # Test for more restricting effect of max_final_vocab # results in setting a min_count more restricting than specified min_count - model = word2vec.Word2Vec(size=10, max_final_vocab=4, min_count=2, sample=0) - model.vocabulary.scan_vocab(sentences) - reported_values = model.vocabulary.prepare_vocab(wv=model.wv, hs=0, negative=0) + model = word2vec.Word2Vec(vector_size=10, max_final_vocab=4, min_count=2, sample=0) + model.scan_vocab(sentences) + reported_values = model.prepare_vocab() self.assertEqual(reported_values['drop_unique'], 8) self.assertEqual(reported_values['retain_total'], 13) self.assertEqual(reported_values['num_retained_words'], 4) - self.assertEqual(model.vocabulary.effective_min_count, 3) + self.assertEqual(model.effective_min_count, 3) def testOnlineLearning(self): """Test that the algorithm is able to add new words to the vocabulary and to a trained model when using a sorted vocabulary""" - model_hs = word2vec.Word2Vec(sentences, size=10, min_count=0, seed=42, hs=1, negative=0) - model_neg = word2vec.Word2Vec(sentences, size=10, min_count=0, seed=42, hs=0, negative=5) + model_hs = word2vec.Word2Vec(sentences, vector_size=10, min_count=0, seed=42, hs=1, negative=0) + model_neg = word2vec.Word2Vec(sentences, vector_size=10, min_count=0, seed=42, hs=0, negative=5) self.assertTrue(len(model_hs.wv.vocab), 12) self.assertTrue(model_hs.wv.vocab['graph'].count, 3) model_hs.build_vocab(new_sentences, update=True) @@ -185,7 +185,7 @@ def testOnlineLearningAfterSave(self): """Test that the algorithm is able to add new words to the vocabulary and to a trained model when using a sorted vocabulary""" tmpf = get_tmpfile('gensim_word2vec.tst') - model_neg = 
word2vec.Word2Vec(sentences, size=10, min_count=0, seed=42, hs=0, negative=5) + model_neg = word2vec.Word2Vec(sentences, vector_size=10, min_count=0, seed=42, hs=0, negative=5) model_neg.save(tmpf) model_neg = word2vec.Word2Vec.load(tmpf) self.assertTrue(len(model_neg.wv.vocab), 12) @@ -202,8 +202,10 @@ def testOnlineLearningFromFile(self): utils.save_as_line_sentence(sentences, corpus_file) utils.save_as_line_sentence(new_sentences, new_corpus_file) - model_hs = word2vec.Word2Vec(corpus_file=corpus_file, size=10, min_count=0, seed=42, hs=1, negative=0) - model_neg = word2vec.Word2Vec(corpus_file=corpus_file, size=10, min_count=0, seed=42, hs=0, negative=5) + model_hs = word2vec.Word2Vec(corpus_file=corpus_file, vector_size=10, min_count=0, seed=42, + hs=1, negative=0) + model_neg = word2vec.Word2Vec(corpus_file=corpus_file, vector_size=10, min_count=0, seed=42, + hs=0, negative=5) self.assertTrue(len(model_hs.wv.vocab), 12) self.assertTrue(model_hs.wv.vocab['graph'].count, 3) model_hs.build_vocab(corpus_file=new_corpus_file, update=True) @@ -227,7 +229,8 @@ def testOnlineLearningAfterSaveFromFile(self): utils.save_as_line_sentence(new_sentences, new_corpus_file) tmpf = get_tmpfile('gensim_word2vec.tst') - model_neg = word2vec.Word2Vec(corpus_file=corpus_file, size=10, min_count=0, seed=42, hs=0, negative=5) + model_neg = word2vec.Word2Vec(corpus_file=corpus_file, vector_size=10, min_count=0, seed=42, + hs=0, negative=5) model_neg.save(tmpf) model_neg = word2vec.Word2Vec.load(tmpf) self.assertTrue(len(model_neg.wv.vocab), 12) @@ -260,19 +263,19 @@ def onlineSanity(self, model, trained_model=False): def test_sg_hs_online(self): """Test skipgram w/ hierarchical softmax""" - model = word2vec.Word2Vec(sg=1, window=5, hs=1, negative=0, min_count=3, iter=10, seed=42, workers=2) + model = word2vec.Word2Vec(sg=1, window=5, hs=1, negative=0, min_count=3, epochs=10, seed=42, workers=2) self.onlineSanity(model) def test_sg_neg_online(self): """Test skipgram w/ negative sampling""" - model = word2vec.Word2Vec(sg=1, window=4, hs=0, negative=15, min_count=3, iter=10, seed=42, workers=2) + model = word2vec.Word2Vec(sg=1, window=4, hs=0, negative=15, min_count=3, epochs=10, seed=42, workers=2) self.onlineSanity(model) def test_cbow_hs_online(self): """Test CBOW w/ hierarchical softmax""" model = word2vec.Word2Vec( sg=0, cbow_mean=1, alpha=0.05, window=5, hs=1, negative=0, - min_count=3, iter=10, seed=42, workers=2 + min_count=3, epochs=10, seed=42, workers=2 ) self.onlineSanity(model) @@ -280,7 +283,7 @@ def test_cbow_neg_online(self): """Test CBOW w/ negative sampling""" model = word2vec.Word2Vec( sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, - min_count=5, iter=10, seed=42, workers=2, sample=0 + min_count=5, epochs=10, seed=42, workers=2, sample=0 ) self.onlineSanity(model) @@ -356,7 +359,7 @@ def testVectorsNormNotSaved(self): loaded_kv = keyedvectors.KeyedVectors.load(tmpf) self.assertTrue(loaded_kv.vectors_norm is None) - def testLoadPreKeyedVectorModel(self): + def obsolete_testLoadPreKeyedVectorModel(self): """Test loading pre-KeyedVectors word2vec model""" if sys.version_info[:2] == (3, 4): @@ -370,13 +373,13 @@ def testLoadPreKeyedVectorModel(self): model_file = 'word2vec_pre_kv%s' % model_file_suffix model = word2vec.Word2Vec.load(datapath(model_file)) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), model.vector_size)) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) + self.assertTrue(model.syn1neg.shape == 
(len(model.wv.vocab), model.vector_size)) # Model stored in multiple files model_file = 'word2vec_pre_kv_sep%s' % model_file_suffix model = word2vec.Word2Vec.load(datapath(model_file)) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), model.vector_size)) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.vector_size)) def testLoadPreKeyedVectorModelCFormat(self): """Test loading pre-KeyedVectors word2vec model saved in word2vec format""" @@ -479,6 +482,8 @@ def testPersistenceWord2VecFormatCombinationWithStandardPersistence(self): testvocab = get_tmpfile('gensim_word2vec.vocab') model.wv.save_word2vec_format(tmpf, testvocab, binary=True) binary_model_with_vocab_kv = keyedvectors.KeyedVectors.load_word2vec_format(tmpf, testvocab, binary=True) + print("BIN") + print(binary_model_with_vocab_kv) binary_model_with_vocab_kv.save(tmpf) self.assertRaises(AttributeError, word2vec.Word2Vec.load, tmpf) @@ -524,11 +529,11 @@ def testVocab(self): def testTraining(self): """Test word2vec training.""" # build vocabulary, don't train yet - model = word2vec.Word2Vec(size=2, min_count=1, hs=1, negative=0) + model = word2vec.Word2Vec(vector_size=2, min_count=1, hs=1, negative=0) model.build_vocab(sentences) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), 2)) - self.assertTrue(model.trainables.syn1.shape == (len(model.wv.vocab), 2)) + self.assertTrue(model.syn1.shape == (len(model.wv.vocab), 2)) model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs) sims = model.wv.most_similar('graph', topn=10) @@ -541,7 +546,7 @@ def testTraining(self): self.assertEqual(sims, sims2) # build vocab and train in one step; must be the same as above - model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, hs=1, negative=0) + model2 = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, hs=1, negative=0) self.models_equal(model, model2) @unittest.skipIf(os.name == 'nt' and six.PY2, "CythonLineSentence is not supported on Windows + Py27") @@ -551,11 +556,11 @@ def testTrainingFromFile(self): with temporary_file(get_tmpfile('gensim_word2vec.tst')) as tf: utils.save_as_line_sentence(sentences, tf) - model = word2vec.Word2Vec(size=2, min_count=1, hs=1, negative=0) + model = word2vec.Word2Vec(vector_size=2, min_count=1, hs=1, negative=0) model.build_vocab(corpus_file=tf) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), 2)) - self.assertTrue(model.trainables.syn1.shape == (len(model.wv.vocab), 2)) + self.assertTrue(model.syn1.shape == (len(model.wv.vocab), 2)) model.train(corpus_file=tf, total_words=model.corpus_total_words, epochs=model.epochs) sims = model.wv.most_similar('graph', topn=10) @@ -569,7 +574,7 @@ def testTrainingFromFile(self): def testScoring(self): """Test word2vec scoring.""" - model = word2vec.Word2Vec(sentences, size=2, min_count=1, hs=1, negative=0) + model = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, hs=1, negative=0) # just score and make sure they exist scores = model.score(sentences, len(sentences)) @@ -580,14 +585,14 @@ def testLocking(self): corpus = LeeCorpus() # build vocabulary, don't train yet for sg in range(2): # test both cbow and sg - model = word2vec.Word2Vec(size=4, hs=1, negative=5, min_count=1, sg=sg, window=5) + model = word2vec.Word2Vec(vector_size=4, hs=1, negative=5, min_count=1, sg=sg, window=5) model.build_vocab(corpus) # remember two vectors locked0 = np.copy(model.wv.vectors[0]) unlocked1 = 
np.copy(model.wv.vectors[1]) # lock the vector in slot 0 against change - model.trainables.vectors_lockf[0] = 0.0 + model.wv.vectors_lockf[0] = 0.0 model.train(corpus, total_examples=model.corpus_count, epochs=model.epochs) self.assertFalse((unlocked1 == model.wv.vectors[1]).all()) # unlocked vector should vary @@ -609,7 +614,7 @@ def testEvaluateWordAnalogies(self): def testEvaluateWordPairs(self): """Test Spearman and Pearson correlation coefficients give sane results on similarity datasets""" corpus = word2vec.LineSentence(datapath('head500.noblanks.cor.bz2')) - model = word2vec.Word2Vec(corpus, min_count=3, iter=10) + model = word2vec.Word2Vec(corpus, min_count=3, epochs=10) correlation = model.wv.evaluate_word_pairs(datapath('wordsim353.tsv')) pearson = correlation[0][0] spearman = correlation[1][0] @@ -624,7 +629,7 @@ def testEvaluateWordPairsFromFile(self): with temporary_file(get_tmpfile('gensim_word2vec.tst')) as tf: utils.save_as_line_sentence(word2vec.LineSentence(datapath('head500.noblanks.cor.bz2')), tf) - model = word2vec.Word2Vec(corpus_file=tf, min_count=3, iter=10) + model = word2vec.Word2Vec(corpus_file=tf, min_count=3, epochs=10) correlation = model.wv.evaluate_word_pairs(datapath('wordsim353.tsv')) pearson = correlation[0][0] spearman = correlation[1][0] @@ -658,29 +663,29 @@ def model_sanity(self, model, train=True, with_corpus_file=False): def test_sg_hs(self): """Test skipgram w/ hierarchical softmax""" - model = word2vec.Word2Vec(sg=1, window=4, hs=1, negative=0, min_count=5, iter=10, workers=2) + model = word2vec.Word2Vec(sg=1, window=4, hs=1, negative=0, min_count=5, epochs=10, workers=2) self.model_sanity(model) @unittest.skipIf(os.name == 'nt' and six.PY2, "CythonLineSentence is not supported on Windows + Py27") def test_sg_hs_fromfile(self): - model = word2vec.Word2Vec(sg=1, window=4, hs=1, negative=0, min_count=5, iter=10, workers=2) + model = word2vec.Word2Vec(sg=1, window=4, hs=1, negative=0, min_count=5, epochs=10, workers=2) self.model_sanity(model, with_corpus_file=True) def test_sg_neg(self): """Test skipgram w/ negative sampling""" - model = word2vec.Word2Vec(sg=1, window=4, hs=0, negative=15, min_count=5, iter=10, workers=2) + model = word2vec.Word2Vec(sg=1, window=4, hs=0, negative=15, min_count=5, epochs=10, workers=2) self.model_sanity(model) @unittest.skipIf(os.name == 'nt' and six.PY2, "CythonLineSentence is not supported on Windows + Py27") def test_sg_neg_fromfile(self): - model = word2vec.Word2Vec(sg=1, window=4, hs=0, negative=15, min_count=5, iter=10, workers=2) + model = word2vec.Word2Vec(sg=1, window=4, hs=0, negative=15, min_count=5, epochs=10, workers=2) self.model_sanity(model, with_corpus_file=True) def test_cbow_hs(self): """Test CBOW w/ hierarchical softmax""" model = word2vec.Word2Vec( sg=0, cbow_mean=1, alpha=0.05, window=8, hs=1, negative=0, - min_count=5, iter=10, workers=2, batch_words=1000 + min_count=5, epochs=10, workers=2, batch_words=1000 ) self.model_sanity(model) @@ -688,7 +693,7 @@ def test_cbow_hs(self): def test_cbow_hs_fromfile(self): model = word2vec.Word2Vec( sg=0, cbow_mean=1, alpha=0.05, window=8, hs=1, negative=0, - min_count=5, iter=10, workers=2, batch_words=1000 + min_count=5, epochs=10, workers=2, batch_words=1000 ) self.model_sanity(model, with_corpus_file=True) @@ -696,7 +701,7 @@ def test_cbow_neg(self): """Test CBOW w/ negative sampling""" model = word2vec.Word2Vec( sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, - min_count=5, iter=10, workers=2, sample=0 + min_count=5, epochs=10, workers=2, 
sample=0 ) self.model_sanity(model) @@ -704,12 +709,12 @@ def test_cbow_neg(self): def test_cbow_neg_fromfile(self): model = word2vec.Word2Vec( sg=0, cbow_mean=1, alpha=0.05, window=5, hs=0, negative=15, - min_count=5, iter=10, workers=2, sample=0 + min_count=5, epochs=10, workers=2, sample=0 ) self.model_sanity(model, with_corpus_file=True) def test_cosmul(self): - model = word2vec.Word2Vec(sentences, size=2, min_count=1, hs=1, negative=0) + model = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, hs=1, negative=0) sims = model.wv.most_similar_cosmul('graph', topn=10) # self.assertTrue(sims[0][0] == 'trees', sims) # most similar @@ -723,10 +728,10 @@ def testTrainingCbow(self): """Test CBOW word2vec training.""" # to test training, make the corpus larger by repeating its sentences over and over # build vocabulary, don't train yet - model = word2vec.Word2Vec(size=2, min_count=1, sg=0, hs=1, negative=0) + model = word2vec.Word2Vec(vector_size=2, min_count=1, sg=0, hs=1, negative=0) model.build_vocab(sentences) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), 2)) - self.assertTrue(model.trainables.syn1.shape == (len(model.wv.vocab), 2)) + self.assertTrue(model.syn1.shape == (len(model.wv.vocab), 2)) model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs) sims = model.wv.most_similar('graph', topn=10) @@ -739,17 +744,17 @@ def testTrainingCbow(self): self.assertEqual(sims, sims2) # build vocab and train in one step; must be the same as above - model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, sg=0, hs=1, negative=0) + model2 = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, sg=0, hs=1, negative=0) self.models_equal(model, model2) def testTrainingSgNegative(self): """Test skip-gram (negative sampling) word2vec training.""" # to test training, make the corpus larger by repeating its sentences over and over # build vocabulary, don't train yet - model = word2vec.Word2Vec(size=2, min_count=1, sg=1, hs=0, negative=2) + model = word2vec.Word2Vec(vector_size=2, min_count=1, sg=1, hs=0, negative=2) model.build_vocab(sentences) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), 2)) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), 2)) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), 2)) model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs) sims = model.wv.most_similar('graph', topn=10) @@ -762,17 +767,17 @@ def testTrainingSgNegative(self): self.assertEqual(sims, sims2) # build vocab and train in one step; must be the same as above - model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, sg=1, hs=0, negative=2) + model2 = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, sg=1, hs=0, negative=2) self.models_equal(model, model2) def testTrainingCbowNegative(self): """Test CBOW (negative sampling) word2vec training.""" # to test training, make the corpus larger by repeating its sentences over and over # build vocabulary, don't train yet - model = word2vec.Word2Vec(size=2, min_count=1, sg=0, hs=0, negative=2) + model = word2vec.Word2Vec(vector_size=2, min_count=1, sg=0, hs=0, negative=2) model.build_vocab(sentences) self.assertTrue(model.wv.vectors.shape == (len(model.wv.vocab), 2)) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), 2)) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), 2)) model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs) sims = model.wv.most_similar('graph', topn=10) @@ 
-785,13 +790,13 @@ def testTrainingCbowNegative(self): self.assertEqual(sims, sims2) # build vocab and train in one step; must be the same as above - model2 = word2vec.Word2Vec(sentences, size=2, min_count=1, sg=0, hs=0, negative=2) + model2 = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, sg=0, hs=0, negative=2) self.models_equal(model, model2) def testSimilarities(self): """Test similarity and n_similarity methods.""" # The model is trained using CBOW - model = word2vec.Word2Vec(size=2, min_count=1, sg=0, hs=0, negative=2) + model = word2vec.Word2Vec(vector_size=2, min_count=1, sg=0, hs=0, negative=2) model.build_vocab(sentences) model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs) @@ -803,7 +808,7 @@ def testSimilarities(self): def testSimilarBy(self): """Test word2vec similar_by_word and similar_by_vector.""" - model = word2vec.Word2Vec(sentences, size=2, min_count=1, hs=1, negative=0) + model = word2vec.Word2Vec(sentences, vector_size=2, min_count=1, hs=1, negative=0) wordsims = model.wv.similar_by_word('graph', topn=10) wordsims2 = model.wv.most_similar(positive='graph', topn=10) vectorsims = model.wv.similar_by_vector(model.wv['graph'], topn=10) @@ -833,9 +838,9 @@ def models_equal(self, model, model2): self.assertEqual(len(model.wv.vocab), len(model2.wv.vocab)) self.assertTrue(np.allclose(model.wv.vectors, model2.wv.vectors)) if model.hs: - self.assertTrue(np.allclose(model.trainables.syn1, model2.trainables.syn1)) + self.assertTrue(np.allclose(model.syn1, model2.syn1)) if model.negative: - self.assertTrue(np.allclose(model.trainables.syn1neg, model2.trainables.syn1neg)) + self.assertTrue(np.allclose(model.syn1neg, model2.syn1neg)) most_common_word = max(model.wv.vocab.items(), key=lambda item: item[1].count)[0] self.assertTrue(np.allclose(model.wv[most_common_word], model2.wv[most_common_word])) @@ -871,9 +876,9 @@ def testLoadOldModel(self): self.assertTrue(model.wv.vectors.shape == (12, 100)) self.assertTrue(len(model.wv.vocab) == 12) self.assertTrue(len(model.wv.index2word) == 12) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.wv.vector_size)) - self.assertTrue(model.trainables.vectors_lockf.shape == (12,)) - self.assertTrue(model.vocabulary.cum_table.shape == (12,)) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.wv.vector_size)) + self.assertTrue(model.wv.vectors_lockf.shape == (12,)) + self.assertTrue(model.cum_table.shape == (12,)) self.onlineSanity(model, trained_model=True) @@ -886,13 +891,13 @@ def testLoadOldModelSeparates(self): self.assertTrue(model.wv.vectors.shape == (12, 100)) self.assertTrue(len(model.wv.vocab) == 12) self.assertTrue(len(model.wv.index2word) == 12) - self.assertTrue(model.trainables.syn1neg.shape == (len(model.wv.vocab), model.wv.vector_size)) - self.assertTrue(model.trainables.vectors_lockf.shape == (12,)) - self.assertTrue(model.vocabulary.cum_table.shape == (12,)) + self.assertTrue(model.syn1neg.shape == (len(model.wv.vocab), model.wv.vector_size)) + self.assertTrue(model.wv.vectors_lockf.shape == (12,)) + self.assertTrue(model.cum_table.shape == (12,)) self.onlineSanity(model, trained_model=True) - def test_load_old_models_pre_1_0(self): + def obsolete_test_load_old_models_pre_1_0(self): """Test loading pre-1.0 models""" # load really old model model_file = 'w2v-lee-v0.12.0' @@ -934,7 +939,7 @@ def test_load_old_models_3_x(self): model_file = 'word2vec_3.3' model = word2vec.Word2Vec.load(datapath(model_file)) self.assertEqual(model.max_final_vocab, None) 
- self.assertEqual(model.vocabulary.max_final_vocab, None) + self.assertEqual(model.max_final_vocab, None) old_versions = [ '3.0.0', '3.1.0', '3.2.0', '3.3.0', '3.4.0' @@ -949,7 +954,14 @@ def _check_old_version(self, old_version): model = word2vec.Word2Vec.load(saved_models_dir.format(old_version)) self.assertIsNone(model.corpus_total_words) self.assertTrue(len(model.wv.vocab) == 3) - self.assertTrue(model.wv.vectors.shape == (3, 4)) + try: + self.assertTrue(model.wv.vectors.shape == (3, 4)) + except AttributeError as ae: + print("WV") + print(model.wv) + print(dir(model.wv)) + print(model.wv.syn0) + raise ae # check if similarity search and online training works. self.assertTrue(len(model.wv.most_similar('sentence')) == 2) model.build_vocab(list_corpus, update=True) @@ -989,7 +1001,7 @@ def testTrainWarning(self, l): self.assertTrue(warning in str(l)) def test_train_with_explicit_param(self): - model = word2vec.Word2Vec(size=2, min_count=1, hs=1, negative=0) + model = word2vec.Word2Vec(vector_size=2, min_count=1, hs=1, negative=0) model.build_vocab(sentences) with self.assertRaises(ValueError): model.train(sentences, total_examples=model.corpus_count)
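
The test updates above consistently exercise the renamed Word2Vec constructor parameters (`size` becomes `vector_size`, `iter` becomes `epochs`) and the flattened internals (`model.syn1`, `model.syn1neg`, `model.wv.vectors_lockf`, `model.cum_table` in place of the former `model.trainables.*` and `model.vocabulary.*` attributes). The sketch below is not part of the patch; it is a minimal illustration of that post-change usage, assuming a gensim build that already includes these renames. The toy corpus and parameter values are invented for illustration (the real tests use the bundled Lee corpus and `common_texts`):

    from gensim.models import Word2Vec

    # Invented toy corpus for illustration only.
    sentences = [
        ["human", "interface", "computer"],
        ["graph", "minors", "trees"],
        ["graph", "system", "survey"],
    ]

    # Renamed parameters: `vector_size` (was `size`) and `epochs` (was `iter`).
    model = Word2Vec(sentences, vector_size=10, min_count=1, epochs=10, seed=42, hs=0, negative=5)

    # Internal weights now live directly on the model and its KeyedVectors
    # (e.g. `model.syn1neg`, `model.wv.vectors_lockf`) rather than under
    # `model.trainables` / `model.vocabulary` helper objects.
    assert model.wv.vectors.shape == (len(model.wv.vocab), model.vector_size)
    assert model.syn1neg.shape == (len(model.wv.vocab), model.vector_size)

Collapsing the helper objects means test code (and user code) addresses trained weights and vocabulary statistics directly on the model, which is exactly what the rewritten assertions in the hunks above check.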