diff --git a/gensim/downloader.py b/gensim/downloader.py
index 6fb362ccad..8a4395440e 100644
--- a/gensim/downloader.py
+++ b/gensim/downloader.py
@@ -50,6 +50,7 @@
By default, this subdirectory is ~/gensim-data.
"""
+
from __future__ import absolute_import
import argparse
import os
diff --git a/gensim/models/_fasttext_bin.py b/gensim/models/_fasttext_bin.py
index 5eeb4ca71a..77549b1351 100644
--- a/gensim/models/_fasttext_bin.py
+++ b/gensim/models/_fasttext_bin.py
@@ -549,7 +549,7 @@ def _dict_save(fout, model, encoding):
# prunedidx_size_=-1, -1 value denotes no prunning index (prunning is only supported in supervised mode)
fout.write(np.int64(-1))
- for word in model.wv.index2word:
+ for word in model.wv.index_to_key:
word_count = model.wv.get_vecattr(word, 'count')
fout.write(word.encode(encoding))
fout.write(_END_OF_WORD_MARKER)
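The hunk above follows the gensim 4.0 rename of `wv.index2word` to `wv.index_to_key` (the same rename appears below in word2vec2tensor.py, annoy.py and nmslib.py). A minimal sketch of the 4.x iteration pattern, assuming a trained FastText model saved at a hypothetical path:

    from gensim.models import FastText

    model = FastText.load("my_fasttext.model")        # hypothetical path; any gensim 4.x model works
    for word in model.wv.index_to_key:                # replaces the removed wv.index2word list
        count = model.wv.get_vecattr(word, 'count')   # per-key attributes live in wv, not in Vocab objects
        print(word, count)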
diff --git a/gensim/models/coherencemodel.py b/gensim/models/coherencemodel.py
index 9633a2e62f..70fea79804 100644
--- a/gensim/models/coherencemodel.py
+++ b/gensim/models/coherencemodel.py
@@ -25,6 +25,7 @@
Internal functions for pipelines.
"""
+
import logging
import multiprocessing as mp
from collections import namedtuple
@@ -33,9 +34,11 @@
from gensim import interfaces, matutils
from gensim import utils
-from gensim.topic_coherence import (segmentation, probability_estimation,
- direct_confirmation_measure, indirect_confirmation_measure,
- aggregation)
+from gensim.topic_coherence import (
+ segmentation, probability_estimation,
+ direct_confirmation_measure, indirect_confirmation_measure,
+ aggregation,
+)
from gensim.topic_coherence.probability_estimation import unique_ids_from_segments
logger = logging.getLogger(__name__)
diff --git a/gensim/models/doc2vec.py b/gensim/models/doc2vec.py
index f7d43d8e70..1fa7335da6 100644
--- a/gensim/models/doc2vec.py
+++ b/gensim/models/doc2vec.py
@@ -799,7 +799,7 @@ def load(cls, *args, **kwargs):
except AttributeError as ae:
logger.error(
"Model load error. Was model saved using code from an older Gensim Version? "
- "Try loading older model using gensim-3.8.1, then re-saving, to restore "
+ "Try loading older model using gensim-3.8.3, then re-saving, to restore "
"compatibility with current code.")
raise ae
diff --git a/gensim/models/fasttext.py b/gensim/models/fasttext.py
index 42a69b8bc5..3476f7c5dc 100644
--- a/gensim/models/fasttext.py
+++ b/gensim/models/fasttext.py
@@ -827,7 +827,6 @@ def save(self, *args, **kwargs):
Load :class:`~gensim.models.fasttext.FastText` model.
"""
- kwargs['ignore'] = kwargs.get('ignore', []) + ['buckets_word', ]
super(FastText, self).save(*args, **kwargs)
@classmethod
@@ -850,25 +849,15 @@ def load(cls, *args, **kwargs):
Save :class:`~gensim.models.fasttext.FastText` model.
"""
- model = super(FastText, cls).load(*args, rethrow=True, **kwargs)
-
- if not hasattr(model.wv, 'vectors_vocab_lockf') and hasattr(model.wv, 'vectors_vocab'):
- # TODO: try trainables-location
- model.wv.vectors_vocab_lockf = ones(1, dtype=REAL)
- if not hasattr(model, 'vectors_ngrams_lockf') and hasattr(model.wv, 'vectors_ngrams'):
- # TODO: try trainables-location
- model.wv.vectors_ngrams_lockf = ones(1, dtype=REAL)
- # fixup mistakenly overdimensioned gensim-3.x lockf arrays
- if len(model.wv.vectors_vocab_lockf.shape) > 1:
- model.wv.vectors_vocab_lockf = ones(1, dtype=REAL)
- if len(model.wv.vectors_ngrams_lockf.shape) > 1:
- model.wv.vectors_ngrams_lockf = ones(1, dtype=REAL)
- if hasattr(model, 'bucket'):
- del model.bucket # should only exist in one place: the wv subcomponent
- if not hasattr(model.wv, 'buckets_word') or not model.wv.buckets_word:
- model.wv.recalc_char_ngram_buckets()
+ return super(FastText, cls).load(*args, rethrow=True, **kwargs)
- return model
+ def _load_specials(self, *args, **kwargs):
+ """Handle special requirements of `.load()` protocol, usually up-converting older versions."""
+ super(FastText, self)._load_specials(*args, **kwargs)
+ if hasattr(self, 'bucket'):
+ # should only exist in one place: the wv subcomponent
+ self.wv.bucket = self.bucket
+ del self.bucket
class FastTextVocab(utils.SaveLoad):
@@ -1202,12 +1191,49 @@ def __init__(self, vector_size, min_n, max_n, bucket):
@classmethod
def load(cls, fname_or_handle, **kwargs):
- model = super(FastTextKeyedVectors, cls).load(fname_or_handle, **kwargs)
- if isinstance(model, FastTextKeyedVectors):
- if not hasattr(model, 'compatible_hash') or model.compatible_hash is False:
- raise TypeError("Pre-gensim-3.8.x Fasttext models with nonstandard hashing are no longer compatible."
- "Loading into gensim-3.8.3 & re-saving may create a compatible model.")
- return model
+ """Load a previously saved `FastTextKeyedVectors` model.
+
+ Parameters
+ ----------
+ fname : str
+ Path to the saved file.
+
+ Returns
+ -------
+ :class:`~gensim.models.fasttext.FastTextKeyedVectors`
+ Loaded model.
+
+ See Also
+ --------
+ :meth:`~gensim.models.fasttext.FastTextKeyedVectors.save`
+ Save :class:`~gensim.models.fasttext.FastTextKeyedVectors` model.
+
+ """
+ return super(FastTextKeyedVectors, cls).load(fname_or_handle, **kwargs)
+
+ def _load_specials(self, *args, **kwargs):
+ """Handle special requirements of `.load()` protocol, usually up-converting older versions."""
+ super(FastTextKeyedVectors, self)._load_specials(*args, **kwargs)
+ if not isinstance(self, FastTextKeyedVectors):
+ raise TypeError("Loaded object of type %s, not expected FastTextKeyedVectors" % type(self))
+ if not hasattr(self, 'compatible_hash') or self.compatible_hash is False:
+ raise TypeError(
+ "Pre-gensim-3.8.x fastText models with nonstandard hashing are no longer compatible. "
+ "Loading your old model into gensim-3.8.3 & re-saving may create a model compatible with gensim 4.x."
+ )
+ if not hasattr(self, 'vectors_vocab_lockf') and hasattr(self, 'vectors_vocab'):
+ self.vectors_vocab_lockf = ones(1, dtype=REAL)
+ if not hasattr(self, 'vectors_ngrams_lockf') and hasattr(self, 'vectors_ngrams'):
+ self.vectors_ngrams_lockf = ones(1, dtype=REAL)
+ # fixup mistakenly overdimensioned gensim-3.x lockf arrays
+ if len(self.vectors_vocab_lockf.shape) > 1:
+ self.vectors_vocab_lockf = ones(1, dtype=REAL)
+ if len(self.vectors_ngrams_lockf.shape) > 1:
+ self.vectors_ngrams_lockf = ones(1, dtype=REAL)
+ if not hasattr(self, 'buckets_word') or not self.buckets_word:
+ self.recalc_char_ngram_buckets()
+ if not hasattr(self, 'vectors') or self.vectors is None:
+ self.adjust_vectors() # recompose full-word vectors
def __contains__(self, word):
"""Check if `word` or any character ngrams in `word` are present in the vocabulary.
@@ -1255,14 +1281,15 @@ def save(self, *args, **kwargs):
Load object.
"""
- # don't bother storing the cached normalized vectors
- ignore_attrs = [
- 'buckets_word',
- 'hash2index',
- ]
- kwargs['ignore'] = kwargs.get('ignore', ignore_attrs)
super(FastTextKeyedVectors, self).save(*args, **kwargs)
+ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, compress, subname):
+ """Arrange any special handling for the gensim.utils.SaveLoad protocol"""
+ # don't save properties that are merely calculated from others
+ ignore = set(ignore).union(['buckets_word', 'vectors', ])
+ return super(FastTextKeyedVectors, self)._save_specials(
+ fname, separately, sep_limit, ignore, pickle_protocol, compress, subname)
+
def get_vector(self, word, norm=False):
"""Get `word` representations in vector space, as a 1D numpy array.
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
index 0846dcb78a..5d5f01e84c 100644
--- a/gensim/models/keyedvectors.py
+++ b/gensim/models/keyedvectors.py
@@ -101,12 +101,16 @@
>>>
>>> word_vectors = api.load("glove-wiki-gigaword-100") # load pre-trained word-vectors from gensim-data
>>>
+ >>> # Check the "most similar words", using the default "cosine similarity" measure.
>>> result = word_vectors.most_similar(positive=['woman', 'king'], negative=['man'])
- >>> print("{}: {:.4f}".format(*result[0]))
+ >>> most_similar_key, similarity = result[0] # look at the first match
+ >>> print(f"{most_similar_key}: {similarity:.4f}")
queen: 0.7699
>>>
+ >>> # Use a different similarity measure: "cosmul".
>>> result = word_vectors.most_similar_cosmul(positive=['woman', 'king'], negative=['man'])
- >>> print("{}: {:.4f}".format(*result[0]))
+ >>> most_similar_key, similarity = result[0] # look at the first match
+ >>> print(f"{most_similar_key}: {similarity:.4f}")
queen: 0.8965
>>>
>>> print(word_vectors.doesnt_match("breakfast cereal dinner lunch".split()))
@@ -117,22 +121,23 @@
True
>>>
>>> result = word_vectors.similar_by_word("cat")
- >>> print("{}: {:.4f}".format(*result[0]))
+ >>> most_similar_key, similarity = result[0] # look at the first match
+ >>> print(f"{most_similar_key}: {similarity:.4f}")
dog: 0.8798
>>>
>>> sentence_obama = 'Obama speaks to the media in Illinois'.lower().split()
>>> sentence_president = 'The president greets the press in Chicago'.lower().split()
>>>
>>> similarity = word_vectors.wmdistance(sentence_obama, sentence_president)
- >>> print("{:.4f}".format(similarity))
+ >>> print(f"{similarity:.4f}")
3.4893
>>>
>>> distance = word_vectors.distance("media", "media")
- >>> print("{:.1f}".format(distance))
+ >>> print(f"{distance:.1f}")
0.0
>>>
- >>> sim = word_vectors.n_similarity(['sushi', 'shop'], ['japanese', 'restaurant'])
- >>> print("{:.4f}".format(sim))
+ >>> similarity = word_vectors.n_similarity(['sushi', 'shop'], ['japanese', 'restaurant'])
+ >>> print(f"{similarity:.4f}")
0.7067
>>>
>>> vector = word_vectors['computer'] # numpy vector of a word
@@ -219,7 +224,7 @@ def _load_specials(self, *args, **kwargs):
self._upconvert_old_d2vkv()
# fixup rename/consolidation into index_to_key of older index2word, index2entity
if not hasattr(self, 'index_to_key'):
- self.index_to_key = self.__dict__.pop('index2word', self.__dict__.pop('index2word', None))
+ self.index_to_key = self.__dict__.pop('index2word', self.__dict__.pop('index2entity', None))
# fixup rename into vectors of older syn0
if not hasattr(self, 'vectors'):
self.vectors = self.__dict__.pop('syn0', None)
@@ -267,22 +272,54 @@ def allocate_vecattrs(self, attrs=None, types=None):
continue
prev_expando = self.expandos[attr]
if not np.issubdtype(t, prev_expando.dtype):
- raise TypeError("can't allocate {0} for existing {1}".format(t, prev_expando.dtype))
+ raise TypeError(f"can't allocate {t} for existing {prev_expando.dtype}")
if len(prev_expando) == target_size:
continue # no resizing necessary
prev_count = len(prev_expando)
self.expandos[attr] = np.zeros(target_size, dtype=prev_expando.dtype)
- self.expandos[attr][0:min(prev_count, target_size), ] = \
- prev_expando[0:min(prev_count, target_size), ]
+ self.expandos[attr][: min(prev_count, target_size), ] = prev_expando[: min(prev_count, target_size), ]
def set_vecattr(self, key, attr, val):
- """Set attribute associated with given key to value. TODO: param docs"""
+ """Set attribute associated with the given key to value.
+
+ Parameters
+ ----------
+
+ key : str
+ Store the attribute for this vector key.
+ attr : str
+ Name of the additional attribute to store for the given key.
+ val : object
+ Value of the additional attribute to store for the given key.
+
+ Returns
+ -------
+
+ None
+
+ """
self.allocate_vecattrs(attrs=[attr], types=[type(val)])
index = self.get_index(key)
self.expandos[attr][index] = val
def get_vecattr(self, key, attr):
- """Get attribute value associate with given key. TODO: param docs"""
+ """Get attribute value associated with given key.
+
+ Parameters
+ ----------
+
+ key : str
+ Vector key for which to fetch the attribute value.
+ attr : str
+ Name of the additional attribute to fetch for the given key.
+
+ Returns
+ -------
+
+ object
+ Value of the additional attribute fetched for the given key.
+
+ """
index = self.get_index(key)
return self.expandos[attr][index]
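The docstrings added above describe the per-key attribute store (`expandos`) that replaces the old per-word `Vocab` objects. A small sketch, assuming gensim 4.x; the key, attribute name and value are illustrative:

    import numpy as np
    from gensim.models import KeyedVectors

    kv = KeyedVectors(vector_size=3)
    kv["apple"] = np.array([0.1, 0.2, 0.3], dtype=np.float32)   # __setitem__ adds the key and its vector
    kv.set_vecattr("apple", "count", 42)                        # allocates the 'count' array on first use
    assert kv.get_vecattr("apple", "count") == 42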
@@ -351,13 +388,14 @@ def get_index(self, key, default=None):
elif default is not None:
return default
else:
- raise KeyError("Key '%s' not present" % key)
+ raise KeyError(f"Key '{key}' not present")
def get_vector(self, key, norm=False):
"""Get the key's vector, as a 1D numpy array.
Parameters
----------
+
key : str
Key for vector to return.
norm : bool, optional
@@ -365,11 +403,13 @@ def get_vector(self, key, norm=False):
Returns
-------
+
numpy.ndarray
Vector for the specified key.
Raises
------
+
KeyError
If the given key doesn't exist.
@@ -386,18 +426,30 @@ def get_vector(self, key, norm=False):
@deprecated("Use get_vector instead")
def word_vec(self, *args, **kwargs):
- """Compatibility alias for get_vector(); must exist so subclass calls reach subclass get_vector()"""
+ """Compatibility alias for get_vector(); must exist so subclass calls reach subclass get_vector()."""
return self.get_vector(*args, **kwargs)
- def add_one(self, key, vector):
+ def add_vector(self, key, vector):
"""Add one new vector at the given key, into existing slot if available.
Warning: using this repeatedly is inefficient, requiring a full reallocation & copy,
- if this instance hasn't been preallocated to be ready fro such incremental additions.
+ if this instance hasn't been preallocated to be ready for such incremental additions.
- returns: actual index used TODO: other param docs
- """
+ Parameters
+ ----------
+
+ key: str
+ Key identifier of the added vector.
+ vector: numpy.ndarray
+ 1D numpy array with the vector values.
+
+ Returns
+ -------
+ int
+ Index of the newly added vector, so that ``self.vectors[result] == vector`` and
+ ``self.index_to_key[result] == key``.
+ """
target_index = self.next_index
if target_index >= len(self) or self.index_to_key[target_index] is not None:
# must append at end by expanding existing structures
@@ -406,7 +458,7 @@ def add_one(self, key, vector):
"Adding single vectors to a KeyedVectors which grows by one each time can be costly. "
"Consider adding in batches or preallocating to the required size.",
UserWarning)
- self.add([key], [vector])
+ self.add_vectors([key], [vector])
self.allocate_vecattrs() # grow any adjunct arrays
self.next_index = target_index + 1
else:
@@ -417,14 +469,14 @@ def add_one(self, key, vector):
self.next_index += 1
return target_index
- def add(self, keys, weights, extras=None, replace=False):
+ def add_vectors(self, keys, weights, extras=None, replace=False):
"""Append keys and their vectors in a manual way.
If some key is already in the vocabulary, the old vector is kept unless `replace` flag is True.
Parameters
----------
keys : list of (str or int)
- keys specified by string or int ids.
+ Keys specified by string or int ids.
weights: list of numpy.ndarray or numpy.ndarray
List of 1D np.array vectors or a 2D np.array of vectors.
replace: bool, optional
@@ -484,7 +536,7 @@ def __setitem__(self, keys, weights):
keys = [keys]
weights = weights.reshape(1, -1)
- self.add(keys, weights, replace=True)
+ self.add_vectors(keys, weights, replace=True)
def has_index_for(self, key):
"""Can this model return a single index for this key?
@@ -522,19 +574,29 @@ def rank(self, key1, key2):
@property
def vectors_norm(self):
- raise ValueError(
- "The vectors_norm attribute became a get_normed_vectors() method in Gensim 4.0.0. "
+ raise AttributeError(
+ "The `.vectors_norm` attribute is computed dynamically since Gensim 4.0.0. "
+ "Use `.get_normed_vectors()` instead.\n"
"See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
)
@vectors_norm.setter
def vectors_norm(self, _):
- pass # no-op; shouldn't be set
+ pass # ignored but must remain for backward serialization compatibility
def get_normed_vectors(self):
- # TODO: what's the way for users to get from a matrix index (integer) to the
- # corresponding key (string)?
- # Shouldn't we return this as a mapping (dict), or even a new KeyedVectors instance?
+ """Get all embedding vectors normalized to unit L2 length (euclidean), as a 2D numpy array.
+
+ To see which key corresponds to which vector = which array row, refer
+ to the :attr:`~gensim.models.keyedvectors.KeyedVectors.index_to_key` attribute.
+
+ Returns
+ -------
+ numpy.ndarray:
+ 2D numpy array of shape ``(number_of_keys, embedding dimensionality)``, L2-normalized
+ along the rows (key vectors).
+
+ """
self.fill_norms()
return self.vectors / self.norms[..., np.newaxis]
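As the new docstring notes, rows of the matrix returned by `get_normed_vectors()` line up with `index_to_key`. A short sketch, assuming `kv` is any loaded KeyedVectors instance and numpy is imported as `np`:

    normed = kv.get_normed_vectors()             # unit-length copies; kv.vectors itself is untouched
    for key, row in zip(kv.index_to_key[:5], normed[:5]):
        print(key, float(np.dot(row, row)))      # squared L2 norm of each row is ~1.0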
@@ -543,7 +605,7 @@ def fill_norms(self, force=False):
Ensure per-vector norms are available.
Any code which modifies vectors should ensure the accompanying norms are
- either recalculated or 'None', to trigger a full recalculation later.
+ either recalculated or 'None', to trigger a full recalculation later on-request.
"""
if self.norms is None or force:
@@ -551,27 +613,33 @@ def fill_norms(self, force=False):
@property
def index2entity(self):
- return self.index_to_key
+ raise AttributeError(
+ "The index2entity attribute has been replaced by index_to_key since Gensim 4.0.0.\n"
+ "See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
+ )
@index2entity.setter
def index2entity(self, value):
- self.index_to_key = value
+ self.index_to_key = value # must remain for backward serialization compatibility
@property
def index2word(self):
- return self.index_to_key
+ raise AttributeError(
+ "The index2word attribute has been replaced by index_to_key since Gensim 4.0.0.\n"
+ "See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
+ )
@index2word.setter
def index2word(self, value):
- self.index_to_key = value
+ self.index_to_key = value # must remain for backward serialization compatibility
@property
def vocab(self):
- raise NotImplementedError(
- "The .vocab dict of 'Vocab' propery objects, one per key, has been removed.\n"
- "See the KeyedVectors .key_to_index dict, .index_to_key list, and methods\n"
- ".get_vecattr(key, attr)/.set_vecattr(key, attr, new_val) for replacement\n"
- "functionality."
+ raise AttributeError(
+ "The vocab attribute was removed from KeyedVector in Gensim 4.0.0.\n"
+ "Use KeyedVector's .key_to_index dict, .index_to_key list, and methods "
+ ".get_vecattr(key, attr) and .set_vecattr(key, attr, new_val) instead.\n"
+ "See https://github.com/RaRe-Technologies/gensim/wiki/Migrating-from-Gensim-3.x-to-4#init_sims"
)
@vocab.setter
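Taken together, the AttributeError messages above spell out the 4.0 migration path for the removed attributes. A hedged before/after sketch, assuming `kv` is a KeyedVectors instance that contains the key "apple":

    words = kv.index_to_key                      # was: kv.index2word / kv.index2entity
    idx = kv.key_to_index["apple"]               # was: kv.vocab["apple"].index
    freq = kv.get_vecattr("apple", "count")      # was: kv.vocab["apple"].count
    normed = kv.get_normed_vectors()             # was: kv.init_sims(); kv.vectors_norm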
@@ -583,7 +651,7 @@ def sort_by_descending_frequency(self):
if not len(self):
return # noop if empty
count_sorted_indexes = np.argsort(self.expandos['count'])[::-1]
- self.index_to_key = list(np.array(self.index_to_key)[count_sorted_indexes])
+ self.index_to_key = [self.index_to_key[idx] for idx in count_sorted_indexes]
self.allocate_vecattrs()
for k in self.expandos:
# Use numpy's "fancy indexing" to permutate the entire array in one step.
@@ -591,11 +659,10 @@ def sort_by_descending_frequency(self):
if len(self.vectors):
logger.warning("sorting after vectors have been allocated is expensive & error-prone")
self.vectors = self.vectors[count_sorted_indexes]
- for i, word in enumerate(self.index_to_key):
- self.key_to_index[word] = i
+ self.key_to_index = {word: i for i, word in enumerate(self.index_to_key)}
def save(self, *args, **kwargs):
- """Save KeyedVectors.
+ """Save KeyedVectors to a file.
Parameters
----------
@@ -605,13 +672,15 @@ def save(self, *args, **kwargs):
See Also
--------
:meth:`~gensim.models.keyedvectors.KeyedVectors.load`
- Load saved model.
+ Load a previously saved model.
"""
super(KeyedVectors, self).save(*args, **kwargs)
- def most_similar(self, positive=None, negative=None, topn=10, clip_start=0, clip_end=None,
- restrict_vocab=None, indexer=None):
+ def most_similar(
+ self, positive=None, negative=None, topn=10, clip_start=0, clip_end=None,
+ restrict_vocab=None, indexer=None,
+ ):
"""Find the top-N most similar keys.
Positive keys contribute positively towards the similarity, negative keys negatively.
@@ -698,8 +767,10 @@ def most_similar(self, positive=None, negative=None, topn=10, clip_start=0, clip
return dists
best = matutils.argsort(dists, topn=topn + len(all_keys), reverse=True)
# ignore (don't return) keys from the input
- result = [(self.index_to_key[sim + clip_start], float(dists[sim]))
- for sim in best if (sim + clip_start) not in all_keys]
+ result = [
+ (self.index_to_key[sim + clip_start], float(dists[sim]))
+ for sim in best if (sim + clip_start) not in all_keys
+ ]
return result[:topn]
def similar_by_word(self, word, topn=10, restrict_vocab=None):
@@ -1647,11 +1718,11 @@ def _add_word_to_kv(kv, counts, word, weights, vocab_size):
if kv.has_index_for(word):
logger.warning("duplicate word '%s' in word2vec file, ignoring all but first", word)
return
- word_id = kv.add_one(word, weights)
+ word_id = kv.add_vector(word, weights)
if counts is None:
- # most common scenario: no vocab file given. just make up some bogus counts, in descending order
- # FIXME(someday): make this faking optional, include more realistic (Zipf-based) fake numbers
+ # Most common scenario: no vocab file given. Just make up some bogus counts, in descending order.
+ # TODO (someday): make this faking optional, include more realistic (Zipf-based) fake numbers.
word_count = vocab_size - word_id
elif word in counts:
# use count from the vocab file
@@ -1797,14 +1868,16 @@ def _load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8'
kv = cls(vector_size, vocab_size, dtype=datatype)
if binary:
- _word2vec_read_binary(fin, kv, counts,
- vocab_size, vector_size, datatype, unicode_errors, binary_chunk_size)
+ _word2vec_read_binary(
+ fin, kv, counts,
+ vocab_size, vector_size, datatype, unicode_errors, binary_chunk_size,
+ )
else:
_word2vec_read_text(fin, kv, counts, vocab_size, vector_size, datatype, unicode_errors, encoding)
if kv.vectors.shape[0] != len(kv):
logger.info(
"duplicate words detected, shrinking matrix size from %i to %i",
- kv.vectors.shape[0], len(kv)
+ kv.vectors.shape[0], len(kv),
)
kv.vectors = ascontiguousarray(kv.vectors[: len(kv)])
assert (len(kv), vector_size) == kv.vectors.shape
@@ -1814,15 +1887,15 @@ def _load_word2vec_format(cls, fname, fvocab=None, binary=False, encoding='utf8'
def load_word2vec_format(*args, **kwargs):
- """Alias for `KeyedVectors.load_word2vec_format(...)`"""
+ """Alias for :meth:`~gensim.models.keyedvectors.KeyedVectors.load_word2vec_format`."""
return KeyedVectors.load_word2vec_format(*args, **kwargs)
def pseudorandom_weak_vector(size, seed_string=None, hashfxn=hash):
- """Get a 'random' vector (but deterministically derived from seed_string if supplied).
+ """Get a random vector, derived deterministically from `seed_string` if supplied.
+
+ Useful for initializing KeyedVectors that will be the starting projection/input layers of _2Vec models.
- Useful for initializing KeyedVectors that will be the starting
- projection/input layers of _2Vec models.
"""
if seed_string:
once = np.random.Generator(np.random.SFC64(hashfxn(seed_string) & 0xffffffff))
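A tiny usage sketch for the reworded helper above; the vector size and seed string are arbitrary. Note that the default `hashfxn=hash` is only stable within a single Python process unless PYTHONHASHSEED is fixed:

    from gensim.models.keyedvectors import pseudorandom_weak_vector

    v1 = pseudorandom_weak_vector(100, seed_string="apple")
    v2 = pseudorandom_weak_vector(100, seed_string="apple")
    assert (v1 == v2).all()                      # same seed string -> same vector (within one process)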
diff --git a/gensim/models/phrases.py b/gensim/models/phrases.py
index d7001830ee..9460619db8 100644
--- a/gensim/models/phrases.py
+++ b/gensim/models/phrases.py
@@ -62,20 +62,14 @@
import os
import logging
from collections import defaultdict
-import functools as ft
-import itertools as it
+import functools
+import itertools
from math import log
import pickle
-import six
-
-from six import iteritems, string_types, PY2, next
+from inspect import getfullargspec as getargspec
from gensim import utils, interfaces
-if PY2:
- from inspect import getargspec
-else:
- from inspect import getfullargspec as getargspec
logger = logging.getLogger(__name__)
@@ -101,11 +95,11 @@ def _is_single(obj):
temp_iter = obj_iter
try:
peek = next(obj_iter)
- obj_iter = it.chain([peek], obj_iter)
+ obj_iter = itertools.chain([peek], obj_iter)
except StopIteration:
# An empty object is a single document
return True, obj
- if isinstance(peek, string_types):
+ if isinstance(peek, str):
# It's a document, return the iterator
return True, obj_iter
if temp_iter is obj:
@@ -116,7 +110,7 @@ def _is_single(obj):
return False, obj
-class SentenceAnalyzer(object):
+class SentenceAnalyzer:
"""Base util class for :class:`~gensim.models.phrases.Phrases` and :class:`~gensim.models.phrases.Phraser`."""
def score_item(self, worda, wordb, components, scorer):
"""Get bi-gram score statistics.
@@ -194,7 +188,7 @@ def analyze_sentence(self, sentence, threshold, common_terms, scorer):
in_between = []
else:
# release words individually
- for w in it.chain([last_uncommon], in_between):
+ for w in itertools.chain([last_uncommon], in_between):
yield (w, None)
in_between = []
last_uncommon = word
@@ -242,7 +236,7 @@ def load(cls, *args, **kwargs):
model.scoring = original_scorer
# if there is a scoring parameter, and it's a text value, load the proper scoring function
if hasattr(model, 'scoring'):
- if isinstance(model.scoring, six.string_types):
+ if isinstance(model.scoring, str):
if model.scoring == 'default':
logger.info('older version of %s loaded with "default" scoring parameter', cls.__name__)
logger.info('setting scoring method to original_scorer pluggable scoring method for compatibility')
@@ -290,7 +284,7 @@ def _sentence2token(phrase_class, sentence):
delimiter = phrase_class.delimiter
if hasattr(phrase_class, 'vocab'):
- scorer = ft.partial(
+ scorer = functools.partial(
phrase_class.scoring,
len_vocab=float(len(phrase_class.vocab)),
min_count=float(phrase_class.min_count),
@@ -311,9 +305,11 @@ def _sentence2token(phrase_class, sentence):
class Phrases(SentenceAnalyzer, PhrasesTransformation):
"""Detect phrases based on collocation counts."""
- def __init__(self, sentences=None, min_count=5, threshold=10.0,
- max_vocab_size=40000000, delimiter=b'_', progress_per=10000,
- scoring='default', common_terms=frozenset()):
+ def __init__(
+ self, sentences=None, min_count=5, threshold=10.0,
+ max_vocab_size=40000000, delimiter=b'_', progress_per=10000,
+ scoring='default', common_terms=frozenset(),
+ ):
"""
Parameters
@@ -378,16 +374,16 @@ def __init__(self, sentences=None, min_count=5, threshold=10.0,
# intentially override the value of the scoring parameter rather than set self.scoring here,
# to still run the check of scoring function parameters in the next code block
- if isinstance(scoring, six.string_types):
+ if isinstance(scoring, str):
if scoring == 'default':
scoring = original_scorer
elif scoring == 'npmi':
scoring = npmi_scorer
else:
- raise ValueError('unknown scoring method string %s specified' % (scoring))
+ raise ValueError(f'unknown scoring method string {scoring} specified')
scoring_parameters = [
- 'worda_count', 'wordb_count', 'bigram_count', 'len_vocab', 'min_count', 'corpus_word_count'
+ 'worda_count', 'wordb_count', 'bigram_count', 'len_vocab', 'min_count', 'corpus_word_count',
]
if callable(scoring):
if all(parameter in getargspec(scoring)[0] for parameter in scoring_parameters):
@@ -407,13 +403,9 @@ def __init__(self, sentences=None, min_count=5, threshold=10.0,
# ensure picklability of custom scorer
try:
- test_pickle = pickle.dumps(self.scoring)
- load_pickle = pickle.loads(test_pickle)
+ pickle.loads(pickle.dumps(self.scoring))
except pickle.PickleError:
- raise pickle.PickleError('unable to pickle custom Phrases scoring function')
- finally:
- del(test_pickle)
- del(load_pickle)
+ raise pickle.PickleError('Custom Phrases scoring function must be pickle-able')
if sentences is not None:
self.add_vocab(sentences)
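The tightened check above still requires any custom `scoring` callable to accept the six listed parameters and to survive a pickle round trip (so it must be a module-level function, not a lambda). A hedged sketch with an illustrative PMI-style scorer; the formula is made up for the example, not gensim's default:

    from math import log
    from gensim.models.phrases import Phrases

    def plain_pmi_scorer(
            worda_count, wordb_count, bigram_count, len_vocab, min_count, corpus_word_count):
        # all six parameter names are mandatory, even if some go unused
        return log(bigram_count * corpus_word_count / (worda_count * wordb_count) + 1e-12)

    sentences = [["new", "york"], ["new", "york", "times"], ["new", "jersey"]]
    bigrams = Phrases(sentences, min_count=1, threshold=0.1, scoring=plain_pmi_scorer)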
@@ -442,7 +434,7 @@ def __str__(self):
"""Get short string representation of this phrase detector."""
return "%s<%i vocab, min_count=%s, threshold=%s, max_vocab_size=%s>" % (
self.__class__.__name__, len(self.vocab), self.min_count,
- self.threshold, self.max_vocab_size
+ self.threshold, self.max_vocab_size,
)
@staticmethod
@@ -510,7 +502,7 @@ def learn_vocab(sentences, max_vocab_size, delimiter=b'_', progress_per=10000,
if word not in common_terms:
vocab[word] += 1
if last_uncommon is not None:
- components = it.chain([last_uncommon], in_between, [word])
+ components = itertools.chain([last_uncommon], in_between, [word])
vocab[delimiter.join(components)] += 1
last_uncommon = word
in_between = []
@@ -569,7 +561,7 @@ def add_vocab(self, sentences):
if len(self.vocab) > 0:
logger.info("merging %i counts into %s", len(vocab), self)
self.min_reduce = max(self.min_reduce, min_reduce)
- for word, count in iteritems(vocab):
+ for word, count in vocab.items():
self.vocab[word] += count
if len(self.vocab) > self.max_vocab_size:
utils.prune_vocab(self.vocab, self.min_reduce)
@@ -612,11 +604,11 @@ def export_phrases(self, sentences, out_delimiter=b' ', as_tuples=False):
... pass
"""
- analyze_sentence = ft.partial(
+ analyze_sentence = functools.partial(
self.analyze_sentence,
threshold=self.threshold,
common_terms=self.common_terms,
- scorer=ft.partial(
+ scorer=functools.partial(
self.scoring,
len_vocab=float(len(self.vocab)),
min_count=float(self.min_count),
@@ -780,7 +772,7 @@ def pseudocorpus(source_vocab, sep, common_terms=frozenset()):
for i in range(1, len(unigrams)):
if unigrams[i - 1] not in common_terms:
# do not join common terms
- cterms = list(it.takewhile(lambda w: w in common_terms, unigrams[i:]))
+ cterms = list(itertools.takewhile(lambda w: w in common_terms, unigrams[i:]))
tail = unigrams[i + len(cterms):]
components = [sep.join(unigrams[:i])] + cterms
if tail:
diff --git a/gensim/models/translation_matrix.py b/gensim/models/translation_matrix.py
index 54b21416e3..528e3d6fa2 100644
--- a/gensim/models/translation_matrix.py
+++ b/gensim/models/translation_matrix.py
@@ -1,8 +1,8 @@
#!/usr/bin/env python
# encoding: utf-8
-"""Produce translation matrix to translate the word from one language to another language, using either
-standard nearest neighbour method or globally corrected neighbour retrieval method [1]_.
+"""Produce a translation matrix to translate words from one language to another, using either
+a standard nearest neighbour method or a globally corrected neighbour retrieval method [1]_.
This method can be used to augment the existing phrase tables with more candidate translations, or
filter out errors from the translation tables and known dictionaries [2]_. What's more, It also work
@@ -10,6 +10,7 @@
Examples
--------
+
How to make translation between two set of word-vectors
=======================================================
@@ -97,19 +98,18 @@
"""
import warnings
+from collections import OrderedDict
+
import numpy as np
-from collections import OrderedDict
from gensim import utils
-from six import string_types
-class Space(object):
+class Space:
"""An auxiliary class for storing the the words space."""
def __init__(self, matrix, index2word):
"""
-
Parameters
----------
matrix : iterable of numpy.ndarray
@@ -256,7 +256,7 @@ def train(self, word_pairs):
self.translation_matrix = np.linalg.lstsq(m1, m2, -1)[0]
def save(self, *args, **kwargs):
- """Save the model to file but ignoring the `source_space` and `target_space`"""
+ """Save the model to a file. Ignores (doesn't store) the `source_space` and `target_space` attributes."""
kwargs['ignore'] = kwargs.get('ignore', ['source_space', 'target_space'])
super(TranslationMatrix, self).save(*args, **kwargs)
@@ -266,12 +266,12 @@ def apply_transmat(self, words_space):
Parameters
----------
words_space : :class:`~gensim.models.translation_matrix.Space`
- Object that constructed for those words to be translate.
+ `Space` object constructed for the words to be translated.
Returns
-------
:class:`~gensim.models.translation_matrix.Space`
- Object that constructed for those mapped words.
+ `Space` object constructed for the mapped words.
"""
return Space(np.dot(words_space.mat, self.translation_matrix), words_space.index2word)
@@ -301,8 +301,7 @@ def translate(self, source_words, topn=5, gc=0, sample_num=None, source_lang_vec
Ordered dict where each item is `word`: [`translated_word_1`, `translated_word_2`, ...]
"""
-
- if isinstance(source_words, string_types):
+ if isinstance(source_words, str):
# pass only one word to translate
source_words = [source_words]
@@ -329,7 +328,7 @@ def translate(self, source_words, topn=5, gc=0, sample_num=None, source_lang_vec
"When using the globally corrected neighbour retrieval method, "
"the `sample_num` parameter(i.e. the number of words sampled from source space) must be provided."
)
- lexicon = set(source_lang_vec.index2word)
+ lexicon = set(source_lang_vec.index_to_key)
addition = min(sample_num, len(lexicon) - len(source_words))
lexicon = self.random_state.choice(list(lexicon.difference(source_words)), addition)
source_space = Space.build(source_lang_vec, set(source_words).union(set(lexicon)))
diff --git a/gensim/models/word2vec.py b/gensim/models/word2vec.py
index 1c042ba851..806e087c56 100755
--- a/gensim/models/word2vec.py
+++ b/gensim/models/word2vec.py
@@ -8,6 +8,7 @@
"""
Introduction
============
+
This module implements the word2vec family of algorithms, using highly optimized C routines,
data streaming and Pythonic interfaces.
@@ -21,17 +22,15 @@
There are more ways to train word vectors in Gensim than just Word2Vec.
See also :class:`~gensim.models.doc2vec.Doc2Vec`, :class:`~gensim.models.fasttext.FastText` and
-wrappers for :class:`~gensim.models.wrappers.VarEmbed` and :class:`~gensim.models.wrappers.WordRank`.
+wrappers for :class:`~gensim.models.wrappers.varembed.VarEmbed` and :class:`~gensim.models.wrappers.wordrank.WordRank`.
The training algorithms were originally ported from the C package https://code.google.com/p/word2vec/
-and extended with additional functionality and optimizations over the years.
+and extended with additional functionality and
+`optimizations <https://rare-technologies.com/parallelizing-word2vec-in-python/>`_ over the years.
For a tutorial on Gensim word2vec, with an interactive web app trained on GoogleNews,
visit https://rare-technologies.com/word2vec-tutorial/.
-**Make sure you have a C compiler before installing Gensim, to use the optimized word2vec routines**
-(70x speedup compared to plain NumPy implementation, https://rare-technologies.com/parallelizing-word2vec-in-python/).
-
Usage examples
==============
@@ -42,17 +41,17 @@
>>> from gensim.test.utils import common_texts
>>> from gensim.models import Word2Vec
>>>
- >>> model = Word2Vec(common_texts, size=100, window=5, min_count=1, workers=4)
+ >>> model = Word2Vec(sentences=common_texts, vector_size=100, window=5, min_count=1, workers=4)
>>> model.save("word2vec.model")
-The training is streamed, so ``sentences`` can be an iterable, reading input data
-from disk on-the-fly. This lets you avoid loading the entire corpus into RAM.
-However, note that because the iterable must be re-startable, `sentences` must
-not be a generator. For an example of an appropriate iterator see
-:class:`~gensim.models.word2vec.BrownCorpus`,
-:class:`~gensim.models.word2vec.Text8Corpus` or
-:class:`~gensim.models.word2vec.LineSentence`.
+**The training is streamed, so ``sentences`` can be an iterable**, reading input data
+from the disk or network on-the-fly, without loading your entire corpus into RAM.
+
+Note the ``sentences`` iterable must be *restartable* (not just a generator), to allow the algorithm
+to stream over your dataset multiple times. For some examples of streamed iterables,
+see :class:`~gensim.models.word2vec.BrownCorpus`,
+:class:`~gensim.models.word2vec.Text8Corpus` or :class:`~gensim.models.word2vec.LineSentence`.
If you save the model you can continue training it later:
@@ -62,26 +61,31 @@
>>> model.train([["hello", "world"]], total_examples=1, epochs=1)
(0, 2)
-The trained word vectors are stored in a :class:`~gensim.models.keyedvectors.KeyedVectors` instance in `model.wv`:
+The trained word vectors are stored in a :class:`~gensim.models.keyedvectors.KeyedVectors` instance, as `model.wv`:
.. sourcecode:: pycon
- >>> vector = model.wv['computer'] # numpy vector of a word
+ >>> vector = model.wv['computer'] # get numpy vector of a word
The reason for separating the trained vectors into `KeyedVectors` is that if you don't
-need the full model state any more (don't need to continue training), the state can discarded,
-resulting in a much smaller and faster object that can be mmapped for lightning
+need the full model state any more (don't need to continue training), its state can be discarded,
+keeping just the vectors and their keys proper.
+
+This results in a much smaller and faster object that can be mmapped for lightning
fast loading and sharing the vectors in RAM between processes:
.. sourcecode:: pycon
>>> from gensim.models import KeyedVectors
>>>
- >>> path = get_tmpfile("wordvectors.kv")
+ >>> # Store just the words + their trained embeddings.
+ >>> word_vectors = model.wv
+ >>> word_vectors.save("word2vec.wordvectors")
+ >>>
+ >>> # Load back with memory-mapping = read-only, shared across processes.
+ >>> wv = KeyedVectors.load("word2vec.wordvectors", mmap='r')
>>>
- >>> model.wv.save(path)
- >>> wv = KeyedVectors.load("model.wv", mmap='r')
- >>> vector = wv['computer'] # numpy vector of a word
+ >>> vector = wv['computer'] # Get numpy vector of a word
Gensim can also load word vectors in the "word2vec C format", as a
:class:`~gensim.models.keyedvectors.KeyedVectors` instance:
@@ -90,16 +94,18 @@
>>> from gensim.test.utils import datapath
>>>
- >>> wv_from_text = KeyedVectors.load_word2vec_format(datapath('word2vec_pre_kv_c'), binary=False) # C text format
- >>> wv_from_bin = KeyedVectors.load_word2vec_format(datapath("euclidean_vectors.bin"), binary=True) # C bin format
+ >>> # Load a word2vec model stored in the C *text* format.
+ >>> wv_from_text = KeyedVectors.load_word2vec_format(datapath('word2vec_pre_kv_c'), binary=False)
+ >>> # Load a word2vec model stored in the C *binary* format.
+ >>> wv_from_bin = KeyedVectors.load_word2vec_format(datapath("euclidean_vectors.bin"), binary=True)
It is impossible to continue training the vectors loaded from the C format because the hidden weights,
vocabulary frequencies and the binary tree are missing. To continue training, you'll need the
full :class:`~gensim.models.word2vec.Word2Vec` object state, as stored by :meth:`~gensim.models.word2vec.Word2Vec.save`,
not just the :class:`~gensim.models.keyedvectors.KeyedVectors`.
-You can perform various NLP word tasks with a trained model. Some of them
-are already built-in - you can see it in :mod:`gensim.models.keyedvectors`.
+You can perform various NLP tasks with a trained model. Some of the operations
+are already built-in - see :mod:`gensim.models.keyedvectors`.
If you're finished training a model (i.e. no more updates, only querying),
you can switch to the :class:`~gensim.models.keyedvectors.KeyedVectors` instance:
@@ -111,18 +117,65 @@
to trim unneeded model state = use much less RAM and allow fast loading and memory sharing (mmap).
-Note that there is a :mod:`gensim.models.phrases` module which lets you automatically
-detect phrases longer than one word. Using phrases, you can learn a word2vec model
-where "words" are actually multiword expressions, such as `new_york_times` or `financial_crisis`:
+Embeddings with multiword ngrams
+================================
+
+There is a :mod:`gensim.models.phrases` module which lets you automatically
+detect phrases longer than one word, using collocation statistics.
+Using phrases, you can learn a word2vec model where "words" are actually multiword expressions,
+such as `new_york_times` or `financial_crisis`:
.. sourcecode:: pycon
- >>> from gensim.test.utils import common_texts
>>> from gensim.models import Phrases
>>>
+ >>> # Train a bigram detector.
>>> bigram_transformer = Phrases(common_texts)
+ >>>
+ >>> # Apply the trained MWE detector to a corpus, using the result to train a Word2vec model.
>>> model = Word2Vec(bigram_transformer[common_texts], min_count=1)
+Pretrained models
+=================
+
+Gensim comes with several already pre-trained models, in the
+`Gensim-data repository <https://github.com/RaRe-Technologies/gensim-data>`_:
+
+.. sourcecode:: pycon
+
+ >>> import gensim.downloader
+ >>> # Show all available models in gensim-data
+ >>> print(list(gensim.downloader.info()['models'].keys()))
+ ['fasttext-wiki-news-subwords-300',
+ 'conceptnet-numberbatch-17-06-300',
+ 'word2vec-ruscorpora-300',
+ 'word2vec-google-news-300',
+ 'glove-wiki-gigaword-50',
+ 'glove-wiki-gigaword-100',
+ 'glove-wiki-gigaword-200',
+ 'glove-wiki-gigaword-300',
+ 'glove-twitter-25',
+ 'glove-twitter-50',
+ 'glove-twitter-100',
+ 'glove-twitter-200',
+ '__testing_word2vec-matrix-synopsis']
+ >>>
+ >>> # Download the "glove-twitter-25" embeddings
+ >>> glove_vectors = gensim.downloader.load('glove-twitter-25')
+ >>>
+ >>> # Use the downloaded vectors as usual:
+ >>> glove_vectors.most_similar('twitter')
+ [('facebook', 0.948005199432373),
+ ('tweet', 0.9403423070907593),
+ ('fb', 0.9342358708381653),
+ ('instagram', 0.9104824066162109),
+ ('chat', 0.8964964747428894),
+ ('hashtag', 0.8885937333106995),
+ ('tweets', 0.8878158330917358),
+ ('tl', 0.8778461217880249),
+ ('link', 0.8778210878372192),
+ ('internet', 0.8753897547721863)]
+
"""
from __future__ import division # py3 "true division"
@@ -137,21 +190,15 @@
import threading
import itertools
import copy
-
-from gensim.utils import keep_vocab_item, call_on_class_only, deprecated
-from gensim.models.keyedvectors import KeyedVectors, pseudorandom_weak_vector
-
-try:
- from queue import Queue, Empty
-except ImportError:
- from Queue import Queue, Empty
+from queue import Queue, Empty
from numpy import float32 as REAL
import numpy as np
-from gensim import utils, matutils # utility fnc for pickling, common scipy operations etc
-from six import iteritems, itervalues, string_types
-from six.moves import range
+from gensim.utils import keep_vocab_item, call_on_class_only, deprecated
+from gensim.models.keyedvectors import KeyedVectors, pseudorandom_weak_vector
+from gensim import utils, matutils
+
logger = logging.getLogger(__name__)
@@ -173,21 +220,27 @@
# file-based word2vec is not supported
CORPUSFILE_VERSION = -1
- def train_epoch_sg(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words,
- _work, _neu1, compute_loss):
+ def train_epoch_sg(
+ model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words,
+ _work, _neu1, compute_loss,
+ ):
raise RuntimeError("Training with corpus_file argument is not supported")
- def train_epoch_cbow(model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words,
- _work, _neu1, compute_loss):
+ def train_epoch_cbow(
+ model, corpus_file, offset, _cython_vocab, _cur_epoch, _expected_examples, _expected_words,
+ _work, _neu1, compute_loss,
+ ):
raise RuntimeError("Training with corpus_file argument is not supported")
class Word2Vec(utils.SaveLoad):
- def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
- max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
- sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
- trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
- comment=None, max_final_vocab=None):
+ def __init__(
+ self, sentences=None, corpus_file=None, vector_size=100, alpha=0.025, window=5, min_count=5,
+ max_vocab_size=None, sample=1e-3, seed=1, workers=3, min_alpha=0.0001,
+ sg=0, hs=0, negative=5, ns_exponent=0.75, cbow_mean=1, hashfxn=hash, epochs=5, null_word=0,
+ trim_rule=None, sorted_vocab=1, batch_words=MAX_WORDS_IN_BATCH, compute_loss=False, callbacks=(),
+ comment=None, max_final_vocab=None,
+ ):
"""Train, use and evaluate neural networks described in https://code.google.com/p/word2vec/.
Once you're finished training a model (=no more updates, only querying)
@@ -375,7 +428,7 @@ def __init__(self, sentences=None, corpus_file=None, vector_size=100, alpha=0.02
def build_vocab_and_train(self, corpus_iterable=None, corpus_file=None, trim_rule=None, callbacks=None):
if not (corpus_iterable is None) ^ (corpus_file is None):
raise ValueError("You must provide only one of corpus_iterable or corpus_file arguments.")
- if corpus_file is not None and not isinstance(corpus_file, string_types):
+ if corpus_file is not None and not isinstance(corpus_file, str):
raise TypeError("You must pass string as the corpus_file argument.")
elif isinstance(corpus_iterable, GeneratorType):
raise TypeError("You can't pass a generator as the sentences argument. Try a sequence.")
@@ -386,8 +439,10 @@ def build_vocab_and_train(self, corpus_iterable=None, corpus_file=None, trim_rul
total_words=self.corpus_total_words, epochs=self.epochs, start_alpha=self.alpha,
end_alpha=self.min_alpha, compute_loss=self.compute_loss, callbacks=callbacks)
- def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000,
- keep_raw_vocab=False, trim_rule=None, **kwargs):
+ def build_vocab(
+ self, corpus_iterable=None, corpus_file=None, update=False, progress_per=10000,
+ keep_raw_vocab=False, trim_rule=None, **kwargs,
+ ):
"""Build vocabulary from a sequence of sentences (can be a once-only generator stream).
Parameters
@@ -433,7 +488,9 @@ def build_vocab(self, corpus_iterable=None, corpus_file=None, update=False, prog
report_values['memory'] = self.estimate_memory(vocab_size=report_values['num_retained_words'])
self.prepare_weights(update=update)
- def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False):
+ def build_vocab_from_freq(
+ self, word_freq, keep_raw_vocab=False, corpus_count=None, trim_rule=None, update=False,
+ ):
"""Build vocabulary from a dictionary of word frequencies.
Parameters
@@ -468,7 +525,7 @@ def build_vocab_from_freq(self, word_freq, keep_raw_vocab=False, corpus_count=No
raw_vocab = word_freq
logger.info(
"collected %i different raw word, with total frequency of %i",
- len(raw_vocab), sum(itervalues(raw_vocab))
+ len(raw_vocab), sum(raw_vocab.values()),
)
# Since no sentences are provided, this is to control the corpus_count.
@@ -488,11 +545,11 @@ def _scan_vocab(self, sentences, progress_per, trim_rule):
checked_string_types = 0
for sentence_no, sentence in enumerate(sentences):
if not checked_string_types:
- if isinstance(sentence, string_types):
+ if isinstance(sentence, str):
logger.warning(
"Each 'sentences' item should be a list of words (usually unicode strings). "
"First item here is instead plain %s.",
- type(sentence)
+ type(sentence),
)
checked_string_types += 1
if sentence_no % progress_per == 0:
@@ -528,7 +585,8 @@ def scan_vocab(self, corpus_iterable=None, corpus_file=None, progress_per=10000,
def prepare_vocab(
self, update=False, keep_raw_vocab=False, trim_rule=None,
- min_count=None, sample=None, dry_run=False):
+ min_count=None, sample=None, dry_run=False,
+ ):
"""Apply vocabulary settings for `min_count` (discarding less-frequent words)
and `sample` (controlling the downsampling of more-frequent words).
@@ -574,7 +632,7 @@ def prepare_vocab(
self.sample = sample
self.wv.key_to_index = {}
- for word, v in iteritems(self.raw_vocab):
+ for word, v in self.raw_vocab.items():
if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule):
retain_words.append(word)
retain_total += v
@@ -604,7 +662,7 @@ def prepare_vocab(
logger.info("Updating model with new vocabulary")
new_total = pre_exist_total = 0
new_words = pre_exist_words = []
- for word, v in iteritems(self.raw_vocab):
+ for word, v in self.raw_vocab.items():
if keep_vocab_item(word, v, self.effective_min_count, trim_rule=trim_rule):
if self.wv.has_index_for(word):
pre_exist_words.append(word)
@@ -836,8 +894,10 @@ def init_sims(self, replace=False):
"""
self.wv.init_sims(replace=replace)
- def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch,
- total_examples=None, total_words=None, **kwargs):
+ def _do_train_epoch(
+ self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch,
+ total_examples=None, total_words=None, **kwargs,
+ ):
work, neu1 = thread_private_mem
if self.sg:
@@ -879,10 +939,12 @@ def _clear_post_train(self):
"""Clear any cached vector lengths from the model."""
self.wv.norms = None
- def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None,
- epochs=None, start_alpha=None, end_alpha=None, word_count=0,
- queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(),
- **kwargs):
+ def train(
+ self, corpus_iterable=None, corpus_file=None, total_examples=None, total_words=None,
+ epochs=None, start_alpha=None, end_alpha=None, word_count=0,
+ queue_factor=2, report_delay=1.0, compute_loss=False, callbacks=(),
+ **kwargs,
+ ):
"""Update the model's neural weights from a sequence of sentences.
Notes
@@ -897,7 +959,7 @@ def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, tot
--------
To avoid common mistakes around the model's ability to do multiple training passes itself, an
explicit `epochs` argument **MUST** be provided. In the common and recommended case
- where :meth:`~gensim.models.word2vec.Word2Vec.train` is only called once, you can set `epochs=self.iter`.
+ where :meth:`~gensim.models.word2vec.Word2Vec.train` is only called once, you can set `epochs=self.epochs`.
Parameters
----------
@@ -950,7 +1012,7 @@ def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, tot
>>>
>>> model = Word2Vec(min_count=1)
>>> model.build_vocab(sentences) # prepare the model vocabulary
- >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.iter) # train word vectors
+ >>> model.train(sentences, total_examples=model.corpus_count, epochs=model.epochs) # train word vectors
(1, 30)
"""
@@ -1006,8 +1068,10 @@ def train(self, corpus_iterable=None, corpus_file=None, total_examples=None, tot
callback.on_train_end(self)
return trained_word_count, raw_word_count
- def _worker_loop_corpusfile(self, corpus_file, thread_id, offset, cython_vocab, progress_queue, cur_epoch=0,
- total_examples=None, total_words=None, **kwargs):
+ def _worker_loop_corpusfile(
+ self, corpus_file, thread_id, offset, cython_vocab, progress_queue, cur_epoch=0,
+ total_examples=None, total_words=None, **kwargs,
+ ):
"""Train the model on a `corpus_file` in LineSentence format.
This function will be called in parallel by multiple workers (threads or processes) to make
@@ -1153,8 +1217,10 @@ def _job_producer(self, data_iterator, job_queue, cur_epoch=0, total_examples=No
job_queue.put(None)
logger.debug("job loop exiting, total %i jobs", job_no)
- def _log_epoch_progress(self, progress_queue=None, job_queue=None, cur_epoch=0, total_examples=None,
- total_words=None, report_delay=1.0, is_corpus_file_mode=None):
+ def _log_epoch_progress(
+ self, progress_queue=None, job_queue=None, cur_epoch=0, total_examples=None,
+ total_words=None, report_delay=1.0, is_corpus_file_mode=None,
+ ):
"""Get the progress report for a single training epoch.
Parameters
@@ -1226,7 +1292,8 @@ def _log_epoch_progress(self, progress_queue=None, job_queue=None, cur_epoch=0,
return trained_word_count, raw_word_count, job_tally
def _train_epoch_corpusfile(
- self, corpus_file, cur_epoch=0, total_examples=None, total_words=None, callbacks=(), **kwargs):
+ self, corpus_file, cur_epoch=0, total_examples=None, total_words=None, callbacks=(), **kwargs,
+ ):
"""Train the model for a single epoch.
Parameters
@@ -1289,8 +1356,10 @@ def _train_epoch_corpusfile(
return trained_word_count, raw_word_count, job_tally
- def _train_epoch(self, data_iterable, cur_epoch=0, total_examples=None, total_words=None,
- queue_factor=2, report_delay=1.0, callbacks=()):
+ def _train_epoch(
+ self, data_iterable, cur_epoch=0, total_examples=None, total_words=None,
+ queue_factor=2, report_delay=1.0, callbacks=(),
+ ):
"""Train the model for a single epoch.
Parameters
@@ -1455,8 +1524,10 @@ def _check_training_sanity(self, epochs=None, total_examples=None, total_words=N
self.hs, self.sample, self.negative, self.window
)
- def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, total_examples,
- raw_word_count, total_words, trained_word_count, elapsed):
+ def _log_progress(
+ self, job_queue, progress_queue, cur_epoch, example_count, total_examples,
+ raw_word_count, total_words, trained_word_count, elapsed
+ ):
"""Callback used to log progress for long running jobs.
Parameters
@@ -1506,8 +1577,10 @@ def _log_progress(self, job_queue, progress_queue, cur_epoch, example_count, tot
-1 if job_queue is None else utils.qsize(job_queue), utils.qsize(progress_queue)
)
- def _log_epoch_end(self, cur_epoch, example_count, total_examples, raw_word_count, total_words,
- trained_word_count, elapsed, is_corpus_file_mode):
+ def _log_epoch_end(
+ self, cur_epoch, example_count, total_examples, raw_word_count, total_words,
+ trained_word_count, elapsed, is_corpus_file_mode
+ ):
"""Callback used to log the end of a training epoch.
Parameters
@@ -1801,20 +1874,14 @@ def save(self, *args, **kwargs):
Path to the file.
"""
- # don't bother storing recalculable table
- kwargs['ignore'] = kwargs.get('ignore', []) + ['cum_table', ]
super(Word2Vec, self).save(*args, **kwargs)
- def get_latest_training_loss(self):
- """Get current value of the training loss.
-
- Returns
- -------
- float
- Current training loss.
-
- """
- return self.running_training_loss
+ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol, compress, subname):
+ """Arrange any special handling for the `gensim.utils.SaveLoad` protocol."""
+ # don't save properties that are merely calculated from others
+ ignore = set(ignore).union(['cum_table', ])
+ return super(Word2Vec, self)._save_specials(
+ fname, separately, sep_limit, ignore, pickle_protocol, compress, subname)
@classmethod
def load(cls, *args, rethrow=False, **kwargs):
@@ -1841,49 +1908,65 @@ def load(cls, *args, rethrow=False, **kwargs):
if not isinstance(model, Word2Vec):
rethrow = True
raise AttributeError("Model of type %s can't be loaded by %s" % (type(model), str(cls)))
- # for backward compatibility
- if not hasattr(model, 'ns_exponent'):
- model.ns_exponent = 0.75
- if model.negative and hasattr(model.wv, 'index2word'):
- model.make_cum_table() # rebuild cum_table from vocabulary ## TODO: ???
- if not hasattr(model, 'corpus_count'):
- model.corpus_count = None
- if not hasattr(model, 'corpus_total_words'):
- model.corpus_total_words = None
- if not hasattr(model.wv, 'vectors_lockf') and hasattr(model.wv, 'vectors'):
- model.wv.vectors_lockf = getattr(model, 'vectors_lockf', np.ones(1, dtype=REAL))
- if not hasattr(model, 'random'):
- model.random = np.random.RandomState(model.seed)
- if not hasattr(model, 'train_count'):
- model.train_count = 0
- model.total_train_time = 0
- if not hasattr(model, 'epochs'):
- model.epochs = model.iter
- del model.iter
- if not hasattr(model, 'max_final_vocab'):
- model.max_final_vocab = None
- if hasattr(model, 'vocabulary'): # re-integrate state that had been moved
- for a in ('max_vocab_size', 'min_count', 'sample', 'sorted_vocab', 'null_word', 'raw_vocab'):
- setattr(model, a, getattr(model.vocabulary, a))
- del model.vocabulary
- if hasattr(model, 'trainables'): # re-integrate state that had been moved
- for a in ('hashfxn', 'layer1_size', 'seed', 'syn1neg', 'syn1'):
- if hasattr(model.trainables, a):
- setattr(model, a, getattr(model.trainables, a))
- if hasattr(model, 'syn1'):
- model.syn1 = model.syn1
- del model.syn1
- del model.trainables
return model
except AttributeError as ae:
if rethrow:
raise ae
logger.error(
"Model load error. Was model saved using code from an older Gensim Version? "
- "Try loading older model using gensim-3.8.1, then re-saving, to restore "
+ "Try loading older model using gensim-3.8.3, then re-saving, to restore "
"compatibility with current code.")
raise ae
+ def _load_specials(self, *args, **kwargs):
+ """Handle special requirements of `.load()` protocol, usually up-converting older versions."""
+ super(Word2Vec, self)._load_specials(*args, **kwargs)
+ # for backward compatibility, add/rearrange properties from prior versions
+ if not hasattr(self, 'ns_exponent'):
+ self.ns_exponent = 0.75
+ if self.negative and hasattr(self.wv, 'index_to_key'):
+ self.make_cum_table() # rebuild cum_table from vocabulary
+ if not hasattr(self, 'corpus_count'):
+ self.corpus_count = None
+ if not hasattr(self, 'corpus_total_words'):
+ self.corpus_total_words = None
+ if not hasattr(self.wv, 'vectors_lockf') and hasattr(self.wv, 'vectors'):
+ self.wv.vectors_lockf = np.ones(1, dtype=REAL)
+ if not hasattr(self, 'random'):
+ # use new instance of numpy's recommended generator/algorithm
+ self.random = np.random.default_rng(seed=self.seed)
+ if not hasattr(self, 'train_count'):
+ self.train_count = 0
+ self.total_train_time = 0
+ if not hasattr(self, 'epochs'):
+ self.epochs = self.iter
+ del self.iter
+ if not hasattr(self, 'max_final_vocab'):
+ self.max_final_vocab = None
+ if hasattr(self, 'vocabulary'): # re-integrate state that had been moved
+ for a in ('max_vocab_size', 'min_count', 'sample', 'sorted_vocab', 'null_word', 'raw_vocab'):
+ setattr(self, a, getattr(self.vocabulary, a))
+ del self.vocabulary
+ if hasattr(self, 'trainables'): # re-integrate state that had been moved
+ for a in ('hashfxn', 'layer1_size', 'seed', 'syn1neg', 'syn1'):
+ if hasattr(self.trainables, a):
+ setattr(self, a, getattr(self.trainables, a))
+ if hasattr(self, 'syn1'):
+ self.syn1 = self.syn1
+ del self.syn1
+ del self.trainables
+
+ def get_latest_training_loss(self):
+ """Get current value of the training loss.
+
+ Returns
+ -------
+ float
+ Current training loss.
+
+ """
+ return self.running_training_loss
+
class BrownCorpus(object):
def __init__(self, dirname):
@@ -2043,12 +2126,12 @@ def __iter__(self):
class Word2VecVocab(utils.SaveLoad):
- """Obsolete class retained for now as load-compatibility state capture"""
+ """Obsolete class retained for now as load-compatibility state capture."""
pass
class Word2VecTrainables(utils.SaveLoad):
- """Obsolete class retained for now as load-compatibility state capture"""
+ """Obsolete class retained for now as load-compatibility state capture."""
pass
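
A minimal migration sketch for the load-compatibility path added above (file names are illustrative assumptions, not part of this patch): loading an older model runs `Word2Vec._load_specials()`, which back-fills attributes such as `ns_exponent`, `epochs` and `vectors_lockf` and folds the obsolete `vocabulary`/`trainables` sub-objects back into the model; re-saving then writes the current layout, with `cum_table` dropped by `_save_specials()` and rebuilt from the vocabulary on the next load.

from gensim.models import Word2Vec

# Hypothetical paths, for illustration only.
old_path = "old_gensim_model.model"
new_path = "migrated_model.model"

# load() routes through _load_specials(), up-converting older attribute layouts
model = Word2Vec.load(old_path)

# save() routes through _save_specials(), which skips the derived cum_table;
# it is recomputed from the vocabulary when the model is loaded again
model.save(new_path)
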
diff --git a/gensim/scripts/word2vec2tensor.py b/gensim/scripts/word2vec2tensor.py
index 8495cb9862..3e79688490 100644
--- a/gensim/scripts/word2vec2tensor.py
+++ b/gensim/scripts/word2vec2tensor.py
@@ -70,7 +70,7 @@ def word2vec2tensor(word2vec_model_path, tensor_filename, binary=False):
outfiletsvmeta = tensor_filename + '_metadata.tsv'
with utils.open(outfiletsv, 'wb') as file_vector, utils.open(outfiletsvmeta, 'wb') as file_metadata:
- for word in model.index2word:
+ for word in model.index_to_key:
file_metadata.write(gensim.utils.to_utf8(word) + gensim.utils.to_utf8('\n'))
vector_row = '\t'.join(str(x) for x in model[word])
file_vector.write(gensim.utils.to_utf8(vector_row) + gensim.utils.to_utf8('\n'))
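
The export loop now walks `index_to_key` rather than the removed `index2word`. A minimal standalone sketch of the same TSV export pattern (input and output file names are assumptions for illustration):

from gensim.models import KeyedVectors

kv = KeyedVectors.load_word2vec_format("vectors.bin", binary=True)  # assumed input file

with open("tensor.tsv", "w", encoding="utf-8") as vec_file, \
        open("metadata.tsv", "w", encoding="utf-8") as meta_file:
    for word in kv.index_to_key:
        meta_file.write(word + "\n")
        vec_file.write("\t".join(str(x) for x in kv[word]) + "\n")
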
diff --git a/gensim/similarities/annoy.py b/gensim/similarities/annoy.py
index 9f8b8fdbc0..57808f1b3b 100644
--- a/gensim/similarities/annoy.py
+++ b/gensim/similarities/annoy.py
@@ -151,7 +151,7 @@ def load(self, fname):
def build_from_word2vec(self):
"""Build an Annoy index using word vectors from a Word2Vec model."""
return self._build_from_model(
- self.model.wv.get_normed_vectors(), self.model.wv.index2word, self.model.vector_size,
+ self.model.wv.get_normed_vectors(), self.model.wv.index_to_key, self.model.vector_size,
)
def build_from_doc2vec(self):
@@ -163,7 +163,7 @@ def build_from_doc2vec(self):
def build_from_keyedvectors(self):
"""Build an Annoy index using word vectors from a KeyedVectors model."""
return self._build_from_model(
- self.model.get_normed_vectors(), self.model.index2word, self.model.vector_size,
+ self.model.get_normed_vectors(), self.model.index_to_key, self.model.vector_size,
)
def _build_from_model(self, vectors, labels, num_features):
diff --git a/gensim/similarities/nmslib.py b/gensim/similarities/nmslib.py
index b70a9f4e43..7ff78539c1 100644
--- a/gensim/similarities/nmslib.py
+++ b/gensim/similarities/nmslib.py
@@ -187,7 +187,7 @@ def load(cls, fname):
def _build_from_word2vec(self):
"""Build an NMSLIB index using word vectors from a Word2Vec model."""
- self._build_from_model(self.model.wv.get_normed_vectors(), self.model.wv.index2word)
+ self._build_from_model(self.model.wv.get_normed_vectors(), self.model.wv.index_to_key)
def _build_from_doc2vec(self):
"""Build an NMSLIB index using document vectors from a Doc2Vec model."""
@@ -197,7 +197,7 @@ def _build_from_doc2vec(self):
def _build_from_keyedvectors(self):
"""Build an NMSLIB index using word vectors from a KeyedVectors model."""
- self._build_from_model(self.model.get_normed_vectors(), self.model.index2word)
+ self._build_from_model(self.model.get_normed_vectors(), self.model.index_to_key)
def _build_from_model(self, vectors, labels):
index = nmslib.init(method='hnsw', space='cosinesimil')
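
Both indexers now take their labels from `index_to_key` and their vectors from `get_normed_vectors()`. A minimal sketch of the same build pattern against the `annoy` library directly (the `kv` argument and `num_trees` value are assumptions for illustration):

from annoy import AnnoyIndex

def build_annoy_index(kv, num_trees=50):
    # kv is a gensim KeyedVectors instance; row i of the index corresponds to kv.index_to_key[i]
    index = AnnoyIndex(kv.vector_size, 'angular')
    for i, vector in enumerate(kv.get_normed_vectors()):
        index.add_item(i, vector)
    index.build(num_trees)
    return index, list(kv.index_to_key)
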
diff --git a/gensim/test/test_doc2vec.py b/gensim/test/test_doc2vec.py
index aa958b744d..e402b1355a 100644
--- a/gensim/test/test_doc2vec.py
+++ b/gensim/test/test_doc2vec.py
@@ -109,7 +109,7 @@ def obsolete_testLoadOldModel(self):
model = doc2vec.Doc2Vec.load(datapath(model_file))
self.assertTrue(model.wv.vectors.shape == (3955, 100))
self.assertTrue(len(model.wv) == 3955)
- self.assertTrue(len(model.wv.index2word) == 3955)
+ self.assertTrue(len(model.wv.index_to_key) == 3955)
self.assertIsNone(model.corpus_total_words)
self.assertTrue(model.syn1neg.shape == (len(model.wv), model.vector_size))
self.assertTrue(model.wv.vectors_lockf.shape == (3955, ))
@@ -129,7 +129,7 @@ def obsolete_testLoadOldModelSeparates(self):
model = doc2vec.Doc2Vec.load(datapath(model_file))
self.assertTrue(model.wv.vectors.shape == (3955, 100))
self.assertTrue(len(model.wv) == 3955)
- self.assertTrue(len(model.wv.index2word) == 3955)
+ self.assertTrue(len(model.wv.index_to_key) == 3955)
self.assertIsNone(model.corpus_total_words)
self.assertTrue(model.syn1neg.shape == (len(model.wv), model.vector_size))
self.assertTrue(model.wv.vectors_lockf.shape == (3955, ))
diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py
index b998ffe308..fd96f9f26f 100644
--- a/gensim/test/test_keyedvectors.py
+++ b/gensim/test/test_keyedvectors.py
@@ -23,8 +23,7 @@
class TestKeyedVectors(unittest.TestCase):
def setUp(self):
- self.vectors = KeyedVectors.load_word2vec_format(
- datapath('euclidean_vectors.bin'), binary=True)
+ self.vectors = KeyedVectors.load_word2vec_format(datapath('euclidean_vectors.bin'), binary=True)
self.model_path = datapath("w2v_keyedvectors_load_test.modeldata")
self.vocab_path = datapath("w2v_keyedvectors_load_test.vocab")
@@ -61,12 +60,9 @@ def test_relative_cosine_similarity(self):
'respectable', 'beneficial', 'just', 'upright', 'adept', 'expert', 'practiced', 'proficient',
'skillful', 'skilful', 'dear', 'near', 'dependable', 'safe', 'secure', 'right', 'ripe', 'well',
'effective', 'in_effect', 'in_force', 'serious', 'sound', 'salutary', 'honest', 'undecomposed',
- 'unspoiled', 'unspoilt', 'thoroughly', 'soundly'
+ 'unspoiled', 'unspoilt', 'thoroughly', 'soundly',
] # synonyms for "good" as per wordnet
- cos_sim = []
- for i in range(len(wordnet_syn)):
- if wordnet_syn[i] in self.vectors:
- cos_sim.append(self.vectors.similarity("good", wordnet_syn[i]))
+ cos_sim = [self.vectors.similarity("good", syn) for syn in wordnet_syn if syn in self.vectors]
cos_sim = sorted(cos_sim, reverse=True) # cosine_similarity of "good" with wordnet_syn in decreasing order
# computing relative_cosine_similarity of two similar words
rcs_wordnet = self.vectors.similarity("good", "nice") / sum(cos_sim[i] for i in range(10))
@@ -84,7 +80,7 @@ def test_most_similar_raises_keyerror(self):
def test_most_similar_restrict_vocab(self):
"""Test most_similar returns handles restrict_vocab correctly."""
- expected = set(self.vectors.index2word[:5])
+ expected = set(self.vectors.index_to_key[:5])
predicted = set(result[0] for result in self.vectors.most_similar('war', topn=5, restrict_vocab=5))
self.assertEqual(expected, predicted)
@@ -113,7 +109,7 @@ def test_similar_by_word(self):
'administration',
'terrorism',
'call',
- 'israel'
+ 'israel',
]
predicted = [result[0] for result in self.vectors.similar_by_word('war', topn=5)]
self.assertEqual(expected, predicted)
@@ -154,12 +150,12 @@ def test_rank(self):
def test_add_single(self):
"""Test that adding entity in a manual way works correctly."""
- entities = ['___some_entity{}_not_present_in_keyed_vectors___'.format(i) for i in range(5)]
+ entities = [f'___some_entity{i}_not_present_in_keyed_vectors___' for i in range(5)]
vectors = [np.random.randn(self.vectors.vector_size) for _ in range(5)]
# Test `add` on already filled kv.
for ent, vector in zip(entities, vectors):
- self.vectors.add(ent, vector)
+ self.vectors.add_vectors(ent, vector)
for ent, vector in zip(entities, vectors):
self.assertTrue(np.allclose(self.vectors[ent], vector))
@@ -167,7 +163,7 @@ def test_add_single(self):
# Test `add` on empty kv.
kv = KeyedVectors(self.vectors.vector_size)
for ent, vector in zip(entities, vectors):
- kv.add(ent, vector)
+ kv.add_vectors(ent, vector)
for ent, vector in zip(entities, vectors):
self.assertTrue(np.allclose(kv[ent], vector))
@@ -179,7 +175,7 @@ def test_add_multiple(self):
# Test `add` on already filled kv.
vocab_size = len(self.vectors)
- self.vectors.add(entities, vectors, replace=False)
+ self.vectors.add_vectors(entities, vectors, replace=False)
self.assertEqual(vocab_size + len(entities), len(self.vectors))
for ent, vector in zip(entities, vectors):
@@ -198,7 +194,7 @@ def test_add_type(self):
assert kv.vectors.dtype == REAL
words, vectors = ["a"], np.array([1., 1.], dtype=np.float64).reshape(1, -1)
- kv.add(words, vectors)
+ kv.add_vectors(words, vectors)
assert kv.vectors.dtype == REAL
@@ -270,7 +266,7 @@ def test_save_reload(self):
count = 20
keys = [str(i) for i in range(count)]
weights = [pseudorandom_weak_vector(randkv.vector_size) for _ in range(count)]
- randkv.add(keys, weights)
+ randkv.add_vectors(keys, weights)
tmpfiletxt = gensim.test.utils.get_tmpfile("tmp_kv.txt")
randkv.save_word2vec_format(tmpfiletxt, binary=False)
reloadtxtkv = KeyedVectors.load_word2vec_format(tmpfiletxt, binary=False)
@@ -287,7 +283,7 @@ def test_no_header(self):
count = 20
keys = [str(i) for i in range(count)]
weights = [pseudorandom_weak_vector(randkv.vector_size) for _ in range(count)]
- randkv.add(keys, weights)
+ randkv.add_vectors(keys, weights)
tmpfiletxt = gensim.test.utils.get_tmpfile("tmp_kv.txt")
randkv.save_word2vec_format(tmpfiletxt, binary=False, write_header=False)
reloadtxtkv = KeyedVectors.load_word2vec_format(tmpfiletxt, binary=False, no_header=True)
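
These tests follow the rename of `KeyedVectors.add()` to `add_vectors()`. A minimal sketch of the renamed call, covering the single-pair and parallel-list forms exercised above (key names and dimensions are arbitrary):

import numpy as np
from gensim.models import KeyedVectors

kv = KeyedVectors(vector_size=8)

# single key with a single vector
kv.add_vectors("apple", np.random.rand(8).astype(np.float32))

# parallel lists of keys and vectors
keys = [f"word_{i}" for i in range(3)]
vectors = [np.random.rand(8).astype(np.float32) for _ in keys]
kv.add_vectors(keys, vectors)
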
diff --git a/gensim/test/test_poincare.py b/gensim/test/test_poincare.py
index 67b2668e02..98970525a2 100644
--- a/gensim/test/test_poincare.py
+++ b/gensim/test/test_poincare.py
@@ -278,7 +278,7 @@ def test_most_similar_raises_keyerror(self):
def test_most_similar_restrict_vocab(self):
"""Test most_similar returns handles restrict_vocab correctly."""
- expected = set(self.vectors.index2word[:5])
+ expected = set(self.vectors.index_to_key[:5])
predicted = set(result[0] for result in self.vectors.most_similar('dog.n.01', topn=5, restrict_vocab=5))
self.assertEqual(expected, predicted)
diff --git a/gensim/test/test_sharded_corpus.py b/gensim/test/test_sharded_corpus.py
index 3a56f240e2..14eea34f88 100644
--- a/gensim/test/test_sharded_corpus.py
+++ b/gensim/test/test_sharded_corpus.py
@@ -1,19 +1,17 @@
"""
-Testing the test sharded corpus.
+Tests for ShardedCorpus.
"""
-import os
+import os
import unittest
-
import random
-import numpy as np
import shutil
+import numpy as np
from scipy import sparse
-from gensim.utils import is_corpus
+from gensim.utils import is_corpus, mock_data
from gensim.corpora.sharded_corpus import ShardedCorpus
-from gensim.utils import mock_data, range
#############################################################################
diff --git a/gensim/test/test_similarities.py b/gensim/test/test_similarities.py
index 9c91e8926d..6a0321fdbe 100644
--- a/gensim/test/test_similarities.py
+++ b/gensim/test/test_similarities.py
@@ -591,7 +591,7 @@ def testLoadMissingRaisesError(self):
def assertVectorIsSimilarToItself(self, wv, index):
vector = wv.get_normed_vectors()[0]
- label = wv.index2word[0]
+ label = wv.index_to_key[0]
approx_neighbors = index.most_similar(vector, 1)
word, similarity = approx_neighbors[0]
@@ -748,7 +748,7 @@ def test_load_missing_raises_error(self):
def assertVectorIsSimilarToItself(self, wv, index):
vector = wv.get_normed_vectors()[0]
- label = wv.index2word[0]
+ label = wv.index_to_key[0]
approx_neighbors = index.most_similar(vector, 1)
word, similarity = approx_neighbors[0]
diff --git a/gensim/test/test_translation_matrix.py b/gensim/test/test_translation_matrix.py
index 8846dc617d..578be26941 100644
--- a/gensim/test/test_translation_matrix.py
+++ b/gensim/test/test_translation_matrix.py
@@ -19,10 +19,11 @@ def setUp(self):
self.source_word_vec_file = datapath("EN.1-10.cbow1_wind5_hs0_neg10_size300_smpl1e-05.txt")
self.target_word_vec_file = datapath("IT.1-10.cbow1_wind5_hs0_neg10_size300_smpl1e-05.txt")
- self.word_pairs = [("one", "uno"), ("two", "due"), ("three", "tre"),
+ self.word_pairs = [
+ ("one", "uno"), ("two", "due"), ("three", "tre"),
("four", "quattro"), ("five", "cinque"), ("seven", "sette"), ("eight", "otto"),
("dog", "cane"), ("pig", "maiale"), ("fish", "cavallo"), ("birds", "uccelli"),
- ("apple", "mela"), ("orange", "arancione"), ("grape", "acino"), ("banana", "banana")
+ ("apple", "mela"), ("orange", "arancione"), ("grape", "acino"), ("banana", "banana"),
]
self.test_word_pairs = [("ten", "dieci"), ("cat", "gatto")]
@@ -53,7 +54,7 @@ def test_translate_nn(self):
test_source_word, test_target_word = zip(*self.test_word_pairs)
translated_words = model.translate(
- test_source_word, topn=5, source_lang_vec=self.source_word_vec, target_lang_vec=self.target_word_vec
+ test_source_word, topn=5, source_lang_vec=self.source_word_vec, target_lang_vec=self.target_word_vec,
)
for idx, item in enumerate(self.test_word_pairs):
@@ -96,7 +97,7 @@ def setUp(self):
def test_translation_matrix(self):
model = translation_matrix.BackMappingTranslationMatrix(
- self.source_doc_vec, self.target_doc_vec, self.train_docs[:5]
+ self.source_doc_vec, self.target_doc_vec, self.train_docs[:5],
)
transmat = model.train(self.train_docs[:5])
self.assertEqual(transmat.shape, (8, 8))
@@ -108,7 +109,7 @@ def test_infer_vector(self):
replaces a nonsensical test.
"""
model = translation_matrix.BackMappingTranslationMatrix(
- self.source_doc_vec, self.target_doc_vec, self.train_docs[:5]
+ self.source_doc_vec, self.target_doc_vec, self.train_docs[:5],
)
model.train(self.train_docs[:5])
backmapped_vec = model.infer_vector(self.target_doc_vec.dv[self.train_docs[5].tags])
diff --git a/gensim/test/test_word2vec.py b/gensim/test/test_word2vec.py
index 001ad4c365..a1d766bdb8 100644
--- a/gensim/test/test_word2vec.py
+++ b/gensim/test/test_word2vec.py
@@ -571,9 +571,9 @@ def testEvaluateWordPairs(self):
pearson = correlation[0][0]
spearman = correlation[1][0]
oov = correlation[2]
- self.assertTrue(0.1 < pearson < 1.0, "pearson %f not between 0.1 & 1.0" % pearson)
- self.assertTrue(0.1 < spearman < 1.0, "spearman %f not between 0.1 and 1.0" % spearman)
- self.assertTrue(0.0 <= oov < 90.0, "oov %f not between 0.0 and 90.0" % oov)
+ self.assertTrue(0.1 < pearson < 1.0, f"pearson {pearson} not between 0.1 & 1.0")
+ self.assertTrue(0.1 < spearman < 1.0, f"spearman {spearman} not between 0.1 and 1.0")
+ self.assertTrue(0.0 <= oov < 90.0, f"OOV {oov} not between 0.0 and 90.0")
@unittest.skipIf(os.name == 'nt' and six.PY2, "CythonLineSentence is not supported on Windows + Py27")
def testEvaluateWordPairsFromFile(self):
@@ -586,9 +586,9 @@ def testEvaluateWordPairsFromFile(self):
pearson = correlation[0][0]
spearman = correlation[1][0]
oov = correlation[2]
- self.assertTrue(0.1 < pearson < 1.0, "pearson %f not between 0.1 & 1.0" % pearson)
- self.assertTrue(0.1 < spearman < 1.0, "spearman %f not between 0.1 and 1.0" % spearman)
- self.assertTrue(0.0 <= oov < 90.0, "oov %f not between 0.0 and 90.0" % oov)
+ self.assertTrue(0.1 < pearson < 1.0, f"pearson {pearson} not between 0.1 & 1.0")
+ self.assertTrue(0.1 < spearman < 1.0, f"spearman {spearman} not between 0.1 and 1.0")
+ self.assertTrue(0.0 <= oov < 90.0, f"OOV {oov} not between 0.0 and 90.0")
def model_sanity(self, model, train=True, with_corpus_file=False, ranks=None):
"""Even tiny models trained on LeeCorpus should pass these sanity checks"""
@@ -606,7 +606,7 @@ def model_sanity(self, model, train=True, with_corpus_file=False, ranks=None):
self.assertFalse((orig0 == model.wv.vectors[1]).all()) # vector should vary after training
query_word = 'attacks'
expected_word = 'bombings'
- sims = model.wv.most_similar(query_word, topn=len(model.wv.index2word))
+ sims = model.wv.most_similar(query_word, topn=len(model.wv.index_to_key))
t_rank = [word for word, score in sims].index(expected_word)
# in >200 calibration runs w/ calling parameters, 'terrorism' in 50-most_sim for 'war'
if ranks is not None:
@@ -855,7 +855,7 @@ def testLoadOldModel(self):
model = word2vec.Word2Vec.load(datapath(model_file))
self.assertTrue(model.wv.vectors.shape == (12, 100))
self.assertTrue(len(model.wv) == 12)
- self.assertTrue(len(model.wv.index2word) == 12)
+ self.assertTrue(len(model.wv.index_to_key) == 12)
self.assertTrue(model.syn1neg.shape == (len(model.wv), model.wv.vector_size))
self.assertTrue(len(model.wv.vectors_lockf.shape) > 0)
self.assertTrue(model.cum_table.shape == (12,))
@@ -870,7 +870,7 @@ def testLoadOldModelSeparates(self):
model = word2vec.Word2Vec.load(datapath(model_file))
self.assertTrue(model.wv.vectors.shape == (12, 100))
self.assertTrue(len(model.wv) == 12)
- self.assertTrue(len(model.wv.index2word) == 12)
+ self.assertTrue(len(model.wv.index_to_key) == 12)
self.assertTrue(model.syn1neg.shape == (len(model.wv), model.wv.vector_size))
self.assertTrue(len(model.wv.vectors_lockf.shape) > 0)
self.assertTrue(model.cum_table.shape == (12,))
diff --git a/gensim/utils.py b/gensim/utils.py
index bb9ee2fa02..49cab6c595 100644
--- a/gensim/utils.py
+++ b/gensim/utils.py
@@ -11,16 +11,9 @@
import collections
import logging
import warnings
-
-try:
- from html.entities import name2codepoint as n2cp
-except ImportError:
- from htmlentitydefs import name2codepoint as n2cp
-try:
- import cPickle as _pickle
-except ImportError:
- import pickle as _pickle
-
+import numbers
+from html.entities import name2codepoint as n2cp
+import pickle as _pickle
import re
import unicodedata
import os
@@ -36,18 +29,9 @@
import heapq
import numpy as np
-import numbers
import scipy.sparse
-
-from six import iterkeys, iteritems, itervalues, u, string_types, unichr
-from six.moves import range
-
from smart_open import open
-from multiprocessing import cpu_count
-
-if sys.version_info[0] >= 3:
- unicode = str
logger = logging.getLogger(__name__)
@@ -138,7 +122,7 @@ def file_or_filename(input):
An open file, positioned at the beginning.
"""
- if isinstance(input, string_types):
+ if isinstance(input, str):
# input was a filename: open as file
return open(input, 'rb')
else:
@@ -169,11 +153,11 @@ def open_file(input):
except Exception:
# Handling any unhandled exceptions from the code nested in 'with' statement.
exc = True
- if not isinstance(input, string_types) or not mgr.__exit__(*sys.exc_info()):
+ if not isinstance(input, str) or not mgr.__exit__(*sys.exc_info()):
raise
# Try to introspect and silence errors.
finally:
- if not exc and isinstance(input, string_types):
+ if not exc and isinstance(input, str):
mgr.__exit__(None, None, None)
@@ -199,11 +183,11 @@ def deaccent(text):
u'Sef chomutovskych komunistu dostal postou bily prasek'
"""
- if not isinstance(text, unicode):
+ if not isinstance(text, str):
# assume utf8 for byte strings, use default (strict) error handling
text = text.decode('utf8')
norm = unicodedata.normalize("NFD", text)
- result = u('').join(ch for ch in norm if unicodedata.category(ch) != 'Mn')
+ result = ''.join(ch for ch in norm if unicodedata.category(ch) != 'Mn')
return unicodedata.normalize("NFC", result)
@@ -339,10 +323,10 @@ def any2utf8(text, errors='strict', encoding='utf8'):
"""
- if isinstance(text, unicode):
+ if isinstance(text, str):
return text.encode('utf8')
# do bytestring -> unicode -> utf8 full circle, to ensure valid utf8
- return unicode(text, encoding, errors=errors).encode('utf8')
+ return str(text, encoding, errors=errors).encode('utf8')
to_utf8 = any2utf8
@@ -366,9 +350,9 @@ def any2unicode(text, encoding='utf8', errors='strict'):
Unicode version of `text`.
"""
- if isinstance(text, unicode):
+ if isinstance(text, str):
return text
- return unicode(text, encoding, errors=errors)
+ return str(text, encoding, errors=errors)
to_unicode = any2unicode
@@ -393,7 +377,7 @@ def call_on_class_only(*args, **kwargs):
raise AttributeError('This method should be called on a class object.')
-class SaveLoad(object):
+class SaveLoad:
"""Serialize/deserialize object from disk, by equipping objects with the save()/load() methods.
Warnings
@@ -562,7 +546,7 @@ def _smart_save(self, fname, separately=None, sep_limit=10 * 1024**2, ignore=fro
finally:
# restore attribs handled specially
for obj, asides in restores:
- for attrib, val in iteritems(asides):
+ for attrib, val in asides.items():
with ignore_deprecation_warning():
setattr(obj, attrib, val)
logger.info("saved %s", fname)
@@ -599,7 +583,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol,
sparse_matrices = (scipy.sparse.csr_matrix, scipy.sparse.csc_matrix)
if separately is None:
separately = []
- for attrib, val in iteritems(self.__dict__):
+ for attrib, val in self.__dict__.items():
if isinstance(val, np.ndarray) and val.size >= sep_limit:
separately.append(attrib)
elif isinstance(val, sparse_matrices) and val.nnz >= sep_limit:
@@ -614,7 +598,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol,
recursive_saveloads = []
restores = []
- for attrib, val in iteritems(self.__dict__):
+ for attrib, val in self.__dict__.items():
if hasattr(val, '_save_specials'): # better than 'isinstance(val, SaveLoad)' if IPython reloading
recursive_saveloads.append(attrib)
cfname = '.'.join((fname, attrib))
@@ -622,7 +606,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol,
try:
numpys, scipys, ignoreds = [], [], []
- for attrib, val in iteritems(asides):
+ for attrib, val in asides.items():
if isinstance(val, np.ndarray) and attrib not in ignore:
numpys.append(attrib)
logger.info("storing np array '%s' to %s", attrib, subname(fname, attrib))
@@ -666,7 +650,7 @@ def _save_specials(self, fname, separately, sep_limit, ignore, pickle_protocol,
self.__dict__['__recursive_saveloads'] = recursive_saveloads
except Exception:
# restore the attributes if exception-interrupted
- for attrib, val in iteritems(asides):
+ for attrib, val in asides.items():
setattr(self, attrib, val)
raise
return restores + [(self, asides)]
@@ -749,7 +733,7 @@ def get_max_id(corpus):
return maxid
-class FakeDict(object):
+class FakeDict:
"""Objects of this class act as dictionaries that map integer->str(integer), for a specified
range of integers <0, num_terms).
@@ -778,7 +762,6 @@ def __getitem__(self, val):
def iteritems(self):
"""Iterate over all keys and values.
-
Yields
------
(int, str)
@@ -1087,9 +1070,9 @@ def safe_unichr(intval):
"""
try:
- return unichr(intval)
+ return chr(intval)
except ValueError:
- # ValueError: unichr() arg not in range(0x10000) (narrow Python build)
+ # ValueError: chr() arg not in range(0x10000) (narrow Python build)
s = "\\U%08x" % intval
# return UTF16 surrogate pair
return s.decode('unicode-escape')
@@ -1396,11 +1379,7 @@ def unpickle(fname):
"""
with open(fname, 'rb') as f:
- # Because of loading from S3 load can't be used (missing readline in smart_open)
- if sys.version_info > (3, 0):
- return _pickle.load(f, encoding='latin1')
- else:
- return _pickle.loads(f.read())
+ return _pickle.load(f, encoding='latin1') # needed because loading from S3 doesn't support readline()
def revdict(d):
@@ -1430,7 +1409,7 @@ def revdict(d):
{2: 1, 4: 3}
"""
- return {v: k for (k, v) in iteritems(dict(d))}
+ return {v: k for (k, v) in dict(d).items()}
def deprecated(reason):
@@ -1450,7 +1429,7 @@ def deprecated(reason):
Decorated function
"""
- if isinstance(reason, string_types):
+ if isinstance(reason, str):
def decorator(func):
fmt = "Call to deprecated `{name}` ({reason})."
@@ -1704,7 +1683,7 @@ def lemmatize(content, allowed_tags=re.compile(r'(NN|VB|JJ|RB)'), light=False,
# producing '==relate/VBN' or '**/NN'... try to preprocess the text a little
# FIXME this throws away all fancy parsing cues, including sentence structure,
# abbreviations etc.
- content = u(' ').join(tokenize(content, lower=True, errors='ignore'))
+ content = ' '.join(tokenize(content, lower=True, errors='ignore'))
parsed = parse(content, lemmata=True, collapse=False)
result = []
@@ -1814,7 +1793,7 @@ def trim_vocab_by_freq(vocab, topk, trim_rule=None):
if topk >= len(vocab):
return
- min_count = heapq.nlargest(topk, itervalues(vocab))[-1]
+ min_count = heapq.nlargest(topk, vocab.values())[-1]
prune_vocab(vocab, min_count, trim_rule=trim_rule)
@@ -1831,7 +1810,7 @@ def merge_counts(dict1, dict2):
result : dict
Merged dictionary with sum of frequencies as values.
"""
- for word, freq in iteritems(dict2):
+ for word, freq in dict2.items():
if word in dict1:
dict1[word] += freq
else:
@@ -1957,7 +1936,7 @@ def sample_dict(d, n=10, use_random=True):
Selected items from dictionary, as a list.
"""
- selected_keys = random.sample(list(d), min(len(d), n)) if use_random else itertools.islice(iterkeys(d), n)
+ selected_keys = random.sample(list(d), min(len(d), n)) if use_random else itertools.islice(d.keys(), n)
return [(key, d[key]) for key in selected_keys]
@@ -2080,7 +2059,7 @@ def lazy_flatten(nested_list):
"""
for el in nested_list:
- if isinstance(el, collections.Iterable) and not isinstance(el, string_types):
+ if isinstance(el, collections.Iterable) and not isinstance(el, str):
for sub in flatten(el):
yield sub
else:
@@ -2124,5 +2103,5 @@ def effective_n_jobs(n_jobs):
elif n_jobs is None:
return 1
elif n_jobs < 0:
- n_jobs = max(cpu_count() + 1 + n_jobs, 1)
+ n_jobs = max(multiprocessing.cpu_count() + 1 + n_jobs, 1)
return n_jobs
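
For the `effective_n_jobs` change above (spelling out `multiprocessing.cpu_count()` instead of the bare `cpu_count` import), a short worked sketch of the visible branches only, assuming an 8-core machine; other cases are elided here.

import multiprocessing

def effective_n_jobs(n_jobs):
    # sketch of the branches shown in the hunk above; other values elided
    if n_jobs is None:
        return 1
    elif n_jobs < 0:
        # -1 means "use all cores", -2 "all but one", and so on
        n_jobs = max(multiprocessing.cpu_count() + 1 + n_jobs, 1)
    return n_jobs

# On an assumed 8-core machine:
#   effective_n_jobs(-1) -> 8
#   effective_n_jobs(-2) -> 7
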
diff --git a/gensim/viz/poincare.py b/gensim/viz/poincare.py
index f20fd8ab2d..ba91f103dd 100644
--- a/gensim/viz/poincare.py
+++ b/gensim/viz/poincare.py
@@ -51,7 +51,7 @@ def poincare_2d_visualization(model, tree, figure_title, num_nodes=50, show_node
if vectors.shape[1] != 2:
raise ValueError('Can only plot 2-D vectors')
- node_labels = model.kv.index2word
+ node_labels = model.kv.index_to_key
nodes_x = list(vectors[:, 0])
nodes_y = list(vectors[:, 1])
nodes = go.Scatter(