# Review notes. Text before the first "diff --git" header is skipped by
# `git apply` / `patch`, so these lines do not alter the patch itself.
#
# Purpose: let pickled keyed-vector models that have no `compatible_hash`
# attribute be loaded; the attribute is defaulted to False after loading.
# (Presumably these are models saved by gensim 3.6.0, going by the fixture
# name `ft_kv_3.6.0.model.gz` -- confirm.)
#
# gensim/models/keyedvectors.py
#   * Hunk 1 adds a `load` classmethod directly above the
#     `KeyedVectors = Word2VecKeyedVectors` alias. It delegates to
#     `super(WordEmbeddingsKeyedVectors, cls).load(...)` and, only when the
#     loaded object is a `FastTextKeyedVectors` without `compatible_hash`,
#     sets `compatible_hash = False`.
#   * Hunk 2 adds a `load` classmethod after the `__init__` that assigns
#     `self.compatible_hash`. It delegates the same way and sets
#     `compatible_hash = False` whenever the attribute is missing.
#   * NOTE(review): both overrides name `WordEmbeddingsKeyedVectors` in
#     `super(...)`. The enclosing class headers are outside this patch, so it
#     cannot be confirmed here whether that is the enclosing class or its
#     parent. If it is the parent, any `load` defined on it is skipped --
#     confirm this is intended.
#
# gensim/test/test_data/ft_kv_3.6.0.model.gz
#   * New binary fixture consumed by the new test.
#
# gensim/test/test_keyedvectors.py
#   * The import now comes from `gensim.models.keyedvectors` and additionally
#     brings in `FastTextKeyedVectors`.
#   * `test_ft_kv_backward_compat_w_360` loads the fixture through both
#     `EuclideanKeyedVectors.load` (the `KeyedVectors` alias) and
#     `FastTextKeyedVectors.load`, and asserts that the five words returned by
#     `most_similar("human", topn=5)` equal the same expected list for both.
#
# NOTE(review): this patch was recovered from a whitespace-collapsed copy.
# Line breaks and indentation below were reconstructed from the Python
# structure; verify context lines against the target files before applying.
diff --git a/gensim/models/keyedvectors.py b/gensim/models/keyedvectors.py
index d9dad1cc56..65711baec6 100644
--- a/gensim/models/keyedvectors.py
+++ b/gensim/models/keyedvectors.py
@@ -1512,6 +1512,15 @@ def get_keras_embedding(self, train_embeddings=False):
         )
         return layer
 
+    @classmethod
+    def load(cls, fname_or_handle, **kwargs):
+        model = super(WordEmbeddingsKeyedVectors, cls).load(fname_or_handle, **kwargs)
+        if isinstance(model, FastTextKeyedVectors):
+            if not hasattr(model, 'compatible_hash'):
+                model.compatible_hash = False
+
+        return model
+
 
 KeyedVectors = Word2VecKeyedVectors  # alias for backward compatibility
 
@@ -1974,6 +1983,14 @@ def __init__(self, vector_size, min_n, max_n, bucket, compatible_hash):
         self.num_ngram_vectors = 0
         self.compatible_hash = compatible_hash
 
+    @classmethod
+    def load(cls, fname_or_handle, **kwargs):
+        model = super(WordEmbeddingsKeyedVectors, cls).load(fname_or_handle, **kwargs)
+        if not hasattr(model, 'compatible_hash'):
+            model.compatible_hash = False
+
+        return model
+
     @property
     @deprecated("Attribute will be removed in 4.0.0, use self.vectors_vocab instead")
     def syn0_vocab(self):
diff --git a/gensim/test/test_data/ft_kv_3.6.0.model.gz b/gensim/test/test_data/ft_kv_3.6.0.model.gz
new file mode 100644
index 0000000000..55fd042b54
Binary files /dev/null and b/gensim/test/test_data/ft_kv_3.6.0.model.gz differ
diff --git a/gensim/test/test_keyedvectors.py b/gensim/test/test_keyedvectors.py
index 2170403342..59e361cc6c 100644
--- a/gensim/test/test_keyedvectors.py
+++ b/gensim/test/test_keyedvectors.py
@@ -15,7 +15,8 @@
 import numpy as np
 
 from gensim.corpora import Dictionary
-from gensim.models import KeyedVectors as EuclideanKeyedVectors, WordEmbeddingSimilarityIndex
+from gensim.models.keyedvectors import KeyedVectors as EuclideanKeyedVectors, WordEmbeddingSimilarityIndex, \
+    FastTextKeyedVectors
 from gensim.test.utils import datapath
 
 import gensim.models.keyedvectors
@@ -279,6 +280,19 @@ def test_set_item(self):
         for ent, vector in zip(entities, vectors):
             self.assertTrue(np.allclose(self.vectors[ent], vector))
 
+    def test_ft_kv_backward_compat_w_360(self):
+        kv = EuclideanKeyedVectors.load(datapath("ft_kv_3.6.0.model.gz"))
+        ft_kv = FastTextKeyedVectors.load(datapath("ft_kv_3.6.0.model.gz"))
+
+        expected = ['trees', 'survey', 'system', 'graph', 'interface']
+        actual = [word for (word, similarity) in kv.most_similar("human", topn=5)]
+
+        self.assertEqual(actual, expected)
+
+        actual = [word for (word, similarity) in ft_kv.most_similar("human", topn=5)]
+
+        self.assertEqual(actual, expected)
+
 
 class L2NormTest(unittest.TestCase):
     def test(self):