Skip to content

Commit

Permalink
Fix backward compatibility issue: loading FastTextKeyedVectors using `KeyedVectors` (missing attribute `compatible_hash`) (#2349)
Browse files Browse the repository at this point in the history

* fix backward compatibility issue (impl. patching in load)

* add test

* upd test model (downgrade numpy)
  • Loading branch information
menshikh-iv authored Jan 23, 2019
1 parent 9819ce8 commit 95e222a
Show file tree
Hide file tree
Showing 3 changed files with 32 additions and 1 deletion.
17 changes: 17 additions & 0 deletions gensim/models/keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1512,6 +1512,15 @@ def get_keras_embedding(self, train_embeddings=False):
)
return layer

@classmethod
def load(cls, fname_or_handle, **kwargs):
    """Load a saved object and patch in `compatible_hash` when it is absent.

    The actual loading is delegated to the `load` that follows
    `WordEmbeddingsKeyedVectors` in the method resolution order of `cls`.

    Parameters
    ----------
    fname_or_handle
        Forwarded unchanged to the parent `load`.
    **kwargs
        Forwarded unchanged to the parent `load`.

    Returns
    -------
    object
        Whatever the parent `load` returned. If it is a `FastTextKeyedVectors`
        instance without a `compatible_hash` attribute, that attribute is
        set to ``False`` before returning.

    """
    loaded = super(WordEmbeddingsKeyedVectors, cls).load(fname_or_handle, **kwargs)

    # Only FastText vectors carry `compatible_hash`; an instance lacking it is
    # presumably from a file saved before the attribute existed.
    needs_patch = isinstance(loaded, FastTextKeyedVectors) and not hasattr(loaded, 'compatible_hash')
    if needs_patch:
        loaded.compatible_hash = False

    return loaded


# Alias kept for backward compatibility: `KeyedVectors` is the same class object
# as `Word2VecKeyedVectors`.
KeyedVectors = Word2VecKeyedVectors

Expand Down Expand Up @@ -1974,6 +1983,14 @@ def __init__(self, vector_size, min_n, max_n, bucket, compatible_hash):
self.num_ngram_vectors = 0
self.compatible_hash = compatible_hash

@classmethod
def load(cls, fname_or_handle, **kwargs):
    """Load a saved object, defaulting `compatible_hash` to ``False`` if missing.

    Parameters
    ----------
    fname_or_handle
        Forwarded unchanged to the parent `load`.
    **kwargs
        Forwarded unchanged to the parent `load`.

    Returns
    -------
    object
        The loaded object; it is guaranteed to have a `compatible_hash`
        attribute on return.

    """
    # NOTE(review): the super() anchor is `WordEmbeddingsKeyedVectors`, so the
    # lookup starts *after* that class in the MRO rather than after the class
    # this method is defined in - confirm this is intentional.
    loaded = super(WordEmbeddingsKeyedVectors, cls).load(fname_or_handle, **kwargs)

    if hasattr(loaded, 'compatible_hash'):
        return loaded

    # Attribute absent (presumably an older saved file): fall back to False.
    loaded.compatible_hash = False
    return loaded

@property
@deprecated("Attribute will be removed in 4.0.0, use self.vectors_vocab instead")
def syn0_vocab(self):
Expand Down
Binary file added gensim/test/test_data/ft_kv_3.6.0.model.gz
Binary file not shown.
16 changes: 15 additions & 1 deletion gensim/test/test_keyedvectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
import numpy as np

from gensim.corpora import Dictionary
from gensim.models import KeyedVectors as EuclideanKeyedVectors, WordEmbeddingSimilarityIndex
from gensim.models.keyedvectors import KeyedVectors as EuclideanKeyedVectors, WordEmbeddingSimilarityIndex, \
FastTextKeyedVectors
from gensim.test.utils import datapath

import gensim.models.keyedvectors
Expand Down Expand Up @@ -279,6 +280,19 @@ def test_set_item(self):
for ent, vector in zip(entities, vectors):
self.assertTrue(np.allclose(self.vectors[ent], vector))

def test_ft_kv_backward_compat_w_360(self):
    """Check the ``ft_kv_3.6.0.model.gz`` fixture loads through both entry points.

    The same file is loaded via `EuclideanKeyedVectors.load` and via
    `FastTextKeyedVectors.load`; each result must return the same top-5
    neighbours for "human".
    """
    generic_kv = EuclideanKeyedVectors.load(datapath("ft_kv_3.6.0.model.gz"))
    fasttext_kv = FastTextKeyedVectors.load(datapath("ft_kv_3.6.0.model.gz"))

    expected = ['trees', 'survey', 'system', 'graph', 'interface']

    # Same query and same expectation for both loaded objects, generic one first.
    for vectors in (generic_kv, fasttext_kv):
        neighbours = vectors.most_similar("human", topn=5)
        actual = [word for (word, _similarity) in neighbours]
        self.assertEqual(actual, expected)


class L2NormTest(unittest.TestCase):
def test(self):
Expand Down

0 comments on commit 95e222a

Please sign in to comment.