Skip to content

Commit

Permalink
Fixed bug where connector_words was not restored when loading saved phrases…
Browse files Browse the repository at this point in the history
… model of version >= 4

Added tests for asserting persistence of phrases connector_words
  • Loading branch information
aloknayak29 committed Apr 28, 2021
1 parent a9b91ba commit cb97aba
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 9 deletions.
17 changes: 8 additions & 9 deletions gensim/models/phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,15 +391,14 @@ def load(cls, *args, **kwargs):
raise ValueError(f'failed to load {cls.__name__} model, unknown scoring "{model.scoring}"')

# common_terms didn't exist pre-3.?, and was renamed to connector_words in 4.0.0.
if hasattr(model, "common_terms"):
model.connector_words = model.common_terms
del model.common_terms
else:
logger.warning(
'older version of %s loaded without common_terms attribute, setting connector_words to an empty set',
cls.__name__,
)
model.connector_words = frozenset()
if not hasattr(model, "connector_words"):
if hasattr(model, "common_terms"):
model.connector_words = model.common_terms
del model.common_terms
else:
logger.warning('older version of %s loaded without common_terms attribute', cls.__name__)
logger.warning('setting connector_words to an empty set')
model.connector_words = frozenset()

if not hasattr(model, 'corpus_word_count'):
logger.warning('older version of %s loaded without corpus_word_count', cls.__name__)
Expand Down
18 changes: 18 additions & 0 deletions gensim/test/test_phrases.py
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,15 @@ def test_save_load(self):
3.444 # score for human interface
])

def test_save_load_with_connector_words(self):
    """Check that `connector_words` survives a save/load round trip of a Phrases model."""
    expected = frozenset({'of'})
    with temporary_file("test.pkl") as fpath:
        # Train with a non-empty connector set so an accidental reset on load is detectable.
        model = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=expected,
        )
        model.save(fpath)
        restored = Phrases.load(fpath)
        self.assertEqual(restored.connector_words, expected)

def test_save_load_string_scoring(self):
"""Test backwards compatibility with a previous version of Phrases with custom scoring."""
bigram_loaded = Phrases.load(datapath("phrases-scoring-str.pkl"))
Expand Down Expand Up @@ -385,6 +394,15 @@ def test_save_load(self):
bigram_loaded[['graph', 'minors', 'survey', 'human', 'interface', 'system']],
['graph_minors', 'survey', 'human_interface', 'system'])

def test_save_load_with_connector_words(self):
    """Check that `connector_words` survives a save/load round trip of a FrozenPhrases model."""
    expected = frozenset({'of'})
    with temporary_file("test.pkl") as fpath:
        # Build the frozen model from a Phrases model trained with a non-empty connector set.
        trained = Phrases(
            self.sentences,
            min_count=1,
            threshold=1,
            connector_words=expected,
        )
        frozen = FrozenPhrases(trained)
        frozen.save(fpath)
        restored = FrozenPhrases.load(fpath)
        self.assertEqual(restored.connector_words, expected)

def test_save_load_string_scoring(self):
"""Test saving and loading a FrozenPhrases object with a string scoring parameter.
This should ensure backwards compatibility with the previous version of FrozenPhrases"""
Expand Down

0 comments on commit cb97aba

Please sign in to comment.