Skip to content

Commit

Permalink
Fix ValueError when instantiating SparseTermSimilarityMatrix (#2689)
Browse files Browse the repository at this point in the history
* force a Python int before calling islice; islice doesn't accept a NumPy int

* add test to check islice error

* this makes the test fail

* make sure that islice receives a python int

* fix typo
  • Loading branch information
ptorrestr authored and mpenkov committed Jan 6, 2020
1 parent f022028 commit 3d129de
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 2 deletions.
4 changes: 2 additions & 2 deletions gensim/similarities/levenshtein.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
This module provides a namespace for functions that use the Levenshtein distance.
"""

from itertools import islice
import itertools
import logging
from math import floor

Expand Down Expand Up @@ -150,4 +150,4 @@ def most_similar(self, t1, topn=10):
for (similarity, t2) in sorted(similarities, reverse=True)
if similarity > 0
)
return islice(most_similar, topn)
return itertools.islice(most_similar, int(topn))
5 changes: 5 additions & 0 deletions gensim/test/test_similarities.py
Original file line number Diff line number Diff line change
Expand Up @@ -1231,6 +1231,11 @@ def test_most_similar(self):
second_similarities = numpy.array([similarity for term, similarity in index.most_similar(u"holiday", topn=10)])
self.assertTrue(numpy.allclose(first_similarities ** 2.0, second_similarities))

# check proper integration with SparseTermSimilarityMatrix
index = LevenshteinSimilarityIndex(self.dictionary, alpha=1.0, beta=1.0)
similarity_matrix = SparseTermSimilarityMatrix(index, dictionary)
self.assertTrue(scipy.sparse.issparse(similarity_matrix.matrix))


if __name__ == '__main__':
logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s', level=logging.DEBUG)
Expand Down

0 comments on commit 3d129de

Please sign in to comment.