From 57e877c3b3dcd5dc00ccf26543e5a66d2b09e318 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= Date: Thu, 4 Jul 2019 23:29:47 +0200 Subject: [PATCH 1/4] Fix bug with inadvertent logging setup - module imports must never issue logging events (logging not set up yet in main app, triggers a default config init) - warnings better, but in this case, we can more simply re-use the existing functionality from `gensim.utils` --- gensim/summarization/textcleaner.py | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/gensim/summarization/textcleaner.py b/gensim/summarization/textcleaner.py index ba51b7691a..fe5cbf2931 100644 --- a/gensim/summarization/textcleaner.py +++ b/gensim/summarization/textcleaner.py @@ -22,21 +22,14 @@ from gensim.summarization.syntactic_unit import SyntacticUnit from gensim.parsing.preprocessing import preprocess_documents -from gensim.utils import tokenize +from gensim.utils import tokenize, has_pattern from six.moves import range import re import logging logger = logging.getLogger(__name__) -try: - from pattern.en import tag - logger.info("'pattern' package found; tag filters are available for English") - HAS_PATTERN = True -except ImportError: - logger.info("'pattern' package not found; tag filters are not available for English") - HAS_PATTERN = False - +HAS_PATTERN = has_pattern() SEPARATOR = r'@' RE_SENTENCE = re.compile(r'(\S.+?[.!?])(?=\s+|$)|(\S.+?)(?=[\n]|$)', re.UNICODE) From 497c2a244c154d8129c04f65ef36bba85d366eec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= Date: Sun, 21 Jul 2019 15:51:55 +0200 Subject: [PATCH 2/4] Update textcleaner.py --- gensim/summarization/textcleaner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/gensim/summarization/textcleaner.py b/gensim/summarization/textcleaner.py index fe5cbf2931..64f7af4bda 100644 --- a/gensim/summarization/textcleaner.py +++ b/gensim/summarization/textcleaner.py @@ -30,6 +30,8 @@ logger = 
logging.getLogger(__name__) HAS_PATTERN = has_pattern() +if HAS_PATTERN: + from pattern.en import tag SEPARATOR = r'@' RE_SENTENCE = re.compile(r'(\S.+?[.!?])(?=\s+|$)|(\S.+?)(?=[\n]|$)', re.UNICODE) From 0bad84489dcfa0c9dbb4e5122af2dabf7dcfcc36 Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 26 Aug 2019 17:17:32 +0900 Subject: [PATCH 3/4] get Py2.7 docs to build --- setup.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ae20cc020d..2832cd946d 100644 --- a/setup.py +++ b/setup.py @@ -265,6 +265,20 @@ def finalize_options(self): else: win_testenv.append('scikit-learn') +docs_testenv = linux_testenv + distributed_env + [ + 'sphinx', + 'sphinxcontrib-napoleon', + 'plotly', + 'pattern <= 2.6', + 'sphinxcontrib.programoutput', +] + +# +# Get Py2.7 docs to build, see https://github.com/RaRe-Technologies/gensim/pull/2552 +# +if sys.version_info[:2] == (2, 7): + docs_testenv.insert(0, 'doctools==0.14') + linux_testenv = win_testenv[:] if sys.version_info < (3, 7): @@ -385,7 +399,7 @@ def finalize_options(self): 'distributed': distributed_env, 'test-win': win_testenv, 'test': linux_testenv, - 'docs': linux_testenv + distributed_env + ['sphinx', 'sphinxcontrib-napoleon', 'plotly', 'pattern <= 2.6', 'sphinxcontrib.programoutput'], + 'docs': docs_testenv, }, include_package_data=True, From f0ae8faf3f3a8a53915a6f47b03e5b3a33befd2d Mon Sep 17 00:00:00 2001 From: Michael Penkov Date: Mon, 26 Aug 2019 17:23:02 +0900 Subject: [PATCH 4/4] bugfix in setup.py --- setup.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index 2832cd946d..6d2b1c02cb 100644 --- a/setup.py +++ b/setup.py @@ -265,6 +265,16 @@ def finalize_options(self): else: win_testenv.append('scikit-learn') + +linux_testenv = win_testenv[:] + +if sys.version_info < (3, 7): + linux_testenv.extend([ + 'tensorflow <= 1.3.0', + 'keras >= 2.0.4, <= 2.1.4', + 'annoy', + ]) + docs_testenv = 
linux_testenv + distributed_env + [ 'sphinx', 'sphinxcontrib-napoleon', @@ -272,22 +282,12 @@ def finalize_options(self): 'pattern <= 2.6', 'sphinxcontrib.programoutput', ] - # # Get Py2.7 docs to build, see https://github.com/RaRe-Technologies/gensim/pull/2552 # if sys.version_info[:2] == (2, 7): docs_testenv.insert(0, 'doctools==0.14') -linux_testenv = win_testenv[:] - -if sys.version_info < (3, 7): - linux_testenv.extend([ - 'tensorflow <= 1.3.0', - 'keras >= 2.0.4, <= 2.1.4', - 'annoy', - ]) - ext_modules = [ Extension('gensim.models.word2vec_inner', sources=['./gensim/models/word2vec_inner.c'],