From 9e694e1bd8a3a11df25b8245d337ac8a9a09aec3 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sun, 19 Apr 2020 11:37:20 +0300 Subject: [PATCH 1/5] add osx+py38 case to avoid multiprocessing issue --- gensim/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/utils.py b/gensim/utils.py index 70b12d8a88..5dac42628f 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1238,7 +1238,7 @@ def run(self): self.q.put(wrapped_chunk.pop(), block=True) -if os.name == 'nt': +if os.name == 'nt' or (sys.platform == "darwin" and sys.version_info >= (3, 8)): def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """Split `corpus` into fixed-sized chunks, using :func:`~gensim.utils.chunkize_serial`. From 30dfca08a8d51dd3e68b3278746cbf6b9306b77d Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sun, 19 Apr 2020 12:53:33 +0300 Subject: [PATCH 2/5] add comment, fix warning --- gensim/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gensim/utils.py b/gensim/utils.py index 5dac42628f..88cb544a41 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1238,6 +1238,7 @@ def run(self): self.q.put(wrapped_chunk.pop(), block=True) +# Avoid multiprocessing issue on Windows and OSX with python3.8+ if os.name == 'nt' or (sys.platform == "darwin" and sys.version_info >= (3, 8)): def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """Split `corpus` into fixed-sized chunks, using :func:`~gensim.utils.chunkize_serial`. 
@@ -1260,7 +1261,8 @@ def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """ if maxsize > 0: - warnings.warn("detected Windows; aliasing chunkize to chunkize_serial") + entity = "Windows" if os.name == 'nt' else "OSX with python3.8+" + warnings.warn("detected {entity}; aliasing chunkize to chunkize_serial".format(entity=entity)) for chunk in chunkize_serial(corpus, chunksize, as_numpy=as_numpy): yield chunk else: From 32d331ffcb99d8f09aeaa8bd8ce84547808a2fbb Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Sun, 19 Apr 2020 15:17:18 +0300 Subject: [PATCH 3/5] extend comment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Radim Řehůřek --- gensim/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/gensim/utils.py b/gensim/utils.py index 88cb544a41..dccfed000a 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1238,7 +1238,10 @@ def run(self): self.q.put(wrapped_chunk.pop(), block=True) -# Avoid multiprocessing issue on Windows and OSX with python3.8+ +# Multiprocessing on Windows (and on OSX with python3.8+) uses "spawn" mode, which +# causes issues with pickling. +# So for these two platforms, use simpler serial processing in `chunkize`. +# See https://github.com/RaRe-Technologies/gensim/pull/2800/files#r410890171 if os.name == 'nt' or (sys.platform == "darwin" and sys.version_info >= (3, 8)): def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """Split `corpus` into fixed-sized chunks, using :func:`~gensim.utils.chunkize_serial`. 
From 2744629b3e7c072c95528ea60116b810dd307f8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20=C5=98eh=C5=AF=C5=99ek?= Date: Sun, 19 Apr 2020 14:19:14 +0200 Subject: [PATCH 4/5] Update gensim/utils.py --- gensim/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/utils.py b/gensim/utils.py index dccfed000a..9c9ab7e2ae 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1241,7 +1241,7 @@ def run(self): # Multiprocessing on Windows (and on OSX with python3.8+) uses "spawn" mode, which # causes issues with pickling. # So for these two platforms, use simpler serial processing in `chunkize`. -# See https://github.com/RaRe-Technologies/gensim/pull/2800/files#r410890171 +# See https://github.com/RaRe-Technologies/gensim/pull/2800#discussion_r410890171 if os.name == 'nt' or (sys.platform == "darwin" and sys.version_info >= (3, 8)): def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """Split `corpus` into fixed-sized chunks, using :func:`~gensim.utils.chunkize_serial`. From 33ebe391d4622afeb5f3ce7283dc94138f1e2769 Mon Sep 17 00:00:00 2001 From: Ivan Menshikh Date: Mon, 20 Apr 2020 07:33:22 +0300 Subject: [PATCH 5/5] Update gensim/utils.py Co-Authored-By: Michael Penkov --- gensim/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gensim/utils.py b/gensim/utils.py index 9c9ab7e2ae..90c9279338 100644 --- a/gensim/utils.py +++ b/gensim/utils.py @@ -1265,7 +1265,7 @@ def chunkize(corpus, chunksize, maxsize=0, as_numpy=False): """ if maxsize > 0: entity = "Windows" if os.name == 'nt' else "OSX with python3.8+" - warnings.warn("detected {entity}; aliasing chunkize to chunkize_serial".format(entity=entity)) + warnings.warn("detected %s; aliasing chunkize to chunkize_serial" % entity) for chunk in chunkize_serial(corpus, chunksize, as_numpy=as_numpy): yield chunk else: