Remove strip_punctuation2 alias of strip_punctuation (#2965)

* Remove strip_punctuation2 Re Issue 2961
* Remove strip_punctuation2 alias Re Issue 2961, remove strip_punctuation alias strip_punctuation2 which makes a mess of docs
* Move strip_punctuation2 to strip_punctuation Re Issue 2961, remove use of strip_punctuation2 function which was an alias of strip_punctuation
* reorganize imports
* make flake8 happy
* Update CHANGELOG.md

Co-authored-by: Michael Penkov <misha.penkov@gmail.com>
Co-authored-by: Michael Penkov <m@penkov.dev>
piskvorky · Jun 29, 2021 · 2f23566 · 2f23566
1 parent dab0369
commit 2f23566
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 12 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -17,6 +17,7 @@ Changes
 * [#3115](https://github.com/RaRe-Technologies/gensim/pull/3115): Make LSI dispatcher CLI param for number of jobs optional, by [@robguinness](https://github.com/robguinness)
 * [#3128](https://github.com/RaRe-Technologies/gensim/pull/3128): Materialize and copy the corpus passed to SoftCosineSimilarity, by [@Witiko](https://github.com/Witiko)
 * [#3131](https://github.com/RaRe-Technologies/gensim/pull/3131): Added import to Nmf docs, and to models/__init__.py, by [@properGrammar](https://github.com/properGrammar)
+* [#2965](https://github.com/RaRe-Technologies/gensim/pull/2965): Remove strip_punctuation2 alias of strip_punctuation, by [@sciatro](https://github.com/sciatro)
 
 ### :books: Documentation
 

diff --git a/gensim/parsing/__init__.py b/gensim/parsing/__init__.py
@@ -1,8 +1,18 @@
 """This package contains functions to preprocess raw text"""
 
 from .porter import PorterStemmer  # noqa:F401
-from .preprocessing import (remove_stopwords, strip_punctuation, strip_punctuation2,  # noqa:F401
-                            strip_tags, strip_short, strip_numeric,
-                            strip_non_alphanum, strip_multiple_whitespaces,
-                            split_alphanum, stem_text, preprocess_string,
-                            preprocess_documents, read_file, read_files)
+from .preprocessing import (  # noqa:F401
+    preprocess_documents,
+    preprocess_string,
+    read_file,
+    read_files,
+    remove_stopwords,
+    split_alphanum,
+    stem_text,
+    strip_multiple_whitespaces,
+    strip_non_alphanum,
+    strip_numeric,
+    strip_punctuation,
+    strip_short,
+    strip_tags,
+)
diff --git a/gensim/parsing/preprocessing.py b/gensim/parsing/preprocessing.py
@@ -119,9 +119,6 @@ def strip_punctuation(s):
     return RE_PUNCT.sub(" ", s)
 
 
-strip_punctuation2 = strip_punctuation
-
-
 def strip_tags(s):
     """Remove tags from `s` using :const:`~gensim.parsing.preprocessing.RE_TAGS`.
 

diff --git a/gensim/test/test_parsing.py b/gensim/test/test_parsing.py
@@ -8,9 +8,18 @@
 import logging
 import unittest
 import numpy as np
-from gensim.parsing.preprocessing import \
-    remove_stopwords, strip_punctuation2, strip_tags, strip_short, strip_numeric, strip_non_alphanum, \
-    strip_multiple_whitespaces, split_alphanum, stem_text
+
+from gensim.parsing.preprocessing import (
+    remove_stopwords,
+    stem_text,
+    split_alphanum,
+    strip_multiple_whitespaces,
+    strip_non_alphanum,
+    strip_numeric,
+    strip_punctuation,
+    strip_short,
+    strip_tags,
+)
 
 
 # several documents
@@ -38,7 +47,7 @@
 for many searching purposes, a little fuzziness would help. """
 
 
-dataset = [strip_punctuation2(x.lower()) for x in [doc1, doc2, doc3, doc4]]
+dataset = [strip_punctuation(x.lower()) for x in [doc1, doc2, doc3, doc4]]
 # doc1 and doc2 have class 0, doc3 and doc4 have class 1
 classes = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])