diff --git a/CHANGELOG.md b/CHANGELOG.md index 423ce2b389..2409a52399 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ Changes * [#3115](https://github.com/RaRe-Technologies/gensim/pull/3115): Make LSI dispatcher CLI param for number of jobs optional, by [@robguinness](https://github.com/robguinness) * [#3128](https://github.com/RaRe-Technologies/gensim/pull/3128): Materialize and copy the corpus passed to SoftCosineSimilarity, by [@Witiko](https://github.com/Witiko) * [#3131](https://github.com/RaRe-Technologies/gensim/pull/3131): Added import to Nmf docs, and to models/__init__.py, by [@properGrammar](https://github.com/properGrammar) +* [#2965](https://github.com/RaRe-Technologies/gensim/pull/2965): Remove strip_punctuation2 alias of strip_punctuation, by [@sciatro](https://github.com/sciatro) ### :books: Documentation diff --git a/gensim/parsing/__init__.py b/gensim/parsing/__init__.py index 5bbf84239e..c608bf399b 100644 --- a/gensim/parsing/__init__.py +++ b/gensim/parsing/__init__.py @@ -1,8 +1,18 @@ """This package contains functions to preprocess raw text""" from .porter import PorterStemmer # noqa:F401 -from .preprocessing import (remove_stopwords, strip_punctuation, strip_punctuation2, # noqa:F401 - strip_tags, strip_short, strip_numeric, - strip_non_alphanum, strip_multiple_whitespaces, - split_alphanum, stem_text, preprocess_string, - preprocess_documents, read_file, read_files) +from .preprocessing import ( # noqa:F401 + preprocess_documents, + preprocess_string, + read_file, + read_files, + remove_stopwords, + split_alphanum, + stem_text, + strip_multiple_whitespaces, + strip_non_alphanum, + strip_numeric, + strip_punctuation, + strip_short, + strip_tags, +) diff --git a/gensim/parsing/preprocessing.py b/gensim/parsing/preprocessing.py index 777ca46e8e..02d689a58a 100644 --- a/gensim/parsing/preprocessing.py +++ b/gensim/parsing/preprocessing.py @@ -118,9 +118,6 @@ def strip_punctuation(s): return RE_PUNCT.sub(" ", s) -strip_punctuation2 = strip_punctuation - - def strip_tags(s): """Remove tags from `s` using :const:`~gensim.parsing.preprocessing.RE_TAGS`. diff --git a/gensim/test/test_parsing.py b/gensim/test/test_parsing.py index d61671bd85..7cc310b9ce 100644 --- a/gensim/test/test_parsing.py +++ b/gensim/test/test_parsing.py @@ -8,9 +8,18 @@ import logging import unittest import numpy as np -from gensim.parsing.preprocessing import \ - remove_stopwords, strip_punctuation2, strip_tags, strip_short, strip_numeric, strip_non_alphanum, \ - strip_multiple_whitespaces, split_alphanum, stem_text + +from gensim.parsing.preprocessing import ( + remove_stopwords, + stem_text, + split_alphanum, + strip_multiple_whitespaces, + strip_non_alphanum, + strip_numeric, + strip_punctuation, + strip_short, + strip_tags, +) # several documents @@ -38,7 +47,7 @@ for many searching purposes, a little fuzziness would help. """ -dataset = [strip_punctuation2(x.lower()) for x in [doc1, doc2, doc3, doc4]] +dataset = [strip_punctuation(x.lower()) for x in [doc1, doc2, doc3, doc4]] # doc1 and doc2 have class 0, doc3 and doc4 avec class 1 classes = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])