Skip to content

Commit

Permalink
pythongh-56166: Deprecate passing confusing positional arguments in r…
Browse files Browse the repository at this point in the history
…e functions

Deprecate passing optional arguments maxsplit, count and flags in
module-level functions re.split(), re.sub() and re.subn() as positional.
They should only be passed by keyword.
  • Loading branch information
serhiy-storchaka committed Aug 8, 2023
1 parent 5df8b0d commit d7238ab
Show file tree
Hide file tree
Showing 5 changed files with 120 additions and 16 deletions.
18 changes: 10 additions & 8 deletions Doc/library/re.rst
Original file line number Diff line number Diff line change
Expand Up @@ -929,6 +929,11 @@ Functions
.. versionchanged:: 3.7
Added support of splitting on a pattern that could match an empty string.

.. deprecated:: 3.13
Passing *maxsplit* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.


.. function:: findall(pattern, string, flags=0)

Expand Down Expand Up @@ -1027,8 +1032,6 @@ Functions
.. versionchanged:: 3.7
Unknown escapes in *repl* consisting of ``'\'`` and an ASCII letter
now are errors.

.. versionchanged:: 3.7
Empty matches for the pattern are replaced when adjacent to a previous
non-empty match.

Expand All @@ -1037,18 +1040,17 @@ Functions
In :class:`bytes` replacement strings, group *name* can only contain bytes
in the ASCII range (``b'\x00'``-``b'\x7f'``).

.. deprecated:: 3.13
Passing *count* and *flags* as positional arguments is deprecated.
In future Python versions they will be
:ref:`keyword-only parameters <keyword-only_parameter>`.


.. function:: subn(pattern, repl, string, count=0, flags=0)

Perform the same operation as :func:`sub`, but return a tuple ``(new_string,
number_of_subs_made)``.

.. versionchanged:: 3.1
Added the optional flags argument.

.. versionchanged:: 3.5
Unmatched groups are replaced with an empty string.


.. function:: escape(pattern)

Expand Down
8 changes: 8 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -827,6 +827,14 @@ Porting to Python 3.13
Deprecated
----------

* Passing the optional arguments *maxsplit*, *count* and *flags* as
positional arguments to the module-level functions :func:`re.split`,
:func:`re.sub` and :func:`re.subn` is now deprecated.
They should be passed by keyword.
In future Python versions these parameters will be
:ref:`keyword-only <keyword-only_parameter>`.
(Contributed by Serhiy Storchaka in :gh:`56166`.)

* Deprecate the old ``Py_UNICODE`` and ``PY_UNICODE_TYPE`` types: use directly
the :c:type:`wchar_t` type instead. Since Python 3.3, ``Py_UNICODE`` and
``PY_UNICODE_TYPE`` are just aliases to :c:type:`wchar_t`.
Expand Down
67 changes: 64 additions & 3 deletions Lib/re/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,16 +175,39 @@ def search(pattern, string, flags=0):
a Match object, or None if no match was found."""
return _compile(pattern, flags).search(string)

def sub(pattern, repl, string, count=0, flags=0):
# Default value for the optional arguments of sub(), subn() and split().
# It is an int equal to 0, so it can be forwarded unchanged, but it can be
# told apart by identity from a 0 that the caller passed explicitly.
class _ZeroSentinel(int):
    pass
_zero_sentinel = _ZeroSentinel()

def sub(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in string by the
    replacement repl.  repl can be either a string or a callable;
    if a string, backslash escapes in it are processed.  If it is
    a callable, it's passed the Match object and must return
    a replacement string to be used.

    count and flags should be passed by keyword.  Passing them
    positionally still works but emits a DeprecationWarning.

    Raises TypeError if count or flags is given both positionally and
    by keyword, or if more than 5 positional arguments are passed."""
    if args:
        # Legacy call style: count (and possibly flags) given positionally.
        if count is not _zero_sentinel:
            raise TypeError("sub() got multiple values for argument 'count'")
        count, *args = args
        if args:
            if flags is not _zero_sentinel:
                raise TypeError("sub() got multiple values for argument 'flags'")
            flags, *args = args
            if args:
                # pattern, repl, string, count and flags are already
                # consumed, so the total given is 5 plus what is left over.
                raise TypeError("sub() takes from 3 to 5 positional arguments "
                                "but %d were given" % (5 + len(args)))

        # Imported lazily: only needed on the deprecated code path.
        import warnings
        warnings.warn(
            "'count' is passed as positional argument",
            DeprecationWarning, stacklevel=2
        )

    return _compile(pattern, flags).sub(repl, string, count)
# *args hides the real parameters from introspection; advertise the
# documented signature instead.
sub.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def subn(pattern, repl, string, count=0, flags=0):
def subn(pattern, repl, string, *args, count=_zero_sentinel, flags=_zero_sentinel):
    """Return a 2-tuple containing (new_string, number).
    new_string is the string obtained by replacing the leftmost
    non-overlapping occurrences of the pattern in the source
    string by the replacement repl.  number is the number of
    substitutions that were made.  repl can be either a string or a
    callable; if a string, backslash escapes in it are processed.
    If it is a callable, it's passed the Match object and must
    return a replacement string to be used.

    count and flags should be passed by keyword.  Passing them
    positionally still works but emits a DeprecationWarning.

    Raises TypeError if count or flags is given both positionally and
    by keyword, or if more than 5 positional arguments are passed."""
    if args:
        # Legacy call style: count (and possibly flags) given positionally.
        if count is not _zero_sentinel:
            raise TypeError("subn() got multiple values for argument 'count'")
        count, *args = args
        if args:
            if flags is not _zero_sentinel:
                raise TypeError("subn() got multiple values for argument 'flags'")
            flags, *args = args
            if args:
                # pattern, repl, string, count and flags are already
                # consumed, so the total given is 5 plus what is left over.
                raise TypeError("subn() takes from 3 to 5 positional arguments "
                                "but %d were given" % (5 + len(args)))

        # Imported lazily: only needed on the deprecated code path.
        import warnings
        warnings.warn(
            "'count' is passed as positional argument",
            DeprecationWarning, stacklevel=2
        )

    return _compile(pattern, flags).subn(repl, string, count)
# *args hides the real parameters from introspection; advertise the
# documented signature instead.
subn.__text_signature__ = '(pattern, repl, string, count=0, flags=0)'

def split(pattern, string, maxsplit=0, flags=0):
def split(pattern, string, *args, maxsplit=_zero_sentinel, flags=_zero_sentinel):
    """Split the source string by the occurrences of the pattern,
    returning a list containing the resulting substrings.  If
    capturing parentheses are used in pattern, then the text of all
    groups in the pattern are also returned as part of the resulting
    list.  If maxsplit is nonzero, at most maxsplit splits occur,
    and the remainder of the string is returned as the final element
    of the list.

    maxsplit and flags should be passed by keyword.  Passing them
    positionally still works but emits a DeprecationWarning."""
    # Preferred call style: nothing beyond pattern and string is positional.
    if not args:
        return _compile(pattern, flags).split(string, maxsplit)

    # Legacy call style: the first extra positional is maxsplit, the
    # second (if any) is flags; anything further is an error.
    if maxsplit is not _zero_sentinel:
        raise TypeError("split() got multiple values for argument 'maxsplit'")
    maxsplit, extra = args[0], args[1:]
    if extra:
        if flags is not _zero_sentinel:
            raise TypeError("split() got multiple values for argument 'flags'")
        flags, extra = extra[0], extra[1:]
        if extra:
            raise TypeError("split() takes from 2 to 4 positional arguments "
                            "but %d were given" % (4 + len(extra)))

    # Imported lazily: only needed on the deprecated code path.
    import warnings
    warnings.warn(
        "'maxsplit' is passed as positional argument",
        DeprecationWarning, stacklevel=2
    )
    return _compile(pattern, flags).split(string, maxsplit)
# *args hides the real parameters from introspection; advertise the
# documented signature instead.
split.__text_signature__ = '(pattern, string, maxsplit=0, flags=0)'

def findall(pattern, string, flags=0):
"""Return a list of all non-overlapping matches in the string.
Expand Down
40 changes: 35 additions & 5 deletions Lib/test/test_re.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,8 +127,10 @@ def test_basic_re_sub(self):
self.assertEqual(re.sub("(?i)b+", "x", "bbbb BBBB"), 'x x')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y'),
'9.3 -3 24x100y')
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', 3),
'9.3 -3 23x99y')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub(r'\d+', self.bump_num, '08.2 -2 23x99y', count=3),
'9.3 -3 23x99y')

Expand Down Expand Up @@ -235,9 +237,29 @@ def test_sub_template_numeric_escape(self):

def test_qualified_re_sub(self):
self.assertEqual(re.sub('a', 'b', 'aaaaa'), 'bbbbb')
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.sub('a', 'b', 'aaaaa', 1), 'baaaa')
self.assertEqual(w.filename, __file__)
self.assertEqual(re.sub('a', 'b', 'aaaaa', count=1), 'baaaa')

self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, count=1)
self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, 0, flags=0)
self.assertRaises(TypeError, re.sub, 'a', 'b', 'aaaaa', 1, 0, 0)

def test_misuse_flags(self):
with self.assertWarns(DeprecationWarning) as w:
result = re.sub('a', 'b', 'aaaaa', re.I)
self.assertEqual(result, re.sub('a', 'b', 'aaaaa', count=int(re.I)))
self.assertEqual(w.filename, __file__)
with self.assertWarns(DeprecationWarning) as w:
result = re.subn("b*", "x", "xyz", re.I)
self.assertEqual(result, re.subn("b*", "x", "xyz", count=int(re.I)))
self.assertEqual(w.filename, __file__)
with self.assertWarns(DeprecationWarning) as w:
result = re.split(":", ":a:b::c", re.I)
self.assertEqual(result, re.split(":", ":a:b::c", maxsplit=int(re.I)))
self.assertEqual(w.filename, __file__)

def test_bug_114660(self):
self.assertEqual(re.sub(r'(\S)\s+(\S)', r'\1 \2', 'hello there'),
'hello there')
Expand Down Expand Up @@ -344,9 +366,15 @@ def test_re_subn(self):
self.assertEqual(re.subn("b+", "x", "bbbb BBBB"), ('x BBBB', 1))
self.assertEqual(re.subn("b+", "x", "xyz"), ('xyz', 0))
self.assertEqual(re.subn("b*", "x", "xyz"), ('xxxyxzx', 4))
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.subn("b*", "x", "xyz", 2), ('xxxyz', 2))
self.assertEqual(w.filename, __file__)
self.assertEqual(re.subn("b*", "x", "xyz", count=2), ('xxxyz', 2))

self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, count=1)
self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, 0, flags=0)
self.assertRaises(TypeError, re.subn, "b*", "x", "xyz", 2, 0, 0)

def test_re_split(self):
for string in ":a:b::c", S(":a:b::c"):
self.assertTypedEqual(re.split(":", string),
Expand Down Expand Up @@ -401,7 +429,9 @@ def test_re_split(self):
self.assertTypedEqual(re.split(sep, ':a:b::c'), expected)

def test_qualified_re_split(self):
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
with self.assertWarns(DeprecationWarning) as w:
self.assertEqual(re.split(":", ":a:b::c", 2), ['', 'a', 'b::c'])
self.assertEqual(w.filename, __file__)
self.assertEqual(re.split(":", ":a:b::c", maxsplit=2), ['', 'a', 'b::c'])
self.assertEqual(re.split(':', 'a:b:c:d', maxsplit=2), ['a', 'b', 'c:d'])
self.assertEqual(re.split("(:)", ":a:b::c", maxsplit=2),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Deprecate passing optional arguments *maxsplit*, *count* and *flags* as
positional arguments in :func:`re.split`, :func:`re.sub` and
:func:`re.subn`. They should only be passed by keyword.

0 comments on commit d7238ab

Please sign in to comment.