Skip to content

Commit c72a149

Browse files
Ignore quantifiers when splitting comma-separated regexes (#8898) (#8901)
Do not split on commas if they are between braces, since that indicates a quantifier. Also added a protection for slow implementations since existing workarounds may result in long strings of chained regular expressions. Adjust existing test for invalid regex to be truly invalid. Co-authored-by: lihu <lihu.ben-ezri-ravin@portalinstruments.com> (cherry picked from commit d28def9)
1 parent fe161df commit c72a149

File tree

5 files changed

+64
-4
lines changed

5 files changed

+64
-4
lines changed

doc/whatsnew/fragments/7229.bugfix

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
When parsing comma-separated lists of regular expressions in the config, ignore
2+
commas that are inside braces since those indicate quantifiers, not delineation
3+
between expressions.
4+
5+
Closes #7229

pylint/config/argument.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -122,7 +122,7 @@ def _regex_transformer(value: str) -> Pattern[str]:
122122
def _regexp_csv_transfomer(value: str) -> Sequence[Pattern[str]]:
123123
"""Transforms a comma separated list of regular expressions."""
124124
patterns: list[Pattern[str]] = []
125-
for pattern in _csv_transformer(value):
125+
for pattern in pylint_utils._check_regexp_csv(value):
126126
patterns.append(_regex_transformer(pattern))
127127
return patterns
128128

pylint/utils/__init__.py

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
HAS_ISORT_5,
1515
IsortDriver,
1616
_check_csv,
17+
_check_regexp_csv,
1718
_format_option_value,
1819
_splitstrip,
1920
_unquote,
@@ -34,6 +35,7 @@
3435
"HAS_ISORT_5",
3536
"IsortDriver",
3637
"_check_csv",
38+
"_check_regexp_csv",
3739
"_format_option_value",
3840
"_splitstrip",
3941
"_unquote",

pylint/utils/utils.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222
import textwrap
2323
import tokenize
2424
import warnings
25-
from collections.abc import Sequence
25+
from collections import deque
26+
from collections.abc import Iterable, Sequence
2627
from io import BufferedReader, BytesIO
2728
from typing import (
2829
TYPE_CHECKING,
@@ -330,6 +331,31 @@ def _check_csv(value: list[str] | tuple[str] | str) -> Sequence[str]:
330331
return _splitstrip(value)
331332

332333

334+
def _check_regexp_csv(value: list[str] | tuple[str] | str) -> Iterable[str]:
335+
r"""Split a comma-separated list of regexps, taking care to avoid splitting
336+
a regex employing a comma as quantifier, as in `\d{1,2}`."""
337+
if isinstance(value, (list, tuple)):
338+
yield from value
339+
else:
340+
# None is a sentinel value here
341+
regexps: deque[deque[str] | None] = deque([None])
342+
open_braces = False
343+
for char in value:
344+
if char == "{":
345+
open_braces = True
346+
elif char == "}" and open_braces:
347+
open_braces = False
348+
349+
if char == "," and not open_braces:
350+
regexps.append(None)
351+
elif regexps[-1] is None:
352+
regexps.pop()
353+
regexps.append(deque([char]))
354+
else:
355+
regexps[-1].append(char)
356+
yield from ("".join(regexp).strip() for regexp in regexps if regexp is not None)
357+
358+
333359
def _comment(string: str) -> str:
334360
"""Return string as a comment."""
335361
lines = [line.strip() for line in string.splitlines()]

tests/config/test_config.py

+29-2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@
55
from __future__ import annotations
66

77
import os
8+
import re
89
from pathlib import Path
910
from tempfile import TemporaryDirectory
11+
from typing import Any
1012

1113
import pytest
1214
from pytest import CaptureFixture
@@ -113,6 +115,31 @@ def test_unknown_py_version(capsys: CaptureFixture) -> None:
113115
assert "the-newest has an invalid format, should be a version string." in output.err
114116

115117

118+
# Pairs of (raw option value, regex strings it is expected to be split into).
CSV_REGEX_COMMA_CASES = [
    ("foo", ["foo"]),
    ("foo,bar", ["foo", "bar"]),
    ("foo, bar", ["foo", "bar"]),
    ("foo, bar{1,3}", ["foo", "bar{1,3}"]),
]


@pytest.mark.parametrize("in_string,expected", CSV_REGEX_COMMA_CASES)
def test_csv_regex_comma_in_quantifier(in_string: str, expected: list[str]) -> None:
    """Splitting ``--bad-names-rgx`` must keep a comma that sits inside a
    quantifier such as ``{1,3}`` as part of its regex.
    """
    linter_run = Run(
        [str(EMPTY_MODULE), rf"--bad-names-rgx={in_string}"],
        exit=False,
    )
    compiled: list[re.Pattern[Any]] = linter_run.linter.config.bad_names_rgxs
    wanted = [re.compile(regex) for regex in expected]
    assert compiled == wanted
141+
142+
116143
def test_regex_error(capsys: CaptureFixture) -> None:
117144
"""Check that we correctly error when an an option is passed whose value is an invalid regular expression."""
118145
with pytest.raises(SystemExit):
@@ -135,12 +162,12 @@ def test_csv_regex_error(capsys: CaptureFixture) -> None:
135162
"""
136163
with pytest.raises(SystemExit):
137164
Run(
138-
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,3})"],
165+
[str(EMPTY_MODULE), r"--bad-names-rgx=(foo{1,}, foo{1,3}})"],
139166
exit=False,
140167
)
141168
output = capsys.readouterr()
142169
assert (
143-
r"Error in provided regular expression: (foo{1 beginning at index 0: missing ), unterminated subpattern"
170+
r"Error in provided regular expression: (foo{1,} beginning at index 0: missing ), unterminated subpattern"
144171
in output.err
145172
)
146173

0 commit comments

Comments
 (0)