Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-77609: Support following symlinks in pathlib.Path.glob() #104176

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 9 additions & 2 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -890,8 +890,9 @@ call fails (for example because the path doesn't exist).
[PosixPath('docs/conf.py')]

Patterns are the same as for :mod:`fnmatch`, with the addition of "``**``"
which means "this directory and all subdirectories, recursively". In other
words, it enables recursive globbing::
which means "this directory and all subdirectories, recursively", and "``***``"
which additionally follows symlinks to directories. These wildcards enable
recursive globbing::

>>> sorted(Path('.').glob('**/*.py'))
[PosixPath('build/lib/pathlib.py'),
Expand All @@ -915,6 +916,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
Support for the "``***``" wildcard was added.

.. versionadded:: 3.12
The *case_sensitive* argument.

Expand Down Expand Up @@ -1327,6 +1331,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname component
separator (:data:`~os.sep` or :data:`~os.altsep`).

.. versionchanged:: 3.12
Support for the "``***``" wildcard was added.

.. versionadded:: 3.12
The *case_sensitive* argument.

Expand Down
20 changes: 14 additions & 6 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,20 @@ def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0]
if not pat:
return _TerminatingSelector()
if pat == '**':
if pat == '**' or pat == '***':
child_parts_idx = 1
while child_parts_idx < len(pattern_parts) and pattern_parts[child_parts_idx] == '**':
while child_parts_idx < len(pattern_parts):
child_part = pattern_parts[child_parts_idx]
if child_part == '***':
pat = '***'
elif child_part != '**':
break
child_parts_idx += 1
child_parts = pattern_parts[child_parts_idx:]
if '**' in child_parts:
cls = _DoubleRecursiveWildcardSelector
for child_part in child_parts:
if child_part == '**' or child_part == '***':
cls = _DoubleRecursiveWildcardSelector
break
else:
cls = _RecursiveWildcardSelector
else:
Expand Down Expand Up @@ -166,6 +173,7 @@ def _select_from(self, parent_path, scandir):
class _RecursiveWildcardSelector(_Selector):

def __init__(self, pat, child_parts, flavour, case_sensitive):
self.follow_symlinks = pat == '***'
_Selector.__init__(self, child_parts, flavour, case_sensitive)

def _iterate_directories(self, parent_path, scandir):
Expand All @@ -178,11 +186,11 @@ def _iterate_directories(self, parent_path, scandir):
for entry in entries:
entry_is_dir = False
try:
entry_is_dir = entry.is_dir()
entry_is_dir = entry.is_dir(follow_symlinks=self.follow_symlinks)
except OSError as e:
if not _ignore_error(e):
raise
if entry_is_dir and not entry.is_symlink():
if entry_is_dir:
path = parent_path._make_child_relpath(entry.name)
for p in self._iterate_directories(path, scandir):
yield p
Expand Down
39 changes: 39 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1977,6 +1977,45 @@ def my_scandir(path):
subdir.chmod(000)
self.assertEqual(len(set(base.glob("*"))), 4)

def test_glob_recurse_symlinks(self):
# Exercise the "***" wildcard of glob(): compare the paths it yields against
# hand-written expected sets, for the BASE tree and for its dirC subtree.
# Helper: drop every result that has a "linkD" path component, then require
# the remaining set to equal the expected relative paths joined onto BASE.
# NOTE(review): presumably linkD is a self-referential symlink in the fixture
# tree and is filtered to keep the expected sets finite -- confirm in setUp.
def _check(glob, expected):
glob = {path for path in glob if "linkD" not in path.parts}
self.assertEqual(glob, { P(BASE, q) for q in expected })
# P is looked up by _check at call time, so binding it after the helper
# definition is fine.
P = self.cls

p = P(BASE)
# The expected results depend on whether symlinks can be created here: the
# first branch additionally expects entries under linkB and dirA/linkC.
if os_helper.can_symlink():
_check(p.glob("***/fileB"), ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
# Combining "***" with another "***" or with "**" (in either order) is
# expected to give the same matches as a single "***".
_check(p.glob("***/***/fileB"), ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
_check(p.glob("***/**/fileB"), ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
_check(p.glob("**/***/fileB"), ["dirB/fileB", "dirA/linkC/fileB", "linkB/fileB"])
_check(p.glob("***/*/fileA"), [])
_check(p.glob("***/*/fileB"), ["dirB/fileB", "linkB/fileB", "dirA/linkC/fileB"])
_check(p.glob("***/file*"), ["fileA", "dirA/linkC/fileB", "dirB/fileB", "dirC/fileC",
"dirC/dirD/fileD", "linkB/fileB"])
# A trailing separator restricts the matches to directories.
_check(p.glob("***/*/"), ["dirA", "dirA/linkC", "dirB", "dirC",
"dirC/dirD", "dirE", "linkB",])
# A bare "***" yields the starting directory ("") and every directory
# reached below it.
_check(p.glob("***"), ["", "dirA", "dirA/linkC", "dirB", "dirC", "dirE", "dirC/dirD",
"linkB"])
else:
# Without symlink support no link* entries are expected.
_check(p.glob("***/fileB"), ["dirB/fileB"])
_check(p.glob("***/*/fileA"), [])
_check(p.glob("***/*/fileB"), ["dirB/fileB"])
_check(p.glob("***/file*"), ["fileA", "dirB/fileB", "dirC/fileC", "dirC/dirD/fileD"])
_check(p.glob("***/*/"), ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
_check(p.glob("***"), ["", "dirA", "dirB", "dirC", "dirE", "dirC/dirD"])

# Checks rooted at dirC; none of the expected results contains a link* entry.
p = P(BASE, "dirC")
_check(p.glob("***/*"), ["dirC/fileC", "dirC/novel.txt", "dirC/dirD", "dirC/dirD/fileD"])
_check(p.glob("***/file*"), ["dirC/fileC", "dirC/dirD/fileD"])
_check(p.glob("dir*/***"), ["dirC/dirD"])
_check(p.glob("***/*/*"), ["dirC/dirD/fileD"])
_check(p.glob("***/*/"), ["dirC/dirD"])
_check(p.glob("***"), ["dirC", "dirC/dirD"])
_check(p.glob("***/***"), ["dirC", "dirC/dirD"])
_check(p.glob("***/*.txt"), ["dirC/novel.txt"])
_check(p.glob("***/*.*"), ["dirC/novel.txt"])

def _check_resolve(self, p, expected, strict=True):
q = p.resolve(strict)
self.assertEqual(q, expected)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Add support for the "``***``" wildcard in :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob`. This wildcard works like "``**``", except that
it also recurses into symlinks to directories.