Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GH-89727: Fix os.fwalk() recursion error on deep trees #119638

Merged
merged 9 commits into from
May 30, 2024
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
91 changes: 53 additions & 38 deletions Lib/os.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,24 +471,52 @@ def fwalk(top=".", topdown=True, onerror=None, *, follow_symlinks=False, dir_fd=
"""
sys.audit("os.fwalk", top, topdown, onerror, follow_symlinks, dir_fd)
top = fspath(top)
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if not follow_symlinks:
orig_st = stat(top, follow_symlinks=False, dir_fd=dir_fd)
topfd = open(top, O_RDONLY | O_NONBLOCK, dir_fd=dir_fd)
try:
if (follow_symlinks or (st.S_ISDIR(orig_st.st_mode) and
path.samestat(orig_st, stat(topfd)))):
yield from _fwalk(topfd, top, isinstance(top, bytes),
topdown, onerror, follow_symlinks)
finally:
close(topfd)

def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):
stack = [(_fwalk_walk, (True, dir_fd, top, top, None))]
isbytes = isinstance(top, bytes)
while stack:
yield from _fwalk(stack, isbytes, topdown, onerror, follow_symlinks)

# Each item in the _fwalk() stack is a pair (action, args).
_fwalk_walk = 0 # args: (isroot, dirfd, toppath, topname, entry)
_fwalk_yield = 1 # args: (toppath, dirnames, filenames, topfd)
_fwalk_close = 2 # args: dirfd

def _fwalk(stack, isbytes, topdown, onerror, follow_symlinks):
# Note: This uses O(depth of the directory tree) file descriptors: if
# necessary, it can be adapted to only require O(1) FDs, see issue
# #13734.

action, value = stack.pop()
if action == _fwalk_close:
close(value)
return
elif action == _fwalk_yield:
yield value
return
assert action == _fwalk_walk
isroot, dirfd, toppath, topname, entry = value
try:
if not follow_symlinks:
# Note: To guard against symlink races, we use the standard
# lstat()/open()/fstat() trick.
if entry is None:
orig_st = stat(topname, follow_symlinks=False, dir_fd=dirfd)
else:
orig_st = entry.stat(follow_symlinks=False)
topfd = open(topname, O_RDONLY | O_NONBLOCK, dir_fd=dirfd)
except OSError as err:
if isroot:
raise
if onerror is not None:
onerror(err)
return
stack.append((_fwalk_close, topfd))
if not follow_symlinks:
if isroot and not st.S_ISDIR(orig_st.st_mode):
return
if not path.samestat(orig_st, stat(topfd)):
return

scandir_it = scandir(topfd)
dirs = []
nondirs = []
Expand All @@ -514,31 +542,18 @@ def _fwalk(topfd, toppath, isbytes, topdown, onerror, follow_symlinks):

if topdown:
yield toppath, dirs, nondirs, topfd
else:
stack.append((_fwalk_yield, (toppath, dirs, nondirs, topfd)))

for name in dirs if entries is None else zip(dirs, entries):
try:
if not follow_symlinks:
if topdown:
orig_st = stat(name, dir_fd=topfd, follow_symlinks=False)
else:
assert entries is not None
name, entry = name
orig_st = entry.stat(follow_symlinks=False)
dirfd = open(name, O_RDONLY | O_NONBLOCK, dir_fd=topfd)
except OSError as err:
if onerror is not None:
onerror(err)
continue
try:
if follow_symlinks or path.samestat(orig_st, stat(dirfd)):
dirpath = path.join(toppath, name)
yield from _fwalk(dirfd, dirpath, isbytes,
topdown, onerror, follow_symlinks)
finally:
close(dirfd)

if not topdown:
yield toppath, dirs, nondirs, topfd
toppath = path.join(toppath, toppath[:0]) # Add trailing slash.
barneygale marked this conversation as resolved.
Show resolved Hide resolved
if entries is None:
stack.extend(
(_fwalk_walk, (False, topfd, toppath + name, name, None))
for name in dirs[::-1])
else:
stack.extend(
(_fwalk_walk, (False, topfd, toppath + name, name, entry))
for name, entry in zip(dirs[::-1], entries[::-1]))

__all__.append("fwalk")

Expand Down
2 changes: 0 additions & 2 deletions Lib/test/test_os.py
Original file line number Diff line number Diff line change
Expand Up @@ -1687,8 +1687,6 @@ def test_fd_leak(self):

# fwalk() keeps file descriptors open
test_walk_many_open_files = None
# fwalk() still uses recursion
test_walk_above_recursion_limit = None


class BytesWalkTests(WalkTests):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Fix issue with :func:`os.fwalk` where a :exc:`RecursionError` was raised on
deep directory trees by adjusting the implementation to be iterative instead
of recursive.
Loading