From 24a1a101f3804a66e9e66e95b85db8c04f10ccb7 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 1 Jun 2024 19:49:12 +0100 Subject: [PATCH] GH-89727: Fix `shutil.rmtree()` recursion error on deep trees (#119808) Implement `shutil._rmtree_safe_fd()` using a list as a stack to avoid emitting recursion errors on deeply nested trees. `shutil._rmtree_unsafe()` was fixed in a150679f90. --- Lib/shutil.py | 162 +++++++----------- Lib/test/test_shutil.py | 1 - ...4-05-30-21-37-05.gh-issue-89727.D6S9ig.rst | 2 + 3 files changed, 68 insertions(+), 97 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst diff --git a/Lib/shutil.py b/Lib/shutil.py index 03a9d7560304303..b0d49e98cfe5f98 100644 --- a/Lib/shutil.py +++ b/Lib/shutil.py @@ -635,81 +635,76 @@ def onerror(err): onexc(os.rmdir, path, err) # Version using fd-based APIs to protect against races -def _rmtree_safe_fd(topfd, path, onexc): +def _rmtree_safe_fd(stack, onexc): + # Each stack item has four elements: + # * func: The first operation to perform: os.lstat, os.close or os.rmdir. + # Walking a directory starts with an os.lstat() to detect symlinks; in + # this case, func is updated before subsequent operations and passed to + # onexc() if an error occurs. + # * dirfd: Open file descriptor, or None if we're processing the top-level + # directory given to rmtree() and the user didn't supply dir_fd. + # * path: Path of file to operate upon. This is passed to onexc() if an + # error occurs. + # * orig_entry: os.DirEntry, or None if we're processing the top-level + # directory given to rmtree(). We used the cached stat() of the entry to + # save a call to os.lstat() when walking subdirectories. + func, dirfd, path, orig_entry = stack.pop() + name = path if orig_entry is None else orig_entry.name try: + if func is os.close: + os.close(dirfd) + return + if func is os.rmdir: + os.rmdir(name, dir_fd=dirfd) + return + + # Note: To guard against symlink races, we use the standard + # lstat()/open()/fstat() trick. + assert func is os.lstat + if orig_entry is None: + orig_st = os.lstat(name, dir_fd=dirfd) + else: + orig_st = orig_entry.stat(follow_symlinks=False) + + func = os.open # For error reporting. + topfd = os.open(name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dirfd) + + func = os.path.islink # For error reporting. + try: + if not os.path.samestat(orig_st, os.fstat(topfd)): + # Symlinks to directories are forbidden, see GH-46010. + raise OSError("Cannot call rmtree on a symbolic link") + stack.append((os.rmdir, dirfd, path, orig_entry)) + finally: + stack.append((os.close, topfd, path, orig_entry)) + + func = os.scandir # For error reporting. with os.scandir(topfd) as scandir_it: entries = list(scandir_it) - except FileNotFoundError: - return - except OSError as err: - err.filename = path - onexc(os.scandir, path, err) - return - for entry in entries: - fullname = os.path.join(path, entry.name) - try: - is_dir = entry.is_dir(follow_symlinks=False) - except FileNotFoundError: - continue - except OSError: - is_dir = False - else: - if is_dir: - try: - orig_st = entry.stat(follow_symlinks=False) - is_dir = stat.S_ISDIR(orig_st.st_mode) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.lstat, fullname, err) - continue - if is_dir: + for entry in entries: + fullname = os.path.join(path, entry.name) try: - dirfd = os.open(entry.name, os.O_RDONLY | os.O_NONBLOCK, dir_fd=topfd) - dirfd_closed = False + if entry.is_dir(follow_symlinks=False): + # Traverse into sub-directory. + stack.append((os.lstat, topfd, fullname, entry)) + continue except FileNotFoundError: continue - except OSError as err: - onexc(os.open, fullname, err) - else: - try: - if os.path.samestat(orig_st, os.fstat(dirfd)): - _rmtree_safe_fd(dirfd, fullname, onexc) - try: - os.close(dirfd) - except OSError as err: - # close() should not be retried after an error. - dirfd_closed = True - onexc(os.close, fullname, err) - dirfd_closed = True - try: - os.rmdir(entry.name, dir_fd=topfd) - except FileNotFoundError: - continue - except OSError as err: - onexc(os.rmdir, fullname, err) - else: - try: - # This can only happen if someone replaces - # a directory with a symlink after the call to - # os.scandir or stat.S_ISDIR above. - raise OSError("Cannot call rmtree on a symbolic " - "link") - except OSError as err: - onexc(os.path.islink, fullname, err) - finally: - if not dirfd_closed: - try: - os.close(dirfd) - except OSError as err: - onexc(os.close, fullname, err) - else: + except OSError: + pass try: os.unlink(entry.name, dir_fd=topfd) except FileNotFoundError: continue except OSError as err: onexc(os.unlink, fullname, err) + except FileNotFoundError as err: + if orig_entry is None or func is os.close: + err.filename = path + onexc(func, path, err) + except OSError as err: + err.filename = path + onexc(func, path, err) _use_fd_functions = ({os.open, os.stat, os.unlink, os.rmdir} <= os.supports_dir_fd and @@ -762,41 +757,16 @@ def onexc(*args): # While the unsafe rmtree works fine on bytes, the fd based does not. if isinstance(path, bytes): path = os.fsdecode(path) - # Note: To guard against symlink races, we use the standard - # lstat()/open()/fstat() trick. - try: - orig_st = os.lstat(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.lstat, path, err) - return + stack = [(os.lstat, dir_fd, path, None)] try: - fd = os.open(path, os.O_RDONLY | os.O_NONBLOCK, dir_fd=dir_fd) - fd_closed = False - except OSError as err: - onexc(os.open, path, err) - return - try: - if os.path.samestat(orig_st, os.fstat(fd)): - _rmtree_safe_fd(fd, path, onexc) - try: - os.close(fd) - except OSError as err: - # close() should not be retried after an error. - fd_closed = True - onexc(os.close, path, err) - fd_closed = True - try: - os.rmdir(path, dir_fd=dir_fd) - except OSError as err: - onexc(os.rmdir, path, err) - else: - try: - # symlinks to directories are forbidden, see bug #1669 - raise OSError("Cannot call rmtree on a symbolic link") - except OSError as err: - onexc(os.path.islink, path, err) + while stack: + _rmtree_safe_fd(stack, onexc) finally: - if not fd_closed: + # Close any file descriptors still on the stack. + while stack: + func, fd, path, entry = stack.pop() + if func is not os.close: + continue try: os.close(fd) except OSError as err: diff --git a/Lib/test/test_shutil.py b/Lib/test/test_shutil.py index 01f139073dcd97e..bccb81e0737c570 100644 --- a/Lib/test/test_shutil.py +++ b/Lib/test/test_shutil.py @@ -741,7 +741,6 @@ def _onexc(fn, path, exc): shutil.rmtree(TESTFN) raise - @unittest.skipIf(shutil._use_fd_functions, "fd-based functions remain unfixed (GH-89727)") def test_rmtree_above_recursion_limit(self): recursion_limit = 40 # directory_depth > recursion_limit diff --git a/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst new file mode 100644 index 000000000000000..854c56609acb8c7 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-05-30-21-37-05.gh-issue-89727.D6S9ig.rst @@ -0,0 +1,2 @@ +Fix issue with :func:`shutil.rmtree` where a :exc:`RecursionError` is raised +on deep directory trees.