From 21c0ba9069f496912f6cc6fc31f21055b772d1ea Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 12 Jan 2023 20:48:19 +0000 Subject: [PATCH 01/30] gh-101000: Add os.path.splitroot() --- Doc/library/os.path.rst | 20 ++++++ Lib/ntpath.py | 65 +++++++++++++------ Lib/posixpath.py | 26 +++++++- Lib/test/test_ntpath.py | 16 +++++ Lib/test/test_posixpath.py | 20 ++++++ ...-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst | 2 + 6 files changed, 127 insertions(+), 22 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 42bbe24830e6c1..041c0c6095b266 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -488,6 +488,26 @@ the :mod:`glob` module.) Accepts a :term:`path-like object`. +.. function:: splitroot(path) + + Split the pathname *path* into a triad ``(drive, root, tail)`` where: + + 1. *drive* is an optional mount point, exactly like :func:`splitdrive`; + 2. *root* is an optional sequence of separators following the drive; and + 3. *tail* is anything after the root. + + On Posix, *drive* is always empty. The *root* may be empty (relative path), + a single forward slash (absolute path), or two forward slashes + (implementation-defined per the POSIX standard). + + On Windows, *drive* may be a UNC sharepoint or a traditional DOS drive. The + *root* may be empty, a forward slash, or a backward slash. + + In all cases, ``drive + root + tail`` will be the same as *path*. + + .. versionadded:: 3.12 + + .. 
function:: splitext(path) Split the pathname *path* into a pair ``(root, ext)`` such that ``root + ext == diff --git a/Lib/ntpath.py b/Lib/ntpath.py index cd7fb58a88de67..12e632582a2227 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -24,7 +24,7 @@ from genericpath import * -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", +__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", "ismount", "expanduser","expandvars","normpath","abspath", @@ -169,35 +169,58 @@ def splitdrive(p): Paths cannot contain both a drive letter and a UNC path. + """ + drive, root, tail = splitroot(p) + return drive, root + tail + + +def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('Windows') == ('', '', 'Windows') """ p = os.fspath(p) - if len(p) >= 2: - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - normp = p.replace(altsep, sep) - if normp[0:2] == sep * 2: + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + else: + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: # UNC drives, e.g. \\server\share or \\?\UNC\server\share # Device drives, e.g. 
\\.\device or \\?\device start = 8 if normp[:8].upper() == unc_prefix else 2 index = normp.find(sep, start) if index == -1: - return p, p[:0] + return p, p[:0], p[:0] index2 = normp.find(sep, index + 1) if index2 == -1: - return p, p[:0] - return p[:index2], p[index2:] - if normp[1:2] == colon: - # Drive-letter drives, e.g. X: - return p[:2], p[2:] - return p[:0], p + return p, p[:0], p[:0] + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return p[:0], p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], p[:0], p[2:] + else: + # Relative path, e.g. Windows + return p[:0], p[:0], p # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 737f8a5c156d81..e6130613ec9245 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -28,7 +28,7 @@ import genericpath from genericpath import * -__all__ = ["normcase","isabs","join","splitdrive","split","splitext", +__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime","islink","exists","lexists","isdir","isfile", "ismount", "expanduser","expandvars","normpath","abspath", @@ -135,6 +135,30 @@ def splitdrive(p): return p[:0], p +def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. 
For example: + + splitdrive('foo/bar') == ('', '', 'foo/bar') + splitdrive('/foo/bar') == ('', '/', 'foo/bar') + """ + p = os.fspath(p) + sep = b'/' if isinstance(p, bytes) else '/' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return p[:0], p[:0], p + elif p[1:2] != sep: + # Absolute path, e.g.: '/foo' + return p[:0], p[:1], p[1:] + elif p[2:3] != sep: + # Implementation defined per POSIX standard, e.g.: '//foo' + return p[:0], p[:2], p[2:] + else: + # Absolute path with extraneous slashes, e.g.: '///foo', '////foo', etc. + return p[:0], p[:1], p[1:] + + # Return the tail (basename) part of a path, same as split(path)[1]. def basename(p): diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index f56de0be772105..dbc614728b4ff6 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -150,6 +150,22 @@ def test_splitdrive(self): tester('ntpath.splitdrive("//x")', ("//x", "")) # non-empty server & missing share tester('ntpath.splitdrive("//x/")', ("//x/", "")) # non-empty server & empty share + def test_splitroot(self): + tester("ntpath.splitroot('')", ('', '', '')) + tester("ntpath.splitroot('a')", ('', '', 'a')) + tester("ntpath.splitroot('a\\b')", ('', '', 'a\\b')) + tester("ntpath.splitroot('\\a')", ('', '\\', 'a')) + tester("ntpath.splitroot('\\a\\b')", ('', '\\', 'a\\b')) + tester("ntpath.splitroot('c:a\\b')", ('c:', '', 'a\\b')) + tester("ntpath.splitroot('c:\\a\\b')", ('c:', '\\', 'a\\b')) + # Redundant slashes are not included in the root. + tester("ntpath.splitroot('c:\\\\a')", ('c:', '\\', '\\a')) + tester("ntpath.splitroot('c:\\\\\\a/b')", ('c:', '\\', '\\\\a/b')) + # Valid UNC paths. 
+ tester("ntpath.splitroot('\\\\a\\b')", ('\\\\a\\b', '', '')) + tester("ntpath.splitroot('\\\\a\\b\\')", ('\\\\a\\b', '\\', '')) + tester("ntpath.splitroot('\\\\a\\b\\c\\d')", ('\\\\a\\b', '\\', 'c\\d')) + def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) tester('ntpath.split("\\\\conky\\mountpoint\\foo\\bar")', diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index 6c1c0f5577b7ec..bab37700b6b1e0 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -115,6 +115,26 @@ def test_splitext(self): self.splitextTest("........", "........", "") self.splitextTest("", "", "") + def test_splitroot(self): + f = posixpath.splitroot + self.assertEqual(f(''), ('', '', '')) + self.assertEqual(f('a'), ('', '', 'a')) + self.assertEqual(f('a/b'), ('', '', 'a/b')) + self.assertEqual(f('a/b/'), ('', '', 'a/b/')) + self.assertEqual(f('/a'), ('', '/', 'a')) + self.assertEqual(f('/a/b'), ('', '/', 'a/b')) + self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) + # The root is collapsed when there are redundant slashes + # except when there are exactly two leading slashes, which + # is a special case in POSIX. + self.assertEqual(f('//a'), ('', '//', 'a')) + self.assertEqual(f('///a'), ('', '/', '//a')) + self.assertEqual(f('///a/b'), ('', '/', '//a/b')) + # Paths which look like NT paths aren't treated specially. 
+ self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) + self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) + self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) + def test_isabs(self): self.assertIs(posixpath.isabs(""), False) self.assertIs(posixpath.isabs("/"), True) diff --git a/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst new file mode 100644 index 00000000000000..ba3dc034563190 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst @@ -0,0 +1,2 @@ +Add :func:`os.path.splitroot()`, which splits a path into a triad of +``(drive, root, tail)``. From 836b85d887b807bebfdb88f8443a6785e9005dec Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 12 Jan 2023 21:52:24 +0000 Subject: [PATCH 02/30] Use splitroot() from pathlib --- Lib/pathlib.py | 18 ++++-------------- Lib/test/test_pathlib.py | 37 ------------------------------------- 2 files changed, 4 insertions(+), 51 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index a0678f61b63211..1fc61343b44d86 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -271,19 +271,6 @@ def __reduce__(self): # when pickling related paths. return (self.__class__, tuple(self._parts)) - @classmethod - def _split_root(cls, part): - sep = cls._flavour.sep - rel = cls._flavour.splitdrive(part)[1].lstrip(sep) - anchor = part.removesuffix(rel) - if anchor: - anchor = cls._flavour.normpath(anchor) - drv, root = cls._flavour.splitdrive(anchor) - if drv.startswith(sep): - # UNC paths always have a root. - root = sep - return drv, root, rel - @classmethod def _parse_parts(cls, parts): if not parts: @@ -293,7 +280,10 @@ def _parse_parts(cls, parts): path = cls._flavour.join(*parts) if altsep: path = path.replace(altsep, sep) - drv, root, rel = cls._split_root(path) + drv, root, rel = cls._flavour.splitroot(path) + if drv.startswith(sep): + # pathlib assumes that UNC paths always have a root. 
+ root = sep unfiltered_parsed = [drv + root] + rel.split(sep) parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.'] return drv, root, parsed diff --git a/Lib/test/test_pathlib.py b/Lib/test/test_pathlib.py index 7d4d782cf5f075..50831f65728990 100644 --- a/Lib/test/test_pathlib.py +++ b/Lib/test/test_pathlib.py @@ -80,26 +80,6 @@ def test_parse_parts(self): check(['c:\\a'], ('', '', ['c:\\a'])) check(['\\a'], ('', '', ['\\a'])) - def test_splitroot(self): - f = self.cls._split_root - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a/b'), ('', '', 'a/b')) - self.assertEqual(f('a/b/'), ('', '', 'a/b/')) - self.assertEqual(f('/a'), ('', '/', 'a')) - self.assertEqual(f('/a/b'), ('', '/', 'a/b')) - self.assertEqual(f('/a/b/'), ('', '/', 'a/b/')) - # The root is collapsed when there are redundant slashes - # except when there are exactly two leading slashes, which - # is a special case in POSIX. - self.assertEqual(f('//a'), ('', '//', 'a')) - self.assertEqual(f('///a'), ('', '/', 'a')) - self.assertEqual(f('///a/b'), ('', '/', 'a/b')) - # Paths which look like NT paths aren't treated specially. 
- self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) - self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) - self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) - class NTFlavourTest(_BaseFlavourTest, unittest.TestCase): cls = pathlib.PureWindowsPath @@ -143,23 +123,6 @@ def test_parse_parts(self): check(['c:/a/b', 'c:x/y'], ('c:', '\\', ['c:\\', 'a', 'b', 'x', 'y'])) check(['c:/a/b', 'c:/x/y'], ('c:', '\\', ['c:\\', 'x', 'y'])) - def test_splitroot(self): - f = self.cls._split_root - self.assertEqual(f(''), ('', '', '')) - self.assertEqual(f('a'), ('', '', 'a')) - self.assertEqual(f('a\\b'), ('', '', 'a\\b')) - self.assertEqual(f('\\a'), ('', '\\', 'a')) - self.assertEqual(f('\\a\\b'), ('', '\\', 'a\\b')) - self.assertEqual(f('c:a\\b'), ('c:', '', 'a\\b')) - self.assertEqual(f('c:\\a\\b'), ('c:', '\\', 'a\\b')) - # Redundant slashes in the root are collapsed. - self.assertEqual(f('c:\\\\a'), ('c:', '\\', 'a')) - self.assertEqual(f('c:\\\\\\a/b'), ('c:', '\\', 'a/b')) - # Valid UNC paths. - self.assertEqual(f('\\\\a\\b'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\'), ('\\\\a\\b', '\\', '')) - self.assertEqual(f('\\\\a\\b\\c\\d'), ('\\\\a\\b', '\\', 'c\\d')) - # # Tests for the pure classes. From bc2d1f976fa07f092db018edcb398776fbc8a17f Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 12 Jan 2023 22:24:08 +0000 Subject: [PATCH 03/30] Use splitroot() from posixpath --- Lib/posixpath.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/Lib/posixpath.py b/Lib/posixpath.py index e6130613ec9245..8f208d80b56678 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -396,27 +396,18 @@ def normpath(path): dotdot = '..' if path == empty: return dot - initial_slashes = path.startswith(sep) - # POSIX allows one or two initial slashes, but treats three or more - # as single slash. 
- # (see https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13) - if (initial_slashes and - path.startswith(sep*2) and not path.startswith(sep*3)): - initial_slashes = 2 + _, root, path = splitroot(path) comps = path.split(sep) new_comps = [] for comp in comps: if comp in (empty, dot): continue - if (comp != dotdot or (not initial_slashes and not new_comps) or + if (comp != dotdot or (not root and not new_comps) or (new_comps and new_comps[-1] == dotdot)): new_comps.append(comp) elif new_comps: new_comps.pop() - comps = new_comps - path = sep.join(comps) - if initial_slashes: - path = sep*initial_slashes + path + path = root + sep.join(new_comps) return path or dot else: From ecdc40dc94341feb080bbc5a5e424c930f7faef8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 12 Jan 2023 22:29:02 +0000 Subject: [PATCH 04/30] Use splitroot() from ntpath --- Lib/ntpath.py | 58 +++++++++++++++++++++++---------------------------- 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 12e632582a2227..f1a7c6e8c3d045 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -117,19 +117,21 @@ def join(path, *paths): try: if not paths: path[:0] + sep #23780: Ensure compatible data type even if p is null. 
- result_drive, result_path = splitdrive(path) + result_drive, result_root, result_path = splitroot(path) for p in map(os.fspath, paths): - p_drive, p_path = splitdrive(p) - if p_path and p_path[0] in seps: + p_drive, p_root, p_path = splitroot(p) + if p_root: # Second path is absolute if p_drive or not result_drive: result_drive = p_drive + result_root = p_root result_path = p_path continue elif p_drive and p_drive != result_drive: if p_drive.lower() != result_drive.lower(): # Different drives => ignore the first path entirely result_drive = p_drive + result_root = p_root result_path = p_path continue # Same drive in different case @@ -139,10 +141,10 @@ def join(path, *paths): result_path = result_path + sep result_path = result_path + p_path ## add separator between UNC and non-absolute path - if (result_path and result_path[0] not in seps and + if (result_path and not result_root and result_drive and result_drive[-1:] != colon): return result_drive + sep + result_path - return result_drive + result_path + return result_drive + result_root + result_path except (TypeError, AttributeError, BytesWarning): genericpath._check_arg_types('join', path, *paths) raise @@ -235,15 +237,13 @@ def split(p): Either part may be empty.""" p = os.fspath(p) seps = _get_bothseps(p) - d, p = splitdrive(p) + d, r, p = splitroot(p) # set i to index beyond p's last slash i = len(p) while i and p[i-1] not in seps: i -= 1 head, tail = p[:i], p[i:] # now tail has no slashes - # remove trailing slashes from head, unless it's all slashes - head = head.rstrip(seps) or head - return d + head, tail + return d + r + head.rstrip(seps), tail # Split a path in root and extension. 
@@ -334,10 +334,10 @@ def ismount(path): path = os.fspath(path) seps = _get_bothseps(path) path = abspath(path) - root, rest = splitdrive(path) - if root and root[0] in seps: - return (not rest) or (rest in seps) - if rest and rest in seps: + drive, root, rest = splitroot(path) + if drive and drive[0] in seps: + return not rest + if root and not rest: return True if _getvolumepathname: @@ -548,14 +548,9 @@ def normpath(path): curdir = '.' pardir = '..' path = path.replace(altsep, sep) - prefix, path = splitdrive(path) + drive, root, path = splitroot(path) - # collapse initial backslashes - if path.startswith(sep): - prefix += sep - path = path.lstrip(sep) - - comps = path.split(sep) + comps = path.lstrip(sep).split(sep) i = 0 while i < len(comps): if not comps[i] or comps[i] == curdir: @@ -564,16 +559,16 @@ def normpath(path): if i > 0 and comps[i-1] != pardir: del comps[i-1:i+1] i -= 1 - elif i == 0 and prefix.endswith(sep): + elif i == 0 and root: del comps[i] else: i += 1 else: i += 1 # If the path is now empty, substitute '.' - if not prefix and not comps: + if not drive and not root and not comps: comps.append(curdir) - return prefix + sep.join(comps) + return drive + root + sep.join(comps) else: def normpath(path): @@ -788,8 +783,8 @@ def relpath(path, start=None): try: start_abs = abspath(normpath(start)) path_abs = abspath(normpath(path)) - start_drive, start_rest = splitdrive(start_abs) - path_drive, path_rest = splitdrive(path_abs) + start_drive, _, start_rest = splitroot(start_abs) + path_drive, _, path_rest = splitroot(path_abs) if normcase(start_drive) != normcase(path_drive): raise ValueError("path is on mount %r, start on mount %r" % ( path_drive, start_drive)) @@ -839,21 +834,21 @@ def commonpath(paths): curdir = '.' 
try: - drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths] - split_paths = [p.split(sep) for d, p in drivesplits] + drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths] + split_paths = [p.split(sep) for d, r, p in drivesplits] try: - isabs, = set(p[:1] == sep for d, p in drivesplits) + isabs, = set(r for d, r, p in drivesplits) except ValueError: raise ValueError("Can't mix absolute and relative paths") from None # Check that all drive letters or UNC paths match. The check is made only # now otherwise type errors for mixing strings and bytes would not be # caught. - if len(set(d for d, p in drivesplits)) != 1: + if len(set(d for d, r, p in drivesplits)) != 1: raise ValueError("Paths don't have the same drive") - drive, path = splitdrive(paths[0].replace(altsep, sep)) + drive, root, path = splitroot(paths[0].replace(altsep, sep)) common = path.split(sep) common = [c for c in common if c and c != curdir] @@ -867,8 +862,7 @@ def commonpath(paths): else: common = common[:len(s1)] - prefix = drive + sep if isabs else drive - return prefix + sep.join(common) + return drive + root + sep.join(common) except (TypeError, AttributeError): genericpath._check_arg_types('commonpath', *paths) raise From 6592b27e1d72516c47713ba508359445fe63e2c2 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 12 Jan 2023 23:10:31 +0000 Subject: [PATCH 05/30] Optimizations --- Lib/ntpath.py | 12 +++++++----- Lib/posixpath.py | 15 ++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index f1a7c6e8c3d045..859896816a7167 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -192,11 +192,13 @@ def splitroot(p): altsep = b'/' colon = b':' unc_prefix = b'\\\\?\\UNC\\' + empty = b'' else: sep = '\\' altsep = '/' colon = ':' unc_prefix = '\\\\?\\UNC\\' + empty = '' normp = p.replace(altsep, sep) if normp[:1] == sep: if normp[1:2] == sep: @@ -205,24 +207,24 @@ def splitroot(p): start = 8 if normp[:8].upper() 
== unc_prefix else 2 index = normp.find(sep, start) if index == -1: - return p, p[:0], p[:0] + return p, empty, empty index2 = normp.find(sep, index + 1) if index2 == -1: - return p, p[:0], p[:0] + return p, empty, empty return p[:index2], p[index2:index2 + 1], p[index2 + 1:] else: # Relative path with root, e.g. \Windows - return p[:0], p[:1], p[1:] + return empty, p[:1], p[1:] elif normp[1:2] == colon: if normp[2:3] == sep: # Absolute drive-letter path, e.g. X:\Windows return p[:2], p[2:3], p[3:] else: # Relative path with drive, e.g. X:Windows - return p[:2], p[:0], p[2:] + return p[:2], empty, p[2:] else: # Relative path, e.g. Windows - return p[:0], p[:0], p + return empty, empty, p # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 8f208d80b56678..7b78b964f77b43 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -144,19 +144,24 @@ def splitroot(p): splitdrive('/foo/bar') == ('', '/', 'foo/bar') """ p = os.fspath(p) - sep = b'/' if isinstance(p, bytes) else '/' + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' if p[:1] != sep: # Relative path, e.g.: 'foo' - return p[:0], p[:0], p + return empty, empty, p elif p[1:2] != sep: # Absolute path, e.g.: '/foo' - return p[:0], p[:1], p[1:] + return empty, p[:1], p[1:] elif p[2:3] != sep: # Implementation defined per POSIX standard, e.g.: '//foo' - return p[:0], p[:2], p[2:] + return empty, p[:2], p[2:] else: # Absolute path with extraneous slashes, e.g.: '///foo', '////foo', etc. - return p[:0], p[:1], p[1:] + return empty, p[:1], p[1:] # Return the tail (basename) part of a path, same as split(path)[1]. 
From 78f42270b78e44c17d6d387bf9c083e2ba3938ea Mon Sep 17 00:00:00 2001 From: barneygale Date: Fri, 13 Jan 2023 18:41:50 +0000 Subject: [PATCH 06/30] Correct and expand examples in splitroot() docstring --- Lib/ntpath.py | 3 ++- Lib/posixpath.py | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 859896816a7167..3bacf597c7190b 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -184,7 +184,8 @@ def splitroot(p): splitroot('//server/share/') == ('//server/share', '/', '') splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('Windows') == ('', '', 'Windows') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///egg') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') """ p = os.fspath(p) if isinstance(p, bytes): diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 7b78b964f77b43..8e6b42e618ad69 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -140,8 +140,10 @@ def splitroot(p): empty; the root may be empty, a single slash, or two slashes. The tail contains anything after the root. 
For example: - splitdrive('foo/bar') == ('', '', 'foo/bar') - splitdrive('/foo/bar') == ('', '/', 'foo/bar') + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') """ p = os.fspath(p) if isinstance(p, bytes): From 9726ca4e394947fa322a659593a9f252fe4a9c29 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Fri, 13 Jan 2023 18:50:34 +0000 Subject: [PATCH 07/30] Update Lib/ntpath.py Co-authored-by: Eryk Sun --- Lib/ntpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 3bacf597c7190b..a3eba4cacb0bf7 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -184,7 +184,7 @@ def splitroot(p): splitroot('//server/share/') == ('//server/share', '/', '') splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///egg') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') splitroot('Windows/notepad') == ('', '', 'Windows/notepad') """ p = os.fspath(p) From 7a6613c5dc53391690102e2725cb93791a11007d Mon Sep 17 00:00:00 2001 From: barneygale Date: Sat, 14 Jan 2023 01:28:23 +0000 Subject: [PATCH 08/30] Use splitroot() from pathlib.PurePath.with_name() --- Lib/pathlib.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 1fc61343b44d86..364514cd0cf3ee 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -483,9 +483,9 @@ def with_name(self, name): """Return a new path with the file name changed.""" if not self.name: raise ValueError("%r has an empty name" % (self,)) - drv, root, parts = self._parse_parts((name,)) - if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep] - or drv or root or len(parts) != 1): + f = self._flavour + drv, root, tail = f.splitroot(name) + if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail): raise 
ValueError("Invalid name %r" % (name)) return self._from_parsed_parts(self._drv, self._root, self._parts[:-1] + [name]) From 26a8dba786c660cb97a112b44d56b42e7414f48d Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 15 Jan 2023 18:56:01 +0000 Subject: [PATCH 09/30] Reduce ntpath.normpath() diff noise --- Lib/ntpath.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index a3eba4cacb0bf7..7232273a2eac57 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -552,8 +552,8 @@ def normpath(path): pardir = '..' path = path.replace(altsep, sep) drive, root, path = splitroot(path) - - comps = path.lstrip(sep).split(sep) + prefix = drive + root + comps = path.split(sep) i = 0 while i < len(comps): if not comps[i] or comps[i] == curdir: @@ -569,9 +569,9 @@ def normpath(path): else: i += 1 # If the path is now empty, substitute '.' - if not drive and not root and not comps: + if not prefix and not comps: comps.append(curdir) - return drive + root + sep.join(comps) + return prefix + sep.join(comps) else: def normpath(path): From 0c237d490b2a77759a9813e3b8433f6987b620a8 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 15 Jan 2023 19:16:32 +0000 Subject: [PATCH 10/30] Simplify ntpath.commonpath() now that 'isabs' is unused. --- Lib/ntpath.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 7232273a2eac57..62750ff8aca9bd 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -840,10 +840,8 @@ def commonpath(paths): drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths] split_paths = [p.split(sep) for d, r, p in drivesplits] - try: - isabs, = set(r for d, r, p in drivesplits) - except ValueError: - raise ValueError("Can't mix absolute and relative paths") from None + if len(set(r for d, r, p in drivesplits)) != 1: + raise ValueError("Can't mix absolute and relative paths") # Check that all drive letters or UNC paths match. 
The check is made only # now otherwise type errors for mixing strings and bytes would not be From 11ed3eb55a93fb21bc89ad6a27c7366dab20f0bf Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 15 Jan 2023 19:37:29 +0000 Subject: [PATCH 11/30] Reduce posixpath.normpath() diff noise --- Lib/posixpath.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 8e6b42e618ad69..29d3b94d529173 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -403,18 +403,19 @@ def normpath(path): dotdot = '..' if path == empty: return dot - _, root, path = splitroot(path) + _, initial_slashes, path = splitroot(path) comps = path.split(sep) new_comps = [] for comp in comps: if comp in (empty, dot): continue - if (comp != dotdot or (not root and not new_comps) or + if (comp != dotdot or (not initial_slashes and not new_comps) or (new_comps and new_comps[-1] == dotdot)): new_comps.append(comp) elif new_comps: new_comps.pop() - path = root + sep.join(new_comps) + comps = new_comps + path = initial_slashes + sep.join(comps) return path or dot else: From 2c9eed88823e840d8b39a34b26ae0e63f0a9fe38 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 15 Jan 2023 20:10:11 +0000 Subject: [PATCH 12/30] Improve documentation --- Doc/library/os.path.rst | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 041c0c6095b266..94c2782aebadb8 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -492,16 +492,28 @@ the :mod:`glob` module.) Split the pathname *path* into a triad ``(drive, root, tail)`` where: - 1. *drive* is an optional mount point, exactly like :func:`splitdrive`; - 2. *root* is an optional sequence of separators following the drive; and + 1. *drive* is a mount point or the empty string; + 2. *root* is a sequence of separators following the drive or the empty string; and 3. *tail* is anything after the root. 
- On Posix, *drive* is always empty. The *root* may be empty (relative path), - a single forward slash (absolute path), or two forward slashes - (implementation-defined per the POSIX standard). + On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is + relative), a single forward slash (if *path* is absolute), or two forward slashes + (implementation-defined per `IEEE Std 1003.1 2013 Edition; 4.13 Pathname Resolution + `_.) + For example:: - On Windows, *drive* may be a UNC sharepoint or a traditional DOS drive. The - *root* may be empty, a forward slash, or a backward slash. + >>> splitroot('/etc/hosts') + ('', '/', 'etc/hosts') + + On Windows, *drive* may be a UNC sharepoint or a traditional drive-letter drive. The + *root* may be empty, a forward slash, or a backward slash. For example:: + + >>> splitroot('//server/share/') + ('//server/share', '/', '') + >>> splitroot('C:/Users/Barney') + ('C:', '/', 'Users/Barney') + >>> splitroot('Windows/notepad') + ('', '', 'Windows/notepad') In all cases, ``drive + root + tail`` will be the same as *path*. From 8299e969d316a9cd2b391c88cf203f1fa518ddd4 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 15 Jan 2023 20:10:33 +0000 Subject: [PATCH 13/30] Add whatsnew entry. --- Doc/whatsnew/3.12.rst | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 7d318cac019350..0f5bbed865e78b 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -282,13 +282,18 @@ os for a process with :func:`os.pidfd_open` in non-blocking mode. (Contributed by Kumar Aditya in :gh:`93312`.) -* Add :func:`os.path.isjunction` to check if a given path is a junction. - (Contributed by Charles Machalow in :gh:`99547`.) - * :class:`os.DirEntry` now includes an :meth:`os.DirEntry.is_junction` method to check if the entry is a junction. (Contributed by Charles Machalow in :gh:`99547`.) 
+os.path +------- + +* Add :func:`os.path.isjunction` to check if a given path is a junction. + (Contributed by Charles Machalow in :gh:`99547`.) + +* Add :func:`os.path.splitroot` to split a path into a triad + ``(drive, root, tail)``. (Contributed by Barney Gale in :gh:`101000`.) shutil ------ From 27ffe37a991ab0a2c8e25b5bfc72630026f2c37c Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 15 Jan 2023 22:36:58 +0000 Subject: [PATCH 14/30] Simplify ntpath.splitroot() slightly --- Lib/posixpath.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 29d3b94d529173..32b5d6e105dde9 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -155,15 +155,13 @@ def splitroot(p): if p[:1] != sep: # Relative path, e.g.: 'foo' return empty, empty, p - elif p[1:2] != sep: - # Absolute path, e.g.: '/foo' - return empty, p[:1], p[1:] - elif p[2:3] != sep: - # Implementation defined per POSIX standard, e.g.: '//foo' - return empty, p[:2], p[2:] + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] else: - # Absolute path with extraneous slashes, e.g.: '///foo', '////foo', etc. - return empty, p[:1], p[1:] + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] # Return the tail (basename) part of a path, same as split(path)[1]. 
From 9beff2af6f5c80945fc745cc64ec9912c87b59a2 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 16 Jan 2023 17:41:43 +0000 Subject: [PATCH 15/30] Apply suggestions from code review Co-authored-by: Eryk Sun --- Doc/library/os.path.rst | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 94c2782aebadb8..2b2e72c384c065 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -492,7 +492,7 @@ the :mod:`glob` module.) Split the pathname *path* into a triad ``(drive, root, tail)`` where: - 1. *drive* is a mount point or the empty string; + 1. *drive* is a device name, mount point or the empty string; 2. *root* is a sequence of separators following the drive or the empty string; and 3. *tail* is anything after the root. @@ -505,13 +505,16 @@ the :mod:`glob` module.) >>> splitroot('/etc/hosts') ('', '/', 'etc/hosts') - On Windows, *drive* may be a UNC sharepoint or a traditional drive-letter drive. The - *root* may be empty, a forward slash, or a backward slash. For example:: + On Windows, *drive* may be a drive-letter name, a UNC share, or a device + name. The *root* may be empty, a forward slash, or a backward slash. 
For + example:: - >>> splitroot('//server/share/') - ('//server/share', '/', '') >>> splitroot('C:/Users/Barney') ('C:', '/', 'Users/Barney') + >>> splitroot('//server/share/') + ('//server/share', '/', '') + >>> splitroot('//?/Volume{12345678-1234-1234-1234-123456781234}/') + ('//?/Volume{12345678-1234-1234-1234-123456781234}', '/', '') >>> splitroot('Windows/notepad') ('', '', 'Windows/notepad') From bacdee153c5f28413f21c68ca7919d7311340e7b Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 16 Jan 2023 18:43:37 +0000 Subject: [PATCH 16/30] Update Doc/library/os.path.rst Co-authored-by: Eryk Sun --- Doc/library/os.path.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 2b2e72c384c065..d267981936694e 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -498,7 +498,7 @@ the :mod:`glob` module.) On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is relative), a single forward slash (if *path* is absolute), or two forward slashes - (implementation-defined per `IEEE Std 1003.1 2013 Edition; 4.13 Pathname Resolution + (implementation-defined per `IEEE Std 1003.1-2017; 4.13 Pathname Resolution <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13>`_.) For example:: From 4ebe54555959283b243ebce697c76ef988a57cfb Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 16 Jan 2023 18:53:45 +0000 Subject: [PATCH 17/30] Note that drive may be empty on Windows --- Doc/library/os.path.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index d267981936694e..121fff690d11a7 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -505,7 +505,7 @@ the :mod:`glob` module.) >>> splitroot('/etc/hosts') ('', '/', 'etc/hosts') - On Windows, *drive* may be a drive-letter name, a UNC share, or a device + On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device name.
The *root* may be empty, a forward slash, or a backward slash. For example:: From 2927afe301417f7a53442416b73b34c45035e371 Mon Sep 17 00:00:00 2001 From: barneygale Date: Mon, 16 Jan 2023 18:56:49 +0000 Subject: [PATCH 18/30] Re-order drive example --- Doc/library/os.path.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 121fff690d11a7..c286cd15b93291 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -509,14 +509,14 @@ the :mod:`glob` module.) name. The *root* may be empty, a forward slash, or a backward slash. For example:: + >>> splitroot('Windows/notepad') + ('', '', 'Windows/notepad') >>> splitroot('C:/Users/Barney') ('C:', '/', 'Users/Barney') >>> splitroot('//server/share/') ('//server/share', '/', '') >>> splitroot('//?/Volume{12345678-1234-1234-1234-123456781234}/') ('//?/Volume{12345678-1234-1234-1234-123456781234}', '/', '') - >>> splitroot('Windows/notepad') - ('', '', 'Windows/notepad') In all cases, ``drive + root + tail`` will be the same as *path*. From b0aa73e41c782a5a4f0008d6cd8f8c2972afc499 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Mon, 16 Jan 2023 19:43:53 +0000 Subject: [PATCH 19/30] Update Doc/library/os.path.rst Co-authored-by: Eryk Sun --- Doc/library/os.path.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index c286cd15b93291..834a71f79b3282 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -515,8 +515,8 @@ the :mod:`glob` module.) ('C:', '/', 'Users/Barney') >>> splitroot('//server/share/') ('//server/share', '/', '') - >>> splitroot('//?/Volume{12345678-1234-1234-1234-123456781234}/') - ('//?/Volume{12345678-1234-1234-1234-123456781234}', '/', '') + >>> splitroot('//?/UNC/server/share/') + ('//?/UNC/server/share', '/', '') In all cases, ``drive + root + tail`` will be the same as *path*. 
From 19777d65c409ae85d60c01d27b5a293dc6e5c605 Mon Sep 17 00:00:00 2001 From: barneygale Date: Wed, 18 Jan 2023 22:06:13 +0000 Subject: [PATCH 20/30] Adjust docstring examples --- Doc/library/os.path.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 834a71f79b3282..a7c137a9ea9dc3 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -502,21 +502,21 @@ the :mod:`glob` module.) <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13>`_.) For example:: - >>> splitroot('/etc/hosts') - ('', '/', 'etc/hosts') + >>> splitroot('/home/eryk') + ('', '/', 'home/eryk') + >>> splitroot('//home/eryk') + ('', '//', 'home/eryk') + >>> splitroot('///home/eryk') + ('', '/', '//home/eryk') On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device name. The *root* may be empty, a forward slash, or a backward slash. For example:: - >>> splitroot('Windows/notepad') - ('', '', 'Windows/notepad') - >>> splitroot('C:/Users/Barney') - ('C:', '/', 'Users/Barney') - >>> splitroot('//server/share/') - ('//server/share', '/', '') - >>> splitroot('//?/UNC/server/share/') - ('//?/UNC/server/share', '/', '') + >>> splitroot('C:/Users/Eryk') + ('C:', '/', 'Users/Eryk') + >>> splitroot('//Server/Share/Users/Eryk') + ('//Server/Share', '/', 'Users/Eryk') In all cases, ``drive + root + tail`` will be the same as *path*.
From 32e212e1cb08a6d2bf3704075b5d78a72b05b134 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 19 Jan 2023 12:46:02 +0000 Subject: [PATCH 21/30] Apply suggestions from code review Co-authored-by: Alex Waygood --- Lib/ntpath.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 62750ff8aca9bd..f9ee8e02a576b7 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -840,13 +840,13 @@ def commonpath(paths): drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths] split_paths = [p.split(sep) for d, r, p in drivesplits] - if len(set(r for d, r, p in drivesplits)) != 1: + if len({r for d, r, p in drivesplits}) != 1: raise ValueError("Can't mix absolute and relative paths") # Check that all drive letters or UNC paths match. The check is made only # now otherwise type errors for mixing strings and bytes would not be # caught. - if len(set(d for d, r, p in drivesplits)) != 1: + if len({d for d, r, p in drivesplits}) != 1: raise ValueError("Paths don't have the same drive") drive, root, path = splitroot(paths[0].replace(altsep, sep)) From 37cded31033f0314ee35e90b65794b19d30d8907 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Thu, 19 Jan 2023 16:57:55 +0000 Subject: [PATCH 22/30] Update Doc/library/os.path.rst --- Doc/library/os.path.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index a7c137a9ea9dc3..9ccf7a4d4065f2 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -490,7 +490,7 @@ the :mod:`glob` module.) .. function:: splitroot(path) - Split the pathname *path* into a triad ``(drive, root, tail)`` where: + Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where: 1. *drive* is a device name, mount point or the empty string; 2. 
*root* is a sequence of separators following the drive or the empty string; and From 5a8dfce4e15db8cac619637ec2344134547fc540 Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 19 Jan 2023 17:13:13 +0000 Subject: [PATCH 23/30] Change example username in docs to 'Sam' --- Doc/library/os.path.rst | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 9ccf7a4d4065f2..9f561f1b7b6b8a 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -502,21 +502,21 @@ the :mod:`glob` module.) <https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13>`_.) For example:: - >>> splitroot('/home/eryk') - ('', '/', 'home/eryk') - >>> splitroot('//home/eryk') - ('', '//', 'home/eryk') - >>> splitroot('///home/eryk') - ('', '/', '//home/eryk') + >>> splitroot('/home/sam') + ('', '/', 'home/sam') + >>> splitroot('//home/sam') + ('', '//', 'home/sam') + >>> splitroot('///home/sam') + ('', '/', '//home/sam') On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device name. The *root* may be empty, a forward slash, or a backward slash. For example:: - >>> splitroot('C:/Users/Eryk') - ('C:', '/', 'Users/Eryk') - >>> splitroot('//Server/Share/Users/Eryk') - ('//Server/Share', '/', 'Users/Eryk') + >>> splitroot('C:/Users/Sam') + ('C:', '/', 'Users/Sam') + >>> splitroot('//Server/Share/Users/Sam') + ('//Server/Share', '/', 'Users/Sam') In all cases, ``drive + root + tail`` will be the same as *path*. From 0e75a55b3f696e4481a51c7e414503aecf85caba Mon Sep 17 00:00:00 2001 From: barneygale Date: Thu, 19 Jan 2023 17:20:26 +0000 Subject: [PATCH 24/30] Adjust first paragraph to use prose ... and not belabour the fact that the empty string may be returned as any/all items.
--- Doc/library/os.path.rst | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 9f561f1b7b6b8a..5fd752625f2011 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -490,11 +490,11 @@ the :mod:`glob` module.) .. function:: splitroot(path) - Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where: - - 1. *drive* is a device name, mount point or the empty string; - 2. *root* is a sequence of separators following the drive or the empty string; and - 3. *tail* is anything after the root. + Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where + *drive* is a device name or mount point, *root* is a string of separators + after the drive, and *tail* is everything after the root. Any of these + items may be the empty string. In all cases, ``drive + root + tail`` will + be the same as *path*. On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is relative), a single forward slash (if *path* is absolute), or two forward slashes @@ -518,7 +518,6 @@ the :mod:`glob` module.) >>> splitroot('//Server/Share/Users/Sam') ('//Server/Share', '/', 'Users/Sam') - In all cases, ``drive + root + tail`` will be the same as *path*. .. versionadded:: 3.12 From 36632370006d68e1bf5c67232f7b615e86b0b2aa Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 22 Jan 2023 22:24:37 +0000 Subject: [PATCH 25/30] Update Doc/library/os.path.rst Co-authored-by: Alex Waygood --- Doc/library/os.path.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index 5fd752625f2011..786c2fd7f64fcc 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -518,7 +518,6 @@ the :mod:`glob` module.) >>> splitroot('//Server/Share/Users/Sam') ('//Server/Share', '/', 'Users/Sam') - .. 
versionadded:: 3.12 From 053729debe973546048b65e360203937ab752e66 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 22 Jan 2023 22:46:06 +0000 Subject: [PATCH 26/30] Add tests for bytes (POSIX only) and path-like objects (both platforms) --- Lib/test/test_ntpath.py | 3 +++ Lib/test/test_posixpath.py | 9 +++++++++ 2 files changed, 12 insertions(+) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index dbc614728b4ff6..30e3428ad096bc 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -946,6 +946,9 @@ def test_path_splitext(self): def test_path_splitdrive(self): self._check_function(self.path.splitdrive) + def test_path_splitroot(self): + self._check_function(self.path.splitroot) + def test_path_basename(self): self._check_function(self.path.basename) diff --git a/Lib/test/test_posixpath.py b/Lib/test/test_posixpath.py index bab37700b6b1e0..9be4640f970aef 100644 --- a/Lib/test/test_posixpath.py +++ b/Lib/test/test_posixpath.py @@ -134,6 +134,12 @@ def test_splitroot(self): self.assertEqual(f('c:/a/b'), ('', '', 'c:/a/b')) self.assertEqual(f('\\/a/b'), ('', '', '\\/a/b')) self.assertEqual(f('\\a\\b'), ('', '', '\\a\\b')) + # Byte paths are supported + self.assertEqual(f(b''), (b'', b'', b'')) + self.assertEqual(f(b'a'), (b'', b'', b'a')) + self.assertEqual(f(b'/a'), (b'', b'/', b'a')) + self.assertEqual(f(b'//a'), (b'', b'//', b'a')) + self.assertEqual(f(b'///a'), (b'', b'/', b'//a')) def test_isabs(self): self.assertIs(posixpath.isabs(""), False) @@ -772,6 +778,9 @@ def test_path_splitext(self): def test_path_splitdrive(self): self.assertPathEqual(self.path.splitdrive) + def test_path_splitroot(self): + self.assertPathEqual(self.path.splitroot) + def test_path_basename(self): self.assertPathEqual(self.path.basename) From 694f093fb62074ff7e34d54182fa06edd3c606ca Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 22 Jan 2023 22:53:02 +0000 Subject: [PATCH 27/30] Add tests for mixed path separators (Windows only) --- 
Lib/test/test_ntpath.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 30e3428ad096bc..86700a2197ef4a 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -165,6 +165,12 @@ def test_splitroot(self): tester("ntpath.splitroot('\\\\a\\b')", ('\\\\a\\b', '', '')) tester("ntpath.splitroot('\\\\a\\b\\')", ('\\\\a\\b', '\\', '')) tester("ntpath.splitroot('\\\\a\\b\\c\\d')", ('\\\\a\\b', '\\', 'c\\d')) + # Mixed path separators + tester("ntpath.splitroot('c:/\\')", ('c:', '/', '\\')) + tester("ntpath.splitroot('c:\\/')", ('c:', '\\', '/')) + tester("ntpath.splitroot('/\\a/b\\/\\')", ('/\\a/b', '\\', '/\\')) + tester("ntpath.splitroot('\\/a\\b/\\/')", ('\\/a\\b', '/', '\\/')) + def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) From e99e3cd2262918ee4148258f3f49360ee85850f2 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 22 Jan 2023 22:53:47 +0000 Subject: [PATCH 28/30] Remove errant newline. 
--- Lib/test/test_ntpath.py | 1 - 1 file changed, 1 deletion(-) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 86700a2197ef4a..4975cebb4d0e4e 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -171,7 +171,6 @@ def test_splitroot(self): tester("ntpath.splitroot('/\\a/b\\/\\')", ('/\\a/b', '\\', '/\\')) tester("ntpath.splitroot('\\/a\\b/\\/')", ('\\/a\\b', '/', '\\/')) - def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) tester('ntpath.split("\\\\conky\\mountpoint\\foo\\bar")', From f618a00119ba725052c2264cf8ae2ab7a4b9ed56 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 22 Jan 2023 23:43:21 +0000 Subject: [PATCH 29/30] Move most test cases from `test_splitdrive` to `test_splitroot` --- Lib/test/test_ntpath.py | 140 ++++++++++++++++++++++++---------------- 1 file changed, 84 insertions(+), 56 deletions(-) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 4975cebb4d0e4e..bce38a534a6a98 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -98,79 +98,107 @@ def test_splitext(self): tester('ntpath.splitext("c:a/b\\c.d")', ('c:a/b\\c', '.d')) def test_splitdrive(self): - tester('ntpath.splitdrive("c:\\foo\\bar")', - ('c:', '\\foo\\bar')) - tester('ntpath.splitdrive("c:/foo/bar")', - ('c:', '/foo/bar')) + tester("ntpath.splitdrive('')", ('', '')) + tester("ntpath.splitdrive('foo')", ('', 'foo')) + tester("ntpath.splitdrive('foo\\bar')", ('', 'foo\\bar')) + tester("ntpath.splitdrive('foo/bar')", ('', 'foo/bar')) + tester("ntpath.splitdrive('\\')", ('', '\\')) + tester("ntpath.splitdrive('/')", ('', '/')) + tester("ntpath.splitdrive('\\foo\\bar')", ('', '\\foo\\bar')) + tester("ntpath.splitdrive('/foo/bar')", ('', '/foo/bar')) + tester('ntpath.splitdrive("c:foo\\bar")', ('c:', 'foo\\bar')) + tester('ntpath.splitdrive("c:foo/bar")', ('c:', 'foo/bar')) + tester('ntpath.splitdrive("c:\\foo\\bar")', ('c:', '\\foo\\bar')) + tester('ntpath.splitdrive("c:/foo/bar")', 
('c:', '/foo/bar')) + tester("ntpath.splitdrive('\\\\')", ('\\\\', '')) + tester("ntpath.splitdrive('//')", ('//', '')) tester('ntpath.splitdrive("\\\\conky\\mountpoint\\foo\\bar")', ('\\\\conky\\mountpoint', '\\foo\\bar')) tester('ntpath.splitdrive("//conky/mountpoint/foo/bar")', ('//conky/mountpoint', '/foo/bar')) - tester('ntpath.splitdrive("\\\\\\conky\\mountpoint\\foo\\bar")', - ('\\\\\\conky', '\\mountpoint\\foo\\bar')) - tester('ntpath.splitdrive("///conky/mountpoint/foo/bar")', - ('///conky', '/mountpoint/foo/bar')) - tester('ntpath.splitdrive("\\\\conky\\\\mountpoint\\foo\\bar")', - ('\\\\conky\\', '\\mountpoint\\foo\\bar')) - tester('ntpath.splitdrive("//conky//mountpoint/foo/bar")', - ('//conky/', '/mountpoint/foo/bar')) - # Issue #19911: UNC part containing U+0130 - self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'), - ('//conky/MOUNTPOİNT', '/foo/bar')) - # gh-81790: support device namespace, including UNC drives. - tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) - tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) - tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) - tester('ntpath.splitdrive("//?/UNC")', ("//?/UNC", "")) - tester('ntpath.splitdrive("//?/UNC/")', ("//?/UNC/", "")) - tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) - tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) - tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) - tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', - ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam')) - tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/")) - - tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) - tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) - tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) - tester('ntpath.splitdrive("\\\\?\\UNC")', ("\\\\?\\UNC", "")) 
- tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", "")) - tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) - tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")', ("\\\\?\\UNC\\server\\share", "\\dir")) - tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', - ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam')) - tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\")) - - # gh-96290: support partial/invalid UNC drives - tester('ntpath.splitdrive("//")', ("//", "")) # empty server & missing share - tester('ntpath.splitdrive("///")', ("///", "")) # empty server & empty share - tester('ntpath.splitdrive("///y")', ("///y", "")) # empty server & non-empty share - tester('ntpath.splitdrive("//x")', ("//x", "")) # non-empty server & missing share - tester('ntpath.splitdrive("//x/")', ("//x/", "")) # non-empty server & empty share + tester('ntpath.splitdrive("//?/UNC/server/share/dir")', + ("//?/UNC/server/share", "/dir")) def test_splitroot(self): tester("ntpath.splitroot('')", ('', '', '')) - tester("ntpath.splitroot('a')", ('', '', 'a')) - tester("ntpath.splitroot('a\\b')", ('', '', 'a\\b')) - tester("ntpath.splitroot('\\a')", ('', '\\', 'a')) - tester("ntpath.splitroot('\\a\\b')", ('', '\\', 'a\\b')) - tester("ntpath.splitroot('c:a\\b')", ('c:', '', 'a\\b')) - tester("ntpath.splitroot('c:\\a\\b')", ('c:', '\\', 'a\\b')) + tester("ntpath.splitroot('foo')", ('', '', 'foo')) + tester("ntpath.splitroot('foo\\bar')", ('', '', 'foo\\bar')) + tester("ntpath.splitroot('foo/bar')", ('', '', 'foo/bar')) + tester("ntpath.splitroot('\\')", ('', '\\', '')) + tester("ntpath.splitroot('/')", ('', '/', '')) + tester("ntpath.splitroot('\\foo\\bar')", ('', '\\', 'foo\\bar')) + tester("ntpath.splitroot('/foo/bar')", ('', '/', 'foo/bar')) + 
tester('ntpath.splitroot("c:foo\\bar")', ('c:', '', 'foo\\bar')) + tester('ntpath.splitroot("c:foo/bar")', ('c:', '', 'foo/bar')) + tester('ntpath.splitroot("c:\\foo\\bar")', ('c:', '\\', 'foo\\bar')) + tester('ntpath.splitroot("c:/foo/bar")', ('c:', '/', 'foo/bar')) + # Redundant slashes are not included in the root. tester("ntpath.splitroot('c:\\\\a')", ('c:', '\\', '\\a')) tester("ntpath.splitroot('c:\\\\\\a/b')", ('c:', '\\', '\\\\a/b')) - # Valid UNC paths. - tester("ntpath.splitroot('\\\\a\\b')", ('\\\\a\\b', '', '')) - tester("ntpath.splitroot('\\\\a\\b\\')", ('\\\\a\\b', '\\', '')) - tester("ntpath.splitroot('\\\\a\\b\\c\\d')", ('\\\\a\\b', '\\', 'c\\d')) - # Mixed path separators + + # Mixed path separators. tester("ntpath.splitroot('c:/\\')", ('c:', '/', '\\')) tester("ntpath.splitroot('c:\\/')", ('c:', '\\', '/')) tester("ntpath.splitroot('/\\a/b\\/\\')", ('/\\a/b', '\\', '/\\')) tester("ntpath.splitroot('\\/a\\b/\\/')", ('\\/a\\b', '/', '\\/')) + # UNC paths. + tester("ntpath.splitroot('\\\\')", ('\\\\', '', '')) + tester("ntpath.splitroot('//')", ('//', '', '')) + tester('ntpath.splitroot("\\\\conky\\mountpoint\\foo\\bar")', + ('\\\\conky\\mountpoint', '\\', 'foo\\bar')) + tester('ntpath.splitroot("//conky/mountpoint/foo/bar")', + ('//conky/mountpoint', '/', 'foo/bar')) + tester('ntpath.splitroot("\\\\\\conky\\mountpoint\\foo\\bar")', + ('\\\\\\conky', '\\', 'mountpoint\\foo\\bar')) + tester('ntpath.splitroot("///conky/mountpoint/foo/bar")', + ('///conky', '/', 'mountpoint/foo/bar')) + tester('ntpath.splitroot("\\\\conky\\\\mountpoint\\foo\\bar")', + ('\\\\conky\\', '\\', 'mountpoint\\foo\\bar')) + tester('ntpath.splitroot("//conky//mountpoint/foo/bar")', + ('//conky/', '/', 'mountpoint/foo/bar')) + + # Issue #19911: UNC part containing U+0130 + self.assertEqual(ntpath.splitroot('//conky/MOUNTPOİNT/foo/bar'), + ('//conky/MOUNTPOİNT', '/', 'foo/bar')) + + # gh-81790: support device namespace, including UNC drives. 
+ tester('ntpath.splitroot("//?/c:")', ("//?/c:", "", "")) + tester('ntpath.splitroot("//?/c:/")', ("//?/c:", "/", "")) + tester('ntpath.splitroot("//?/c:/dir")', ("//?/c:", "/", "dir")) + tester('ntpath.splitroot("//?/UNC")', ("//?/UNC", "", "")) + tester('ntpath.splitroot("//?/UNC/")', ("//?/UNC/", "", "")) + tester('ntpath.splitroot("//?/UNC/server/")', ("//?/UNC/server/", "", "")) + tester('ntpath.splitroot("//?/UNC/server/share")', ("//?/UNC/server/share", "", "")) + tester('ntpath.splitroot("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/", "dir")) + tester('ntpath.splitroot("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', + ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/', 'spam')) + tester('ntpath.splitroot("//?/BootPartition/")', ("//?/BootPartition", "/", "")) + + tester('ntpath.splitroot("\\\\?\\c:")', ("\\\\?\\c:", "", "")) + tester('ntpath.splitroot("\\\\?\\c:\\")', ("\\\\?\\c:", "\\", "")) + tester('ntpath.splitroot("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\", "dir")) + tester('ntpath.splitroot("\\\\?\\UNC")', ("\\\\?\\UNC", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\")', ("\\\\?\\UNC\\", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\server\\share")', + ("\\\\?\\UNC\\server\\share", "", "")) + tester('ntpath.splitroot("\\\\?\\UNC\\server\\share\\dir")', + ("\\\\?\\UNC\\server\\share", "\\", "dir")) + tester('ntpath.splitroot("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', + ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\', 'spam')) + tester('ntpath.splitroot("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\", "")) + + # gh-96290: support partial/invalid UNC drives + tester('ntpath.splitroot("//")', ("//", "", "")) # empty server & missing share + tester('ntpath.splitroot("///")', ("///", "", "")) # empty server & empty share + tester('ntpath.splitroot("///y")', ("///y", "", "")) # empty server & 
non-empty share + tester('ntpath.splitroot("//x")', ("//x", "", "")) # non-empty server & missing share + tester('ntpath.splitroot("//x/")', ("//x/", "", "")) # non-empty server & empty share + def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) tester('ntpath.split("\\\\conky\\mountpoint\\foo\\bar")', From 1c522c9077ffc4c1e4c188c8c63b6cd3b7c745d5 Mon Sep 17 00:00:00 2001 From: barneygale Date: Sun, 22 Jan 2023 23:46:56 +0000 Subject: [PATCH 30/30] Mention pathlib performance improvement in news entry. --- .../Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst index ba3dc034563190..2082361c41d697 100644 --- a/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst +++ b/Misc/NEWS.d/next/Library/2023-01-12-21-22-20.gh-issue-101000.wz4Xgc.rst @@ -1,2 +1,3 @@ -Add :func:`os.path.splitroot()`, which splits a path into a triad of -``(drive, root, tail)``. +Add :func:`os.path.splitroot()`, which splits a path into a 3-item tuple +``(drive, root, tail)``. This new function is used by :mod:`pathlib` to +improve the performance of path construction by up to a third.