Skip to content

Commit

Permalink
gh-101000: Add os.path.splitroot() (#101002)
Browse files Browse the repository at this point in the history
Co-authored-by: Eryk Sun <eryksun@gmail.com>
Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
  • Loading branch information
3 people authored Jan 27, 2023
1 parent 37f15a5 commit e5b08dd
Show file tree
Hide file tree
Showing 9 changed files with 279 additions and 165 deletions.
33 changes: 33 additions & 0 deletions Doc/library/os.path.rst
Original file line number Diff line number Diff line change
Expand Up @@ -488,6 +488,39 @@ the :mod:`glob` module.)
Accepts a :term:`path-like object`.


.. function:: splitroot(path)

Split the pathname *path* into a 3-item tuple ``(drive, root, tail)`` where
*drive* is a device name or mount point, *root* is a string of separators
after the drive, and *tail* is everything after the root. Any of these
items may be the empty string. In all cases, ``drive + root + tail`` will
be the same as *path*.

On POSIX systems, *drive* is always empty. The *root* may be empty (if *path* is
relative), a single forward slash (if *path* is absolute), or two forward slashes
(whose interpretation is implementation-defined per `IEEE Std 1003.1-2017; 4.13 Pathname Resolution
<https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13>`_).
If *path* begins with three or more forward slashes, *root* is a single forward
slash and the remaining slashes are left at the start of *tail*.
For example::

>>> splitroot('/home/sam')
('', '/', 'home/sam')
>>> splitroot('//home/sam')
('', '//', 'home/sam')
>>> splitroot('///home/sam')
('', '/', '//home/sam')

On Windows, *drive* may be empty, a drive-letter name, a UNC share, or a device
name. The *root* may be empty, a forward slash, or a backslash. For
example::

>>> splitroot('C:/Users/Sam')
('C:', '/', 'Users/Sam')
>>> splitroot('//Server/Share/Users/Sam')
('//Server/Share', '/', 'Users/Sam')

.. versionadded:: 3.12


.. function:: splitext(path)

Split the pathname *path* into a pair ``(root, ext)`` such that ``root + ext ==
Expand Down
11 changes: 8 additions & 3 deletions Doc/whatsnew/3.12.rst
Original file line number Diff line number Diff line change
Expand Up @@ -288,13 +288,18 @@ os
for a process with :func:`os.pidfd_open` in non-blocking mode.
(Contributed by Kumar Aditya in :gh:`93312`.)

* Add :func:`os.path.isjunction` to check if a given path is a junction.
(Contributed by Charles Machalow in :gh:`99547`.)

* :class:`os.DirEntry` now includes an :meth:`os.DirEntry.is_junction`
method to check if the entry is a junction.
(Contributed by Charles Machalow in :gh:`99547`.)

os.path
-------

* Add :func:`os.path.isjunction` to check if a given path is a junction.
(Contributed by Charles Machalow in :gh:`99547`.)

* Add :func:`os.path.splitroot` to split a path into a triad
``(drive, root, tail)``. (Contributed by Barney Gale in :gh:`101000`.)

shutil
------
Expand Down
126 changes: 72 additions & 54 deletions Lib/ntpath.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from genericpath import *


__all__ = ["normcase","isabs","join","splitdrive","split","splitext",
__all__ = ["normcase","isabs","join","splitdrive","splitroot","split","splitext",
"basename","dirname","commonprefix","getsize","getmtime",
"getatime","getctime", "islink","exists","lexists","isdir","isfile",
"ismount", "expanduser","expandvars","normpath","abspath",
Expand Down Expand Up @@ -117,19 +117,21 @@ def join(path, *paths):
try:
if not paths:
path[:0] + sep #23780: Ensure compatible data type even if p is null.
result_drive, result_path = splitdrive(path)
result_drive, result_root, result_path = splitroot(path)
for p in map(os.fspath, paths):
p_drive, p_path = splitdrive(p)
if p_path and p_path[0] in seps:
p_drive, p_root, p_path = splitroot(p)
if p_root:
# Second path is absolute
if p_drive or not result_drive:
result_drive = p_drive
result_root = p_root
result_path = p_path
continue
elif p_drive and p_drive != result_drive:
if p_drive.lower() != result_drive.lower():
# Different drives => ignore the first path entirely
result_drive = p_drive
result_root = p_root
result_path = p_path
continue
# Same drive in different case
Expand All @@ -139,10 +141,10 @@ def join(path, *paths):
result_path = result_path + sep
result_path = result_path + p_path
## add separator between UNC and non-absolute path
if (result_path and result_path[0] not in seps and
if (result_path and not result_root and
result_drive and result_drive[-1:] != colon):
return result_drive + sep + result_path
return result_drive + result_path
return result_drive + result_root + result_path
except (TypeError, AttributeError, BytesWarning):
genericpath._check_arg_types('join', path, *paths)
raise
Expand All @@ -169,35 +171,61 @@ def splitdrive(p):
Paths cannot contain both a drive letter and a UNC path.
"""
drive, root, tail = splitroot(p)
return drive, root + tail


def splitroot(p):
"""Split a pathname into drive, root and tail. The drive is defined
exactly as in splitdrive(). On Windows, the root may be a single path
separator or an empty string. The tail contains anything after the root.
For example:
splitroot('//server/share/') == ('//server/share', '/', '')
splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney')
splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham')
splitroot('Windows/notepad') == ('', '', 'Windows/notepad')
"""
p = os.fspath(p)
if len(p) >= 2:
if isinstance(p, bytes):
sep = b'\\'
altsep = b'/'
colon = b':'
unc_prefix = b'\\\\?\\UNC\\'
else:
sep = '\\'
altsep = '/'
colon = ':'
unc_prefix = '\\\\?\\UNC\\'
normp = p.replace(altsep, sep)
if normp[0:2] == sep * 2:
if isinstance(p, bytes):
sep = b'\\'
altsep = b'/'
colon = b':'
unc_prefix = b'\\\\?\\UNC\\'
empty = b''
else:
sep = '\\'
altsep = '/'
colon = ':'
unc_prefix = '\\\\?\\UNC\\'
empty = ''
normp = p.replace(altsep, sep)
if normp[:1] == sep:
if normp[1:2] == sep:
# UNC drives, e.g. \\server\share or \\?\UNC\server\share
# Device drives, e.g. \\.\device or \\?\device
start = 8 if normp[:8].upper() == unc_prefix else 2
index = normp.find(sep, start)
if index == -1:
return p, p[:0]
return p, empty, empty
index2 = normp.find(sep, index + 1)
if index2 == -1:
return p, p[:0]
return p[:index2], p[index2:]
if normp[1:2] == colon:
# Drive-letter drives, e.g. X:
return p[:2], p[2:]
return p[:0], p
return p, empty, empty
return p[:index2], p[index2:index2 + 1], p[index2 + 1:]
else:
# Relative path with root, e.g. \Windows
return empty, p[:1], p[1:]
elif normp[1:2] == colon:
if normp[2:3] == sep:
# Absolute drive-letter path, e.g. X:\Windows
return p[:2], p[2:3], p[3:]
else:
# Relative path with drive, e.g. X:Windows
return p[:2], empty, p[2:]
else:
# Relative path, e.g. Windows
return empty, empty, p


# Split a path in head (everything up to the last '/') and tail (the
Expand All @@ -212,15 +240,13 @@ def split(p):
Either part may be empty."""
p = os.fspath(p)
seps = _get_bothseps(p)
d, p = splitdrive(p)
d, r, p = splitroot(p)
# set i to index beyond p's last slash
i = len(p)
while i and p[i-1] not in seps:
i -= 1
head, tail = p[:i], p[i:] # now tail has no slashes
# remove trailing slashes from head, unless it's all slashes
head = head.rstrip(seps) or head
return d + head, tail
return d + r + head.rstrip(seps), tail


# Split a path in root and extension.
Expand Down Expand Up @@ -311,10 +337,10 @@ def ismount(path):
path = os.fspath(path)
seps = _get_bothseps(path)
path = abspath(path)
root, rest = splitdrive(path)
if root and root[0] in seps:
return (not rest) or (rest in seps)
if rest and rest in seps:
drive, root, rest = splitroot(path)
if drive and drive[0] in seps:
return not rest
if root and not rest:
return True

if _getvolumepathname:
Expand Down Expand Up @@ -525,13 +551,8 @@ def normpath(path):
curdir = '.'
pardir = '..'
path = path.replace(altsep, sep)
prefix, path = splitdrive(path)

# collapse initial backslashes
if path.startswith(sep):
prefix += sep
path = path.lstrip(sep)

drive, root, path = splitroot(path)
prefix = drive + root
comps = path.split(sep)
i = 0
while i < len(comps):
Expand All @@ -541,7 +562,7 @@ def normpath(path):
if i > 0 and comps[i-1] != pardir:
del comps[i-1:i+1]
i -= 1
elif i == 0 and prefix.endswith(sep):
elif i == 0 and root:
del comps[i]
else:
i += 1
Expand Down Expand Up @@ -765,8 +786,8 @@ def relpath(path, start=None):
try:
start_abs = abspath(normpath(start))
path_abs = abspath(normpath(path))
start_drive, start_rest = splitdrive(start_abs)
path_drive, path_rest = splitdrive(path_abs)
start_drive, _, start_rest = splitroot(start_abs)
path_drive, _, path_rest = splitroot(path_abs)
if normcase(start_drive) != normcase(path_drive):
raise ValueError("path is on mount %r, start on mount %r" % (
path_drive, start_drive))
Expand Down Expand Up @@ -816,21 +837,19 @@ def commonpath(paths):
curdir = '.'

try:
drivesplits = [splitdrive(p.replace(altsep, sep).lower()) for p in paths]
split_paths = [p.split(sep) for d, p in drivesplits]
drivesplits = [splitroot(p.replace(altsep, sep).lower()) for p in paths]
split_paths = [p.split(sep) for d, r, p in drivesplits]

try:
isabs, = set(p[:1] == sep for d, p in drivesplits)
except ValueError:
raise ValueError("Can't mix absolute and relative paths") from None
if len({r for d, r, p in drivesplits}) != 1:
raise ValueError("Can't mix absolute and relative paths")

# Check that all drive letters or UNC paths match. The check is made only
# now otherwise type errors for mixing strings and bytes would not be
# caught.
if len(set(d for d, p in drivesplits)) != 1:
if len({d for d, r, p in drivesplits}) != 1:
raise ValueError("Paths don't have the same drive")

drive, path = splitdrive(paths[0].replace(altsep, sep))
drive, root, path = splitroot(paths[0].replace(altsep, sep))
common = path.split(sep)
common = [c for c in common if c and c != curdir]

Expand All @@ -844,8 +863,7 @@ def commonpath(paths):
else:
common = common[:len(s1)]

prefix = drive + sep if isabs else drive
return prefix + sep.join(common)
return drive + root + sep.join(common)
except (TypeError, AttributeError):
genericpath._check_arg_types('commonpath', *paths)
raise
Expand Down
24 changes: 7 additions & 17 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,19 +271,6 @@ def __reduce__(self):
# when pickling related paths.
return (self.__class__, tuple(self._parts))

@classmethod
def _split_root(cls, part):
sep = cls._flavour.sep
rel = cls._flavour.splitdrive(part)[1].lstrip(sep)
anchor = part.removesuffix(rel)
if anchor:
anchor = cls._flavour.normpath(anchor)
drv, root = cls._flavour.splitdrive(anchor)
if drv.startswith(sep):
# UNC paths always have a root.
root = sep
return drv, root, rel

@classmethod
def _parse_parts(cls, parts):
if not parts:
Expand All @@ -293,7 +280,10 @@ def _parse_parts(cls, parts):
path = cls._flavour.join(*parts)
if altsep:
path = path.replace(altsep, sep)
drv, root, rel = cls._split_root(path)
drv, root, rel = cls._flavour.splitroot(path)
if drv.startswith(sep):
# pathlib assumes that UNC paths always have a root.
root = sep
unfiltered_parsed = [drv + root] + rel.split(sep)
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
return drv, root, parsed
Expand Down Expand Up @@ -493,9 +483,9 @@ def with_name(self, name):
"""Return a new path with the file name changed."""
if not self.name:
raise ValueError("%r has an empty name" % (self,))
drv, root, parts = self._parse_parts((name,))
if (not name or name[-1] in [self._flavour.sep, self._flavour.altsep]
or drv or root or len(parts) != 1):
f = self._flavour
drv, root, tail = f.splitroot(name)
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
raise ValueError("Invalid name %r" % (name))
return self._from_parsed_parts(self._drv, self._root,
self._parts[:-1] + [name])
Expand Down
Loading

0 comments on commit e5b08dd

Please sign in to comment.