Skip to content

Commit

Permalink
Add pathlib._abc.PathModuleBase (python#113893)
Browse files Browse the repository at this point in the history
Path modules provide a subset of the `os.path` API, specifically those
functions needed to provide `PurePathBase` functionality. Each
`PurePathBase` subclass references its path module via a `pathmod` class
attribute.

This commit adds a new `PathModuleBase` class, which provides abstract
methods that unconditionally raise `UnsupportedOperation`. An instance of
this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`.
As a result, `PurePathBase` is no longer POSIX-y by default, and
all its methods raise `UnsupportedOperation` courtesy of `pathmod`.

Users who subclass `PurePathBase` or `PathBase` should choose the path
syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their
own subclass of `PathModuleBase`, as circumstances demand.
  • Loading branch information
barneygale authored and Glyphack committed Jan 27, 2024
1 parent f497185 commit 3d5c6c3
Show file tree
Hide file tree
Showing 4 changed files with 182 additions and 59 deletions.
60 changes: 60 additions & 0 deletions Lib/pathlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@
]


# Reserved file names on Windows. The naming rules are documented at
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset([
    'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$',
    # COM and LPT ports suffixed with 1-9 or a superscript 1, 2 or 3.
    *(f'{port}{digit}'
      for port in ('COM', 'LPT')
      for digit in '123456789\xb9\xb2\xb3'),
])


class _PathParents(Sequence):
"""This object provides sequence-like access to the logical ancestors
of a path. Don't try to construct it yourself."""
Expand Down Expand Up @@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
"""

__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',

# The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The
Expand Down Expand Up @@ -141,6 +154,26 @@ def __init__(self, *args):
# Avoid calling super().__init__, as an optimisation
self._raw_paths = paths

def joinpath(self, *pathsegments):
    """Build a new path from this path followed by the given segments.

    The result is a subpath when every argument is a relative path, or a
    completely different path when one of the arguments is anchored.
    """
    segments = (self, *pathsegments)
    return self.with_segments(*segments)

def __truediv__(self, key):
    # Implements `self / key`. A TypeError raised while building the new
    # path is converted to NotImplemented so Python can try the reflected
    # operation on the other operand.
    try:
        joined = self.with_segments(self, key)
    except TypeError:
        return NotImplemented
    return joined

def __rtruediv__(self, key):
    # Implements `key / self`, with this path as the right-hand operand.
    # A TypeError raised while building the new path is converted to
    # NotImplemented.
    try:
        joined = self.with_segments(key, self)
    except TypeError:
        return NotImplemented
    return joined

def __reduce__(self):
# Using the parts tuple helps share interned path parts
# when pickling related paths.
Expand Down Expand Up @@ -386,6 +419,33 @@ def is_relative_to(self, other, /, *_deprecated):
other = self.with_segments(other)
return other == self or other in self.parents

def is_absolute(self):
    """Tell whether this path is absolute, i.e. it has a root and, where
    applicable, a drive as well."""
    if self.pathmod is not posixpath:
        return self.pathmod.isabs(self)
    # Optimization: on POSIX the raw, unjoined segments are inspected
    # directly instead of going through the path module.
    return any(segment.startswith('/') for segment in self._raw_paths)

def is_reserved(self):
    """Report whether the final path component is one of the special names
    reserved by the system. Always False unless the path module is ntpath
    and the path has a name."""
    if not (self.pathmod is ntpath and self.name):
        return False

    # NOTE: the rules for reserved names seem somewhat complicated
    # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
    # exist). We err on the side of caution and return True for paths
    # which are not considered reserved by Windows.
    if self.drive.startswith('\\\\'):
        # UNC paths are never reserved.
        return False
    # Keep only the text before the first '.', then before the first ':',
    # and drop trailing spaces before the case-insensitive lookup.
    before_dot, _, _ = self.name.partition('.')
    before_colon, _, _ = before_dot.partition(':')
    return before_colon.rstrip(' ').upper() in _WIN_RESERVED_NAMES

def as_uri(self):
"""Return the path as a URI."""
if not self.is_absolute():
Expand Down
124 changes: 67 additions & 57 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,13 @@
"""

import functools
import posixpath
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO

#
# Internals
#

# Reserved file names on Windows. The naming rules are documented at
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset([
    'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$',
    # COM and LPT ports suffixed with 1-9 or a superscript 1, 2 or 3.
    *(f'{port}{digit}'
      for port in ('COM', 'LPT')
      for digit in '123456789\xb9\xb2\xb3'),
])

_WINERROR_NOT_READY = 21 # drive exists but is not accessible
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
Expand Down Expand Up @@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
pass


class PathModuleBase:
    """Abstract foundation for path modules, the low-level path manipulation
    layer.

    A path module exposes the part of the os.path API that PurePathBase
    needs. A PurePathBase subclass selects its path module through the
    'pathmod' class attribute. Nothing is implemented here: every member
    below raises UnsupportedOperation.
    """

    @classmethod
    def _unsupported(cls, attr):
        # The one place where the error is built; *attr* is the member name
        # as it should appear in the message.
        message = f"{cls.__name__}.{attr} is unsupported"
        raise UnsupportedOperation(message)

    @property
    def sep(self):
        """Character that separates the components of a path."""
        self._unsupported('sep')

    def join(self, path, *paths):
        """Combine path segments into one path."""
        self._unsupported('join()')

    def split(self, path):
        """Break the path into a (head, tail) pair: *head* is whatever comes
        before the last path separator and *tail* is whatever follows it.
        Either element may be empty.
        """
        self._unsupported('split()')

    def splitroot(self, path):
        """Break the path into a (drive, root, tail) triple: *drive* is a
        device name or mount point, *root* is the run of separators that
        follows the drive, and *tail* is the remainder. Any element may be
        empty."""
        self._unsupported('splitroot()')

    def normcase(self, path):
        """Return the path with its case normalized."""
        self._unsupported('normcase()')

    def isabs(self, path):
        """Tell whether the path is absolute, meaning that the current
        directory or drive has no effect on it."""
        self._unsupported('isabs()')


class PurePathBase:
"""Base class for pure path objects.
Expand All @@ -154,19 +192,19 @@ class PurePathBase:
"""

__slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',
# The `_raw_path` slot stores a joined string path. This is set in the
# `__init__()` method.
'_raw_path',

# The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving',
)
pathmod = posixpath
pathmod = PathModuleBase()

def __init__(self, *paths):
self._raw_paths = paths
def __init__(self, path, *paths):
self._raw_path = self.pathmod.join(path, *paths) if paths else path
self._resolving = False

def with_segments(self, *pathsegments):
Expand All @@ -176,11 +214,6 @@ def with_segments(self, *pathsegments):
"""
return type(self)(*pathsegments)

@property
def _raw_path(self):
"""The joined but unnormalized path."""
return self.pathmod.join(*self._raw_paths)

def __str__(self):
"""Return the string representation of the path, suitable for
passing to system calls."""
Expand All @@ -194,7 +227,7 @@ def as_posix(self):
@property
def drive(self):
"""The drive prefix (letter or UNC path), if any."""
return self.pathmod.splitdrive(self._raw_path)[0]
return self.pathmod.splitroot(self._raw_path)[0]

@property
def root(self):
Expand All @@ -210,7 +243,7 @@ def anchor(self):
@property
def name(self):
"""The final path component, if any."""
return self.pathmod.basename(self._raw_path)
return self.pathmod.split(self._raw_path)[1]

@property
def suffix(self):
Expand Down Expand Up @@ -251,10 +284,10 @@ def stem(self):

def with_name(self, name):
"""Return a new path with the file name changed."""
dirname = self.pathmod.dirname
if dirname(name):
split = self.pathmod.split
if split(name)[0]:
raise ValueError(f"Invalid name {name!r}")
return self.with_segments(dirname(self._raw_path), name)
return self.with_segments(split(self._raw_path)[0], name)

def with_stem(self, stem):
"""Return a new path with the stem changed."""
Expand Down Expand Up @@ -336,17 +369,17 @@ def joinpath(self, *pathsegments):
paths) or a totally different path (if one of the arguments is
anchored).
"""
return self.with_segments(*self._raw_paths, *pathsegments)
return self.with_segments(self._raw_path, *pathsegments)

def __truediv__(self, key):
try:
return self.joinpath(key)
return self.with_segments(self._raw_path, key)
except TypeError:
return NotImplemented

def __rtruediv__(self, key):
try:
return self.with_segments(key, *self._raw_paths)
return self.with_segments(key, self._raw_path)
except TypeError:
return NotImplemented

Expand All @@ -371,7 +404,7 @@ def _stack(self):
def parent(self):
"""The logical parent of the path."""
path = self._raw_path
parent = self.pathmod.dirname(path)
parent = self.pathmod.split(path)[0]
if path != parent:
parent = self.with_segments(parent)
parent._resolving = self._resolving
Expand All @@ -381,43 +414,20 @@ def parent(self):
@property
def parents(self):
"""A sequence of this path's logical parents."""
dirname = self.pathmod.dirname
split = self.pathmod.split
path = self._raw_path
parent = dirname(path)
parent = split(path)[0]
parents = []
while path != parent:
parents.append(self.with_segments(parent))
path = parent
parent = dirname(path)
parent = split(path)[0]
return tuple(parents)

def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable,
a drive)."""
if self.pathmod is posixpath:
# Optimization: work with raw paths on POSIX.
for path in self._raw_paths:
if path.startswith('/'):
return True
return False
else:
return self.pathmod.isabs(self._raw_path)

def is_reserved(self):
"""Return True if the path contains one of the special names reserved
by the system, if any."""
if self.pathmod is posixpath or not self.name:
return False

# NOTE: the rules for reserved names seem somewhat complicated
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
# exist). We err on the side of caution and return True for paths
# which are not considered reserved by Windows.
if self.drive.startswith('\\\\'):
# UNC paths are never reserved.
return False
name = self.name.partition('.')[0].partition(':')[0].rstrip(' ')
return name.upper() in _WIN_RESERVED_NAMES
return self.pathmod.isabs(self._raw_path)

def match(self, path_pattern, *, case_sensitive=None):
"""
Expand Down Expand Up @@ -726,7 +736,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
raise ValueError("Unacceptable pattern: {!r}".format(pattern))

pattern_parts = list(path_pattern.parts)
if not self.pathmod.basename(pattern):
if not self.pathmod.split(pattern)[1]:
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('')

Expand Down
1 change: 1 addition & 0 deletions Lib/test/test_pathlib/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,6 +1151,7 @@ def tempdir(self):

def test_matches_pathbase_api(self):
our_names = {name for name in dir(self.cls) if name[0] != '_'}
our_names.remove('is_reserved') # only present in PurePath
path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
self.assertEqual(our_names, path_names)
for attr_name in our_names:
Expand Down
Loading

0 comments on commit 3d5c6c3

Please sign in to comment.