From ca6cf56330ae7751819b62748f33f23d98596703 Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sun, 14 Jan 2024 21:49:53 +0000 Subject: [PATCH] Add `pathlib._abc.PathModuleBase` (#113893) Path modules provide a subset of the `os.path` API, specifically those functions needed to provide `PurePathBase` functionality. Each `PurePathBase` subclass references its path module via a `pathmod` class attribute. This commit adds a new `PathModuleBase` class, which provides abstract methods that unconditionally raise `UnsupportedOperation`. An instance of this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`. As a result, `PurePathBase` is no longer POSIX-y by default, and all its methods raise `UnsupportedOperation` courtesy of `pathmod`. Users who subclass `PurePathBase` or `PathBase` should choose the path syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their own subclass of `PathModuleBase`, as circumstances demand. --- Lib/pathlib/__init__.py | 60 +++++++++++ Lib/pathlib/_abc.py | 124 ++++++++++++---------- Lib/test/test_pathlib/test_pathlib.py | 1 + Lib/test/test_pathlib/test_pathlib_abc.py | 56 +++++++++- 4 files changed, 182 insertions(+), 59 deletions(-) diff --git a/Lib/pathlib/__init__.py b/Lib/pathlib/__init__.py index e70cfe91d322bc..f14d35bb0038d0 100644 --- a/Lib/pathlib/__init__.py +++ b/Lib/pathlib/__init__.py @@ -33,6 +33,15 @@ ] +# Reference for Windows paths can be found at +# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . +_WIN_RESERVED_NAMES = frozenset( + {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | + {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | + {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} +) + + class _PathParents(Sequence): """This object provides sequence-like access to the logical ancestors of a path. Don't try to construct it yourself.""" @@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase): """ __slots__ = ( + # The `_raw_paths` slot stores unnormalized string paths. 
This is set + # in the `__init__()` method. + '_raw_paths', + # The `_drv`, `_root` and `_tail_cached` slots store parsed and # normalized parts of the path. They are set when any of the `drive`, # `root` or `_tail` properties are accessed for the first time. The @@ -141,6 +154,26 @@ def __init__(self, *args): # Avoid calling super().__init__, as an optimisation self._raw_paths = paths + def joinpath(self, *pathsegments): + """Combine this path with one or several arguments, and return a + new path representing either a subpath (if all arguments are relative + paths) or a totally different path (if one of the arguments is + anchored). + """ + return self.with_segments(self, *pathsegments) + + def __truediv__(self, key): + try: + return self.with_segments(self, key) + except TypeError: + return NotImplemented + + def __rtruediv__(self, key): + try: + return self.with_segments(key, self) + except TypeError: + return NotImplemented + def __reduce__(self): # Using the parts tuple helps share interned path parts # when pickling related paths. @@ -386,6 +419,33 @@ def is_relative_to(self, other, /, *_deprecated): other = self.with_segments(other) return other == self or other in self.parents + def is_absolute(self): + """True if the path is absolute (has both a root and, if applicable, + a drive).""" + if self.pathmod is posixpath: + # Optimization: work with raw paths on POSIX. + for path in self._raw_paths: + if path.startswith('/'): + return True + return False + return self.pathmod.isabs(self) + + def is_reserved(self): + """Return True if the path contains one of the special names reserved + by the system, if any.""" + if self.pathmod is not ntpath or not self.name: + return False + + # NOTE: the rules for reserved names seem somewhat complicated + # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not + # exist). We err on the side of caution and return True for paths + # which are not considered reserved by Windows. 
+ if self.drive.startswith('\\\\'): + # UNC paths are never reserved. + return False + name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') + return name.upper() in _WIN_RESERVED_NAMES + def as_uri(self): """Return the path as a URI.""" if not self.is_absolute(): diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index a6956f251f8b7e..1fdca004d6b31f 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -12,7 +12,6 @@ """ import functools -import posixpath from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO @@ -20,14 +19,6 @@ # Internals # -# Reference for Windows paths can be found at -# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file . -_WIN_RESERVED_NAMES = frozenset( - {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | - {f'COM{c}' for c in '123456789\xb9\xb2\xb3'} | - {f'LPT{c}' for c in '123456789\xb9\xb2\xb3'} -) - _WINERROR_NOT_READY = 21 # drive exists but is not accessible _WINERROR_INVALID_NAME = 123 # fix for bpo-35306 _WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself @@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError): pass +class PathModuleBase: + """Base class for path modules, which do low-level path manipulation. + + Path modules provide a subset of the os.path API, specifically those + functions needed to provide PurePathBase functionality. Each PurePathBase + subclass references its path module via a 'pathmod' class attribute. + + Every method in this base class raises an UnsupportedOperation exception. 
+ """ + + @classmethod + def _unsupported(cls, attr): + raise UnsupportedOperation(f"{cls.__name__}.{attr} is unsupported") + + @property + def sep(self): + """The character used to separate path components.""" + self._unsupported('sep') + + def join(self, path, *paths): + """Join path segments.""" + self._unsupported('join()') + + def split(self, path): + """Split the path into a pair (head, tail), where *head* is everything + before the final path separator, and *tail* is everything after. + Either part may be empty. + """ + self._unsupported('split()') + + def splitroot(self, path): + """Split the pathname path into a 3-item tuple (drive, root, tail), + where *drive* is a device name or mount point, *root* is a string of + separators after the drive, and *tail* is everything after the root. + Any part may be empty.""" + self._unsupported('splitroot()') + + def normcase(self, path): + """Normalize the case of the path.""" + self._unsupported('normcase()') + + def isabs(self, path): + """Returns whether the path is absolute, i.e. unaffected by the + current directory or drive.""" + self._unsupported('isabs()') + + class PurePathBase: """Base class for pure path objects. @@ -154,19 +192,19 @@ class PurePathBase: """ __slots__ = ( - # The `_raw_paths` slot stores unnormalized string paths. This is set - # in the `__init__()` method. - '_raw_paths', + # The `_raw_path` slot stores a joined string path. This is set in the + # `__init__()` method. + '_raw_path', # The '_resolving' slot stores a boolean indicating whether the path # is being processed by `PathBase.resolve()`. This prevents duplicate # work from occurring when `resolve()` calls `stat()` or `readlink()`. 
'_resolving', ) - pathmod = posixpath + pathmod = PathModuleBase() - def __init__(self, *paths): - self._raw_paths = paths + def __init__(self, path, *paths): + self._raw_path = self.pathmod.join(path, *paths) if paths else path self._resolving = False def with_segments(self, *pathsegments): @@ -176,11 +214,6 @@ def with_segments(self, *pathsegments): """ return type(self)(*pathsegments) - @property - def _raw_path(self): - """The joined but unnormalized path.""" - return self.pathmod.join(*self._raw_paths) - def __str__(self): """Return the string representation of the path, suitable for passing to system calls.""" @@ -194,7 +227,7 @@ def as_posix(self): @property def drive(self): """The drive prefix (letter or UNC path), if any.""" - return self.pathmod.splitdrive(self._raw_path)[0] + return self.pathmod.splitroot(self._raw_path)[0] @property def root(self): @@ -210,7 +243,7 @@ def anchor(self): @property def name(self): """The final path component, if any.""" - return self.pathmod.basename(self._raw_path) + return self.pathmod.split(self._raw_path)[1] @property def suffix(self): @@ -251,10 +284,10 @@ def stem(self): def with_name(self, name): """Return a new path with the file name changed.""" - dirname = self.pathmod.dirname - if dirname(name): + split = self.pathmod.split + if split(name)[0]: raise ValueError(f"Invalid name {name!r}") - return self.with_segments(dirname(self._raw_path), name) + return self.with_segments(split(self._raw_path)[0], name) def with_stem(self, stem): """Return a new path with the stem changed.""" @@ -336,17 +369,17 @@ def joinpath(self, *pathsegments): paths) or a totally different path (if one of the arguments is anchored). 
""" - return self.with_segments(*self._raw_paths, *pathsegments) + return self.with_segments(self._raw_path, *pathsegments) def __truediv__(self, key): try: - return self.joinpath(key) + return self.with_segments(self._raw_path, key) except TypeError: return NotImplemented def __rtruediv__(self, key): try: - return self.with_segments(key, *self._raw_paths) + return self.with_segments(key, self._raw_path) except TypeError: return NotImplemented @@ -371,7 +404,7 @@ def _stack(self): def parent(self): """The logical parent of the path.""" path = self._raw_path - parent = self.pathmod.dirname(path) + parent = self.pathmod.split(path)[0] if path != parent: parent = self.with_segments(parent) parent._resolving = self._resolving @@ -381,43 +414,20 @@ def parent(self): @property def parents(self): """A sequence of this path's logical parents.""" - dirname = self.pathmod.dirname + split = self.pathmod.split path = self._raw_path - parent = dirname(path) + parent = split(path)[0] parents = [] while path != parent: parents.append(self.with_segments(parent)) path = parent - parent = dirname(path) + parent = split(path)[0] return tuple(parents) def is_absolute(self): """True if the path is absolute (has both a root and, if applicable, a drive).""" - if self.pathmod is posixpath: - # Optimization: work with raw paths on POSIX. - for path in self._raw_paths: - if path.startswith('/'): - return True - return False - else: - return self.pathmod.isabs(self._raw_path) - - def is_reserved(self): - """Return True if the path contains one of the special names reserved - by the system, if any.""" - if self.pathmod is posixpath or not self.name: - return False - - # NOTE: the rules for reserved names seem somewhat complicated - # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not - # exist). We err on the side of caution and return True for paths - # which are not considered reserved by Windows. - if self.drive.startswith('\\\\'): - # UNC paths are never reserved. 
- return False - name = self.name.partition('.')[0].partition(':')[0].rstrip(' ') - return name.upper() in _WIN_RESERVED_NAMES + return self.pathmod.isabs(self._raw_path) def match(self, path_pattern, *, case_sensitive=None): """ @@ -726,7 +736,7 @@ def glob(self, pattern, *, case_sensitive=None, follow_symlinks=None): raise ValueError("Unacceptable pattern: {!r}".format(pattern)) pattern_parts = list(path_pattern.parts) - if not self.pathmod.basename(pattern): + if not self.pathmod.split(pattern)[1]: # GH-65238: pathlib doesn't preserve trailing slash. Add it back. pattern_parts.append('') diff --git a/Lib/test/test_pathlib/test_pathlib.py b/Lib/test/test_pathlib/test_pathlib.py index 1b560adfc3b57a..61d7939ad140b2 100644 --- a/Lib/test/test_pathlib/test_pathlib.py +++ b/Lib/test/test_pathlib/test_pathlib.py @@ -1151,6 +1151,7 @@ def tempdir(self): def test_matches_pathbase_api(self): our_names = {name for name in dir(self.cls) if name[0] != '_'} + our_names.remove('is_reserved') # only present in PurePath path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'} self.assertEqual(our_names, path_names) for attr_name in our_names: diff --git a/Lib/test/test_pathlib/test_pathlib_abc.py b/Lib/test/test_pathlib/test_pathlib_abc.py index 14df1e69db1f96..c3c568c296e25c 100644 --- a/Lib/test/test_pathlib/test_pathlib_abc.py +++ b/Lib/test/test_pathlib/test_pathlib_abc.py @@ -5,7 +5,7 @@ import stat import unittest -from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase +from pathlib._abc import UnsupportedOperation, PathModuleBase, PurePathBase, PathBase import posixpath from test.support.os_helper import TESTFN @@ -17,6 +17,20 @@ def test_is_notimplemented(self): self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) +class PathModuleBaseTest(unittest.TestCase): + cls = PathModuleBase + + def test_unsupported_operation(self): + m = self.cls() + e = UnsupportedOperation + with self.assertRaises(e): + m.sep + 
self.assertRaises(e, m.join, 'foo') + self.assertRaises(e, m.split, 'foo') + self.assertRaises(e, m.splitroot, 'foo') + self.assertRaises(e, m.normcase, 'foo') + self.assertRaises(e, m.isabs, 'foo') + # # Tests for the pure classes. # @@ -25,6 +39,42 @@ def test_is_notimplemented(self): class PurePathBaseTest(unittest.TestCase): cls = PurePathBase + def test_unsupported_operation_pure(self): + p = self.cls('foo') + e = UnsupportedOperation + with self.assertRaises(e): + p.drive + with self.assertRaises(e): + p.root + with self.assertRaises(e): + p.anchor + with self.assertRaises(e): + p.parts + with self.assertRaises(e): + p.parent + with self.assertRaises(e): + p.parents + with self.assertRaises(e): + p.name + with self.assertRaises(e): + p.stem + with self.assertRaises(e): + p.suffix + with self.assertRaises(e): + p.suffixes + with self.assertRaises(e): + p / 'bar' + with self.assertRaises(e): + 'bar' / p + self.assertRaises(e, p.joinpath, 'bar') + self.assertRaises(e, p.with_name, 'bar') + self.assertRaises(e, p.with_stem, 'bar') + self.assertRaises(e, p.with_suffix, '.txt') + self.assertRaises(e, p.relative_to, '') + self.assertRaises(e, p.is_relative_to, '') + self.assertRaises(e, p.is_absolute) + self.assertRaises(e, p.match, '*') + def test_magic_methods(self): P = self.cls self.assertFalse(hasattr(P, '__fspath__')) @@ -39,11 +89,12 @@ def test_magic_methods(self): self.assertIs(P.__ge__, object.__ge__) def test_pathmod(self): - self.assertIs(self.cls.pathmod, posixpath) + self.assertIsInstance(self.cls.pathmod, PathModuleBase) class DummyPurePath(PurePathBase): __slots__ = () + pathmod = posixpath def __eq__(self, other): if not isinstance(other, DummyPurePath): @@ -669,6 +720,7 @@ class DummyPath(PathBase): memory. """ __slots__ = () + pathmod = posixpath _files = {} _directories = {}