Skip to content

Commit

Permalink
pythonGH-82805: Fix handling of single-dot file extensions in pathlib (
Browse files Browse the repository at this point in the history
…python#118952)

pathlib now treats "`.`" as a valid file extension (suffix). This brings
it in line with `os.path.splitext()`.

In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method
that splits a path into a `(root, ext)` pair, like `os.path.splitext()`.
This method is called by `PurePathBase.stem`, `suffix`, etc. In a future
version of pathlib, we might make these base classes public, and so users
will be able to define their own `splitext()` method to control file
extension splitting.

In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes`
properties that don't use `splitext()`, which avoids computing the path
base name twice.
  • Loading branch information
barneygale authored May 25, 2024
1 parent 0c5ebe1 commit e418fc3
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 35 deletions.
13 changes: 13 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,10 @@ Pure paths provide the following methods and properties:

This is commonly called the file extension.

.. versionchanged:: 3.14

A single dot ("``.``") is considered a valid suffix.

.. attribute:: PurePath.suffixes

A list of the path's suffixes, often called file extensions::
Expand All @@ -460,6 +464,10 @@ Pure paths provide the following methods and properties:
>>> PurePosixPath('my/library').suffixes
[]

.. versionchanged:: 3.14

A single dot ("``.``") is considered a valid suffix.


.. attribute:: PurePath.stem

Expand Down Expand Up @@ -713,6 +721,11 @@ Pure paths provide the following methods and properties:
>>> p.with_suffix('')
PureWindowsPath('README')

.. versionchanged:: 3.14

A single dot ("``.``") is considered a valid suffix. In previous
versions, :exc:`ValueError` is raised if a single dot is supplied.


.. method:: PurePath.with_segments(*pathsegments)

Expand Down
34 changes: 16 additions & 18 deletions Lib/pathlib/_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,12 @@ def splitdrive(self, path):
drive. Either part may be empty."""
raise UnsupportedOperation(self._unsupported_msg('splitdrive()'))

def splitext(self, path):
"""Split the path into a pair (root, ext), where *ext* is empty or
begins with a begins with a period and contains at most one period,
and *root* is everything before the extension."""
raise UnsupportedOperation(self._unsupported_msg('splitext()'))

def normcase(self, path):
"""Normalize the case of the path."""
raise UnsupportedOperation(self._unsupported_msg('normcase()'))
Expand Down Expand Up @@ -151,12 +157,7 @@ def suffix(self):
This includes the leading period. For example: '.txt'
"""
name = self.name
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[i:]
else:
return ''
return self.parser.splitext(self.name)[1]

@property
def suffixes(self):
Expand All @@ -165,21 +166,18 @@ def suffixes(self):
These include the leading periods. For example: ['.tar', '.gz']
"""
name = self.name
if name.endswith('.'):
return []
name = name.lstrip('.')
return ['.' + suffix for suffix in name.split('.')[1:]]
split = self.parser.splitext
stem, suffix = split(self.name)
suffixes = []
while suffix:
suffixes.append(suffix)
stem, suffix = split(stem)
return suffixes[::-1]

@property
def stem(self):
"""The final path component, minus its last suffix."""
name = self.name
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[:i]
else:
return name
return self.parser.splitext(self.name)[0]

def with_name(self, name):
"""Return a new path with the file name changed."""
Expand Down Expand Up @@ -208,7 +206,7 @@ def with_suffix(self, suffix):
if not stem:
# If the stem is empty, we can't make the suffix non-empty.
raise ValueError(f"{self!r} has an empty name")
elif suffix and not (suffix.startswith('.') and len(suffix) > 1):
elif suffix and not suffix.startswith('.'):
raise ValueError(f"Invalid suffix {suffix!r}")
else:
return self.with_name(stem + suffix)
Expand Down
34 changes: 34 additions & 0 deletions Lib/pathlib/_local.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,40 @@ def with_name(self, name):
tail[-1] = name
return self._from_parsed_parts(self.drive, self.root, tail)

@property
def stem(self):
"""The final path component, minus its last suffix."""
name = self.name
i = name.rfind('.')
if i != -1:
stem = name[:i]
# Stem must contain at least one non-dot character.
if stem.lstrip('.'):
return stem
return name

@property
def suffix(self):
"""
The final component's last suffix, if any.
This includes the leading period. For example: '.txt'
"""
name = self.name.lstrip('.')
i = name.rfind('.')
if i != -1:
return name[i:]
return ''

@property
def suffixes(self):
"""
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz']
"""
return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]]

def relative_to(self, other, *, walk_up=False):
"""Return the relative path to another path identified by the passed
arguments. If the operation is not possible (because this is not
Expand Down
50 changes: 33 additions & 17 deletions Lib/test/test_pathlib/test_pathlib_abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_unsupported_operation(self):
self.assertRaises(e, m.join, 'foo')
self.assertRaises(e, m.split, 'foo')
self.assertRaises(e, m.splitdrive, 'foo')
self.assertRaises(e, m.splitext, 'foo')
self.assertRaises(e, m.normcase, 'foo')
self.assertRaises(e, m.isabs, 'foo')

Expand Down Expand Up @@ -789,8 +790,12 @@ def test_suffix_common(self):
self.assertEqual(P('/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('a/trailing.dot.').suffix, '.')
self.assertEqual(P('/a/trailing.dot.').suffix, '.')
self.assertEqual(P('a/..d.o.t..').suffix, '.')
self.assertEqual(P('a/inn.er..dots').suffix, '.dots')
self.assertEqual(P('photo').suffix, '')
self.assertEqual(P('photo.jpg').suffix, '.jpg')

@needs_windows
def test_suffix_windows(self):
Expand All @@ -807,8 +812,8 @@ def test_suffix_windows(self):
self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('c:a/trailing.dot.').suffix, '.')
self.assertEqual(P('c:/a/trailing.dot.').suffix, '.')
self.assertEqual(P('//My.py/Share.php').suffix, '')
self.assertEqual(P('//My.py/Share.php/a/b').suffix, '')

Expand All @@ -828,8 +833,12 @@ def test_suffixes_common(self):
self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc'])
self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.'])
self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots'])
self.assertEqual(P('photo').suffixes, [])
self.assertEqual(P('photo.jpg').suffixes, ['.jpg'])

@needs_windows
def test_suffixes_windows(self):
Expand All @@ -848,8 +857,8 @@ def test_suffixes_windows(self):
self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('//My.py/Share.php').suffixes, [])
self.assertEqual(P('//My.py/Share.php/a/b').suffixes, [])
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.'])

def test_stem_empty(self):
P = self.cls
Expand All @@ -865,8 +874,11 @@ def test_stem_common(self):
self.assertEqual(P('a/.hgrc').stem, '.hgrc')
self.assertEqual(P('a/.hg.rc').stem, '.hg')
self.assertEqual(P('a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('a/Some name. Ending with a dot.').stem,
'Some name. Ending with a dot.')
self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot')
self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.')
self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.')
self.assertEqual(P('photo').stem, 'photo')
self.assertEqual(P('photo.jpg').stem, 'photo')

@needs_windows
def test_stem_windows(self):
Expand All @@ -880,8 +892,8 @@ def test_stem_windows(self):
self.assertEqual(P('c:a/.hgrc').stem, '.hgrc')
self.assertEqual(P('c:a/.hg.rc').stem, '.hg')
self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('c:a/Some name. Ending with a dot.').stem,
'Some name. Ending with a dot.')
self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot')

def test_with_name_common(self):
P = self.cls
self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml'))
Expand Down Expand Up @@ -929,16 +941,16 @@ def test_with_stem_common(self):
self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py'))
self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py'))
self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz'))
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d'))
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d'))
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.'))
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.'))

@needs_windows
def test_with_stem_windows(self):
P = self.cls
self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d'))
self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d'))
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d'))
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d'))
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.'))
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.'))
self.assertRaises(ValueError, P('c:').with_stem, 'd')
self.assertRaises(ValueError, P('c:/').with_stem, 'd')
self.assertRaises(ValueError, P('//My/Share').with_stem, 'd')
Expand Down Expand Up @@ -974,6 +986,11 @@ def test_with_suffix_common(self):
# Stripping suffix.
self.assertEqual(P('a/b.py').with_suffix(''), P('a/b'))
self.assertEqual(P('/a/b').with_suffix(''), P('/a/b'))
# Single dot
self.assertEqual(P('a/b').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.'))
self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.'))

@needs_windows
def test_with_suffix_windows(self):
Expand Down Expand Up @@ -1012,7 +1029,6 @@ def test_with_suffix_invalid(self):
# Invalid suffix.
self.assertRaises(ValueError, P('a/b').with_suffix, 'gz')
self.assertRaises(ValueError, P('a/b').with_suffix, '/')
self.assertRaises(ValueError, P('a/b').with_suffix, '.')
self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz')
self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d')
self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d')
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and
related attributes and methods. For example, the
:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now
``['.bar', '.']`` rather than ``[]``. This brings file extension splitting
in line with :func:`os.path.splitext`.

0 comments on commit e418fc3

Please sign in to comment.