Skip to content

Commit

Permalink
pythonGH-107465: Add pathlib.Path.from_uri() classmethod.
Browse files Browse the repository at this point in the history
This method supports file URIs (including variants) as described in
RFC 8089, such as URIs generated by `pathlib.Path.as_uri()` and
`urllib.request.pathname2url`.

The method is added to `Path` rather than `PurePath` because it uses
`os.fsdecode()`, and so its results vary from system to system. I intend to
deprecate `PurePath.as_uri()` and move it to `Path` for the same reason.
  • Loading branch information
barneygale committed Aug 4, 2023
1 parent c6c5665 commit 0f51181
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 2 deletions.
45 changes: 45 additions & 0 deletions Doc/library/pathlib.rst
Original file line number Diff line number Diff line change
Expand Up @@ -850,6 +850,51 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5


.. classmethod:: Path.from_uri(uri)

Return a new path object from parsing a 'file' URI conforming to
:rfc:`8089`. For example::

>>> Path.from_uri('file:///etc/hosts')
PosixPath('/etc/hosts')

On Windows, DOS device and UNC paths may be parsed from URIs::

>>> Path.from_uri('file:///c:/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file://server/share')
WindowsPath('//server/share')

Several variant forms are supported::

>>> Path.from_uri('file:////server/share')
WindowsPath('//server/share')
>>> Path.from_uri('file://///server/share')
WindowsPath('//server/share')
>>> Path.from_uri('file:c:/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file:/c|/windows')
WindowsPath('c:/windows')
>>> Path.from_uri('file://///c:/windows')
WindowsPath('c:/windows')

URIs with no slash after the scheme (and no drive letter) are parsed as
relative paths::

>>> Path.from_uri('file:foo/bar')
WindowsPath('foo/bar')

Users may wish to test the result with :meth:`~PurePath.is_absolute` and
reject relative paths, as these are not portable across processes with
differing working directories.

:func:`os.fsdecode` is used to decode percent-escaped byte sequences, and
so file URIs are not portable across machines with differing
:ref:`filesystem encodings <filesystem-encoding>`.

.. versionadded:: 3.13


.. method:: Path.stat(*, follow_symlinks=True)

Return a :class:`os.stat_result` object containing information about this path, like :func:`os.stat`.
Expand Down
3 changes: 3 additions & 0 deletions Doc/whatsnew/3.13.rst
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,9 @@ pathlib
:exc:`NotImplementedError` when a path operation isn't supported.
(Contributed by Barney Gale in :gh:`89812`.)

* Add :meth:`pathlib.Path.from_uri` classmethod.
(Contributed by Barney Gale in :gh:`107465`.)

* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
(Contributed by Barney Gale in :gh:`73435`.)

Expand Down
20 changes: 18 additions & 2 deletions Lib/pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
from _collections_abc import Sequence
from errno import ENOENT, ENOTDIR, EBADF, ELOOP
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
from urllib.parse import quote_from_bytes as urlquote_from_bytes


__all__ = [
Expand Down Expand Up @@ -433,7 +432,8 @@ def as_uri(self):
# It's a posix path => 'file:///etc/hosts'
prefix = 'file://'
path = str(self)
return prefix + urlquote_from_bytes(os.fsencode(path))
from urllib.parse import quote_from_bytes
return prefix + quote_from_bytes(os.fsencode(path))

@property
def _str_normcase(self):
Expand Down Expand Up @@ -1178,6 +1178,22 @@ def __new__(cls, *args, **kwargs):
cls = WindowsPath if os.name == 'nt' else PosixPath
return object.__new__(cls)

@classmethod
def from_uri(cls, uri):
    """Return a new path from the given 'file' URI.

    *uri* is a string such as ``'file:///etc/hosts'``.  A leading
    ``'file:'`` scheme is removed when present, an empty authority
    (``'//'`` followed by ``'/'``) is dropped, the extra slash in front of
    a DOS drive (``'/c:'`` or ``'/c|'``) or a UNC path (``'///'``) is
    removed, and a ``'|'`` in second position is rewritten to ``':'``.
    Percent-escapes are then decoded to bytes and converted to text with
    os.fsdecode() before being passed to *cls*.
    """
    path = uri.removeprefix('file:')
    if path[:3] == '///':
        # Remove empty authority
        path = path[2:]
    # The drive marker is compared against a tuple, not the string ':|':
    # a substring test would also accept the empty slice produced by short
    # paths such as '/' or '/a', wrongly stripping their root slash.
    if path[:3] == '///' or (path[:1] == '/' and path[2:3] in (':', '|')):
        # Remove slash before DOS drive/UNC path
        path = path[1:]
    if path[1:2] == '|':
        # Replace bar with colon in DOS drive
        path = path[:1] + ':' + path[2:]
    from urllib.parse import unquote_to_bytes
    return cls(os.fsdecode(unquote_to_bytes(path)))

@classmethod
def cwd(cls):
"""Return a new path pointing to the current working directory."""
Expand Down
31 changes: 31 additions & 0 deletions Lib/test/test_pathlib.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import tempfile
import unittest
from unittest import mock
from urllib.request import pathname2url

from test.support import import_helper
from test.support import set_recursion_limit
Expand Down Expand Up @@ -2913,6 +2914,20 @@ def test_passing_kwargs_deprecated(self):
with self.assertWarns(DeprecationWarning):
self.cls(foo="bar")

def test_from_uri_common(self):
    # Each entry pairs a 'file' URI with the path string it should parse
    # to; the slash count after the scheme varies from zero to four.
    make = self.cls
    expectations = (
        ('file:foo/bar', 'foo/bar'),
        ('file:/foo/bar', '/foo/bar'),
        ('file://foo/bar', '//foo/bar'),
        ('file:///foo/bar', '/foo/bar'),
        ('file:////foo/bar', '//foo/bar'),
    )
    for uri, expected in expectations:
        self.assertEqual(make.from_uri(uri), make(expected))

def test_from_uri_pathname2url_common(self):
    # Output of pathname2url() should parse back to the original pathname,
    # whether it has zero, one or two leading slashes.
    make = self.cls
    for pathname in ('foo/bar', '/foo/bar', '//foo/bar'):
        parsed = make.from_uri(pathname2url(pathname))
        self.assertEqual(parsed, make(pathname))


class WalkTests(unittest.TestCase):

Expand Down Expand Up @@ -3441,7 +3456,23 @@ def check():
env['HOME'] = 'C:\\Users\\eve'
check()

def test_from_uri(self):
    make = self.cls
    # URI spellings that should all yield the same DOS drive path
    drive_uris = (
        'file:c:/path/to/file',
        'file:c|/path/to/file',
        'file:/c|/path/to/file',
        'file:///c|/path/to/file',
        # NOTE(review): five slashes before a drive letter -- confirm that
        # from_uri() really reduces this form to a bare drive path.
        'file://///c|/path/to/file',
    )
    # URI spellings that should all yield the same UNC path
    unc_uris = (
        'file://server/path/to/file',
        'file:////server/path/to/file',
        'file://///server/path/to/file',
    )
    for uri in drive_uris:
        self.assertEqual(make.from_uri(uri), make('c:/path/to/file'))
    for uri in unc_uris:
        self.assertEqual(make.from_uri(uri), make('//server/path/to/file'))

def test_from_uri_pathname2url(self):
    # pathname2url() output for a backslash-separated drive path and UNC
    # path should parse to the equivalent forward-slash paths.
    make = self.cls
    round_trips = (
        (r'c:\path\to\file', 'c:/path/to/file'),
        (r'\\server\path\to\file', '//server/path/to/file'),
    )
    for native, expected in round_trips:
        parsed = make.from_uri(pathname2url(native))
        self.assertEqual(parsed, make(expected))

class PathSubclassTest(PathTest):
class cls(pathlib.Path):
Expand Down

0 comments on commit 0f51181

Please sign in to comment.