forked from python/cpython
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pythonGH-125866: RFC8089 file URIs in
urllib.request
Adjust `urllib.request.pathname2url()` and `url2pathname()` to generate and accept file URIs as described in RFC 8089. `pathname2url()` gains a new *include_scheme* argument, which defaults to false. When set to true, the returned URL includes a `file:` prefix. `url2pathname()` now automatically removes a `file:` prefix if present. On Windows, `pathname2url()` now generates URIs that begin with two slashes rather than four when given a UNC path. On other platforms, `pathname2url()` now generates URIs that begin with three slashes rather than one when given an absolute path. `url2pathname()` now performs the opposite transformation, so `file:///etc/hosts` becomes `/etc/hosts`. Furthermore, `url2pathname()` now ignores local hosts (like "localhost" or any alias) and raises `URLError` for non-local hosts.
- Loading branch information
1 parent
6742f14
commit fb92f42
Showing
6 changed files
with
217 additions
and
55 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
import nturl2path | ||
import unittest | ||
import urllib.parse | ||
|
||
|
||
class nturl2path_Tests(unittest.TestCase):
    """Test pathname2url() and url2pathname()"""

    def test_basic(self):
        # Make sure simple tests pass: a relative backslash-separated path
        # and its forward-slash URL form must convert into one another.
        expected_path = "parts\\of\\a\\path"
        expected_url = "parts/of/a/path"
        result = nturl2path.pathname2url(expected_path)
        self.assertEqual(expected_url, result,
                         "pathname2url() failed; %s != %s" %
                         (result, expected_url))
        result = nturl2path.url2pathname(expected_url)
        self.assertEqual(expected_path, result,
                         "url2pathname() failed; %s != %s" %
                         (result, expected_path))

    def test_quoting(self):
        # Test automatic quoting and unquoting works for pathname2url() and
        # url2pathname() respectively
        given = "needs\\quot=ing\\here"
        expect = "needs/%s/here" % urllib.parse.quote("quot=ing")
        result = nturl2path.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # Feed the quoted URL back in: unquoting must restore the input path.
        expect = given
        result = nturl2path.url2pathname(result)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))
        # A space in a path component must be percent-quoted.
        given = "make sure\\using_quote"
        expect = "%s/using_quote" % urllib.parse.quote("make sure")
        result = nturl2path.pathname2url(given)
        self.assertEqual(expect, result,
                         "pathname2url() failed; %s != %s" %
                         (expect, result))
        # A literal '+' in the URL is expected to survive unquoting as-is.
        given = "make+sure/using_unquote"
        expect = "make+sure\\using_unquote"
        result = nturl2path.url2pathname(given)
        self.assertEqual(expect, result,
                         "url2pathname() failed; %s != %s" %
                         (expect, result))

    def test_pathname2url(self):
        # Test special prefixes are correctly handled in pathname2url()
        fn = nturl2path.pathname2url
        self.assertEqual(fn('\\\\?\\C:\\dir'), '///C:/dir')
        self.assertEqual(fn('\\\\?\\unc\\server\\share\\dir'), '/server/share/dir')
        self.assertEqual(fn("C:"), '///C:')
        self.assertEqual(fn("C:\\"), '///C:')
        self.assertEqual(fn('C:\\a\\b.c'), '///C:/a/b.c')
        self.assertEqual(fn('C:\\a\\b%#c'), '///C:/a/b%25%23c')
        self.assertEqual(fn('C:\\a\\b\xe9'), '///C:/a/b%C3%A9')
        self.assertEqual(fn('C:\\foo\\bar\\spam.foo'), "///C:/foo/bar/spam.foo")
        # Long drive letter
        # (OSError is the canonical name of the class formerly spelled IOError.)
        self.assertRaises(OSError, fn, "XX:\\")
        # No drive letter
        self.assertEqual(fn("\\folder\\test\\"), '/folder/test/')
        self.assertEqual(fn("\\\\folder\\test\\"), '////folder/test/')
        self.assertEqual(fn("\\\\\\folder\\test\\"), '/////folder/test/')
        self.assertEqual(fn('\\\\some\\share\\'), '////some/share/')
        self.assertEqual(fn('\\\\some\\share\\a\\b.c'), '////some/share/a/b.c')
        self.assertEqual(fn('\\\\some\\share\\a\\b%#c\xe9'), '////some/share/a/b%25%23c%C3%A9')
        # Round-tripping
        urls = ['///C:',
                '/////folder/test/',
                '///C:/foo/bar/spam.foo']
        for url in urls:
            # subTest names the failing URL and lets the remaining ones run.
            with self.subTest(url=url):
                self.assertEqual(fn(nturl2path.url2pathname(url)), url)

    def test_url2pathname_win(self):
        # Test conversion of URLs to Windows-style paths in url2pathname()
        fn = nturl2path.url2pathname
        self.assertEqual(fn('/C:/'), 'C:\\')
        self.assertEqual(fn("///C|"), 'C:')
        self.assertEqual(fn("///C:"), 'C:')
        self.assertEqual(fn('///C:/'), 'C:\\')
        self.assertEqual(fn('/C|//'), 'C:\\')
        self.assertEqual(fn('///C|/path'), 'C:\\path')
        # No DOS drive
        self.assertEqual(fn("///C/test/"), '\\\\\\C\\test\\')
        self.assertEqual(fn("////C/test/"), '\\\\C\\test\\')
        # DOS drive paths
        self.assertEqual(fn('C:/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('C|/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('/C|/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('///C|/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn("///C|/foo/bar/spam.foo"), 'C:\\foo\\bar\\spam.foo')
        # Non-ASCII drive letter
        # (OSError is the canonical name of the class formerly spelled IOError.)
        self.assertRaises(OSError, fn, "///\u00e8|/")
        # UNC paths
        self.assertEqual(fn('//server/path/to/file'), '\\\\server\\path\\to\\file')
        self.assertEqual(fn('////server/path/to/file'), '\\\\server\\path\\to\\file')
        self.assertEqual(fn('/////server/path/to/file'), '\\\\\\server\\path\\to\\file')
        # Localhost paths
        self.assertEqual(fn('//localhost/C:/path/to/file'), 'C:\\path\\to\\file')
        self.assertEqual(fn('//localhost/C|/path/to/file'), 'C:\\path\\to\\file')
        # Round-tripping
        paths = ['C:',
                 r'\\\C\test\\',
                 r'C:\foo\bar\spam.foo']
        for path in paths:
            # subTest names the failing path and lets the remaining ones run.
            with self.subTest(path=path):
                self.assertEqual(fn(nturl2path.pathname2url(path)), path)
|
||
|
||
# Run the test suite only when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters