diff --git a/Doc/library/os.path.rst b/Doc/library/os.path.rst index c201b1460ede30..7e2ae79016beba 100644 --- a/Doc/library/os.path.rst +++ b/Doc/library/os.path.rst @@ -469,7 +469,7 @@ the :mod:`glob` module.) ("c:", "/dir") If the path contains a UNC path, drive will contain the host name - and share, up to but not including the fourth separator:: + and share:: >>> splitdrive("//host/computer/dir") ("//host/computer", "/dir") diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 041ebc75cb127c..955539ae458b33 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -146,17 +146,23 @@ def splitdrive(p): sep = b'\\' altsep = b'/' colon = b':' + unc_prefix = b'\\\\?\\UNC' else: sep = '\\' altsep = '/' colon = ':' + unc_prefix = '\\\\?\\UNC' normp = p.replace(altsep, sep) if (normp[0:2] == sep*2) and (normp[2:3] != sep): # is a UNC path: # vvvvvvvvvvvvvvvvvvvv drive letter or UNC path # \\machine\mountpoint\directory\etc\... # directory ^^^^^^^^^^^^^^^ - index = normp.find(sep, 2) + if normp[:8].upper().rstrip(sep) == unc_prefix: + start = 8 + else: + start = 2 + index = normp.find(sep, start) if index == -1: return p[:0], p index2 = normp.find(sep, index + 1) diff --git a/Lib/pathlib.py b/Lib/pathlib.py index 4763ab54f6ba81..dd7b2dea1a6ae5 100644 --- a/Lib/pathlib.py +++ b/Lib/pathlib.py @@ -120,68 +120,18 @@ class _WindowsFlavour(_Flavour): is_supported = (os.name == 'nt') - drive_letters = set('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ') - ext_namespace_prefix = '\\\\?\\' - reserved_names = ( {'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$'} | {'COM%s' % c for c in '123456789\xb9\xb2\xb3'} | {'LPT%s' % c for c in '123456789\xb9\xb2\xb3'} ) - # Interesting findings about extended paths: - # * '\\?\c:\a' is an extended path, which bypasses normal Windows API - # path processing. Thus relative paths are not resolved and slash is not - # translated to backslash. It has the native NT path limit of 32767 - # characters, but a bit less after resolving device symbolic links, - # such as '\??\C:' => '\Device\HarddiskVolume2'. - # * '\\?\c:/a' looks for a device named 'C:/a' because slash is a - # regular name character in the object namespace. - # * '\\?\c:\foo/bar' is invalid because '/' is illegal in NT filesystems. - # The only path separator at the filesystem level is backslash. - # * '//?/c:\a' and '//?/c:/a' are effectively equivalent to '\\.\c:\a' and - # thus limited to MAX_PATH. - # * Prior to Windows 8, ANSI API bytes paths are limited to MAX_PATH, - # even with the '\\?\' prefix. - def splitroot(self, part, sep=sep): - first = part[0:1] - second = part[1:2] - if (second == sep and first == sep): - # XXX extended paths should also disable the collapsing of "." - # components (according to MSDN docs). - prefix, part = self._split_extended_path(part) - first = part[0:1] - second = part[1:2] + drv, rest = self.pathmod.splitdrive(part) + if drv[:1] == sep or rest[:1] == sep: + return drv, sep, rest.lstrip(sep) else: - prefix = '' - third = part[2:3] - if (second == sep and first == sep and third != sep): - # is a UNC path: - # vvvvvvvvvvvvvvvvvvvvv root - # \\machine\mountpoint\directory\etc\... - # directory ^^^^^^^^^^^^^^ - index = part.find(sep, 2) - if index != -1: - index2 = part.find(sep, index + 1) - # a UNC path can't have two slashes in a row - # (after the initial two) - if index2 != index + 1: - if index2 == -1: - index2 = len(part) - if prefix: - return prefix + part[1:index2], sep, part[index2+1:] - else: - return part[:index2], sep, part[index2+1:] - drv = root = '' - if second == ':' and first in self.drive_letters: - drv = part[:2] - part = part[2:] - first = third - if first == sep: - root = first - part = part.lstrip(sep) - return prefix + drv, root, part + return drv, '', rest def casefold(self, s): return s.lower() @@ -192,16 +142,6 @@ def casefold_parts(self, parts): def compile_pattern(self, pattern): return re.compile(fnmatch.translate(pattern), re.IGNORECASE).fullmatch - def _split_extended_path(self, s, ext_prefix=ext_namespace_prefix): - prefix = '' - if s.startswith(ext_prefix): - prefix = s[:4] - s = s[4:] - if s.startswith('UNC\\'): - prefix += s[:3] - s = '\\' + s[3:] - return prefix, s - def is_reserved(self, parts): # NOTE: the rules for reserved names seem somewhat complicated # (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 7211ed861762b4..d50984d5fb153f 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -117,6 +117,31 @@ def test_splitdrive(self): # Issue #19911: UNC part containing U+0130 self.assertEqual(ntpath.splitdrive('//conky/MOUNTPOİNT/foo/bar'), ('//conky/MOUNTPOİNT', '/foo/bar')) + # gh-81790: support device namespace, including UNC drives. + tester('ntpath.splitdrive("//?/c:")', ("//?/c:", "")) + tester('ntpath.splitdrive("//?/c:/")', ("//?/c:", "/")) + tester('ntpath.splitdrive("//?/c:/dir")', ("//?/c:", "/dir")) + tester('ntpath.splitdrive("//?/UNC")', ("", "//?/UNC")) + tester('ntpath.splitdrive("//?/UNC/")', ("", "//?/UNC/")) + tester('ntpath.splitdrive("//?/UNC/server/")', ("//?/UNC/server/", "")) + tester('ntpath.splitdrive("//?/UNC/server/share")', ("//?/UNC/server/share", "")) + tester('ntpath.splitdrive("//?/UNC/server/share/dir")', ("//?/UNC/server/share", "/dir")) + tester('ntpath.splitdrive("//?/VOLUME{00000000-0000-0000-0000-000000000000}/spam")', + ('//?/VOLUME{00000000-0000-0000-0000-000000000000}', '/spam')) + tester('ntpath.splitdrive("//?/BootPartition/")', ("//?/BootPartition", "/")) + + tester('ntpath.splitdrive("\\\\?\\c:")', ("\\\\?\\c:", "")) + tester('ntpath.splitdrive("\\\\?\\c:\\")', ("\\\\?\\c:", "\\")) + tester('ntpath.splitdrive("\\\\?\\c:\\dir")', ("\\\\?\\c:", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\UNC")', ("", "\\\\?\\UNC")) + tester('ntpath.splitdrive("\\\\?\\UNC\\")', ("", "\\\\?\\UNC\\")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\")', ("\\\\?\\UNC\\server\\", "")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share")', ("\\\\?\\UNC\\server\\share", "")) + tester('ntpath.splitdrive("\\\\?\\UNC\\server\\share\\dir")', + ("\\\\?\\UNC\\server\\share", "\\dir")) + tester('ntpath.splitdrive("\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}\\spam")', + ('\\\\?\\VOLUME{00000000-0000-0000-0000-000000000000}', '\\spam')) + tester('ntpath.splitdrive("\\\\?\\BootPartition\\")', ("\\\\?\\BootPartition", "\\")) def test_split(self): tester('ntpath.split("c:\\foo\\bar")', ('c:\\foo', 'bar')) diff --git a/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst new file mode 100644 index 00000000000000..8894493e97410f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2022-04-24-22-26-45.gh-issue-81790.M5Rvpm.rst @@ -0,0 +1,2 @@ +:func:`os.path.splitdrive` now understands DOS device paths with UNC +links (beginning ``\\?\UNC\``). Contributed by Barney Gale.