From c397a23922d7d2e7beabee4d834689cfebe1b2b8 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Fri, 19 Apr 2024 10:47:11 +0200 Subject: [PATCH 01/28] Add C implementation of `ntpath.splitroot()` --- Include/internal/pycore_fileutils.h | 4 + Lib/ntpath.py | 117 ++++++++++++++++------------ Modules/clinic/posixmodule.c.h | 38 ++++++++- Modules/posixmodule.c | 29 +++++++ Python/fileutils.c | 57 ++++++++++++++ 5 files changed, 195 insertions(+), 50 deletions(-) diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 5c55282fa39e6f..99cb29a6781bcd 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -290,6 +290,10 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +#ifdef MS_WINDOWS +extern void _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); +#endif + // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. // Usage: diff --git a/Lib/ntpath.py b/Lib/ntpath.py index aba18bfe407abf..1f00f0118b4070 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -166,57 +166,76 @@ def splitdrive(p): drive, root, tail = splitroot(p) return drive, root + tail - -def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] +try: + from nt import _path_splitroot_ex +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p + # Relative path, e.g. Windows + return empty, empty, p +else: + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 0398629e3c10ce..56f14cbc573aaa 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2012,6 +2012,38 @@ os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py #endif /* defined(MS_WINDOWS) */ +#if defined(MS_WINDOWS) && defined(MS_WINDOWS) + +PyDoc_STRVAR(os__path_splitroot_ex__doc__, +"_path_splitroot_ex($module, path, /)\n" +"--\n" +"\n"); + +#define OS__PATH_SPLITROOT_EX_METHODDEF \ + {"_path_splitroot_ex", (PyCFunction)os__path_splitroot_ex, METH_O, os__path_splitroot_ex__doc__}, + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path); + +static PyObject * +os__path_splitroot_ex(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + PyObject *path; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("_path_splitroot_ex", "argument", "str", arg); + goto exit; + } + path = arg; + return_value = os__path_splitroot_ex_impl(module, path); + +exit: + return return_value; +} + +#endif /* defined(MS_WINDOWS) && defined(MS_WINDOWS) */ + #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_isdir__doc__, @@ -12039,6 +12071,10 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS__PATH_SPLITROOT_METHODDEF #endif /* !defined(OS__PATH_SPLITROOT_METHODDEF) */ +#ifndef OS__PATH_SPLITROOT_EX_METHODDEF + #define OS__PATH_SPLITROOT_EX_METHODDEF +#endif /* !defined(OS__PATH_SPLITROOT_EX_METHODDEF) */ + #ifndef OS__PATH_ISDIR_METHODDEF #define OS__PATH_ISDIR_METHODDEF #endif /* !defined(OS__PATH_ISDIR_METHODDEF) */ @@ -12602,4 +12638,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=511f0788a6b90db0 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d009cc9854b11b18 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 5e54cf64cd563e..a42c3770b7cbbe 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5085,6 +5085,34 @@ os__path_splitroot_impl(PyObject *module, path_t *path) return result; } +#ifdef MS_WINDOWS +/*[clinic input] +os._path_splitroot_ex + + path: unicode + / + +[clinic start generated code]*/ + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path) +/*[clinic end generated code: output=de97403d3dfebc40 input=bebce42edb41f967]*/ +{ + Py_ssize_t len; + wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); + Py_ssize_t drvsize; + Py_ssize_t rootsize; + _Py_skiproot(buffer, len, &drvsize, &rootsize); + wchar_t *p = buffer; + PyObject *drv = PyUnicode_FromWideChar(p, drvsize); + p += drvsize; + PyObject *root = PyUnicode_FromWideChar(p, rootsize); + p += rootsize; + PyObject *tail = PyUnicode_FromWideChar(p, len - drvsize - rootsize); + PyMem_Free(buffer); + return Py_BuildValue("(OOO)", drv, root, tail); +} +#endif /*[clinic input] os._path_isdir @@ -16799,6 +16827,7 @@ static PyMethodDef posix_methods[] = { OS__FINDFIRSTFILE_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF OS__PATH_SPLITROOT_METHODDEF + OS__PATH_SPLITROOT_EX_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF OS_URANDOM_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index 882d3299575cf3..19e7547e393150 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2295,6 +2295,63 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ +#ifdef MS_WINDOWS +void +_Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize) +{ + wchar_t *pEnd = size >= 0 ? &path[size] : NULL; +#define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) +#define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) + *drvsize = 0; + *rootsize = 0; + if (IS_SEP(&path[0])) { + if (IS_SEP(&path[1])) { + // Device drives, e.g. \\.\device or \\?\device + // UNC drives, e.g. \\server\share or \\?\UNC\server\share + Py_ssize_t idx; + if (path[2] == L'?' && IS_SEP(&path[3]) && (path[4] == L'U' || path[4] == L'u') && + (path[5] == L'N' || path[5] == L'n') && (path[6] == L'C' || path[6] == L'c') && + IS_SEP(&path[7])) { + idx = 8; + } + else { + idx = 2; + } + while (!IS_END(&path[idx]) && !IS_SEP(&path[idx])) { + idx++; + } + if (IS_END(&path[idx])) { + *drvsize = idx; + } + else { + idx++; + while (!IS_END(&path[idx]) && !IS_SEP(&path[idx])) { + idx++; + } + *drvsize = idx; + if (IS_SEP(&path[idx])) { + *rootsize = 1; + } + } + } + else { + // Relative path with root, e.g. \Windows + *rootsize = 1; + } + } + else if (!IS_END(&path[0]) && path[1] == L':') + { + *drvsize = 2; + if (IS_SEP(&path[2])) { + // Absolute drive-letter path, e.g. X:\Windows + *rootsize = 1; + } + } +#undef IS_SEP +#undef IS_END +} +#endif + // The caller must ensure "buffer" is big enough. static int join_relfile(wchar_t *buffer, size_t bufsize, From 4bd9734edff71ce64ed9bf7d2803bd194993807b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 08:50:49 +0000 Subject: [PATCH 02/28] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst new file mode 100644 index 00000000000000..0f169d6ba7e06a --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst @@ -0,0 +1 @@ +Speedup :func:`os.path.splitroot` on Windows. From 0177f704c87bbc8903abacd55a2a7226c1cae7dc Mon Sep 17 00:00:00 2001 From: nineteendo Date: Fri, 19 Apr 2024 14:11:47 +0200 Subject: [PATCH 03/28] Follow Pep 7 --- Python/fileutils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Python/fileutils.c b/Python/fileutils.c index 19e7547e393150..8dd6799f79a0d3 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2311,7 +2311,8 @@ _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *ro Py_ssize_t idx; if (path[2] == L'?' && IS_SEP(&path[3]) && (path[4] == L'U' || path[4] == L'u') && (path[5] == L'N' || path[5] == L'n') && (path[6] == L'C' || path[6] == L'c') && - IS_SEP(&path[7])) { + IS_SEP(&path[7])) + { idx = 8; } else { @@ -2339,8 +2340,7 @@ _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *ro *rootsize = 1; } } - else if (!IS_END(&path[0]) && path[1] == L':') - { + else if (!IS_END(&path[0]) && path[1] == L':') { *drvsize = 2; if (IS_SEP(&path[2])) { // Absolute drive-letter path, e.g. X:\Windows From bb816e9a536d2956bebf63c79c94241df66520b0 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Fri, 19 Apr 2024 20:59:11 +0200 Subject: [PATCH 04/28] Fix memory leak --- Modules/posixmodule.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index a42c3770b7cbbe..00ad6eef9de828 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5110,7 +5110,11 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *path) p += rootsize; PyObject *tail = PyUnicode_FromWideChar(p, len - drvsize - rootsize); PyMem_Free(buffer); - return Py_BuildValue("(OOO)", drv, root, tail); + PyObject *result = Py_BuildValue("(OOO)", drv, root, tail); + Py_DECREF(drv); + Py_DECREF(root); + Py_DECREF(tail); + return result; } #endif From d00131abd020244102429c7b145df313d0ada51a Mon Sep 17 00:00:00 2001 From: nineteendo Date: Sat, 20 Apr 2024 14:02:14 +0200 Subject: [PATCH 05/28] Add C implementation of `posixpath.splitroot()` --- Include/internal/pycore_fileutils.h | 2 - Lib/posixpath.py | 73 ++++++++++++------- ...-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 2 +- Modules/clinic/posixmodule.c.h | 66 ++++++++--------- Modules/posixmodule.c | 65 ++++++++--------- Python/fileutils.c | 45 ++++++++++-- 6 files changed, 146 insertions(+), 107 deletions(-) diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 99cb29a6781bcd..1424248ac71303 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -290,9 +290,7 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ -#ifdef MS_WINDOWS extern void _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); -#endif // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. diff --git a/Lib/posixpath.py b/Lib/posixpath.py index f1960ddb88e590..b133c628d7f854 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -134,33 +134,52 @@ def splitdrive(p): return p[:0], p -def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'/' - empty = b'' - else: - sep = '/' - empty = '' - if p[:1] != sep: - # Relative path, e.g.: 'foo' - return empty, empty, p - elif p[1:2] != sep or p[2:3] == sep: - # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. - return empty, sep, p[1:] - else: - # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see - # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 - return empty, p[:2], p[2:] +try: + from posix import _path_splitroot_ex +except ImportError: + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] +else: + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + _, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return b'', os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst index 0f169d6ba7e06a..e10ef4dd083a56 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst @@ -1 +1 @@ -Speedup :func:`os.path.splitroot` on Windows. +Speedup :func:`os.path.splitroot`. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 56f14cbc573aaa..c021abe0f1ba0f 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2012,38 +2012,6 @@ os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py #endif /* defined(MS_WINDOWS) */ -#if defined(MS_WINDOWS) && defined(MS_WINDOWS) - -PyDoc_STRVAR(os__path_splitroot_ex__doc__, -"_path_splitroot_ex($module, path, /)\n" -"--\n" -"\n"); - -#define OS__PATH_SPLITROOT_EX_METHODDEF \ - {"_path_splitroot_ex", (PyCFunction)os__path_splitroot_ex, METH_O, os__path_splitroot_ex__doc__}, - -static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path); - -static PyObject * -os__path_splitroot_ex(PyObject *module, PyObject *arg) -{ - PyObject *return_value = NULL; - PyObject *path; - - if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("_path_splitroot_ex", "argument", "str", arg); - goto exit; - } - path = arg; - return_value = os__path_splitroot_ex_impl(module, path); - -exit: - return return_value; -} - -#endif /* defined(MS_WINDOWS) && defined(MS_WINDOWS) */ - #if defined(MS_WINDOWS) PyDoc_STRVAR(os__path_isdir__doc__, @@ -2280,6 +2248,34 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ +PyDoc_STRVAR(os__path_splitroot_ex__doc__, +"_path_splitroot_ex($module, path, /)\n" +"--\n" +"\n"); + +#define OS__PATH_SPLITROOT_EX_METHODDEF \ + {"_path_splitroot_ex", (PyCFunction)os__path_splitroot_ex, METH_O, os__path_splitroot_ex__doc__}, + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path); + +static PyObject * +os__path_splitroot_ex(PyObject *module, PyObject *arg) +{ + PyObject *return_value = NULL; + PyObject *path; + + if (!PyUnicode_Check(arg)) { + _PyArg_BadArgument("_path_splitroot_ex", "argument", "str", arg); + goto exit; + } + path = arg; + return_value = os__path_splitroot_ex_impl(module, path); + +exit: + return return_value; +} + PyDoc_STRVAR(os__path_normpath__doc__, "_path_normpath($module, /, path)\n" "--\n" @@ -12071,10 +12067,6 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS__PATH_SPLITROOT_METHODDEF #endif /* !defined(OS__PATH_SPLITROOT_METHODDEF) */ -#ifndef OS__PATH_SPLITROOT_EX_METHODDEF - #define OS__PATH_SPLITROOT_EX_METHODDEF -#endif /* !defined(OS__PATH_SPLITROOT_EX_METHODDEF) */ - #ifndef OS__PATH_ISDIR_METHODDEF #define OS__PATH_ISDIR_METHODDEF #endif /* !defined(OS__PATH_ISDIR_METHODDEF) */ @@ -12638,4 +12630,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=d009cc9854b11b18 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=054328aa869f1582 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 00ad6eef9de828..b0c982ee694419 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5085,39 +5085,6 @@ os__path_splitroot_impl(PyObject *module, path_t *path) return result; } -#ifdef MS_WINDOWS -/*[clinic input] -os._path_splitroot_ex - - path: unicode - / - -[clinic start generated code]*/ - -static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path) -/*[clinic end generated code: output=de97403d3dfebc40 input=bebce42edb41f967]*/ -{ - Py_ssize_t len; - wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); - Py_ssize_t drvsize; - Py_ssize_t rootsize; - _Py_skiproot(buffer, len, &drvsize, &rootsize); - wchar_t *p = buffer; - PyObject *drv = PyUnicode_FromWideChar(p, drvsize); - p += drvsize; - PyObject *root = PyUnicode_FromWideChar(p, rootsize); - p += rootsize; - PyObject *tail = PyUnicode_FromWideChar(p, len - drvsize - rootsize); - PyMem_Free(buffer); - PyObject *result = Py_BuildValue("(OOO)", drv, root, tail); - Py_DECREF(drv); - Py_DECREF(root); - Py_DECREF(tail); - return result; -} -#endif - /*[clinic input] os._path_isdir @@ -5499,6 +5466,38 @@ os__path_islink_impl(PyObject *module, PyObject *path) #endif /* MS_WINDOWS */ +/*[clinic input] +os._path_splitroot_ex + + path: unicode + / + +[clinic start generated code]*/ + +static PyObject * +os__path_splitroot_ex_impl(PyObject *module, PyObject *path) +/*[clinic end generated code: output=de97403d3dfebc40 input=bebce42edb41f967]*/ +{ + Py_ssize_t len; + wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); + Py_ssize_t drvsize; + Py_ssize_t rootsize; + _Py_skiproot(buffer, len, &drvsize, &rootsize); + wchar_t *p = buffer; + PyObject *drv = PyUnicode_FromWideChar(p, drvsize); + p += drvsize; + PyObject *root = PyUnicode_FromWideChar(p, rootsize); + p += rootsize; + PyObject *tail = PyUnicode_FromWideChar(p, len - drvsize - rootsize); + PyMem_Free(buffer); + PyObject *result = Py_BuildValue("(OOO)", drv, root, tail); + Py_DECREF(drv); + Py_DECREF(root); + Py_DECREF(tail); + return result; +} + + /*[clinic input] os._path_normpath diff --git a/Python/fileutils.c b/Python/fileutils.c index 8dd6799f79a0d3..e8721d0c7c75be 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2295,15 +2295,31 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ -#ifdef MS_WINDOWS void _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize) { +#ifndef MS_WINDOWS +#define IS_SEP(x) (*(x) == SEP) + *drvsize = 0; + if (!IS_SEP(&path[0])) { + // Relative path, e.g.: 'foo' + *rootsize = 0; + } + else if (!IS_SEP(&path[1]) || IS_SEP(&path[2])) { + // Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + *rootsize = 1; + } + else { + // Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + *rootsize = 2; + } +#undef IS_SEP +#else wchar_t *pEnd = size >= 0 ? &path[size] : NULL; #define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) #define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) - *drvsize = 0; - *rootsize = 0; +#define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) if (IS_SEP(&path[0])) { if (IS_SEP(&path[1])) { // Device drives, e.g. \\.\device or \\?\device @@ -2318,25 +2334,30 @@ _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *ro else { idx = 2; } - while (!IS_END(&path[idx]) && !IS_SEP(&path[idx])) { + while (!SEP_OR_END(&path[idx])) { idx++; } if (IS_END(&path[idx])) { *drvsize = idx; + *rootsize = 0; } else { idx++; - while (!IS_END(&path[idx]) && !IS_SEP(&path[idx])) { + while (!SEP_OR_END(&path[idx])) { idx++; } *drvsize = idx; - if (IS_SEP(&path[idx])) { + if (IS_END(&path[idx])) { + *rootsize = 0; + } + else { *rootsize = 1; } } } else { // Relative path with root, e.g. \Windows + *drvsize = 0; *rootsize = 1; } } @@ -2346,11 +2367,21 @@ _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *ro // Absolute drive-letter path, e.g. X:\Windows *rootsize = 1; } + else { + // Relative path with drive, e.g. X:Windows + *rootsize = 0; + } } + else { + // Relative path, e.g. Windows + *drvsize = 0; + *rootsize = 0; + } +#undef SEP_OR_END #undef IS_SEP #undef IS_END -} #endif +} // The caller must ensure "buffer" is big enough. static int From a4e5d15557088407e1df49cc57910a73ca75e761 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Sat, 20 Apr 2024 14:11:08 +0200 Subject: [PATCH 06/28] Revert newlines --- Lib/ntpath.py | 1 + Modules/posixmodule.c | 1 + 2 files changed, 2 insertions(+) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 1f00f0118b4070..e810b655e5ac85 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -166,6 +166,7 @@ def splitdrive(p): drive, root, tail = splitroot(p) return drive, root + tail + try: from nt import _path_splitroot_ex except ImportError: diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index b0c982ee694419..364fed3e374049 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5085,6 +5085,7 @@ os__path_splitroot_impl(PyObject *module, path_t *path) return result; } + /*[clinic input] os._path_isdir From 7e1433c58e2d93d5b164d4f853eec27e9e3a3ab5 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Sat, 20 Apr 2024 16:38:25 +0200 Subject: [PATCH 07/28] Use `_Py_splitroot()` --- Lib/test/test_ntpath.py | 1 + Modules/posixmodule.c | 13 +++------- Python/fileutils.c | 54 +++++++++++++++++------------------------ 3 files changed, 27 insertions(+), 41 deletions(-) diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 31156130fcc747..7f91bf1c2b837a 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -374,6 +374,7 @@ def test_normpath(self): tester("ntpath.normpath('\\\\foo\\')", '\\\\foo\\') tester("ntpath.normpath('\\\\foo')", '\\\\foo') tester("ntpath.normpath('\\\\')", '\\\\') + tester("ntpath.normpath('//?/UNC/server/share/..')", '\\\\?\\UNC\\server\\share\\') def test_realpath_curdir(self): expected = ntpath.normpath(os.getcwd()) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 364fed3e374049..ae4058d79321d9 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5479,17 +5479,12 @@ static PyObject * os__path_splitroot_ex_impl(PyObject *module, PyObject *path) /*[clinic end generated code: output=de97403d3dfebc40 input=bebce42edb41f967]*/ { - Py_ssize_t len; + Py_ssize_t len, drvsize, rootsize; wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); - Py_ssize_t drvsize; - Py_ssize_t rootsize; _Py_skiproot(buffer, len, &drvsize, &rootsize); - wchar_t *p = buffer; - PyObject *drv = PyUnicode_FromWideChar(p, drvsize); - p += drvsize; - PyObject *root = PyUnicode_FromWideChar(p, rootsize); - p += rootsize; - PyObject *tail = PyUnicode_FromWideChar(p, len - drvsize - rootsize); + PyObject *drv = PyUnicode_FromWideChar(buffer, drvsize); + PyObject *root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize); + PyObject *tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], len - drvsize - rootsize); PyMem_Free(buffer); PyObject *result = Py_BuildValue("(OOO)", drv, root, tail); Py_DECREF(drv); diff --git a/Python/fileutils.c b/Python/fileutils.c index e8721d0c7c75be..316e3d1ace13ee 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2499,49 +2499,39 @@ _Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *normsize) #endif #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) - // Skip leading '.\' if (p1[0] == L'.' && IS_SEP(&p1[1])) { + // Skip leading '.\' path = &path[2]; - while (IS_SEP(path) && !IS_END(path)) { + while (IS_SEP(path)) { path++; } p1 = p2 = minP2 = path; lastC = SEP; } + else { + Py_ssize_t drvsize, rootsize; + _Py_skiproot(path, size, &drvsize, &rootsize); + if (drvsize || rootsize) { + // Skip past root and update minP2 + p1 = &path[drvsize + rootsize]; +#ifndef ALTSEP + p2 = p1; +#else + for (; p2 < p1; ++p2) { + if (*p2 == ALTSEP) { + *p2 = SEP; + } + } +#endif + minP2 = p2 - 1; + lastC = *minP2; #ifdef MS_WINDOWS - // Skip past drive segment and update minP2 - else if (p1[0] && p1[1] == L':') { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2; - lastC = L':'; - } - // Skip past all \\-prefixed paths, including \\?\, \\.\, - // and network paths, including the first segment. - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1])) { - int sepCount = 2; - *p2++ = SEP; - *p2++ = SEP; - p1 += 2; - for (; !IS_END(p1) && sepCount; ++p1) { - if (IS_SEP(p1)) { - --sepCount; - *p2++ = lastC = SEP; - } else { - *p2++ = lastC = *p1; + if (lastC != SEP) { + minP2++; } +#endif } - minP2 = p2 - 1; } -#else - // Skip past two leading SEPs - else if (IS_SEP(&p1[0]) && IS_SEP(&p1[1]) && !IS_SEP(&p1[2])) { - *p2++ = *p1++; - *p2++ = *p1++; - minP2 = p2 - 1; // Absolute path has SEP at minP2 - lastC = SEP; - } -#endif /* MS_WINDOWS */ /* if pEnd is specified, check that. Else, check for null terminator */ for (; !IS_END(p1); ++p1) { From 54911528b71b1dbfc7141c91eae42a1cc398e05c Mon Sep 17 00:00:00 2001 From: nineteendo Date: Sat, 20 Apr 2024 17:02:35 +0200 Subject: [PATCH 08/28] Rename to `_path_splitroot` replacing old one --- Lib/importlib/_bootstrap_external.py | 28 ++++----- Lib/ntpath.py | 6 +- Lib/posixpath.py | 6 +- Modules/clinic/posixmodule.c.h | 86 +++------------------------- Modules/posixmodule.c | 53 +---------------- 5 files changed, 33 insertions(+), 146 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index 0a11dc9efc252c..c17de98f081013 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -104,28 +104,29 @@ def _path_join(*path_parts): return "" if len(path_parts) == 1: return path_parts[0] - root = "" + anchor = "" path = [] - for new_root, tail in map(_os._path_splitroot, path_parts): - if new_root.startswith(path_sep_tuple) or new_root.endswith(path_sep_tuple): - root = new_root.rstrip(path_separators) or root + for drive, root, tail in map(_os._path_splitroot, path_parts): + new_anchor = drive + root + if new_anchor.startswith(path_sep_tuple) or new_anchor.endswith(path_sep_tuple): + anchor = new_anchor.rstrip(path_separators) or anchor path = [path_sep + tail] - elif new_root.endswith(':'): - if root.casefold() != new_root.casefold(): + elif new_anchor.endswith(':'): + if anchor.casefold() != new_anchor.casefold(): # Drive relative paths have to be resolved by the OS, so we reset the # tail but do not add a path_sep prefix. - root = new_root + anchor = new_anchor path = [tail] else: path.append(tail) else: - root = new_root or root + anchor = new_anchor or anchor path.append(tail) path = [p.rstrip(path_separators) for p in path if p] if len(path) == 1 and not path[0]: - # Avoid losing the root's trailing separator when joining with nothing - return root + path_sep - return root + path_sep.join(path) + # Avoid losing the anchor's trailing separator when joining with nothing + return anchor + path_sep + return anchor + path_sep.join(path) else: def _path_join(*path_parts): @@ -178,8 +179,9 @@ def _path_isabs(path): """Replacement for os.path.isabs.""" if not path: return False - root = _os._path_splitroot(path)[0].replace('/', '\\') - return len(root) > 1 and (root.startswith('\\\\') or root.endswith('\\')) + drive, root, _ = _os._path_splitroot(path) + anchor = (drive + root).replace('/', '\\') + return len(anchor) > 1 and (anchor.startswith('\\\\') or anchor.endswith('\\')) else: def _path_isabs(path): diff --git a/Lib/ntpath.py b/Lib/ntpath.py index e810b655e5ac85..552277ba86affe 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -168,7 +168,7 @@ def splitdrive(p): try: - from nt import _path_splitroot_ex + from nt import _path_splitroot except ImportError: def splitroot(p): """Split a pathname into drive, root and tail. The drive is defined @@ -234,9 +234,9 @@ def splitroot(p): """ p = os.fspath(p) if isinstance(p, bytes): - drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) + drive, root, tail = _path_splitroot(os.fsdecode(p)) return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) - return _path_splitroot_ex(p) + return _path_splitroot(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index b133c628d7f854..ecfcb27657f006 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -135,7 +135,7 @@ def splitdrive(p): try: - from posix import _path_splitroot_ex + from posix import _path_splitroot except ImportError: def splitroot(p): """Split a pathname into drive, root and tail. On Posix, drive is always @@ -177,9 +177,9 @@ def splitroot(p): """ p = os.fspath(p) if isinstance(p, bytes): - _, root, tail = _path_splitroot_ex(os.fsdecode(p)) + _, root, tail = _path_splitroot(os.fsdecode(p)) return b'', os.fsencode(root), os.fsencode(tail) - return _path_splitroot_ex(p) + return _path_splitroot(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index c021abe0f1ba0f..8b95d8203e1d72 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1950,70 +1950,6 @@ os__getvolumepathname(PyObject *module, PyObject *const *args, Py_ssize_t nargs, #if defined(MS_WINDOWS) -PyDoc_STRVAR(os__path_splitroot__doc__, -"_path_splitroot($module, /, path)\n" -"--\n" -"\n" -"Removes everything after the root on Win32."); - -#define OS__PATH_SPLITROOT_METHODDEF \ - {"_path_splitroot", _PyCFunction_CAST(os__path_splitroot), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot__doc__}, - -static PyObject * -os__path_splitroot_impl(PyObject *module, path_t *path); - -static PyObject * -os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) -{ - PyObject *return_value = NULL; - #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - - #define NUM_KEYWORDS 1 - static struct { - PyGC_Head _this_is_not_used; - PyObject_VAR_HEAD - PyObject *ob_item[NUM_KEYWORDS]; - } _kwtuple = { - .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(path), }, - }; - #undef NUM_KEYWORDS - #define KWTUPLE (&_kwtuple.ob_base.ob_base) - - #else // !Py_BUILD_CORE - # define KWTUPLE NULL - #endif // !Py_BUILD_CORE - - static const char * const _keywords[] = {"path", NULL}; - static _PyArg_Parser _parser = { - .keywords = _keywords, - .fname = "_path_splitroot", - .kwtuple = KWTUPLE, - }; - #undef KWTUPLE - PyObject *argsbuf[1]; - path_t path = PATH_T_INITIALIZE("_path_splitroot", "path", 0, 0); - - args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); - if (!args) { - goto exit; - } - if (!path_converter(args[0], &path)) { - goto exit; - } - return_value = os__path_splitroot_impl(module, &path); - -exit: - /* Cleanup for path */ - path_cleanup(&path); - - return return_value; -} - -#endif /* defined(MS_WINDOWS) */ - -#if defined(MS_WINDOWS) - PyDoc_STRVAR(os__path_isdir__doc__, "_path_isdir($module, /, s)\n" "--\n" @@ -2248,29 +2184,29 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ -PyDoc_STRVAR(os__path_splitroot_ex__doc__, -"_path_splitroot_ex($module, path, /)\n" +PyDoc_STRVAR(os__path_splitroot__doc__, +"_path_splitroot($module, path, /)\n" "--\n" "\n"); -#define OS__PATH_SPLITROOT_EX_METHODDEF \ - {"_path_splitroot_ex", (PyCFunction)os__path_splitroot_ex, METH_O, os__path_splitroot_ex__doc__}, +#define OS__PATH_SPLITROOT_METHODDEF \ + {"_path_splitroot", (PyCFunction)os__path_splitroot, METH_O, os__path_splitroot__doc__}, static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path); +os__path_splitroot_impl(PyObject *module, PyObject *path); static PyObject * -os__path_splitroot_ex(PyObject *module, PyObject *arg) +os__path_splitroot(PyObject *module, PyObject *arg) { PyObject *return_value = NULL; PyObject *path; if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("_path_splitroot_ex", "argument", "str", arg); + _PyArg_BadArgument("_path_splitroot", "argument", "str", arg); goto exit; } path = arg; - return_value = os__path_splitroot_ex_impl(module, path); + return_value = os__path_splitroot_impl(module, path); exit: return return_value; @@ -12063,10 +11999,6 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS__GETVOLUMEPATHNAME_METHODDEF #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */ -#ifndef OS__PATH_SPLITROOT_METHODDEF - #define OS__PATH_SPLITROOT_METHODDEF -#endif /* !defined(OS__PATH_SPLITROOT_METHODDEF) */ - #ifndef OS__PATH_ISDIR_METHODDEF #define OS__PATH_ISDIR_METHODDEF #endif /* !defined(OS__PATH_ISDIR_METHODDEF) */ @@ -12630,4 +12562,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=054328aa869f1582 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=fa17bdc7ac8fa9b9 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index ae4058d79321d9..340c0c7c34eb0f 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5040,52 +5040,6 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) } -/*[clinic input] -os._path_splitroot - - path: path_t - -Removes everything after the root on Win32. -[clinic start generated code]*/ - -static PyObject * -os__path_splitroot_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=ab7f1a88b654581c input=dc93b1d3984cffb6]*/ -{ - wchar_t *buffer; - wchar_t *end; - PyObject *result = NULL; - HRESULT ret; - - buffer = (wchar_t*)PyMem_Malloc(sizeof(wchar_t) * (wcslen(path->wide) + 1)); - if (!buffer) { - return NULL; - } - wcscpy(buffer, path->wide); - for (wchar_t *p = wcschr(buffer, L'/'); p; p = wcschr(p, L'/')) { - *p = L'\\'; - } - - Py_BEGIN_ALLOW_THREADS - ret = PathCchSkipRoot(buffer, &end); - Py_END_ALLOW_THREADS - if (FAILED(ret)) { - result = Py_BuildValue("sO", "", path->object); - } else if (end != buffer) { - size_t rootLen = (size_t)(end - buffer); - result = Py_BuildValue("NN", - PyUnicode_FromWideChar(path->wide, rootLen), - PyUnicode_FromWideChar(path->wide + rootLen, -1) - ); - } else { - result = Py_BuildValue("Os", path->object, ""); - } - PyMem_Free(buffer); - - return result; -} - - /*[clinic input] os._path_isdir @@ -5468,7 +5422,7 @@ os__path_islink_impl(PyObject *module, PyObject *path) /*[clinic input] -os._path_splitroot_ex +os._path_splitroot path: unicode / @@ -5476,8 +5430,8 @@ os._path_splitroot_ex [clinic start generated code]*/ static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *path) -/*[clinic end generated code: output=de97403d3dfebc40 input=bebce42edb41f967]*/ +os__path_splitroot_impl(PyObject *module, PyObject *path) +/*[clinic end generated code: output=6904e00a6a970b9b input=4ef301247820b583]*/ { Py_ssize_t len, drvsize, rootsize; wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); @@ -16826,7 +16780,6 @@ static PyMethodDef posix_methods[] = { OS__FINDFIRSTFILE_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF OS__PATH_SPLITROOT_METHODDEF - OS__PATH_SPLITROOT_EX_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF OS_URANDOM_METHODDEF From 2244d3f28fa6d1eae3d30aa54e17f121e6832370 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Mon, 22 Apr 2024 17:09:42 +0200 Subject: [PATCH 09/28] Update Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst Co-authored-by: Steve Dower --- .../2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst index e10ef4dd083a56..dfdf250710778e 100644 --- a/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst +++ b/Misc/NEWS.d/next/Core and Builtins/2024-04-19-08-50-48.gh-issue-102511.qDEB66.rst @@ -1 +1 @@ -Speedup :func:`os.path.splitroot`. +Speed up :func:`os.path.splitroot` with a native implementation. From ca0761a50d90c0eb524554556b69bf1424b0763f Mon Sep 17 00:00:00 2001 From: nineteendo Date: Mon, 22 Apr 2024 17:17:28 +0200 Subject: [PATCH 10/28] Rename old function to `_path_splitanchor` Co-authored-by: Steve Dower --- Lib/importlib/_bootstrap_external.py | 28 ++++++----- Modules/clinic/posixmodule.c.h | 70 +++++++++++++++++++++++++++- Modules/posixmodule.c | 61 ++++++++++++++++++++++-- Python/fileutils.c | 4 +- 4 files changed, 142 insertions(+), 21 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index c17de98f081013..ac0dee4ef76f62 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -104,29 +104,28 @@ def _path_join(*path_parts): return "" if len(path_parts) == 1: return path_parts[0] - anchor = "" + root = "" path = [] - for drive, root, tail in map(_os._path_splitroot, path_parts): - new_anchor = drive + root - if new_anchor.startswith(path_sep_tuple) or new_anchor.endswith(path_sep_tuple): - anchor = new_anchor.rstrip(path_separators) or anchor + for new_root, tail in map(_os._path_splitanchor, path_parts): + if new_root.startswith(path_sep_tuple) or new_root.endswith(path_sep_tuple): + root = new_root.rstrip(path_separators) or root path = [path_sep + tail] - elif new_anchor.endswith(':'): - if anchor.casefold() != new_anchor.casefold(): + elif new_root.endswith(':'): + if root.casefold() != new_root.casefold(): # Drive relative paths have to be resolved by the OS, so we reset the # tail but do not add a path_sep prefix. - anchor = new_anchor + root = new_root path = [tail] else: path.append(tail) else: - anchor = new_anchor or anchor + root = new_root or root path.append(tail) path = [p.rstrip(path_separators) for p in path if p] if len(path) == 1 and not path[0]: - # Avoid losing the anchor's trailing separator when joining with nothing - return anchor + path_sep - return anchor + path_sep.join(path) + # Avoid losing the root's trailing separator when joining with nothing + return root + path_sep + return root + path_sep.join(path) else: def _path_join(*path_parts): @@ -179,9 +178,8 @@ def _path_isabs(path): """Replacement for os.path.isabs.""" if not path: return False - drive, root, _ = _os._path_splitroot(path) - anchor = (drive + root).replace('/', '\\') - return len(anchor) > 1 and (anchor.startswith('\\\\') or anchor.endswith('\\')) + root = _os._path_splitanchor(path)[0].replace('/', '\\') + return len(root) > 1 and (root.startswith('\\\\') or root.endswith('\\')) else: def _path_isabs(path): diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 8b95d8203e1d72..fd03096e7c4ed9 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1950,6 +1950,70 @@ os__getvolumepathname(PyObject *module, PyObject *const *args, Py_ssize_t nargs, #if defined(MS_WINDOWS) +PyDoc_STRVAR(os__path_splitanchor__doc__, +"_path_splitanchor($module, /, path)\n" +"--\n" +"\n" +"Removes everything after the root on Win32."); + +#define OS__PATH_SPLITANCHOR_METHODDEF \ + {"_path_splitanchor", _PyCFunction_CAST(os__path_splitanchor), METH_FASTCALL|METH_KEYWORDS, os__path_splitanchor__doc__}, + +static PyObject * +os__path_splitanchor_impl(PyObject *module, path_t *path); + +static PyObject * +os__path_splitanchor(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) + + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(path), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"path", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_splitanchor", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + path_t path = PATH_T_INITIALIZE("_path_splitanchor", "path", 0, 0); + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &path)) { + goto exit; + } + return_value = os__path_splitanchor_impl(module, &path); + +exit: + /* Cleanup for path */ + path_cleanup(&path); + + return return_value; +} + +#endif /* defined(MS_WINDOWS) */ + +#if defined(MS_WINDOWS) + PyDoc_STRVAR(os__path_isdir__doc__, "_path_isdir($module, /, s)\n" "--\n" @@ -11999,6 +12063,10 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS__GETVOLUMEPATHNAME_METHODDEF #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */ +#ifndef OS__PATH_SPLITANCHOR_METHODDEF + #define OS__PATH_SPLITANCHOR_METHODDEF +#endif /* !defined(OS__PATH_SPLITANCHOR_METHODDEF) */ + #ifndef OS__PATH_ISDIR_METHODDEF #define OS__PATH_ISDIR_METHODDEF #endif /* !defined(OS__PATH_ISDIR_METHODDEF) */ @@ -12562,4 +12630,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=fa17bdc7ac8fa9b9 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=ead6c64b343365b6 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 340c0c7c34eb0f..0964e1feeb6dc6 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5040,6 +5040,50 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) } +/*[clinic input] +os._path_splitanchor + path: path_t +Removes everything after the root on Win32. +[clinic start generated code]*/ + +static PyObject * +os__path_splitanchor_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=37b687463b40c424 input=00dd0c06233b8cff]*/ +{ + wchar_t *buffer; + wchar_t *end; + PyObject *result = NULL; + HRESULT ret; + + buffer = (wchar_t*)PyMem_Malloc(sizeof(wchar_t) * (wcslen(path->wide) + 1)); + if (!buffer) { + return NULL; + } + wcscpy(buffer, path->wide); + for (wchar_t *p = wcschr(buffer, L'/'); p; p = wcschr(p, L'/')) { + *p = L'\\'; + } + + Py_BEGIN_ALLOW_THREADS + ret = PathCchSkipRoot(buffer, &end); + Py_END_ALLOW_THREADS + if (FAILED(ret)) { + result = Py_BuildValue("sO", "", path->object); + } else if (end != buffer) { + size_t rootLen = (size_t)(end - buffer); + result = Py_BuildValue("NN", + PyUnicode_FromWideChar(path->wide, rootLen), + PyUnicode_FromWideChar(path->wide + rootLen, -1) + ); + } else { + result = Py_BuildValue("Os", path->object, ""); + } + PyMem_Free(buffer); + + return result; +} + + /*[clinic input] os._path_isdir @@ -5434,13 +5478,21 @@ os__path_splitroot_impl(PyObject *module, PyObject *path) /*[clinic end generated code: output=6904e00a6a970b9b input=4ef301247820b583]*/ { Py_ssize_t len, drvsize, rootsize; + PyObject *drv, *root, *tail, *result = NULL; wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); + if (!buffer) { + goto exit; + } _Py_skiproot(buffer, len, &drvsize, &rootsize); - PyObject *drv = PyUnicode_FromWideChar(buffer, drvsize); - PyObject *root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize); - PyObject *tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], len - drvsize - rootsize); + if (!(drv = PyUnicode_FromWideChar(buffer, drvsize)) || + !(root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize)) || + !(tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], len - drvsize - rootsize))) + { + goto exit; + } + result = Py_BuildValue("(OOO)", drv, root, tail); +exit: PyMem_Free(buffer); - PyObject *result = Py_BuildValue("(OOO)", drv, root, tail); Py_DECREF(drv); Py_DECREF(root); Py_DECREF(tail); @@ -16779,6 +16831,7 @@ static PyMethodDef posix_methods[] = { OS__GETFINALPATHNAME_METHODDEF OS__FINDFIRSTFILE_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF + OS__PATH_SPLITANCHOR_METHODDEF OS__PATH_SPLITROOT_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index 316e3d1ace13ee..62dcddde36472e 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2296,8 +2296,10 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ void -_Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize) +_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize) { + assert(drvsize); + assert(rootsize); #ifndef MS_WINDOWS #define IS_SEP(x) (*(x) == SEP) *drvsize = 0; From e1f32e91add7f4e799b1340a58c3f301e4f76215 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Mon, 22 Apr 2024 17:23:58 +0200 Subject: [PATCH 11/28] Fix header --- Include/internal/pycore_fileutils.h | 2 +- Lib/posixpath.py | 1 + Modules/posixmodule.c | 4 +++- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 1424248ac71303..bc8100b58e8ea3 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -290,7 +290,7 @@ extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ -extern void _Py_skiproot(wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); +extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); // Macros to protect CRT calls against instant termination when passed an // invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. diff --git a/Lib/posixpath.py b/Lib/posixpath.py index ecfcb27657f006..b413bebcd57d88 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -177,6 +177,7 @@ def splitroot(p): """ p = os.fspath(p) if isinstance(p, bytes): + # Optimisation: the drive is always empty _, root, tail = _path_splitroot(os.fsdecode(p)) return b'', os.fsencode(root), os.fsencode(tail) return _path_splitroot(p) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 0964e1feeb6dc6..6e32fa7db34919 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5042,13 +5042,15 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) /*[clinic input] os._path_splitanchor + path: path_t + Removes everything after the root on Win32. [clinic start generated code]*/ static PyObject * os__path_splitanchor_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=37b687463b40c424 input=00dd0c06233b8cff]*/ +/*[clinic end generated code: output=37b687463b40c424 input=3cf73c8896e3d7a5]*/ { wchar_t *buffer; wchar_t *end; From 34d4d905746873257d6ba731d9b9f1727e47abb9 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Mon, 22 Apr 2024 22:27:47 +0200 Subject: [PATCH 12/28] Direct C call --- Lib/importlib/_bootstrap_external.py | 4 +- Lib/ntpath.py | 19 +------ Lib/posixpath.py | 19 +------ Modules/clinic/posixmodule.c.h | 79 +++++++++++++++++++--------- Modules/posixmodule.c | 42 +++++++-------- Python/fileutils.c | 2 +- 6 files changed, 81 insertions(+), 84 deletions(-) diff --git a/Lib/importlib/_bootstrap_external.py b/Lib/importlib/_bootstrap_external.py index ac0dee4ef76f62..0a11dc9efc252c 100644 --- a/Lib/importlib/_bootstrap_external.py +++ b/Lib/importlib/_bootstrap_external.py @@ -106,7 +106,7 @@ def _path_join(*path_parts): return path_parts[0] root = "" path = [] - for new_root, tail in map(_os._path_splitanchor, path_parts): + for new_root, tail in map(_os._path_splitroot, path_parts): if new_root.startswith(path_sep_tuple) or new_root.endswith(path_sep_tuple): root = new_root.rstrip(path_separators) or root path = [path_sep + tail] @@ -178,7 +178,7 @@ def _path_isabs(path): """Replacement for os.path.isabs.""" if not path: return False - root = _os._path_splitanchor(path)[0].replace('/', '\\') + root = _os._path_splitroot(path)[0].replace('/', '\\') return len(root) > 1 and (root.startswith('\\\\') or root.endswith('\\')) else: diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 552277ba86affe..1a0701fafbe1ce 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -168,7 +168,7 @@ def splitdrive(p): try: - from nt import _path_splitroot + from nt import _path_splitroot_ex as splitroot except ImportError: def splitroot(p): """Split a pathname into drive, root and tail. The drive is defined @@ -220,23 +220,6 @@ def splitroot(p): else: # Relative path, e.g. Windows return empty, empty, p -else: - def splitroot(p): - """Split a pathname into drive, root and tail. The drive is defined - exactly as in splitdrive(). On Windows, the root may be a single path - separator or an empty string. The tail contains anything after the root. - For example: - - splitroot('//server/share/') == ('//server/share', '/', '') - splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') - splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') - splitroot('Windows/notepad') == ('', '', 'Windows/notepad') - """ - p = os.fspath(p) - if isinstance(p, bytes): - drive, root, tail = _path_splitroot(os.fsdecode(p)) - return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) - return _path_splitroot(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index b413bebcd57d88..44a0fc68f6cc0e 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -135,7 +135,7 @@ def splitdrive(p): try: - from posix import _path_splitroot + from posix import _path_splitroot_ex as splitroot except ImportError: def splitroot(p): """Split a pathname into drive, root and tail. On Posix, drive is always @@ -164,23 +164,6 @@ def splitroot(p): # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 return empty, p[:2], p[2:] -else: - def splitroot(p): - """Split a pathname into drive, root and tail. On Posix, drive is always - empty; the root may be empty, a single slash, or two slashes. The tail - contains anything after the root. For example: - - splitroot('foo/bar') == ('', '', 'foo/bar') - splitroot('/foo/bar') == ('', '/', 'foo/bar') - splitroot('//foo/bar') == ('', '//', 'foo/bar') - splitroot('///foo/bar') == ('', '/', '//foo/bar') - """ - p = os.fspath(p) - if isinstance(p, bytes): - # Optimisation: the drive is always empty - _, root, tail = _path_splitroot(os.fsdecode(p)) - return b'', os.fsencode(root), os.fsencode(tail) - return _path_splitroot(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index fd03096e7c4ed9..5e82e152e3111a 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -1950,20 +1950,20 @@ os__getvolumepathname(PyObject *module, PyObject *const *args, Py_ssize_t nargs, #if defined(MS_WINDOWS) -PyDoc_STRVAR(os__path_splitanchor__doc__, -"_path_splitanchor($module, /, path)\n" +PyDoc_STRVAR(os__path_splitroot__doc__, +"_path_splitroot($module, /, path)\n" "--\n" "\n" "Removes everything after the root on Win32."); -#define OS__PATH_SPLITANCHOR_METHODDEF \ - {"_path_splitanchor", _PyCFunction_CAST(os__path_splitanchor), METH_FASTCALL|METH_KEYWORDS, os__path_splitanchor__doc__}, +#define OS__PATH_SPLITROOT_METHODDEF \ + {"_path_splitroot", _PyCFunction_CAST(os__path_splitroot), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot__doc__}, static PyObject * -os__path_splitanchor_impl(PyObject *module, path_t *path); +os__path_splitroot_impl(PyObject *module, path_t *path); static PyObject * -os__path_splitanchor(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +os__path_splitroot(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) @@ -1987,12 +1987,12 @@ os__path_splitanchor(PyObject *module, PyObject *const *args, Py_ssize_t nargs, static const char * const _keywords[] = {"path", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, - .fname = "_path_splitanchor", + .fname = "_path_splitroot", .kwtuple = KWTUPLE, }; #undef KWTUPLE PyObject *argsbuf[1]; - path_t path = PATH_T_INITIALIZE("_path_splitanchor", "path", 0, 0); + path_t path = PATH_T_INITIALIZE("_path_splitroot", "path", 0, 0); args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { @@ -2001,7 +2001,7 @@ os__path_splitanchor(PyObject *module, PyObject *const *args, Py_ssize_t nargs, if (!path_converter(args[0], &path)) { goto exit; } - return_value = os__path_splitanchor_impl(module, &path); + return_value = os__path_splitroot_impl(module, &path); exit: /* Cleanup for path */ @@ -2248,31 +2248,62 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ -PyDoc_STRVAR(os__path_splitroot__doc__, -"_path_splitroot($module, path, /)\n" +PyDoc_STRVAR(os__path_splitroot_ex__doc__, +"_path_splitroot_ex($module, /, p)\n" "--\n" "\n"); -#define OS__PATH_SPLITROOT_METHODDEF \ - {"_path_splitroot", (PyCFunction)os__path_splitroot, METH_O, os__path_splitroot__doc__}, +#define OS__PATH_SPLITROOT_EX_METHODDEF \ + {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, static PyObject * -os__path_splitroot_impl(PyObject *module, PyObject *path); +os__path_splitroot_ex_impl(PyObject *module, path_t *p); static PyObject * -os__path_splitroot(PyObject *module, PyObject *arg) +os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) { PyObject *return_value = NULL; - PyObject *path; + #if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) - if (!PyUnicode_Check(arg)) { - _PyArg_BadArgument("_path_splitroot", "argument", "str", arg); + #define NUM_KEYWORDS 1 + static struct { + PyGC_Head _this_is_not_used; + PyObject_VAR_HEAD + PyObject *ob_item[NUM_KEYWORDS]; + } _kwtuple = { + .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) + .ob_item = { &_Py_ID(p), }, + }; + #undef NUM_KEYWORDS + #define KWTUPLE (&_kwtuple.ob_base.ob_base) + + #else // !Py_BUILD_CORE + # define KWTUPLE NULL + #endif // !Py_BUILD_CORE + + static const char * const _keywords[] = {"p", NULL}; + static _PyArg_Parser _parser = { + .keywords = _keywords, + .fname = "_path_splitroot_ex", + .kwtuple = KWTUPLE, + }; + #undef KWTUPLE + PyObject *argsbuf[1]; + path_t p = PATH_T_INITIALIZE("_path_splitroot_ex", "p", 0, 0); + + args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); + if (!args) { + goto exit; + } + if (!path_converter(args[0], &p)) { goto exit; } - path = arg; - return_value = os__path_splitroot_impl(module, path); + return_value = os__path_splitroot_ex_impl(module, &p); exit: + /* Cleanup for p */ + path_cleanup(&p); + return return_value; } @@ -12063,9 +12094,9 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #define OS__GETVOLUMEPATHNAME_METHODDEF #endif /* !defined(OS__GETVOLUMEPATHNAME_METHODDEF) */ -#ifndef OS__PATH_SPLITANCHOR_METHODDEF - #define OS__PATH_SPLITANCHOR_METHODDEF -#endif /* !defined(OS__PATH_SPLITANCHOR_METHODDEF) */ +#ifndef OS__PATH_SPLITROOT_METHODDEF + #define OS__PATH_SPLITROOT_METHODDEF +#endif /* !defined(OS__PATH_SPLITROOT_METHODDEF) */ #ifndef OS__PATH_ISDIR_METHODDEF #define OS__PATH_ISDIR_METHODDEF @@ -12630,4 +12661,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=ead6c64b343365b6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=9dd0e27ec4e8edd1 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 6e32fa7db34919..f4d208965759ff 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5041,7 +5041,7 @@ os__getvolumepathname_impl(PyObject *module, path_t *path) /*[clinic input] -os._path_splitanchor +os._path_splitroot path: path_t @@ -5049,8 +5049,8 @@ Removes everything after the root on Win32. [clinic start generated code]*/ static PyObject * -os__path_splitanchor_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=37b687463b40c424 input=3cf73c8896e3d7a5]*/ +os__path_splitroot_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=ab7f1a88b654581c input=dc93b1d3984cffb6]*/ { wchar_t *buffer; wchar_t *end; @@ -5468,33 +5468,33 @@ os__path_islink_impl(PyObject *module, PyObject *path) /*[clinic input] -os._path_splitroot +os._path_splitroot_ex - path: unicode - / + p: path_t [clinic start generated code]*/ static PyObject * -os__path_splitroot_impl(PyObject *module, PyObject *path) -/*[clinic end generated code: output=6904e00a6a970b9b input=4ef301247820b583]*/ -{ - Py_ssize_t len, drvsize, rootsize; - PyObject *drv, *root, *tail, *result = NULL; - wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); - if (!buffer) { - goto exit; - } - _Py_skiproot(buffer, len, &drvsize, &rootsize); - if (!(drv = PyUnicode_FromWideChar(buffer, drvsize)) || - !(root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize)) || - !(tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], len - drvsize - rootsize))) +os__path_splitroot_ex_impl(PyObject *module, path_t *p) +/*[clinic end generated code: output=2001f8839dda3762 input=3b4aad8eba96cfef]*/ +{ + Py_ssize_t len = p->length, drvsize, rootsize; + PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + const wchar_t *wide = p->wide; + _Py_skiproot(wide, len, &drvsize, &rootsize); + if (!(drv = PyUnicode_FromWideChar(wide, drvsize)) || + !(root = PyUnicode_FromWideChar(&wide[drvsize], rootsize)) || + !(tail = PyUnicode_FromWideChar(&wide[drvsize + rootsize], len - drvsize - rootsize))) { goto exit; } + if (p->narrow) { + Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); + Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); + Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); + } result = Py_BuildValue("(OOO)", drv, root, tail); exit: - PyMem_Free(buffer); Py_DECREF(drv); Py_DECREF(root); Py_DECREF(tail); @@ -16833,8 +16833,8 @@ static PyMethodDef posix_methods[] = { OS__GETFINALPATHNAME_METHODDEF OS__FINDFIRSTFILE_METHODDEF OS__GETVOLUMEPATHNAME_METHODDEF - OS__PATH_SPLITANCHOR_METHODDEF OS__PATH_SPLITROOT_METHODDEF + OS__PATH_SPLITROOT_EX_METHODDEF OS__PATH_NORMPATH_METHODDEF OS_GETLOADAVG_METHODDEF OS_URANDOM_METHODDEF diff --git a/Python/fileutils.c b/Python/fileutils.c index 62dcddde36472e..0afc744a311578 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2318,7 +2318,7 @@ _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize } #undef IS_SEP #else - wchar_t *pEnd = size >= 0 ? &path[size] : NULL; + const wchar_t *pEnd = size >= 0 ? &path[size] : NULL; #define IS_END(x) (pEnd ? (x) == pEnd : !*(x)) #define IS_SEP(x) (*(x) == SEP || *(x) == ALTSEP) #define SEP_OR_END(x) (IS_SEP(x) || IS_END(x)) From 30d613ba80af1f6564315bd648751994494f07f0 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 08:08:01 +0200 Subject: [PATCH 13/28] Allow embedded null --- Modules/clinic/posixmodule.c.h | 18 +++++++----------- Modules/posixmodule.c | 33 +++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 5e82e152e3111a..14206dc45c29b1 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2251,13 +2251,14 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj PyDoc_STRVAR(os__path_splitroot_ex__doc__, "_path_splitroot_ex($module, /, p)\n" "--\n" -"\n"); +"\n" +"Split a pathname into drive, root and tail."); #define OS__PATH_SPLITROOT_EX_METHODDEF \ {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, static PyObject * -os__path_splitroot_ex_impl(PyObject *module, path_t *p); +os__path_splitroot_ex_impl(PyObject *module, PyObject *p); static PyObject * os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -2289,21 +2290,16 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - path_t p = PATH_T_INITIALIZE("_path_splitroot_ex", "p", 0, 0); + PyObject *p; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { goto exit; } - if (!path_converter(args[0], &p)) { - goto exit; - } - return_value = os__path_splitroot_ex_impl(module, &p); + p = args[0]; + return_value = os__path_splitroot_ex_impl(module, p); exit: - /* Cleanup for p */ - path_cleanup(&p); - return return_value; } @@ -12661,4 +12657,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=9dd0e27ec4e8edd1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5bd32f18f4146963 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index f4d208965759ff..1a6b4308ac6016 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1205,7 +1205,7 @@ path_cleanup(path_t *path) } static int -path_converter(PyObject *o, void *p) +posix_path_converter(PyObject *o, void *p, int allow_embedded_null) { path_t *path = (path_t *)p; PyObject *bytes = NULL; @@ -1293,7 +1293,7 @@ path_converter(PyObject *o, void *p) FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); goto error_exit; } - if (wcslen(wide) != length) { + if (!allow_embedded_null && wcslen(wide) != length) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); goto error_exit; } @@ -1341,7 +1341,7 @@ path_converter(PyObject *o, void *p) length = PyBytes_GET_SIZE(bytes); narrow = PyBytes_AS_STRING(bytes); - if ((size_t)length != strlen(narrow)) { + if (!allow_embedded_null && (size_t)length != strlen(narrow)) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); goto error_exit; } @@ -1364,7 +1364,7 @@ path_converter(PyObject *o, void *p) FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); goto error_exit; } - if (wcslen(wide) != length) { + if (!allow_embedded_null && wcslen(wide) != length) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); goto error_exit; } @@ -1400,6 +1400,12 @@ path_converter(PyObject *o, void *p) return 0; } +static int +path_converter(PyObject *o, void *p) +{ + return posix_path_converter(o, p, 0); +} + static void argument_unavailable_error(const char *function_name, const char *argument_name) { @@ -5470,17 +5476,23 @@ os__path_islink_impl(PyObject *module, PyObject *path) /*[clinic input] os._path_splitroot_ex - p: path_t + p: object +Split a pathname into drive, root and tail. [clinic start generated code]*/ static PyObject * -os__path_splitroot_ex_impl(PyObject *module, path_t *p) -/*[clinic end generated code: output=2001f8839dda3762 input=3b4aad8eba96cfef]*/ +os__path_splitroot_ex_impl(PyObject *module, PyObject *p) +/*[clinic end generated code: output=1be3aff51db9fc0d input=df3394f511f02c51]*/ { - Py_ssize_t len = p->length, drvsize, rootsize; + Py_ssize_t len, drvsize, rootsize; PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; - const wchar_t *wide = p->wide; + path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "p", 0, 0); + if (!posix_path_converter(p, &path, 1)) { + goto exit; + } + len = path.length; + const wchar_t *wide = path.wide; _Py_skiproot(wide, len, &drvsize, &rootsize); if (!(drv = PyUnicode_FromWideChar(wide, drvsize)) || !(root = PyUnicode_FromWideChar(&wide[drvsize], rootsize)) || @@ -5488,13 +5500,14 @@ os__path_splitroot_ex_impl(PyObject *module, path_t *p) { goto exit; } - if (p->narrow) { + if (path.narrow) { Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); } result = Py_BuildValue("(OOO)", drv, root, tail); exit: + path_cleanup(&path); Py_DECREF(drv); Py_DECREF(root); Py_DECREF(tail); From f78bad044cc82902550d40d0fd8841a5253924e5 Mon Sep 17 00:00:00 2001 From: Nineteendo Date: Tue, 23 Apr 2024 12:08:07 +0200 Subject: [PATCH 14/28] Fix segmentation fault --- Include/unicodeobject.h | 1 + Modules/posixmodule.c | 38 +++++++++++++++++++++++++++----------- Objects/unicodeobject.c | 9 +++++++-- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index dee00715b3c51d..eeb1a8911c6841 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -740,6 +740,7 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( /* ParseTuple converter: encode str objects to bytes using PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ +PyAPI_FUNC(int) PyUnicode_FSConverterPosix(PyObject*, void*, int); PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); /* ParseTuple converter: decode bytes objects to unicode using diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 1a6b4308ac6016..ef21525fa626d5 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1304,7 +1304,7 @@ posix_path_converter(PyObject *o, void *p, int allow_embedded_null) wide = NULL; goto success_exit; #else - if (!PyUnicode_FSConverter(o, &bytes)) { + if (!PyUnicode_FSConverterPosix(o, &bytes, allow_embedded_null)) { goto error_exit; } #endif @@ -5486,21 +5486,30 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *p) /*[clinic end generated code: output=1be3aff51db9fc0d input=df3394f511f02c51]*/ { Py_ssize_t len, drvsize, rootsize; - PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + PyObject *wide = NULL, *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + wchar_t *buffer = NULL; path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "p", 0, 0); if (!posix_path_converter(p, &path, 1)) { goto exit; } +#ifdef MS_WINDOWS len = path.length; - const wchar_t *wide = path.wide; - _Py_skiproot(wide, len, &drvsize, &rootsize); - if (!(drv = PyUnicode_FromWideChar(wide, drvsize)) || - !(root = PyUnicode_FromWideChar(&wide[drvsize], rootsize)) || - !(tail = PyUnicode_FromWideChar(&wide[drvsize + rootsize], len - drvsize - rootsize))) + const wchar_t *buffer = path.wide; +#else + if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path.narrow, path.length)) || + !(buffer = PyUnicode_AsWideCharString(wide, &len))) + { + goto exit; + } +#endif + _Py_skiproot(buffer, len, &drvsize, &rootsize); + if (!(drv = PyUnicode_FromWideChar(buffer, drvsize)) || + !(root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize)) || + !(tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], len - drvsize - rootsize))) { goto exit; } - if (path.narrow) { + if (PyBytes_Check(path.object)) { Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); @@ -5508,9 +5517,16 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *p) result = Py_BuildValue("(OOO)", drv, root, tail); exit: path_cleanup(&path); - Py_DECREF(drv); - Py_DECREF(root); - Py_DECREF(tail); + PyMem_Free(buffer); + if (drv) { + Py_DECREF(drv); + } + if (root) { + Py_DECREF(root); + } + if (tail) { + Py_DECREF(tail); + } return result; } diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 2c259b7e869efe..917d29f8771e13 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3790,7 +3790,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) int -PyUnicode_FSConverter(PyObject* arg, void* addr) +PyUnicode_FSConverterPosix(PyObject* arg, void* addr, int allow_embedded_null) { PyObject *path = NULL; PyObject *output = NULL; @@ -3819,7 +3819,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) size = PyBytes_GET_SIZE(output); data = PyBytes_AS_STRING(output); - if ((size_t)size != strlen(data)) { + if (!allow_embedded_null && (size_t)size != strlen(data)) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); Py_DECREF(output); return 0; @@ -3828,6 +3828,11 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) return Py_CLEANUP_SUPPORTED; } +int +PyUnicode_FSConverter(PyObject* arg, void* addr) +{ + return PyUnicode_FSConverterPosix(arg, addr, 0); +} int PyUnicode_FSDecoder(PyObject* arg, void* addr) From 5635da5a72ee8ed1d0c75a9852e194002221fafb Mon Sep 17 00:00:00 2001 From: Nineteendo Date: Tue, 23 Apr 2024 12:11:23 +0200 Subject: [PATCH 15/28] Fix redefinition --- Modules/posixmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index ef21525fa626d5..2cf7243ed00511 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5494,7 +5494,7 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *p) } #ifdef MS_WINDOWS len = path.length; - const wchar_t *buffer = path.wide; + buffer = path.wide; #else if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path.narrow, path.length)) || !(buffer = PyUnicode_AsWideCharString(wide, &len))) From 6f62c1f5e61174d6032ec272d8a8dfe10356288f Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 14:04:50 +0200 Subject: [PATCH 16/28] Python wrapper --- Include/unicodeobject.h | 1 - Lib/ntpath.py | 90 +++++++++++++++++++--------------- Lib/posixpath.py | 46 ++++++++++------- Modules/clinic/posixmodule.c.h | 24 +++++---- Modules/posixmodule.c | 44 +++++++---------- 5 files changed, 110 insertions(+), 95 deletions(-) diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index eeb1a8911c6841..dee00715b3c51d 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -740,7 +740,6 @@ PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( /* ParseTuple converter: encode str objects to bytes using PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ -PyAPI_FUNC(int) PyUnicode_FSConverterPosix(PyObject*, void*, int); PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); /* ParseTuple converter: decode bytes objects to unicode using diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 1a0701fafbe1ce..7ee0caa6ba0fa8 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -167,9 +167,54 @@ def splitdrive(p): return drive, root + tail +def _splitroot_fallback(p): + """Split a pathname into drive, root and tail.""" + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' + else: + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] + else: + # Relative path, e.g. Windows + return empty, empty, p + + try: - from nt import _path_splitroot_ex as splitroot + from nt import _path_splitroot_ex except ImportError: + splitroot = _splitroot_fallback +else: def splitroot(p): """Split a pathname into drive, root and tail. The drive is defined exactly as in splitdrive(). On Windows, the root may be a single path @@ -181,45 +226,10 @@ def splitroot(p): splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') splitroot('Windows/notepad') == ('', '', 'Windows/notepad') """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] - else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] - else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p + try: + return _path_splitroot_ex(p) + except ValueError: + return _splitroot_fallback(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 44a0fc68f6cc0e..04ca0b6204bbcf 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -134,9 +134,32 @@ def splitdrive(p): return p[:0], p +def _splitroot_fallback(p): + """Split a pathname into drive, root and tail.""" + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] + + try: - from posix import _path_splitroot_ex as splitroot + from posix import _path_splitroot_ex except ImportError: + splitroot = _splitroot_fallback +else: def splitroot(p): """Split a pathname into drive, root and tail. On Posix, drive is always empty; the root may be empty, a single slash, or two slashes. The tail @@ -147,23 +170,10 @@ def splitroot(p): splitroot('//foo/bar') == ('', '//', 'foo/bar') splitroot('///foo/bar') == ('', '/', '//foo/bar') """ - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'/' - empty = b'' - else: - sep = '/' - empty = '' - if p[:1] != sep: - # Relative path, e.g.: 'foo' - return empty, empty, p - elif p[1:2] != sep or p[2:3] == sep: - # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. - return empty, sep, p[1:] - else: - # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see - # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 - return empty, p[:2], p[2:] + try: + return _path_splitroot_ex(p) + except (UnicodeEncodeError, ValueError): + return _splitroot_fallback(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index 14206dc45c29b1..bc6c3269ffff8c 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2249,16 +2249,15 @@ os__path_islink(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj #endif /* defined(MS_WINDOWS) */ PyDoc_STRVAR(os__path_splitroot_ex__doc__, -"_path_splitroot_ex($module, /, p)\n" +"_path_splitroot_ex($module, /, path)\n" "--\n" -"\n" -"Split a pathname into drive, root and tail."); +"\n"); #define OS__PATH_SPLITROOT_EX_METHODDEF \ {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *p); +os__path_splitroot_ex_impl(PyObject *module, path_t *path); static PyObject * os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -2273,7 +2272,7 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *ob_item[NUM_KEYWORDS]; } _kwtuple = { .ob_base = PyVarObject_HEAD_INIT(&PyTuple_Type, NUM_KEYWORDS) - .ob_item = { &_Py_ID(p), }, + .ob_item = { &_Py_ID(path), }, }; #undef NUM_KEYWORDS #define KWTUPLE (&_kwtuple.ob_base.ob_base) @@ -2282,7 +2281,7 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, # define KWTUPLE NULL #endif // !Py_BUILD_CORE - static const char * const _keywords[] = {"p", NULL}; + static const char * const _keywords[] = {"path", NULL}; static _PyArg_Parser _parser = { .keywords = _keywords, .fname = "_path_splitroot_ex", @@ -2290,16 +2289,21 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - PyObject *p; + path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "path", 0, 0); args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { goto exit; } - p = args[0]; - return_value = os__path_splitroot_ex_impl(module, p); + if (!path_converter(args[0], &path)) { + goto exit; + } + return_value = os__path_splitroot_ex_impl(module, &path); exit: + /* Cleanup for path */ + path_cleanup(&path); + return return_value; } @@ -12657,4 +12661,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=5bd32f18f4146963 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=3ad5455c1ea92c58 input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 2cf7243ed00511..3e757f522a1b79 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -1205,7 +1205,7 @@ path_cleanup(path_t *path) } static int -posix_path_converter(PyObject *o, void *p, int allow_embedded_null) +path_converter(PyObject *o, void *p) { path_t *path = (path_t *)p; PyObject *bytes = NULL; @@ -1293,7 +1293,7 @@ posix_path_converter(PyObject *o, void *p, int allow_embedded_null) FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); goto error_exit; } - if (!allow_embedded_null && wcslen(wide) != length) { + if (wcslen(wide) != length) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); goto error_exit; } @@ -1304,7 +1304,7 @@ posix_path_converter(PyObject *o, void *p, int allow_embedded_null) wide = NULL; goto success_exit; #else - if (!PyUnicode_FSConverterPosix(o, &bytes, allow_embedded_null)) { + if (!PyUnicode_FSConverter(o, &bytes)) { goto error_exit; } #endif @@ -1341,7 +1341,7 @@ posix_path_converter(PyObject *o, void *p, int allow_embedded_null) length = PyBytes_GET_SIZE(bytes); narrow = PyBytes_AS_STRING(bytes); - if (!allow_embedded_null && (size_t)length != strlen(narrow)) { + if ((size_t)length != strlen(narrow)) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); goto error_exit; } @@ -1364,7 +1364,7 @@ posix_path_converter(PyObject *o, void *p, int allow_embedded_null) FORMAT_EXCEPTION(PyExc_ValueError, "%s too long for Windows"); goto error_exit; } - if (!allow_embedded_null && wcslen(wide) != length) { + if (wcslen(wide) != length) { FORMAT_EXCEPTION(PyExc_ValueError, "embedded null character in %s"); goto error_exit; } @@ -1400,12 +1400,6 @@ posix_path_converter(PyObject *o, void *p, int allow_embedded_null) return 0; } -static int -path_converter(PyObject *o, void *p) -{ - return posix_path_converter(o, p, 0); -} - static void argument_unavailable_error(const char *function_name, const char *argument_name) { @@ -5476,27 +5470,22 @@ os__path_islink_impl(PyObject *module, PyObject *path) /*[clinic input] os._path_splitroot_ex - p: object + path: path_t -Split a pathname into drive, root and tail. [clinic start generated code]*/ static PyObject * -os__path_splitroot_ex_impl(PyObject *module, PyObject *p) -/*[clinic end generated code: output=1be3aff51db9fc0d input=df3394f511f02c51]*/ +os__path_splitroot_ex_impl(PyObject *module, path_t *path) +/*[clinic end generated code: output=4b0072b6cdf4b611 input=586b2015848e9416]*/ { Py_ssize_t len, drvsize, rootsize; PyObject *wide = NULL, *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; - wchar_t *buffer = NULL; - path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "p", 0, 0); - if (!posix_path_converter(p, &path, 1)) { - goto exit; - } + const wchar_t *buffer = NULL; #ifdef MS_WINDOWS - len = path.length; - buffer = path.wide; + len = path->length; + buffer = path->wide; #else - if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path.narrow, path.length)) || + if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path->narrow, path->length)) || !(buffer = PyUnicode_AsWideCharString(wide, &len))) { goto exit; @@ -5509,15 +5498,18 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *p) { goto exit; } - if (PyBytes_Check(path.object)) { + if (PyBytes_Check(path->object)) { Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); } result = Py_BuildValue("(OOO)", drv, root, tail); exit: - path_cleanup(&path); - PyMem_Free(buffer); +#ifndef MS_WINDOWS + if (buffer) { + PyMem_Free(buffer); + } +#endif if (drv) { Py_DECREF(drv); } From 2e1b11a6dfd857fe41c45de3680e9230b278f950 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 14:07:18 +0200 Subject: [PATCH 17/28] Revert allow embedded null --- Objects/unicodeobject.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 917d29f8771e13..ac72385d11bf4d 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3790,7 +3790,7 @@ PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) int -PyUnicode_FSConverterPosix(PyObject* arg, void* addr, int allow_embedded_null) +PyUnicode_FSConverter(PyObject* arg, void* addr) { PyObject *path = NULL; PyObject *output = NULL; @@ -3819,7 +3819,7 @@ PyUnicode_FSConverterPosix(PyObject* arg, void* addr, int allow_embedded_null) size = PyBytes_GET_SIZE(output); data = PyBytes_AS_STRING(output); - if (!allow_embedded_null && (size_t)size != strlen(data)) { + if ((size_t)size != strlen(data)) { PyErr_SetString(PyExc_ValueError, "embedded null byte"); Py_DECREF(output); return 0; @@ -3828,12 +3828,6 @@ PyUnicode_FSConverterPosix(PyObject* arg, void* addr, int allow_embedded_null) return Py_CLEANUP_SUPPORTED; } -int -PyUnicode_FSConverter(PyObject* arg, void* addr) -{ - return PyUnicode_FSConverterPosix(arg, addr, 0); -} - int PyUnicode_FSDecoder(PyObject* arg, void* addr) { From 92d1c951dbd2bfa5ca91ae7ca7ef6d7a6947e473 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 14:08:42 +0200 Subject: [PATCH 18/28] Revert newline --- Objects/unicodeobject.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index ac72385d11bf4d..2c259b7e869efe 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3828,6 +3828,7 @@ PyUnicode_FSConverter(PyObject* arg, void* addr) return Py_CLEANUP_SUPPORTED; } + int PyUnicode_FSDecoder(PyObject* arg, void* addr) { From 5d35720b758d196ded1301267255cbb98dc541a0 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 14:48:43 +0200 Subject: [PATCH 19/28] cast constant --- Modules/posixmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 3e757f522a1b79..129fa36330b778 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5480,10 +5480,10 @@ os__path_splitroot_ex_impl(PyObject *module, path_t *path) { Py_ssize_t len, drvsize, rootsize; PyObject *wide = NULL, *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; - const wchar_t *buffer = NULL; + wchar_t *buffer = NULL; #ifdef MS_WINDOWS len = path->length; - buffer = path->wide; + buffer = (wchar_t *)path->wide; #else if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path->narrow, path->length)) || !(buffer = PyUnicode_AsWideCharString(wide, &len))) From bb9b34dbf8eaaf39bc2ef37e1c8f9806bf424906 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 15:00:44 +0200 Subject: [PATCH 20/28] Decrement ref counter --- Modules/posixmodule.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 129fa36330b778..0d3a3518ead9f6 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5506,6 +5506,9 @@ os__path_splitroot_ex_impl(PyObject *module, path_t *path) result = Py_BuildValue("(OOO)", drv, root, tail); exit: #ifndef MS_WINDOWS + if (wide) { + Py_DECREF(wide); + } if (buffer) { PyMem_Free(buffer); } From bb64b186e6c7b85b719dea11ef35ee1f35a7d64f Mon Sep 17 00:00:00 2001 From: nineteendo Date: Tue, 23 Apr 2024 17:36:02 +0200 Subject: [PATCH 21/28] Simplify exception clause --- Lib/posixpath.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/posixpath.py b/Lib/posixpath.py index 04ca0b6204bbcf..e417b750b28680 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -172,7 +172,7 @@ def splitroot(p): """ try: return _path_splitroot_ex(p) - except (UnicodeEncodeError, ValueError): + except ValueError: return _splitroot_fallback(p) From 75e3a7082b734fc4d07e418a3c434e23a4850576 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Wed, 24 Apr 2024 09:41:32 +0200 Subject: [PATCH 22/28] Remove cast --- Modules/posixmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 0d3a3518ead9f6..5689239633c97e 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5480,11 +5480,11 @@ os__path_splitroot_ex_impl(PyObject *module, path_t *path) { Py_ssize_t len, drvsize, rootsize; PyObject *wide = NULL, *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; - wchar_t *buffer = NULL; #ifdef MS_WINDOWS len = path->length; - buffer = (wchar_t *)path->wide; + const wchar_t *buffer = path->wide; #else + wchar_t *buffer = NULL; if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path->narrow, path->length)) || !(buffer = PyUnicode_AsWideCharString(wide, &len))) { From ef0ce7ff0d2dbb370e63160f7ad395b7531b7e32 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Wed, 24 Apr 2024 14:47:19 +0200 Subject: [PATCH 23/28] Remove fallback --- Lib/ntpath.py | 103 ++++++++++++++++++--------------- Lib/posixpath.py | 59 ++++++++++--------- Modules/clinic/posixmodule.c.h | 15 +++-- Modules/posixmodule.c | 31 +++------- 4 files changed, 103 insertions(+), 105 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 7ee0caa6ba0fa8..e810b655e5ac85 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -167,53 +167,59 @@ def splitdrive(p): return drive, root + tail -def _splitroot_fallback(p): - """Split a pathname into drive, root and tail.""" - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'\\' - altsep = b'/' - colon = b':' - unc_prefix = b'\\\\?\\UNC\\' - empty = b'' - else: - sep = '\\' - altsep = '/' - colon = ':' - unc_prefix = '\\\\?\\UNC\\' - empty = '' - normp = p.replace(altsep, sep) - if normp[:1] == sep: - if normp[1:2] == sep: - # UNC drives, e.g. \\server\share or \\?\UNC\server\share - # Device drives, e.g. \\.\device or \\?\device - start = 8 if normp[:8].upper() == unc_prefix else 2 - index = normp.find(sep, start) - if index == -1: - return p, empty, empty - index2 = normp.find(sep, index + 1) - if index2 == -1: - return p, empty, empty - return p[:index2], p[index2:index2 + 1], p[index2 + 1:] - else: - # Relative path with root, e.g. \Windows - return empty, p[:1], p[1:] - elif normp[1:2] == colon: - if normp[2:3] == sep: - # Absolute drive-letter path, e.g. X:\Windows - return p[:2], p[2:3], p[3:] - else: - # Relative path with drive, e.g. X:Windows - return p[:2], empty, p[2:] - else: - # Relative path, e.g. Windows - return empty, empty, p - - try: from nt import _path_splitroot_ex except ImportError: - splitroot = _splitroot_fallback + def splitroot(p): + """Split a pathname into drive, root and tail. The drive is defined + exactly as in splitdrive(). On Windows, the root may be a single path + separator or an empty string. The tail contains anything after the root. + For example: + + splitroot('//server/share/') == ('//server/share', '/', '') + splitroot('C:/Users/Barney') == ('C:', '/', 'Users/Barney') + splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') + splitroot('Windows/notepad') == ('', '', 'Windows/notepad') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'\\' + altsep = b'/' + colon = b':' + unc_prefix = b'\\\\?\\UNC\\' + empty = b'' + else: + sep = '\\' + altsep = '/' + colon = ':' + unc_prefix = '\\\\?\\UNC\\' + empty = '' + normp = p.replace(altsep, sep) + if normp[:1] == sep: + if normp[1:2] == sep: + # UNC drives, e.g. \\server\share or \\?\UNC\server\share + # Device drives, e.g. \\.\device or \\?\device + start = 8 if normp[:8].upper() == unc_prefix else 2 + index = normp.find(sep, start) + if index == -1: + return p, empty, empty + index2 = normp.find(sep, index + 1) + if index2 == -1: + return p, empty, empty + return p[:index2], p[index2:index2 + 1], p[index2 + 1:] + else: + # Relative path with root, e.g. \Windows + return empty, p[:1], p[1:] + elif normp[1:2] == colon: + if normp[2:3] == sep: + # Absolute drive-letter path, e.g. X:\Windows + return p[:2], p[2:3], p[3:] + else: + # Relative path with drive, e.g. X:Windows + return p[:2], empty, p[2:] + else: + # Relative path, e.g. Windows + return empty, empty, p else: def splitroot(p): """Split a pathname into drive, root and tail. The drive is defined @@ -226,10 +232,11 @@ def splitroot(p): splitroot('C:///spam///ham') == ('C:', '/', '//spam///ham') splitroot('Windows/notepad') == ('', '', 'Windows/notepad') """ - try: - return _path_splitroot_ex(p) - except ValueError: - return _splitroot_fallback(p) + p = os.fspath(p) + if isinstance(p, bytes): + drive, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return os.fsencode(drive), os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Split a path in head (everything up to the last '/') and tail (the diff --git a/Lib/posixpath.py b/Lib/posixpath.py index e417b750b28680..56b7915826daf4 100644 --- a/Lib/posixpath.py +++ b/Lib/posixpath.py @@ -134,31 +134,36 @@ def splitdrive(p): return p[:0], p -def _splitroot_fallback(p): - """Split a pathname into drive, root and tail.""" - p = os.fspath(p) - if isinstance(p, bytes): - sep = b'/' - empty = b'' - else: - sep = '/' - empty = '' - if p[:1] != sep: - # Relative path, e.g.: 'foo' - return empty, empty, p - elif p[1:2] != sep or p[2:3] == sep: - # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. - return empty, sep, p[1:] - else: - # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see - # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 - return empty, p[:2], p[2:] - - try: from posix import _path_splitroot_ex except ImportError: - splitroot = _splitroot_fallback + def splitroot(p): + """Split a pathname into drive, root and tail. On Posix, drive is always + empty; the root may be empty, a single slash, or two slashes. The tail + contains anything after the root. For example: + + splitroot('foo/bar') == ('', '', 'foo/bar') + splitroot('/foo/bar') == ('', '/', 'foo/bar') + splitroot('//foo/bar') == ('', '//', 'foo/bar') + splitroot('///foo/bar') == ('', '/', '//foo/bar') + """ + p = os.fspath(p) + if isinstance(p, bytes): + sep = b'/' + empty = b'' + else: + sep = '/' + empty = '' + if p[:1] != sep: + # Relative path, e.g.: 'foo' + return empty, empty, p + elif p[1:2] != sep or p[2:3] == sep: + # Absolute path, e.g.: '/foo', '///foo', '////foo', etc. + return empty, sep, p[1:] + else: + # Precisely two leading slashes, e.g.: '//foo'. Implementation defined per POSIX, see + # https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap04.html#tag_04_13 + return empty, p[:2], p[2:] else: def splitroot(p): """Split a pathname into drive, root and tail. On Posix, drive is always @@ -170,10 +175,12 @@ def splitroot(p): splitroot('//foo/bar') == ('', '//', 'foo/bar') splitroot('///foo/bar') == ('', '/', '//foo/bar') """ - try: - return _path_splitroot_ex(p) - except ValueError: - return _splitroot_fallback(p) + p = os.fspath(p) + if isinstance(p, bytes): + # Optimisation: the drive is always empty + _, root, tail = _path_splitroot_ex(os.fsdecode(p)) + return b'', os.fsencode(root), os.fsencode(tail) + return _path_splitroot_ex(p) # Return the tail (basename) part of a path, same as split(path)[1]. diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index bc6c3269ffff8c..a0d1f3238a6733 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -2257,7 +2257,7 @@ PyDoc_STRVAR(os__path_splitroot_ex__doc__, {"_path_splitroot_ex", _PyCFunction_CAST(os__path_splitroot_ex), METH_FASTCALL|METH_KEYWORDS, os__path_splitroot_ex__doc__}, static PyObject * -os__path_splitroot_ex_impl(PyObject *module, path_t *path); +os__path_splitroot_ex_impl(PyObject *module, PyObject *path); static PyObject * os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) @@ -2289,21 +2289,20 @@ os__path_splitroot_ex(PyObject *module, PyObject *const *args, Py_ssize_t nargs, }; #undef KWTUPLE PyObject *argsbuf[1]; - path_t path = PATH_T_INITIALIZE("_path_splitroot_ex", "path", 0, 0); + PyObject *path; args = _PyArg_UnpackKeywords(args, nargs, NULL, kwnames, &_parser, 1, 1, 0, argsbuf); if (!args) { goto exit; } - if (!path_converter(args[0], &path)) { + if (!PyUnicode_Check(args[0])) { + _PyArg_BadArgument("_path_splitroot_ex", "argument 'path'", "str", args[0]); goto exit; } - return_value = os__path_splitroot_ex_impl(module, &path); + path = args[0]; + return_value = os__path_splitroot_ex_impl(module, path); exit: - /* Cleanup for path */ - path_cleanup(&path); - return return_value; } @@ -12661,4 +12660,4 @@ os__supports_virtual_terminal(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #define OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF #endif /* !defined(OS__SUPPORTS_VIRTUAL_TERMINAL_METHODDEF) */ -/*[clinic end generated code: output=3ad5455c1ea92c58 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c4698b47007cd6eb input=a9049054013a1b77]*/ diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 5689239633c97e..fed61fde2a5f30 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5470,27 +5470,22 @@ os__path_islink_impl(PyObject *module, PyObject *path) /*[clinic input] os._path_splitroot_ex - path: path_t + path: unicode [clinic start generated code]*/ static PyObject * -os__path_splitroot_ex_impl(PyObject *module, path_t *path) -/*[clinic end generated code: output=4b0072b6cdf4b611 input=586b2015848e9416]*/ +os__path_splitroot_ex_impl(PyObject *module, PyObject *path) +/*[clinic end generated code: output=de97403d3dfebc40 input=f1470e12d899f9ac]*/ { Py_ssize_t len, drvsize, rootsize; - PyObject *wide = NULL, *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; -#ifdef MS_WINDOWS - len = path->length; - const wchar_t *buffer = path->wide; -#else - wchar_t *buffer = NULL; - if (!(wide = PyUnicode_DecodeFSDefaultAndSize(path->narrow, path->length)) || - !(buffer = PyUnicode_AsWideCharString(wide, &len))) - { + PyObject *drv = NULL, *root = NULL, *tail = NULL, *result = NULL; + + wchar_t *buffer = PyUnicode_AsWideCharString(path, &len); + if (!buffer) { goto exit; } -#endif + _Py_skiproot(buffer, len, &drvsize, &rootsize); if (!(drv = PyUnicode_FromWideChar(buffer, drvsize)) || !(root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize)) || @@ -5498,21 +5493,11 @@ os__path_splitroot_ex_impl(PyObject *module, path_t *path) { goto exit; } - if (PyBytes_Check(path->object)) { - Py_SETREF(drv, PyUnicode_EncodeFSDefault(drv)); - Py_SETREF(root, PyUnicode_EncodeFSDefault(root)); - Py_SETREF(tail, PyUnicode_EncodeFSDefault(tail)); - } result = Py_BuildValue("(OOO)", drv, root, tail); exit: -#ifndef MS_WINDOWS - if (wide) { - Py_DECREF(wide); - } if (buffer) { PyMem_Free(buffer); } -#endif if (drv) { Py_DECREF(drv); } From df9f974bc7ac030e15c827fd141201366fe0bf36 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Wed, 24 Apr 2024 22:34:35 +0200 Subject: [PATCH 24/28] Update Modules/posixmodule.c Co-authored-by: Erlend E. Aasland --- Modules/posixmodule.c | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index fed61fde2a5f30..4b9c3d90f28979 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5495,18 +5495,10 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *path) } result = Py_BuildValue("(OOO)", drv, root, tail); exit: - if (buffer) { - PyMem_Free(buffer); - } - if (drv) { - Py_DECREF(drv); - } - if (root) { - Py_DECREF(root); - } - if (tail) { - Py_DECREF(tail); - } + PyMem_Free(buffer); + Py_XDECREF(drv); + Py_DECREF(root); + Py_DECREF(tail); return result; } From 9cd7951d6fbc40e9da41191630a1fb4b6b8a4bac Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Wed, 24 Apr 2024 23:03:44 +0200 Subject: [PATCH 25/28] Update Modules/posixmodule.c Co-authored-by: Erlend E. Aasland --- Modules/posixmodule.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 4b9c3d90f28979..49dd7b4d6bea7b 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5487,10 +5487,17 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *path) } _Py_skiproot(buffer, len, &drvsize, &rootsize); - if (!(drv = PyUnicode_FromWideChar(buffer, drvsize)) || - !(root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize)) || - !(tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], len - drvsize - rootsize))) - { + drv = PyUnicode_FromWideChar(buffer, drvsize); + if (drv == NULL) { + goto exit; + } + root = PyUnicode_FromWideChar(&buffer[drvsize], rootsize); + if (root == NULL) { + goto exit; + } + tail = PyUnicode_FromWideChar(&buffer[drvsize + rootsize], + len - drvsize - rootsize); + if (tail == NULL) { goto exit; } result = Py_BuildValue("(OOO)", drv, root, tail); From f87f82bf6ec2b8bd780dc05b6f43f4523e40ae41 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Wed, 24 Apr 2024 23:10:25 +0200 Subject: [PATCH 26/28] Update Python/fileutils.c Co-authored-by: Eryk Sun --- Python/fileutils.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/Python/fileutils.c b/Python/fileutils.c index 0afc744a311578..058a281c25d1c7 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2327,8 +2327,10 @@ _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize // Device drives, e.g. \\.\device or \\?\device // UNC drives, e.g. \\server\share or \\?\UNC\server\share Py_ssize_t idx; - if (path[2] == L'?' && IS_SEP(&path[3]) && (path[4] == L'U' || path[4] == L'u') && - (path[5] == L'N' || path[5] == L'n') && (path[6] == L'C' || path[6] == L'c') && + if (path[2] == L'?' && IS_SEP(&path[3]) && + (path[4] == L'U' || path[4] == L'u') && + (path[5] == L'N' || path[5] == L'n') && + (path[6] == L'C' || path[6] == L'c') && IS_SEP(&path[7])) { idx = 8; From 62a42fb5b64ec492e2dc16011f292e724b803e28 Mon Sep 17 00:00:00 2001 From: nineteendo Date: Wed, 24 Apr 2024 23:18:49 +0200 Subject: [PATCH 27/28] Follow pep 7 --- Python/fileutils.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Python/fileutils.c b/Python/fileutils.c index 058a281c25d1c7..54853ba2f75d9d 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -2296,7 +2296,8 @@ PathCchCombineEx(wchar_t *buffer, size_t bufsize, const wchar_t *dirname, #endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ void -_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize) +_Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, + Py_ssize_t *rootsize) { assert(drvsize); assert(rootsize); From 6a74f15a0d840f00e62c71c36ea6c169a2b4dab1 Mon Sep 17 00:00:00 2001 From: Nice Zombies Date: Thu, 25 Apr 2024 01:40:20 +0200 Subject: [PATCH 28/28] Update Modules/posixmodule.c Co-authored-by: Eryk Sun --- Modules/posixmodule.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 49dd7b4d6bea7b..c9d67ccbb8c908 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -5504,8 +5504,8 @@ os__path_splitroot_ex_impl(PyObject *module, PyObject *path) exit: PyMem_Free(buffer); Py_XDECREF(drv); - Py_DECREF(root); - Py_DECREF(tail); + Py_XDECREF(root); + Py_XDECREF(tail); return result; }