From 060cd714f798d94fc694e8ee9fc52fdc84430d24 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 20 Mar 2022 23:45:22 +0800 Subject: [PATCH 01/17] Add _winapi.LCMapStringEx --- Modules/_winapi.c | 54 ++++++++++++++++++++++++++++++++++++++ Modules/clinic/_winapi.c.h | 41 +++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 3e24d512cac384..94de2377c200f0 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1512,6 +1512,29 @@ _winapi_PeekNamedPipe_impl(PyObject *module, HANDLE handle, int size) } } +/*[clinic input] +_winapi.LCMapStringEx + + locale: LPCWSTR + flags: DWORD + src: LPCWSTR + +[clinic start generated code]*/ + +static PyObject * +_winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, + LPCWSTR src) +/*[clinic end generated code: output=cf4713d80e2b47c9 input=9fe26f95d5ab0001]*/ +{ + + int dest_size = LCMapStringEx(locale, flags, src, wcslen(src), NULL, 0, NULL, NULL, 0); + wchar_t* dest = PyMem_NEW(wchar_t, dest_size); + LCMapStringEx(locale, flags, src, wcslen(src), dest, dest_size, NULL, NULL, 0); + PyObject *ret = PyUnicode_FromWideChar(dest, dest_size); + PyMem_DEL(dest); + return ret; +} + /*[clinic input] _winapi.ReadFile @@ -2023,6 +2046,7 @@ static PyMethodDef winapi_functions[] = { _WINAPI_OPENFILEMAPPING_METHODDEF _WINAPI_OPENPROCESS_METHODDEF _WINAPI_PEEKNAMEDPIPE_METHODDEF + _WINAPI_LCMAPSTRINGEX_METHODDEF _WINAPI_READFILE_METHODDEF _WINAPI_SETNAMEDPIPEHANDLESTATE_METHODDEF _WINAPI_TERMINATEPROCESS_METHODDEF @@ -2160,6 +2184,36 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, FILE_TYPE_PIPE); WINAPI_CONSTANT(F_DWORD, FILE_TYPE_REMOTE); + WINAPI_CONSTANT("u", LOCALE_NAME_INVARIANT); + WINAPI_CONSTANT(F_DWORD, LOCALE_NAME_MAX_LENGTH); + WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT); + WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT); + + WINAPI_CONSTANT(F_DWORD, LCMAP_BYTEREV); + WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH); + 
WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH); + WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA); + WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA); + WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING); + WINAPI_CONSTANT(F_DWORD, LCMAP_LOWERCASE); + WINAPI_CONSTANT(F_DWORD, LCMAP_HASH); + WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE); + WINAPI_CONSTANT(F_DWORD, LCMAP_SORTHANDLE); + WINAPI_CONSTANT(F_DWORD, LCMAP_SORTKEY); + WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE); + WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE); + WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE); + WINAPI_CONSTANT(F_DWORD, NORM_IGNORENONSPACE); + WINAPI_CONSTANT(F_DWORD, NORM_IGNORESYMBOLS); + WINAPI_CONSTANT(F_DWORD, LINGUISTIC_IGNORECASE); + WINAPI_CONSTANT(F_DWORD, LINGUISTIC_IGNOREDIACRITIC); + WINAPI_CONSTANT(F_DWORD, NORM_IGNORECASE); + WINAPI_CONSTANT(F_DWORD, NORM_IGNOREKANATYPE); + WINAPI_CONSTANT(F_DWORD, NORM_IGNOREWIDTH); + WINAPI_CONSTANT(F_DWORD, NORM_LINGUISTIC_CASING); + WINAPI_CONSTANT(F_DWORD, SORT_DIGITSASNUMBERS); + WINAPI_CONSTANT(F_DWORD, SORT_STRINGSORT); + WINAPI_CONSTANT("i", NULL); return 0; diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 4d89888af9054a..9f73c11785c704 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -820,6 +820,47 @@ _winapi_PeekNamedPipe(PyObject *module, PyObject *const *args, Py_ssize_t nargs) return return_value; } +PyDoc_STRVAR(_winapi_LCMapStringEx__doc__, +"LCMapStringEx($module, /, locale, flags, src)\n" +"--\n" +"\n"); + +#define _WINAPI_LCMAPSTRINGEX_METHODDEF \ + {"LCMapStringEx", (PyCFunction)(void(*)(void))_winapi_LCMapStringEx, METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__}, + +static PyObject * +_winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, + LPCWSTR src); + +static PyObject * +_winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kwnames) +{ + PyObject *return_value = NULL; + static const char * const _keywords[] = 
{"locale", "flags", "src", NULL}; + static _PyArg_Parser _parser = {"O&kO&:LCMapStringEx", _keywords, 0}; + LPCWSTR locale; + DWORD flags; + LPCWSTR src; + + if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser, + _PyUnicode_WideCharString_Converter, &locale, &flags, _PyUnicode_WideCharString_Converter, &src)) { + goto exit; + } + return_value = _winapi_LCMapStringEx_impl(module, locale, flags, src); + +exit: + /* Cleanup for locale */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)locale); + #endif /* USE_UNICODE_WCHAR_CACHE */ + /* Cleanup for src */ + #if !USE_UNICODE_WCHAR_CACHE + PyMem_Free((void *)src); + #endif /* USE_UNICODE_WCHAR_CACHE */ + + return return_value; +} + PyDoc_STRVAR(_winapi_ReadFile__doc__, "ReadFile($module, /, handle, size, overlapped=False)\n" "--\n" From 6ecbc486a6bb4b05643c8b06e8533c899d5a7bc7 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sun, 20 Mar 2022 23:45:44 +0800 Subject: [PATCH 02/17] Using _winapi.LCMapStringEx in ntpath.normcase --- Lib/ntpath.py | 9 +++++++-- Lib/test/test_ntpath.py | 1 + 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 041ebc75cb127c..98fc6553d62deb 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -17,6 +17,7 @@ defpath = '.;C:\\bin' devnull = 'nul' +import _winapi import os import sys import stat @@ -47,9 +48,13 @@ def normcase(s): Makes all characters lowercase and all slashes into backslashes.""" s = os.fspath(s) if isinstance(s, bytes): - return s.replace(b'/', b'\\').lower() + return _winapi.LCMapStringEx(_winapi.LOCALE_NAME_INVARIANT, + _winapi.LCMAP_LOWERCASE, + s.replace(b'/', b'\\')) else: - return s.replace('/', '\\').lower() + return _winapi.LCMapStringEx(_winapi.LOCALE_NAME_INVARIANT, + _winapi.LCMAP_LOWERCASE, + s.replace('/', '\\')) # Return whether a path is absolute. 
diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 7211ed861762b4..9799400a716447 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -852,6 +852,7 @@ def _check_function(self, func): def test_path_normcase(self): self._check_function(self.path.normcase) + self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') def test_path_isabs(self): self._check_function(self.path.isabs) From c1429b90227ef69647fd29c74e736dbced913e4b Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sun, 20 Mar 2022 15:47:37 +0000 Subject: [PATCH 03/17] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20b?= =?UTF-8?q?lurb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst new file mode 100644 index 00000000000000..91816de244acad --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -0,0 +1,3 @@ +Using Windows function `LCMapStringEx` in `ntpath.normcase` instead of `str.lower` to deal with non ASCII characters. + +The `LCMapStringEx` is added in `_winapi` module also. 
From 81be381e08bb4394258aff96a329eba20d552cc1 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 22 Mar 2022 00:30:05 +0800 Subject: [PATCH 04/17] Changes from review --- Lib/ntpath.py | 33 +++++++++++++++++++++++---------- Modules/_winapi.c | 42 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 59 insertions(+), 16 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 98fc6553d62deb..39d0cdbff1001a 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -17,13 +17,34 @@ defpath = '.;C:\\bin' devnull = 'nul' -import _winapi import os import sys import stat import genericpath from genericpath import * + +try: + import _winapi + + def _normcase(s): + is_bytes = isinstance(s, bytes) + if is_bytes: + s = os.fsdecode(s) + s = _winapi.LCMapStringEx(_winapi.LOCALE_NAME_INVARIANT, + _winapi.LCMAP_LOWERCASE, + s.replace('/', '\\')) + if is_bytes: + s = os.fsencode(s) + return s +except ImportError: + def _normcase(s): + if isinstance(s, bytes): + return s.replace(b'/', b'\\').lower() + return s.replace('/', '\\').lower() + + + __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", @@ -46,15 +67,7 @@ def normcase(s): """Normalize case of pathname. Makes all characters lowercase and all slashes into backslashes.""" - s = os.fspath(s) - if isinstance(s, bytes): - return _winapi.LCMapStringEx(_winapi.LOCALE_NAME_INVARIANT, - _winapi.LCMAP_LOWERCASE, - s.replace(b'/', b'\\')) - else: - return _winapi.LCMapStringEx(_winapi.LOCALE_NAME_INVARIANT, - _winapi.LCMAP_LOWERCASE, - s.replace('/', '\\')) + return _normcase(os.fspath(s)) # Return whether a path is absolute. 
diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 94de2377c200f0..a10649c06cab54 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1526,12 +1526,46 @@ _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, LPCWSTR src) /*[clinic end generated code: output=cf4713d80e2b47c9 input=9fe26f95d5ab0001]*/ { + if (flags & LCMAP_SORTHANDLE) { + PyErr_SetString(PyExc_ValueError, "LCMAP_SORTHANDLE is not supported"); + return NULL; + } + if (flags & LCMAP_HASH) { + PyErr_SetString(PyExc_ValueError, "LCMAP_HASH is not supported"); + return NULL; + } + if (flags & LCMAP_BYTEREV) { + PyErr_SetString(PyExc_ValueError, "LCMAP_BYTEREV is not supported"); + return NULL; + } + if (flags & LCMAP_SORTKEY) { + PyErr_SetString(PyExc_ValueError, "LCMAP_SORTKEY is not supported"); + return NULL; + } + + int dest_size = LCMapStringEx(locale, flags, src, wcslen(src), NULL, 0, + NULL, NULL, 0); + if (dest_size == 0) { + PyErr_SetFromWindowsErr(0); + return NULL; + } - int dest_size = LCMapStringEx(locale, flags, src, wcslen(src), NULL, 0, NULL, NULL, 0); wchar_t* dest = PyMem_NEW(wchar_t, dest_size); - LCMapStringEx(locale, flags, src, wcslen(src), dest, dest_size, NULL, NULL, 0); + if (dest == NULL) { + return PyErr_NoMemory(); + } + + int nmapped = LCMapStringEx(locale, flags, src, wcslen(src), dest, dest_size, + NULL, NULL, 0); + if (nmapped == 0) { + PyErr_SetFromWindowsErr(0); + PyMem_DEL(dest); + return NULL; + } + PyObject *ret = PyUnicode_FromWideChar(dest, dest_size); PyMem_DEL(dest); + return ret; } @@ -2189,17 +2223,13 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT("u", LOCALE_NAME_SYSTEM_DEFAULT); WINAPI_CONSTANT("u", LOCALE_NAME_USER_DEFAULT); - WINAPI_CONSTANT(F_DWORD, LCMAP_BYTEREV); WINAPI_CONSTANT(F_DWORD, LCMAP_FULLWIDTH); WINAPI_CONSTANT(F_DWORD, LCMAP_HALFWIDTH); WINAPI_CONSTANT(F_DWORD, LCMAP_HIRAGANA); WINAPI_CONSTANT(F_DWORD, LCMAP_KATAKANA); WINAPI_CONSTANT(F_DWORD, LCMAP_LINGUISTIC_CASING); WINAPI_CONSTANT(F_DWORD, 
LCMAP_LOWERCASE); - WINAPI_CONSTANT(F_DWORD, LCMAP_HASH); WINAPI_CONSTANT(F_DWORD, LCMAP_SIMPLIFIED_CHINESE); - WINAPI_CONSTANT(F_DWORD, LCMAP_SORTHANDLE); - WINAPI_CONSTANT(F_DWORD, LCMAP_SORTKEY); WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE); WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE); WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE); From 6108d201387aaedae8eca9635fa6e7768d1dc3de Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 22 Mar 2022 00:50:53 +0800 Subject: [PATCH 05/17] Update for comments --- .../2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 4 +-- Modules/_winapi.c | 25 +++++-------------- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst index 91816de244acad..a825aa4ce30311 100644 --- a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -1,3 +1 @@ -Using Windows function `LCMapStringEx` in `ntpath.normcase` instead of `str.lower` to deal with non ASCII characters. - -The `LCMapStringEx` is added in `_winapi` module also. +Using Windows function `LCMapStringEx` in `ntpath.normcase` instead of `str.lower` to deal with non ASCII characters. The `LCMapStringEx` is added in `_winapi` module also. 
diff --git a/Modules/_winapi.c b/Modules/_winapi.c index a10649c06cab54..085f43c58b9dca 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1526,28 +1526,15 @@ _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, LPCWSTR src) /*[clinic end generated code: output=cf4713d80e2b47c9 input=9fe26f95d5ab0001]*/ { - if (flags & LCMAP_SORTHANDLE) { - PyErr_SetString(PyExc_ValueError, "LCMAP_SORTHANDLE is not supported"); - return NULL; - } - if (flags & LCMAP_HASH) { - PyErr_SetString(PyExc_ValueError, "LCMAP_HASH is not supported"); - return NULL; - } - if (flags & LCMAP_BYTEREV) { - PyErr_SetString(PyExc_ValueError, "LCMAP_BYTEREV is not supported"); - return NULL; - } - if (flags & LCMAP_SORTKEY) { - PyErr_SetString(PyExc_ValueError, "LCMAP_SORTKEY is not supported"); - return NULL; + if (flags & (LCMAP_SORTHANDLE | LCMAP_HASH | LCMAP_BYTEREV | + LCMAP_SORTKEY)) { + return PyErr_Format(PyExc_ValueError, "unsupported flags"); } int dest_size = LCMapStringEx(locale, flags, src, wcslen(src), NULL, 0, NULL, NULL, 0); if (dest_size == 0) { - PyErr_SetFromWindowsErr(0); - return NULL; + return PyErr_SetFromWindowsErr(0); } wchar_t* dest = PyMem_NEW(wchar_t, dest_size); @@ -1558,9 +1545,9 @@ _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, int nmapped = LCMapStringEx(locale, flags, src, wcslen(src), dest, dest_size, NULL, NULL, 0); if (nmapped == 0) { - PyErr_SetFromWindowsErr(0); + DWORD error = GetLastError(); PyMem_DEL(dest); - return NULL; + return PyErr_SetFromWindowsErr(error); } PyObject *ret = PyUnicode_FromWideChar(dest, dest_size); From 3b4a5cb1bdd3723904de3eeb89b34e1611feb3f6 Mon Sep 17 00:00:00 2001 From: AN Long Date: Tue, 22 Mar 2022 22:49:53 +0800 Subject: [PATCH 06/17] Update for review and fix tests --- Lib/ntpath.py | 60 ++++++++++++++++++++++++----------------- Lib/test/test_ntpath.py | 3 ++- 2 files changed, 37 insertions(+), 26 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 
39d0cdbff1001a..526816a27fbbf4 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -24,27 +24,6 @@ from genericpath import * -try: - import _winapi - - def _normcase(s): - is_bytes = isinstance(s, bytes) - if is_bytes: - s = os.fsdecode(s) - s = _winapi.LCMapStringEx(_winapi.LOCALE_NAME_INVARIANT, - _winapi.LCMAP_LOWERCASE, - s.replace('/', '\\')) - if is_bytes: - s = os.fsencode(s) - return s -except ImportError: - def _normcase(s): - if isinstance(s, bytes): - return s.replace(b'/', b'\\').lower() - return s.replace('/', '\\').lower() - - - __all__ = ["normcase","isabs","join","splitdrive","split","splitext", "basename","dirname","commonprefix","getsize","getmtime", "getatime","getctime", "islink","exists","lexists","isdir","isfile", @@ -63,11 +42,42 @@ def _get_bothseps(path): # Other normalizations (such as optimizing '../' away) are not done # (this is done by normpath). -def normcase(s): - """Normalize case of pathname. +try: + from _winapi import ( + LCMapStringEx as _LCMapStringEx, + LOCALE_NAME_INVARIANT as _LOCALE_NAME_INVARIANT, + LCMAP_LOWERCASE as _LCMAP_LOWERCASE) - Makes all characters lowercase and all slashes into backslashes.""" - return _normcase(os.fspath(s)) + def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes. + """ + if isinstance(s, bytes): + if s == b'': + return s + s = os.fsdecode(s).replace('/', '\\') + return os.fsencode(_LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, s)) + else: + if isinstance(s, str) and s == '': + return s + s = os.fspath(s) + return _LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, + s.replace('/', '\\')) + # else: + # msg = f"expected str, bytes or os.PathLike object, not {type(s)}" + # raise TypeError(msg) +except ImportError: + def normcase(s): + """Normalize case of pathname. + + Makes all characters lowercase and all slashes into backslashes. 
+ """ + if isinstance(s, bytes): + return os.fsencode(os.fsdecode(s).replace('/', '\\').lower()) + return s.replace('/', '\\').lower() # Return whether a path is absolute. diff --git a/Lib/test/test_ntpath.py b/Lib/test/test_ntpath.py index 9799400a716447..ab3603bdd73015 100644 --- a/Lib/test/test_ntpath.py +++ b/Lib/test/test_ntpath.py @@ -852,7 +852,8 @@ def _check_function(self, func): def test_path_normcase(self): self._check_function(self.path.normcase) - self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') + if sys.platform == 'win32': + self.assertEqual(ntpath.normcase('\u03a9\u2126'), 'ωΩ') def test_path_isabs(self): self._check_function(self.path.isabs) From ef669415bd25da81c2ae835530f94dea68bbd8a0 Mon Sep 17 00:00:00 2001 From: AN Long Date: Wed, 23 Mar 2022 22:18:55 +0800 Subject: [PATCH 07/17] Update by review comment --- Lib/ntpath.py | 11 +++-------- Modules/_winapi.c | 6 +++--- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 526816a27fbbf4..8e7c5350b6f61e 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -53,22 +53,17 @@ def normcase(s): Makes all characters lowercase and all slashes into backslashes. """ + s = os.fspath(s) + if not s: + return s if isinstance(s, bytes): - if s == b'': - return s s = os.fsdecode(s).replace('/', '\\') return os.fsencode(_LCMapStringEx(_LOCALE_NAME_INVARIANT, _LCMAP_LOWERCASE, s)) else: - if isinstance(s, str) and s == '': - return s - s = os.fspath(s) return _LCMapStringEx(_LOCALE_NAME_INVARIANT, _LCMAP_LOWERCASE, s.replace('/', '\\')) - # else: - # msg = f"expected str, bytes or os.PathLike object, not {type(s)}" - # raise TypeError(msg) except ImportError: def normcase(s): """Normalize case of pathname. 
diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 085f43c58b9dca..9952ef4b7da725 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -1531,7 +1531,7 @@ _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, return PyErr_Format(PyExc_ValueError, "unsupported flags"); } - int dest_size = LCMapStringEx(locale, flags, src, wcslen(src), NULL, 0, + int dest_size = LCMapStringEx(locale, flags, src, -1, NULL, 0, NULL, NULL, 0); if (dest_size == 0) { return PyErr_SetFromWindowsErr(0); @@ -1542,7 +1542,7 @@ _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, return PyErr_NoMemory(); } - int nmapped = LCMapStringEx(locale, flags, src, wcslen(src), dest, dest_size, + int nmapped = LCMapStringEx(locale, flags, src, -1, dest, dest_size, NULL, NULL, 0); if (nmapped == 0) { DWORD error = GetLastError(); @@ -1550,7 +1550,7 @@ _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, return PyErr_SetFromWindowsErr(error); } - PyObject *ret = PyUnicode_FromWideChar(dest, dest_size); + PyObject *ret = PyUnicode_FromWideChar(dest, dest_size - 1); PyMem_DEL(dest); return ret; From 6c0366ba0b0155c913a7fb1c76baf0c4ec1150d8 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 26 Mar 2022 00:19:27 +0800 Subject: [PATCH 08/17] Update for review comments --- Lib/ntpath.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 8e7c5350b6f61e..49b530ac5572b4 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -57,9 +57,11 @@ def normcase(s): if not s: return s if isinstance(s, bytes): - s = os.fsdecode(s).replace('/', '\\') - return os.fsencode(_LCMapStringEx(_LOCALE_NAME_INVARIANT, - _LCMAP_LOWERCASE, s)) + encoding = sys.getfilesystemencoding() + s = s.decode(encoding, 'surrogateescape').replace('/', '\\') + s = _LCMapStringEx(_LOCALE_NAME_INVARIANT, + _LCMAP_LOWERCASE, s) + return s.encode(encoding, 'surrogateescape') else: return _LCMapStringEx(_LOCALE_NAME_INVARIANT, 
_LCMAP_LOWERCASE, From 6c6f2e2b92e11add98a051cf3532ea78a577678b Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 26 Mar 2022 00:38:17 +0800 Subject: [PATCH 09/17] fix news --- .../next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst index a825aa4ce30311..2cbff01fbc3f6c 100644 --- a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -1 +1 @@ -Using Windows function `LCMapStringEx` in `ntpath.normcase` instead of `str.lower` to deal with non ASCII characters. The `LCMapStringEx` is added in `_winapi` module also. +Using Windows function ``LCMapStringEx`` in ``ntpath.normcase`` instead of ``str.lower`` to deal with non ASCII characters. The ``LCMapStringEx`` is added in ``_winapi`` module also. From 15749b86409bfd7d1896832ee4f535862c660a70 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 26 Mar 2022 23:30:35 +0800 Subject: [PATCH 10/17] Remove constants which cannot be used --- Modules/_winapi.c | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/Modules/_winapi.c b/Modules/_winapi.c index 9952ef4b7da725..4845b4e6d4ad7c 100644 --- a/Modules/_winapi.c +++ b/Modules/_winapi.c @@ -2220,16 +2220,6 @@ static int winapi_exec(PyObject *m) WINAPI_CONSTANT(F_DWORD, LCMAP_TITLECASE); WINAPI_CONSTANT(F_DWORD, LCMAP_TRADITIONAL_CHINESE); WINAPI_CONSTANT(F_DWORD, LCMAP_UPPERCASE); - WINAPI_CONSTANT(F_DWORD, NORM_IGNORENONSPACE); - WINAPI_CONSTANT(F_DWORD, NORM_IGNORESYMBOLS); - WINAPI_CONSTANT(F_DWORD, LINGUISTIC_IGNORECASE); - WINAPI_CONSTANT(F_DWORD, LINGUISTIC_IGNOREDIACRITIC); - WINAPI_CONSTANT(F_DWORD, NORM_IGNORECASE); - WINAPI_CONSTANT(F_DWORD, NORM_IGNOREKANATYPE); - WINAPI_CONSTANT(F_DWORD, NORM_IGNOREWIDTH); - WINAPI_CONSTANT(F_DWORD, NORM_LINGUISTIC_CASING); - WINAPI_CONSTANT(F_DWORD, SORT_DIGITSASNUMBERS); - 
WINAPI_CONSTANT(F_DWORD, SORT_DIGITSASNUMBERS); - WINAPI_CONSTANT(F_DWORD, SORT_STRINGSORT); WINAPI_CONSTANT("i", NULL); From 7b2c62cbb9d1b0e70f74905a5868dc513c6cf831 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 26 Mar 2022 23:59:42 +0800 Subject: [PATCH 11/17] Fix ntpath_tests on non Windows systems --- Lib/ntpath.py | 1 + 1 file changed, 1 insertion(+) diff --git a/Lib/ntpath.py b/Lib/ntpath.py index 49b530ac5572b4..73b1bd12ddca76 100644 --- a/Lib/ntpath.py +++ b/Lib/ntpath.py @@ -72,6 +72,7 @@ def normcase(s): Makes all characters lowercase and all slashes into backslashes. """ + s = os.fspath(s) if isinstance(s, bytes): return os.fsencode(os.fsdecode(s).replace('/', '\\').lower()) return s.replace('/', '\\').lower() From a89adc0a4a60debe3f401a408b4b186631adc75e Mon Sep 17 00:00:00 2001 From: AN Long Date: Fri, 1 Apr 2022 00:01:40 +0800 Subject: [PATCH 12/17] Update news --- .../next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst index 2cbff01fbc3f6c..39b17a27017d93 100644 --- a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -1 +1,3 @@ -Using Windows function ``LCMapStringEx`` in ``ntpath.normcase`` instead of ``str.lower`` to deal with non ASCII characters. The ``LCMapStringEx`` is added in ``_winapi`` module also. +Using Windows function ``LCMapStringEx`` in :func:`ntpath.normcase`` instead +of :func:`str.lower`` to deal with non ASCII characters. The ``LCMapStringEx`` +is added in :mod:`_winapi`` module also. 
From 4119cc2a86d0326a6b2c82a9358e5547fa6866db Mon Sep 17 00:00:00 2001 From: AN Long Date: Fri, 1 Apr 2022 00:17:57 +0800 Subject: [PATCH 13/17] Skip test on block leaks on windows --- Lib/test/test_embed.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index f1ca6da147376c..672769455987ea 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1705,7 +1705,11 @@ def test_no_memleak(self): blocks = int(match.group(2)) with self.subTest(frozen_modules=flag, stmt=stmt): self.assertEqual(refs, 0, out) - self.assertEqual(blocks, 0, out) + if not MS_WINDOWS: + self.assertEqual(blocks, 0, out) + else: + # bpo-42658: on Windows, Python still leaks 1 memory block at exit + self.assertEqual(blocks, 1, out) class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase): From 8a63de20f7d8e544fdc2880e079e051fbe1485b5 Mon Sep 17 00:00:00 2001 From: AN Long Date: Fri, 1 Apr 2022 00:39:24 +0800 Subject: [PATCH 14/17] Fix news --- .../next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst index 39b17a27017d93..206b3f33d64730 100644 --- a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -1,3 +1,3 @@ -Using Windows function ``LCMapStringEx`` in :func:`ntpath.normcase`` instead -of :func:`str.lower`` to deal with non ASCII characters. The ``LCMapStringEx`` -is added in :mod:`_winapi`` module also. +Using Windows function ``LCMapStringEx`` in :func:``ntpath.normcase`` instead +of :func:``str.lower`` to deal with non ASCII characters. The ``LCMapStringEx`` +is added in :mod:``_winapi`` module also. 
From 6789cd9492d625cbc8f0bd6837284cb3dcea19ff Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 9 Apr 2022 21:17:08 +0800 Subject: [PATCH 15/17] Revert the test modify --- Lib/test/test_embed.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/Lib/test/test_embed.py b/Lib/test/test_embed.py index 672769455987ea..f1ca6da147376c 100644 --- a/Lib/test/test_embed.py +++ b/Lib/test/test_embed.py @@ -1705,11 +1705,7 @@ def test_no_memleak(self): blocks = int(match.group(2)) with self.subTest(frozen_modules=flag, stmt=stmt): self.assertEqual(refs, 0, out) - if not MS_WINDOWS: - self.assertEqual(blocks, 0, out) - else: - # bpo-42658: on Windows, Python still leaks 1 memory block at exit - self.assertEqual(blocks, 1, out) + self.assertEqual(blocks, 0, out) class StdPrinterTests(EmbeddingTestsMixin, unittest.TestCase): From d43037162a684443c171307697c28238e93b13b5 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 4 Jun 2022 21:54:39 +0800 Subject: [PATCH 16/17] update news --- .../next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst index 206b3f33d64730..852cc77676a31d 100644 --- a/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst +++ b/Misc/NEWS.d/next/Windows/2022-03-20-15-47-35.bpo-42658.16eXtb.rst @@ -1,3 +1,3 @@ -Using Windows function ``LCMapStringEx`` in :func:``ntpath.normcase`` instead -of :func:``str.lower`` to deal with non ASCII characters. The ``LCMapStringEx`` -is added in :mod:``_winapi`` module also. +Support native Windows case-insensitive path comparisons by using +``LCMapStringEx`` instead of :func:`str.lower` in :func:`ntpath.normcase`. +Add ``LCMapStringEx`` to the :mod:`_winapi` module. 
From 7e0dcf81d107231838bedb12126a0658b6906aa0 Mon Sep 17 00:00:00 2001 From: AN Long Date: Sat, 4 Jun 2022 22:37:59 +0800 Subject: [PATCH 17/17] update clinic for _winapi --- Modules/clinic/_winapi.c.h | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/Modules/clinic/_winapi.c.h b/Modules/clinic/_winapi.c.h index 9f73c11785c704..486029a6300304 100644 --- a/Modules/clinic/_winapi.c.h +++ b/Modules/clinic/_winapi.c.h @@ -826,7 +826,7 @@ PyDoc_STRVAR(_winapi_LCMapStringEx__doc__, "\n"); #define _WINAPI_LCMAPSTRINGEX_METHODDEF \ - {"LCMapStringEx", (PyCFunction)(void(*)(void))_winapi_LCMapStringEx, METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__}, + {"LCMapStringEx", _PyCFunction_CAST(_winapi_LCMapStringEx), METH_FASTCALL|METH_KEYWORDS, _winapi_LCMapStringEx__doc__}, static PyObject * _winapi_LCMapStringEx_impl(PyObject *module, LPCWSTR locale, DWORD flags, @@ -850,13 +850,9 @@ _winapi_LCMapStringEx(PyObject *module, PyObject *const *args, Py_ssize_t nargs, exit: /* Cleanup for locale */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)locale); - #endif /* USE_UNICODE_WCHAR_CACHE */ /* Cleanup for src */ - #if !USE_UNICODE_WCHAR_CACHE PyMem_Free((void *)src); - #endif /* USE_UNICODE_WCHAR_CACHE */ return return_value; } @@ -1205,4 +1201,4 @@ _winapi__mimetypes_read_windows_registry(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=b007dde2e7f2fff8 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=6cdefec63a1d7f12 input=a9049054013a1b77]*/