From a297d59609038ccfc3bdf6f350e8401f07b0a931 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 16 Mar 2023 16:05:38 +0300 Subject: [PATCH 1/8] Add comments to `{typing,_collections_abc}._type_repr` about each other (#102752) Remove `if` condition in `_collections_abc._type_repr` that's no longer needed, bringing it in sync with `typing._type_repr`. --- Lib/_collections_abc.py | 3 +-- Lib/typing.py | 3 +++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index c62233b81a5c95..f86b91a5e6fb0d 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -517,9 +517,8 @@ def _type_repr(obj): Copied from :mod:`typing` since collections.abc shouldn't depend on that module. + (Keep this roughly in sync with the typing version.) """ - if isinstance(obj, GenericAlias): - return repr(obj) if isinstance(obj, type): if obj.__module__ == 'builtins': return obj.__qualname__ diff --git a/Lib/typing.py b/Lib/typing.py index ab334395676159..3ee9679e50c0c4 100644 --- a/Lib/typing.py +++ b/Lib/typing.py @@ -230,6 +230,9 @@ def _type_repr(obj): typically enough to uniquely identify a type. For everything else, we fall back on repr(obj). """ + # When changing this function, don't forget about + # `_collections_abc._type_repr`, which does the same thing + # and must be consistent with this one. if isinstance(obj, type): if obj.__module__ == 'builtins': return obj.__qualname__ From b0ec6253c9cf1b22b87cd99f317dbaeb31263b20 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Thu, 16 Mar 2023 09:32:18 -0500 Subject: [PATCH 2/8] GH-102653: Make recipe docstring show the correct distribution (#102742) --- Doc/library/random.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Doc/library/random.rst b/Doc/library/random.rst index 4522f5a8d26b9d..098684d7270ffa 100644 --- a/Doc/library/random.rst +++ b/Doc/library/random.rst @@ -610,7 +610,8 @@ from the combinatoric iterators in the :mod:`itertools` module: return tuple(pool[i] for i in indices) def random_combination_with_replacement(iterable, r): - "Random selection from itertools.combinations_with_replacement(iterable, r)" + "Choose r elements with replacement. Order the result to match the iterable." + # Result will be in set(itertools.combinations_with_replacement(iterable, r)). pool = tuple(iterable) n = len(pool) indices = sorted(random.choices(range(n), k=r)) From fbe82fdd777cead5d6e08ac0d1da19738c19bd81 Mon Sep 17 00:00:00 2001 From: Nikita Sobolev Date: Thu, 16 Mar 2023 17:47:30 +0300 Subject: [PATCH 3/8] gh-102721: Improve coverage of `_collections_abc._CallableGenericAlias` (#102722) Co-authored-by: Alex Waygood --- Lib/_collections_abc.py | 7 ------- Lib/test/test_typing.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/Lib/_collections_abc.py b/Lib/_collections_abc.py index f86b91a5e6fb0d..9d7724c33474cc 100644 --- a/Lib/_collections_abc.py +++ b/Lib/_collections_abc.py @@ -481,15 +481,8 @@ def __getitem__(self, item): # rather than the default types.GenericAlias object. Most of the # code is copied from typing's _GenericAlias and the builtin # types.GenericAlias. - if not isinstance(item, tuple): item = (item,) - # A special case in PEP 612 where if X = Callable[P, int], - # then X[int, str] == X[[int, str]]. - if (len(self.__parameters__) == 1 - and _is_param_expr(self.__parameters__[0]) - and item and not _is_param_expr(item[0])): - item = (item,) new_args = super().__getitem__(item).__args__ diff --git a/Lib/test/test_typing.py b/Lib/test/test_typing.py index 89c725cda54f12..c9f55de95c548f 100644 --- a/Lib/test/test_typing.py +++ b/Lib/test/test_typing.py @@ -1921,14 +1921,29 @@ def test_weakref(self): self.assertEqual(weakref.ref(alias)(), alias) def test_pickle(self): + global T_pickle, P_pickle, TS_pickle # needed for pickling Callable = self.Callable - alias = Callable[[int, str], float] - for proto in range(pickle.HIGHEST_PROTOCOL + 1): - s = pickle.dumps(alias, proto) - loaded = pickle.loads(s) - self.assertEqual(alias.__origin__, loaded.__origin__) - self.assertEqual(alias.__args__, loaded.__args__) - self.assertEqual(alias.__parameters__, loaded.__parameters__) + T_pickle = TypeVar('T_pickle') + P_pickle = ParamSpec('P_pickle') + TS_pickle = TypeVarTuple('TS_pickle') + + samples = [ + Callable[[int, str], float], + Callable[P_pickle, int], + Callable[P_pickle, T_pickle], + Callable[Concatenate[int, P_pickle], int], + Callable[Concatenate[*TS_pickle, P_pickle], int], + ] + for alias in samples: + for proto in range(pickle.HIGHEST_PROTOCOL + 1): + with self.subTest(alias=alias, proto=proto): + s = pickle.dumps(alias, proto) + loaded = pickle.loads(s) + self.assertEqual(alias.__origin__, loaded.__origin__) + self.assertEqual(alias.__args__, loaded.__args__) + self.assertEqual(alias.__parameters__, loaded.__parameters__) + + del T_pickle, P_pickle, TS_pickle # cleaning up global state def test_var_substitution(self): Callable = self.Callable @@ -1954,6 +1969,16 @@ def test_var_substitution(self): self.assertEqual(C5[int, str, float], Callable[[typing.List[int], tuple[str, int], float], int]) + def test_type_subst_error(self): + Callable = self.Callable + P = ParamSpec('P') + T = TypeVar('T') + + pat = "Expected a list of types, an ellipsis, ParamSpec, or Concatenate." + + with self.assertRaisesRegex(TypeError, pat): + Callable[P, T][0, int] + def test_type_erasure(self): Callable = self.Callable class C1(Callable): From adaed17341e649fabb0f522e9b4f5962fcdf1d48 Mon Sep 17 00:00:00 2001 From: Kumar Aditya <59607654+kumaraditya303@users.noreply.github.com> Date: Thu, 16 Mar 2023 20:28:10 +0530 Subject: [PATCH 4/8] GH-102748: remove legacy support for generator based coroutines from `asyncio.iscoroutine` (#102749) Co-authored-by: Alex Waygood --- Doc/whatsnew/3.12.rst | 4 ++++ Lib/asyncio/coroutines.py | 3 +-- Lib/test/test_asyncio/test_pep492.py | 6 ++++++ .../Library/2023-03-16-08-17-29.gh-issue-102748.WNACpI.rst | 3 +++ 4 files changed, 14 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2023-03-16-08-17-29.gh-issue-102748.WNACpI.rst diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index 217ffec1ee1007..b9c668543e1b73 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -237,6 +237,10 @@ asyncio * Add C implementation of :func:`asyncio.current_task` for 4x-6x speedup. (Contributed by Itamar Ostricher and Pranav Thulasiram Bhat in :gh:`100344`.) +* :func:`asyncio.iscoroutine` now returns ``False`` for generators as + :mod:`asyncio` does not support legacy generator-based coroutines. + (Contributed by Kumar Aditya in :gh:`102748`.) + inspect ------- diff --git a/Lib/asyncio/coroutines.py b/Lib/asyncio/coroutines.py index 7fda0e449d500a..ab4f30eb51ba2c 100644 --- a/Lib/asyncio/coroutines.py +++ b/Lib/asyncio/coroutines.py @@ -25,8 +25,7 @@ def iscoroutinefunction(func): # Prioritize native coroutine check to speed-up # asyncio.iscoroutine. -_COROUTINE_TYPES = (types.CoroutineType, types.GeneratorType, - collections.abc.Coroutine) +_COROUTINE_TYPES = (types.CoroutineType, collections.abc.Coroutine) _iscoroutine_typecache = set() diff --git a/Lib/test/test_asyncio/test_pep492.py b/Lib/test/test_asyncio/test_pep492.py index f833f788dcb98f..dc25a46985e349 100644 --- a/Lib/test/test_asyncio/test_pep492.py +++ b/Lib/test/test_asyncio/test_pep492.py @@ -119,6 +119,12 @@ async def foo(): pass self.assertTrue(asyncio.iscoroutine(FakeCoro())) + def test_iscoroutine_generator(self): + def foo(): yield + + self.assertFalse(asyncio.iscoroutine(foo())) + + def test_iscoroutinefunction(self): async def foo(): pass self.assertTrue(asyncio.iscoroutinefunction(foo)) diff --git a/Misc/NEWS.d/next/Library/2023-03-16-08-17-29.gh-issue-102748.WNACpI.rst b/Misc/NEWS.d/next/Library/2023-03-16-08-17-29.gh-issue-102748.WNACpI.rst new file mode 100644 index 00000000000000..b1dc67f38fe85d --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-03-16-08-17-29.gh-issue-102748.WNACpI.rst @@ -0,0 +1,3 @@ +:func:`asyncio.iscoroutine` now returns ``False`` for generators as +:mod:`asyncio` does not support legacy generator-based coroutines. +Patch by Kumar Aditya. From 84e20c689a8b3b6cebfd50d044c62af5d0e7dec1 Mon Sep 17 00:00:00 2001 From: Eric Snow Date: Thu, 16 Mar 2023 09:26:42 -0600 Subject: [PATCH 5/8] gh-102737: Un-ignore ceval.c in the CI globals check (gh-102745) The tool now allows user-added #LINE preprocessor directives. https://github.com/python/cpython/issues/102737 --- Tools/c-analyzer/c_parser/preprocessor/gcc.py | 10 +++++++--- Tools/c-analyzer/cpython/_parser.py | 4 ---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py index 24c1b0e9b9d48c..c680f351f22416 100644 --- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py +++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py @@ -153,9 +153,13 @@ def _iter_top_include_lines(lines, topfile, cwd, # XXX How can a file return to line 1? #assert lno > 1, (line, lno) else: - # It's the next line from the file. - assert included == files[-1], (line, files) - assert lno > 1, (line, lno) + if included == files[-1]: + # It's the next line from the file. + assert lno > 1, (line, lno) + else: + # We ran into a user-added #LINE directive, + # which we promptly ignore. + pass elif not files: raise NotImplementedError((line,)) elif filter_reqfile(files[-1]): diff --git a/Tools/c-analyzer/cpython/_parser.py b/Tools/c-analyzer/cpython/_parser.py index a2911e030ffee1..acf30e2c4020b3 100644 --- a/Tools/c-analyzer/cpython/_parser.py +++ b/Tools/c-analyzer/cpython/_parser.py @@ -96,10 +96,6 @@ def clean_lines(text): # has gone wrong where # we handle "maybe inline actual" # in Tools/c-analyzer/c_parser/parser/_global.py. Modules/expat/xmlparse.c - -# The parser doesn't like the #line directives -# that originate from generated_cases.c.h -Python/ceval.c ''') INCL_DIRS = clean_lines(''' From 61d6c110d6019a7ba79c76cc50aeaa85a1b37c08 Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 16 Mar 2023 16:21:49 +0000 Subject: [PATCH 6/8] gh-102192: Replace PyErr_Fetch/Restore etc by more efficient alternatives (#102743) --- Python/pythonrun.c | 79 ++++++++++++++++++++-------------------------- 1 file changed, 35 insertions(+), 44 deletions(-) diff --git a/Python/pythonrun.c b/Python/pythonrun.c index 34a44dd92847ba..5381b105a39fed 100644 --- a/Python/pythonrun.c +++ b/Python/pythonrun.c @@ -698,30 +698,30 @@ _Py_HandleSystemExit(int *exitcode_p) return 0; } - PyObject *exception, *value, *tb; - PyErr_Fetch(&exception, &value, &tb); - fflush(stdout); int exitcode = 0; - if (value == NULL || value == Py_None) { + + PyObject *exc = PyErr_GetRaisedException(); + if (exc == NULL) { goto done; } + assert(PyExceptionInstance_Check(exc)); - if (PyExceptionInstance_Check(value)) { - /* The error code should be in the `code' attribute. */ - PyObject *code = PyObject_GetAttr(value, &_Py_ID(code)); - if (code) { - Py_SETREF(value, code); - if (value == Py_None) - goto done; + /* The error code should be in the `code' attribute. */ + PyObject *code = PyObject_GetAttr(exc, &_Py_ID(code)); + if (code) { + Py_SETREF(exc, code); + if (exc == Py_None) { + goto done; } - /* If we failed to dig out the 'code' attribute, - just let the else clause below print the error. */ } + /* If we failed to dig out the 'code' attribute, + * just let the else clause below print the error. + */ - if (PyLong_Check(value)) { - exitcode = (int)PyLong_AsLong(value); + if (PyLong_Check(exc)) { + exitcode = (int)PyLong_AsLong(exc); } else { PyThreadState *tstate = _PyThreadState_GET(); @@ -732,20 +732,17 @@ _Py_HandleSystemExit(int *exitcode_p) */ PyErr_Clear(); if (sys_stderr != NULL && sys_stderr != Py_None) { - PyFile_WriteObject(value, sys_stderr, Py_PRINT_RAW); + PyFile_WriteObject(exc, sys_stderr, Py_PRINT_RAW); } else { - PyObject_Print(value, stderr, Py_PRINT_RAW); + PyObject_Print(exc, stderr, Py_PRINT_RAW); fflush(stderr); } PySys_WriteStderr("\n"); exitcode = 1; } - done: - /* Cleanup the exception */ - Py_CLEAR(exception); - Py_CLEAR(value); - Py_CLEAR(tb); +done: + Py_CLEAR(exc); *exitcode_p = exitcode; return 1; } @@ -1641,35 +1638,29 @@ PyRun_FileExFlags(FILE *fp, const char *filename, int start, PyObject *globals, } - static void -flush_io(void) +flush_io_stream(PyThreadState *tstate, PyObject *name) { - PyObject *f, *r; - PyObject *type, *value, *traceback; - - /* Save the current exception */ - PyErr_Fetch(&type, &value, &traceback); - - PyThreadState *tstate = _PyThreadState_GET(); - f = _PySys_GetAttr(tstate, &_Py_ID(stderr)); - if (f != NULL) { - r = _PyObject_CallMethodNoArgs(f, &_Py_ID(flush)); - if (r) - Py_DECREF(r); - else - PyErr_Clear(); - } - f = _PySys_GetAttr(tstate, &_Py_ID(stdout)); + PyObject *f = _PySys_GetAttr(tstate, name); if (f != NULL) { - r = _PyObject_CallMethodNoArgs(f, &_Py_ID(flush)); - if (r) + PyObject *r = _PyObject_CallMethodNoArgs(f, &_Py_ID(flush)); + if (r) { Py_DECREF(r); - else + } + else { PyErr_Clear(); + } } +} - PyErr_Restore(type, value, traceback); +static void +flush_io(void) +{ + PyThreadState *tstate = _PyThreadState_GET(); + PyObject *exc = _PyErr_GetRaisedException(tstate); + flush_io_stream(tstate, &_Py_ID(stderr)); + flush_io_stream(tstate, &_Py_ID(stdout)); + _PyErr_SetRaisedException(tstate, exc); } static PyObject * From e108af6eca9904d177d769281907d12ac6894dfc Mon Sep 17 00:00:00 2001 From: Irit Katriel <1055913+iritkatriel@users.noreply.github.com> Date: Thu, 16 Mar 2023 16:41:10 +0000 Subject: [PATCH 7/8] gh-102192: remove redundant exception fields from ssl module socket (#102466) --- Modules/_ssl.c | 28 +++++++++------------------- Modules/_ssl/debughelpers.c | 7 +++---- 2 files changed, 12 insertions(+), 23 deletions(-) diff --git a/Modules/_ssl.c b/Modules/_ssl.c index 28112317bc289e..121d18884d0a9f 100644 --- a/Modules/_ssl.c +++ b/Modules/_ssl.c @@ -318,9 +318,7 @@ typedef struct { * store exception information on the socket. The handshake, read, write, * and shutdown methods check for chained exceptions. */ - PyObject *exc_type; - PyObject *exc_value; - PyObject *exc_tb; + PyObject *exc; } PySSLSocket; typedef struct { @@ -564,13 +562,11 @@ fill_and_set_sslerror(_sslmodulestate *state, static int PySSL_ChainExceptions(PySSLSocket *sslsock) { - if (sslsock->exc_type == NULL) + if (sslsock->exc == NULL) return 0; - _PyErr_ChainExceptions(sslsock->exc_type, sslsock->exc_value, sslsock->exc_tb); - sslsock->exc_type = NULL; - sslsock->exc_value = NULL; - sslsock->exc_tb = NULL; + _PyErr_ChainExceptions1(sslsock->exc); + sslsock->exc = NULL; return -1; } @@ -807,9 +803,7 @@ newPySSLSocket(PySSLContext *sslctx, PySocketSockObject *sock, self->owner = NULL; self->server_hostname = NULL; self->err = err; - self->exc_type = NULL; - self->exc_value = NULL; - self->exc_tb = NULL; + self->exc = NULL; /* Make sure the SSL error state is initialized */ ERR_clear_error(); @@ -2179,9 +2173,7 @@ Passed as \"self\" in servername callback."); static int PySSL_traverse(PySSLSocket *self, visitproc visit, void *arg) { - Py_VISIT(self->exc_type); - Py_VISIT(self->exc_value); - Py_VISIT(self->exc_tb); + Py_VISIT(self->exc); Py_VISIT(Py_TYPE(self)); return 0; } @@ -2189,9 +2181,7 @@ PySSL_traverse(PySSLSocket *self, visitproc visit, void *arg) static int PySSL_clear(PySSLSocket *self) { - Py_CLEAR(self->exc_type); - Py_CLEAR(self->exc_value); - Py_CLEAR(self->exc_tb); + Py_CLEAR(self->exc); return 0; } @@ -2536,7 +2526,7 @@ _ssl__SSLSocket_read_impl(PySSLSocket *self, Py_ssize_t len, PySSL_SetError(self, retval, __FILE__, __LINE__); goto error; } - if (self->exc_type != NULL) + if (self->exc != NULL) goto error; done: @@ -2662,7 +2652,7 @@ _ssl__SSLSocket_shutdown_impl(PySSLSocket *self) PySSL_SetError(self, ret, __FILE__, __LINE__); return NULL; } - if (self->exc_type != NULL) + if (self->exc != NULL) goto error; if (sock) /* It's already INCREF'ed */ diff --git a/Modules/_ssl/debughelpers.c b/Modules/_ssl/debughelpers.c index 08f3457035b90c..217f224942556e 100644 --- a/Modules/_ssl/debughelpers.c +++ b/Modules/_ssl/debughelpers.c @@ -74,7 +74,7 @@ _PySSL_msg_callback(int write_p, int version, int content_type, buf, len ); if (res == NULL) { - PyErr_Fetch(&ssl_obj->exc_type, &ssl_obj->exc_value, &ssl_obj->exc_tb); + ssl_obj->exc = PyErr_GetRaisedException(); } else { Py_DECREF(res); } @@ -138,8 +138,7 @@ _PySSL_keylog_callback(const SSL *ssl, const char *line) lock = PyThread_allocate_lock(); if (lock == NULL) { PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); - PyErr_Fetch(&ssl_obj->exc_type, &ssl_obj->exc_value, - &ssl_obj->exc_tb); + ssl_obj->exc = PyErr_GetRaisedException(); return; } } @@ -156,7 +155,7 @@ _PySSL_keylog_callback(const SSL *ssl, const char *line) errno = e; PyErr_SetFromErrnoWithFilenameObject(PyExc_OSError, ssl_obj->ctx->keylog_filename); - PyErr_Fetch(&ssl_obj->exc_type, &ssl_obj->exc_value, &ssl_obj->exc_tb); + ssl_obj->exc = PyErr_GetRaisedException(); } PyGILState_Release(threadstate); } From 0f175766e27642108c65bba04bbd54dcf8799b0e Mon Sep 17 00:00:00 2001 From: Steve Dower Date: Thu, 16 Mar 2023 17:27:21 +0000 Subject: [PATCH 8/8] gh-99726: Improves correctness of stat results for Windows, and uses faster API when available (GH-102149) This deprecates `st_ctime` fields on Windows, with the intent to change them to contain the correct value in 3.14. For now, they should keep returning the creation time as they always have. --- Doc/library/os.rst | 88 +++++--- Doc/whatsnew/3.12.rst | 16 ++ Include/internal/pycore_fileutils.h | 5 +- Include/internal/pycore_fileutils_windows.h | 80 ++++++++ Lib/test/test_os.py | 12 +- ...3-02-22-17-26-10.gh-issue-99726.76t957.rst | 2 + Modules/posixmodule.c | 191 +++++++++++++----- PCbuild/pythoncore.vcxproj | 1 + PCbuild/pythoncore.vcxproj.filters | 3 + Python/fileutils.c | 130 ++++++++++-- 10 files changed, 446 insertions(+), 82 deletions(-) create mode 100644 Include/internal/pycore_fileutils_windows.h create mode 100644 Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst diff --git a/Doc/library/os.rst b/Doc/library/os.rst index 5b9f49be1fad55..3153f79e10ce1f 100644 --- a/Doc/library/os.rst +++ b/Doc/library/os.rst @@ -2858,6 +2858,12 @@ features: Added support for the :class:`~os.PathLike` interface. Added support for :class:`bytes` paths on Windows. + .. versionchanged:: 3.12 + The ``st_ctime`` attribute of a stat result is deprecated on Windows. + The file creation time is properly available as ``st_birthtime``, and + in the future ``st_ctime`` may be changed to return zero or the + metadata change time, if available. + .. function:: stat(path, *, dir_fd=None, follow_symlinks=True) @@ -2973,10 +2979,12 @@ features: .. attribute:: st_ctime - Platform dependent: + Time of most recent metadata change expressed in seconds. - * the time of most recent metadata change on Unix, - * the time of creation on Windows, expressed in seconds. + .. versionchanged:: 3.12 + ``st_ctime`` is deprecated on Windows. Use ``st_birthtime`` for + the file creation time. In the future, ``st_ctime`` will contain + the time of the most recent metadata change, as for other platforms. .. attribute:: st_atime_ns @@ -2989,29 +2997,48 @@ features: .. attribute:: st_ctime_ns - Platform dependent: + Time of most recent metadata change expressed in nanoseconds as an + integer. + + .. versionchanged:: 3.12 + ``st_ctime_ns`` is deprecated on Windows. Use ``st_birthtime_ns`` + for the file creation time. In the future, ``st_ctime`` will contain + the time of the most recent metadata change, as for other platforms. + + .. attribute:: st_birthtime + + Time of file creation expressed in seconds. This attribute is not + always available, and may raise :exc:`AttributeError`. + + .. versionchanged:: 3.12 + ``st_birthtime`` is now available on Windows. + + .. attribute:: st_birthtime_ns - * the time of most recent metadata change on Unix, - * the time of creation on Windows, expressed in nanoseconds as an - integer. + Time of file creation expressed in nanoseconds as an integer. + This attribute is not always available, and may raise + :exc:`AttributeError`. + + .. versionadded:: 3.12 .. note:: The exact meaning and resolution of the :attr:`st_atime`, - :attr:`st_mtime`, and :attr:`st_ctime` attributes depend on the operating - system and the file system. For example, on Windows systems using the FAT - or FAT32 file systems, :attr:`st_mtime` has 2-second resolution, and - :attr:`st_atime` has only 1-day resolution. See your operating system - documentation for details. + :attr:`st_mtime`, :attr:`st_ctime` and :attr:`st_birthtime` attributes + depend on the operating system and the file system. For example, on + Windows systems using the FAT32 file systems, :attr:`st_mtime` has + 2-second resolution, and :attr:`st_atime` has only 1-day resolution. + See your operating system documentation for details. Similarly, although :attr:`st_atime_ns`, :attr:`st_mtime_ns`, - and :attr:`st_ctime_ns` are always expressed in nanoseconds, many - systems do not provide nanosecond precision. On systems that do - provide nanosecond precision, the floating-point object used to - store :attr:`st_atime`, :attr:`st_mtime`, and :attr:`st_ctime` - cannot preserve all of it, and as such will be slightly inexact. - If you need the exact timestamps you should always use - :attr:`st_atime_ns`, :attr:`st_mtime_ns`, and :attr:`st_ctime_ns`. + :attr:`st_ctime_ns` and :attr:`st_birthtime_ns` are always expressed in + nanoseconds, many systems do not provide nanosecond precision. On + systems that do provide nanosecond precision, the floating-point object + used to store :attr:`st_atime`, :attr:`st_mtime`, :attr:`st_ctime` and + :attr:`st_birthtime` cannot preserve all of it, and as such will be + slightly inexact. If you need the exact timestamps you should always use + :attr:`st_atime_ns`, :attr:`st_mtime_ns`, :attr:`st_ctime_ns` and + :attr:`st_birthtime_ns`. On some Unix systems (such as Linux), the following attributes may also be available: @@ -3041,10 +3068,6 @@ features: File generation number. - .. attribute:: st_birthtime - - Time of file creation. - On Solaris and derivatives, the following attributes may also be available: @@ -3117,6 +3140,25 @@ features: files as :const:`S_IFCHR`, :const:`S_IFIFO` or :const:`S_IFBLK` as appropriate. + .. versionchanged:: 3.12 + On Windows, :attr:`st_ctime` is deprecated. Eventually, it will + contain the last metadata change time, for consistency with other + platforms, but for now still contains creation time. + Use :attr:`st_birthtime` for the creation time. + + .. versionchanged:: 3.12 + On Windows, :attr:`st_ino` may now be up to 128 bits, depending + on the file system. Previously it would not be above 64 bits, and + larger file identifiers would be arbitrarily packed. + + .. versionchanged:: 3.12 + On Windows, :attr:`st_rdev` no longer returns a value. Previously + it would contain the same as :attr:`st_dev`, which was incorrect. + + .. versionadded:: 3.12 + Added the :attr:`st_birthtime` member on Windows. + + .. function:: statvfs(path) Perform a :c:func:`statvfs` system call on the given path. The return value is diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst index b9c668543e1b73..03b1f975746787 100644 --- a/Doc/whatsnew/3.12.rst +++ b/Doc/whatsnew/3.12.rst @@ -306,6 +306,16 @@ os functions on Windows for enumerating drives, volumes and mount points. (Contributed by Steve Dower in :gh:`102519`.) +* :func:`os.stat` and :func:`os.lstat` are now more accurate on Windows. + The ``st_birthtime`` field will now be filled with the creation time + of the file, and ``st_ctime`` is deprecated but still contains the + creation time (but in the future will return the last metadata change, + for consistency with other platforms). ``st_dev`` may be up to 64 bits + and ``st_ino`` up to 128 bits depending on your file system, and + ``st_rdev`` is always set to zero rather than incorrect values. + Both functions may be significantly faster on newer releases of + Windows. (Contributed by Steve Dower in :gh:`99726`.) + os.path ------- @@ -469,6 +479,12 @@ Deprecated warning at compile time. This field will be removed in Python 3.14. (Contributed by Ramvikrams and Kumar Aditya in :gh:`101193`. PEP by Ken Jin.) +* The ``st_ctime`` fields return by :func:`os.stat` and :func:`os.lstat` on + Windows are deprecated. In a future release, they will contain the last + metadata change time, consistent with other platforms. For now, they still + contain the creation time, which is also available in the new ``st_birthtime`` + field. (Contributed by Steve Dower in :gh:`99726`.) + Pending Removal in Python 3.13 ------------------------------ diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 445ac0a3d955d9..ef6642d00f1b54 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -66,7 +66,7 @@ PyAPI_FUNC(PyObject *) _Py_device_encoding(int); #ifdef MS_WINDOWS struct _Py_stat_struct { - unsigned long st_dev; + uint64_t st_dev; uint64_t st_ino; unsigned short st_mode; int st_nlink; @@ -80,8 +80,11 @@ struct _Py_stat_struct { int st_mtime_nsec; time_t st_ctime; int st_ctime_nsec; + time_t st_birthtime; + int st_birthtime_nsec; unsigned long st_file_attributes; unsigned long st_reparse_tag; + uint64_t st_ino_high; }; #else # define _Py_stat_struct stat diff --git a/Include/internal/pycore_fileutils_windows.h b/Include/internal/pycore_fileutils_windows.h new file mode 100644 index 00000000000000..44874903b092f3 --- /dev/null +++ b/Include/internal/pycore_fileutils_windows.h @@ -0,0 +1,80 @@ +#ifndef Py_INTERNAL_FILEUTILS_WINDOWS_H +#define Py_INTERNAL_FILEUTILS_WINDOWS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "Py_BUILD_CORE must be defined to include this header" +#endif + +#ifdef MS_WINDOWS + +#if !defined(NTDDI_WIN10_NI) || !(NTDDI_VERSION >= NTDDI_WIN10_NI) +typedef struct _FILE_STAT_BASIC_INFORMATION { + LARGE_INTEGER FileId; + LARGE_INTEGER CreationTime; + LARGE_INTEGER LastAccessTime; + LARGE_INTEGER LastWriteTime; + LARGE_INTEGER ChangeTime; + LARGE_INTEGER AllocationSize; + LARGE_INTEGER EndOfFile; + ULONG FileAttributes; + ULONG ReparseTag; + ULONG NumberOfLinks; + ULONG DeviceType; + ULONG DeviceCharacteristics; + ULONG Reserved; + FILE_ID_128 FileId128; + LARGE_INTEGER VolumeSerialNumber; +} FILE_STAT_BASIC_INFORMATION; + +typedef enum _FILE_INFO_BY_NAME_CLASS { + FileStatByNameInfo, + FileStatLxByNameInfo, + FileCaseSensitiveByNameInfo, + FileStatBasicByNameInfo, + MaximumFileInfoByNameClass +} FILE_INFO_BY_NAME_CLASS; +#endif + +typedef BOOL (WINAPI *PGetFileInformationByName)( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +); + +static inline BOOL _Py_GetFileInformationByName( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +) { + static PGetFileInformationByName GetFileInformationByName = NULL; + static int GetFileInformationByName_init = -1; + + if (GetFileInformationByName_init < 0) { + HMODULE hMod = LoadLibraryW(L"api-ms-win-core-file-l2-1-4"); + GetFileInformationByName_init = 0; + if (hMod) { + GetFileInformationByName = (PGetFileInformationByName)GetProcAddress( + hMod, "GetFileInformationByName"); + if (GetFileInformationByName) { + GetFileInformationByName_init = 1; + } else { + FreeLibrary(hMod); + } + } + } + + if (GetFileInformationByName_init <= 0) { + SetLastError(ERROR_NOT_SUPPORTED); + return FALSE; + } + return GetFileInformationByName(FileName, FileInformationClass, FileInfoBuffer, FileInfoBufferSize); +} + +#endif + +#endif diff --git a/Lib/test/test_os.py b/Lib/test/test_os.py index 42357fef80ec89..74ece3ffb4ed17 100644 --- a/Lib/test/test_os.py +++ b/Lib/test/test_os.py @@ -556,6 +556,15 @@ def trunc(x): return x nanosecondy = getattr(result, name + "_ns") // 10000 self.assertAlmostEqual(floaty, nanosecondy, delta=2) + # Ensure both birthtime and birthtime_ns roughly agree, if present + try: + floaty = int(result.st_birthtime * 100000) + nanosecondy = result.st_birthtime_ns // 10000 + except AttributeError: + pass + else: + self.assertAlmostEqual(floaty, nanosecondy, delta=2) + try: result[200] self.fail("No exception raised") @@ -4234,7 +4243,8 @@ def assert_stat_equal(self, stat1, stat2, skip_fields): for attr in dir(stat1): if not attr.startswith("st_"): continue - if attr in ("st_dev", "st_ino", "st_nlink"): + if attr in ("st_dev", "st_ino", "st_nlink", "st_ctime", + "st_ctime_ns"): continue self.assertEqual(getattr(stat1, attr), getattr(stat2, attr), diff --git a/Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst b/Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst new file mode 100644 index 00000000000000..e2578620017894 --- /dev/null +++ b/Misc/NEWS.d/next/Windows/2023-02-22-17-26-10.gh-issue-99726.76t957.rst @@ -0,0 +1,2 @@ +Improves correctness of stat results for Windows, and uses faster API when +available diff --git a/Modules/posixmodule.c b/Modules/posixmodule.c index 7d91f7e4bac76b..e38caf7cc0abee 100644 --- a/Modules/posixmodule.c +++ b/Modules/posixmodule.c @@ -41,6 +41,7 @@ #ifndef MS_WINDOWS # include "posixmodule.h" #else +# include "pycore_fileutils_windows.h" # include "winreparse.h" #endif @@ -668,8 +669,11 @@ PyOS_AfterFork(void) #ifdef MS_WINDOWS /* defined in fileutils.c */ void _Py_time_t_to_FILE_TIME(time_t, int, FILETIME *); -void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *, - ULONG, struct _Py_stat_struct *); +void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *, ULONG, + FILE_BASIC_INFO *, FILE_ID_INFO *, + struct _Py_stat_struct *); +void _Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *, + struct _Py_stat_struct *); #endif @@ -1819,12 +1823,39 @@ attributes_from_dir(LPCWSTR pszFile, BY_HANDLE_FILE_INFORMATION *info, ULONG *re return TRUE; } + +static void +update_st_mode_from_path(const wchar_t *path, DWORD attr, + struct _Py_stat_struct *result) +{ + if (!(attr & FILE_ATTRIBUTE_DIRECTORY)) { + /* Fix the file execute permissions. This hack sets S_IEXEC if + the filename has an extension that is commonly used by files + that CreateProcessW can execute. A real implementation calls + GetSecurityInfo, OpenThreadToken/OpenProcessToken, and + AccessCheck to check for generic read, write, and execute + access. */ + const wchar_t *fileExtension = wcsrchr(path, '.'); + if (fileExtension) { + if (_wcsicmp(fileExtension, L".exe") == 0 || + _wcsicmp(fileExtension, L".bat") == 0 || + _wcsicmp(fileExtension, L".cmd") == 0 || + _wcsicmp(fileExtension, L".com") == 0) { + result->st_mode |= 0111; + } + } + } +} + + static int -win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, - BOOL traverse) +win32_xstat_slow_impl(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse) { HANDLE hFile; BY_HANDLE_FILE_INFORMATION fileInfo; + FILE_BASIC_INFO basicInfo; + FILE_ID_INFO idInfo; FILE_ATTRIBUTE_TAG_INFO tagInfo = { 0 }; DWORD fileType, error; BOOL isUnhandledTag = FALSE; @@ -1954,12 +1985,16 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, for an unhandled tag. */ } else if (!isUnhandledTag) { CloseHandle(hFile); - return win32_xstat_impl(path, result, TRUE); + return win32_xstat_slow_impl(path, result, TRUE); } } } - if (!GetFileInformationByHandle(hFile, &fileInfo)) { + if (!GetFileInformationByHandle(hFile, &fileInfo) || + !GetFileInformationByHandleEx(hFile, FileBasicInfo, + &basicInfo, sizeof(basicInfo)) || + !GetFileInformationByHandleEx(hFile, FileIdInfo, + &idInfo, sizeof(idInfo))) { switch (GetLastError()) { case ERROR_INVALID_PARAMETER: case ERROR_INVALID_FUNCTION: @@ -1975,25 +2010,8 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, } } - _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, result); - - if (!(fileInfo.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - /* Fix the file execute permissions. This hack sets S_IEXEC if - the filename has an extension that is commonly used by files - that CreateProcessW can execute. A real implementation calls - GetSecurityInfo, OpenThreadToken/OpenProcessToken, and - AccessCheck to check for generic read, write, and execute - access. */ - const wchar_t *fileExtension = wcsrchr(path, '.'); - if (fileExtension) { - if (_wcsicmp(fileExtension, L".exe") == 0 || - _wcsicmp(fileExtension, L".bat") == 0 || - _wcsicmp(fileExtension, L".cmd") == 0 || - _wcsicmp(fileExtension, L".com") == 0) { - result->st_mode |= 0111; - } - } - } + _Py_attribute_data_to_stat(&fileInfo, tagInfo.ReparseTag, &basicInfo, &idInfo, result); + update_st_mode_from_path(path, fileInfo.dwFileAttributes, result); cleanup: if (hFile != INVALID_HANDLE_VALUE) { @@ -2010,6 +2028,39 @@ win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, return retval; } +static int +win32_xstat_impl(const wchar_t *path, struct _Py_stat_struct *result, + BOOL traverse) +{ + FILE_STAT_BASIC_INFORMATION statInfo; + if (_Py_GetFileInformationByName(path, FileStatBasicByNameInfo, + &statInfo, sizeof(statInfo))) { + if (// Cannot use fast path for reparse points ... + !(statInfo.FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT) + // ... unless it's a name surrogate (symlink) and we're not following + || (!traverse && IsReparseTagNameSurrogate(statInfo.ReparseTag)) + ) { + _Py_stat_basic_info_to_stat(&statInfo, result); + update_st_mode_from_path(path, statInfo.FileAttributes, result); + return 0; + } + } else { + switch(GetLastError()) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_NOT_READY: + case ERROR_BAD_NET_NAME: + /* These errors aren't worth retrying with the slow path */ + return -1; + case ERROR_NOT_SUPPORTED: + /* indicates the API couldn't be loaded */ + break; + } + } + + return win32_xstat_slow_impl(path, result, traverse); +} + static int win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) { @@ -2017,6 +2068,10 @@ win32_xstat(const wchar_t *path, struct _Py_stat_struct *result, BOOL traverse) setting it to a POSIX error. Callers should use GetLastError. */ int code = win32_xstat_impl(path, result, traverse); errno = 0; + + /* ctime is only deprecated from 3.12, so we copy birthtime across */ + result->st_ctime = result->st_birthtime; + result->st_ctime_nsec = result->st_birthtime_nsec; return code; } /* About the following functions: win32_lstat_w, win32_stat, win32_stat_w @@ -2087,9 +2142,12 @@ static PyStructSequence_Field stat_result_fields[] = { #ifdef HAVE_STRUCT_STAT_ST_GEN {"st_gen", "generation number"}, #endif -#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#if defined(HAVE_STRUCT_STAT_ST_BIRTHTIME) || defined(MS_WINDOWS) {"st_birthtime", "time of creation"}, #endif +#ifdef MS_WINDOWS + {"st_birthtime_ns", "time of creation in nanoseconds"}, +#endif #ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES {"st_file_attributes", "Windows file attribute bits"}, #endif @@ -2132,16 +2190,22 @@ static PyStructSequence_Field stat_result_fields[] = { #define ST_GEN_IDX ST_FLAGS_IDX #endif -#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#if defined(HAVE_STRUCT_STAT_ST_BIRTHTIME) || defined(MS_WINDOWS) #define ST_BIRTHTIME_IDX (ST_GEN_IDX+1) #else #define ST_BIRTHTIME_IDX ST_GEN_IDX #endif -#ifdef HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES -#define ST_FILE_ATTRIBUTES_IDX (ST_BIRTHTIME_IDX+1) +#ifdef MS_WINDOWS +#define ST_BIRTHTIME_NS_IDX (ST_BIRTHTIME_IDX+1) #else -#define ST_FILE_ATTRIBUTES_IDX ST_BIRTHTIME_IDX +#define ST_BIRTHTIME_NS_IDX ST_BIRTHTIME_IDX +#endif + +#if defined(HAVE_STRUCT_STAT_ST_FILE_ATTRIBUTES) || defined(MS_WINDOWS) +#define ST_FILE_ATTRIBUTES_IDX (ST_BIRTHTIME_NS_IDX+1) +#else +#define ST_FILE_ATTRIBUTES_IDX ST_BIRTHTIME_NS_IDX #endif #ifdef HAVE_STRUCT_STAT_ST_FSTYPE @@ -2310,7 +2374,7 @@ _posix_free(void *module) } static void -fill_time(PyObject *module, PyObject *v, int index, time_t sec, unsigned long nsec) +fill_time(PyObject *module, PyObject *v, int s_index, int f_index, int ns_index, time_t sec, unsigned long nsec) { PyObject *s = _PyLong_FromTime_t(sec); PyObject *ns_fractional = PyLong_FromUnsignedLong(nsec); @@ -2334,12 +2398,18 @@ fill_time(PyObject *module, PyObject *v, int index, time_t sec, unsigned long ns goto exit; } - PyStructSequence_SET_ITEM(v, index, s); - PyStructSequence_SET_ITEM(v, index+3, float_s); - PyStructSequence_SET_ITEM(v, index+6, ns_total); - s = NULL; - float_s = NULL; - ns_total = NULL; + if (s_index >= 0) { + PyStructSequence_SET_ITEM(v, s_index, s); + s = NULL; + } + if (f_index >= 0) { + PyStructSequence_SET_ITEM(v, f_index, float_s); + float_s = NULL; + } + if (ns_index >= 0) { + PyStructSequence_SET_ITEM(v, ns_index, ns_total); + ns_total = NULL; + } exit: Py_XDECREF(s); Py_XDECREF(ns_fractional); @@ -2348,6 +2418,33 @@ fill_time(PyObject *module, PyObject *v, int index, time_t sec, unsigned long ns Py_XDECREF(float_s); } +#ifdef MS_WINDOWS +static PyObject* +_pystat_l128_from_l64_l64(uint64_t low, uint64_t high) +{ + PyObject *o_low = PyLong_FromUnsignedLongLong(low); + if (!o_low || !high) { + return o_low; + } + PyObject *o_high = PyLong_FromUnsignedLongLong(high); + PyObject *l64 = o_high ? PyLong_FromLong(64) : NULL; + if (!l64) { + Py_XDECREF(o_high); + Py_DECREF(o_low); + return NULL; + } + Py_SETREF(o_high, PyNumber_Lshift(o_high, l64)); + Py_DECREF(l64); + if (!o_high) { + Py_DECREF(o_low); + return NULL; + } + Py_SETREF(o_low, PyNumber_Add(o_low, o_high)); + Py_DECREF(o_high); + return o_low; +} +#endif + /* pack a system stat C structure into the Python stat tuple (used by posix_stat() and posix_fstat()) */ static PyObject* @@ -2360,12 +2457,13 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) return NULL; PyStructSequence_SET_ITEM(v, 0, PyLong_FromLong((long)st->st_mode)); +#ifdef MS_WINDOWS + PyStructSequence_SET_ITEM(v, 1, _pystat_l128_from_l64_l64(st->st_ino, st->st_ino_high)); + PyStructSequence_SET_ITEM(v, 2, PyLong_FromUnsignedLongLong(st->st_dev)); +#else static_assert(sizeof(unsigned long long) >= sizeof(st->st_ino), "stat.st_ino is larger than unsigned long long"); PyStructSequence_SET_ITEM(v, 1, PyLong_FromUnsignedLongLong(st->st_ino)); -#ifdef MS_WINDOWS - PyStructSequence_SET_ITEM(v, 2, PyLong_FromUnsignedLong(st->st_dev)); -#else PyStructSequence_SET_ITEM(v, 2, _PyLong_FromDev(st->st_dev)); #endif PyStructSequence_SET_ITEM(v, 3, PyLong_FromLong((long)st->st_nlink)); @@ -2395,9 +2493,9 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) #else ansec = mnsec = cnsec = 0; #endif - fill_time(module, v, 7, st->st_atime, ansec); - fill_time(module, v, 8, st->st_mtime, mnsec); - fill_time(module, v, 9, st->st_ctime, cnsec); + fill_time(module, v, 7, 10, 13, st->st_atime, ansec); + fill_time(module, v, 8, 11, 14, st->st_mtime, mnsec); + fill_time(module, v, 9, 12, 15, st->st_ctime, cnsec); #ifdef HAVE_STRUCT_STAT_ST_BLKSIZE PyStructSequence_SET_ITEM(v, ST_BLKSIZE_IDX, @@ -2415,7 +2513,7 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) PyStructSequence_SET_ITEM(v, ST_GEN_IDX, PyLong_FromLong((long)st->st_gen)); #endif -#ifdef HAVE_STRUCT_STAT_ST_BIRTHTIME +#if defined(HAVE_STRUCT_STAT_ST_BIRTHTIME) { PyObject *val; unsigned long bsec,bnsec; @@ -2429,6 +2527,9 @@ _pystat_fromstructstat(PyObject *module, STRUCT_STAT *st) PyStructSequence_SET_ITEM(v, ST_BIRTHTIME_IDX, val); } +#elif defined(MS_WINDOWS) + fill_time(module, v, -1, ST_BIRTHTIME_IDX, ST_BIRTHTIME_NS_IDX, + st->st_birthtime, st->st_birthtime_nsec); #endif #ifdef HAVE_STRUCT_STAT_ST_FLAGS PyStructSequence_SET_ITEM(v, ST_FLAGS_IDX, @@ -14639,7 +14740,7 @@ DirEntry_from_find_data(PyObject *module, path_t *path, WIN32_FIND_DATAW *dataW) } find_data_to_file_info(dataW, &file_info, &reparse_tag); - _Py_attribute_data_to_stat(&file_info, reparse_tag, &entry->win32_lstat); + _Py_attribute_data_to_stat(&file_info, reparse_tag, NULL, NULL, &entry->win32_lstat); return (PyObject *)entry; diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj index 85dc8caa458ed9..0343d30a42cd6a 100644 --- a/PCbuild/pythoncore.vcxproj +++ b/PCbuild/pythoncore.vcxproj @@ -216,6 +216,7 @@ + diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters index 98e7d59ba1020c..359463e1d9af75 100644 --- a/PCbuild/pythoncore.vcxproj.filters +++ b/PCbuild/pythoncore.vcxproj.filters @@ -555,6 +555,9 @@ Include\internal + + Include\internal + Include\internal diff --git a/Python/fileutils.c b/Python/fileutils.c index f48b626b444016..969b7163b5ac18 100644 --- a/Python/fileutils.c +++ b/Python/fileutils.c @@ -8,6 +8,8 @@ #ifdef MS_WINDOWS # include # include +# include // FILE_DEVICE_* constants +# include "pycore_fileutils_windows.h" // FILE_STAT_BASIC_INFORMATION # if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) # define PATHCCH_ALLOW_LONG_PATHS 0x01 # else @@ -1056,6 +1058,13 @@ FILE_TIME_to_time_t_nsec(FILETIME *in_ptr, time_t *time_out, int* nsec_out) *time_out = Py_SAFE_DOWNCAST((in / 10000000) - secs_between_epochs, __int64, time_t); } +static void +LARGE_INTEGER_to_time_t_nsec(LARGE_INTEGER *in_ptr, time_t *time_out, int* nsec_out) +{ + *nsec_out = (int)(in_ptr->QuadPart % 10000000) * 100; /* FILETIME is in units of 100 nsec. */ + *time_out = Py_SAFE_DOWNCAST((in_ptr->QuadPart / 10000000) - secs_between_epochs, __int64, time_t); +} + void _Py_time_t_to_FILE_TIME(time_t time_in, int nsec_in, FILETIME *out_ptr) { @@ -1085,33 +1094,126 @@ attributes_to_mode(DWORD attr) return m; } + +typedef union { + FILE_ID_128 id; + struct { + uint64_t st_ino; + uint64_t st_ino_high; + }; +} id_128_to_ino; + + void _Py_attribute_data_to_stat(BY_HANDLE_FILE_INFORMATION *info, ULONG reparse_tag, + FILE_BASIC_INFO *basic_info, FILE_ID_INFO *id_info, struct _Py_stat_struct *result) { memset(result, 0, sizeof(*result)); result->st_mode = attributes_to_mode(info->dwFileAttributes); result->st_size = (((__int64)info->nFileSizeHigh)<<32) + info->nFileSizeLow; - result->st_dev = info->dwVolumeSerialNumber; - result->st_rdev = result->st_dev; - FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_ctime, &result->st_ctime_nsec); - FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); - FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_dev = id_info ? id_info->VolumeSerialNumber : info->dwVolumeSerialNumber; + result->st_rdev = 0; + /* st_ctime is deprecated, but we preserve the legacy value in our caller, not here */ + if (basic_info) { + LARGE_INTEGER_to_time_t_nsec(&basic_info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&basic_info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec); + LARGE_INTEGER_to_time_t_nsec(&basic_info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&basic_info->LastAccessTime, &result->st_atime, &result->st_atime_nsec); + } else { + FILE_TIME_to_time_t_nsec(&info->ftCreationTime, &result->st_birthtime, &result->st_birthtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + FILE_TIME_to_time_t_nsec(&info->ftLastAccessTime, &result->st_atime, &result->st_atime_nsec); + } result->st_nlink = info->nNumberOfLinks; - result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; + + if (id_info) { + id_128_to_ino file_id; + file_id.id = id_info->FileId; + result->st_ino = file_id.st_ino; + result->st_ino_high = file_id.st_ino_high; + } else { + /* should only occur for DirEntry_from_find_data, in which case the + index is likely to be zero anyway. */ + result->st_ino = (((uint64_t)info->nFileIndexHigh) << 32) + info->nFileIndexLow; + } + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will open other name surrogate reparse points without traversing them. To detect/handle these, check st_file_attributes and st_reparse_tag. */ result->st_reparse_tag = reparse_tag; if (info->dwFileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && reparse_tag == IO_REPARSE_TAG_SYMLINK) { - /* first clear the S_IFMT bits */ - result->st_mode ^= (result->st_mode & S_IFMT); - /* now set the bits that make this a symlink */ - result->st_mode |= S_IFLNK; + /* set the bits that make this a symlink */ + result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK; } result->st_file_attributes = info->dwFileAttributes; } + +void +_Py_stat_basic_info_to_stat(FILE_STAT_BASIC_INFORMATION *info, + struct _Py_stat_struct *result) +{ + memset(result, 0, sizeof(*result)); + result->st_mode = attributes_to_mode(info->FileAttributes); + result->st_size = info->EndOfFile.QuadPart; + LARGE_INTEGER_to_time_t_nsec(&info->CreationTime, &result->st_birthtime, &result->st_birthtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->ChangeTime, &result->st_ctime, &result->st_ctime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastWriteTime, &result->st_mtime, &result->st_mtime_nsec); + LARGE_INTEGER_to_time_t_nsec(&info->LastAccessTime, &result->st_atime, &result->st_atime_nsec); + result->st_nlink = info->NumberOfLinks; + result->st_dev = info->VolumeSerialNumber.QuadPart; + /* File systems with less than 128-bits zero pad into this field */ + id_128_to_ino file_id; + file_id.id = info->FileId128; + result->st_ino = file_id.st_ino; + result->st_ino_high = file_id.st_ino_high; + /* bpo-37834: Only actual symlinks set the S_IFLNK flag. But lstat() will + open other name surrogate reparse points without traversing them. To + detect/handle these, check st_file_attributes and st_reparse_tag. */ + result->st_reparse_tag = info->ReparseTag; + if (info->FileAttributes & FILE_ATTRIBUTE_REPARSE_POINT && + info->ReparseTag == IO_REPARSE_TAG_SYMLINK) { + /* set the bits that make this a symlink */ + result->st_mode = (result->st_mode & ~S_IFMT) | S_IFLNK; + } + result->st_file_attributes = info->FileAttributes; + switch (info->DeviceType) { + case FILE_DEVICE_DISK: + case FILE_DEVICE_VIRTUAL_DISK: + case FILE_DEVICE_DFS: + case FILE_DEVICE_CD_ROM: + case FILE_DEVICE_CONTROLLER: + case FILE_DEVICE_DATALINK: + break; + case FILE_DEVICE_DISK_FILE_SYSTEM: + case FILE_DEVICE_CD_ROM_FILE_SYSTEM: + case FILE_DEVICE_NETWORK_FILE_SYSTEM: + result->st_mode = (result->st_mode & ~S_IFMT) | 0x6000; /* _S_IFBLK */ + break; + case FILE_DEVICE_CONSOLE: + case FILE_DEVICE_NULL: + case FILE_DEVICE_KEYBOARD: + case FILE_DEVICE_MODEM: + case FILE_DEVICE_MOUSE: + case FILE_DEVICE_PARALLEL_PORT: + case FILE_DEVICE_PRINTER: + case FILE_DEVICE_SCREEN: + case FILE_DEVICE_SERIAL_PORT: + case FILE_DEVICE_SOUND: + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFCHR; + break; + case FILE_DEVICE_NAMED_PIPE: + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFIFO; + break; + default: + if (info->FileAttributes & FILE_ATTRIBUTE_DIRECTORY) { + result->st_mode = (result->st_mode & ~S_IFMT) | _S_IFDIR; + } + break; + } +} + #endif /* Return information about a file. @@ -1131,6 +1233,8 @@ _Py_fstat_noraise(int fd, struct _Py_stat_struct *status) { #ifdef MS_WINDOWS BY_HANDLE_FILE_INFORMATION info; + FILE_BASIC_INFO basicInfo; + FILE_ID_INFO idInfo; HANDLE h; int type; @@ -1162,14 +1266,16 @@ _Py_fstat_noraise(int fd, struct _Py_stat_struct *status) return 0; } - if (!GetFileInformationByHandle(h, &info)) { + if (!GetFileInformationByHandle(h, &info) || + !GetFileInformationByHandleEx(h, FileBasicInfo, &basicInfo, sizeof(basicInfo)) || + !GetFileInformationByHandleEx(h, FileIdInfo, &idInfo, sizeof(idInfo))) { /* The Win32 error is already set, but we also set errno for callers who expect it */ errno = winerror_to_errno(GetLastError()); return -1; } - _Py_attribute_data_to_stat(&info, 0, status); + _Py_attribute_data_to_stat(&info, 0, &basicInfo, &idInfo, status); return 0; #else return fstat(fd, status);