From af4d6dae345815df5f88a5989049a031351dc9c7 Mon Sep 17 00:00:00 2001 From: nicole mazzuca Date: Wed, 11 May 2022 18:24:26 -0700 Subject: [PATCH 1/7] [filesystem] Don't error on weird system files This changes from using GetFileAttributesExW to FindFirstFileW, since FindFirstFileW gets the information from the directory and thus doesn't error out. Fixes #2370 This will require serious testing to make sure the behavior is the same. --- stl/src/filesystem.cpp | 65 +++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 42 deletions(-) diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index f31227cdda..4ccd7093bf 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -793,43 +793,13 @@ _Success_(return == __std_win_error::_Success) __std_win_error return {_Size, __std_win_error::_Success}; } -// This structure is meant to be embedded into __std_fs_stats that properly aligned it, -// so that 64-bit values are fully aligned. Note that _File_size fields are flipped to be in low:high order -// and represented by __std_fs_filetime which is a pair of ulongs. -// If this structure is used in GetFileAttributesEx, after successful read, File_size parts must be put in -// low:high order, as GetFileAttributesEx returns them in high:low order. -struct _File_attr_data { // typedef struct _WIN32_FILE_ATTRIBUTE_DATA { - __std_fs_file_attr _Attributes; // DWORD dwFileAttributes; - __std_fs_filetime _Creation_time; // FILETIME ftCreationTime; - __std_fs_filetime _Last_access_time; // FILETIME ftLastAccessTime; - __std_fs_filetime _Last_write_time; // FILETIME ftLastWriteTime; - unsigned long _File_size_high; // DWORD nFileSizeHigh; - unsigned long _File_size_low; // DWORD nFileSizeLow; -}; // } WIN32_FILE_ATTRIBUTE_DATA, *LPWIN32_FILE_ATTRIBUTE_DATA; - -struct alignas(long long) _Aligned_file_attrs { - unsigned long _Padding; // align the __std_fs_filetime inside _Data to make the memcpy below an ordinary 64-bit load - _File_attr_data _Data; - - [[nodiscard]] long long _Last_write_time() const noexcept { - long long _Result; - _CSTD memcpy(&_Result, &_Data._Last_write_time, sizeof(_Result)); - return _Result; - } - - [[nodiscard]] unsigned long long _File_size() const noexcept { - return (static_cast(_Data._File_size_high) << 32) + _Data._File_size_low; - } -}; +static unsigned long long _Merge_to_ull(unsigned long _Low, unsigned long _High) noexcept { + return static_cast(_Low) | static_cast(_High) << 32; +} [[nodiscard]] _Success_(return == __std_win_error::_Success) __std_win_error __stdcall __std_fs_get_stats(_In_z_ const wchar_t* const _Path, __std_fs_stats* const _Stats, _In_ __std_fs_stats_flags _Flags, _In_ const __std_fs_file_attr _Symlink_attribute_hint) noexcept { - static_assert((offsetof(_Aligned_file_attrs, _Data._Last_write_time) % 8) == 0, "_Last_write_time not aligned"); - static_assert(sizeof(_File_attr_data) == sizeof(WIN32_FILE_ATTRIBUTE_DATA)); - static_assert(alignof(_File_attr_data) == alignof(WIN32_FILE_ATTRIBUTE_DATA)); - static_assert(alignof(_File_attr_data) == 4); - const bool _Follow_symlinks = _Bitmask_includes(_Flags, __std_fs_stats_flags::_Follow_symlinks); _Flags &= ~__std_fs_stats_flags::_Follow_symlinks; if (_Follow_symlinks && _Bitmask_includes(_Flags, __std_fs_stats_flags::_Reparse_tag)) { @@ -854,21 +824,32 @@ struct alignas(long long) _Aligned_file_attrs { _Flags, _Get_file_attributes_data)) { // caller wants something GetFileAttributesExW might provide if (_Symlink_attribute_hint == __std_fs_file_attr::_Invalid || !_Bitmask_includes(_Symlink_attribute_hint, __std_fs_file_attr::_Reparse_point) - || !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so GetFileAttributesExW + || !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so FindFirstFileW // would return the right answer - _Aligned_file_attrs _Aligned_attrs; - auto& _Data = _Aligned_attrs._Data; - if (!GetFileAttributesExW(_Path, GetFileExInfoStandard, &_Data)) { - return __std_win_error{GetLastError()}; + + // check for file names that contain `?` or `*` (globbing characters) + // these are invalid file names, and will give us the wrong answer with `FindFirstFile` + if (::wcspbrk(_Path, L"?*")) { + return __std_win_error(ERROR_INVALID_NAME); } + WIN32_FIND_DATAW _Data; + HANDLE _Find_handle = FindFirstFileW(_Path, &_Data); + if (_Find_handle == INVALID_HANDLE_VALUE) { + return __std_win_error(GetLastError()); + } + FindClose(_Find_handle); + + auto _Attributes = static_cast<__std_fs_file_attr>(_Data.dwFileAttributes); if (!_Follow_symlinks - || !_Bitmask_includes(_Data._Attributes, + || !_Bitmask_includes(_Attributes, __std_fs_file_attr::_Reparse_point)) { // if we aren't following symlinks or can't be a // symlink, that data was useful, record - _Stats->_Attributes = _Data._Attributes; - _Stats->_File_size = _Aligned_attrs._File_size(); - _Stats->_Last_write_time = _Aligned_attrs._Last_write_time(); + _Stats->_Attributes = _Attributes; + _Stats->_File_size = _Merge_to_ull(_Data.nFileSizeLow, _Data.nFileSizeHigh); + _Stats->_Last_write_time = static_cast( + _Merge_to_ull(_Data.ftLastWriteTime.dwLowDateTime, _Data.ftLastWriteTime.dwHighDateTime)); + _Flags &= ~_Get_file_attributes_data; } } From a35d7692e4efb7ebc023f3b2c8c0e12ee74037e7 Mon Sep 17 00:00:00 2001 From: nicole mazzuca Date: Thu, 12 May 2022 09:28:36 -0700 Subject: [PATCH 2/7] Stephan CRs --- stl/src/filesystem.cpp | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index 4ccd7093bf..b376bce719 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -168,6 +168,10 @@ namespace { return __std_win_error{GetLastError()}; } + + [[nodiscard]] static unsigned long long _Merge_to_ull(unsigned long _High, unsigned long _Low) noexcept { + return static_cast(_Low) | (static_cast(_High) << 32); + } } // unnamed namespace _EXTERN_C @@ -793,10 +797,6 @@ _Success_(return == __std_win_error::_Success) __std_win_error return {_Size, __std_win_error::_Success}; } -static unsigned long long _Merge_to_ull(unsigned long _Low, unsigned long _High) noexcept { - return static_cast(_Low) | static_cast(_High) << 32; -} - [[nodiscard]] _Success_(return == __std_win_error::_Success) __std_win_error __stdcall __std_fs_get_stats(_In_z_ const wchar_t* const _Path, __std_fs_stats* const _Stats, _In_ __std_fs_stats_flags _Flags, _In_ const __std_fs_file_attr _Symlink_attribute_hint) noexcept { @@ -827,28 +827,30 @@ static unsigned long long _Merge_to_ull(unsigned long _Low, unsigned long _High) || !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so FindFirstFileW // would return the right answer - // check for file names that contain `?` or `*` (globbing characters) - // these are invalid file names, and will give us the wrong answer with `FindFirstFile` - if (::wcspbrk(_Path, L"?*")) { - return __std_win_error(ERROR_INVALID_NAME); + // Check for file names that contain `?` or `*` (i.e., globbing characters). + // These are invalid file names, and will give us the wrong answer with `FindFirstFileW`. + if (_CSTD wcspbrk(_Path, L"?*")) { + return __std_win_error{ERROR_INVALID_NAME}; } WIN32_FIND_DATAW _Data; - HANDLE _Find_handle = FindFirstFileW(_Path, &_Data); - if (_Find_handle == INVALID_HANDLE_VALUE) { - return __std_win_error(GetLastError()); + { + HANDLE _Find_handle = FindFirstFileW(_Path, &_Data); + if (_Find_handle == INVALID_HANDLE_VALUE) { + return __std_win_error{GetLastError()}; + } + FindClose(_Find_handle); } - FindClose(_Find_handle); - auto _Attributes = static_cast<__std_fs_file_attr>(_Data.dwFileAttributes); + const __std_fs_file_attr _Attributes{_Data.dwFileAttributes}; if (!_Follow_symlinks || !_Bitmask_includes(_Attributes, __std_fs_file_attr::_Reparse_point)) { // if we aren't following symlinks or can't be a // symlink, that data was useful, record _Stats->_Attributes = _Attributes; - _Stats->_File_size = _Merge_to_ull(_Data.nFileSizeLow, _Data.nFileSizeHigh); + _Stats->_File_size = _Merge_to_ull(_Data.nFileSizeHigh, _Data.nFileSizeLow); _Stats->_Last_write_time = static_cast( - _Merge_to_ull(_Data.ftLastWriteTime.dwLowDateTime, _Data.ftLastWriteTime.dwHighDateTime)); + _Merge_to_ull(_Data.ftLastWriteTime.dwHighDateTime, _Data.ftLastWriteTime.dwLowDateTime)); _Flags &= ~_Get_file_attributes_data; } From 5cd723b135c14da2edab301ae5ac1d167b7d5af4 Mon Sep 17 00:00:00 2001 From: nicole mazzuca Date: Thu, 12 May 2022 10:24:22 -0700 Subject: [PATCH 3/7] fix long path support given the following program: ```cxx #include #include namespace fs = std::filesystem; int wmain(int argc, wchar_t **argv) { std::error_code ec; for (int i = 1; i < argc; ++i) { wprintf(L"exists(%ls): %ls\n", argv[i], fs::exists(argv[i], ec) ? L"true" : L"false"); if (ec) { wprintf(L" error: 0x%lX\n", ec.value()); } } } ``` before this change, the result of this program would be: ``` PS C:\..\nimazzuc\projects\stl-tests> .\exists.exe \\?\C:\hiberfil.sys exists(\??\C:\hiberfil.sys): false ``` after this change, ``` PS C:\..\nimazzuc\projects\stl-tests> .\exists.exe \\?\C:\hiberfil.sys exists(\\?\C:\hiberfil.sys): true ``` --- stl/inc/filesystem | 77 ------------------------------------- stl/inc/xfilesystem_abi.h | 81 +++++++++++++++++++++++++++++++++++++++ stl/src/filesystem.cpp | 16 +++++++- 3 files changed, 95 insertions(+), 79 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 1c0a832393..78d284192d 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -409,83 +409,6 @@ namespace filesystem { } } - template - _NODISCARD _Ty _Unaligned_load(const void* _Ptr) { // load a _Ty from _Ptr - static_assert(is_trivial_v<_Ty>, "Unaligned loads require trivial types"); - _Ty _Tmp; - _CSTD memcpy(&_Tmp, _Ptr, sizeof(_Tmp)); - return _Tmp; - } - - _NODISCARD inline bool _Is_drive_prefix(const wchar_t* const _First) { - // test if _First points to a prefix of the form X: - // pre: _First points to at least 2 wchar_t instances - // pre: Little endian - auto _Value = _Unaligned_load(_First); - _Value &= 0xFFFF'FFDFu; // transform lowercase drive letters into uppercase ones - _Value -= (static_cast(L':') << (sizeof(wchar_t) * CHAR_BIT)) | L'A'; - return _Value < 26; - } - - _NODISCARD inline bool _Has_drive_letter_prefix(const wchar_t* const _First, const wchar_t* const _Last) { - // test if [_First, _Last) has a prefix of the form X: - return _Last - _First >= 2 && _Is_drive_prefix(_First); - } - - _NODISCARD inline const wchar_t* _Find_root_name_end(const wchar_t* const _First, const wchar_t* const _Last) { - // attempt to parse [_First, _Last) as a path and return the end of root-name if it exists; otherwise, _First - - // This is the place in the generic grammar where library implementations have the most freedom. - // Below are example Windows paths, and what we've decided to do with them: - // * X:DriveRelative, X:\DosAbsolute - // We parse X: as root-name, if and only if \ is present we consider that root-directory - // * \RootRelative - // We parse no root-name, and \ as root-directory - // * \\server\share - // We parse \\server as root-name, \ as root-directory, and share as the first element in relative-path. - // Technically, Windows considers all of \\server\share the logical "root", but for purposes - // of decomposition we want those split, so that path(R"(\\server\share)").replace_filename("other_share") - // is \\server\other_share - // * \\?\device - // * \??\device - // * \\.\device - // CreateFile appears to treat these as the same thing; we will set the first three characters as root-name - // and the first \ as root-directory. Support for these prefixes varies by particular Windows version, but - // for the purposes of path decomposition we don't need to worry about that. - // * \\?\UNC\server\share - // MSDN explicitly documents the \\?\UNC syntax as a special case. What actually happens is that the device - // Mup, or "Multiple UNC provider", owns the path \\?\UNC in the NT namespace, and is responsible for the - // network file access. When the user says \\server\share, CreateFile translates that into - // \\?\UNC\server\share to get the remote server access behavior. Because NT treats this like any other - // device, we have chosen to treat this as the \\?\ case above. - if (_Last - _First < 2) { - return _First; - } - - if (_Has_drive_letter_prefix(_First, _Last)) { // check for X: first because it's the most common root-name - return _First + 2; - } - - if (!_Is_slash(_First[0])) { // all the other root-names start with a slash; check that first because - // we expect paths without a leading slash to be very common - return _First; - } - - // $ means anything other than a slash, including potentially the end of the input - if (_Last - _First >= 4 && _Is_slash(_First[3]) && (_Last - _First == 4 || !_Is_slash(_First[4])) // \xx\$ - && ((_Is_slash(_First[1]) && (_First[2] == L'?' || _First[2] == L'.')) // \\?\$ or \\.\$ - || (_First[1] == L'?' && _First[2] == L'?'))) { // \??\$ - return _First + 3; - } - - if (_Last - _First >= 3 && _Is_slash(_First[1]) && !_Is_slash(_First[2])) { // \\server - return _STD find_if(_First + 3, _Last, _Is_slash); - } - - // no match - return _First; - } - _NODISCARD inline wstring_view _Parse_root_name(const wstring_view _Str) { // attempt to parse _Str as a path and return the root-name if it exists; otherwise, an empty view const auto _First = _Str.data(); diff --git a/stl/inc/xfilesystem_abi.h b/stl/inc/xfilesystem_abi.h index 1122b14984..3dd24f9cba 100644 --- a/stl/inc/xfilesystem_abi.h +++ b/stl/inc/xfilesystem_abi.h @@ -7,6 +7,9 @@ #ifndef _XFILESYSTEM_ABI_H #define _XFILESYSTEM_ABI_H #include + +#include + #if _STL_COMPILER_PREPROCESSOR #include @@ -368,6 +371,84 @@ struct _Is_slash_oper { // predicate testing if input is a preferred-separator o }; inline constexpr _Is_slash_oper _Is_slash{}; + + +template +_NODISCARD _Ty _Unaligned_load(const void* _Ptr) { // load a _Ty from _Ptr + static_assert(is_trivial_v<_Ty>, "Unaligned loads require trivial types"); + _Ty _Tmp; + _CSTD memcpy(&_Tmp, _Ptr, sizeof(_Tmp)); + return _Tmp; +} + +_NODISCARD inline bool _Is_drive_prefix(const wchar_t* const _First) { + // test if _First points to a prefix of the form X: + // pre: _First points to at least 2 wchar_t instances + // pre: Little endian + auto _Value = _Unaligned_load(_First); + _Value &= 0xFFFF'FFDFu; // transform lowercase drive letters into uppercase ones + _Value -= (static_cast(L':') << (sizeof(wchar_t) * CHAR_BIT)) | L'A'; + return _Value < 26; +} + +_NODISCARD inline bool _Has_drive_letter_prefix(const wchar_t* const _First, const wchar_t* const _Last) { + // test if [_First, _Last) has a prefix of the form X: + return _Last - _First >= 2 && _Is_drive_prefix(_First); +} + +_NODISCARD inline const wchar_t* _Find_root_name_end(const wchar_t* const _First, const wchar_t* const _Last) { + // attempt to parse [_First, _Last) as a path and return the end of root-name if it exists; otherwise, _First + + // This is the place in the generic grammar where library implementations have the most freedom. + // Below are example Windows paths, and what we've decided to do with them: + // * X:DriveRelative, X:\DosAbsolute + // We parse X: as root-name, if and only if \ is present we consider that root-directory + // * \RootRelative + // We parse no root-name, and \ as root-directory + // * \\server\share + // We parse \\server as root-name, \ as root-directory, and share as the first element in relative-path. + // Technically, Windows considers all of \\server\share the logical "root", but for purposes + // of decomposition we want those split, so that path(R"(\\server\share)").replace_filename("other_share") + // is \\server\other_share + // * \\?\device + // * \??\device + // * \\.\device + // CreateFile appears to treat these as the same thing; we will set the first three characters as root-name + // and the first \ as root-directory. Support for these prefixes varies by particular Windows version, but + // for the purposes of path decomposition we don't need to worry about that. + // * \\?\UNC\server\share + // MSDN explicitly documents the \\?\UNC syntax as a special case. What actually happens is that the device + // Mup, or "Multiple UNC provider", owns the path \\?\UNC in the NT namespace, and is responsible for the + // network file access. When the user says \\server\share, CreateFile translates that into + // \\?\UNC\server\share to get the remote server access behavior. Because NT treats this like any other + // device, we have chosen to treat this as the \\?\ case above. + if (_Last - _First < 2) { + return _First; + } + + if (_Has_drive_letter_prefix(_First, _Last)) { // check for X: first because it's the most common root-name + return _First + 2; + } + + if (!_Is_slash(_First[0])) { // all the other root-names start with a slash; check that first because + // we expect paths without a leading slash to be very common + return _First; + } + + // $ means anything other than a slash, including potentially the end of the input + if (_Last - _First >= 4 && _Is_slash(_First[3]) && (_Last - _First == 4 || !_Is_slash(_First[4])) // \xx\$ + && ((_Is_slash(_First[1]) && (_First[2] == L'?' || _First[2] == L'.')) // \\?\$ or \\.\$ + || (_First[1] == L'?' && _First[2] == L'?'))) { // \??\$ + return _First + 3; + } + + if (_Last - _First >= 3 && _Is_slash(_First[1]) && !_Is_slash(_First[2])) { // \\server + return _STD find_if(_First + 3, _Last, _Is_slash); + } + + // no match + return _First; +} _STD_END #pragma pop_macro("new") diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index b376bce719..a389f70aa4 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -9,6 +9,7 @@ // Do not include or define anything else here. // In particular, basic_string must not be included here. +#include #include #include #include @@ -829,8 +830,19 @@ _Success_(return == __std_win_error::_Success) __std_win_error // Check for file names that contain `?` or `*` (i.e., globbing characters). // These are invalid file names, and will give us the wrong answer with `FindFirstFileW`. - if (_CSTD wcspbrk(_Path, L"?*")) { - return __std_win_error{ERROR_INVALID_NAME}; + { + const wchar_t* _Path_end = _Path + _CSTD wcslen(_Path); + const wchar_t* _After_drive_prefix = _STD _Find_root_name_end(_Path, _Path_end); + // `?` is allowed in the drive prefix, but `*` is not. + if (_STD find(_Path, _After_drive_prefix, '*') != _After_drive_prefix) { + return __std_win_error{ERROR_INVALID_NAME}; + } + + constexpr static auto _Is_globbing_character = [](wchar_t _Ch) { return _Ch == '*' || _Ch == '?'; }; + // In the rest of the path, neither is allowed. + if (_STD find_if(_After_drive_prefix, _Path_end, _Is_globbing_character) != _Path_end) { + return __std_win_error{ERROR_INVALID_NAME}; + } } WIN32_FIND_DATAW _Data; From 86547de977a885bdb28ad6837dbcf8f84aafef5b Mon Sep 17 00:00:00 2001 From: Nicole Mazzuca Date: Fri, 13 May 2022 12:49:45 -0700 Subject: [PATCH 4/7] fix fsb4000's comment this fixes the obvious bug of `FindFirstFileW(directory-we-dont-have-access-to/file-we-do)` that I didn't notice when implementing. This instead GetFileAttributesExW, followed by FindFirstFileW if that fails with ERROR_SHARING_VIOLATION. The change: Let testdir be a directory where the current user doesn't have list permissions; let testdir/testfile.txt be a file which the current user _does_ have permissions on. Then, FindFirstFileW(L"testdir/testfile.txt") will result in ERROR_ACCESS_DENIED, but GetFileAttributesExW(L"testdir/testfile.txt") will succeed. --- stl/inc/filesystem | 77 +++++++++++++++++++++++++++++++++++++ stl/inc/xfilesystem_abi.h | 81 --------------------------------------- stl/src/filesystem.cpp | 50 ++++++++++++------------ 3 files changed, 101 insertions(+), 107 deletions(-) diff --git a/stl/inc/filesystem b/stl/inc/filesystem index 78d284192d..1c0a832393 100644 --- a/stl/inc/filesystem +++ b/stl/inc/filesystem @@ -409,6 +409,83 @@ namespace filesystem { } } + template + _NODISCARD _Ty _Unaligned_load(const void* _Ptr) { // load a _Ty from _Ptr + static_assert(is_trivial_v<_Ty>, "Unaligned loads require trivial types"); + _Ty _Tmp; + _CSTD memcpy(&_Tmp, _Ptr, sizeof(_Tmp)); + return _Tmp; + } + + _NODISCARD inline bool _Is_drive_prefix(const wchar_t* const _First) { + // test if _First points to a prefix of the form X: + // pre: _First points to at least 2 wchar_t instances + // pre: Little endian + auto _Value = _Unaligned_load(_First); + _Value &= 0xFFFF'FFDFu; // transform lowercase drive letters into uppercase ones + _Value -= (static_cast(L':') << (sizeof(wchar_t) * CHAR_BIT)) | L'A'; + return _Value < 26; + } + + _NODISCARD inline bool _Has_drive_letter_prefix(const wchar_t* const _First, const wchar_t* const _Last) { + // test if [_First, _Last) has a prefix of the form X: + return _Last - _First >= 2 && _Is_drive_prefix(_First); + } + + _NODISCARD inline const wchar_t* _Find_root_name_end(const wchar_t* const _First, const wchar_t* const _Last) { + // attempt to parse [_First, _Last) as a path and return the end of root-name if it exists; otherwise, _First + + // This is the place in the generic grammar where library implementations have the most freedom. + // Below are example Windows paths, and what we've decided to do with them: + // * X:DriveRelative, X:\DosAbsolute + // We parse X: as root-name, if and only if \ is present we consider that root-directory + // * \RootRelative + // We parse no root-name, and \ as root-directory + // * \\server\share + // We parse \\server as root-name, \ as root-directory, and share as the first element in relative-path. + // Technically, Windows considers all of \\server\share the logical "root", but for purposes + // of decomposition we want those split, so that path(R"(\\server\share)").replace_filename("other_share") + // is \\server\other_share + // * \\?\device + // * \??\device + // * \\.\device + // CreateFile appears to treat these as the same thing; we will set the first three characters as root-name + // and the first \ as root-directory. Support for these prefixes varies by particular Windows version, but + // for the purposes of path decomposition we don't need to worry about that. + // * \\?\UNC\server\share + // MSDN explicitly documents the \\?\UNC syntax as a special case. What actually happens is that the device + // Mup, or "Multiple UNC provider", owns the path \\?\UNC in the NT namespace, and is responsible for the + // network file access. When the user says \\server\share, CreateFile translates that into + // \\?\UNC\server\share to get the remote server access behavior. Because NT treats this like any other + // device, we have chosen to treat this as the \\?\ case above. + if (_Last - _First < 2) { + return _First; + } + + if (_Has_drive_letter_prefix(_First, _Last)) { // check for X: first because it's the most common root-name + return _First + 2; + } + + if (!_Is_slash(_First[0])) { // all the other root-names start with a slash; check that first because + // we expect paths without a leading slash to be very common + return _First; + } + + // $ means anything other than a slash, including potentially the end of the input + if (_Last - _First >= 4 && _Is_slash(_First[3]) && (_Last - _First == 4 || !_Is_slash(_First[4])) // \xx\$ + && ((_Is_slash(_First[1]) && (_First[2] == L'?' || _First[2] == L'.')) // \\?\$ or \\.\$ + || (_First[1] == L'?' && _First[2] == L'?'))) { // \??\$ + return _First + 3; + } + + if (_Last - _First >= 3 && _Is_slash(_First[1]) && !_Is_slash(_First[2])) { // \\server + return _STD find_if(_First + 3, _Last, _Is_slash); + } + + // no match + return _First; + } + _NODISCARD inline wstring_view _Parse_root_name(const wstring_view _Str) { // attempt to parse _Str as a path and return the root-name if it exists; otherwise, an empty view const auto _First = _Str.data(); diff --git a/stl/inc/xfilesystem_abi.h b/stl/inc/xfilesystem_abi.h index 3dd24f9cba..1122b14984 100644 --- a/stl/inc/xfilesystem_abi.h +++ b/stl/inc/xfilesystem_abi.h @@ -7,9 +7,6 @@ #ifndef _XFILESYSTEM_ABI_H #define _XFILESYSTEM_ABI_H #include - -#include - #if _STL_COMPILER_PREPROCESSOR #include @@ -371,84 +368,6 @@ struct _Is_slash_oper { // predicate testing if input is a preferred-separator o }; inline constexpr _Is_slash_oper _Is_slash{}; - - -template -_NODISCARD _Ty _Unaligned_load(const void* _Ptr) { // load a _Ty from _Ptr - static_assert(is_trivial_v<_Ty>, "Unaligned loads require trivial types"); - _Ty _Tmp; - _CSTD memcpy(&_Tmp, _Ptr, sizeof(_Tmp)); - return _Tmp; -} - -_NODISCARD inline bool _Is_drive_prefix(const wchar_t* const _First) { - // test if _First points to a prefix of the form X: - // pre: _First points to at least 2 wchar_t instances - // pre: Little endian - auto _Value = _Unaligned_load(_First); - _Value &= 0xFFFF'FFDFu; // transform lowercase drive letters into uppercase ones - _Value -= (static_cast(L':') << (sizeof(wchar_t) * CHAR_BIT)) | L'A'; - return _Value < 26; -} - -_NODISCARD inline bool _Has_drive_letter_prefix(const wchar_t* const _First, const wchar_t* const _Last) { - // test if [_First, _Last) has a prefix of the form X: - return _Last - _First >= 2 && _Is_drive_prefix(_First); -} - -_NODISCARD inline const wchar_t* _Find_root_name_end(const wchar_t* const _First, const wchar_t* const _Last) { - // attempt to parse [_First, _Last) as a path and return the end of root-name if it exists; otherwise, _First - - // This is the place in the generic grammar where library implementations have the most freedom. - // Below are example Windows paths, and what we've decided to do with them: - // * X:DriveRelative, X:\DosAbsolute - // We parse X: as root-name, if and only if \ is present we consider that root-directory - // * \RootRelative - // We parse no root-name, and \ as root-directory - // * \\server\share - // We parse \\server as root-name, \ as root-directory, and share as the first element in relative-path. - // Technically, Windows considers all of \\server\share the logical "root", but for purposes - // of decomposition we want those split, so that path(R"(\\server\share)").replace_filename("other_share") - // is \\server\other_share - // * \\?\device - // * \??\device - // * \\.\device - // CreateFile appears to treat these as the same thing; we will set the first three characters as root-name - // and the first \ as root-directory. Support for these prefixes varies by particular Windows version, but - // for the purposes of path decomposition we don't need to worry about that. - // * \\?\UNC\server\share - // MSDN explicitly documents the \\?\UNC syntax as a special case. What actually happens is that the device - // Mup, or "Multiple UNC provider", owns the path \\?\UNC in the NT namespace, and is responsible for the - // network file access. When the user says \\server\share, CreateFile translates that into - // \\?\UNC\server\share to get the remote server access behavior. Because NT treats this like any other - // device, we have chosen to treat this as the \\?\ case above. - if (_Last - _First < 2) { - return _First; - } - - if (_Has_drive_letter_prefix(_First, _Last)) { // check for X: first because it's the most common root-name - return _First + 2; - } - - if (!_Is_slash(_First[0])) { // all the other root-names start with a slash; check that first because - // we expect paths without a leading slash to be very common - return _First; - } - - // $ means anything other than a slash, including potentially the end of the input - if (_Last - _First >= 4 && _Is_slash(_First[3]) && (_Last - _First == 4 || !_Is_slash(_First[4])) // \xx\$ - && ((_Is_slash(_First[1]) && (_First[2] == L'?' || _First[2] == L'.')) // \\?\$ or \\.\$ - || (_First[1] == L'?' && _First[2] == L'?'))) { // \??\$ - return _First + 3; - } - - if (_Last - _First >= 3 && _Is_slash(_First[1]) && !_Is_slash(_First[2])) { // \\server - return _STD find_if(_First + 3, _Last, _Is_slash); - } - - // no match - return _First; -} _STD_END #pragma pop_macro("new") diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index a389f70aa4..ed03cd9403 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -9,7 +9,6 @@ // Do not include or define anything else here. // In particular, basic_string must not be included here. -#include #include #include #include @@ -170,8 +169,8 @@ namespace { return __std_win_error{GetLastError()}; } - [[nodiscard]] static unsigned long long _Merge_to_ull(unsigned long _High, unsigned long _Low) noexcept { - return static_cast(_Low) | (static_cast(_High) << 32); + [[nodiscard]] unsigned long long _Merge_to_ull(unsigned long _High, unsigned long _Low) noexcept { + return (static_cast(_High) << 32) | static_cast(_Low); } } // unnamed namespace @@ -821,37 +820,36 @@ _Success_(return == __std_win_error::_Success) __std_win_error constexpr auto _Get_file_attributes_data = __std_fs_stats_flags::_Attributes | __std_fs_stats_flags::_File_size | __std_fs_stats_flags::_Last_write_time; - if (_Bitmask_includes( - _Flags, _Get_file_attributes_data)) { // caller wants something GetFileAttributesExW might provide + if (_Bitmask_includes(_Flags, + _Get_file_attributes_data)) { // caller wants something GetFileAttributesExW/FindFirstFileW might provide if (_Symlink_attribute_hint == __std_fs_file_attr::_Invalid || !_Bitmask_includes(_Symlink_attribute_hint, __std_fs_file_attr::_Reparse_point) - || !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so FindFirstFileW - // would return the right answer - - // Check for file names that contain `?` or `*` (i.e., globbing characters). - // These are invalid file names, and will give us the wrong answer with `FindFirstFileW`. - { - const wchar_t* _Path_end = _Path + _CSTD wcslen(_Path); - const wchar_t* _After_drive_prefix = _STD _Find_root_name_end(_Path, _Path_end); - // `?` is allowed in the drive prefix, but `*` is not. - if (_STD find(_Path, _After_drive_prefix, '*') != _After_drive_prefix) { - return __std_win_error{ERROR_INVALID_NAME}; + || !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so + // GetFileAttributesExW/FindFirstFileW would return the right answer + + WIN32_FILE_ATTRIBUTE_DATA _Data; + if (!GetFileAttributesExW(_Path, GetFileExInfoStandard, &_Data)) { + __std_win_error _Last_error{GetLastError()}; + // In some cases, ERROR_SHARING_VIOLATION is returned from GetFileAttributesExW; + // FindFirstFileW will work in those cases if we have read permissions on the directory. + if (_Last_error != __std_win_error::_Sharing_violation) { + return _Last_error; } - constexpr static auto _Is_globbing_character = [](wchar_t _Ch) { return _Ch == '*' || _Ch == '?'; }; - // In the rest of the path, neither is allowed. - if (_STD find_if(_After_drive_prefix, _Path_end, _Is_globbing_character) != _Path_end) { - return __std_win_error{ERROR_INVALID_NAME}; - } - } - - WIN32_FIND_DATAW _Data; - { - HANDLE _Find_handle = FindFirstFileW(_Path, &_Data); + // Note that FindFirstFileW does allow globbing characters and has extra behavior with them + // that we don't want. However, GetFileAttributesExW would've failed with ERROR_INVALID_NAME + // if there were any globbing characters in _Path. + WIN32_FIND_DATAW _Find_data; + HANDLE _Find_handle = FindFirstFileW(_Path, &_Find_data); if (_Find_handle == INVALID_HANDLE_VALUE) { return __std_win_error{GetLastError()}; } FindClose(_Find_handle); + + _Data.dwFileAttributes = _Find_data.dwFileAttributes; + _Data.nFileSizeHigh = _Find_data.nFileSizeHigh; + _Data.nFileSizeLow = _Find_data.nFileSizeHigh; + _Data.ftLastWriteTime = _Find_data.ftLastWriteTime; } const __std_fs_file_attr _Attributes{_Data.dwFileAttributes}; From 7195f798b733eb2836db0a6922166c42af462aff Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 13 May 2022 17:23:43 -0700 Subject: [PATCH 5/7] Detach comments. --- stl/src/filesystem.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index 2d55b7265f..63b00e5dee 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -801,8 +801,8 @@ _Success_(return == __std_win_error::_Success) __std_win_error constexpr auto _Get_file_attributes_data = __std_fs_stats_flags::_Attributes | __std_fs_stats_flags::_File_size | __std_fs_stats_flags::_Last_write_time; - if (_Bitmask_includes(_Flags, - _Get_file_attributes_data)) { // caller wants something GetFileAttributesExW/FindFirstFileW might provide + if (_Bitmask_includes(_Flags, _Get_file_attributes_data)) { + // caller wants something GetFileAttributesExW/FindFirstFileW might provide if (_Symlink_attribute_hint == __std_fs_file_attr::_Invalid || !_Bitmask_includes(_Symlink_attribute_hint, __std_fs_file_attr::_Reparse_point) || !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so @@ -834,10 +834,8 @@ _Success_(return == __std_win_error::_Success) __std_win_error } const __std_fs_file_attr _Attributes{_Data.dwFileAttributes}; - if (!_Follow_symlinks - || !_Bitmask_includes(_Attributes, - __std_fs_file_attr::_Reparse_point)) { // if we aren't following symlinks or can't be a - // symlink, that data was useful, record + if (!_Follow_symlinks || !_Bitmask_includes(_Attributes, __std_fs_file_attr::_Reparse_point)) { + // if we aren't following symlinks or can't be a symlink, that data was useful, record _Stats->_Attributes = _Attributes; _Stats->_File_size = _Merge_to_ull(_Data.nFileSizeHigh, _Data.nFileSizeLow); _Stats->_Last_write_time = static_cast( From eb752a6a7aeef0389c33170f6b2538bceae9efb0 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 13 May 2022 17:29:42 -0700 Subject: [PATCH 6/7] Limit scope. --- stl/src/filesystem.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index 63b00e5dee..2c470dc564 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -810,10 +810,10 @@ _Success_(return == __std_win_error::_Success) __std_win_error WIN32_FILE_ATTRIBUTE_DATA _Data; if (!GetFileAttributesExW(_Path, GetFileExInfoStandard, &_Data)) { - __std_win_error _Last_error{GetLastError()}; // In some cases, ERROR_SHARING_VIOLATION is returned from GetFileAttributesExW; // FindFirstFileW will work in those cases if we have read permissions on the directory. - if (_Last_error != __std_win_error::_Sharing_violation) { + if (const __std_win_error _Last_error{GetLastError()}; + _Last_error != __std_win_error::_Sharing_violation) { return _Last_error; } @@ -821,11 +821,13 @@ _Success_(return == __std_win_error::_Success) __std_win_error // that we don't want. However, GetFileAttributesExW would've failed with ERROR_INVALID_NAME // if there were any globbing characters in _Path. WIN32_FIND_DATAW _Find_data; - HANDLE _Find_handle = FindFirstFileW(_Path, &_Find_data); - if (_Find_handle == INVALID_HANDLE_VALUE) { - return __std_win_error{GetLastError()}; + { + HANDLE _Find_handle = FindFirstFileW(_Path, &_Find_data); + if (_Find_handle == INVALID_HANDLE_VALUE) { + return __std_win_error{GetLastError()}; + } + FindClose(_Find_handle); } - FindClose(_Find_handle); _Data.dwFileAttributes = _Find_data.dwFileAttributes; _Data.nFileSizeHigh = _Find_data.nFileSizeHigh; From 4bd6c723e6c3164ff8c4c5192c638d7f156dbeac Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Fri, 13 May 2022 17:29:56 -0700 Subject: [PATCH 7/7] Fix typo-bug. --- stl/src/filesystem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/filesystem.cpp b/stl/src/filesystem.cpp index 2c470dc564..379e22ead2 100644 --- a/stl/src/filesystem.cpp +++ b/stl/src/filesystem.cpp @@ -831,7 +831,7 @@ _Success_(return == __std_win_error::_Success) __std_win_error _Data.dwFileAttributes = _Find_data.dwFileAttributes; _Data.nFileSizeHigh = _Find_data.nFileSizeHigh; - _Data.nFileSizeLow = _Find_data.nFileSizeHigh; + _Data.nFileSizeLow = _Find_data.nFileSizeLow; _Data.ftLastWriteTime = _Find_data.ftLastWriteTime; }