Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[filesystem] Don't error on weird system files #2715

Merged
merged 8 commits into from
May 17, 2022
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 0 additions & 77 deletions stl/inc/filesystem
Original file line number Diff line number Diff line change
Expand Up @@ -409,83 +409,6 @@ namespace filesystem {
}
}

template <class _Ty>
_NODISCARD _Ty _Unaligned_load(const void* _Ptr) {
    // Copy sizeof(_Ty) bytes starting at _Ptr into a fresh _Ty and return it.
    // Going through memcpy means _Ptr does not have to be aligned for _Ty.
    static_assert(is_trivial_v<_Ty>, "Unaligned loads require trivial types");
    _Ty _Loaded;
    _CSTD memcpy(&_Loaded, _Ptr, sizeof(_Ty));
    return _Loaded;
}

_NODISCARD inline bool _Is_drive_prefix(const wchar_t* const _First) {
    // Report whether the two wchar_t at _First spell a prefix of the form X: (a drive letter, then a colon).
    // pre: _First points to at least 2 wchar_t instances
    // pre: Little endian
    // Both characters are fetched with one unsigned int load and tested together.
    const unsigned int _Colon_over_A = (static_cast<unsigned int>(L':') << (sizeof(wchar_t) * CHAR_BIT)) | L'A';

    // clearing bit 0x20 of the first character turns a lowercase drive letter into its uppercase form
    const unsigned int _Folded = _Unaligned_load<unsigned int>(_First) & 0xFFFF'FFDFu;

    // unsigned arithmetic: under the preconditions above, the difference is below 26 only for A: through Z:
    return _Folded - _Colon_over_A < 26;
}

_NODISCARD inline bool _Has_drive_letter_prefix(const wchar_t* const _First, const wchar_t* const _Last) {
    // Report whether [_First, _Last) begins with a prefix of the form X:
    if (_Last - _First < 2) {
        return false; // fewer than two characters; _Is_drive_prefix requires at least two
    }

    return _Is_drive_prefix(_First);
}

_NODISCARD inline const wchar_t* _Find_root_name_end(const wchar_t* const _First, const wchar_t* const _Last) {
    // Treat [_First, _Last) as a path and return a pointer one past its root-name.
    // When the path has no root-name, the result is _First.
    //
    // The generic grammar gives library implementations the most freedom here.
    // The choices made for example Windows paths are:
    //   X:DriveRelative, X:\DosAbsolute
    //       root-name is X:, and a root-directory exists if and only if a \ is present.
    //   \RootRelative
    //       no root-name, and the \ is the root-directory.
    //   \\server\share
    //       root-name is \\server, root-directory is the \ after it, and share is the first element of
    //       relative-path. Windows technically considers all of \\server\share the logical "root", but for
    //       decomposition they are kept apart, so that
    //       path(R"(\\server\share)").replace_filename("other_share") is \\server\other_share.
    //   \\?\device, \??\device, \\.\device
    //       CreateFile appears to treat these as the same thing. The first three characters become the
    //       root-name and the first \ after them the root-directory. Support for these prefixes varies by
    //       Windows version, which does not matter for path decomposition.
    //   \\?\UNC\server\share
    //       MSDN explicitly documents the \\?\UNC syntax as a special case. The device Mup ("Multiple UNC
    //       provider") owns \\?\UNC in the NT namespace and is responsible for network file access; given
    //       \\server\share, CreateFile translates it to \\?\UNC\server\share to get remote server access.
    //       Because NT treats this like any other device, it is handled as the \\?\ case above.
    const auto _Length = _Last - _First;
    if (_Length < 2) {
        return _First; // too short for any of the root-name forms recognized below
    }

    // X: is tested first because it is the most common root-name
    if (_Has_drive_letter_prefix(_First, _Last)) {
        return _First + 2;
    }

    // every remaining root-name starts with a slash, and paths without a leading slash are expected to be
    // very common, so rule those out early
    if (!_Is_slash(_First[0])) {
        return _First;
    }

    // In the patterns below, $ stands for anything other than a slash, possibly the end of the input.
    if (_Length >= 4 && _Is_slash(_First[3]) && (_Length == 4 || !_Is_slash(_First[4]))) { // \xx\$
        const bool _Slash_then_mark = _Is_slash(_First[1]) && (_First[2] == L'?' || _First[2] == L'.'); // \\?\$, \\.\$
        const bool _Two_question_marks = _First[1] == L'?' && _First[2] == L'?'; // \??\$
        if (_Slash_then_mark || _Two_question_marks) {
            return _First + 3;
        }
    }

    if (_Length >= 3 && _Is_slash(_First[1]) && !_Is_slash(_First[2])) { // \\server
        // the root-name runs up to the next slash, or to _Last if there is none
        return _STD find_if(_First + 3, _Last, _Is_slash);
    }

    // no match
    return _First;
}

_NODISCARD inline wstring_view _Parse_root_name(const wstring_view _Str) {
// attempt to parse _Str as a path and return the root-name if it exists; otherwise, an empty view
const auto _First = _Str.data();
Expand Down
81 changes: 81 additions & 0 deletions stl/inc/xfilesystem_abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@
#ifndef _XFILESYSTEM_ABI_H
#define _XFILESYSTEM_ABI_H
#include <yvals_core.h>

#include <xutility>

#if _STL_COMPILER_PREPROCESSOR
strega-nil-ms marked this conversation as resolved.
Show resolved Hide resolved

#include <cstdint>
Expand Down Expand Up @@ -368,6 +371,84 @@ struct _Is_slash_oper { // predicate testing if input is a preferred-separator o
};

// Shared instance of _Is_slash_oper: call it directly as _Is_slash(ch), or pass it to an algorithm as a predicate.
inline constexpr _Is_slash_oper _Is_slash{};


template <class _Ty>
_NODISCARD _Ty _Unaligned_load(const void* _Ptr) {
    // Copy sizeof(_Ty) bytes starting at _Ptr into a fresh _Ty and return it.
    // Going through memcpy means _Ptr does not have to be aligned for _Ty.
    static_assert(is_trivial_v<_Ty>, "Unaligned loads require trivial types");
    _Ty _Loaded;
    _CSTD memcpy(&_Loaded, _Ptr, sizeof(_Ty));
    return _Loaded;
}

_NODISCARD inline bool _Is_drive_prefix(const wchar_t* const _First) {
    // Report whether the two wchar_t at _First spell a prefix of the form X: (a drive letter, then a colon).
    // pre: _First points to at least 2 wchar_t instances
    // pre: Little endian
    // Both characters are fetched with one unsigned int load and tested together.
    const unsigned int _Colon_over_A = (static_cast<unsigned int>(L':') << (sizeof(wchar_t) * CHAR_BIT)) | L'A';

    // clearing bit 0x20 of the first character turns a lowercase drive letter into its uppercase form
    const unsigned int _Folded = _Unaligned_load<unsigned int>(_First) & 0xFFFF'FFDFu;

    // unsigned arithmetic: under the preconditions above, the difference is below 26 only for A: through Z:
    return _Folded - _Colon_over_A < 26;
}

_NODISCARD inline bool _Has_drive_letter_prefix(const wchar_t* const _First, const wchar_t* const _Last) {
    // Report whether [_First, _Last) begins with a prefix of the form X:
    if (_Last - _First < 2) {
        return false; // fewer than two characters; _Is_drive_prefix requires at least two
    }

    return _Is_drive_prefix(_First);
}

_NODISCARD inline const wchar_t* _Find_root_name_end(const wchar_t* const _First, const wchar_t* const _Last) {
    // Treat [_First, _Last) as a path and return a pointer one past its root-name.
    // When the path has no root-name, the result is _First.
    //
    // The generic grammar gives library implementations the most freedom here.
    // The choices made for example Windows paths are:
    //   X:DriveRelative, X:\DosAbsolute
    //       root-name is X:, and a root-directory exists if and only if a \ is present.
    //   \RootRelative
    //       no root-name, and the \ is the root-directory.
    //   \\server\share
    //       root-name is \\server, root-directory is the \ after it, and share is the first element of
    //       relative-path. Windows technically considers all of \\server\share the logical "root", but for
    //       decomposition they are kept apart, so that
    //       path(R"(\\server\share)").replace_filename("other_share") is \\server\other_share.
    //   \\?\device, \??\device, \\.\device
    //       CreateFile appears to treat these as the same thing. The first three characters become the
    //       root-name and the first \ after them the root-directory. Support for these prefixes varies by
    //       Windows version, which does not matter for path decomposition.
    //   \\?\UNC\server\share
    //       MSDN explicitly documents the \\?\UNC syntax as a special case. The device Mup ("Multiple UNC
    //       provider") owns \\?\UNC in the NT namespace and is responsible for network file access; given
    //       \\server\share, CreateFile translates it to \\?\UNC\server\share to get remote server access.
    //       Because NT treats this like any other device, it is handled as the \\?\ case above.
    const auto _Length = _Last - _First;
    if (_Length < 2) {
        return _First; // too short for any of the root-name forms recognized below
    }

    // X: is tested first because it is the most common root-name
    if (_Has_drive_letter_prefix(_First, _Last)) {
        return _First + 2;
    }

    // every remaining root-name starts with a slash, and paths without a leading slash are expected to be
    // very common, so rule those out early
    if (!_Is_slash(_First[0])) {
        return _First;
    }

    // In the patterns below, $ stands for anything other than a slash, possibly the end of the input.
    if (_Length >= 4 && _Is_slash(_First[3]) && (_Length == 4 || !_Is_slash(_First[4]))) { // \xx\$
        const bool _Slash_then_mark = _Is_slash(_First[1]) && (_First[2] == L'?' || _First[2] == L'.'); // \\?\$, \\.\$
        const bool _Two_question_marks = _First[1] == L'?' && _First[2] == L'?'; // \??\$
        if (_Slash_then_mark || _Two_question_marks) {
            return _First + 3;
        }
    }

    if (_Length >= 3 && _Is_slash(_First[1]) && !_Is_slash(_First[2])) { // \\server
        // the root-name runs up to the next slash, or to _Last if there is none
        return _STD find_if(_First + 3, _Last, _Is_slash);
    }

    // no match
    return _First;
}
_STD_END

#pragma pop_macro("new")
Expand Down
81 changes: 38 additions & 43 deletions stl/src/filesystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
// Do not include or define anything else here.
// In particular, basic_string must not be included here.

#include <algorithm>
strega-nil-ms marked this conversation as resolved.
Show resolved Hide resolved
#include <clocale>
#include <corecrt_terminate.h>
#include <cstdlib>
Expand Down Expand Up @@ -168,6 +169,10 @@ namespace {

return __std_win_error{GetLastError()};
}

// Combine two 32-bit halves into one 64-bit value: _High supplies bits 32-63 and _Low supplies bits 0-31.
// Used for values that arrive as separate high/low parts, such as a file size or a last-write time.
// No `static` here: the enclosing unnamed namespace already gives the function internal linkage.
[[nodiscard]] constexpr unsigned long long _Merge_to_ull(unsigned long _High, unsigned long _Low) noexcept {
    return (static_cast<unsigned long long>(_High) << 32) | static_cast<unsigned long long>(_Low);
}
} // unnamed namespace

_EXTERN_C
Expand Down Expand Up @@ -793,43 +798,9 @@ _Success_(return == __std_win_error::_Success) __std_win_error
return {_Size, __std_win_error::_Success};
}

// Field-for-field stand-in for WIN32_FILE_ATTRIBUTE_DATA, written with the STL's own __std_fs_* types; the
// trailing comment on each line shows the Win32 declaration that the member mirrors.
// It is embedded in _Aligned_file_attrs, which puts padding in front of it so that _Last_write_time lands on an
// 8-byte boundary; on its own this structure only has 4-byte alignment.
// A pointer to it is handed to GetFileAttributesExW as the output buffer, so the member order and sizes must
// stay identical to the Win32 structure.
// NOTE(review): the file size is declared below as the high part followed by the low part. The previous wording
// of this comment described the size as flipped into low:high order and held in a __std_fs_filetime, which does
// not match these declarations -- confirm which is intended before relying on either.
struct _File_attr_data { // typedef struct _WIN32_FILE_ATTRIBUTE_DATA {
__std_fs_file_attr _Attributes; // DWORD dwFileAttributes;
__std_fs_filetime _Creation_time; // FILETIME ftCreationTime;
__std_fs_filetime _Last_access_time; // FILETIME ftLastAccessTime;
__std_fs_filetime _Last_write_time; // FILETIME ftLastWriteTime;
unsigned long _File_size_high; // DWORD nFileSizeHigh;
unsigned long _File_size_low; // DWORD nFileSizeLow;
}; // } WIN32_FILE_ATTRIBUTE_DATA, *LPWIN32_FILE_ATTRIBUTE_DATA;

struct alignas(long long) _Aligned_file_attrs {
    // Wrapper that positions _Data so the __std_fs_filetime members inside it are aligned, which lets the
    // memcpy in _Last_write_time() be an ordinary 64-bit load.
    unsigned long _Padding; // never read; present only to shift _Data
    _File_attr_data _Data;

    [[nodiscard]] long long _Last_write_time() const noexcept {
        // reinterpret the stored last-write time as a single 64-bit integer
        long long _Packed;
        _CSTD memcpy(&_Packed, &_Data._Last_write_time, sizeof(long long));
        return _Packed;
    }

    [[nodiscard]] unsigned long long _File_size() const noexcept {
        // the high part supplies the upper 32 bits; the low part is added beneath it
        const unsigned long long _Upper = static_cast<unsigned long long>(_Data._File_size_high) << 32;
        return _Upper + _Data._File_size_low;
    }
};

[[nodiscard]] _Success_(return == __std_win_error::_Success) __std_win_error
__stdcall __std_fs_get_stats(_In_z_ const wchar_t* const _Path, __std_fs_stats* const _Stats,
_In_ __std_fs_stats_flags _Flags, _In_ const __std_fs_file_attr _Symlink_attribute_hint) noexcept {
static_assert((offsetof(_Aligned_file_attrs, _Data._Last_write_time) % 8) == 0, "_Last_write_time not aligned");
static_assert(sizeof(_File_attr_data) == sizeof(WIN32_FILE_ATTRIBUTE_DATA));
static_assert(alignof(_File_attr_data) == alignof(WIN32_FILE_ATTRIBUTE_DATA));
static_assert(alignof(_File_attr_data) == 4);

const bool _Follow_symlinks = _Bitmask_includes(_Flags, __std_fs_stats_flags::_Follow_symlinks);
_Flags &= ~__std_fs_stats_flags::_Follow_symlinks;
if (_Follow_symlinks && _Bitmask_includes(_Flags, __std_fs_stats_flags::_Reparse_tag)) {
Expand All @@ -854,21 +825,45 @@ struct alignas(long long) _Aligned_file_attrs {
_Flags, _Get_file_attributes_data)) { // caller wants something GetFileAttributesExW might provide
if (_Symlink_attribute_hint == __std_fs_file_attr::_Invalid
|| !_Bitmask_includes(_Symlink_attribute_hint, __std_fs_file_attr::_Reparse_point)
|| !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so GetFileAttributesExW
|| !_Follow_symlinks) { // we might not be a symlink or not following symlinks, so FindFirstFileW
// would return the right answer
_Aligned_file_attrs _Aligned_attrs;
auto& _Data = _Aligned_attrs._Data;
if (!GetFileAttributesExW(_Path, GetFileExInfoStandard, &_Data)) {
return __std_win_error{GetLastError()};

// Check for file names that contain `?` or `*` (i.e., globbing characters).
// These are invalid file names, and will give us the wrong answer with `FindFirstFileW`.
{
const wchar_t* _Path_end = _Path + _CSTD wcslen(_Path);
const wchar_t* _After_drive_prefix = _STD _Find_root_name_end(_Path, _Path_end);
strega-nil-ms marked this conversation as resolved.
Show resolved Hide resolved
// `?` is allowed in the drive prefix, but `*` is not.
if (_STD find(_Path, _After_drive_prefix, '*') != _After_drive_prefix) {
return __std_win_error{ERROR_INVALID_NAME};
}

constexpr static auto _Is_globbing_character = [](wchar_t _Ch) { return _Ch == '*' || _Ch == '?'; };
strega-nil-ms marked this conversation as resolved.
Show resolved Hide resolved
strega-nil-ms marked this conversation as resolved.
Show resolved Hide resolved
// In the rest of the path, neither is allowed.
if (_STD find_if(_After_drive_prefix, _Path_end, _Is_globbing_character) != _Path_end) {
return __std_win_error{ERROR_INVALID_NAME};
}
}

WIN32_FIND_DATAW _Data;
{
HANDLE _Find_handle = FindFirstFileW(_Path, &_Data);
if (_Find_handle == INVALID_HANDLE_VALUE) {
return __std_win_error{GetLastError()};
}
FindClose(_Find_handle);
}

const __std_fs_file_attr _Attributes{_Data.dwFileAttributes};
if (!_Follow_symlinks
|| !_Bitmask_includes(_Data._Attributes,
|| !_Bitmask_includes(_Attributes,
__std_fs_file_attr::_Reparse_point)) { // if we aren't following symlinks or can't be a
// symlink, that data was useful, record
_Stats->_Attributes = _Data._Attributes;
_Stats->_File_size = _Aligned_attrs._File_size();
_Stats->_Last_write_time = _Aligned_attrs._Last_write_time();
_Stats->_Attributes = _Attributes;
_Stats->_File_size = _Merge_to_ull(_Data.nFileSizeHigh, _Data.nFileSizeLow);
_Stats->_Last_write_time = static_cast<long long>(
_Merge_to_ull(_Data.ftLastWriteTime.dwHighDateTime, _Data.ftLastWriteTime.dwLowDateTime));

_Flags &= ~_Get_file_attributes_data;
}
}
Expand Down