Skip to content

Commit

Permalink
<format> assumes strings are encoded in the active code page (#1834)
Browse files Browse the repository at this point in the history
  • Loading branch information
CaseyCarter authored Apr 20, 2021
1 parent a108657 commit ccc5aaa
Show file tree
Hide file tree
Showing 10 changed files with 435 additions and 276 deletions.
1 change: 1 addition & 0 deletions stl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ endforeach()
# Objs that exist in both libcpmt[d][01].lib and msvcprt[d].lib.
set(IMPLIB_SOURCES
${CMAKE_CURRENT_LIST_DIR}/src/filesystem.cpp
${CMAKE_CURRENT_LIST_DIR}/src/format.cpp
${CMAKE_CURRENT_LIST_DIR}/src/locale0_implib.cpp
${CMAKE_CURRENT_LIST_DIR}/src/nothrow.cpp
${CMAKE_CURRENT_LIST_DIR}/src/sharedmutex.cpp
Expand Down
397 changes: 216 additions & 181 deletions stl/inc/format

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion stl/inc/xfilesystem_abi.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ _BITMASK_OPS(__std_fs_file_flags)

enum class __std_fs_file_handle : intptr_t { _Invalid = -1 };

enum class __std_code_page : unsigned int { _Utf8 = 65001 };
enum class __std_code_page : unsigned int { _Acp = 0, _Utf8 = 65001 };

struct __std_fs_convert_result {
int _Len;
Expand Down
1 change: 1 addition & 0 deletions stl/msbuild/stl_base/stl.files.settings.targets
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
(controlled by IncludeInLink and IncludeInImportLib). -->
<BuildFiles Include="
$(CrtRoot)\github\stl\src\filesystem.cpp;
$(CrtRoot)\github\stl\src\format.cpp;
$(CrtRoot)\github\stl\src\locale0_implib.cpp;
$(CrtRoot)\github\stl\src\nothrow.cpp;
$(CrtRoot)\github\stl\src\sharedmutex.cpp;
Expand Down
45 changes: 45 additions & 0 deletions stl/src/format.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// Implements a win32 API wrapper for <format>

// This must be as small as possible, because its contents are
// injected into the msvcprt.lib and msvcprtd.lib import libraries.
// Do not include or define anything else here.
// In particular, basic_string must not be included here.

#include <xfilesystem_abi.h>
#include <xlocinfo.h>

#include <Windows.h>

static_assert(__std_code_page::_Acp == __std_code_page{CP_ACP});

extern "C" [[nodiscard]] __std_win_error __stdcall __std_get_cvt(
const __std_code_page _Codepage, _Cvtvec* const _Pcvt) noexcept {
// get conversion info for an arbitrary codepage
*_Pcvt = {};

CPINFOEXW _Info{};
const DWORD _Flags = 0; // reserved, must be zero
if (GetCPInfoExW(static_cast<UINT>(_Codepage), _Flags, &_Info) == 0) {
// NB: the only documented failure mode for GetCPInfoExW is ERROR_INVALID_PARAMETER,
// so in practice it should never fail for CP_ACP.
return __std_win_error{GetLastError()};
}

_Pcvt->_Page = _Info.CodePage;
_Pcvt->_Mbcurmax = _Info.MaxCharSize;

for (int _Idx = 0; _Idx < MAX_LEADBYTES; _Idx += 2) {
if (_Info.LeadByte[_Idx] == 0 && _Info.LeadByte[_Idx + 1] == 0) {
break;
}

for (unsigned char _First = _Info.LeadByte[_Idx], _Last = _Info.LeadByte[_Idx + 1]; _First != _Last; ++_First) {
_Pcvt->_Isleadbyte[_First >> 3] |= 1u << (_First & 0b111u);
}
}

return __std_win_error::_Success;
}
5 changes: 3 additions & 2 deletions tests/std/include/test_format_support.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,9 +117,10 @@ void test_parse_helper(const CharT* (*func)(const CharT*, const CharT*, callback
callback_type&& callbacks = {}) {
try {
auto end = func(view.data(), view.data() + view.size(), std::move(callbacks));
if (expected_end_position != std::basic_string_view<CharT>::npos) {
assert(end == view.data() + expected_end_position);
if (expected_end_position == std::basic_string_view<CharT>::npos) {
expected_end_position = view.size();
}
assert(end == view.data() + expected_end_position);
assert(!err_expected);
} catch (const std::format_error&) {
assert(err_expected);
Expand Down
27 changes: 0 additions & 27 deletions tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -975,31 +975,6 @@ void test_size() {
test_size_helper<charT>(8, STR("{:8}"), STR("scully"));
}

void test_multibyte_format_strings() {
#ifndef MSVC_INTERNAL_TESTING // TRANSITION, the Windows version on Contest VMs doesn't always understand ".UTF-8"
{
assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
// Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL)
assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32");

assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv);
}

{
assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
try {
(void) format("{:\x9f\x8f\x88<10}"sv, 42); // Bad fill character encoding: missing lead byte before \x9f
assert(false);
} catch (const format_error&) {
}
}
#endif // MSVC_INTERNAL_TESTING

assert(setlocale(LC_ALL, "C") != nullptr);
}

// The libfmt_ tests are derived from tests in
// libfmt, Copyright (c) 2012 - present, Victor Zverovich
// See NOTICE.txt for more information.
Expand Down Expand Up @@ -1318,8 +1293,6 @@ void test() {
test_size<char>();
test_size<wchar_t>();

test_multibyte_format_strings();

libfmt_formatter_test_escape<char>();
libfmt_formatter_test_escape<wchar_t>();

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
// Copyright (c) Microsoft Corporation.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#define _FORMAT_CODEPAGE (__std_code_page{932})

#include <cassert>
#include <clocale>
#include <format>
Expand All @@ -11,55 +13,76 @@
using namespace std;

void test_multibyte_format_strings() {
{
assert(setlocale(LC_ALL, ".932") != nullptr);
const auto s =
"\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
assert(format(s) == s);

assert(format("{:.2}", s) == "\x93\xfa"sv);
assert(format("{:4.2}", s) == "\x93\xfa "sv);
const auto s = "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
assert(format(s) == s);

assert(format("{:<4.2}", s) == "\x93\xfa "sv);
assert(format("{:^4.2}", s) == " \x93\xfa "sv);
assert(format("{:>4.2}", s) == " \x93\xfa"sv);
assert(format("{:.2}", s) == "\x93\xfa"sv);
assert(format("{:4.2}", s) == "\x93\xfa "sv);

assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
assert(format("{:<4.2}", s) == "\x93\xfa "sv);
assert(format("{:^4.2}", s) == " \x93\xfa "sv);
assert(format("{:>4.2}", s) == " \x93\xfa"sv);

assert(format("{:.3}", s) == "\x93\xfa"sv);
assert(format("{:4.3}", s) == "\x93\xfa "sv);
assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);

assert(format("{:<4.3}", s) == "\x93\xfa "sv);
assert(format("{:^4.3}", s) == " \x93\xfa "sv);
assert(format("{:>4.3}", s) == " \x93\xfa"sv);
assert(format("{:.3}", s) == "\x93\xfa"sv);
assert(format("{:4.3}", s) == "\x93\xfa "sv);

assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
}
assert(format("{:<4.3}", s) == "\x93\xfa "sv);
assert(format("{:^4.3}", s) == " \x93\xfa "sv);
assert(format("{:>4.3}", s) == " \x93\xfa"sv);

assert(setlocale(LC_ALL, "C") != nullptr);
assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
}

void test_parse_align() {
auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;

{
assert(setlocale(LC_ALL, ".932") != nullptr);
test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3,
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\x93\xfa"sv});
test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\x96\x7b"sv});
test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
}
const auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;

assert(setlocale(LC_ALL, "C") != nullptr);
test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3, //
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\x93\xfa"sv});
test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\x96\x7b"sv});
test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\x92\x6e"sv});
}

void test_width_estimation() {
// Format strings of known width with a trailing delimiter using a precision large enough to
// include all but the delimiter to validate the width estimation code.
struct test_case {
const char* str;
int width;
};
constexpr test_case test_cases[] = {
{"\x58", 1},
{"x\x58", 2},

// Pick "short" and "long" codepoints (\x20 and \x96\x7b), then form all permutations of
// 3-codepoint prefixes with the same fixed delimiter as above. This gives us coverage of
// all adjacent pairings (short/short, short/long, long/short, long/long).
{"\x20\x20\x20\x58", 4},
{"\x20\x20\x96\x7b\x58", 5},
{"\x20\x96\x7b\x20\x58", 5},
{"\x96\x7b\x20\x20\x58", 5},
{"\x20\x96\x7b\x96\x7b\x58", 6},
{"\x96\x7b\x20\x96\x7b\x58", 6},
{"\x96\x7b\x96\x7b\x20\x58", 6},
{"\x96\x7b\x96\x7b\x96\x7b\x58", 7},
};

for (const auto& test : test_cases) {
string_view sv{test.str};
sv = sv.substr(0, sv.size() - 1);
assert(format("{:.{}}", test.str, test.width - 1) == sv);
}
}

int main() {
test_multibyte_format_strings();
test_parse_align();
test_width_estimation();
}
18 changes: 1 addition & 17 deletions tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ bool test_parse_align() {
// \x343E (which is from CJK unified ideographs extension A) and similar characters to parse as
// an alignment specifier.
auto s4 = L"*\x343E"sv;
test_parse_helper(parse_align_fn, s4, false, view_typ::npos, {.expected_fill = L"*"sv});
test_parse_helper(parse_align_fn, s4, false, 0, {.expected_fill = L"*"sv});

// test multi-code-unit fill characters
{
Expand All @@ -47,22 +47,6 @@ bool test_parse_align() {
test_parse_helper(parse_align_fn, L"\U0001F3C8^X"sv, false, 3,
{.expected_alignment = _Fmt_align::_Center, .expected_fill = L"\U0001F3C8"sv});
}
} else {
// test multibyte fill characters
#ifndef MSVC_INTERNAL_TESTING // TRANSITION, the Windows version on Contest VMs doesn't always understand ".UTF-8"
{
assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
// "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88<X"sv, false, 5,
{.expected_alignment = _Fmt_align::_Left, .expected_fill = "\xf0\x9f\x8f\x88"sv});
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88>X"sv, false, 5,
{.expected_alignment = _Fmt_align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv});
test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5,
{.expected_alignment = _Fmt_align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv});
}
#endif // MSVC_INTERNAL_TESTING

assert(setlocale(LC_ALL, "C") != nullptr);
}

return true;
Expand Down
Loading

0 comments on commit ccc5aaa

Please sign in to comment.