diff --git a/stl/inc/format b/stl/inc/format index fe47c696b0..8b4eda68de 100644 --- a/stl/inc/format +++ b/stl/inc/format @@ -417,55 +417,85 @@ _NODISCARD constexpr const _CharT* _Parse_arg_id( throw format_error("Invalid format string."); } +_NODISCARD constexpr bool _Is_execution_charset_utf8() { +#pragma warning(push) +#pragma warning(disable : 4309) // 'initializing' : truncation of constant value +#pragma warning(disable : 4566) // character represented by universal-character-name '\u4E00' cannot be represented in + // the current code page +#pragma warning(disable : 6201) // Index '2' is out of valid index range '0' to '1' for possibly stack allocated buffer + // '_Test_char' +#pragma warning(disable : 6239) // (<non-zero constant> && <expression>) always evaluates to the result of <expression>. + // Did you intend to use the bitwise-and operator? + constexpr char _Test_char[] = "\u4e00"; + return sizeof(_Test_char) == 4 && _Test_char[0] == '\xe4' && _Test_char[1] == '\xb8' && _Test_char[2] == '\x80'; +#pragma warning(pop) +} + +inline constexpr bool _Is_execution_charset_utf8_v = _Is_execution_charset_utf8(); + +_NODISCARD constexpr int _Utf8_code_units_in_next_character( + const char* const _First, const char* const _Last) noexcept { + // Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First, _Last), + // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte. + const auto _Ch = static_cast(*_First); + if (_Ch < 0b1000'0000u) { + return 1; + } + + const auto _Len = static_cast(_Last - _First); + + if (_Ch < 0b1110'0000u) { + // check for non-lead byte or partial 2-byte encoded character + return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1; + } + + if (_Ch < 0b1111'0000u) { + // check for partial 3-byte encoded character + return (_Len >= 3) ? 3 : -1; + } + + // check for partial 4-byte encoded character + return (_Len >= 4) ? 
4 : -1; +} + +_NODISCARD inline int _Double_byte_encoding_code_units_in_next_character( + const char* const _First, const char* const _Last, const _Cvtvec& _Cvt) { + // Returns a count of the number of code units that compose the first encoded character in [_First, _Last), + // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte. + wchar_t _Wide; + mbstate_t _St{}; + const auto _Len = static_cast(_Last - _First); + const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt); + if (_Result > 0) { + return _Result; + } else if (_Result < 0) { // invalid or incomplete encoded character + return -1; + } else { // next code unit is '\0' + return 1; + } +} + _NODISCARD inline int _Code_units_in_next_character(const char* _First, const char* _Last, const _Cvtvec& _Cvt) { // Returns a count of the number of code units that compose the first encoded character in // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or // *_First is not a valid lead byte. 
_STL_INTERNAL_CHECK(_First < _Last); - switch (_Cvt._Mbcurmax) { - default: - _STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page"); - [[fallthrough]]; - case 1: - return 1; // all characters have only one code unit - - case 2: - { - wchar_t _Wide; - mbstate_t _St{}; - const auto _Len = static_cast(_Last - _First); - const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt); - if (_Result > 0) { - return _Result; - } else if (_Result < 0) { // invalid or incomplete encoded character - return -1; - } else { // next code unit is '\0' - return 1; - } - } - - case 4: // Assume UTF-8 (as does _Mbrtowc) - { - const auto _Ch = static_cast(*_First); - if (_Ch < 0b1000'0000u) { - return 1; - } - - const auto _Len = static_cast(_Last - _First); + if constexpr (_Is_execution_charset_utf8_v) { + return _Utf8_code_units_in_next_character(_First, _Last); + } else { + switch (_Cvt._Mbcurmax) { + default: + _STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page"); + [[fallthrough]]; + case 1: + return 1; // all characters have only one code unit - if (_Ch < 0b1110'0000u) { - // check for non-lead byte or partial 2-byte encoded character - return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1; - } + case 2: + return _Double_byte_encoding_code_units_in_next_character(_First, _Last, _Cvt); - if (_Ch < 0b1111'0000u) { - // check for partial 3-byte encoded character - return (_Len >= 3) ? 3 : -1; - } - - // check for partial 4-byte encoded character - return (_Len >= 4) ? 4 : -1; + case 4: // Assume UTF-8 (as does _Mbrtowc) + return _Utf8_code_units_in_next_character(_First, _Last); } } } @@ -751,20 +781,24 @@ template const _CharT* _Find_encoded(const _CharT* _First, const _CharT* _Last, const _CharT _Val, const _Cvtvec& _Cvt) { // Returns the first occurrence of _Val as an encoded character (and not, for example, as a // continuation byte) in [_First, _Last). 
- if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) { - // As above and in _Mbrtowc, assume 4-byte encodings are UTF-8 + if constexpr (_Is_execution_charset_utf8_v) { return _Find_unchecked(_First, _Last, _Val); - } + } else { + if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) { + // As above and in _Mbrtowc, assume 4-byte encodings are UTF-8 + return _Find_unchecked(_First, _Last, _Val); + } - while (_First != _Last && *_First != _Val) { - const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt); - if (_Units < 0) { - throw format_error("Invalid encoded character in format string."); + while (_First != _Last && *_First != _Val) { + const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt); + if (_Units < 0) { + _THROW(format_error("Invalid encoded character in format string.")); + } + _First += _Units; } - _First += _Units; - } - return _First; + return _First; + } } template _HandlerT> @@ -2163,15 +2197,9 @@ _NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept { return 1; } -_NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) { +_NODISCARD inline int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept { // Return an estimate for the width of the character composed of _Units code units, // whose first code unit is denoted by _Ptr. - if (_Cvt._Mbcurmax != 4) { - // not a Unicode encoding; estimate width == number of code units - return _Units; - } - - // assume UTF-8 auto _Ch = static_cast(*_Ptr); switch (_Units) { default: @@ -2197,6 +2225,22 @@ _NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Uni return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch); } +_NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) { + // Return an estimate for the width of the character composed of _Units code units, + // whose first code unit is denoted by _Ptr. 
+ if constexpr (_Is_execution_charset_utf8_v) { + return _Estimate_utf8_character_width(_Ptr, _Units); + } else { + if (_Cvt._Mbcurmax != 4) { + // not a Unicode encoding; estimate width == number of code units + return _Units; + } + + // assume UTF-8 + return _Estimate_utf8_character_width(_Ptr, _Units); + } +} + _NODISCARD inline int _Estimate_character_width(const wchar_t* _Ptr, const int _Units, const _Cvtvec&) { // Return an estimate for the width of the character composed of _Units code units, // whose first code unit is denoted by _Ptr. diff --git a/tests/std/include/test_format_support.hpp b/tests/std/include/test_format_support.hpp new file mode 100644 index 0000000000..d3bf9b92ac --- /dev/null +++ b/tests/std/include/test_format_support.hpp @@ -0,0 +1,127 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once + +#include +#include +#include +#include +#include + +// copied from the string_view tests +template +struct choose_literal; // not defined + +template <> +struct choose_literal { + static constexpr const char* choose(const char* s, const wchar_t*) { + return s; + } +}; + +template <> +struct choose_literal { + static constexpr const wchar_t* choose(const char*, const wchar_t* s) { + return s; + } +}; + +#define TYPED_LITERAL(CharT, Literal) (choose_literal::choose(Literal, L##Literal)) + +template +struct noop_testing_callbacks { + constexpr void _On_align(std::_Align) {} + constexpr void _On_fill(std::basic_string_view) {} + constexpr void _On_width(unsigned int) {} + constexpr void _On_dynamic_width(std::size_t) {} + constexpr void _On_dynamic_width(std::_Auto_id_tag) {} + constexpr void _On_precision(unsigned int) {} + constexpr void _On_dynamic_precision(std::size_t) {} + constexpr void _On_dynamic_precision(std::_Auto_id_tag) {} + constexpr void _On_sign(std::_Sign) {} + constexpr void _On_hash() {} + constexpr void _On_zero() {} + constexpr void _On_localized() {} + constexpr 
void _On_type(CharT) {} +}; + +template +struct testing_callbacks { + std::_Align expected_alignment = std::_Align::_None; + std::_Sign expected_sign = std::_Sign::_None; + std::basic_string_view expected_fill; + int expected_width = -1; + std::size_t expected_dynamic_width = static_cast(-1); + bool expected_auto_dynamic_width = false; + int expected_precision = -1; + std::size_t expected_dynamic_precision = static_cast(-1); + bool expected_auto_dynamic_precision = false; + bool expected_hash = false; + bool expected_zero = false; + bool expected_localized = false; + CharT expected_type = '\0'; + + constexpr void _On_align(std::_Align aln) { + assert(aln == expected_alignment); + } + constexpr void _On_fill(std::basic_string_view str_view) { + assert(str_view == expected_fill); + } + constexpr void _On_width(int width) { + assert(width == expected_width); + } + constexpr void _On_dynamic_width(std::size_t id) { + assert(id == expected_dynamic_width); + } + constexpr void _On_dynamic_width(std::_Auto_id_tag) { + assert(expected_auto_dynamic_width); + } + constexpr void _On_precision(int pre) { + assert(pre == expected_precision); + } + constexpr void _On_dynamic_precision(std::size_t id) { + assert(id == expected_dynamic_precision); + } + constexpr void _On_dynamic_precision(std::_Auto_id_tag) { + assert(expected_auto_dynamic_precision); + } + constexpr void _On_sign(std::_Sign sgn) { + assert(sgn == expected_sign); + } + constexpr void _On_hash() { + assert(expected_hash); + } + constexpr void _On_zero() { + assert(expected_zero); + } + constexpr void _On_localized() { + assert(expected_localized); + } + constexpr void _On_type(CharT type) { + assert(type == expected_type); + } +}; +template +testing_callbacks(std::_Align, std::basic_string_view) -> testing_callbacks; + +struct testing_arg_id_callbacks { + constexpr void _On_auto_id() {} + constexpr void _On_manual_id(std::size_t) {} +}; + +template +void test_parse_helper(const CharT* (*func)(const CharT*, const 
CharT*, callback_type&&), + std::basic_string_view view, bool err_expected = false, + typename std::basic_string_view::size_type expected_end_position = std::basic_string_view::npos, + callback_type&& callbacks = {}) { + try { + auto end = func(view.data(), view.data() + view.size(), std::move(callbacks)); + if (expected_end_position != std::basic_string_view::npos) { + assert(end == view.data() + expected_end_position); + } + assert(!err_expected); + } catch (const std::format_error&) { + assert(err_expected); + } +} diff --git a/tests/std/test.lst b/tests/std/test.lst index 6db71e0154..cba3615d38 100644 --- a/tests/std/test.lst +++ b/tests/std/test.lst @@ -263,8 +263,10 @@ tests\P0645R10_text_formatting_args tests\P0645R10_text_formatting_custom_formatting tests\P0645R10_text_formatting_death tests\P0645R10_text_formatting_formatting +tests\P0645R10_text_formatting_legacy_text_encoding tests\P0645R10_text_formatting_parse_contexts tests\P0645R10_text_formatting_parsing +tests\P0645R10_text_formatting_utf8 tests\P0660R10_jthread_and_cv_any tests\P0660R10_stop_token tests\P0660R10_stop_token_death diff --git a/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp b/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp index dc86a49658..21fda060b4 100644 --- a/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp +++ b/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp @@ -973,35 +973,6 @@ void test_size() { } void test_multibyte_format_strings() { - { - setlocale(LC_ALL, ".932"); - const auto s = - "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576) - assert(format(s) == s); - - assert(format("{:.2}", s) == "\x93\xfa"sv); - assert(format("{:4.2}", s) == "\x93\xfa "sv); - - assert(format("{:<4.2}", s) == "\x93\xfa "sv); - assert(format("{:^4.2}", s) == " \x93\xfa "sv); - assert(format("{:>4.2}", s) == " \x93\xfa"sv); - - assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv); - 
assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv); - assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv); - - assert(format("{:.3}", s) == "\x93\xfa"sv); - assert(format("{:4.3}", s) == "\x93\xfa "sv); - - assert(format("{:<4.3}", s) == "\x93\xfa "sv); - assert(format("{:^4.3}", s) == " \x93\xfa "sv); - assert(format("{:>4.3}", s) == " \x93\xfa"sv); - - assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv); - assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv); - assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv); - } - #ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage { setlocale(LC_ALL, ".UTF-8"); diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst new file mode 100644 index 0000000000..9aa5f2a5cd --- /dev/null +++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# This is `concepts_matrix.lst` with `/execution-charset:.932` added. +# clang is excluded since it doesn't support non-UTF-8 execution charsets. 
+ +RUNALL_INCLUDE ..\prefix.lst +RUNALL_CROSSLIST +PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest /execution-charset:.932" +RUNALL_CROSSLIST +PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:noexceptTypes-" +PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=1 /permissive-" +PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:char8_t- /Zc:preprocessor" +PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:wchar_t-" +PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-" +PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /fp:except /Zc:preprocessor" +PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive-" +PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /analyze:only /analyze:autolog-" +PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=1 /permissive-" +PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /fp:strict" +PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-" +PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive" +PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /analyze:only /analyze:autolog-" +PM_CL="/permissive- /Za /MD" +PM_CL="/permissive- /Za /MDd" +# PM_CL="/permissive- /BE /c /MD" +# PM_CL="/permissive- /BE /c /MTd" diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp new file mode 100644 index 0000000000..af57f610fc --- /dev/null +++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp @@ -0,0 +1,65 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#include "test_format_support.hpp" + +using namespace std; + +void test_multibyte_format_strings() { + { + assert(setlocale(LC_ALL, ".932") != nullptr); + const auto s = + "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576) + assert(format(s) == s); + + assert(format("{:.2}", s) == "\x93\xfa"sv); + assert(format("{:4.2}", s) == "\x93\xfa "sv); + + assert(format("{:<4.2}", s) == "\x93\xfa "sv); + assert(format("{:^4.2}", s) == " \x93\xfa "sv); + assert(format("{:>4.2}", s) == " \x93\xfa"sv); + + assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv); + assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv); + assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv); + + assert(format("{:.3}", s) == "\x93\xfa"sv); + assert(format("{:4.3}", s) == "\x93\xfa "sv); + + assert(format("{:<4.3}", s) == "\x93\xfa "sv); + assert(format("{:^4.3}", s) == " \x93\xfa "sv); + assert(format("{:>4.3}", s) == " \x93\xfa"sv); + + assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv); + assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv); + assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv); + } + + assert(setlocale(LC_ALL, "C") != nullptr); +} + +void test_parse_align() { + auto parse_align_fn = _Parse_align>; + + { + assert(setlocale(LC_ALL, ".932") != nullptr); + test_parse_helper(parse_align_fn, "\x93\xfaX"sv, false, 3, + {.expected_alignment = _Align::_Right, .expected_fill = "\x96\x7b"sv}); + test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3, + {.expected_alignment = _Align::_Center, .expected_fill = "\x92\x6e"sv}); + } + + assert(setlocale(LC_ALL, "C") != nullptr); +} + +int main() { + test_multibyte_format_strings(); + test_parse_align(); +} diff --git a/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp b/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp index 
9a7f62083d..966f04a1a8 100644 --- a/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp +++ b/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp @@ -8,124 +8,9 @@ #include #include -using namespace std; - -// copied from the string_view tests -template -struct choose_literal; // not defined - -template <> -struct choose_literal { - static constexpr const char* choose(const char* s, const wchar_t*) { - return s; - } -}; - -template <> -struct choose_literal { - static constexpr const wchar_t* choose(const char*, const wchar_t* s) { - return s; - } -}; - -#define TYPED_LITERAL(CharT, Literal) (choose_literal::choose(Literal, L##Literal)) +#include "test_format_support.hpp" -template -struct noop_testing_callbacks { - constexpr void _On_align(_Align) {} - constexpr void _On_fill(basic_string_view) {} - constexpr void _On_width(unsigned int) {} - constexpr void _On_dynamic_width(size_t) {} - constexpr void _On_dynamic_width(_Auto_id_tag) {} - constexpr void _On_precision(unsigned int) {} - constexpr void _On_dynamic_precision(size_t) {} - constexpr void _On_dynamic_precision(_Auto_id_tag) {} - constexpr void _On_sign(_Sign) {} - constexpr void _On_hash() {} - constexpr void _On_zero() {} - constexpr void _On_localized() {} - constexpr void _On_type(CharT) {} -}; - -template -struct testing_callbacks { - _Align expected_alignment = _Align::_None; - _Sign expected_sign = _Sign::_None; - basic_string_view expected_fill; - int expected_width = -1; - size_t expected_dynamic_width = static_cast(-1); - bool expected_auto_dynamic_width = false; - int expected_precision = -1; - size_t expected_dynamic_precision = static_cast(-1); - bool expected_auto_dynamic_precision = false; - bool expected_hash = false; - bool expected_zero = false; - bool expected_localized = false; - CharT expected_type = '\0'; - - constexpr void _On_align(_Align aln) { - assert(aln == expected_alignment); - } - constexpr void _On_fill(basic_string_view str_view) { - assert(str_view == 
expected_fill); - } - constexpr void _On_width(int width) { - assert(width == expected_width); - } - constexpr void _On_dynamic_width(size_t id) { - assert(id == expected_dynamic_width); - } - constexpr void _On_dynamic_width(_Auto_id_tag) { - assert(expected_auto_dynamic_width); - } - constexpr void _On_precision(int pre) { - assert(pre == expected_precision); - } - constexpr void _On_dynamic_precision(size_t id) { - assert(id == expected_dynamic_precision); - } - constexpr void _On_dynamic_precision(_Auto_id_tag) { - assert(expected_auto_dynamic_precision); - } - constexpr void _On_sign(_Sign sgn) { - assert(sgn == expected_sign); - } - constexpr void _On_hash() { - assert(expected_hash); - } - constexpr void _On_zero() { - assert(expected_zero); - } - constexpr void _On_localized() { - assert(expected_localized); - } - constexpr void _On_type(CharT type) { - assert(type == expected_type); - } -}; -template -testing_callbacks(_Align, basic_string_view) -> testing_callbacks; - -struct testing_arg_id_callbacks { - constexpr void _On_auto_id() {} - constexpr void _On_manual_id(size_t) {} -}; - -template -void test_parse_helper(const CharT* (*func)(const CharT*, const CharT*, callback_type&&), basic_string_view view, - bool err_expected = false, - typename basic_string_view::size_type expected_end_position = basic_string_view::npos, - callback_type&& callbacks = {}) { - try { - auto end = func(view.data(), view.data() + view.size(), move(callbacks)); - if (expected_end_position != basic_string_view::npos) { - assert(end == view.data() + expected_end_position); - } - assert(!err_expected); - } catch (const format_error&) { - assert(err_expected); - } -} +using namespace std; template bool test_parse_align() { @@ -162,16 +47,6 @@ bool test_parse_align() { } } else { // test multibyte fill characters - { - setlocale(LC_ALL, ".932"); - test_parse_helper(parse_align_fn, "\x93\xfaX"sv, false, 3, - {.expected_alignment = _Align::_Right, .expected_fill = "\x96\x7b"sv}); - 
test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3, - {.expected_alignment = _Align::_Center, .expected_fill = "\x92\x6e"sv}); - } - #ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage { setlocale(LC_ALL, ".UTF-8"); diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/env.lst b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst new file mode 100644 index 0000000000..42da0946d2 --- /dev/null +++ b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst @@ -0,0 +1,6 @@ +# Copyright (c) Microsoft Corporation. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +RUNALL_INCLUDE ..\concepts_matrix.lst +RUNALL_CROSSLIST +PM_CL="/utf-8" diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp new file mode 100644 index 0000000000..0cf1ca3d9c --- /dev/null +++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp @@ -0,0 +1,64 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#include "test_format_support.hpp" + +using namespace std; + +void test_multibyte_format_strings() { + { + // Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL) + assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32"); + + assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv); + assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv); + assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv); + } + + { + try { + (void) format("{:\x9f\x8f\x88<10}"sv, 42); // Bad fill character encoding: missing lead byte before \x9f + assert(false); + } catch (const format_error&) { + } + } +} + +void test_parse_align() { + auto parse_align_fn = _Parse_align>; + + { + // "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL + test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88X"sv, false, 5, + {.expected_alignment = _Align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv}); + test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5, + {.expected_alignment = _Align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv}); + } +} + +void run_tests() { + test_multibyte_format_strings(); + test_parse_align(); +} + +int main() { + run_tests(); + + assert(setlocale(LC_ALL, ".1252") != nullptr); + run_tests(); + + assert(setlocale(LC_ALL, ".932") != nullptr); + run_tests(); + +#ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage + assert(setlocale(LC_ALL, ".UTF-8") != nullptr); + run_tests(); +#endif +} diff --git a/tests/std/tests/concepts_matrix.lst b/tests/std/tests/concepts_matrix.lst index 5810e10f46..d35c3015ad 100644 --- a/tests/std/tests/concepts_matrix.lst +++ b/tests/std/tests/concepts_matrix.lst @@ -1,6 +1,8 @@ # Copyright (c) 
Microsoft Corporation. # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +# When updating this file, also update tests\P0645R10_text_formatting_legacy_text_encoding\env.lst to match + RUNALL_INCLUDE .\prefix.lst RUNALL_CROSSLIST PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"