From 0fec1662f14ed4954966b6296a0317a69d51a5f3 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sun, 11 Apr 2021 16:56:17 +0800
Subject: [PATCH 1/8] Assume format strings are always UTF-8 when execution
 charset is UTF-8

---
 stl/inc/format | 152 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 95 insertions(+), 57 deletions(-)
diff --git a/stl/inc/format b/stl/inc/format
index fe47c696b0..bf10804b62 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -417,55 +417,79 @@ _NODISCARD constexpr const _CharT* _Parse_arg_id(
     throw format_error("Invalid format string.");
 }
 
+inline constexpr bool _Execution_charset_is_utf8 = [] {
+#pragma warning(push)
+#pragma warning(disable : 4309) // 'initializing' : truncation of constant value
+#pragma warning(disable : 4566) // character represented by universal-character-name '\u4E00' cannot be represented in
+                                // the current code page
+#pragma warning(disable : 6201) // Index '2' is out of valid index range '0' to '1' for possibly stack allocated buffer
+                                // '_Test_char'
+#pragma warning(disable : 6239) // (<non-zero constant> && <expression>) always evaluates to the result of <expression>.
+                                // Did you intend to use the bitwise-and operator?
+    constexpr char _Test_char[] = "\u4e00";
+    return sizeof(_Test_char) == 4 && _Test_char[0] == '\xe4' && _Test_char[1] == '\xb8' && _Test_char[2] == '\x80';
+#pragma warning(pop)
+}();
+
+_NODISCARD inline int _Utf8_code_units_in_next_character(const char* const _First, const char* const _Last) noexcept {
+    // Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First, _Last),
+    // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
+    const auto _Ch = static_cast<unsigned char>(*_First);
+    if (_Ch < 0b1000'0000u) {
+        return 1;
+    }
+
+    const auto _Len = static_cast<size_t>(_Last - _First);
+
+    if (_Ch < 0b1110'0000u) {
+        // check for non-lead byte or partial 2-byte encoded character
+        return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1;
+    }
+
+    if (_Ch < 0b1111'0000u) {
+        // check for partial 3-byte encoded character
+        return (_Len >= 3) ? 3 : -1;
+    }
+
+    // check for partial 4-byte encoded character
+    return (_Len >= 4) ? 4 : -1;
+}
+
 _NODISCARD inline int _Code_units_in_next_character(const char* _First, const char* _Last, const _Cvtvec& _Cvt) {
     // Returns a count of the number of code units that compose the first encoded character in
     // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
     // *_First is not a valid lead byte.
     _STL_INTERNAL_CHECK(_First < _Last);
 
-    switch (_Cvt._Mbcurmax) {
-    default:
-        _STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page");
-        [[fallthrough]];
-    case 1:
-        return 1; // all characters have only one code unit
-
-    case 2:
-        {
-            wchar_t _Wide;
-            mbstate_t _St{};
-            const auto _Len   = static_cast<size_t>(_Last - _First);
-            const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
-            if (_Result > 0) {
-                return _Result;
-            } else if (_Result < 0) { // invalid or incomplete encoded character
-                return -1;
-            } else { // next code unit is '\0'
-                return 1;
-            }
-        }
-
-    case 4: // Assume UTF-8 (as does _Mbrtowc)
-        {
-            const auto _Ch = static_cast<unsigned char>(*_First);
-            if (_Ch < 0b1000'0000u) {
-                return 1;
-            }
-
-            const auto _Len = static_cast<size_t>(_Last - _First);
-
-            if (_Ch < 0b1110'0000u) {
-                // check for non-lead byte or partial 2-byte encoded character
-                return (_Ch >= 0b1100'0000u && _Len >= 2) ? 2 : -1;
+    if constexpr (_Execution_charset_is_utf8) {
+        return _Utf8_code_units_in_next_character(_First, _Last);
+    } else {
+        switch (_Cvt._Mbcurmax) {
+        default:
+            _STL_INTERNAL_CHECK(!"Bad number of encoding units for this code page");
+            [[fallthrough]];
+        case 1:
+            return 1; // all characters have only one code unit
+
+        case 2:
+            {
+                wchar_t _Wide;
+                mbstate_t _St{};
+                const auto _Len   = static_cast<size_t>(_Last - _First);
+                const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
+                if (_Result > 0) {
+                    return _Result;
+                } else if (_Result < 0) { // invalid or incomplete encoded character
+                    return -1;
+                } else { // next code unit is '\0'
+                    return 1;
+                }
             }
 
-            if (_Ch < 0b1111'0000u) {
-                // check for partial 3-byte encoded character
-                return (_Len >= 3) ? 3 : -1;
+        case 4: // Assume UTF-8 (as does _Mbrtowc)
+            {
+                return _Utf8_code_units_in_next_character(_First, _Last);
             }
-
-            // check for partial 4-byte encoded character
-            return (_Len >= 4) ? 4 : -1;
         }
     }
 }
@@ -751,20 +775,24 @@ template <class _CharT>
 const _CharT* _Find_encoded(const _CharT* _First, const _CharT* _Last, const _CharT _Val, const _Cvtvec& _Cvt) {
     // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
     // continuation byte) in [_First, _Last).
-    if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) {
-        // As above and in _Mbrtowc, assume 4-byte encodings are UTF-8
+    if constexpr (_Execution_charset_is_utf8) {
         return _Find_unchecked(_First, _Last, _Val);
-    }
+    } else {
+        if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) {
+            // As above and in _Mbrtowc, assume 4-byte encodings are UTF-8
+            return _Find_unchecked(_First, _Last, _Val);
+        }
 
-    while (_First != _Last && *_First != _Val) {
-        const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt);
-        if (_Units < 0) {
-            throw format_error("Invalid encoded character in format string.");
+        while (_First != _Last && *_First != _Val) {
+            const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt);
+            if (_Units < 0) {
+                throw format_error("Invalid encoded character in format string.");
+            }
+            _First += _Units;
         }
-        _First += _Units;
-    }
 
-    return _First;
+        return _First;
+    }
 }
 
 template <class _CharT, _Parse_replacement_field_callbacks<_CharT> _HandlerT>
@@ -2163,15 +2191,9 @@ _NODISCARD constexpr int _Unicode_width_estimate(const char32_t _Ch) noexcept {
     return 1;
 }
 
-_NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) {
+_NODISCARD inline int _Estimate_utf8_character_width(const char* const _Ptr, const int _Units) noexcept {
     // Return an estimate for the width of the character composed of _Units code units,
     // whose first code unit is denoted by _Ptr.
-    if (_Cvt._Mbcurmax != 4) {
-        // not a Unicode encoding; estimate width == number of code units
-        return _Units;
-    }
-
-    // assume UTF-8
     auto _Ch = static_cast<char32_t>(*_Ptr);
     switch (_Units) {
     default:
@@ -2197,6 +2219,22 @@ _NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Uni
     return _Unicode_width_estimate<_Width_estimate_high_intervals>(_Ch);
 }
 
+_NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) {
+    // Return an estimate for the width of the character composed of _Units code units,
+    // whose first code unit is denoted by _Ptr.
+    if constexpr (_Execution_charset_is_utf8) {
+        return _Estimate_utf8_character_width(_Ptr, _Units);
+    } else {
+        if (_Cvt._Mbcurmax != 4) {
+            // not a Unicode encoding; estimate width == number of code units
+            return _Units;
+        }
+
+        // assume UTF-8
+        return _Estimate_utf8_character_width(_Ptr, _Units);
+    }
+}
+
 _NODISCARD inline int _Estimate_character_width(const wchar_t* _Ptr, const int _Units, const _Cvtvec&) {
     // Return an estimate for the width of the character composed of _Units code units,
     // whose first code unit is denoted by _Ptr.

From 97a1ee08515af6bec5cf549ccb89bfc631872f8e Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sun, 11 Apr 2021 17:03:36 +0800
Subject: [PATCH 2/8] Add `/utf-8` tests

---
 tests/std/include/test_format_support.hpp     | 126 ++++++++++++++++++
 .../P0645R10_text_formatting_utf8/env.lst     |   6 +
 .../P0645R10_text_formatting_utf8/test.cpp    |  64 +++++++++
 3 files changed, 196 insertions(+)
 create mode 100644 tests/std/include/test_format_support.hpp
 create mode 100644 tests/std/tests/P0645R10_text_formatting_utf8/env.lst
 create mode 100644 tests/std/tests/P0645R10_text_formatting_utf8/test.cpp

diff --git a/tests/std/include/test_format_support.hpp b/tests/std/include/test_format_support.hpp
new file mode 100644
index 0000000000..ca569f23d3
--- /dev/null
+++ b/tests/std/include/test_format_support.hpp
@@ -0,0 +1,126 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <format>
+#include <string_view>
+
+// copied from the string_view tests
+template <typename CharT>
+struct choose_literal; // not defined
+
+template <>
+struct choose_literal<char> {
+    static constexpr const char* choose(const char* s, const wchar_t*) {
+        return s;
+    }
+};
+
+template <>
+struct choose_literal<wchar_t> {
+    static constexpr const wchar_t* choose(const char*, const wchar_t* s) {
+        return s;
+    }
+};
+
+#define TYPED_LITERAL(CharT, Literal) (choose_literal<CharT>::choose(Literal, L##Literal))
+
+template <typename CharT>
+struct noop_testing_callbacks {
+    constexpr void _On_align(std::_Align) {}
+    constexpr void _On_fill(std::basic_string_view<CharT>) {}
+    constexpr void _On_width(unsigned int) {}
+    constexpr void _On_dynamic_width(std::size_t) {}
+    constexpr void _On_dynamic_width(std::_Auto_id_tag) {}
+    constexpr void _On_precision(unsigned int) {}
+    constexpr void _On_dynamic_precision(std::size_t) {}
+    constexpr void _On_dynamic_precision(std::_Auto_id_tag) {}
+    constexpr void _On_sign(std::_Sign) {}
+    constexpr void _On_hash() {}
+    constexpr void _On_zero() {}
+    constexpr void _On_localized() {}
+    constexpr void _On_type(CharT) {}
+};
+
+template <typename CharT>
+struct testing_callbacks {
+    std::_Align expected_alignment = std::_Align::_None;
+    std::_Sign expected_sign       = std::_Sign::_None;
+    std::basic_string_view<CharT> expected_fill;
+    int expected_width                     = -1;
+    std::size_t expected_dynamic_width     = static_cast<std::size_t>(-1);
+    bool expected_auto_dynamic_width       = false;
+    int expected_precision                 = -1;
+    std::size_t expected_dynamic_precision = static_cast<std::size_t>(-1);
+    bool expected_auto_dynamic_precision   = false;
+    bool expected_hash                     = false;
+    bool expected_zero                     = false;
+    bool expected_localized                = false;
+    CharT expected_type                    = '\0';
+
+    constexpr void _On_align(std::_Align aln) {
+        assert(aln == expected_alignment);
+    }
+    constexpr void _On_fill(std::basic_string_view<CharT> str_view) {
+        assert(str_view == expected_fill);
+    }
+    constexpr void _On_width(int width) {
+        assert(width == expected_width);
+    }
+    constexpr void _On_dynamic_width(std::size_t id) {
+        assert(id == expected_dynamic_width);
+    }
+    constexpr void _On_dynamic_width(std::_Auto_id_tag) {
+        assert(expected_auto_dynamic_width);
+    }
+    constexpr void _On_precision(int pre) {
+        assert(pre == expected_precision);
+    }
+    constexpr void _On_dynamic_precision(std::size_t id) {
+        assert(id == expected_dynamic_precision);
+    }
+    constexpr void _On_dynamic_precision(std::_Auto_id_tag) {
+        assert(expected_auto_dynamic_precision);
+    }
+    constexpr void _On_sign(std::_Sign sgn) {
+        assert(sgn == expected_sign);
+    }
+    constexpr void _On_hash() {
+        assert(expected_hash);
+    }
+    constexpr void _On_zero() {
+        assert(expected_zero);
+    }
+    constexpr void _On_localized() {
+        assert(expected_localized);
+    }
+    constexpr void _On_type(CharT type) {
+        assert(type == expected_type);
+    }
+};
+template <typename CharT>
+testing_callbacks(std::_Align, std::basic_string_view<CharT>) -> testing_callbacks<CharT>;
+
+struct testing_arg_id_callbacks {
+    constexpr void _On_auto_id() {}
+    constexpr void _On_manual_id(std::size_t) {}
+};
+
+template <typename CharT, typename callback_type>
+void test_parse_helper(const CharT* (*func)(const CharT*, const CharT*, callback_type&&),
+    std::basic_string_view<CharT> view, bool err_expected = false,
+    typename std::basic_string_view<CharT>::size_type expected_end_position = std::basic_string_view<CharT>::npos,
+    callback_type&& callbacks                                               = {}) {
+    try {
+        auto end = func(view.data(), view.data() + view.size(), std::move(callbacks));
+        if (expected_end_position != std::basic_string_view<CharT>::npos) {
+            assert(end == view.data() + expected_end_position);
+        }
+        assert(!err_expected);
+    } catch (const std::format_error&) {
+        assert(err_expected);
+    }
+}
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/env.lst b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
new file mode 100644
index 0000000000..42da0946d2
--- /dev/null
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/env.lst
@@ -0,0 +1,6 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+RUNALL_INCLUDE ..\concepts_matrix.lst
+RUNALL_CROSSLIST
+PM_CL="/utf-8"
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
new file mode 100644
index 0000000000..ef45b409eb
--- /dev/null
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -0,0 +1,64 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cassert>
+#include <format>
+#include <locale>
+#include <string_view>
+
+#include "test_format_support.hpp"
+
+using namespace std;
+
+void test_multibyte_format_strings() {
+    {
+        // Filling with footballs ("\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL)
+        assert(format("{:\xf0\x9f\x8f\x88>4}"sv, 42) == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x34\x32");
+
+        assert(format("{:\xf0\x9f\x8f\x88<4.2}", "1") == "\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
+        assert(format("{:\xf0\x9f\x8f\x88^4.2}", "1") == "\xf0\x9f\x8f\x88\x31\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88"sv);
+        assert(format("{:\xf0\x9f\x8f\x88>4.2}", "1") == "\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\xf0\x9f\x8f\x88\x31"sv);
+    }
+
+    {
+        try {
+            (void) format("{:\x9f\x8f\x88<10}"sv, 42); // Bad fill character encoding: missing lead byte before \x9f
+            assert(false);
+        } catch (const format_error&) {
+        }
+    }
+}
+
+void test_parse_align() {
+    auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
+
+    {
+        // "\xf0\x9f\x8f\x88" is U+1F3C8 AMERICAN FOOTBALL
+        test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88<X"sv, false, 5,
+            {.expected_alignment = _Align::_Left, .expected_fill = "\xf0\x9f\x8f\x88"sv});
+        test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88>X"sv, false, 5,
+            {.expected_alignment = _Align::_Right, .expected_fill = "\xf0\x9f\x8f\x88"sv});
+        test_parse_helper(parse_align_fn, "\xf0\x9f\x8f\x88^X"sv, false, 5,
+            {.expected_alignment = _Align::_Center, .expected_fill = "\xf0\x9f\x8f\x88"sv});
+    }
+}
+
+void run_tests() {
+    test_multibyte_format_strings();
+    test_parse_align();
+}
+
+int main() {
+    run_tests();
+
+    setlocale(LC_ALL, ".1252");
+    run_tests();
+
+    setlocale(LC_ALL, ".932");
+    run_tests();
+
+#ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage
+    setlocale(LC_ALL, ".UTF-8");
+    run_tests();
+#endif
+}

From 761932afb529e7ae938bb84e03547d0bb9eb9a12 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Sun, 11 Apr 2021 17:07:19 +0800
Subject: [PATCH 3/8] Run Shift-JIS tests with `/execution-charset:.932`

---
 .../test.cpp                                  |  29 ----
 .../env.lst                                   |  27 ++++
 .../test.cpp                                  |  65 +++++++++
 .../P0645R10_text_formatting_parsing/test.cpp | 129 +-----------------
 tests/std/tests/concepts_matrix.lst           |   2 +
 5 files changed, 96 insertions(+), 156 deletions(-)
 create mode 100644 tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
 create mode 100644 tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp

diff --git a/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp b/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
index dc86a49658..21fda060b4 100644
--- a/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_formatting/test.cpp
@@ -973,35 +973,6 @@ void test_size() {
 }
 
 void test_multibyte_format_strings() {
-    {
-        setlocale(LC_ALL, ".932");
-        const auto s =
-            "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
-        assert(format(s) == s);
-
-        assert(format("{:.2}", s) == "\x93\xfa"sv);
-        assert(format("{:4.2}", s) == "\x93\xfa  "sv);
-
-        assert(format("{:<4.2}", s) == "\x93\xfa  "sv);
-        assert(format("{:^4.2}", s) == " \x93\xfa "sv);
-        assert(format("{:>4.2}", s) == "  \x93\xfa"sv);
-
-        assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
-        assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
-        assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
-
-        assert(format("{:.3}", s) == "\x93\xfa"sv);
-        assert(format("{:4.3}", s) == "\x93\xfa  "sv);
-
-        assert(format("{:<4.3}", s) == "\x93\xfa  "sv);
-        assert(format("{:^4.3}", s) == " \x93\xfa "sv);
-        assert(format("{:>4.3}", s) == "  \x93\xfa"sv);
-
-        assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
-        assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
-        assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
-    }
-
 #ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage
     {
         setlocale(LC_ALL, ".UTF-8");
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
new file mode 100644
index 0000000000..9aa5f2a5cd
--- /dev/null
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/env.lst
@@ -0,0 +1,27 @@
+# Copyright (c) Microsoft Corporation.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+# This is `concepts_matrix.lst` with `/execution-charset:.932` added.
+# clang is excluded since it doesn't support non-UTF-8 execution charsets.
+
+RUNALL_INCLUDE ..\prefix.lst
+RUNALL_CROSSLIST
+PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest /execution-charset:.932"
+RUNALL_CROSSLIST
+PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:noexceptTypes-"
+PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MD /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:char8_t- /Zc:preprocessor"
+PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /Zc:wchar_t-"
+PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MDd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /fp:except /Zc:preprocessor"
+PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive-"
+PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /analyze:only /analyze:autolog-"
+PM_CL="/MT /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=0 /permissive- /fp:strict"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=1 /permissive-"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive"
+PM_CL="/MTd /D_ITERATOR_DEBUG_LEVEL=2 /permissive- /analyze:only /analyze:autolog-"
+PM_CL="/permissive- /Za /MD"
+PM_CL="/permissive- /Za /MDd"
+# PM_CL="/permissive- /BE /c /MD"
+# PM_CL="/permissive- /BE /c /MTd"
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
new file mode 100644
index 0000000000..a24c1a6e02
--- /dev/null
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -0,0 +1,65 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include <cassert>
+#include <format>
+#include <locale>
+#include <string_view>
+
+#include "test_format_support.hpp"
+
+using namespace std;
+
+void test_multibyte_format_strings() { // Shift-JIS (code page 932) format strings, fill characters, and arguments
+    {
+        setlocale(LC_ALL, ".932");
+        const auto s =
+            "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
+        assert(format(s) == s);
+
+        assert(format("{:.2}", s) == "\x93\xfa"sv);
+        assert(format("{:4.2}", s) == "\x93\xfa  "sv);
+
+        assert(format("{:<4.2}", s) == "\x93\xfa  "sv);
+        assert(format("{:^4.2}", s) == " \x93\xfa "sv);
+        assert(format("{:>4.2}", s) == "  \x93\xfa"sv);
+
+        assert(format("{:\x90}<4.2}", s) == "\x93\xfa\x90}\x90}"sv);
+        assert(format("{:\x90}^4.2}", s) == "\x90}\x93\xfa\x90}"sv);
+        assert(format("{:\x90}>4.2}", s) == "\x90}\x90}\x93\xfa"sv);
+
+        assert(format("{:.3}", s) == "\x93\xfa"sv);
+        assert(format("{:4.3}", s) == "\x93\xfa  "sv);
+
+        assert(format("{:<4.3}", s) == "\x93\xfa  "sv);
+        assert(format("{:^4.3}", s) == " \x93\xfa "sv);
+        assert(format("{:>4.3}", s) == "  \x93\xfa"sv);
+
+        assert(format("{:\x90}<4.3}", s) == "\x93\xfa\x90}\x90}"sv);
+        assert(format("{:\x90}^4.3}", s) == "\x90}\x93\xfa\x90}"sv);
+        assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
+    }
+    // setlocale(LC_ALL, nullptr) only queries the locale; select "C" to really undo the ".932" switch above.
+    setlocale(LC_ALL, "C");
+}
+
+void test_parse_align() { // _Parse_align must consume a whole double-byte Shift-JIS fill character
+    auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
+    // Each fill below is two code units followed by the align character, hence the expected end position of 3.
+    {
+        setlocale(LC_ALL, ".932");
+        test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3,
+            {.expected_alignment = _Align::_Left, .expected_fill = "\x93\xfa"sv});
+        test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
+            {.expected_alignment = _Align::_Right, .expected_fill = "\x96\x7b"sv});
+        test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
+            {.expected_alignment = _Align::_Center, .expected_fill = "\x92\x6e"sv});
+    }
+    // setlocale(LC_ALL, nullptr) only queries the locale; select "C" to really undo the ".932" switch above.
+    setlocale(LC_ALL, "C");
+}
+
+int main() {
+    test_multibyte_format_strings();
+    test_parse_align();
+}
diff --git a/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp b/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
index 9a7f62083d..966f04a1a8 100644
--- a/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_parsing/test.cpp
@@ -8,124 +8,9 @@
 #include <stdio.h>
 #include <string_view>
 
-using namespace std;
-
-// copied from the string_view tests
-template <typename CharT>
-struct choose_literal; // not defined
-
-template <>
-struct choose_literal<char> {
-    static constexpr const char* choose(const char* s, const wchar_t*) {
-        return s;
-    }
-};
-
-template <>
-struct choose_literal<wchar_t> {
-    static constexpr const wchar_t* choose(const char*, const wchar_t* s) {
-        return s;
-    }
-};
-
-#define TYPED_LITERAL(CharT, Literal) (choose_literal<CharT>::choose(Literal, L##Literal))
+#include "test_format_support.hpp"
 
-template <typename CharT>
-struct noop_testing_callbacks {
-    constexpr void _On_align(_Align) {}
-    constexpr void _On_fill(basic_string_view<CharT>) {}
-    constexpr void _On_width(unsigned int) {}
-    constexpr void _On_dynamic_width(size_t) {}
-    constexpr void _On_dynamic_width(_Auto_id_tag) {}
-    constexpr void _On_precision(unsigned int) {}
-    constexpr void _On_dynamic_precision(size_t) {}
-    constexpr void _On_dynamic_precision(_Auto_id_tag) {}
-    constexpr void _On_sign(_Sign) {}
-    constexpr void _On_hash() {}
-    constexpr void _On_zero() {}
-    constexpr void _On_localized() {}
-    constexpr void _On_type(CharT) {}
-};
-
-template <typename CharT>
-struct testing_callbacks {
-    _Align expected_alignment = _Align::_None;
-    _Sign expected_sign       = _Sign::_None;
-    basic_string_view<CharT> expected_fill;
-    int expected_width                   = -1;
-    size_t expected_dynamic_width        = static_cast<size_t>(-1);
-    bool expected_auto_dynamic_width     = false;
-    int expected_precision               = -1;
-    size_t expected_dynamic_precision    = static_cast<size_t>(-1);
-    bool expected_auto_dynamic_precision = false;
-    bool expected_hash                   = false;
-    bool expected_zero                   = false;
-    bool expected_localized              = false;
-    CharT expected_type                  = '\0';
-
-    constexpr void _On_align(_Align aln) {
-        assert(aln == expected_alignment);
-    }
-    constexpr void _On_fill(basic_string_view<CharT> str_view) {
-        assert(str_view == expected_fill);
-    }
-    constexpr void _On_width(int width) {
-        assert(width == expected_width);
-    }
-    constexpr void _On_dynamic_width(size_t id) {
-        assert(id == expected_dynamic_width);
-    }
-    constexpr void _On_dynamic_width(_Auto_id_tag) {
-        assert(expected_auto_dynamic_width);
-    }
-    constexpr void _On_precision(int pre) {
-        assert(pre == expected_precision);
-    }
-    constexpr void _On_dynamic_precision(size_t id) {
-        assert(id == expected_dynamic_precision);
-    }
-    constexpr void _On_dynamic_precision(_Auto_id_tag) {
-        assert(expected_auto_dynamic_precision);
-    }
-    constexpr void _On_sign(_Sign sgn) {
-        assert(sgn == expected_sign);
-    }
-    constexpr void _On_hash() {
-        assert(expected_hash);
-    }
-    constexpr void _On_zero() {
-        assert(expected_zero);
-    }
-    constexpr void _On_localized() {
-        assert(expected_localized);
-    }
-    constexpr void _On_type(CharT type) {
-        assert(type == expected_type);
-    }
-};
-template <typename CharT>
-testing_callbacks(_Align, basic_string_view<CharT>) -> testing_callbacks<CharT>;
-
-struct testing_arg_id_callbacks {
-    constexpr void _On_auto_id() {}
-    constexpr void _On_manual_id(size_t) {}
-};
-
-template <typename CharT, typename callback_type>
-void test_parse_helper(const CharT* (*func)(const CharT*, const CharT*, callback_type&&), basic_string_view<CharT> view,
-    bool err_expected                                                  = false,
-    typename basic_string_view<CharT>::size_type expected_end_position = basic_string_view<CharT>::npos,
-    callback_type&& callbacks                                          = {}) {
-    try {
-        auto end = func(view.data(), view.data() + view.size(), move(callbacks));
-        if (expected_end_position != basic_string_view<CharT>::npos) {
-            assert(end == view.data() + expected_end_position);
-        }
-        assert(!err_expected);
-    } catch (const format_error&) {
-        assert(err_expected);
-    }
-}
+using namespace std;
 
 template <typename CharT>
 bool test_parse_align() {
@@ -162,16 +47,6 @@ bool test_parse_align() {
         }
     } else {
         // test multibyte fill characters
-        {
-            setlocale(LC_ALL, ".932");
-            test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3,
-                {.expected_alignment = _Align::_Left, .expected_fill = "\x93\xfa"sv});
-            test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
-                {.expected_alignment = _Align::_Right, .expected_fill = "\x96\x7b"sv});
-            test_parse_helper(parse_align_fn, "\x92\x6e^X"sv, false, 3,
-                {.expected_alignment = _Align::_Center, .expected_fill = "\x92\x6e"sv});
-        }
-
 #ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage
         {
             setlocale(LC_ALL, ".UTF-8");
diff --git a/tests/std/tests/concepts_matrix.lst b/tests/std/tests/concepts_matrix.lst
index 5810e10f46..d35c3015ad 100644
--- a/tests/std/tests/concepts_matrix.lst
+++ b/tests/std/tests/concepts_matrix.lst
@@ -1,6 +1,8 @@
 # Copyright (c) Microsoft Corporation.
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
+# When updating this file, also update tests\P0645R10_text_formatting_legacy_text_encoding\env.lst to match
+
 RUNALL_INCLUDE .\prefix.lst
 RUNALL_CROSSLIST
 PM_CL="/w14640 /Zc:threadSafeInit- /EHsc /std:c++latest"

From 903ba79196cee08b8a69f7ec690a2b11e5bc01b1 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Tue, 13 Apr 2021 00:58:52 +0800
Subject: [PATCH 4/8] Apply code review feedback

---
 stl/inc/format | 50 ++++++++++++++++++++++++++++----------------------
 1 file changed, 28 insertions(+), 22 deletions(-)

diff --git a/stl/inc/format b/stl/inc/format
index bf10804b62..f9dffd211d 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -417,7 +417,7 @@ _NODISCARD constexpr const _CharT* _Parse_arg_id(
     throw format_error("Invalid format string.");
 }
 
-inline constexpr bool _Execution_charset_is_utf8 = [] {
+_NODISCARD constexpr bool _Is_execution_charset_utf8() {
 #pragma warning(push)
 #pragma warning(disable : 4309) // 'initializing' : truncation of constant value
 #pragma warning(disable : 4566) // character represented by universal-character-name '\u4E00' cannot be represented in
@@ -429,9 +429,12 @@ inline constexpr bool _Execution_charset_is_utf8 = [] {
     constexpr char _Test_char[] = "\u4e00";
     return sizeof(_Test_char) == 4 && _Test_char[0] == '\xe4' && _Test_char[1] == '\xb8' && _Test_char[2] == '\x80';
 #pragma warning(pop)
-}();
+}
+
+inline constexpr bool _Is_execution_charset_utf8_v = _Is_execution_charset_utf8();
 
-_NODISCARD inline int _Utf8_code_units_in_next_character(const char* const _First, const char* const _Last) noexcept {
+_NODISCARD inline constexpr int _Utf8_code_units_in_next_character(
+    const char* const _First, const char* const _Last) noexcept {
     // Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First, _Last),
     // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
     const auto _Ch = static_cast<unsigned char>(*_First);
@@ -455,13 +458,30 @@ _NODISCARD inline int _Utf8_code_units_in_next_character(const char* const _Firs
     return (_Len >= 4) ? 4 : -1;
 }
 
+_NODISCARD inline int _Double_byte_encoding_code_units_in_next_character(
+    const char* const _First, const char* const _Last, const _Cvtvec& _Cvt) {
+    // Returns a count of the number of code units that compose the first encoded character in [_First, _Last),
+    // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.
+    wchar_t _Wide;
+    mbstate_t _St{};
+    const auto _Len   = static_cast<size_t>(_Last - _First);
+    const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
+    if (_Result > 0) {
+        return _Result;
+    } else if (_Result < 0) { // invalid or incomplete encoded character
+        return -1;
+    } else { // next code unit is '\0'
+        return 1;
+    }
+}
+
 _NODISCARD inline int _Code_units_in_next_character(const char* _First, const char* _Last, const _Cvtvec& _Cvt) {
     // Returns a count of the number of code units that compose the first encoded character in
     // [_First, _Last), or -1 if [_First, _Last) doesn't contain an entire encoded character or
     // *_First is not a valid lead byte.
     _STL_INTERNAL_CHECK(_First < _Last);
 
-    if constexpr (_Execution_charset_is_utf8) {
+    if constexpr (_Is_execution_charset_utf8_v) {
         return _Utf8_code_units_in_next_character(_First, _Last);
     } else {
         switch (_Cvt._Mbcurmax) {
@@ -472,24 +492,10 @@ _NODISCARD inline int _Code_units_in_next_character(const char* _First, const ch
             return 1; // all characters have only one code unit
 
         case 2:
-            {
-                wchar_t _Wide;
-                mbstate_t _St{};
-                const auto _Len   = static_cast<size_t>(_Last - _First);
-                const int _Result = _Mbrtowc(&_Wide, _First, _Len, &_St, &_Cvt);
-                if (_Result > 0) {
-                    return _Result;
-                } else if (_Result < 0) { // invalid or incomplete encoded character
-                    return -1;
-                } else { // next code unit is '\0'
-                    return 1;
-                }
-            }
+            return _Double_byte_encoding_code_units_in_next_character(_First, _Last, _Cvt);
 
         case 4: // Assume UTF-8 (as does _Mbrtowc)
-            {
-                return _Utf8_code_units_in_next_character(_First, _Last);
-            }
+            return _Utf8_code_units_in_next_character(_First, _Last);
         }
     }
 }
@@ -775,7 +781,7 @@ template <class _CharT>
 const _CharT* _Find_encoded(const _CharT* _First, const _CharT* _Last, const _CharT _Val, const _Cvtvec& _Cvt) {
     // Returns the first occurrence of _Val as an encoded character (and not, for example, as a
     // continuation byte) in [_First, _Last).
-    if constexpr (_Execution_charset_is_utf8) {
+    if constexpr (_Is_execution_charset_utf8_v) {
         return _Find_unchecked(_First, _Last, _Val);
     } else {
         if (_Cvt._Mbcurmax == 1 || _Cvt._Mbcurmax == 4) {
@@ -2222,7 +2228,7 @@ _NODISCARD inline int _Estimate_utf8_character_width(const char* const _Ptr, con
 _NODISCARD inline int _Estimate_character_width(const char* _Ptr, const int _Units, const _Cvtvec& _Cvt) {
     // Return an estimate for the width of the character composed of _Units code units,
     // whose first code unit is denoted by _Ptr.
-    if constexpr (_Execution_charset_is_utf8) {
+    if constexpr (_Is_execution_charset_utf8_v) {
         return _Estimate_utf8_character_width(_Ptr, _Units);
     } else {
         if (_Cvt._Mbcurmax != 4) {

From 8f20290f327ce4c6f5453595ad7f50d15a76c042 Mon Sep 17 00:00:00 2001
From: statementreply <statementreply@gmail.com>
Date: Tue, 13 Apr 2021 01:06:36 +0800
Subject: [PATCH 5/8] constexpr function implies inline

---
 stl/inc/format | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stl/inc/format b/stl/inc/format
index f9dffd211d..8f576a9792 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -433,7 +433,7 @@ _NODISCARD constexpr bool _Is_execution_charset_utf8() {
 
 inline constexpr bool _Is_execution_charset_utf8_v = _Is_execution_charset_utf8();
 
-_NODISCARD inline constexpr int _Utf8_code_units_in_next_character(
+_NODISCARD constexpr int _Utf8_code_units_in_next_character(
     const char* const _First, const char* const _Last) noexcept {
     // Returns a count of the number of UTF-8 code units that compose the first encoded character in [_First, _Last),
     // or -1 if [_First, _Last) doesn't contain an entire encoded character or *_First is not a valid lead byte.

From b5ddb4d81380b39448c741ecc95709f745b3531e Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@nuwen.net>
Date: Mon, 12 Apr 2021 18:22:40 -0700
Subject: [PATCH 6/8] Code review feedback.

---
 tests/std/include/test_format_support.hpp                       | 1 +
 tests/std/test.lst                                              | 2 ++
 .../P0645R10_text_formatting_legacy_text_encoding/test.cpp      | 2 +-
 tests/std/tests/P0645R10_text_formatting_utf8/test.cpp          | 2 +-
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tests/std/include/test_format_support.hpp b/tests/std/include/test_format_support.hpp
index ca569f23d3..d3bf9b92ac 100644
--- a/tests/std/include/test_format_support.hpp
+++ b/tests/std/include/test_format_support.hpp
@@ -7,6 +7,7 @@
 #include <cstddef>
 #include <format>
 #include <string_view>
+#include <utility>
 
 // copied from the string_view tests
 template <typename CharT>
diff --git a/tests/std/test.lst b/tests/std/test.lst
index 6db71e0154..cba3615d38 100644
--- a/tests/std/test.lst
+++ b/tests/std/test.lst
@@ -263,8 +263,10 @@ tests\P0645R10_text_formatting_args
 tests\P0645R10_text_formatting_custom_formatting
 tests\P0645R10_text_formatting_death
 tests\P0645R10_text_formatting_formatting
+tests\P0645R10_text_formatting_legacy_text_encoding
 tests\P0645R10_text_formatting_parse_contexts
 tests\P0645R10_text_formatting_parsing
+tests\P0645R10_text_formatting_utf8
 tests\P0660R10_jthread_and_cv_any
 tests\P0660R10_stop_token
 tests\P0660R10_stop_token_death
diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
index a24c1a6e02..36a1c51130 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -2,8 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 #include <cassert>
+#include <clocale>
 #include <format>
-#include <locale>
 #include <string_view>
 
 #include "test_format_support.hpp"
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
index ef45b409eb..c4cc5c6ebe 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -2,8 +2,8 @@
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 #include <cassert>
+#include <clocale>
 #include <format>
-#include <locale>
 #include <string_view>
 
 #include "test_format_support.hpp"

From 33a901fcb25ad6b38b5f7013bfef938ddb1dcd5f Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@nuwen.net>
Date: Mon, 12 Apr 2021 18:24:21 -0700
Subject: [PATCH 7/8] Mitigate merge conflicts: Use _THROW.

---
 stl/inc/format | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/stl/inc/format b/stl/inc/format
index 8f576a9792..8b4eda68de 100644
--- a/stl/inc/format
+++ b/stl/inc/format
@@ -792,7 +792,7 @@ const _CharT* _Find_encoded(const _CharT* _First, const _CharT* _Last, const _Ch
         while (_First != _Last && *_First != _Val) {
             const int _Units = _Code_units_in_next_character(_First, _Last, _Cvt);
             if (_Units < 0) {
-                throw format_error("Invalid encoded character in format string.");
+                _THROW(format_error("Invalid encoded character in format string."));
             }
             _First += _Units;
         }

From 926444b88606fcb9ab25a5cdc095bf683a7f755a Mon Sep 17 00:00:00 2001
From: "Stephan T. Lavavej" <stl@nuwen.net>
Date: Mon, 12 Apr 2021 18:26:46 -0700
Subject: [PATCH 8/8] Mitigate merge conflicts: Check setlocale(), reset to
 "C".

---
 .../test.cpp                                              | 8 ++++----
 tests/std/tests/P0645R10_text_formatting_utf8/test.cpp    | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
index 36a1c51130..af57f610fc 100644
--- a/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_legacy_text_encoding/test.cpp
@@ -12,7 +12,7 @@ using namespace std;
 
 void test_multibyte_format_strings() {
     {
-        setlocale(LC_ALL, ".932");
+        assert(setlocale(LC_ALL, ".932") != nullptr);
         const auto s =
             "\x93\xfa\x96{\x92\x6e\x90}"sv; // Note the use of `{` and `}` as continuation bytes (from GH-1576)
         assert(format(s) == s);
@@ -40,14 +40,14 @@ void test_multibyte_format_strings() {
         assert(format("{:\x90}>4.3}", s) == "\x90}\x90}\x93\xfa"sv);
     }
 
-    setlocale(LC_ALL, nullptr);
+    assert(setlocale(LC_ALL, "C") != nullptr);
 }
 
 void test_parse_align() {
     auto parse_align_fn = _Parse_align<char, testing_callbacks<char>>;
 
     {
-        setlocale(LC_ALL, ".932");
+        assert(setlocale(LC_ALL, ".932") != nullptr);
         test_parse_helper(parse_align_fn, "\x93\xfa<X"sv, false, 3,
             {.expected_alignment = _Align::_Left, .expected_fill = "\x93\xfa"sv});
         test_parse_helper(parse_align_fn, "\x96\x7b>X"sv, false, 3,
@@ -56,7 +56,7 @@ void test_parse_align() {
             {.expected_alignment = _Align::_Center, .expected_fill = "\x92\x6e"sv});
     }
 
-    setlocale(LC_ALL, nullptr);
+    assert(setlocale(LC_ALL, "C") != nullptr);
 }
 
 int main() {
diff --git a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
index c4cc5c6ebe..0cf1ca3d9c 100644
--- a/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
+++ b/tests/std/tests/P0645R10_text_formatting_utf8/test.cpp
@@ -51,14 +51,14 @@ void run_tests() {
 int main() {
     run_tests();
 
-    setlocale(LC_ALL, ".1252");
+    assert(setlocale(LC_ALL, ".1252") != nullptr);
     run_tests();
 
-    setlocale(LC_ALL, ".932");
+    assert(setlocale(LC_ALL, ".932") != nullptr);
     run_tests();
 
 #ifndef MSVC_INTERNAL_TESTING // TRANSITION, Windows on Contest VMs understand ".UTF-8" codepage
-    setlocale(LC_ALL, ".UTF-8");
+    assert(setlocale(LC_ALL, ".UTF-8") != nullptr);
     run_tests();
 #endif
 }