From bb5ab1ffe719f5e801ef08ac08be975546aa3266 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung@gmail.com>
Date: Mon, 24 Jun 2024 15:51:24 -0700
Subject: [PATCH 01/30] =?UTF-8?q?[=F0=9D=98=80=F0=9D=97=BD=F0=9D=97=BF]=20?=
 =?UTF-8?q?initial=20version?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created using spr 1.3.4
---
 bolt/docs/CommandLineArgumentReference.md     |  4 ++
 bolt/lib/Profile/YAMLProfileReader.cpp        | 68 ++++++++++++++++---
 bolt/lib/Rewrite/RewriteInstance.cpp          |  7 +-
 bolt/lib/Utils/CommandLineOpts.cpp            |  8 +++
 .../X86/hashing-based-function-matching.test  | 64 +++++++++++++++++
 5 files changed, 138 insertions(+), 13 deletions(-)
 create mode 100644 bolt/test/X86/hashing-based-function-matching.test
diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index d95f30a299a285..00d472c5789168 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -259,6 +259,10 @@
 
   Always use long jumps/nops for Linux kernel static keys
 
+- `--match-profile-with-function-hash`
+
+  Match profile with function hash
+
 - `--max-data-relocations=<uint>`
 
   Maximum number of data relocations to process
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index f25f59201f1cd9..6c4eece4ddb1bd 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -22,6 +22,8 @@ namespace opts {
 extern cl::opt<unsigned> Verbosity;
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<bool> InferStaleProfile;
+extern cl::opt<bool> MatchProfileWithFunctionHash;
+extern cl::opt<bool> Lite;
 
 static llvm::cl::opt<bool>
     IgnoreHash("profile-ignore-hash",
@@ -363,9 +365,19 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     return Profile.Hash == static_cast<uint64_t>(BF.getHash());
   };
 
-  // We have to do 2 passes since LTO introduces an ambiguity in function
-  // names. The first pass assigns profiles that match 100% by name and
-  // by hash. The second pass allows name ambiguity for LTO private functions.
+  uint64_t MatchedWithExactName = 0;
+  uint64_t MatchedWithHash = 0;
+  uint64_t MatchedWithLTOCommonName = 0;
+
+  // Computes hash for binary functions.
+  if (opts::MatchProfileWithFunctionHash)
+    for (auto &[_, BF] : BC.getBinaryFunctions())
+      BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
+  else if (!opts::IgnoreHash)
+    for (BinaryFunction *BF : ProfileBFs)
+      BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
+
+  // This first pass assigns profiles that match 100% by name and by hash.
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
     if (!BF)
       continue;
@@ -374,15 +386,34 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // the profile.
     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
 
-    // Recompute hash once per function.
-    if (!opts::IgnoreHash)
-      Function.computeHash(YamlBP.Header.IsDFSOrder,
-                           YamlBP.Header.HashFunction);
-
-    if (profileMatches(YamlBF, Function))
+    if (profileMatches(YamlBF, Function)) {
       matchProfileToFunction(YamlBF, Function);
+      ++MatchedWithExactName;
+    }
   }
 
+  // Uses the strict hash of profiled and binary functions to match functions
+  // that are not matched by name or common name.
+  if (opts::MatchProfileWithFunctionHash) {
+    std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
+    StrictHashToBF.reserve(BC.getBinaryFunctions().size());
+
+    for (auto &[_, BF] : BC.getBinaryFunctions())
+      StrictHashToBF[BF.getHash()] = &BF;
+
+    for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
+      if (YamlBF.Used)
+        continue;
+      auto It = StrictHashToBF.find(YamlBF.Hash);
+      if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
+        BinaryFunction *BF = It->second;
+        matchProfileToFunction(YamlBF, *BF);
+        ++MatchedWithHash;
+      }
+    }
+  }
+
+  // This second pass allows name ambiguity for LTO private functions.
   for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
     if (!LTOCommonNameFunctionMap.contains(CommonName))
       continue;
@@ -396,6 +427,7 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       for (BinaryFunction *BF : Functions) {
         if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
           matchProfileToFunction(*YamlBF, *BF);
+          ++MatchedWithLTOCommonName;
           return true;
         }
       }
@@ -407,8 +439,10 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // partially.
     if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
         !LTOProfiles.front()->Used &&
-        !ProfiledFunctions.count(*Functions.begin()))
+        !ProfiledFunctions.count(*Functions.begin())) {
       matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
+      ++MatchedWithLTOCommonName;
+    }
   }
 
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
@@ -420,6 +454,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
+  if (opts::Verbosity >= 2) {
+    outs() << "BOLT-INFO: matched " << MatchedWithExactName
+           << " functions with identical names\n";
+    outs() << "BOLT-INFO: matched " << MatchedWithHash
+           << " functions with hash\n";
+    outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
+           << " functions with matching LTO common names\n";
+  }
+
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");
@@ -439,6 +482,11 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   BC.setNumUnusedProfiledObjects(NumUnused);
 
+  if (opts::Lite)
+    for (BinaryFunction *BF : BC.getAllBinaryFunctions())
+      if (!BF->hasProfile())
+        BF->setIgnored();
+
   return Error::success();
 }
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index 1a3a8af21d81b6..c0873a58a8a6e6 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,6 +82,7 @@ extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
 extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
+extern cl::opt<bool> MatchProfileWithFunctionHash;
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
 extern cl::opt<bool> TerminalTrap;
@@ -140,9 +141,6 @@ KeepTmp("keep-tmp",
   cl::Hidden,
   cl::cat(BoltCategory));
 
-cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
-                   cl::cat(BoltCategory));
-
 static cl::opt<unsigned>
 LiteThresholdPct("lite-threshold-pct",
   cl::desc("threshold (in percent) for selecting functions to process in lite "
@@ -2982,6 +2980,9 @@ void RewriteInstance::selectFunctionsToProcess() {
     if (mustSkip(Function))
       return false;
 
+    if (opts::MatchProfileWithFunctionHash)
+      return true;
+
     // If the list is not empty, only process functions from the list.
     if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
       // Regex check (-funcs and -funcs-file options).
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 41c89bc8aeba4e..41de30f3f566b1 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -128,6 +128,14 @@ cl::opt<bool>
                cl::desc("instrument code to generate accurate profile data"),
                cl::cat(BoltOptCategory));
 
+cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
+                   cl::cat(BoltCategory));
+
+cl::opt<bool>
+    MatchProfileWithFunctionHash("match-profile-with-function-hash",
+                                 cl::desc("Match profile with function hash"),
+                                 cl::Hidden, cl::cat(BoltCategory));
+
 cl::opt<std::string>
 OutputFilename("o",
   cl::desc("<output file>"),
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
new file mode 100644
index 00000000000000..4426da085bbd9c
--- /dev/null
+++ b/bolt/test/X86/hashing-based-function-matching.test
@@ -0,0 +1,64 @@
+## Tests function matching in YAMLProfileReader by function hash.
+
+# REQUIRES: system-linux
+# RUN: split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
+# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
+# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
+# RUN:   --print-cfg --match-profile-with-function-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s
+
+# CHECK: BOLT-INFO: matched 1 functions with hash
+
+#--- main.s
+.globl main
+.type	main, @function
+main:
+  .cfi_startproc
+.LBB00:
+  pushq   %rbp
+  movq    %rsp, %rbp
+  subq    $16, %rsp
+  testq   %rax, %rax
+  js      .LBB03
+.LBB01:
+  jne     .LBB04
+.LBB02:
+  nop
+.LBB03:
+  xorl    %eax, %eax
+  addq    $16, %rsp
+  popq    %rbp
+  retq
+.LBB04:
+  xorl    %eax, %eax
+  addq    $16, %rsp
+  popq    %rbp
+  retq
+## For relocations against .text
+.LBB05:
+  call exit
+  .cfi_endproc
+  .size	main, .-main
+
+#--- yaml
+---
+header:
+  profile-version: 1
+  binary-name:     'hashing-based-function-matching.s.tmp.exe'
+  binary-build-id: '<unknown>'
+  profile-flags:   [ lbr ]
+  profile-origin:  branch profile reader
+  profile-events:  ''
+  dfs-order:       false
+  hash-func:       xxh3
+functions:
+  - name:            main2
+    fid:             0
+    hash:            0x72F82DEAA6FE65FB
+    exec:            1
+    nblocks:         6
+    blocks:
+      - bid:             1
+        insns:           1
+        succ:            [ { bid: 3, cnt: 1} ]
+...

From 020f69cd10a2ff1233cc28088989319e5a58b116 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung@gmail.com>
Date: Tue, 25 Jun 2024 07:51:35 -0700
Subject: [PATCH 02/30] Added opts::Lite to RewriteInstance

---
 bolt/lib/Rewrite/RewriteInstance.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index c0873a58a8a6e6..ee6ac1c7d57b51 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,6 +82,7 @@ extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
 extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
+extern cl::opt<bool> Lite;
 extern cl::opt<bool> MatchProfileWithFunctionHash;
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;

From c097e643efc2a2ee5170fbcdb5e8c644a7207452 Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung@gmail.com>
Date: Tue, 25 Jun 2024 08:07:45 -0700
Subject: [PATCH 03/30] Revert "Added opts::Lite to RewriteInstance"

This reverts commit 020f69cd10a2ff1233cc28088989319e5a58b116.
---
 bolt/lib/Rewrite/RewriteInstance.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index ee6ac1c7d57b51..c0873a58a8a6e6 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,7 +82,6 @@ extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
 extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
-extern cl::opt<bool> Lite;
 extern cl::opt<bool> MatchProfileWithFunctionHash;
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;

From 0e11a7e7179a2eb066e5f63f25b9277aa63f38eb Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Tue, 25 Jun 2024 17:09:06 +0200
Subject: [PATCH 04/30] [EarlyCSE] Add test with noundef load of undef (NFC)

---
 llvm/test/Transforms/EarlyCSE/flags.ll | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/EarlyCSE/flags.ll b/llvm/test/Transforms/EarlyCSE/flags.ll
index 78282b081c298e..78b3818b211dac 100644
--- a/llvm/test/Transforms/EarlyCSE/flags.ll
+++ b/llvm/test/Transforms/EarlyCSE/flags.ll
@@ -51,7 +51,7 @@ define void @test_inbounds_program_not_ub_if_first_gep_poison(ptr %ptr, i64 %n)
 
 define void @load_both_nonnull(ptr %p) {
 ; CHECK-LABEL: @load_both_nonnull(
-; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !0
+; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull [[META0:![0-9]+]]
 ; CHECK-NEXT:    call void @use.ptr(ptr [[V1]])
 ; CHECK-NEXT:    call void @use.ptr(ptr [[V1]])
 ; CHECK-NEXT:    ret void
@@ -79,7 +79,7 @@ define void @load_first_nonnull(ptr %p) {
 
 define void @load_first_nonnull_noundef(ptr %p) {
 ; CHECK-LABEL: @load_first_nonnull_noundef(
-; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !0, !noundef !0
+; CHECK-NEXT:    [[V1:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull [[META0]], !noundef [[META0]]
 ; CHECK-NEXT:    call void @use.ptr(ptr [[V1]])
 ; CHECK-NEXT:    call void @use.ptr(ptr [[V1]])
 ; CHECK-NEXT:    ret void
@@ -93,7 +93,7 @@ define void @load_first_nonnull_noundef(ptr %p) {
 
 define ptr @store_to_load_forward(ptr %p, ptr %p2) {
 ; CHECK-LABEL: @store_to_load_forward(
-; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull !0
+; CHECK-NEXT:    [[P3:%.*]] = load ptr, ptr [[P:%.*]], align 8, !nonnull [[META0]]
 ; CHECK-NEXT:    store ptr [[P3]], ptr [[P2:%.*]], align 8
 ; CHECK-NEXT:    ret ptr [[P3]]
 ;
@@ -102,3 +102,13 @@ define ptr @store_to_load_forward(ptr %p, ptr %p2) {
   %res = load ptr, ptr %p2
   ret ptr %res
 }
+
+define i32 @load_undef_noundef(ptr %p) {
+; CHECK-LABEL: @load_undef_noundef(
+; CHECK-NEXT:    store i32 undef, ptr [[P:%.*]], align 4
+; CHECK-NEXT:    ret i32 undef
+;
+  store i32 undef, ptr %p
+  %v = load i32, ptr %p, !noundef !{}
+  ret i32 %v
+}

From 79e8a5952366eacd92201a8d6472726fc14e00fd Mon Sep 17 00:00:00 2001
From: Hui <hui.xie1990@gmail.com>
Date: Tue, 25 Jun 2024 16:13:48 +0100
Subject: [PATCH 05/30] [libc++] Move allocator assertion into allocator_traits
 (#94750)

There is code duplication in all containers that static_assert the
allocator matches the allocator requirements in the spec. This check can
be moved into a more centralised place.
---
 libcxx/include/__memory/allocator_traits.h | 9 +++++++++
 libcxx/include/deque                       | 9 +++------
 libcxx/include/forward_list                | 6 ++----
 libcxx/include/list                        | 5 +----
 libcxx/include/map                         | 9 ++-------
 libcxx/include/set                         | 8 ++------
 libcxx/include/string                      | 5 +----
 libcxx/include/unordered_map               | 9 ++-------
 libcxx/include/unordered_set               | 5 +----
 libcxx/include/vector                      | 5 +----
 10 files changed, 24 insertions(+), 46 deletions(-)

diff --git a/libcxx/include/__memory/allocator_traits.h b/libcxx/include/__memory/allocator_traits.h
index 47fe132d15cb1f..ac564f0e6fa0cc 100644
--- a/libcxx/include/__memory/allocator_traits.h
+++ b/libcxx/include/__memory/allocator_traits.h
@@ -16,6 +16,7 @@
 #include <__type_traits/enable_if.h>
 #include <__type_traits/is_constructible.h>
 #include <__type_traits/is_empty.h>
+#include <__type_traits/is_same.h>
 #include <__type_traits/make_unsigned.h>
 #include <__type_traits/remove_reference.h>
 #include <__type_traits/void_t.h>
@@ -372,6 +373,14 @@ template <class _Traits, class _Tp>
 using __rebind_alloc = typename _Traits::template rebind_alloc<_Tp>::other;
 #endif
 
+template <class _Alloc>
+struct __check_valid_allocator : true_type {
+  using _Traits = std::allocator_traits<_Alloc>;
+  static_assert(is_same<_Alloc, __rebind_alloc<_Traits, typename _Traits::value_type> >::value,
+                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
+                "original allocator");
+};
+
 // __is_default_allocator
 template <class _Tp>
 struct __is_default_allocator : false_type {};
diff --git a/libcxx/include/deque b/libcxx/include/deque
index 43ed8c46866ecb..aee4764859dd20 100644
--- a/libcxx/include/deque
+++ b/libcxx/include/deque
@@ -449,11 +449,11 @@ public:
 
   using value_type = _Tp;
 
-  static_assert(is_same<typename _Allocator::value_type, value_type>::value,
-                "Allocator::value_type must be same type as value_type");
-
   using allocator_type = _Allocator;
   using __alloc_traits = allocator_traits<allocator_type>;
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
+  static_assert(is_same<typename allocator_type::value_type, value_type>::value,
+                "Allocator::value_type must be same type as value_type");
 
   using size_type       = typename __alloc_traits::size_type;
   using difference_type = typename __alloc_traits::difference_type;
@@ -488,9 +488,6 @@ public:
       deque,
       void>;
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
   static_assert(is_nothrow_default_constructible<allocator_type>::value ==
                     is_nothrow_default_constructible<__pointer_allocator>::value,
                 "rebinding an allocator should not change exception guarantees");
diff --git a/libcxx/include/forward_list b/libcxx/include/forward_list
index e59c210654caa9..3731d3f6cf6d16 100644
--- a/libcxx/include/forward_list
+++ b/libcxx/include/forward_list
@@ -648,13 +648,11 @@ public:
   typedef _Tp value_type;
   typedef _Alloc allocator_type;
 
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
+
   static_assert(is_same<value_type, typename allocator_type::value_type>::value,
                 "Allocator::value_type must be same type as value_type");
 
-  static_assert(is_same<allocator_type, __rebind_alloc<allocator_traits<allocator_type>, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
-
   static_assert(!is_same<allocator_type, __node_allocator>::value,
                 "internal allocator type must differ from user-specified type; otherwise overload resolution breaks");
 
diff --git a/libcxx/include/list b/libcxx/include/list
index 57e5c05da9f06b..1678559a841dde 100644
--- a/libcxx/include/list
+++ b/libcxx/include/list
@@ -673,6 +673,7 @@ class _LIBCPP_TEMPLATE_VIS list : private __list_imp<_Tp, _Alloc> {
 public:
   typedef _Tp value_type;
   typedef _Alloc allocator_type;
+  static_assert(__check_valid_allocator<allocator_type>::value);
   static_assert(is_same<value_type, typename allocator_type::value_type>::value,
                 "Allocator::value_type must be same type as value_type");
   typedef value_type& reference;
@@ -691,10 +692,6 @@ public:
   typedef void __remove_return_type;
 #endif
 
-  static_assert(is_same<allocator_type, __rebind_alloc<allocator_traits<allocator_type>, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
-
   _LIBCPP_HIDE_FROM_ABI list() _NOEXCEPT_(is_nothrow_default_constructible<__node_allocator>::value) {}
   _LIBCPP_HIDE_FROM_ABI explicit list(const allocator_type& __a) : base(__a) {}
   _LIBCPP_HIDE_FROM_ABI explicit list(size_type __n);
diff --git a/libcxx/include/map b/libcxx/include/map
index c44e75a1eb7e58..4b2f3fc71cbfea 100644
--- a/libcxx/include/map
+++ b/libcxx/include/map
@@ -997,9 +997,7 @@ private:
   typedef typename __base::__node_traits __node_traits;
   typedef allocator_traits<allocator_type> __alloc_traits;
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
 
   __base __tree_;
 
@@ -1656,6 +1654,7 @@ public:
   typedef value_type& reference;
   typedef const value_type& const_reference;
 
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
   static_assert(is_same<typename allocator_type::value_type, value_type>::value,
                 "Allocator::value_type must be same type as value_type");
 
@@ -1681,10 +1680,6 @@ private:
   typedef typename __base::__node_traits __node_traits;
   typedef allocator_traits<allocator_type> __alloc_traits;
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
-
   __base __tree_;
 
 public:
diff --git a/libcxx/include/set b/libcxx/include/set
index c74d5d8d4cf027..9a2eb12d0a25ab 100644
--- a/libcxx/include/set
+++ b/libcxx/include/set
@@ -578,9 +578,7 @@ private:
   typedef __tree<value_type, value_compare, allocator_type> __base;
   typedef allocator_traits<allocator_type> __alloc_traits;
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
 
   __base __tree_;
 
@@ -1035,9 +1033,7 @@ private:
   typedef __tree<value_type, value_compare, allocator_type> __base;
   typedef allocator_traits<allocator_type> __alloc_traits;
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
 
   __base __tree_;
 
diff --git a/libcxx/include/string b/libcxx/include/string
index 54d24c88a9c3fa..9a52ab6aef41e8 100644
--- a/libcxx/include/string
+++ b/libcxx/include/string
@@ -780,10 +780,7 @@ public:
                 "traits_type::char_type must be the same type as CharT");
   static_assert(is_same<typename allocator_type::value_type, value_type>::value,
                 "Allocator::value_type must be same type as value_type");
-
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
 
   // TODO: Implement iterator bounds checking without requiring the global database.
   typedef __wrap_iter<pointer> iterator;
diff --git a/libcxx/include/unordered_map b/libcxx/include/unordered_map
index a243689b9dc868..7c31c4fce26b03 100644
--- a/libcxx/include/unordered_map
+++ b/libcxx/include/unordered_map
@@ -1057,9 +1057,7 @@ private:
   typedef unique_ptr<__node, _Dp> __node_holder;
   typedef allocator_traits<allocator_type> __alloc_traits;
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
 
   static_assert(is_same<typename __table::__container_value_type, value_type>::value, "");
   static_assert(is_same<typename __table::__node_value_type, __value_type>::value, "");
@@ -1839,6 +1837,7 @@ public:
   typedef pair<const key_type, mapped_type> value_type;
   typedef value_type& reference;
   typedef const value_type& const_reference;
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
   static_assert(is_same<value_type, typename allocator_type::value_type>::value,
                 "Allocator::value_type must be same type as value_type");
 
@@ -1862,10 +1861,6 @@ private:
   static_assert(is_same<typename __node_traits::size_type, typename __alloc_traits::size_type>::value,
                 "Allocator uses different size_type for different types");
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
-
 public:
   typedef typename __alloc_traits::pointer pointer;
   typedef typename __alloc_traits::const_pointer const_pointer;
diff --git a/libcxx/include/unordered_set b/libcxx/include/unordered_set
index 5813e38a41f0d9..3297294a893f82 100644
--- a/libcxx/include/unordered_set
+++ b/libcxx/include/unordered_set
@@ -588,13 +588,10 @@ public:
   typedef __type_identity_t<_Alloc> allocator_type;
   typedef value_type& reference;
   typedef const value_type& const_reference;
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
   static_assert(is_same<value_type, typename allocator_type::value_type>::value,
                 "Allocator::value_type must be same type as value_type");
 
-  static_assert(is_same<allocator_type, __rebind_alloc<allocator_traits<allocator_type>, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
-
 private:
   typedef __hash_table<value_type, hasher, key_equal, allocator_type> __table;
 
diff --git a/libcxx/include/vector b/libcxx/include/vector
index 014971b4a680eb..299ad8c9b23f28 100644
--- a/libcxx/include/vector
+++ b/libcxx/include/vector
@@ -415,13 +415,10 @@ public:
       vector,
       void>;
 
+  static_assert(__check_valid_allocator<allocator_type>::value, "");
   static_assert(is_same<typename allocator_type::value_type, value_type>::value,
                 "Allocator::value_type must be same type as value_type");
 
-  static_assert(is_same<allocator_type, __rebind_alloc<__alloc_traits, value_type> >::value,
-                "[allocator.requirements] states that rebinding an allocator to the same type should result in the "
-                "original allocator");
-
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI vector()
       _NOEXCEPT_(is_nothrow_default_constructible<allocator_type>::value) {}
   _LIBCPP_CONSTEXPR_SINCE_CXX20 _LIBCPP_HIDE_FROM_ABI explicit vector(const allocator_type& __a)

From 7f10ed637e53c68ce62b756a3be8546a3dccf751 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Tue, 25 Jun 2024 08:17:10 -0700
Subject: [PATCH 06/30] [tsan] Fix dead lock when starting StackDepot thread
 (#96456)

Sometime tsan runtimes calls, like
`__tsan_mutex_create ()`, need to store a stack
in the StackDepot, and the Depot may need to start
and maintenance thread.

Example:
```
__sanitizer::FutexWait ()
__sanitizer::Semaphore::Wait ()
__sanitizer::Mutex::Lock ()
__tsan::SlotLock ()
__tsan::SlotLocker::SlotLocker ()
__tsan::Acquire ()
__tsan::CallUserSignalHandler ()
__tsan::ProcessPendingSignalsImpl ()
__tsan::ProcessPendingSignals ()
__tsan::ScopedInterceptor::~ScopedInterceptor ()
___interceptor_mmap ()
pthread_create ()
__sanitizer::internal_start_thread ()
__sanitizer::(anonymous namespace)::CompressThread::NewWorkNotify ()
__sanitizer::StackDepotNode::store ()
__sanitizer::StackDepotBase<__sanitizer::StackDepotNode, 1, 20>::Put ()
__tsan::CurrentStackId ()
__tsan::MutexCreate ()
__tsan_mutex_create ()
```

pthread_create() implementation may hit other
interceptors recursively, which may invoke
ProcessPendingSignals, which deadlocks.

Alternative solution could be block interceptors
closer to TSAN runtime API function, like
`__tsan_mutex_create`, or just before
`StackDepotPut``, but it's not needed for most
calls, only when new thread is created using
`real_pthread_create`.

I don't see a reasonable way to create a
regression test.
---
 .../lib/tsan/rtl/tsan_interceptors_posix.cpp        | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 94adea777cafd9..151693112158b4 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -1088,7 +1088,18 @@ TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
   return res;
 }
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+// DEFINE_REAL_PTHREAD_FUNCTIONS
+namespace __sanitizer {
+int real_pthread_create(void *th, void *attr, void *(*callback)(void *),
+                        void *param) {
+  ScopedIgnoreInterceptors ignore;
+  return REAL(pthread_create)(th, attr, callback, param);
+}
+int real_pthread_join(void *th, void **ret) {
+  ScopedIgnoreInterceptors ignore;
+  return REAL(pthread_join(th, ret));
+}
+}  // namespace __sanitizer
 
 TSAN_INTERCEPTOR(int, pthread_detach, void *th) {
   SCOPED_INTERCEPTOR_RAW(pthread_detach, th);

From dddef9d1c9b7b4e8a96bd68d44cd3ed6dc85f758 Mon Sep 17 00:00:00 2001
From: Craig Topper <craig.topper@sifive.com>
Date: Tue, 25 Jun 2024 08:18:37 -0700
Subject: [PATCH 07/30] [RISCV] Add FPR16 regbank and start legalizing f16
 operations for Zfh. (#96582)

---
 .../Target/RISCV/GISel/RISCVLegalizerInfo.cpp |  18 +-
 .../RISCV/GISel/RISCVRegisterBankInfo.cpp     |  47 +++-
 .../instruction-select/fp-arith-f16.mir       | 266 ++++++++++++++++++
 .../legalizer/legalize-fp-arith-f16.mir       | 233 +++++++++++++++
 .../GlobalISel/regbankselect/fp-arith-f16.mir | 257 +++++++++++++++++
 5 files changed, 802 insertions(+), 19 deletions(-)
 create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-arith-f16.mir
 create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-arith-f16.mir
 create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-arith-f16.mir

diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
index e003b6b8ee4e14..bec542f7781b1d 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp
@@ -371,17 +371,25 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST)
 
   // FP Operations
 
-  getActionDefinitionsBuilder({G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
-                               G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
-      .legalIf(typeIsScalarFPArith(0, ST));
+  auto &FPArithActions = getActionDefinitionsBuilder(
+                             {G_FADD, G_FSUB, G_FMUL, G_FDIV, G_FMA, G_FNEG,
+                              G_FABS, G_FSQRT, G_FMAXNUM, G_FMINNUM})
+                             .legalIf(typeIsScalarFPArith(0, ST));
+  // TODO: Fold this into typeIsScalarFPArith.
+  if (ST.hasStdExtZfh())
+    FPArithActions.legalFor({s16});
 
   getActionDefinitionsBuilder(G_FREM)
       .libcallFor({s32, s64})
       .minScalar(0, s32)
       .scalarize(0);
 
-  getActionDefinitionsBuilder(G_FCOPYSIGN)
-      .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));
+  auto &CopySignActions =
+      getActionDefinitionsBuilder(G_FCOPYSIGN)
+          .legalIf(all(typeIsScalarFPArith(0, ST), typeIsScalarFPArith(1, ST)));
+  // TODO: Fold this into typeIsScalarFPArith.
+  if (ST.hasStdExtZfh())
+    CopySignActions.legalFor({s16, s16});
 
   getActionDefinitionsBuilder(G_FPTRUNC).legalIf(
       [=, &ST](const LegalityQuery &Query) -> bool {
diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
index 686c8d89a73212..41ca164b38f3d0 100644
--- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
+++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp
@@ -29,6 +29,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] = {
     // clang-format off
     {0, 32, GPRBRegBank},
     {0, 64, GPRBRegBank},
+    {0, 16, FPRBRegBank},
     {0, 32, FPRBRegBank},
     {0, 64, FPRBRegBank},
     {0, 64, VRBRegBank},
@@ -41,12 +42,13 @@ const RegisterBankInfo::PartialMapping PartMappings[] = {
 enum PartialMappingIdx {
   PMI_GPRB32 = 0,
   PMI_GPRB64 = 1,
-  PMI_FPRB32 = 2,
-  PMI_FPRB64 = 3,
-  PMI_VRB64 = 4,
-  PMI_VRB128 = 5,
-  PMI_VRB256 = 6,
-  PMI_VRB512 = 7,
+  PMI_FPRB16 = 2,
+  PMI_FPRB32 = 3,
+  PMI_FPRB64 = 4,
+  PMI_VRB64 = 5,
+  PMI_VRB128 = 6,
+  PMI_VRB256 = 7,
+  PMI_VRB512 = 8,
 };
 
 const RegisterBankInfo::ValueMapping ValueMappings[] = {
@@ -60,6 +62,10 @@ const RegisterBankInfo::ValueMapping ValueMappings[] = {
     {&PartMappings[PMI_GPRB64], 1},
     {&PartMappings[PMI_GPRB64], 1},
     {&PartMappings[PMI_GPRB64], 1},
+    // Maximum 3 FPR operands; 16 bit.
+    {&PartMappings[PMI_FPRB16], 1},
+    {&PartMappings[PMI_FPRB16], 1},
+    {&PartMappings[PMI_FPRB16], 1},
     // Maximum 3 FPR operands; 32 bit.
     {&PartMappings[PMI_FPRB32], 1},
     {&PartMappings[PMI_FPRB32], 1},
@@ -90,12 +96,13 @@ enum ValueMappingIdx {
   InvalidIdx = 0,
   GPRB32Idx = 1,
   GPRB64Idx = 4,
-  FPRB32Idx = 7,
-  FPRB64Idx = 10,
-  VRB64Idx = 13,
-  VRB128Idx = 16,
-  VRB256Idx = 19,
-  VRB512Idx = 22,
+  FPRB16Idx = 7,
+  FPRB32Idx = 10,
+  FPRB64Idx = 13,
+  VRB64Idx = 16,
+  VRB128Idx = 19,
+  VRB256Idx = 22,
+  VRB512Idx = 25,
 };
 } // namespace RISCV
 } // namespace llvm
@@ -151,8 +158,20 @@ RISCVRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
 }
 
 static const RegisterBankInfo::ValueMapping *getFPValueMapping(unsigned Size) {
-  assert(Size == 32 || Size == 64);
-  unsigned Idx = Size == 64 ? RISCV::FPRB64Idx : RISCV::FPRB32Idx;
+  unsigned Idx;
+  switch (Size) {
+  default:
+    llvm_unreachable("Unexpected size");
+  case 16:
+    Idx = RISCV::FPRB16Idx;
+    break;
+  case 32:
+    Idx = RISCV::FPRB32Idx;
+    break;
+  case 64:
+    Idx = RISCV::FPRB64Idx;
+    break;
+  }
   return &RISCV::ValueMappings[Idx];
 }
 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-arith-f16.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-arith-f16.mir
new file mode 100644
index 00000000000000..96f019948537fd
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/fp-arith-f16.mir
@@ -0,0 +1,266 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+zfh -run-pass=instruction-select \
+# RUN:   -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+# RUN: llc -mtriple=riscv64 -mattr=+zfh -run-pass=instruction-select \
+# RUN:   -simplify-mir -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name:            fadd_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fadd_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FADD_H:%[0-9]+]]:fpr16 = nofpexcept FADD_H [[COPY]], [[COPY1]], 7
+    ; CHECK-NEXT: $f10_h = COPY [[FADD_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FADD %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fsub_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fsub_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FSUB_H:%[0-9]+]]:fpr16 = nofpexcept FSUB_H [[COPY]], [[COPY1]], 7
+    ; CHECK-NEXT: $f10_h = COPY [[FSUB_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FSUB %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fmul_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fmul_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FMUL_H:%[0-9]+]]:fpr16 = nofpexcept FMUL_H [[COPY]], [[COPY1]], 7
+    ; CHECK-NEXT: $f10_h = COPY [[FMUL_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FMUL %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fdiv_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fdiv_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FDIV_H:%[0-9]+]]:fpr16 = nofpexcept FDIV_H [[COPY]], [[COPY1]], 7
+    ; CHECK-NEXT: $f10_h = COPY [[FDIV_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FDIV %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fma_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h, $f12_h
+
+    ; CHECK-LABEL: name: fma_f16
+    ; CHECK: liveins: $f10_h, $f11_h, $f12_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fpr16 = COPY $f12_h
+    ; CHECK-NEXT: [[FMADD_H:%[0-9]+]]:fpr16 = nofpexcept FMADD_H [[COPY]], [[COPY1]], [[COPY2]], 7
+    ; CHECK-NEXT: $f10_h = COPY [[FMADD_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = COPY $f12_h
+    %3:fprb(s16) = G_FMA %0, %1, %2
+    $f10_h = COPY %3(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fneg_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h, $f12_h
+
+    ; CHECK-LABEL: name: fneg_f16
+    ; CHECK: liveins: $f10_h, $f11_h, $f12_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[FSGNJN_H:%[0-9]+]]:fpr16 = FSGNJN_H [[COPY]], [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSGNJN_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = G_FNEG %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fabs_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h, $f12_h
+
+    ; CHECK-LABEL: name: fabs_f16
+    ; CHECK: liveins: $f10_h, $f11_h, $f12_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[FSGNJX_H:%[0-9]+]]:fpr16 = FSGNJX_H [[COPY]], [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSGNJX_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = G_FABS %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fsqrt_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h, $f12_h
+
+    ; CHECK-LABEL: name: fsqrt_f16
+    ; CHECK: liveins: $f10_h, $f11_h, $f12_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[FSQRT_H:%[0-9]+]]:fpr16 = nofpexcept FSQRT_H [[COPY]], 7
+    ; CHECK-NEXT: $f10_h = COPY [[FSQRT_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = G_FSQRT %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fmaxnum_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fmaxnum_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FMAX_H:%[0-9]+]]:fpr16 = nofpexcept FMAX_H [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMAX_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FMAXNUM %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fminnum_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fminnum_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FMIN_H:%[0-9]+]]:fpr16 = nofpexcept FMIN_H [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMIN_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FMINNUM %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fcopysign_f16
+legalized:       true
+regBankSelected: true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fcopysign_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fpr16 = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fpr16 = COPY $f11_h
+    ; CHECK-NEXT: [[FSGNJ_H:%[0-9]+]]:fpr16 = FSGNJ_H [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSGNJ_H]]
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:fprb(s16) = COPY $f10_h
+    %1:fprb(s16) = COPY $f11_h
+    %2:fprb(s16) = G_FCOPYSIGN %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-arith-f16.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-arith-f16.mir
new file mode 100644
index 00000000000000..f1777e945b12db
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-fp-arith-f16.mir
@@ -0,0 +1,233 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+zfh -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s
+# RUN: llc -mtriple=riscv64 -mattr=+zfh -run-pass=legalizer %s -o - \
+# RUN: | FileCheck %s
+
+---
+name:            fadd_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fadd_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FADD:%[0-9]+]]:_(s16) = G_FADD [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FADD]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FADD %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fsub_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fsub_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FSUB:%[0-9]+]]:_(s16) = G_FSUB [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSUB]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FSUB %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fmul_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fmul_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s16) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMUL]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FMUL %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fdiv_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fdiv_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FDIV:%[0-9]+]]:_(s16) = G_FDIV [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FDIV]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FDIV %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fma_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h, $f12_h
+
+    ; CHECK-LABEL: name: fma_f16
+    ; CHECK: liveins: $f10_h, $f11_h, $f12_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s16) = COPY $f12_h
+    ; CHECK-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMA]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = COPY $f12_h
+    %3:_(s16) = G_FMA %0, %1, %2
+    $f10_h = COPY %3(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fneg_f16
+body:             |
+  bb.0:
+    liveins: $f10_h
+
+    ; CHECK-LABEL: name: fneg_f16
+    ; CHECK: liveins: $f10_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s16) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FNEG]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = G_FNEG %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fabs_f16
+body:             |
+  bb.0:
+    liveins: $f10_h
+
+    ; CHECK-LABEL: name: fabs_f16
+    ; CHECK: liveins: $f10_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:_(s16) = G_FABS [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FABS]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = G_FABS %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fsqrt_f16
+body:             |
+  bb.0:
+    liveins: $f10_h
+
+    ; CHECK-LABEL: name: fsqrt_f16
+    ; CHECK: liveins: $f10_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:_(s16) = G_FSQRT [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSQRT]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = G_FSQRT %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fmaxnum_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fmaxnum_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:_(s16) = G_FMAXNUM [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMAXNUM]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FMAXNUM %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fminnum_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fminnum_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:_(s16) = G_FMINNUM [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMINNUM]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FMINNUM %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fcopysign_f16
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fcopysign_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FCOPYSIGN:%[0-9]+]]:_(s16) = G_FCOPYSIGN [[COPY]], [[COPY1]](s16)
+    ; CHECK-NEXT: $f10_h = COPY [[FCOPYSIGN]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FCOPYSIGN %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-arith-f16.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-arith-f16.mir
new file mode 100644
index 00000000000000..ace4aa542377b3
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/fp-arith-f16.mir
@@ -0,0 +1,257 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+zfh -run-pass=regbankselect \
+# RUN:   -simplify-mir -verify-machineinstrs %s \
+# RUN:   -o - | FileCheck %s
+# RUN: llc -mtriple=riscv64 -mattr=+zfh -run-pass=regbankselect \
+# RUN:   -simplify-mir -verify-machineinstrs %s \
+# RUN:   -o - | FileCheck %s
+
+---
+name:            fadd_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fadd_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FADD:%[0-9]+]]:fprb(s16) = G_FADD [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FADD]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FADD %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fsub_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fsub_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FSUB:%[0-9]+]]:fprb(s16) = G_FSUB [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSUB]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FSUB %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fmul_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fmul_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:fprb(s16) = G_FMUL [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMUL]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FMUL %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fdiv_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fdiv_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FDIV:%[0-9]+]]:fprb(s16) = G_FDIV [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FDIV]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FDIV %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fma_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h, $f12_h
+
+    ; CHECK-LABEL: name: fma_f16
+    ; CHECK: liveins: $f10_h, $f11_h, $f12_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[COPY2:%[0-9]+]]:fprb(s16) = COPY $f12_h
+    ; CHECK-NEXT: [[FMA:%[0-9]+]]:fprb(s16) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMA]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = COPY $f12_h
+    %3:_(s16) = G_FMA %0, %1, %2
+    $f10_h = COPY %3(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fneg_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h
+
+    ; CHECK-LABEL: name: fneg_f16
+    ; CHECK: liveins: $f10_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:fprb(s16) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FNEG]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = G_FNEG %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fabs_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h
+
+    ; CHECK-LABEL: name: fabs_f16
+    ; CHECK: liveins: $f10_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[FABS:%[0-9]+]]:fprb(s16) = G_FABS [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FABS]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = G_FABS %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fsqrt_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h
+
+    ; CHECK-LABEL: name: fsqrt_f16
+    ; CHECK: liveins: $f10_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[FSQRT:%[0-9]+]]:fprb(s16) = G_FSQRT [[COPY]]
+    ; CHECK-NEXT: $f10_h = COPY [[FSQRT]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = G_FSQRT %0
+    $f10_h = COPY %1(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fmaxnum_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fmaxnum_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FMAXNUM:%[0-9]+]]:fprb(s16) = G_FMAXNUM [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMAXNUM]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FMAXNUM %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fminnum_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fminnum_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FMINNUM:%[0-9]+]]:fprb(s16) = G_FMINNUM [[COPY]], [[COPY1]]
+    ; CHECK-NEXT: $f10_h = COPY [[FMINNUM]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FMINNUM %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...
+---
+name:            fcopysign_f16
+legalized:       true
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $f10_h, $f11_h
+
+    ; CHECK-LABEL: name: fcopysign_f16
+    ; CHECK: liveins: $f10_h, $f11_h
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:fprb(s16) = COPY $f10_h
+    ; CHECK-NEXT: [[COPY1:%[0-9]+]]:fprb(s16) = COPY $f11_h
+    ; CHECK-NEXT: [[FCOPYSIGN:%[0-9]+]]:fprb(s16) = G_FCOPYSIGN [[COPY]], [[COPY1]](s16)
+    ; CHECK-NEXT: $f10_h = COPY [[FCOPYSIGN]](s16)
+    ; CHECK-NEXT: PseudoRET implicit $f10_h
+    %0:_(s16) = COPY $f10_h
+    %1:_(s16) = COPY $f11_h
+    %2:_(s16) = G_FCOPYSIGN %0, %1
+    $f10_h = COPY %2(s16)
+    PseudoRET implicit $f10_h
+
+...

From 8a46bbbc22a51db57f05beb0026772b899a785b9 Mon Sep 17 00:00:00 2001
From: Lukacma <Marian.Lukac@arm.com>
Date: Tue, 25 Jun 2024 17:19:42 +0200
Subject: [PATCH 08/30] [Clang] Remove preprocessor guards and global feature
 checks for NEON (#95224)

To enable function multi-versioning (FMV), current checks which rely on
cmd line options or global macros to see if target feature is present
need to be removed. This patch removes those for NEON and also
implements changes to NEON header file as proposed in
[ACLE](https://github.com/ARM-software/acle/pull/321).
---
 .../clang/Basic/DiagnosticSemaKinds.td        |  3 +++
 clang/lib/Sema/SemaType.cpp                   | 24 ++++++++-----------
 clang/test/Sema/arm-vector-types-support.c    | 11 +++++----
 clang/test/SemaCUDA/neon-attrs.cu             | 22 -----------------
 clang/utils/TableGen/NeonEmitter.cpp          |  5 ----
 5 files changed, 19 insertions(+), 46 deletions(-)
 delete mode 100644 clang/test/SemaCUDA/neon-attrs.cu

diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index 46ad359751d7dd..af8d75f76a7d70 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -3246,6 +3246,9 @@ def warn_unsupported_target_attribute
 def err_attribute_unsupported
     : Error<"%0 attribute is not supported on targets missing %1;"
             " specify an appropriate -march= or -mcpu=">;
+def err_attribute_unsupported_m_profile
+    : Error<"on M-profile architectures %0 attribute is not supported on targets missing %1;"
+            " specify an appropriate -march= or -mcpu=">;
 def err_duplicate_target_attribute
     : Error<"%select{unsupported|duplicate|unknown}0%select{| CPU|"
               " tune CPU}1 '%2' in the '%select{target|target_clones|target_version}3' "
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 53b9083c95c1bd..308274720d58d6 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -8203,23 +8203,19 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr,
 
   // Target must have NEON (or MVE, whose vectors are similar enough
   // not to need a separate attribute)
-  if (!(S.Context.getTargetInfo().hasFeature("neon") ||
-        S.Context.getTargetInfo().hasFeature("mve") ||
-        S.Context.getTargetInfo().hasFeature("sve") ||
-        S.Context.getTargetInfo().hasFeature("sme") ||
-        IsTargetCUDAAndHostARM) &&
-      VecKind == VectorKind::Neon) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported)
-        << Attr << "'neon', 'mve', 'sve' or 'sme'";
+  if (!S.Context.getTargetInfo().hasFeature("mve") &&
+      VecKind == VectorKind::Neon &&
+      S.Context.getTargetInfo().getTriple().isArmMClass()) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported_m_profile)
+        << Attr << "'mve'";
     Attr.setInvalid();
     return;
   }
-  if (!(S.Context.getTargetInfo().hasFeature("neon") ||
-        S.Context.getTargetInfo().hasFeature("mve") ||
-        IsTargetCUDAAndHostARM) &&
-      VecKind == VectorKind::NeonPoly) {
-    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported)
-        << Attr << "'neon' or 'mve'";
+  if (!S.Context.getTargetInfo().hasFeature("mve") &&
+      VecKind == VectorKind::NeonPoly &&
+      S.Context.getTargetInfo().getTriple().isArmMClass()) {
+    S.Diag(Attr.getLoc(), diag::err_attribute_unsupported_m_profile)
+        << Attr << "'mve'";
     Attr.setInvalid();
     return;
   }
diff --git a/clang/test/Sema/arm-vector-types-support.c b/clang/test/Sema/arm-vector-types-support.c
index ed5f5ba175a94a..8b8c9634631d05 100644
--- a/clang/test/Sema/arm-vector-types-support.c
+++ b/clang/test/Sema/arm-vector-types-support.c
@@ -1,7 +1,8 @@
-// RUN: %clang_cc1 %s -triple armv7 -fsyntax-only -verify
-// RUN: %clang_cc1 %s -triple aarch64 -fsyntax-only -verify
-// RUN: %clang_cc1 %s -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple armv8.1m.main -fsyntax-only -verify
+// RUN: %clang_cc1 %s -triple aarch64 -fsyntax-only -verify=sve-type
+// RUN: %clang_cc1 %s -triple aarch64 -target-feature -fp-armv8 -target-abi aapcs-soft -fsyntax-only -verify=sve-type
 
-typedef __attribute__((neon_vector_type(2))) int int32x2_t; // expected-error{{'neon_vector_type' attribute is not supported on targets missing 'neon', 'mve', 'sve' or 'sme'; specify an appropriate -march= or -mcpu=}}
-typedef __attribute__((neon_polyvector_type(16))) short poly8x16_t; // expected-error{{'neon_polyvector_type' attribute is not supported on targets missing 'neon' or 'mve'; specify an appropriate -march= or -mcpu=}}
+typedef __attribute__((neon_vector_type(2))) int int32x2_t; // expected-error{{on M-profile architectures 'neon_vector_type' attribute is not supported on targets missing 'mve'; specify an appropriate -march= or -mcpu=}}
+typedef __attribute__((neon_polyvector_type(16))) unsigned char poly8x16_t; // expected-error{{on M-profile architectures 'neon_polyvector_type' attribute is not supported on targets missing 'mve'; specify an appropriate -march= or -mcpu=}}
 typedef __attribute__((arm_sve_vector_bits(256))) void nosveflag; // expected-error{{'arm_sve_vector_bits' attribute is not supported on targets missing 'sve'; specify an appropriate -march= or -mcpu=}}
+                                                                  // sve-type-error@-1{{'arm_sve_vector_bits' attribute is not supported on targets missing 'sve'; specify an appropriate -march= or -mcpu=}}
diff --git a/clang/test/SemaCUDA/neon-attrs.cu b/clang/test/SemaCUDA/neon-attrs.cu
deleted file mode 100644
index 129056741ac9a4..00000000000000
--- a/clang/test/SemaCUDA/neon-attrs.cu
+++ /dev/null
@@ -1,22 +0,0 @@
-// CPU-side compilation on ARM with neon enabled (no errors expected).
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -aux-triple nvptx64 -x cuda -fsyntax-only -verify=quiet %s
-
-// CPU-side compilation on ARM with neon disabled.
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature -neon -aux-triple nvptx64 -x cuda -fsyntax-only -verify %s
-
-// GPU-side compilation on ARM (no errors expected).
-// RUN: %clang_cc1 -triple nvptx64 -aux-triple arm64-linux-gnu -fcuda-is-device -x cuda -fsyntax-only -verify=quiet %s
-
-// Regular C++ compilation on ARM with neon enabled (no errors expected).
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -x c++ -fsyntax-only -verify=quiet %s
-
-// Regular C++ compilation on ARM with neon disabled.
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature -neon -x c++ -fsyntax-only -verify %s
-
-// quiet-no-diagnostics
-typedef __attribute__((neon_vector_type(4))) float float32x4_t;
-// expected-error@-1 {{'neon_vector_type' attribute is not supported on targets missing 'neon', 'mve', 'sve' or 'sme'}}
-// expect
-typedef unsigned char poly8_t;
-typedef __attribute__((neon_polyvector_type(8))) poly8_t poly8x8_t;
-// expected-error@-1 {{'neon_polyvector_type' attribute is not supported on targets missing 'neon' or 'mve'}}
diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp
index 56f1fdf9ef574f..626031d38cf003 100644
--- a/clang/utils/TableGen/NeonEmitter.cpp
+++ b/clang/utils/TableGen/NeonEmitter.cpp
@@ -2370,10 +2370,6 @@ void NeonEmitter::run(raw_ostream &OS) {
         "Please use -mfloat-abi=softfp or -mfloat-abi=hard\"\n";
   OS << "#else\n\n";
 
-  OS << "#if !defined(__ARM_NEON)\n";
-  OS << "#error \"NEON support not enabled\"\n";
-  OS << "#else\n\n";
-
   OS << "#include <stdint.h>\n\n";
 
   OS << "#include <arm_bf16.h>\n";
@@ -2450,7 +2446,6 @@ void NeonEmitter::run(raw_ostream &OS) {
   OS << "#undef __ai\n\n";
   OS << "#endif /* if !defined(__ARM_NEON) */\n";
   OS << "#endif /* ifndef __ARM_FP */\n";
-  OS << "#endif /* __ARM_NEON_H */\n";
 }
 
 /// run - Read the records in arm_fp16.td and output arm_fp16.h.  arm_fp16.h

From 8c11d3788c26cd85f102959068109d3e498e8498 Mon Sep 17 00:00:00 2001
From: Xiaoyang Liu <siujoeng.lau@gmail.com>
Date: Tue, 25 Jun 2024 08:20:14 -0700
Subject: [PATCH 09/30] [libc++] P3029R1: Better `mdspan`'s CTAD -
 `std::extents` (#89015)

This patch implements an improvement introduced in P3029R1 that was
missed in #87873. It adds a deduction of static extents if
integral_constant-like  constants are passed to `std::extents`.
---
 libcxx/include/__mdspan/extents.h                   |  9 ++++++++-
 .../containers/views/mdspan/extents/ctad.pass.cpp   | 13 ++++++++++++-
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/libcxx/include/__mdspan/extents.h b/libcxx/include/__mdspan/extents.h
index ddf9fabd8cea79..fea0decd8c6af7 100644
--- a/libcxx/include/__mdspan/extents.h
+++ b/libcxx/include/__mdspan/extents.h
@@ -455,8 +455,15 @@ template <class _IndexType, size_t _Rank>
 using dextents = typename __mdspan_detail::__make_dextents<_IndexType, _Rank>::type;
 
 // Deduction guide for extents
+#  if _LIBCPP_STD_VER >= 26
 template <class... _IndexTypes>
-extents(_IndexTypes...) -> extents<size_t, size_t(((void)sizeof(_IndexTypes), dynamic_extent))...>;
+  requires(is_convertible_v<_IndexTypes, size_t> && ...)
+explicit extents(_IndexTypes...) -> extents<size_t, __maybe_static_ext<_IndexTypes>...>;
+#  else
+template <class... _IndexTypes>
+  requires(is_convertible_v<_IndexTypes, size_t> && ...)
+explicit extents(_IndexTypes...) -> extents<size_t, size_t(((void)sizeof(_IndexTypes), dynamic_extent))...>;
+#  endif
 
 namespace __mdspan_detail {
 
diff --git a/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp b/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp
index 3f99d8a3b47a2e..9144bb6812e3cc 100644
--- a/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp
+++ b/libcxx/test/std/containers/views/mdspan/extents/ctad.pass.cpp
@@ -13,10 +13,12 @@
 // explicit extents(Integrals...) -> see below;
 //   Constraints: (is_convertible_v<Integrals, size_t> && ...) is true.
 //
-// Remarks: The deduced type is dextents<size_t, sizeof...(Integrals)>.
+// Remarks: The deduced type is dextents<size_t, sizeof...(Integrals)>.           // until C++26
+// Remarks: The deduced type is extents<size_t, maybe-static-ext<Integrals>...>.  // since C++26
 
 #include <mdspan>
 #include <cassert>
+#include <type_traits>
 
 #include "../ConvertibleToIntegral.h"
 #include "test_macros.h"
@@ -43,6 +45,15 @@ constexpr bool test() {
   test(std::extents(1, 2u, 3, 4, 5, 6, 7, 8, 9),
        std::extents<std::size_t, D, D, D, D, D, D, D, D, D>(1, 2u, 3, 4, 5, 6, 7, 8, 9));
   test(std::extents(NoDefaultCtorIndex{1}, NoDefaultCtorIndex{2}), std::extents<std::size_t, D, D>(1, 2));
+
+#if _LIBCPP_STD_VER >= 26
+  // P3029R1: deduction from `integral_constant`
+  test(std::extents(std::integral_constant<size_t, 5>{}), std::extents<std::size_t, 5>());
+  test(std::extents(std::integral_constant<size_t, 5>{}, 6), std::extents<std::size_t, 5, std::dynamic_extent>(6));
+  test(std::extents(std::integral_constant<size_t, 5>{}, 6, std::integral_constant<size_t, 7>{}),
+       std::extents<std::size_t, 5, std::dynamic_extent, 7>(6));
+#endif
+
   return true;
 }
 

From 902952ae04afc2dfe28805b949a1e2218affe65e Mon Sep 17 00:00:00 2001
From: shawbyoung <shawbyoung@gmail.com>
Date: Tue, 25 Jun 2024 08:30:29 -0700
Subject: [PATCH 10/30] =?UTF-8?q?Revert=20"[=F0=9D=98=80=F0=9D=97=BD?=
 =?UTF-8?q?=F0=9D=97=BF]=20initial=20version"?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit bb5ab1ffe719f5e801ef08ac08be975546aa3266.
---
 bolt/docs/CommandLineArgumentReference.md     |  4 --
 bolt/lib/Profile/YAMLProfileReader.cpp        | 68 +++----------------
 bolt/lib/Rewrite/RewriteInstance.cpp          |  7 +-
 bolt/lib/Utils/CommandLineOpts.cpp            |  8 ---
 .../X86/hashing-based-function-matching.test  | 64 -----------------
 5 files changed, 13 insertions(+), 138 deletions(-)
 delete mode 100644 bolt/test/X86/hashing-based-function-matching.test

diff --git a/bolt/docs/CommandLineArgumentReference.md b/bolt/docs/CommandLineArgumentReference.md
index 00d472c5789168..d95f30a299a285 100644
--- a/bolt/docs/CommandLineArgumentReference.md
+++ b/bolt/docs/CommandLineArgumentReference.md
@@ -259,10 +259,6 @@
 
   Always use long jumps/nops for Linux kernel static keys
 
-- `--match-profile-with-function-hash`
-
-  Match profile with function hash
-
 - `--max-data-relocations=<uint>`
 
   Maximum number of data relocations to process
diff --git a/bolt/lib/Profile/YAMLProfileReader.cpp b/bolt/lib/Profile/YAMLProfileReader.cpp
index 6c4eece4ddb1bd..f25f59201f1cd9 100644
--- a/bolt/lib/Profile/YAMLProfileReader.cpp
+++ b/bolt/lib/Profile/YAMLProfileReader.cpp
@@ -22,8 +22,6 @@ namespace opts {
 extern cl::opt<unsigned> Verbosity;
 extern cl::OptionCategory BoltOptCategory;
 extern cl::opt<bool> InferStaleProfile;
-extern cl::opt<bool> MatchProfileWithFunctionHash;
-extern cl::opt<bool> Lite;
 
 static llvm::cl::opt<bool>
     IgnoreHash("profile-ignore-hash",
@@ -365,19 +363,9 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     return Profile.Hash == static_cast<uint64_t>(BF.getHash());
   };
 
-  uint64_t MatchedWithExactName = 0;
-  uint64_t MatchedWithHash = 0;
-  uint64_t MatchedWithLTOCommonName = 0;
-
-  // Computes hash for binary functions.
-  if (opts::MatchProfileWithFunctionHash)
-    for (auto &[_, BF] : BC.getBinaryFunctions())
-      BF.computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
-  else if (!opts::IgnoreHash)
-    for (BinaryFunction *BF : ProfileBFs)
-      BF->computeHash(YamlBP.Header.IsDFSOrder, YamlBP.Header.HashFunction);
-
-  // This first pass assigns profiles that match 100% by name and by hash.
+  // We have to do 2 passes since LTO introduces an ambiguity in function
+  // names. The first pass assigns profiles that match 100% by name and
+  // by hash. The second pass allows name ambiguity for LTO private functions.
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs)) {
     if (!BF)
       continue;
@@ -386,34 +374,15 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // the profile.
     Function.setExecutionCount(BinaryFunction::COUNT_NO_PROFILE);
 
-    if (profileMatches(YamlBF, Function)) {
-      matchProfileToFunction(YamlBF, Function);
-      ++MatchedWithExactName;
-    }
-  }
-
-  // Uses the strict hash of profiled and binary functions to match functions
-  // that are not matched by name or common name.
-  if (opts::MatchProfileWithFunctionHash) {
-    std::unordered_map<size_t, BinaryFunction *> StrictHashToBF;
-    StrictHashToBF.reserve(BC.getBinaryFunctions().size());
+    // Recompute hash once per function.
+    if (!opts::IgnoreHash)
+      Function.computeHash(YamlBP.Header.IsDFSOrder,
+                           YamlBP.Header.HashFunction);
 
-    for (auto &[_, BF] : BC.getBinaryFunctions())
-      StrictHashToBF[BF.getHash()] = &BF;
-
-    for (yaml::bolt::BinaryFunctionProfile &YamlBF : YamlBP.Functions) {
-      if (YamlBF.Used)
-        continue;
-      auto It = StrictHashToBF.find(YamlBF.Hash);
-      if (It != StrictHashToBF.end() && !ProfiledFunctions.count(It->second)) {
-        BinaryFunction *BF = It->second;
-        matchProfileToFunction(YamlBF, *BF);
-        ++MatchedWithHash;
-      }
-    }
+    if (profileMatches(YamlBF, Function))
+      matchProfileToFunction(YamlBF, Function);
   }
 
-  // This second pass allows name ambiguity for LTO private functions.
   for (const auto &[CommonName, LTOProfiles] : LTOCommonNameMap) {
     if (!LTOCommonNameFunctionMap.contains(CommonName))
       continue;
@@ -427,7 +396,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       for (BinaryFunction *BF : Functions) {
         if (!ProfiledFunctions.count(BF) && profileMatches(*YamlBF, *BF)) {
           matchProfileToFunction(*YamlBF, *BF);
-          ++MatchedWithLTOCommonName;
           return true;
         }
       }
@@ -439,10 +407,8 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
     // partially.
     if (!ProfileMatched && LTOProfiles.size() == 1 && Functions.size() == 1 &&
         !LTOProfiles.front()->Used &&
-        !ProfiledFunctions.count(*Functions.begin())) {
+        !ProfiledFunctions.count(*Functions.begin()))
       matchProfileToFunction(*LTOProfiles.front(), **Functions.begin());
-      ++MatchedWithLTOCommonName;
-    }
   }
 
   for (auto [YamlBF, BF] : llvm::zip_equal(YamlBP.Functions, ProfileBFs))
@@ -454,15 +420,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
       errs() << "BOLT-WARNING: profile ignored for function " << YamlBF.Name
              << '\n';
 
-  if (opts::Verbosity >= 2) {
-    outs() << "BOLT-INFO: matched " << MatchedWithExactName
-           << " functions with identical names\n";
-    outs() << "BOLT-INFO: matched " << MatchedWithHash
-           << " functions with hash\n";
-    outs() << "BOLT-INFO: matched " << MatchedWithLTOCommonName
-           << " functions with matching LTO common names\n";
-  }
-
   // Set for parseFunctionProfile().
   NormalizeByInsnCount = usesEvent("cycles") || usesEvent("instructions");
   NormalizeByCalls = usesEvent("branches");
@@ -482,11 +439,6 @@ Error YAMLProfileReader::readProfile(BinaryContext &BC) {
 
   BC.setNumUnusedProfiledObjects(NumUnused);
 
-  if (opts::Lite)
-    for (BinaryFunction *BF : BC.getAllBinaryFunctions())
-      if (!BF->hasProfile())
-        BF->setIgnored();
-
   return Error::success();
 }
 
diff --git a/bolt/lib/Rewrite/RewriteInstance.cpp b/bolt/lib/Rewrite/RewriteInstance.cpp
index c0873a58a8a6e6..1a3a8af21d81b6 100644
--- a/bolt/lib/Rewrite/RewriteInstance.cpp
+++ b/bolt/lib/Rewrite/RewriteInstance.cpp
@@ -82,7 +82,6 @@ extern cl::opt<bool> Hugify;
 extern cl::opt<bool> Instrument;
 extern cl::opt<JumpTableSupportLevel> JumpTables;
 extern cl::opt<bool> KeepNops;
-extern cl::opt<bool> MatchProfileWithFunctionHash;
 extern cl::list<std::string> ReorderData;
 extern cl::opt<bolt::ReorderFunctions::ReorderType> ReorderFunctions;
 extern cl::opt<bool> TerminalTrap;
@@ -141,6 +140,9 @@ KeepTmp("keep-tmp",
   cl::Hidden,
   cl::cat(BoltCategory));
 
+cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
+                   cl::cat(BoltCategory));
+
 static cl::opt<unsigned>
 LiteThresholdPct("lite-threshold-pct",
   cl::desc("threshold (in percent) for selecting functions to process in lite "
@@ -2980,9 +2982,6 @@ void RewriteInstance::selectFunctionsToProcess() {
     if (mustSkip(Function))
       return false;
 
-    if (opts::MatchProfileWithFunctionHash)
-      return true;
-
     // If the list is not empty, only process functions from the list.
     if (!opts::ForceFunctionNames.empty() || !ForceFunctionsNR.empty()) {
       // Regex check (-funcs and -funcs-file options).
diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp
index 41de30f3f566b1..41c89bc8aeba4e 100644
--- a/bolt/lib/Utils/CommandLineOpts.cpp
+++ b/bolt/lib/Utils/CommandLineOpts.cpp
@@ -128,14 +128,6 @@ cl::opt<bool>
                cl::desc("instrument code to generate accurate profile data"),
                cl::cat(BoltOptCategory));
 
-cl::opt<bool> Lite("lite", cl::desc("skip processing of cold functions"),
-                   cl::cat(BoltCategory));
-
-cl::opt<bool>
-    MatchProfileWithFunctionHash("match-profile-with-function-hash",
-                                 cl::desc("Match profile with function hash"),
-                                 cl::Hidden, cl::cat(BoltCategory));
-
 cl::opt<std::string>
 OutputFilename("o",
   cl::desc("<output file>"),
diff --git a/bolt/test/X86/hashing-based-function-matching.test b/bolt/test/X86/hashing-based-function-matching.test
deleted file mode 100644
index 4426da085bbd9c..00000000000000
--- a/bolt/test/X86/hashing-based-function-matching.test
+++ /dev/null
@@ -1,64 +0,0 @@
-## Tests function matching in YAMLProfileReader by function hash.
-
-# REQUIRES: system-linux
-# RUN: split-file %s %t
-# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-unknown %t/main.s -o %t.o
-# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q -nostdlib
-# RUN: llvm-bolt %t.exe -o %t.out --data %t/yaml -v=2 \
-# RUN:   --print-cfg --match-profile-with-function-hash 2>&1 --profile-ignore-hash=0 | FileCheck %s
-
-# CHECK: BOLT-INFO: matched 1 functions with hash
-
-#--- main.s
-.globl main
-.type	main, @function
-main:
-  .cfi_startproc
-.LBB00:
-  pushq   %rbp
-  movq    %rsp, %rbp
-  subq    $16, %rsp
-  testq   %rax, %rax
-  js      .LBB03
-.LBB01:
-  jne     .LBB04
-.LBB02:
-  nop
-.LBB03:
-  xorl    %eax, %eax
-  addq    $16, %rsp
-  popq    %rbp
-  retq
-.LBB04:
-  xorl    %eax, %eax
-  addq    $16, %rsp
-  popq    %rbp
-  retq
-## For relocations against .text
-.LBB05:
-  call exit
-  .cfi_endproc
-  .size	main, .-main
-
-#--- yaml
----
-header:
-  profile-version: 1
-  binary-name:     'hashing-based-function-matching.s.tmp.exe'
-  binary-build-id: '<unknown>'
-  profile-flags:   [ lbr ]
-  profile-origin:  branch profile reader
-  profile-events:  ''
-  dfs-order:       false
-  hash-func:       xxh3
-functions:
-  - name:            main2
-    fid:             0
-    hash:            0x72F82DEAA6FE65FB
-    exec:            1
-    nblocks:         6
-    blocks:
-      - bid:             1
-        insns:           1
-        succ:            [ { bid: 3, cnt: 1} ]
-...

From 731db06a878f5c8cb29b36d526a54493677ea89f Mon Sep 17 00:00:00 2001
From: Nikolas Klauser <nikolasklauser@berlin.de>
Date: Tue, 25 Jun 2024 17:31:41 +0200
Subject: [PATCH 11/30] [libc++] Get the GCC build mostly clean of warnings
 (#96604)

The GCC build has gotten to the point where it's often hard to find the
actual error in the build log. We should look into enabling these
warnings again in the future, but it looks like a lot of them are
bogous.
---
 libcxx/include/__atomic/atomic_init.h     |  2 +-
 libcxx/src/barrier.cpp                    | 18 +++++++--------
 libcxx/src/filesystem/operations.cpp      |  4 ++--
 libcxx/src/filesystem/path.cpp            | 28 +++++++++++------------
 libcxx/src/filesystem/path_parser.h       | 28 +++++++++++------------
 libcxx/src/locale.cpp                     |  4 ++--
 runtimes/cmake/Modules/WarningFlags.cmake |  6 +++++
 7 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/libcxx/include/__atomic/atomic_init.h b/libcxx/include/__atomic/atomic_init.h
index 2ed37a9a77c86d..8e86ba31b4ac3b 100644
--- a/libcxx/include/__atomic/atomic_init.h
+++ b/libcxx/include/__atomic/atomic_init.h
@@ -18,7 +18,7 @@
 #define ATOMIC_FLAG_INIT {false}
 #define ATOMIC_VAR_INIT(__v) {__v}
 
-#if _LIBCPP_STD_VER >= 20 && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
+#if _LIBCPP_STD_VER >= 20 && defined(_LIBCPP_COMPILER_CLANG_BASED) && !defined(_LIBCPP_DISABLE_DEPRECATION_WARNINGS)
 #  pragma clang deprecated(ATOMIC_VAR_INIT)
 #endif
 
diff --git a/libcxx/src/barrier.cpp b/libcxx/src/barrier.cpp
index bbcfb6001bba18..69601bfeec0546 100644
--- a/libcxx/src/barrier.cpp
+++ b/libcxx/src/barrier.cpp
@@ -21,17 +21,17 @@ class __barrier_algorithm_base {
     } __tickets[64];
   };
 
-  ptrdiff_t& __expected;
-  unique_ptr<__state_t[]> __state;
+  ptrdiff_t& __expected_;
+  unique_ptr<__state_t[]> __state_;
 
-  _LIBCPP_HIDDEN __barrier_algorithm_base(ptrdiff_t& __expected) : __expected(__expected) {
+  _LIBCPP_HIDDEN __barrier_algorithm_base(ptrdiff_t& __expected) : __expected_(__expected) {
     size_t const __count = (__expected + 1) >> 1;
-    __state              = unique_ptr<__state_t[]>(new __state_t[__count]);
+    __state_             = unique_ptr<__state_t[]>(new __state_t[__count]);
   }
   _LIBCPP_HIDDEN bool __arrive(__barrier_phase_t __old_phase) {
     __barrier_phase_t const __half_step = __old_phase + 1, __full_step = __old_phase + 2;
-    size_t __current_expected = __expected,
-           __current          = hash<thread::id>()(this_thread::get_id()) % ((__expected + 1) >> 1);
+    size_t __current_expected = __expected_,
+           __current          = hash<thread::id>()(this_thread::get_id()) % ((__expected_ + 1) >> 1);
     for (int __round = 0;; ++__round) {
       if (__current_expected <= 1)
         return true;
@@ -41,14 +41,14 @@ class __barrier_algorithm_base {
           __current = 0;
         __barrier_phase_t expect = __old_phase;
         if (__current == __last_node && (__current_expected & 1)) {
-          if (__state[__current].__tickets[__round].__phase.compare_exchange_strong(
+          if (__state_[__current].__tickets[__round].__phase.compare_exchange_strong(
                   expect, __full_step, memory_order_acq_rel))
             break; // I'm 1 in 1, go to next __round
-        } else if (__state[__current].__tickets[__round].__phase.compare_exchange_strong(
+        } else if (__state_[__current].__tickets[__round].__phase.compare_exchange_strong(
                        expect, __half_step, memory_order_acq_rel)) {
           return false; // I'm 1 in 2, done with arrival
         } else if (expect == __half_step) {
-          if (__state[__current].__tickets[__round].__phase.compare_exchange_strong(
+          if (__state_[__current].__tickets[__round].__phase.compare_exchange_strong(
                   expect, __full_step, memory_order_acq_rel))
             break; // I'm 2 in 2, go to next __round
         }
diff --git a/libcxx/src/filesystem/operations.cpp b/libcxx/src/filesystem/operations.cpp
index abd8695978ea7c..a83c1ae15a4a40 100644
--- a/libcxx/src/filesystem/operations.cpp
+++ b/libcxx/src/filesystem/operations.cpp
@@ -934,7 +934,7 @@ path __weakly_canonical(const path& p, error_code* ec) {
   vector<string_view_t> DNEParts;
 
   error_code m_ec;
-  while (PP.State != PathParser::PS_BeforeBegin) {
+  while (PP.State_ != PathParser::PS_BeforeBegin) {
     tmp.assign(createView(p.native().data(), &PP.RawEntry.back()));
     file_status st = __status(tmp, &m_ec);
     if (!status_known(st)) {
@@ -949,7 +949,7 @@ path __weakly_canonical(const path& p, error_code* ec) {
     DNEParts.push_back(*PP);
     --PP;
   }
-  if (PP.State == PathParser::PS_BeforeBegin) {
+  if (PP.State_ == PathParser::PS_BeforeBegin) {
     result = __canonical("", &m_ec);
     if (m_ec) {
       return err.report(m_ec);
diff --git a/libcxx/src/filesystem/path.cpp b/libcxx/src/filesystem/path.cpp
index c89346aa510c78..b2019521377ede 100644
--- a/libcxx/src/filesystem/path.cpp
+++ b/libcxx/src/filesystem/path.cpp
@@ -45,23 +45,23 @@ path& path::replace_extension(path const& replacement) {
 
 string_view_t path::__root_name() const {
   auto PP = PathParser::CreateBegin(__pn_);
-  if (PP.State == PathParser::PS_InRootName)
+  if (PP.State_ == PathParser::PS_InRootName)
     return *PP;
   return {};
 }
 
 string_view_t path::__root_directory() const {
   auto PP = PathParser::CreateBegin(__pn_);
-  if (PP.State == PathParser::PS_InRootName)
+  if (PP.State_ == PathParser::PS_InRootName)
     ++PP;
-  if (PP.State == PathParser::PS_InRootDir)
+  if (PP.State_ == PathParser::PS_InRootDir)
     return *PP;
   return {};
 }
 
 string_view_t path::__root_path_raw() const {
   auto PP = PathParser::CreateBegin(__pn_);
-  if (PP.State == PathParser::PS_InRootName) {
+  if (PP.State_ == PathParser::PS_InRootName) {
     auto NextCh = PP.peek();
     if (NextCh && isSeparator(*NextCh)) {
       ++PP;
@@ -69,24 +69,24 @@ string_view_t path::__root_path_raw() const {
     }
     return PP.RawEntry;
   }
-  if (PP.State == PathParser::PS_InRootDir)
+  if (PP.State_ == PathParser::PS_InRootDir)
     return *PP;
   return {};
 }
 
 static bool ConsumeRootName(PathParser* PP) {
   static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2, "Values for enums are incorrect");
-  while (PP->State <= PathParser::PS_InRootName)
+  while (PP->State_ <= PathParser::PS_InRootName)
     ++(*PP);
-  return PP->State == PathParser::PS_AtEnd;
+  return PP->State_ == PathParser::PS_AtEnd;
 }
 
 static bool ConsumeRootDir(PathParser* PP) {
   static_assert(PathParser::PS_BeforeBegin == 1 && PathParser::PS_InRootName == 2 && PathParser::PS_InRootDir == 3,
                 "Values for enums are incorrect");
-  while (PP->State <= PathParser::PS_InRootDir)
+  while (PP->State_ <= PathParser::PS_InRootDir)
     ++(*PP);
-  return PP->State == PathParser::PS_AtEnd;
+  return PP->State_ == PathParser::PS_AtEnd;
 }
 
 string_view_t path::__relative_path() const {
@@ -248,7 +248,7 @@ path path::lexically_relative(const path& base) const {
     auto PP                      = PathParser::CreateBegin(__pn_);
     auto PPBase                  = PathParser::CreateBegin(base.__pn_);
     auto CheckIterMismatchAtBase = [&]() {
-      return PP.State != PPBase.State && (PP.inRootPath() || PPBase.inRootPath());
+      return PP.State_ != PPBase.State_ && (PP.inRootPath() || PPBase.inRootPath());
     };
     if (PP.inRootName() && PPBase.inRootName()) {
       if (*PP != *PPBase)
@@ -267,7 +267,7 @@ path path::lexically_relative(const path& base) const {
   // Find the first mismatching element
   auto PP     = PathParser::CreateBegin(__pn_);
   auto PPBase = PathParser::CreateBegin(base.__pn_);
-  while (PP && PPBase && PP.State == PPBase.State && *PP == *PPBase) {
+  while (PP && PPBase && PP.State_ == PPBase.State_ && *PP == *PPBase) {
     ++PP;
     ++PPBase;
   }
@@ -380,7 +380,7 @@ path::iterator path::begin() const {
   auto PP = PathParser::CreateBegin(__pn_);
   iterator it;
   it.__path_ptr_ = this;
-  it.__state_    = static_cast<path::iterator::_ParserState>(PP.State);
+  it.__state_    = static_cast<path::iterator::_ParserState>(PP.State_);
   it.__entry_    = PP.RawEntry;
   it.__stashed_elem_.__assign_view(*PP);
   return it;
@@ -396,7 +396,7 @@ path::iterator path::end() const {
 path::iterator& path::iterator::__increment() {
   PathParser PP(__path_ptr_->native(), __entry_, __state_);
   ++PP;
-  __state_ = static_cast<_ParserState>(PP.State);
+  __state_ = static_cast<_ParserState>(PP.State_);
   __entry_ = PP.RawEntry;
   __stashed_elem_.__assign_view(*PP);
   return *this;
@@ -405,7 +405,7 @@ path::iterator& path::iterator::__increment() {
 path::iterator& path::iterator::__decrement() {
   PathParser PP(__path_ptr_->native(), __entry_, __state_);
   --PP;
-  __state_ = static_cast<_ParserState>(PP.State);
+  __state_ = static_cast<_ParserState>(PP.State_);
   __entry_ = PP.RawEntry;
   __stashed_elem_.__assign_view(*PP);
   return *this;
diff --git a/libcxx/src/filesystem/path_parser.h b/libcxx/src/filesystem/path_parser.h
index 28a8f240e3bd22..06623696452da6 100644
--- a/libcxx/src/filesystem/path_parser.h
+++ b/libcxx/src/filesystem/path_parser.h
@@ -50,14 +50,14 @@ struct PathParser {
 
   const string_view_t Path;
   string_view_t RawEntry;
-  ParserState State;
+  ParserState State_;
 
 private:
-  PathParser(string_view_t P, ParserState State) noexcept : Path(P), State(State) {}
+  PathParser(string_view_t P, ParserState State) noexcept : Path(P), State_(State) {}
 
 public:
   PathParser(string_view_t P, string_view_t E, unsigned char S)
-      : Path(P), RawEntry(E), State(static_cast<ParserState>(S)) {
+      : Path(P), RawEntry(E), State_(static_cast<ParserState>(S)) {
     // S cannot be '0' or PS_BeforeBegin.
   }
 
@@ -84,7 +84,7 @@ struct PathParser {
     if (Start == End)
       return makeState(PS_AtEnd);
 
-    switch (State) {
+    switch (State_) {
     case PS_BeforeBegin: {
       PosPtr TkEnd = consumeRootName(Start, End);
       if (TkEnd)
@@ -125,7 +125,7 @@ struct PathParser {
     if (RStart == REnd) // we're decrementing the begin
       return makeState(PS_BeforeBegin);
 
-    switch (State) {
+    switch (State_) {
     case PS_AtEnd: {
       // Try to consume a trailing separator or root directory first.
       if (PosPtr SepEnd = consumeAllSeparators(RStart, REnd)) {
@@ -169,7 +169,7 @@ struct PathParser {
   /// \brief Return a view with the "preferred representation" of the current
   ///   element. For example trailing separators are represented as a '.'
   string_view_t operator*() const noexcept {
-    switch (State) {
+    switch (State_) {
     case PS_BeforeBegin:
     case PS_AtEnd:
       return PATHSTR("");
@@ -187,7 +187,7 @@ struct PathParser {
     __libcpp_unreachable();
   }
 
-  explicit operator bool() const noexcept { return State != PS_BeforeBegin && State != PS_AtEnd; }
+  explicit operator bool() const noexcept { return State_ != PS_BeforeBegin && State_ != PS_AtEnd; }
 
   PathParser& operator++() noexcept {
     increment();
@@ -199,21 +199,21 @@ struct PathParser {
     return *this;
   }
 
-  bool atEnd() const noexcept { return State == PS_AtEnd; }
+  bool atEnd() const noexcept { return State_ == PS_AtEnd; }
 
-  bool inRootDir() const noexcept { return State == PS_InRootDir; }
+  bool inRootDir() const noexcept { return State_ == PS_InRootDir; }
 
-  bool inRootName() const noexcept { return State == PS_InRootName; }
+  bool inRootName() const noexcept { return State_ == PS_InRootName; }
 
   bool inRootPath() const noexcept { return inRootName() || inRootDir(); }
 
 private:
   void makeState(ParserState NewState, PosPtr Start, PosPtr End) noexcept {
-    State    = NewState;
+    State_    = NewState;
     RawEntry = string_view_t(Start, End - Start);
   }
   void makeState(ParserState NewState) noexcept {
-    State    = NewState;
+    State_    = NewState;
     RawEntry = {};
   }
 
@@ -224,7 +224,7 @@ struct PathParser {
   /// \brief Return a pointer to the first character after the currently
   ///   lexed element.
   PosPtr getNextTokenStartPos() const noexcept {
-    switch (State) {
+    switch (State_) {
     case PS_BeforeBegin:
       return Path.data();
     case PS_InRootName:
@@ -241,7 +241,7 @@ struct PathParser {
   /// \brief Return a pointer to the first character in the currently lexed
   ///   element.
   PosPtr getCurrentTokenStartPos() const noexcept {
-    switch (State) {
+    switch (State_) {
     case PS_BeforeBegin:
     case PS_InRootName:
       return &Path.front();
diff --git a/libcxx/src/locale.cpp b/libcxx/src/locale.cpp
index c5ab6de5d657a2..4efdc63c096611 100644
--- a/libcxx/src/locale.cpp
+++ b/libcxx/src/locale.cpp
@@ -557,9 +557,9 @@ locale::locale(const locale& other, const locale& one, category c)
 
 string locale::name() const { return __locale_->name(); }
 
-void locale::__install_ctor(const locale& other, facet* f, long id) {
+void locale::__install_ctor(const locale& other, facet* f, long facet_id) {
   if (f)
-    __locale_ = new __imp(*other.__locale_, f, id);
+    __locale_ = new __imp(*other.__locale_, f, facet_id);
   else
     __locale_ = other.__locale_;
   __locale_->acquire();
diff --git a/runtimes/cmake/Modules/WarningFlags.cmake b/runtimes/cmake/Modules/WarningFlags.cmake
index d06409841dc9df..068d22150d3875 100644
--- a/runtimes/cmake/Modules/WarningFlags.cmake
+++ b/runtimes/cmake/Modules/WarningFlags.cmake
@@ -60,6 +60,12 @@ function(cxx_add_warning_flags target enable_werror enable_pedantic)
       -Wno-c++14-compat
       -Wno-noexcept-type
       -Wno-suggest-override
+      -Wno-alloc-size-larger-than
+      -Wno-deprecated-declarations
+      -Wno-dangling-reference
+      -Wno-strict-overflow
+      -Wno-maybe-uninitialized
+      -Wno-strict-aliasing
       )
 
   endif()

From 2604830aacdd563715da030d0396b565e912436f Mon Sep 17 00:00:00 2001
From: Akira Hatanaka <ahatanak@gmail.com>
Date: Tue, 25 Jun 2024 08:33:05 -0700
Subject: [PATCH 12/30] Add support for __builtin_verbose_trap (#79230)

The builtin causes the program to stop its execution abnormally and
shows a human-readable description of the reason for the termination
when a debugger is attached or in a symbolicated crash log.

The motivation for the builtin is explained in the following RFC:

https://discourse.llvm.org/t/rfc-adding-builtin-verbose-trap-string-literal/75845

clang's CodeGen lowers the builtin to `llvm.trap` and emits debugging
information that represents an artificial inline frame whose name
encodes the category and reason strings passed to the builtin.
---
 clang/docs/LanguageExtensions.rst             | 54 +++++++++++++++++++
 clang/include/clang/AST/Expr.h                |  5 ++
 clang/include/clang/Basic/Builtins.td         |  6 +++
 .../clang/Basic/DiagnosticSemaKinds.td        |  2 +
 clang/include/clang/CodeGen/ModuleBuilder.h   |  4 ++
 clang/lib/AST/ExprConstant.cpp                | 21 ++++++--
 clang/lib/CodeGen/CGBuiltin.cpp               | 12 +++++
 clang/lib/CodeGen/CGDebugInfo.cpp             | 40 ++++++++++++++
 clang/lib/CodeGen/CGDebugInfo.h               | 22 ++++++++
 clang/lib/Sema/SemaChecking.cpp               | 32 +++++++++++
 .../CodeGenCXX/debug-info-verbose-trap.cpp    | 54 +++++++++++++++++++
 clang/test/SemaCXX/verbose-trap.cpp           | 47 ++++++++++++++++
 12 files changed, 296 insertions(+), 3 deletions(-)
 create mode 100644 clang/test/CodeGenCXX/debug-info-verbose-trap.cpp
 create mode 100644 clang/test/SemaCXX/verbose-trap.cpp

diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index 216ee83d48c145..9457e533041264 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -3483,6 +3483,60 @@ Query for this feature with ``__has_builtin(__builtin_trap)``.
 
 ``__builtin_arm_trap`` is lowered to the ``llvm.aarch64.break`` builtin, and then to ``brk #payload``.
 
+``__builtin_verbose_trap``
+--------------------------
+
+``__builtin_verbose_trap`` causes the program to stop its execution abnormally
+and shows a human-readable description of the reason for the termination when a
+debugger is attached or in a symbolicated crash log.
+
+**Syntax**:
+
+.. code-block:: c++
+
+    __builtin_verbose_trap(const char *category, const char *reason)
+
+**Description**
+
+``__builtin_verbose_trap`` is lowered to the ` ``llvm.trap`` <https://llvm.org/docs/LangRef.html#llvm-trap-intrinsic>`_ builtin.
+Additionally, clang emits debugging information that represents an artificial
+inline frame whose name encodes the category and reason strings passed to the builtin,
+prefixed by a "magic" prefix.
+
+For example, consider the following code:
+
+.. code-block:: c++
+
+    void foo(int* p) {
+      if (p == nullptr)
+        __builtin_verbose_trap("check null", "Argument must not be null!");
+    }
+
+The debugging information would look as if it were produced for the following code:
+
+.. code-block:: c++
+
+    __attribute__((always_inline))
+    inline void "__clang_trap_msg$check null$Argument must not be null!"() {
+      __builtin_trap();
+    }
+
+    void foo(int* p) {
+      if (p == nullptr)
+        "__clang_trap_msg$check null$Argument must not be null!"();
+    }
+
+However, the generated code would not actually contain a call to the artificial
+function — it only exists in the debugging information.
+
+Query for this feature with ``__has_builtin(__builtin_verbose_trap)``. Note that
+users need to enable debug information to enable this feature. A call to this
+builtin is equivalent to a call to ``__builtin_trap`` if debug information isn't
+enabled.
+
+The optimizer can merge calls to trap with different messages, which degrades
+the debugging experience.
+
 ``__builtin_allow_runtime_check``
 ---------------------------------
 
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 3bc8cae4d8c86c..59e174954fdbf4 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -787,6 +787,11 @@ class Expr : public ValueStmt {
                                  const Expr *PtrExpression, ASTContext &Ctx,
                                  EvalResult &Status) const;
 
+  /// If the current Expr can be evaluated to a pointer to a null-terminated
+  /// constant string, return the constant string (without the terminating
+  /// null).
+  std::optional<std::string> tryEvaluateString(ASTContext &Ctx) const;
+
   /// Enumeration used to describe the kind of Null pointer constant
   /// returned from \c isNullPointerConstant().
   enum NullPointerConstantKind {
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index c8f6104a7f1a7a..f5b15cf90d1f83 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1152,6 +1152,12 @@ def Trap : Builtin {
   let Prototype = "void()";
 }
 
+def VerboseTrap : Builtin {
+  let Spellings = ["__builtin_verbose_trap"];
+  let Attributes = [NoThrow, NoReturn];
+  let Prototype = "void(char const*, char const*)";
+}
+
 def Debugtrap : Builtin {
   let Spellings = ["__builtin_debugtrap"];
   let Attributes = [NoThrow];
diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td
index af8d75f76a7d70..79cc9c61f7fd37 100644
--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td
+++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td
@@ -8956,6 +8956,8 @@ def err_expected_callable_argument : Error<
   "expected a callable expression as %ordinal0 argument to %1, found %2">;
 def note_building_builtin_dump_struct_call : Note<
   "in call to printing function with arguments '(%0)' while dumping struct">;
+def err_builtin_verbose_trap_arg : Error<
+  "argument to __builtin_verbose_trap must %select{be a pointer to a constant string|not contain $}0">;
 
 def err_atomic_load_store_uses_lib : Error<
   "atomic %select{load|store}0 requires runtime support that is not "
diff --git a/clang/include/clang/CodeGen/ModuleBuilder.h b/clang/include/clang/CodeGen/ModuleBuilder.h
index edacd82bf899db..59b9840d02e086 100644
--- a/clang/include/clang/CodeGen/ModuleBuilder.h
+++ b/clang/include/clang/CodeGen/ModuleBuilder.h
@@ -15,6 +15,7 @@
 
 #include "clang/AST/ASTConsumer.h"
 #include "clang/Basic/LLVM.h"
+#include "llvm/ADT/StringRef.h"
 
 namespace llvm {
   class Constant;
@@ -27,6 +28,9 @@ namespace llvm {
   }
 }
 
+// Prefix of the name of the artificial inline frame.
+inline constexpr llvm::StringRef ClangTrapPrefix = "__clang_trap_msg";
+
 namespace clang {
   class CodeGenOptions;
   class CoverageSourceInfo;
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index fe4b9a569ab874..374a3acf7aa26f 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -1885,7 +1885,8 @@ static bool EvaluateAtomic(const Expr *E, const LValue *This, APValue &Result,
                            EvalInfo &Info);
 static bool EvaluateAsRValue(EvalInfo &Info, const Expr *E, APValue &Result);
 static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
-                                  EvalInfo &Info);
+                                  EvalInfo &Info,
+                                  std::string *StringResult = nullptr);
 
 /// Evaluate an integer or fixed point expression into an APResult.
 static bool EvaluateFixedPointOrInteger(const Expr *E, APFixedPoint &Result,
@@ -17009,7 +17010,7 @@ bool Expr::tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx,
 }
 
 static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
-                                  EvalInfo &Info) {
+                                  EvalInfo &Info, std::string *StringResult) {
   if (!E->getType()->hasPointerRepresentation() || !E->isPRValue())
     return false;
 
@@ -17036,6 +17037,8 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
         Str = Str.substr(0, Pos);
 
       Result = Str.size();
+      if (StringResult)
+        *StringResult = Str;
       return true;
     }
 
@@ -17051,12 +17054,24 @@ static bool EvaluateBuiltinStrLen(const Expr *E, uint64_t &Result,
     if (!Char.getInt()) {
       Result = Strlen;
       return true;
-    }
+    } else if (StringResult)
+      StringResult->push_back(Char.getInt().getExtValue());
     if (!HandleLValueArrayAdjustment(Info, E, String, CharTy, 1))
       return false;
   }
 }
 
+std::optional<std::string> Expr::tryEvaluateString(ASTContext &Ctx) const {
+  Expr::EvalStatus Status;
+  EvalInfo Info(Ctx, Status, EvalInfo::EM_ConstantFold);
+  uint64_t Result;
+  std::string StringResult;
+
+  if (EvaluateBuiltinStrLen(this, Result, Info, &StringResult))
+    return StringResult;
+  return {};
+}
+
 bool Expr::EvaluateCharRangeAsString(std::string &Result,
                                      const Expr *SizeExpression,
                                      const Expr *PtrExpression, ASTContext &Ctx,
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 2434dcc1f26d64..4edd8283aa03c0 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -3505,6 +3505,18 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
   case Builtin::BI__builtin_trap:
     EmitTrapCall(Intrinsic::trap);
     return RValue::get(nullptr);
+  case Builtin::BI__builtin_verbose_trap: {
+    llvm::DILocation *TrapLocation = Builder.getCurrentDebugLocation();
+    if (getDebugInfo()) {
+      TrapLocation = getDebugInfo()->CreateTrapFailureMessageFor(
+          TrapLocation, *E->getArg(0)->tryEvaluateString(getContext()),
+          *E->getArg(1)->tryEvaluateString(getContext()));
+    }
+    ApplyDebugLocation ApplyTrapDI(*this, TrapLocation);
+    // Currently no attempt is made to prevent traps from being merged.
+    EmitTrapCall(Intrinsic::trap);
+    return RValue::get(nullptr);
+  }
   case Builtin::BI__debugbreak:
     EmitTrapCall(Intrinsic::debugtrap);
     return RValue::get(nullptr);
diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp
index a072475ba77057..3d8a715b692de8 100644
--- a/clang/lib/CodeGen/CGDebugInfo.cpp
+++ b/clang/lib/CodeGen/CGDebugInfo.cpp
@@ -32,6 +32,7 @@
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/Version.h"
+#include "clang/CodeGen/ModuleBuilder.h"
 #include "clang/Frontend/FrontendOptions.h"
 #include "clang/Lex/HeaderSearchOptions.h"
 #include "clang/Lex/ModuleMap.h"
@@ -1731,6 +1732,28 @@ llvm::DIType *CGDebugInfo::createFieldType(
                                    offsetInBits, flags, debugType, Annotations);
 }
 
+llvm::DISubprogram *
+CGDebugInfo::createInlinedTrapSubprogram(StringRef FuncName,
+                                         llvm::DIFile *FileScope) {
+  // We are caching the subprogram because we don't want to duplicate
+  // subprograms with the same message. Note that `SPFlagDefinition` prevents
+  // subprograms from being uniqued.
+  llvm::DISubprogram *&SP = InlinedTrapFuncMap[FuncName];
+
+  if (!SP) {
+    llvm::DISubroutineType *DIFnTy = DBuilder.createSubroutineType(nullptr);
+    SP = DBuilder.createFunction(
+        /*Scope=*/FileScope, /*Name=*/FuncName, /*LinkageName=*/StringRef(),
+        /*File=*/FileScope, /*LineNo=*/0, /*Ty=*/DIFnTy,
+        /*ScopeLine=*/0,
+        /*Flags=*/llvm::DINode::FlagArtificial,
+        /*SPFlags=*/llvm::DISubprogram::SPFlagDefinition,
+        /*TParams=*/nullptr, /*ThrownTypes=*/nullptr, /*Annotations=*/nullptr);
+  }
+
+  return SP;
+}
+
 void CGDebugInfo::CollectRecordLambdaFields(
     const CXXRecordDecl *CXXDecl, SmallVectorImpl<llvm::Metadata *> &elements,
     llvm::DIType *RecordTy) {
@@ -3527,6 +3550,23 @@ llvm::DIMacroFile *CGDebugInfo::CreateTempMacroFile(llvm::DIMacroFile *Parent,
   return DBuilder.createTempMacroFile(Parent, Line, FName);
 }
 
+llvm::DILocation *CGDebugInfo::CreateTrapFailureMessageFor(
+    llvm::DebugLoc TrapLocation, StringRef Category, StringRef FailureMsg) {
+  // Create a debug location from `TrapLocation` that adds an artificial inline
+  // frame.
+  SmallString<64> FuncName(ClangTrapPrefix);
+
+  FuncName += "$";
+  FuncName += Category;
+  FuncName += "$";
+  FuncName += FailureMsg;
+
+  llvm::DISubprogram *TrapSP =
+      createInlinedTrapSubprogram(FuncName, TrapLocation->getFile());
+  return llvm::DILocation::get(CGM.getLLVMContext(), /*Line=*/0, /*Column=*/0,
+                               /*Scope=*/TrapSP, /*InlinedAt=*/TrapLocation);
+}
+
 static QualType UnwrapTypeForDebugInfo(QualType T, const ASTContext &C) {
   Qualifiers Quals;
   do {
diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h
index 2731c627d9dc37..a0c419cf1e208a 100644
--- a/clang/lib/CodeGen/CGDebugInfo.h
+++ b/clang/lib/CodeGen/CGDebugInfo.h
@@ -29,7 +29,9 @@
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/Allocator.h"
+#include <map>
 #include <optional>
+#include <string>
 
 namespace llvm {
 class MDNode;
@@ -346,6 +348,14 @@ class CGDebugInfo {
       const FieldDecl *BitFieldDecl, const llvm::DIDerivedType *BitFieldDI,
       llvm::ArrayRef<llvm::Metadata *> PreviousFieldsDI, const RecordDecl *RD);
 
+  /// A cache that maps names of artificial inlined functions to subprograms.
+  llvm::StringMap<llvm::DISubprogram *> InlinedTrapFuncMap;
+
+  /// A function that returns the subprogram corresponding to the artificial
+  /// inlined function for traps.
+  llvm::DISubprogram *createInlinedTrapSubprogram(StringRef FuncName,
+                                                  llvm::DIFile *FileScope);
+
   /// Helpers for collecting fields of a record.
   /// @{
   void CollectRecordLambdaFields(const CXXRecordDecl *CXXDecl,
@@ -608,6 +618,18 @@ class CGDebugInfo {
     return CoroutineParameterMappings;
   }
 
+  /// Create a debug location from `TrapLocation` that adds an artificial inline
+  /// frame where the frame name is
+  ///
+  /// * `<Prefix>:<Category>:<FailureMsg>`
+  ///
+  /// `<Prefix>` is "__clang_trap_msg".
+  ///
+  /// This is used to store failure reasons for traps.
+  llvm::DILocation *CreateTrapFailureMessageFor(llvm::DebugLoc TrapLocation,
+                                                StringRef Category,
+                                                StringRef FailureMsg);
+
 private:
   /// Emit call to llvm.dbg.declare for a variable declaration.
   /// Returns a pointer to the DILocalVariable associated with the
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 87988519e76916..bce941cc00e0ed 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -183,6 +183,33 @@ bool Sema::checkArgCount(CallExpr *Call, unsigned DesiredArgCount) {
          << /*is non object*/ 0 << Call->getArg(1)->getSourceRange();
 }
 
+static bool checkBuiltinVerboseTrap(CallExpr *Call, Sema &S) {
+  bool HasError = false;
+
+  for (unsigned I = 0; I < Call->getNumArgs(); ++I) {
+    Expr *Arg = Call->getArg(I);
+
+    if (Arg->isValueDependent())
+      continue;
+
+    std::optional<std::string> ArgString = Arg->tryEvaluateString(S.Context);
+    int DiagMsgKind = -1;
+    // Arguments must be pointers to constant strings and cannot use '$'.
+    if (!ArgString.has_value())
+      DiagMsgKind = 0;
+    else if (ArgString->find('$') != std::string::npos)
+      DiagMsgKind = 1;
+
+    if (DiagMsgKind >= 0) {
+      S.Diag(Arg->getBeginLoc(), diag::err_builtin_verbose_trap_arg)
+          << DiagMsgKind << Arg->getSourceRange();
+      HasError = true;
+    }
+  }
+
+  return !HasError;
+}
+
 static bool convertArgumentToType(Sema &S, Expr *&Value, QualType Ty) {
   if (Value->isTypeDependent())
     return false;
@@ -3351,6 +3378,11 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
   case Builtin::BI__builtin_matrix_column_major_store:
     return BuiltinMatrixColumnMajorStore(TheCall, TheCallResult);
 
+  case Builtin::BI__builtin_verbose_trap:
+    if (!checkBuiltinVerboseTrap(TheCall, *this))
+      return ExprError();
+    break;
+
   case Builtin::BI__builtin_get_device_side_mangled_name: {
     auto Check = [](CallExpr *TheCall) {
       if (TheCall->getNumArgs() != 1)
diff --git a/clang/test/CodeGenCXX/debug-info-verbose-trap.cpp b/clang/test/CodeGenCXX/debug-info-verbose-trap.cpp
new file mode 100644
index 00000000000000..f492698ccab83d
--- /dev/null
+++ b/clang/test/CodeGenCXX/debug-info-verbose-trap.cpp
@@ -0,0 +1,54 @@
+// RUN: %clang_cc1 -triple arm64-apple-ios -std=c++20 -emit-llvm -debug-info-kind=limited %s -o - | FileCheck %s
+
+// CHECK-LABEL: define void @_Z2f0v()
+// CHECK: call void @llvm.trap(), !dbg ![[LOC17:.*]]
+
+// CHECK: declare void @llvm.trap() #[[ATTR1:.*]]
+
+// CHECK-LABEL: define void @_Z2f1v()
+// CHECK: call void @llvm.trap(), !dbg ![[LOC23:.*]]
+// CHECK: call void @llvm.trap(), !dbg ![[LOC25:.*]]
+
+// CHECK-LABEL: define void @_Z2f3v()
+// CHECK: call void @_Z2f2IXadsoKcL_ZL8constCatEEEXadsoS0_L_ZL8constMsgEEEEvv()
+
+// CHECK-LABEL: define internal void @_Z2f2IXadsoKcL_ZL8constCatEEEXadsoS0_L_ZL8constMsgEEEEvv
+// CHECK: call void @llvm.trap(), !dbg ![[LOC36:.*]]
+
+// CHECK: attributes #[[ATTR1]] = { cold {{.*}}}
+
+// CHECK: ![[FILESCOPE:.*]] = !DIFile(filename: "{{.*}}debug-info-verbose-trap.cpp"
+
+char const constCat[] = "category2";
+char const constMsg[] = "hello";
+
+// CHECK: ![[SUBPROG14:.*]] = distinct !DISubprogram(name: "f0", linkageName: "_Z2f0v",
+// CHECK: ![[LOC17]] = !DILocation(line: 0, scope: ![[SUBPROG18:.*]], inlinedAt: ![[LOC20:.*]])
+// CHECK: ![[SUBPROG18]] = distinct !DISubprogram(name: "__clang_trap_msg$category1$Argument_must_not_be_null", scope: ![[FILESCOPE]], file: ![[FILESCOPE]], type: !{{.*}}, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !{{.*}})
+// CHECK: ![[LOC20]] = !DILocation(line: [[@LINE+2]], column: 3, scope: ![[SUBPROG14]])
+void f0() {
+  __builtin_verbose_trap("category1", "Argument_must_not_be_null");
+}
+
+// CHECK: ![[SUBPROG22:.*]] = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v",
+// CHECK: ![[LOC23]] = !DILocation(line: 0, scope: ![[SUBPROG18]], inlinedAt: ![[LOC24:.*]])
+// CHECK: ![[LOC24]] = !DILocation(line: [[@LINE+5]], column: 3, scope: ![[SUBPROG22]])
+// CHECK: ![[LOC25]] = !DILocation(line: 0, scope: ![[SUBPROG26:.*]], inlinedAt: ![[LOC27:.*]])
+// CHECK: ![[SUBPROG26]] = distinct !DISubprogram(name: "__clang_trap_msg$category2$hello", scope: ![[FILESCOPE]], file: ![[FILESCOPE]], type: !{{.*}}, flags: DIFlagArtificial, spFlags: DISPFlagDefinition, unit: !{{.*}})
+// CHECK: ![[LOC27]] = !DILocation(line: [[@LINE+3]], column: 3, scope: ![[SUBPROG22]])
+void f1() {
+  __builtin_verbose_trap("category1", "Argument_must_not_be_null");
+  __builtin_verbose_trap("category2", "hello");
+}
+
+// CHECK: ![[SUBPROG32:.*]] = distinct !DISubprogram(name: "f2<constCat, constMsg>", linkageName: "_Z2f2IXadsoKcL_ZL8constCatEEEXadsoS0_L_ZL8constMsgEEEEvv",
+// CHECK: ![[LOC36]] = !DILocation(line: 0, scope: ![[SUBPROG26]], inlinedAt: ![[LOC37:.*]])
+// CHECK: ![[LOC37]] = !DILocation(line: [[@LINE+3]], column: 3, scope: ![[SUBPROG32]])
+template <const char * const category, const char * const reason>
+void f2() {
+  __builtin_verbose_trap(category, reason);
+}
+
+void f3() {
+  f2<constCat, constMsg>();
+}
diff --git a/clang/test/SemaCXX/verbose-trap.cpp b/clang/test/SemaCXX/verbose-trap.cpp
new file mode 100644
index 00000000000000..2503f9860d9c34
--- /dev/null
+++ b/clang/test/SemaCXX/verbose-trap.cpp
@@ -0,0 +1,47 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -fcxx-exceptions -verify %s
+// RUN: %clang_cc1 -std=c++20 -fsyntax-only -fcxx-exceptions -verify %s
+
+#if !__has_builtin(__builtin_verbose_trap)
+#error
+#endif
+
+constexpr char const* constCat1 = "cat";
+char const* const constCat2 = "cat";
+char const constCat3[] = "cat";
+
+constexpr char const* constMsg1 = "hello";
+char const* const constMsg2 = "hello";
+char const constMsg3[] = "hello";
+
+template <const char * const category, const char * const reason>
+void f(const char * arg) {
+  __builtin_verbose_trap("cat1", "Arbitrary string literals can be used!");
+  __builtin_verbose_trap(" cat1 ", "Argument_must_not_be_null");
+  __builtin_verbose_trap("cat" "egory1", "hello" "world");
+  __builtin_verbose_trap(constCat1, constMsg1);
+  __builtin_verbose_trap(constCat2, constMsg2);
+  __builtin_verbose_trap("", "");
+  __builtin_verbose_trap(); // expected-error {{too few arguments}}
+  __builtin_verbose_trap(""); // expected-error {{too few arguments}}
+  __builtin_verbose_trap("", "", ""); // expected-error {{too many arguments}}
+  __builtin_verbose_trap("", 0); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant string}}
+  __builtin_verbose_trap(1, ""); // expected-error {{cannot initialize a parameter of type 'const char *' with an rvalue of type 'int'}}
+  __builtin_verbose_trap(arg, ""); // expected-error {{argument to __builtin_verbose_trap must be a pointer to a constant string}}
+  __builtin_verbose_trap("cat$1", "hel$lo"); // expected-error 2 {{argument to __builtin_verbose_trap must not contain $}}
+  __builtin_verbose_trap(category, reason);
+  __builtin_verbose_trap(u8"cat1", u8"hello");
+#if __cplusplus >= 202002L
+  // FIXME: Accept c++20 u8 string literals.
+  // expected-error@-3 {{cannot initialize a parameter of type 'const char *' with an lvalue of type 'const char8_t[5]'}}
+#endif
+  __builtin_verbose_trap("", "abcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcdabcd");
+}
+
+template <const char * const category>
+void f2() {
+  __builtin_verbose_trap(category, 1); // expected-error {{cannot initialize a parameter of type 'const char *' with an rvalue of type 'int'}}
+}
+
+void test() {
+  f<constCat3, constMsg3>(nullptr);
+}

From bb075eeb892374a209953ad20e02c1324e272679 Mon Sep 17 00:00:00 2001
From: Jakub Mazurkiewicz <mazkuba3@gmail.com>
Date: Tue, 25 Jun 2024 17:43:15 +0200
Subject: [PATCH 13/30] [libc++] LWG3382: NTTP for `pair` and `array` (#85811)

Mark LWG3382 as "Nothing To Do" and add tests.
---
 .../nttp.equivalence.compile.pass.cpp         | 70 +++++++++++++
 .../array/array.overview/nttp.verify.cpp      | 81 ++++++++++++++++
 .../nttp.equivalence.compile.pass.cpp         | 74 ++++++++++++++
 .../utility/pairs/pairs.pair/nttp.verify.cpp  | 97 +++++++++++++++++++
 4 files changed, 322 insertions(+)
 create mode 100644 libcxx/test/std/containers/sequences/array/array.overview/nttp.equivalence.compile.pass.cpp
 create mode 100644 libcxx/test/std/containers/sequences/array/array.overview/nttp.verify.cpp
 create mode 100644 libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.equivalence.compile.pass.cpp
 create mode 100644 libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.verify.cpp

diff --git a/libcxx/test/std/containers/sequences/array/array.overview/nttp.equivalence.compile.pass.cpp b/libcxx/test/std/containers/sequences/array/array.overview/nttp.equivalence.compile.pass.cpp
new file mode 100644
index 00000000000000..29e10fd40bd3ff
--- /dev/null
+++ b/libcxx/test/std/containers/sequences/array/array.overview/nttp.equivalence.compile.pass.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <array>
+
+// LWG-3382 NTTP for pair and array:
+// Two values a1 and a2 of type array<T, N> are template-argument-equivalent if and only if each pair of corresponding
+// elements in a1 and a2 are template-argument-equivalent.
+
+#include <array>
+
+#include <type_traits>
+
+namespace test_full_type {
+template <class T, std::size_t S, std::array<T, S> A>
+struct test : std::false_type {};
+
+template <>
+struct test<int, 3, std::array<int, 3>{1, 2, 3}> : std::true_type {};
+
+static_assert(!test<int*, 4, std::array<int*, 4>{}>::value);
+static_assert(!test<int*, 3, std::array<int*, 3>{}>::value);
+static_assert(!test<int, 3, std::array<int, 3>{}>::value);
+static_assert(!test<int, 3, std::array<int, 3>{1}>::value);
+static_assert(!test<int, 3, std::array<int, 3>{1, 2}>::value);
+static_assert(!test<long, 3, std::array<long, 3>{1, 2, 3}>::value);
+static_assert(!test<unsigned int, 3, std::array<unsigned int, 3>{1, 2, 3}>::value);
+static_assert(test<int, 3, std::array<int, 3>{1, 2, 3}>::value);
+} // namespace test_full_type
+
+namespace test_ctad {
+template <std::array A>
+struct test : std::false_type {};
+
+template <>
+struct test<std::array<int, 3>{4, 5, 6}> : std::true_type {};
+
+static_assert(!test<std::array<int*, 4>{}>::value);
+static_assert(!test<std::array<int*, 3>{}>::value);
+static_assert(!test<std::array<int, 3>{}>::value);
+static_assert(!test<std::array<int, 3>{4}>::value);
+static_assert(!test<std::array<int, 3>{4, 5}>::value);
+static_assert(!test<std::array<long, 3>{4, 5, 6}>::value);
+static_assert(!test<std::array<unsigned int, 3>{4, 5, 6}>::value);
+static_assert(test<std::array<int, 3>{4, 5, 6}>::value);
+} // namespace test_ctad
+
+namespace test_auto {
+template <auto A>
+struct test : std::false_type {};
+
+template <>
+struct test<std::array<int, 3>{7, 8, 9}> : std::true_type {};
+
+static_assert(!test<std::array<int*, 4>{}>::value);
+static_assert(!test<std::array<int*, 3>{}>::value);
+static_assert(!test<std::array<int, 3>{}>::value);
+static_assert(!test<std::array<int, 3>{7}>::value);
+static_assert(!test<std::array<int, 3>{7, 8}>::value);
+static_assert(!test<std::array<long, 3>{7, 8, 9}>::value);
+static_assert(!test<std::array<unsigned int, 3>{7, 8, 9}>::value);
+static_assert(test<std::array<int, 3>{7, 8, 9}>::value);
+} // namespace test_auto
diff --git a/libcxx/test/std/containers/sequences/array/array.overview/nttp.verify.cpp b/libcxx/test/std/containers/sequences/array/array.overview/nttp.verify.cpp
new file mode 100644
index 00000000000000..3eb8e2596f85bf
--- /dev/null
+++ b/libcxx/test/std/containers/sequences/array/array.overview/nttp.verify.cpp
@@ -0,0 +1,81 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+
+// <array>
+
+// LWG-3382 NTTP for pair and array:
+// array<T, N> is a structural type ([temp.param]) if T is a structural type.
+
+#include <array>
+
+#include <cstddef>
+#include <string>
+
+struct LiteralBase {};
+struct LiteralNSDM {};
+
+struct LiteralType : LiteralBase {
+  LiteralNSDM nsdm;
+};
+
+struct NotALiteral {
+  NotALiteral() {}
+};
+
+int i;
+NotALiteral not_a_literal;
+
+namespace test_full_type {
+template <class T, std::size_t S, std::array<T, S> A>
+struct test {};
+
+using A = test<int, 2, std::array{2, 3}>;
+using B = test<LiteralType, 0, std::array<LiteralType, 0>{}>;
+using C = test<int*, 1, std::array<int*, 1>{&i}>;
+using D = test<NotALiteral*, 1, std::array<NotALiteral*, 1>{&not_a_literal}>;
+
+using E = test<NotALiteral, 1, std::array<NotALiteral, 1>{}>;
+// expected-error-re@*:* {{non-type template parameter has non-literal type 'std::array<NotALiteral, 1U{{L{0,2}.*}}>'}}
+
+using F = test<std::string, 2, std::array<std::string, 2>{}>;
+// expected-error-re@*:* {{type 'std::array<{{(std::)?}}string, 2U{{L{0,2}.*}}>' {{(\(aka 'array<basic_string<char>, 2UL{0,2}>'\) )?}}of non-type template parameter is not a structural type}}
+} // namespace test_full_type
+
+namespace test_ctad {
+template <std::array A>
+struct test {};
+
+using A = test<std::array{2, 3}>;
+using B = test<std::array<LiteralType, 0>{}>;
+using C = test<std::array<int*, 1>{&i}>;
+using D = test<std::array<NotALiteral*, 1>{&not_a_literal}>;
+
+using E = test<std::array<NotALiteral, 1>{}>;
+// expected-error@-1 {{non-type template parameter has non-literal type 'std::array<NotALiteral, 1>'}}
+
+using F = test<std::array<std::string, 2>{}>;
+// expected-error@-1 {{type 'std::array<string, 2>' (aka 'std::array<std::string, 2>') of non-type template parameter is not a structural type}}
+} // namespace test_ctad
+
+namespace test_auto {
+template <auto A>
+struct test {};
+
+using A = test<std::array{2, 3}>;
+using B = test<std::array<LiteralType, 0>{}>;
+using C = test<std::array<int*, 1>{&i}>;
+using D = test<std::array<NotALiteral*, 1>{&not_a_literal}>;
+
+using E = test<std::array<NotALiteral, 1>{}>;
+// expected-error@-1 {{non-type template parameter has non-literal type 'std::array<NotALiteral, 1>'}}
+
+using F = test<std::array<std::string, 2>{}>;
+// expected-error@-1 {{type 'std::array<std::string, 2>' (aka 'array<basic_string<char>, 2>') of non-type template parameter is not a structural type}}
+} // namespace test_auto
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.equivalence.compile.pass.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.equivalence.compile.pass.cpp
new file mode 100644
index 00000000000000..db45a56feb88aa
--- /dev/null
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.equivalence.compile.pass.cpp
@@ -0,0 +1,74 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: clang-17
+
+// <utility>
+
+// LWG-3382 NTTP for pair and array:
+// Two values p1 and p2 of type pair<T, U> are template-argument-equivalent ([temp.type]) if and only if
+// p1.first and p2.first are template-argument-equivalent and p1.second and p2.second are template-argument-equivalent.
+
+#include <utility>
+
+#include <type_traits>
+
+int i = 0;
+int j = 1;
+
+namespace test_full_type {
+template <class T, class U, std::pair<T, U> P>
+struct test : std::false_type {};
+
+template <>
+struct test<int&, int, std::pair<int&, int>{i, 5}> : std::true_type {};
+
+static_assert(!test<int*, int*, std::pair<int*, int*>{}>::value);
+static_assert(!test<int*, int, std::pair<int*, int>{}>::value);
+static_assert(!test<int&, int*, std::pair<int&, int*>{i, nullptr}>::value);
+static_assert(!test<int&, int, std::pair<int&, int>{j, 0}>::value);
+static_assert(!test<int&, int, std::pair<int&, int>{j, 5}>::value);
+static_assert(!test<int&, int, std::pair<int&, int>{i, 0}>::value);
+static_assert(!test<int&, unsigned int, std::pair<int&, unsigned int>{j, 0}>::value);
+static_assert(test<int&, int, std::pair<int&, int>{i, 5}>::value);
+} // namespace test_full_type
+
+namespace test_ctad {
+template <std::pair P>
+struct test : std::false_type {};
+
+template <>
+struct test<std::pair<int&, int>{i, 10}> : std::true_type {};
+
+static_assert(!test<std::pair<int*, int*>{}>::value);
+static_assert(!test<std::pair<int*, int>{}>::value);
+static_assert(!test<std::pair<int&, int*>{i, nullptr}>::value);
+static_assert(!test<std::pair<int&, int>{j, 0}>::value);
+static_assert(!test<std::pair<int&, int>{j, 10}>::value);
+static_assert(!test<std::pair<int&, int>{i, 0}>::value);
+static_assert(!test<std::pair<int&, unsigned int>{j, 0}>::value);
+static_assert(test<std::pair<int&, int>{i, 10}>::value);
+} // namespace test_ctad
+
+namespace test_auto {
+template <auto P>
+struct test : std::false_type {};
+
+template <>
+struct test<std::pair<int&, int>{i, 15}> : std::true_type {};
+
+static_assert(!test<std::pair<int*, int*>{}>::value);
+static_assert(!test<std::pair<int*, int>{}>::value);
+static_assert(!test<std::pair<int&, int*>{i, nullptr}>::value);
+static_assert(!test<std::pair<int&, int>{j, 0}>::value);
+static_assert(!test<std::pair<int&, int>{j, 15}>::value);
+static_assert(!test<std::pair<int&, int>{i, 0}>::value);
+static_assert(!test<std::pair<int&, unsigned int>{j, 0}>::value);
+static_assert(test<std::pair<int&, int>{i, 15}>::value);
+} // namespace test_auto
diff --git a/libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.verify.cpp b/libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.verify.cpp
new file mode 100644
index 00000000000000..ac081495a62052
--- /dev/null
+++ b/libcxx/test/std/utilities/utility/pairs/pairs.pair/nttp.verify.cpp
@@ -0,0 +1,97 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14, c++17
+// UNSUPPORTED: clang-17
+
+// <utility>
+
+// LWG-3382 NTTP for pair and array:
+// pair<T, U> is a structural type ([temp.param]) if T and U are both structural types.
+
+#include <utility>
+
+#include <functional>
+#include <string>
+
+struct LiteralBase {};
+struct LiteralNSDM {};
+
+struct LiteralType : LiteralBase {
+  LiteralNSDM nsdm;
+};
+
+struct NotALiteral {
+  NotALiteral() {}
+};
+
+int i;
+NotALiteral not_a_literal;
+
+namespace test_full_type {
+template <class T, class U, std::pair<T, U> P>
+struct test {};
+
+using A = test<int, int, std::pair{0, 1}>;
+using B = test<int&, int&, std::make_pair(std::ref(i), std::ref(i))>;
+using C = test<const int&, const int&, std::make_pair(std::cref(i), std::cref(i))>;
+using D = test<LiteralType, LiteralType, std::pair<LiteralType, LiteralType>{}>;
+using E = test<int*, int*, std::pair<int*, int*>{&i, &i}>;
+using F = test<NotALiteral&, NotALiteral&, std::make_pair(std::ref(not_a_literal), std::ref(not_a_literal))>;
+
+using G = test<int&&, int&&, std::pair<int&&, int&&>{std::move(i), std::move(i)}>;
+// expected-error@*:* {{type 'std::pair<int &&, int &&>' of non-type template parameter is not a structural type}}
+
+using H = test<NotALiteral, NotALiteral, std::pair<NotALiteral, NotALiteral>{}>;
+// expected-error@*:* {{non-type template parameter has non-literal type 'std::pair<NotALiteral, NotALiteral>'}}
+
+using I = test<std::string, std::string, std::pair<std::string, std::string>{}>;
+// expected-error-re@*:* {{type 'std::pair<{{(std::)?}}string, {{(std::)?}}string>' {{(\(aka 'pair<basic_string<char>, basic_string<char>>'\) )?}}of non-type template parameter is not a structural type}}
+} // namespace test_full_type
+
+namespace test_ctad {
+template <std::pair P>
+struct test {};
+
+using A = test<std::pair{2, 3}>;
+using B = test<std::make_pair(std::ref(i), std::ref(i))>;
+using C = test<std::make_pair(std::cref(i), std::cref(i))>;
+using D = test<std::pair<LiteralType, LiteralType>{}>;
+using E = test<std::pair<int*, int*>{&i, &i}>;
+using F = test<std::make_pair(std::ref(not_a_literal), std::ref(not_a_literal))>;
+
+using G = test<std::pair<int&&, int&&>{std::move(i), std::move(i)}>;
+// expected-error@-1 {{type 'std::pair<int &&, int &&>' of non-type template parameter is not a structural type}}
+
+using H = test<std::pair<NotALiteral, NotALiteral>{}>;
+// expected-error@-1 {{non-type template parameter has non-literal type 'std::pair<NotALiteral, NotALiteral>'}}
+
+using I = test<std::pair<std::string, std::string>{}>;
+// expected-error@-1 {{type 'std::pair<string, string>' (aka 'std::pair<std::string, std::string>') of non-type template parameter is not a structural type}}
+} // namespace test_ctad
+
+namespace test_auto {
+template <auto P>
+struct test {};
+
+using A = test<std::pair{4, 5}>;
+using B = test<std::make_pair(std::ref(i), std::ref(i))>;
+using C = test<std::make_pair(std::cref(i), std::cref(i))>;
+using D = test<std::pair<LiteralType, LiteralType>{}>;
+using E = test<std::pair<int*, int*>{&i, &i}>;
+using F = test<std::make_pair(std::ref(not_a_literal), std::ref(not_a_literal))>;
+
+using G = test<std::pair<int&&, int&&>{std::move(i), std::move(i)}>;
+// expected-error@-1 {{type 'std::pair<int &&, int &&>' of non-type template parameter is not a structural type}}
+
+using H = test<std::pair<NotALiteral, NotALiteral>{}>;
+// expected-error@-1 {{non-type template parameter has non-literal type 'std::pair<NotALiteral, NotALiteral>'}}
+
+using I = test<std::pair<std::string, std::string>{}>;
+// expected-error@-1 {{type 'std::pair<std::string, std::string>' (aka 'pair<basic_string<char>, basic_string<char>>') of non-type template parameter is not a structural type}}
+} // namespace test_auto

From 889f3c5741e78ce90b4dcc643bb15d992225a67c Mon Sep 17 00:00:00 2001
From: Matt Arsenault <Matthew.Arsenault@amd.com>
Date: Tue, 25 Jun 2024 17:45:34 +0200
Subject: [PATCH 14/30] AMDGPU: Handle legal v2bf16 atomicrmw fadd for gfx12
 (#95930)

Annoyingly gfx90a/940 support this for global/flat but not buffer.
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |  12 +-
 llvm/lib/Target/AMDGPU/BUFInstructions.td     |   7 +-
 llvm/lib/Target/AMDGPU/GCNSubtarget.h         |   5 +
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     |   7 +-
 .../buffer-fat-pointer-atomicrmw-fadd.ll      | 147 ++----------------
 5 files changed, 36 insertions(+), 142 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index 4a1959cc0d8bad..63d83346528ab8 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -768,6 +768,12 @@ def FeatureAtomicGlobalPkAddBF16Inst : SubtargetFeature<"atomic-global-pk-add-bf
  [FeatureFlatGlobalInsts]
 >;
 
+def FeatureAtomicBufferPkAddBF16Inst : SubtargetFeature<"atomic-buffer-pk-add-bf16-inst",
+ "HasAtomicBufferPkAddBF16Inst",
+ "true",
+ "Has buffer_atomic_pk_add_bf16 instruction"
+>;
+
 def FeatureAtomicCSubNoRtnInsts : SubtargetFeature<"atomic-csub-no-rtn-insts",
   "HasAtomicCSubNoRtnInsts",
   "true",
@@ -1599,6 +1605,7 @@ def FeatureISAVersion12 : FeatureSet<
    FeatureAtomicFlatPkAdd16Insts,
    FeatureAtomicBufferGlobalPkAddF16Insts,
    FeatureAtomicGlobalPkAddBF16Inst,
+   FeatureAtomicBufferPkAddBF16Inst,
    FeatureFlatAtomicFaddF32Inst,
    FeatureImageInsts,
    FeatureExtendedImageInsts,
@@ -2177,7 +2184,10 @@ def HasAtomicBufferGlobalPkAddF16Insts
   AssemblerPredicate<(all_of FeatureAtomicBufferGlobalPkAddF16Insts)>;
 def HasAtomicGlobalPkAddBF16Inst
   : Predicate<"Subtarget->hasAtomicGlobalPkAddBF16Inst()">,
-  AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>;
+    AssemblerPredicate<(all_of FeatureAtomicGlobalPkAddBF16Inst)>;
+def HasAtomicBufferPkAddBF16Inst
+  : Predicate<"Subtarget->hasAtomicBufferPkAddBF16Inst()">,
+    AssemblerPredicate<(all_of FeatureAtomicBufferPkAddBF16Inst)>;
 def HasFlatAtomicFaddF32Inst
   : Predicate<"Subtarget->hasFlatAtomicFaddF32Inst()">,
   AssemblerPredicate<(all_of FeatureFlatAtomicFaddF32Inst)>;
diff --git a/llvm/lib/Target/AMDGPU/BUFInstructions.td b/llvm/lib/Target/AMDGPU/BUFInstructions.td
index cda4b57d9b0ed1..3b8d94b7440008 100644
--- a/llvm/lib/Target/AMDGPU/BUFInstructions.td
+++ b/llvm/lib/Target/AMDGPU/BUFInstructions.td
@@ -1241,7 +1241,9 @@ let SubtargetPredicate = isGFX12Plus in {
 defm BUFFER_ATOMIC_COND_SUB_U32 : MUBUF_Pseudo_Atomics <
   "buffer_atomic_cond_sub_u32", VGPR_32, i32
 >;
+}
 
+let SubtargetPredicate = HasAtomicBufferPkAddBF16Inst in {
 let FPAtomic = 1 in
 defm BUFFER_ATOMIC_PK_ADD_BF16 : MUBUF_Pseudo_Atomics <
   "buffer_atomic_pk_add_bf16", VGPR_32, v2bf16
@@ -1747,8 +1749,11 @@ defm : SIBufferAtomicPat<"SIbuffer_atomic_dec", i64, "BUFFER_ATOMIC_DEC_X2">;
 let SubtargetPredicate = HasAtomicCSubNoRtnInsts in
 defm : SIBufferAtomicPat<"SIbuffer_atomic_csub", i32, "BUFFER_ATOMIC_CSUB", ["noret"]>;
 
+let SubtargetPredicate = HasAtomicBufferPkAddBF16Inst in {
+  defm : SIBufferAtomicPat<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16">;
+}
+
 let SubtargetPredicate = isGFX12Plus in {
-  defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_fadd", v2bf16, "BUFFER_ATOMIC_PK_ADD_BF16_VBUFFER">;
   defm : SIBufferAtomicPat_Common<"SIbuffer_atomic_cond_sub_u32", i32, "BUFFER_ATOMIC_COND_SUB_U32_VBUFFER", ["ret"]>;
 }
 
diff --git a/llvm/lib/Target/AMDGPU/GCNSubtarget.h b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
index bb0746c5b5365d..07ff855756ec9d 100644
--- a/llvm/lib/Target/AMDGPU/GCNSubtarget.h
+++ b/llvm/lib/Target/AMDGPU/GCNSubtarget.h
@@ -171,6 +171,7 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
   bool HasAtomicBufferGlobalPkAddF16Insts = false;
   bool HasAtomicCSubNoRtnInsts = false;
   bool HasAtomicGlobalPkAddBF16Inst = false;
+  bool HasAtomicBufferPkAddBF16Inst = false;
   bool HasFlatAtomicFaddF32Inst = false;
   bool HasDefaultComponentZero = false;
   bool HasDefaultComponentBroadcast = false;
@@ -864,6 +865,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
     return HasAtomicGlobalPkAddBF16Inst;
   }
 
+  bool hasAtomicBufferPkAddBF16Inst() const {
+    return HasAtomicBufferPkAddBF16Inst;
+  }
+
   bool hasFlatAtomicFaddF32Inst() const { return HasFlatAtomicFaddF32Inst; }
 
   bool hasDefaultComponentZero() const { return HasDefaultComponentZero; }
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index d0b097d2b8febe..35774e44aba2e9 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -16175,9 +16175,10 @@ SITargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
       if (Subtarget->hasAtomicBufferGlobalPkAddF16Insts() && isHalf2(Ty))
         return AtomicExpansionKind::None;
 
-      // TODO: Handle <2 x bfloat> case. While gfx90a/gfx940 supports it for
-      // global/flat, it does not for buffer. gfx12 does have the buffer
-      // version.
+      // While gfx90a/gfx940 supports v2bf16 for global/flat, it does not for
+      // buffer. gfx12 does have the buffer version.
+      if (Subtarget->hasAtomicBufferPkAddBF16Inst() && isBFloat2(Ty))
+        return AtomicExpansionKind::None;
     }
 
     if (unsafeFPAtomicsDisabled(RMW->getFunction()))
diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
index 6cec55bf3d4f34..f8f85a56a9b2fd 100644
--- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
+++ b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointer-atomicrmw-fadd.ll
@@ -5611,50 +5611,11 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset(ptr add
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, s4
-; GFX12-NEXT:    s_addk_co_i32 s4, 0x400
-; GFX12-NEXT:    s_mov_b32 s5, 0
-; GFX12-NEXT:    v_mov_b32_e32 v4, s4
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_lshlrev_b32_e32 v2, 16, v1
-; GFX12-NEXT:    buffer_load_b32 v0, v0, s[0:3], null offen offset:1024
-; GFX12-NEXT:    v_and_b32_e32 v3, 0xffff0000, v1
-; GFX12-NEXT:  .LBB15_1: ; %atomicrmw.start
-; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_mov_b32_e32 v6, v0
+; GFX12-NEXT:    v_mov_b32_e32 v1, s4
 ; GFX12-NEXT:    s_wait_storecnt 0x0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff0000, v6
-; GFX12-NEXT:    v_add_f32_e32 v1, v1, v3
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX12-NEXT:    v_bfe_u32 v7, v1, 16, 1
-; GFX12-NEXT:    v_or_b32_e32 v9, 0x400000, v1
-; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v1, v1
-; GFX12-NEXT:    v_add3_u32 v7, v7, v1, 0x7fff
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_cndmask_b32 v1, v7, v9 :: v_dual_lshlrev_b32 v0, 16, v6
-; GFX12-NEXT:    v_add_f32_e32 v0, v0, v2
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX12-NEXT:    v_bfe_u32 v5, v0, 16, 1
-; GFX12-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX12-NEXT:    v_cmp_u_f32_e64 s4, v0, v0
-; GFX12-NEXT:    v_add3_u32 v5, v5, v0, 0x7fff
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v5, v8, s4
-; GFX12-NEXT:    v_perm_b32 v5, v1, v0, 0x7060302
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_mov_b32 v0, v5 :: v_dual_mov_b32 v1, v6
-; GFX12-NEXT:    buffer_atomic_cmpswap_b32 v[0:1], v4, s[0:3], null offen th:TH_ATOMIC_RETURN
+; GFX12-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024 th:TH_ATOMIC_RETURN
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
-; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v0, v6
-; GFX12-NEXT:    s_or_b32 s5, vcc_lo, s5
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s5
-; GFX12-NEXT:    s_cbranch_execnz .LBB15_1
-; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset:
@@ -6033,46 +5994,11 @@ define void @buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset(ptr addrspace
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_dual_mov_b32 v1, s4 :: v_dual_lshlrev_b32 v2, 16, v0
-; GFX12-NEXT:    s_addk_co_i32 s4, 0x400
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_and_b32 v3, 0xffff0000, v0
-; GFX12-NEXT:    buffer_load_b32 v1, v1, s[0:3], null offen offset:1024
-; GFX12-NEXT:    s_mov_b32 s5, 0
-; GFX12-NEXT:  .LBB16_1: ; %atomicrmw.start
-; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_and_b32_e32 v5, 0xffff0000, v1
-; GFX12-NEXT:    v_lshlrev_b32_e32 v0, 16, v1
+; GFX12-NEXT:    v_mov_b32_e32 v1, s4
+; GFX12-NEXT:    s_wait_storecnt 0x0
+; GFX12-NEXT:    buffer_atomic_pk_add_bf16 v0, v1, s[0:3], null offen offset:1024
 ; GFX12-NEXT:    s_wait_storecnt 0x0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_add_f32 v5, v5, v3 :: v_dual_add_f32 v0, v0, v2
-; GFX12-NEXT:    v_bfe_u32 v7, v5, 16, 1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_bfe_u32 v6, v0, 16, 1
-; GFX12-NEXT:    v_or_b32_e32 v8, 0x400000, v0
-; GFX12-NEXT:    v_or_b32_e32 v9, 0x400000, v5
-; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
-; GFX12-NEXT:    v_add3_u32 v7, v7, v5, 0x7fff
-; GFX12-NEXT:    v_add3_u32 v6, v6, v0, 0x7fff
-; GFX12-NEXT:    v_cmp_u_f32_e64 s4, v0, v0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT:    v_cndmask_b32_e32 v5, v7, v9, vcc_lo
-; GFX12-NEXT:    v_cndmask_b32_e64 v0, v6, v8, s4
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_perm_b32 v0, v5, v0, 0x7060302
-; GFX12-NEXT:    v_dual_mov_b32 v6, v1 :: v_dual_mov_b32 v5, v0
-; GFX12-NEXT:    buffer_atomic_cmpswap_b32 v[5:6], v4, s[0:3], null offen th:TH_ATOMIC_RETURN
-; GFX12-NEXT:    s_wait_loadcnt 0x0
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
-; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v5, v1
-; GFX12-NEXT:    v_mov_b32_e32 v1, v5
-; GFX12-NEXT:    s_or_b32 s5, vcc_lo, s5
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s5
-; GFX12-NEXT:    s_cbranch_execnz .LBB16_1
-; GFX12-NEXT:  ; %bb.2: ; %atomicrmw.end
-; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s5
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_noret_v2bf16__offset:
@@ -6442,8 +6368,8 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 ; GFX12-NEXT:    s_wait_kmcnt 0x0
-; GFX12-NEXT:    v_add_nc_u32_e32 v7, 0x400, v4
 ; GFX12-NEXT:    s_mov_b32 s1, exec_lo
+; GFX12-NEXT:    s_wait_storecnt 0x0
 ; GFX12-NEXT:  .LBB17_1: ; =>This Inner Loop Header: Depth=1
 ; GFX12-NEXT:    v_readfirstlane_b32 s4, v0
 ; GFX12-NEXT:    v_readfirstlane_b32 s5, v1
@@ -6455,70 +6381,17 @@ define <2 x bfloat> @buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterf
 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
 ; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
 ; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX12-NEXT:    buffer_load_b32 v6, v4, s[4:7], null offen offset:1024
+; GFX12-NEXT:    s_wait_loadcnt 0x0
+; GFX12-NEXT:    buffer_atomic_pk_add_bf16 v5, v4, s[4:7], null offen offset:1024 th:TH_ATOMIC_RETURN
+; GFX12-NEXT:    ; implicit-def: $vgpr0_vgpr1_vgpr2_vgpr3
 ; GFX12-NEXT:    ; implicit-def: $vgpr4
 ; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
 ; GFX12-NEXT:    s_cbranch_execnz .LBB17_1
 ; GFX12-NEXT:  ; %bb.2:
 ; GFX12-NEXT:    s_mov_b32 exec_lo, s1
-; GFX12-NEXT:    v_lshlrev_b32_e32 v8, 16, v5
-; GFX12-NEXT:    v_and_b32_e32 v9, 0xffff0000, v5
-; GFX12-NEXT:    s_mov_b32 s1, 0
-; GFX12-NEXT:  .LBB17_3: ; %atomicrmw.start
-; GFX12-NEXT:    ; =>This Loop Header: Depth=1
-; GFX12-NEXT:    ; Child Loop BB17_4 Depth 2
 ; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_and_b32_e32 v5, 0xffff0000, v6
-; GFX12-NEXT:    v_lshlrev_b32_e32 v4, 16, v6
-; GFX12-NEXT:    s_mov_b32 s2, exec_lo
-; GFX12-NEXT:    s_wait_storecnt 0x0
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_dual_add_f32 v5, v5, v9 :: v_dual_add_f32 v4, v4, v8
-; GFX12-NEXT:    v_bfe_u32 v11, v5, 16, 1
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
-; GFX12-NEXT:    v_bfe_u32 v10, v4, 16, 1
-; GFX12-NEXT:    v_or_b32_e32 v12, 0x400000, v4
-; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v4, v4
-; GFX12-NEXT:    v_or_b32_e32 v13, 0x400000, v5
-; GFX12-NEXT:    v_add3_u32 v11, v11, v5, 0x7fff
-; GFX12-NEXT:    v_add3_u32 v10, v10, v4, 0x7fff
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX12-NEXT:    v_cndmask_b32_e32 v4, v10, v12, vcc_lo
-; GFX12-NEXT:    v_cmp_u_f32_e32 vcc_lo, v5, v5
-; GFX12-NEXT:    v_cndmask_b32_e32 v5, v11, v13, vcc_lo
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX12-NEXT:    v_perm_b32 v5, v5, v4, 0x7060302
-; GFX12-NEXT:    v_mov_b32_e32 v4, v5
-; GFX12-NEXT:    v_mov_b32_e32 v5, v6
-; GFX12-NEXT:  .LBB17_4: ; Parent Loop BB17_3 Depth=1
-; GFX12-NEXT:    ; => This Inner Loop Header: Depth=2
-; GFX12-NEXT:    v_readfirstlane_b32 s4, v0
-; GFX12-NEXT:    v_readfirstlane_b32 s5, v1
-; GFX12-NEXT:    v_readfirstlane_b32 s6, v2
-; GFX12-NEXT:    v_readfirstlane_b32 s7, v3
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX12-NEXT:    v_cmp_eq_u64_e32 vcc_lo, s[4:5], v[0:1]
-; GFX12-NEXT:    v_cmp_eq_u64_e64 s0, s[6:7], v[2:3]
-; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX12-NEXT:    s_and_b32 s0, vcc_lo, s0
-; GFX12-NEXT:    s_and_saveexec_b32 s0, s0
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    buffer_atomic_cmpswap_b32 v[4:5], v7, s[4:7], null offen th:TH_ATOMIC_RETURN
-; GFX12-NEXT:    s_xor_b32 exec_lo, exec_lo, s0
-; GFX12-NEXT:    s_cbranch_execnz .LBB17_4
-; GFX12-NEXT:  ; %bb.5: ; in Loop: Header=BB17_3 Depth=1
-; GFX12-NEXT:    s_mov_b32 exec_lo, s2
-; GFX12-NEXT:    s_wait_loadcnt 0x0
-; GFX12-NEXT:    v_cmp_eq_u32_e32 vcc_lo, v4, v6
-; GFX12-NEXT:    v_mov_b32_e32 v6, v4
+; GFX12-NEXT:    v_mov_b32_e32 v0, v5
 ; GFX12-NEXT:    global_inv scope:SCOPE_DEV
-; GFX12-NEXT:    s_or_b32 s1, vcc_lo, s1
-; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
-; GFX12-NEXT:    s_and_not1_b32 exec_lo, exec_lo, s1
-; GFX12-NEXT:    s_cbranch_execnz .LBB17_3
-; GFX12-NEXT:  ; %bb.6: ; %atomicrmw.end
-; GFX12-NEXT:    s_or_b32 exec_lo, exec_lo, s1
-; GFX12-NEXT:    v_mov_b32_e32 v0, v4
 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX940-LABEL: buffer_fat_ptr_agent_atomic_fadd_ret_v2bf16__offset__waterfall:

From b7768c5485844306cd3c8afeef609ddf9d79696a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 25 Jun 2024 15:33:53 +0200
Subject: [PATCH 15/30] [clang][Interp][NFC] Use delegate() to delegate to only
 initlist item

---
 clang/lib/AST/Interp/ByteCodeExprGen.cpp | 22 ++++------------------
 1 file changed, 4 insertions(+), 18 deletions(-)

diff --git a/clang/lib/AST/Interp/ByteCodeExprGen.cpp b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
index 69661a590b9c2d..7b5aaa02a22c5f 100644
--- a/clang/lib/AST/Interp/ByteCodeExprGen.cpp
+++ b/clang/lib/AST/Interp/ByteCodeExprGen.cpp
@@ -1234,7 +1234,7 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
     const Record *R = getRecord(E->getType());
 
     if (Inits.size() == 1 && E->getType() == Inits[0]->getType())
-      return this->visitInitializer(Inits[0]);
+      return this->delegate(Inits[0]);
 
     auto initPrimitiveField = [=](const Record::Field *FieldToInit,
                                   const Expr *Init, PrimType T) -> bool {
@@ -1329,22 +1329,8 @@ bool ByteCodeExprGen<Emitter>::visitInitList(ArrayRef<const Expr *> Inits,
   }
 
   if (T->isArrayType()) {
-    // Prepare composite return value.
-    if (!Initializing) {
-      if (GlobalDecl) {
-        std::optional<unsigned> GlobalIndex = P.createGlobal(E);
-        if (!GlobalIndex)
-          return false;
-        if (!this->emitGetPtrGlobal(*GlobalIndex, E))
-          return false;
-      } else {
-        std::optional<unsigned> LocalIndex = allocateLocal(E);
-        if (!LocalIndex)
-          return false;
-        if (!this->emitGetPtrLocal(*LocalIndex, E))
-          return false;
-      }
-    }
+    if (Inits.size() == 1 && E->getType() == Inits[0]->getType())
+      return this->delegate(Inits[0]);
 
     unsigned ElementIndex = 0;
     for (const Expr *Init : Inits) {
@@ -2150,7 +2136,7 @@ bool ByteCodeExprGen<Emitter>::VisitMaterializeTemporaryExpr(
 
   if (Initializing) {
     // We already have a value, just initialize that.
-    return this->visitInitializer(SubExpr);
+    return this->delegate(SubExpr);
   }
   // If we don't end up using the materialized temporary anyway, don't
   // bother creating it.

From e951bd0f51f8b077296f09d9c60ddf150048042f Mon Sep 17 00:00:00 2001
From: Vy Nguyen <vyng@google.com>
Date: Tue, 25 Jun 2024 12:01:17 -0400
Subject: [PATCH 16/30] Reapply  PR/87550 (again) (#95571)

New fixes:
- properly init the `std::optional<std::vector>` to an empty vector as
opposed to `{}` (which was effectively `std::nullopt`).

---------

Co-authored-by: Vy Nguyen <oontvoo@users.noreply.github.com>
---
 lldb/include/lldb/API/SBDebugger.h    |  2 +
 lldb/include/lldb/Symbol/TypeSystem.h |  1 +
 lldb/source/API/SBDebugger.cpp        |  4 ++
 lldb/source/Symbol/TypeSystem.cpp     | 11 +++++
 lldb/tools/lldb-dap/DAP.cpp           | 59 ++++++++++++++++++++++-----
 lldb/tools/lldb-dap/DAP.h             |  5 ++-
 lldb/tools/lldb-dap/lldb-dap.cpp      |  6 ++-
 7 files changed, 75 insertions(+), 13 deletions(-)

diff --git a/lldb/include/lldb/API/SBDebugger.h b/lldb/include/lldb/API/SBDebugger.h
index af19b1faf3bf51..84ea9c0f772e16 100644
--- a/lldb/include/lldb/API/SBDebugger.h
+++ b/lldb/include/lldb/API/SBDebugger.h
@@ -57,6 +57,8 @@ class LLDB_API SBDebugger {
 
   static const char *GetBroadcasterClass();
 
+  static bool SupportsLanguage(lldb::LanguageType language);
+
   lldb::SBBroadcaster GetBroadcaster();
 
   /// Get progress data from a SBEvent whose type is eBroadcastBitProgress.
diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h
index b4025c173a1861..7d48f9b316138c 100644
--- a/lldb/include/lldb/Symbol/TypeSystem.h
+++ b/lldb/include/lldb/Symbol/TypeSystem.h
@@ -209,6 +209,7 @@ class TypeSystem : public PluginInterface,
   // TypeSystems can support more than one language
   virtual bool SupportsLanguage(lldb::LanguageType language) = 0;
 
+  static bool SupportsLanguageStatic(lldb::LanguageType language);
   // Type Completion
 
   virtual bool GetCompleteType(lldb::opaque_compiler_type_t type) = 0;
diff --git a/lldb/source/API/SBDebugger.cpp b/lldb/source/API/SBDebugger.cpp
index 7ef0d6efd4aaa5..29da7d33dd80b8 100644
--- a/lldb/source/API/SBDebugger.cpp
+++ b/lldb/source/API/SBDebugger.cpp
@@ -1742,3 +1742,7 @@ bool SBDebugger::InterruptRequested()   {
     return m_opaque_sp->InterruptRequested();
   return false;
 }
+
+bool SBDebugger::SupportsLanguage(lldb::LanguageType language) {
+  return TypeSystem::SupportsLanguageStatic(language);
+}
diff --git a/lldb/source/Symbol/TypeSystem.cpp b/lldb/source/Symbol/TypeSystem.cpp
index 4956f10a0b0a73..931ce1b0203a93 100644
--- a/lldb/source/Symbol/TypeSystem.cpp
+++ b/lldb/source/Symbol/TypeSystem.cpp
@@ -335,3 +335,14 @@ TypeSystemMap::GetTypeSystemForLanguage(lldb::LanguageType language,
   }
   return GetTypeSystemForLanguage(language);
 }
+
+bool TypeSystem::SupportsLanguageStatic(lldb::LanguageType language) {
+  if (language == eLanguageTypeUnknown || language >= eNumLanguageTypes)
+    return false;
+
+  LanguageSet languages =
+      PluginManager::GetAllTypeSystemSupportedLanguagesForTypes();
+  if (languages.Empty())
+    return false;
+  return languages[language];
+}
diff --git a/lldb/tools/lldb-dap/DAP.cpp b/lldb/tools/lldb-dap/DAP.cpp
index d419f821999e6c..0196aed819f2b4 100644
--- a/lldb/tools/lldb-dap/DAP.cpp
+++ b/lldb/tools/lldb-dap/DAP.cpp
@@ -32,14 +32,7 @@ namespace lldb_dap {
 DAP g_dap;
 
 DAP::DAP()
-    : broadcaster("lldb-dap"),
-      exception_breakpoints(
-          {{"cpp_catch", "C++ Catch", lldb::eLanguageTypeC_plus_plus},
-           {"cpp_throw", "C++ Throw", lldb::eLanguageTypeC_plus_plus},
-           {"objc_catch", "Objective-C Catch", lldb::eLanguageTypeObjC},
-           {"objc_throw", "Objective-C Throw", lldb::eLanguageTypeObjC},
-           {"swift_catch", "Swift Catch", lldb::eLanguageTypeSwift},
-           {"swift_throw", "Swift Throw", lldb::eLanguageTypeSwift}}),
+    : broadcaster("lldb-dap"), exception_breakpoints(),
       focus_tid(LLDB_INVALID_THREAD_ID), stop_at_entry(false), is_attach(false),
       enable_auto_variable_summaries(false),
       enable_synthetic_child_debugging(false),
@@ -65,8 +58,51 @@ DAP::DAP()
 
 DAP::~DAP() = default;
 
+void DAP::PopulateExceptionBreakpoints() {
+  llvm::call_once(init_exception_breakpoints_flag, [this]() {
+    exception_breakpoints = std::vector<ExceptionBreakpoint> {};
+    
+    if (lldb::SBDebugger::SupportsLanguage(lldb::eLanguageTypeC_plus_plus)) {
+      exception_breakpoints->emplace_back("cpp_catch", "C++ Catch",
+                                          lldb::eLanguageTypeC_plus_plus);
+      exception_breakpoints->emplace_back("cpp_throw", "C++ Throw",
+                                          lldb::eLanguageTypeC_plus_plus);
+    }
+    if (lldb::SBDebugger::SupportsLanguage(lldb::eLanguageTypeObjC)) {
+      exception_breakpoints->emplace_back("objc_catch", "Objective-C Catch",
+                                          lldb::eLanguageTypeObjC);
+      exception_breakpoints->emplace_back("objc_throw", "Objective-C Throw",
+                                          lldb::eLanguageTypeObjC);
+    }
+    if (lldb::SBDebugger::SupportsLanguage(lldb::eLanguageTypeSwift)) {
+      exception_breakpoints->emplace_back("swift_catch", "Swift Catch",
+                                          lldb::eLanguageTypeSwift);
+      exception_breakpoints->emplace_back("swift_throw", "Swift Throw",
+                                          lldb::eLanguageTypeSwift);
+    }
+    assert(!exception_breakpoints->empty() && "should not be empty");
+  });
+}
+
 ExceptionBreakpoint *DAP::GetExceptionBreakpoint(const std::string &filter) {
-  for (auto &bp : exception_breakpoints) {
+  // PopulateExceptionBreakpoints() is called after g_dap.debugger is created
+  // in a request-initialize.
+  //
+  // But this GetExceptionBreakpoint() method may be called before attaching, in
+  // which case, we may not have populated the filter yet.
+  //
+  // We also cannot call PopulateExceptionBreakpoints() in DAP::DAP() because
+  // we need SBDebugger::Initialize() to have been called before this.
+  //
+  // So just calling PopulateExceptionBreakoints(),which does lazy-populating
+  // seems easiest. Two other options include:
+  //  + call g_dap.PopulateExceptionBreakpoints() in lldb-dap.cpp::main()
+  //    right after the call to SBDebugger::Initialize()
+  //  + Just call PopulateExceptionBreakpoints() to get a fresh list  everytime
+  //    we query (a bit overkill since it's not likely to change?)
+  PopulateExceptionBreakpoints();
+
+  for (auto &bp : *exception_breakpoints) {
     if (bp.filter == filter)
       return &bp;
   }
@@ -74,7 +110,10 @@ ExceptionBreakpoint *DAP::GetExceptionBreakpoint(const std::string &filter) {
 }
 
 ExceptionBreakpoint *DAP::GetExceptionBreakpoint(const lldb::break_id_t bp_id) {
-  for (auto &bp : exception_breakpoints) {
+  // See comment in the other GetExceptionBreakpoint().
+  PopulateExceptionBreakpoints();
+
+  for (auto &bp : *exception_breakpoints) {
     if (bp.bp.GetID() == bp_id)
       return &bp;
   }
diff --git a/lldb/tools/lldb-dap/DAP.h b/lldb/tools/lldb-dap/DAP.h
index a88ee3e1dec6bc..37e57d58968d90 100644
--- a/lldb/tools/lldb-dap/DAP.h
+++ b/lldb/tools/lldb-dap/DAP.h
@@ -156,7 +156,8 @@ struct DAP {
   std::unique_ptr<std::ofstream> log;
   llvm::StringMap<SourceBreakpointMap> source_breakpoints;
   FunctionBreakpointMap function_breakpoints;
-  std::vector<ExceptionBreakpoint> exception_breakpoints;
+  std::optional<std::vector<ExceptionBreakpoint>> exception_breakpoints;
+  llvm::once_flag init_exception_breakpoints_flag;
   std::vector<std::string> init_commands;
   std::vector<std::string> pre_run_commands;
   std::vector<std::string> post_run_commands;
@@ -228,6 +229,8 @@ struct DAP {
 
   llvm::json::Value CreateTopLevelScopes();
 
+  void PopulateExceptionBreakpoints();
+
   /// \return
   ///   Attempt to determine if an expression is a variable expression or
   ///   lldb command using a hueristic based on the first term of the
diff --git a/lldb/tools/lldb-dap/lldb-dap.cpp b/lldb/tools/lldb-dap/lldb-dap.cpp
index 7746afb6cbbf38..470c9f84c6a203 100644
--- a/lldb/tools/lldb-dap/lldb-dap.cpp
+++ b/lldb/tools/lldb-dap/lldb-dap.cpp
@@ -16,6 +16,7 @@
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
+#include <optional>
 #include <sys/stat.h>
 #include <sys/types.h>
 #if defined(_WIN32)
@@ -1586,6 +1587,7 @@ void request_initialize(const llvm::json::Object &request) {
   bool source_init_file = GetBoolean(arguments, "sourceInitFile", true);
 
   g_dap.debugger = lldb::SBDebugger::Create(source_init_file, log_cb, nullptr);
+  g_dap.PopulateExceptionBreakpoints();
   auto cmd = g_dap.debugger.GetCommandInterpreter().AddMultiwordCommand(
       "lldb-dap", "Commands for managing lldb-dap.");
   if (GetBoolean(arguments, "supportsStartDebuggingRequest", false)) {
@@ -1621,7 +1623,7 @@ void request_initialize(const llvm::json::Object &request) {
   body.try_emplace("supportsEvaluateForHovers", true);
   // Available filters or options for the setExceptionBreakpoints request.
   llvm::json::Array filters;
-  for (const auto &exc_bp : g_dap.exception_breakpoints) {
+  for (const auto &exc_bp : *g_dap.exception_breakpoints) {
     filters.emplace_back(CreateExceptionBreakpointFilter(exc_bp));
   }
   body.try_emplace("exceptionBreakpointFilters", std::move(filters));
@@ -2476,7 +2478,7 @@ void request_setExceptionBreakpoints(const llvm::json::Object &request) {
   // Keep a list of any exception breakpoint filter names that weren't set
   // so we can clear any exception breakpoints if needed.
   std::set<std::string> unset_filters;
-  for (const auto &bp : g_dap.exception_breakpoints)
+  for (const auto &bp : *g_dap.exception_breakpoints)
     unset_filters.insert(bp.filter);
 
   for (const auto &value : *filters) {

From dca49d739de07b1755ad65aa26dacd2e2c22af20 Mon Sep 17 00:00:00 2001
From: "Nick Desaulniers (paternity leave)"
 <nickdesaulniers@users.noreply.github.com>
Date: Tue, 25 Jun 2024 09:04:19 -0700
Subject: [PATCH 17/30] [libc][arm32] define argc type and stack alignment
 (#96367)

https://github.com/ARM-software/abi-aa/blob/main/aapcs32/aapcs32.rst#6212stack-constraints-at-a-public-interface
mentions that the stack on ARM32 is double word aligned.

Remove confused comments around ArgcType. argc is always an int, passed on the
stack, so we need to store a pointer to it (regardless of ILP32 or LP64).
---
 libc/config/linux/app.h             | 24 +++---------------------
 libc/src/__support/threads/thread.h |  3 +++
 libc/startup/linux/do_start.cpp     |  4 ++--
 3 files changed, 8 insertions(+), 23 deletions(-)

diff --git a/libc/config/linux/app.h b/libc/config/linux/app.h
index 766cd49e88f6f7..2a3b1560817b8b 100644
--- a/libc/config/linux/app.h
+++ b/libc/config/linux/app.h
@@ -35,24 +35,6 @@ struct TLSImage {
   uintptr_t align;
 };
 
-#if defined(LIBC_TARGET_ARCH_IS_X86_64) ||                                     \
-    defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                    \
-    defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
-// At the language level, argc is an int. But we use uint64_t as the x86_64
-// ABI specifies it as an 8 byte value. Likewise, in the ARM64 ABI, arguments
-// are usually passed in registers.  x0 is a doubleword register, so this is
-// 64 bit for aarch64 as well.
-typedef uintptr_t ArgcType;
-
-// At the language level, argv is a char** value. However, we use uint64_t as
-// ABIs specify the argv vector be an |argc| long array of 8-byte values.
-typedef uintptr_t ArgVEntryType;
-
-typedef uintptr_t EnvironType;
-#else
-#error "argc and argv types are not defined for the target platform."
-#endif
-
 // Linux manpage on `proc(5)` says that the aux vector is an array of
 // unsigned long pairs.
 // (see: https://man7.org/linux/man-pages/man5/proc.5.html)
@@ -65,7 +47,7 @@ struct AuxEntry {
 };
 
 struct Args {
-  ArgcType argc;
+  uintptr_t argc;
 
   // A flexible length array would be more suitable here, but C++ doesn't have
   // flexible arrays: P1039 proposes to fix this. So, for now we just fake it.
@@ -73,7 +55,7 @@ struct Args {
   // (ISO C 5.1.2.2.1) so one is fine. Also, length of 1 is not really wrong as
   // |argc| is guaranteed to be atleast 1, and there is an 8-byte null entry at
   // the end of the argv array.
-  ArgVEntryType argv[1];
+  uintptr_t argv[1];
 };
 
 // Data structure which captures properties of a linux application.
@@ -87,7 +69,7 @@ struct AppProperties {
   TLSImage tls;
 
   // Environment data.
-  EnvironType *env_ptr;
+  uintptr_t *env_ptr;
 
   // Auxiliary vector data.
   AuxEntry *auxv_ptr;
diff --git a/libc/src/__support/threads/thread.h b/libc/src/__support/threads/thread.h
index acfe33879f8783..f89c687eeaa19b 100644
--- a/libc/src/__support/threads/thread.h
+++ b/libc/src/__support/threads/thread.h
@@ -43,6 +43,9 @@ union ThreadReturnValue {
      defined(LIBC_TARGET_ARCH_IS_X86_64) ||                                    \
      defined(LIBC_TARGET_ARCH_IS_ANY_RISCV))
 constexpr unsigned int STACK_ALIGNMENT = 16;
+#elif defined(LIBC_TARGET_ARCH_IS_ARM)
+// See Section 6.2.1.2 Stack constraints at a public interface of AAPCS32.
+constexpr unsigned int STACK_ALIGNMENT = 8;
 #endif
 // TODO: Provide stack alignment requirements for other architectures.
 
diff --git a/libc/startup/linux/do_start.cpp b/libc/startup/linux/do_start.cpp
index 3d7d32aead4fcf..30ab1f0e26ea58 100644
--- a/libc/startup/linux/do_start.cpp
+++ b/libc/startup/linux/do_start.cpp
@@ -69,8 +69,8 @@ static ThreadAttributes main_thread_attrib;
   // After the argv array, is a 8-byte long NULL value before the array of env
   // values. The end of the env values is marked by another 8-byte long NULL
   // value. We step over it (the "+ 1" below) to get to the env values.
-  ArgVEntryType *env_ptr = app.args->argv + app.args->argc + 1;
-  ArgVEntryType *env_end_marker = env_ptr;
+  uintptr_t *env_ptr = app.args->argv + app.args->argc + 1;
+  uintptr_t *env_end_marker = env_ptr;
   app.env_ptr = env_ptr;
   while (*env_end_marker)
     ++env_end_marker;

From fb07afedbebb0c9f2647d02c6f254245cdb84085 Mon Sep 17 00:00:00 2001
From: yonghong-song <yhs@fb.com>
Date: Tue, 25 Jun 2024 09:27:18 -0700
Subject: [PATCH 18/30] [BPF] Avoid potential long compilation time without -g
 (#96575)

Alastair Robertson reported a huge compilation time increase without -g
for bpf target when comparing to x86 ([1]). In my setup, with '-O0', for
x86, a large basic block compilation takes 0.19s while bpf target takes
2.46s. The top function which contributes to the compile time is
eliminateFrameIndex().

Such long compilation time without -g is caused by commit
  05de2e481811 ("[bpf] error when BPF stack size exceeds 512 bytes")
The compiler tries to get some debug loc by iterating all insns in the
basic block which will be used when compiler warns larger-than-512 stack
size. Even without -g, such iterating also happens which cause
unnecessary compile time increase.

To fix the issue, let us move the related code when the compiler is
about to warn stack limit violation. This fixed the compile time
regression, and on my system, the compile time is reduced from 2.46s to
0.35s.

  [1] https://github.com/bpftrace/bpftrace/issues/3257

Co-authored-by: Yonghong Song <yonghong.song@linux.dev>
---
 llvm/lib/Target/BPF/BPFRegisterInfo.cpp | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
index 8761e4aa258c21..84af6806abb36c 100644
--- a/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
+++ b/llvm/lib/Target/BPF/BPFRegisterInfo.cpp
@@ -47,9 +47,17 @@ BitVector BPFRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
   return Reserved;
 }
 
-static void WarnSize(int Offset, MachineFunction &MF, DebugLoc& DL)
-{
+static void WarnSize(int Offset, MachineFunction &MF, DebugLoc& DL,
+                     MachineBasicBlock& MBB) {
   if (Offset <= -BPFStackSizeOption) {
+    if (!DL)
+      /* try harder to get some debug loc */
+      for (auto &I : MBB)
+        if (I.getDebugLoc()) {
+          DL = I.getDebugLoc();
+          break;
+        }
+
     const Function &F = MF.getFunction();
     DiagnosticInfoUnsupported DiagStackSize(
         F,
@@ -73,14 +81,6 @@ bool BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   MachineFunction &MF = *MBB.getParent();
   DebugLoc DL = MI.getDebugLoc();
 
-  if (!DL)
-    /* try harder to get some debug loc */
-    for (auto &I : MBB)
-      if (I.getDebugLoc()) {
-        DL = I.getDebugLoc();
-        break;
-      }
-
   while (!MI.getOperand(i).isFI()) {
     ++i;
     assert(i < MI.getNumOperands() && "Instr doesn't have FrameIndex operand!");
@@ -93,7 +93,7 @@ bool BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (MI.getOpcode() == BPF::MOV_rr) {
     int Offset = MF.getFrameInfo().getObjectOffset(FrameIndex);
 
-    WarnSize(Offset, MF, DL);
+    WarnSize(Offset, MF, DL, MBB);
     MI.getOperand(i).ChangeToRegister(FrameReg, false);
     Register reg = MI.getOperand(i - 1).getReg();
     BuildMI(MBB, ++II, DL, TII.get(BPF::ADD_ri), reg)
@@ -108,7 +108,7 @@ bool BPFRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
   if (!isInt<32>(Offset))
     llvm_unreachable("bug in frame offset");
 
-  WarnSize(Offset, MF, DL);
+  WarnSize(Offset, MF, DL, MBB);
 
   if (MI.getOpcode() == BPF::FI_ri) {
     // architecture does not really support FI_ri, replace it with

From ac1e22f3053f761e4e2ef832b92de15876e68335 Mon Sep 17 00:00:00 2001
From: Stanley Winata <68087699+raikonenfnu@users.noreply.github.com>
Date: Tue, 25 Jun 2024 09:29:43 -0700
Subject: [PATCH 19/30] [mlir][vector] Generalize folding of ext-contractionOp
 to other types. (#96593)

Many state of the art models and quantization operations are now
directly working on vector.contract on integers.

This commit enables generalizes ext-contraction folding S.T we can emit
more performant vector.contracts on codegen pipelines.

Signed-off-by: Stanley Winata <stanley.winata@amd.com>
---
 .../Vector/Transforms/VectorTransforms.cpp    |  9 +++++---
 .../fold-arith-extf-into-vector-contract.mlir | 22 +++++++++++++++++++
 2 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
index eac6db585aad78..da3d9648cf2838 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorTransforms.cpp
@@ -1552,6 +1552,7 @@ struct CanonicalizeContractMatmulToMMT final
 /// Cores, i.e, `mma.sync.*.f32.f16.f16.f32` and `mma.sync.*.f32.bf16.bf16.f32`.
 /// This pattern folds the arithmetic extensions into the vector contraction and
 /// enables the usage of native mixed precision Tensor Core instructions.
+template <typename ExtOp>
 struct FoldArithExtIntoContractionOp
     : public OpRewritePattern<vector::ContractionOp> {
   using OpRewritePattern::OpRewritePattern;
@@ -1559,8 +1560,8 @@ struct FoldArithExtIntoContractionOp
   LogicalResult matchAndRewrite(vector::ContractionOp contractOp,
                                 PatternRewriter &rewriter) const override {
 
-    auto lhsDefOp = contractOp.getLhs().getDefiningOp<arith::ExtFOp>();
-    auto rhsDefOp = contractOp.getRhs().getDefiningOp<arith::ExtFOp>();
+    auto lhsDefOp = contractOp.getLhs().getDefiningOp<ExtOp>();
+    auto rhsDefOp = contractOp.getRhs().getDefiningOp<ExtOp>();
 
     if (!lhsDefOp || !rhsDefOp) {
       return rewriter.notifyMatchFailure(contractOp,
@@ -1895,7 +1896,9 @@ struct FoldArithToVectorOuterProduct : public OpRewritePattern<MulOpType> {
 
 void mlir::vector::populateFoldArithExtensionPatterns(
     RewritePatternSet &patterns) {
-  patterns.add<FoldArithExtIntoContractionOp>(patterns.getContext());
+  patterns.add<FoldArithExtIntoContractionOp<arith::ExtFOp>,
+               FoldArithExtIntoContractionOp<arith::ExtSIOp>>(
+      patterns.getContext());
 }
 
 void mlir::vector::populateVectorMaskMaterializationPatterns(
diff --git a/mlir/test/Dialect/Vector/fold-arith-extf-into-vector-contract.mlir b/mlir/test/Dialect/Vector/fold-arith-extf-into-vector-contract.mlir
index 31ae126906f216..6dbde7afbdd33b 100644
--- a/mlir/test/Dialect/Vector/fold-arith-extf-into-vector-contract.mlir
+++ b/mlir/test/Dialect/Vector/fold-arith-extf-into-vector-contract.mlir
@@ -48,3 +48,25 @@ func.func @fold_arith_extf_into_contract_scalable(
       %lhs_f32, %rhs_f32, %arg2 : vector<[64]x64xf32>, vector<64x64xf32> into vector<[64]x64xf32>
     return %result : vector<[64]x64xf32>
 }
+
+// -----
+
+// CHECK-LABEL: func.func @fold_arith_extsi_into_contract
+//  CHECK-SAME: (%[[ARG0:.*]]: vector<64x64xi8>, %[[ARG1:.*]]: vector<64x64xi8>, %[[ARG2:.*]]: vector<64x64xi32>)
+//  CHECK-NEXT:   %[[R:.+]] = vector.contract
+//  CHECK-SAME:   iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind<add>}
+//  CHECK-SAME:   %[[ARG0]], %[[ARG1]], %[[ARG2]] : vector<64x64xi8>, vector<64x64xi8> into vector<64x64xi32>
+//  CHECK-NEXT:   return %[[R]] : vector<64x64xi32>
+func.func @fold_arith_extsi_into_contract(
+  %arg0: vector<64x64xi8>,
+  %arg1: vector<64x64xi8>,
+  %arg2: vector<64x64xi32>) -> vector<64x64xi32> {
+    %lhs_i32 = arith.extsi %arg0 : vector<64x64xi8> to vector<64x64xi32>
+    %rhs_i32 = arith.extsi %arg1 : vector<64x64xi8> to vector<64x64xi32>
+    %result = vector.contract {
+      indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
+      iterator_types = ["parallel", "parallel", "reduction"],
+      kind = #vector.kind<add>}
+      %lhs_i32, %rhs_i32, %arg2 : vector<64x64xi32>, vector<64x64xi32> into vector<64x64xi32>
+    return %result : vector<64x64xi32>
+}

From 580343d96f18e3c2fa8d7a8e25a175aaa2e20522 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Timm=20B=C3=A4der?= <tbaeder@redhat.com>
Date: Tue, 25 Jun 2024 17:03:49 +0200
Subject: [PATCH 20/30] [clang][Interp][NFC] Destroy InitMap when moving
 contents to DeadBlock

---
 clang/lib/AST/Interp/Descriptor.cpp | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/clang/lib/AST/Interp/Descriptor.cpp b/clang/lib/AST/Interp/Descriptor.cpp
index fcb778f7aeab0e..fea8a7b1d14a9e 100644
--- a/clang/lib/AST/Interp/Descriptor.cpp
+++ b/clang/lib/AST/Interp/Descriptor.cpp
@@ -64,7 +64,14 @@ static void dtorArrayTy(Block *, std::byte *Ptr, const Descriptor *D) {
 template <typename T>
 static void moveArrayTy(Block *, const std::byte *Src, std::byte *Dst,
                         const Descriptor *D) {
-  // FIXME: Need to copy the InitMap?
+  // FIXME: Get rid of the const_cast.
+  InitMapPtr &SrcIMP =
+      *reinterpret_cast<InitMapPtr *>(const_cast<std::byte *>(Src));
+  if (SrcIMP) {
+    // We only ever invoke the moveFunc when moving block contents to a
+    // DeadBlock. DeadBlocks don't need InitMaps, so we destroy them here.
+    SrcIMP = std::nullopt;
+  }
   Src += sizeof(InitMapPtr);
   Dst += sizeof(InitMapPtr);
   for (unsigned I = 0, NE = D->getNumElems(); I < NE; ++I) {

From c9529f76017f0b517dca1e89eecdf6bbd97c3e84 Mon Sep 17 00:00:00 2001
From: Max191 <44243577+Max191@users.noreply.github.com>
Date: Tue, 25 Jun 2024 12:33:02 -0400
Subject: [PATCH 21/30] [mlir] Drop outermost dims in slice rank reduction
 inference (#95020)

The `getDroppedDims` utility function does not follow the convention of
dropping outermost unit dimensions first when inferring a rank reduction
mask for a slice. This PR updates the implementation to match this
convention.
---
 mlir/lib/Dialect/Tensor/IR/TensorOps.cpp       | 18 +++++++++---------
 ...ensor-subset-ops-into-vector-transfers.mlir | 12 ++++++++++++
 .../Dialect/Tensor/fold-tensor-subset-ops.mlir |  4 +++-
 3 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
index 6d6d7e506e8545..0e840da9530edb 100644
--- a/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
+++ b/mlir/lib/Dialect/Tensor/IR/TensorOps.cpp
@@ -135,40 +135,40 @@ bool tensor::isSameTypeWithoutEncoding(Type tp1, Type tp2) {
 static llvm::SmallBitVector getDroppedDims(ArrayRef<int64_t> reducedShape,
                                            ArrayRef<OpFoldResult> mixedSizes) {
   llvm::SmallBitVector droppedDims(mixedSizes.size());
-  int64_t shapePos = 0;
+  int64_t shapePos = reducedShape.size() - 1;
 
-  for (const auto &size : enumerate(mixedSizes)) {
+  for (const auto &size : enumerate(llvm::reverse(mixedSizes))) {
+    size_t idx = mixedSizes.size() - size.index() - 1;
     // Rank-reduced dims must have a static unit dimension.
     bool isStaticUnitSize =
         size.value().is<Attribute>() &&
         llvm::cast<IntegerAttr>(size.value().get<Attribute>()).getInt() == 1;
 
-    if (shapePos == static_cast<int64_t>(reducedShape.size())) {
+    if (shapePos < 0) {
       // There are no more dims in the reduced shape. All remaining sizes must
       // be rank-reduced dims.
       assert(isStaticUnitSize && "expected unit dim");
-      droppedDims.set(size.index());
+      droppedDims.set(idx);
       continue;
     }
 
     // Dim is preserved if the size is not a static 1.
     if (!isStaticUnitSize) {
-      ++shapePos;
+      --shapePos;
       continue;
     }
 
     // Dim is preserved if the reduced shape dim is also 1.
     if (reducedShape[shapePos] == 1) {
-      ++shapePos;
+      --shapePos;
       continue;
     }
 
     // Otherwise: Dim is dropped.
-    droppedDims.set(size.index());
+    droppedDims.set(idx);
   }
 
-  assert(shapePos == static_cast<int64_t>(reducedShape.size()) &&
-         "dimension mismatch");
+  assert(shapePos < 0 && "dimension mismatch");
   return droppedDims;
 }
 
diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir
index e562cf8efc3567..c2f21683d0cd68 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops-into-vector-transfers.mlir
@@ -102,6 +102,18 @@ func.func @insert_slice_of_transfer_write(%t1 : tensor<?x12xf32>, %v : vector<5x
   return %1 : tensor<?x12xf32>
 }
 
+// CHECK-LABEL: func @unit_insert_slice_of_unit_transfer_write(
+//  CHECK-SAME:     %[[t1:.*]]: tensor<1x1x12xf32>, %[[v:.*]]: vector<1x6xf32>, %[[s:.*]]: index
+//       CHECK:   %[[c0:.*]] = arith.constant 0 : index
+//       CHECK:   %[[r:.*]] = vector.transfer_write %[[v]], %[[t1]][%[[c0]], %[[c0]], %[[s]]] {in_bounds = [true, true]} : vector<1x6xf32>, tensor<1x1x12xf32>
+//       CHECK:   return %[[r]]
+func.func @unit_insert_slice_of_unit_transfer_write(%t1 : tensor<1x1x12xf32>, %v : vector<1x6xf32>, %s : index, %t2 : tensor<1x6xf32>) -> tensor<1x1x12xf32> {
+  %c0 = arith.constant 0 : index
+  %0 = vector.transfer_write %v, %t2[%c0, %c0] {in_bounds = [true, true]} : vector<1x6xf32>, tensor<1x6xf32>
+  %1 = tensor.insert_slice %0 into %t1[0, 0, %s] [1, 1, 6] [1, 1, 1] : tensor<1x6xf32> into tensor<1x1x12xf32>
+  return %1 : tensor<1x1x12xf32>
+}
+
 // CHECK-LABEL: func @insert_slice_of_transfer_write_non_leading_rank_reduction(
 //  CHECK-SAME:     %[[t1:.*]]: tensor<?x?x12xf32>, %[[v:.*]]: vector<5x6xf32>, %[[s:.*]]: index
 //   CHECK-DAG:   %[[c3:.*]] = arith.constant 3 : index
diff --git a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
index f2e529b4cac950..1a84e141049325 100644
--- a/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
+++ b/mlir/test/Dialect/Tensor/fold-tensor-subset-ops.mlir
@@ -282,11 +282,13 @@ func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1x1xf32>, %
 
 // -----
 
+//   CHECK-DAG: #[[$map:.*]] = affine_map<()[s0] -> (s0 + 2)>
 // CHECK-LABEL: func @insert_slice_of_insert_slice(
 //  CHECK-SAME:     %[[t:[0-9a-z]*]]: tensor<f32>
 //  CHECK-SAME:     %[[r1:[0-9a-z]*]]: tensor<1x14xf32>
 //  CHECK-SAME:     %[[pos:[0-9a-z]*]]: index
-//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][5, %[[pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
+//       CHECK:   %[[composed_pos:.+]] = affine.apply #[[$map]]()[%[[pos]]]
+//       CHECK:   tensor.insert_slice %[[t]] into %[[r1]][3, %[[composed_pos]]] [1, 1] [1, 1] : tensor<f32> into tensor<1x14xf32>
 func.func @insert_slice_of_insert_slice(%t: tensor<f32>, %r0: tensor<1xf32>, %r1: tensor<1x14xf32>, %pos: index)
     -> tensor<1x14xf32> 
 {

From 05ca20744159a33349271a96b90959a0a3807133 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Tue, 25 Jun 2024 12:32:44 -0400
Subject: [PATCH 22/30] [C23] Update status page regarding FLT_MAX_EXP

N2843 was subsumed by N2882; we could probably consider removing
subsumed entries, but I've been leaving them to help folks looking at
the editor's report from various working drafts and wondering about the
changes.
---
 clang/www/c_status.html | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 04c1df9ebc0501..06209eb0b7bed5 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -981,7 +981,7 @@ <h2 id="c2x">C23 implementation status</h2>
     <tr>
       <td>Clarification for max exponent macros</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2843.pdf">N2843</a></td>
-      <td class="unknown" align="center">Unknown</td>
+      <td class="na" align="center">Subsumed by N2882</td>
     </tr>
     <tr>
       <td>Clarification about expression transformations</td>

From d7dd778cde84110e38521a6b55dfeb4e1c649ec2 Mon Sep 17 00:00:00 2001
From: PeterChou1 <peter.chou@mail.utoronto.ca>
Date: Tue, 25 Jun 2024 12:39:33 -0400
Subject: [PATCH 23/30] [clang-doc] update install path to share/clang-doc
 instead of share/clang (#96555)

Updates the install path for clang-doc to share/clang-doc instead
share/clang to avoid confusion
---
 clang-tools-extra/clang-doc/tool/CMakeLists.txt   | 4 ++--
 clang-tools-extra/clang-doc/tool/ClangDocMain.cpp | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-doc/tool/CMakeLists.txt b/clang-tools-extra/clang-doc/tool/CMakeLists.txt
index 4944251245c6bc..e93a5728d6b6b0 100644
--- a/clang-tools-extra/clang-doc/tool/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/tool/CMakeLists.txt
@@ -25,7 +25,7 @@ set(assets
 )
 
 set(asset_dir "${CMAKE_CURRENT_SOURCE_DIR}/../assets")
-set(resource_dir "${CMAKE_BINARY_DIR}/share/clang")
+set(resource_dir "${CMAKE_BINARY_DIR}/share/clang-doc")
 set(out_files)
 
 function(copy_files_to_dst src_dir dst_dir file)
@@ -42,7 +42,7 @@ endfunction(copy_files_to_dst)
 
 foreach(f ${assets})
   install(FILES ${asset_dir}/${f}
-    DESTINATION "${CMAKE_INSTALL_DATADIR}/clang"
+    DESTINATION "${CMAKE_INSTALL_DATADIR}/clang-doc"
     COMPONENT clang-doc)
   copy_files_to_dst(${asset_dir} ${resource_dir} ${f})
 endforeach(f)
diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
index 5517522d7967dc..1feb6d3b74d70c 100644
--- a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -188,7 +188,7 @@ Example usage for a project using a compile commands database:
     llvm::sys::path::native(ClangDocPath, NativeClangDocPath);
     llvm::SmallString<128> AssetsPath;
     AssetsPath = llvm::sys::path::parent_path(NativeClangDocPath);
-    llvm::sys::path::append(AssetsPath, "..", "share", "clang");
+    llvm::sys::path::append(AssetsPath, "..", "share", "clang-doc");
     llvm::SmallString<128> DefaultStylesheet;
     llvm::sys::path::native(AssetsPath, DefaultStylesheet);
     llvm::sys::path::append(DefaultStylesheet,

From dbd5c7805bae510c79b51c2c7700a590f2df446d Mon Sep 17 00:00:00 2001
From: PeterChou1 <peter.chou@mail.utoronto.ca>
Date: Tue, 25 Jun 2024 12:40:58 -0400
Subject: [PATCH 24/30] [clang-doc] Remove stdexecpt from clang-doc test
 (#96552)

Removes stdexecpt from clang-doc test introduced in
https://github.com/llvm/llvm-project/pull/93928
since it violates the rule that test must be freestanding
---
 .../clang-doc/Inputs/basic-project/src/Calculator.cpp     | 4 ----
 clang-tools-extra/test/clang-doc/basic-project.test       | 8 ++++----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/clang-tools-extra/test/clang-doc/Inputs/basic-project/src/Calculator.cpp b/clang-tools-extra/test/clang-doc/Inputs/basic-project/src/Calculator.cpp
index df1778c3b9d55f..64f31dbf13d871 100644
--- a/clang-tools-extra/test/clang-doc/Inputs/basic-project/src/Calculator.cpp
+++ b/clang-tools-extra/test/clang-doc/Inputs/basic-project/src/Calculator.cpp
@@ -1,5 +1,4 @@
 #include "Calculator.h"
-#include <stdexcept>
 
 int Calculator::add(int a, int b) {
     return a + b;
@@ -14,8 +13,5 @@ int Calculator::multiply(int a, int b) {
 }
 
 double Calculator::divide(int a, int b) {
-    if (b == 0) {
-        throw std::invalid_argument("Division by zero");
-    }
     return static_cast<double>(a) / b;
 }
diff --git a/clang-tools-extra/test/clang-doc/basic-project.test b/clang-tools-extra/test/clang-doc/basic-project.test
index 0898acaea3a33e..c973638837613d 100644
--- a/clang-tools-extra/test/clang-doc/basic-project.test
+++ b/clang-tools-extra/test/clang-doc/basic-project.test
@@ -139,25 +139,25 @@
 // HTML-CALC-NEXT:     <div>
 // HTML-CALC-NEXT:       <h3 id="{{([0-9A-F]{40})}}">add</h3>
 // HTML-CALC-NEXT:       <p>public int add(int a, int b)</p>
-// HTML-CALC-NEXT:       <p>Defined at line 4 of file {{.*}}Calculator.cpp</p>
+// HTML-CALC-NEXT:       <p>Defined at line 3 of file {{.*}}Calculator.cpp</p>
 // HTML-CALC-NEXT:       <div>
 // HTML-CALC-NEXT:         <div></div>
 // HTML-CALC-NEXT:       </div>
 // HTML-CALC-NEXT:       <h3 id="{{([0-9A-F]{40})}}">subtract</h3>
 // HTML-CALC-NEXT:       <p>public int subtract(int a, int b)</p>
-// HTML-CALC-NEXT:       <p>Defined at line 8 of file {{.*}}Calculator.cpp</p>
+// HTML-CALC-NEXT:       <p>Defined at line 7 of file {{.*}}Calculator.cpp</p>
 // HTML-CALC-NEXT:       <div>
 // HTML-CALC-NEXT:         <div></div>
 // HTML-CALC-NEXT:       </div>
 // HTML-CALC-NEXT:       <h3 id="{{([0-9A-F]{40})}}">multiply</h3>
 // HTML-CALC-NEXT:       <p>public int multiply(int a, int b)</p>
-// HTML-CALC-NEXT:       <p>Defined at line 12 of file {{.*}}Calculator.cpp</p>
+// HTML-CALC-NEXT:       <p>Defined at line 11 of file {{.*}}Calculator.cpp</p>
 // HTML-CALC-NEXT:       <div>
 // HTML-CALC-NEXT:         <div></div>
 // HTML-CALC-NEXT:       </div>
 // HTML-CALC-NEXT:       <h3 id="{{([0-9A-F]{40})}}">divide</h3>
 // HTML-CALC-NEXT:       <p>public double divide(int a, int b)</p>
-// HTML-CALC-NEXT:       <p>Defined at line 16 of file {{.*}}Calculator.cpp</p>
+// HTML-CALC-NEXT:       <p>Defined at line 15 of file {{.*}}Calculator.cpp</p>
 // HTML-CALC-NEXT:       <div>
 // HTML-CALC-NEXT:         <div></div>
 // HTML-CALC-NEXT:       </div>

From f0f774ebf09b1f1ae8129074801342eeadf5495b Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Tue, 25 Jun 2024 09:42:01 -0700
Subject: [PATCH 25/30] [sanitizer] Rename DEFINE_REAL_PTHREAD_FUNCTIONS
 (#96527)

We use REAL() calls in interceptors, but
DEFINE_REAL_PTHREAD_FUNCTIONS has nothing to do
with them and only used for internal maintenance
threads.

This is done to avoid confusion like in #96456.
---
 compiler-rt/lib/asan/asan_interceptors.cpp    |  2 +-
 .../lib/hwasan/hwasan_interceptors.cpp        |  2 +-
 compiler-rt/lib/lsan/lsan_interceptors.cpp    |  2 +-
 .../lib/memprof/memprof_interceptors.cpp      |  2 +-
 compiler-rt/lib/msan/msan_interceptors.cpp    |  2 +-
 .../sanitizer_common_libcdep.cpp              |  4 +--
 .../lib/sanitizer_common/sanitizer_linux.cpp  |  8 +++---
 .../lib/sanitizer_common/sanitizer_posix.h    | 28 +++++++++----------
 .../lib/tsan/rtl/tsan_interceptors_posix.cpp  |  8 +++---
 9 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index 6d1360e104975f..f8f86a766b204f 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -333,7 +333,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **ret,
 }
 #    endif
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 #endif  // ASAN_INTERCEPT_PTHREAD_CREATE
 
 #if ASAN_INTERCEPT_SWAPCONTEXT
diff --git a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
index 08ae435b8214ae..c10b5c158548e9 100644
--- a/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
+++ b/compiler-rt/lib/hwasan/hwasan_interceptors.cpp
@@ -334,7 +334,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **ret,
 }
 #    endif
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 DEFINE_REAL(int, vfork,)
 DECLARE_EXTERN_INTERCEPTOR_AND_WRAPPER(int, vfork,)
diff --git a/compiler-rt/lib/lsan/lsan_interceptors.cpp b/compiler-rt/lib/lsan/lsan_interceptors.cpp
index 1fd0010f9ea936..6df4b6865b3794 100644
--- a/compiler-rt/lib/lsan/lsan_interceptors.cpp
+++ b/compiler-rt/lib/lsan/lsan_interceptors.cpp
@@ -525,7 +525,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **ret,
 #    define LSAN_MAYBE_INTERCEPT_TIMEDJOIN
 #  endif  // SANITIZER_INTERCEPT_TIMEDJOIN
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 INTERCEPTOR(void, _exit, int status) {
   if (status == 0 && HasReportedLeaks()) status = common_flags()->exitcode;
diff --git a/compiler-rt/lib/memprof/memprof_interceptors.cpp b/compiler-rt/lib/memprof/memprof_interceptors.cpp
index a267f6d3d6717b..53ee4e953419b5 100644
--- a/compiler-rt/lib/memprof/memprof_interceptors.cpp
+++ b/compiler-rt/lib/memprof/memprof_interceptors.cpp
@@ -166,7 +166,7 @@ INTERCEPTOR(int, pthread_join, void *t, void **arg) {
   return REAL(pthread_join)(t, arg);
 }
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 INTERCEPTOR(char *, index, const char *string, int c)
 ALIAS(WRAP(strchr));
diff --git a/compiler-rt/lib/msan/msan_interceptors.cpp b/compiler-rt/lib/msan/msan_interceptors.cpp
index 9abf2406332588..789b739b41189a 100644
--- a/compiler-rt/lib/msan/msan_interceptors.cpp
+++ b/compiler-rt/lib/msan/msan_interceptors.cpp
@@ -1226,7 +1226,7 @@ INTERCEPTOR(int, pthread_timedjoin_np, void *thread, void **retval,
 }
 #endif
 
-DEFINE_REAL_PTHREAD_FUNCTIONS
+DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 
 extern char *tzname[2];
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
index 7b74bb1a7e0f3c..a174ae7be991d0 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp
@@ -87,8 +87,8 @@ void MaybeStartBackgroudThread() {
   if (!common_flags()->hard_rss_limit_mb &&
       !common_flags()->soft_rss_limit_mb &&
       !common_flags()->heap_profile) return;
-  if (!&real_pthread_create) {
-    VPrintf(1, "%s: real_pthread_create undefined\n", SanitizerToolName);
+  if (!&internal_pthread_create) {
+    VPrintf(1, "%s: internal_pthread_create undefined\n", SanitizerToolName);
     return;  // Can't spawn the thread anyway.
   }
 
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
index bf2002b6b3de63..12df3ef73da4bc 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_linux.cpp
@@ -1845,18 +1845,18 @@ HandleSignalMode GetHandleSignalMode(int signum) {
 
 #  if !SANITIZER_GO
 void *internal_start_thread(void *(*func)(void *arg), void *arg) {
-  if (&real_pthread_create == 0)
+  if (&internal_pthread_create == 0)
     return nullptr;
   // Start the thread with signals blocked, otherwise it can steal user signals.
   ScopedBlockSignals block(nullptr);
   void *th;
-  real_pthread_create(&th, nullptr, func, arg);
+  internal_pthread_create(&th, nullptr, func, arg);
   return th;
 }
 
 void internal_join_thread(void *th) {
-  if (&real_pthread_join)
-    real_pthread_join(th, nullptr);
+  if (&internal_pthread_join)
+    internal_pthread_join(th, nullptr);
 }
 #  else
 void *internal_start_thread(void *(*func)(void *), void *arg) { return 0; }
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_posix.h b/compiler-rt/lib/sanitizer_common/sanitizer_posix.h
index c5811dffea94b5..14617e4771bec4 100644
--- a/compiler-rt/lib/sanitizer_common/sanitizer_posix.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_posix.h
@@ -74,21 +74,21 @@ int internal_sysctlbyname(const char *sname, void *oldp, uptr *oldlenp,
 // These functions call appropriate pthread_ functions directly, bypassing
 // the interceptor. They are weak and may not be present in some tools.
 SANITIZER_WEAK_ATTRIBUTE
-int real_pthread_create(void *th, void *attr, void *(*callback)(void *),
-                        void *param);
+int internal_pthread_create(void *th, void *attr, void *(*callback)(void *),
+                            void *param);
 SANITIZER_WEAK_ATTRIBUTE
-int real_pthread_join(void *th, void **ret);
-
-#define DEFINE_REAL_PTHREAD_FUNCTIONS                                          \
-  namespace __sanitizer {                                                      \
-  int real_pthread_create(void *th, void *attr, void *(*callback)(void *),     \
-                          void *param) {                                       \
-    return REAL(pthread_create)(th, attr, callback, param);                    \
-  }                                                                            \
-  int real_pthread_join(void *th, void **ret) {                                \
-    return REAL(pthread_join(th, ret));                                        \
-  }                                                                            \
-  }  // namespace __sanitizer
+int internal_pthread_join(void *th, void **ret);
+
+#  define DEFINE_INTERNAL_PTHREAD_FUNCTIONS                               \
+    namespace __sanitizer {                                               \
+    int internal_pthread_create(void *th, void *attr,                     \
+                                void *(*callback)(void *), void *param) { \
+      return REAL(pthread_create)(th, attr, callback, param);             \
+    }                                                                     \
+    int internal_pthread_join(void *th, void **ret) {                     \
+      return REAL(pthread_join(th, ret));                                 \
+    }                                                                     \
+    }  // namespace __sanitizer
 
 int internal_pthread_attr_getstack(void *attr, void **addr, uptr *size);
 
diff --git a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
index 151693112158b4..034ae3d322b56b 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_interceptors_posix.cpp
@@ -1088,14 +1088,14 @@ TSAN_INTERCEPTOR(int, pthread_join, void *th, void **ret) {
   return res;
 }
 
-// DEFINE_REAL_PTHREAD_FUNCTIONS
+// DEFINE_INTERNAL_PTHREAD_FUNCTIONS
 namespace __sanitizer {
-int real_pthread_create(void *th, void *attr, void *(*callback)(void *),
-                        void *param) {
+int internal_pthread_create(void *th, void *attr, void *(*callback)(void *),
+                            void *param) {
   ScopedIgnoreInterceptors ignore;
   return REAL(pthread_create)(th, attr, callback, param);
 }
-int real_pthread_join(void *th, void **ret) {
+int internal_pthread_join(void *th, void **ret) {
   ScopedIgnoreInterceptors ignore;
   return REAL(pthread_join(th, ret));
 }

From de7c1396f29b9bf7011912e7cfea9edad1efb492 Mon Sep 17 00:00:00 2001
From: Han-Kuan Chen <hankuan.chen@sifive.com>
Date: Wed, 26 Jun 2024 00:42:38 +0800
Subject: [PATCH 26/30] [SLP] NFC. Refactor and add getAltInstrMask help
 function. (#94709)

Co-authored-by: Alexey Bataev <a.bataev@gmx.com>
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    | 29 +++++++++----------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 494db0493dacab..08fcca6e9bef89 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -983,6 +983,17 @@ static void fixupOrderingIndices(MutableArrayRef<unsigned> Order) {
   }
 }
 
+/// \returns a bitset for selecting opcodes. false for Opcode0 and true for
+/// Opcode1.
+SmallBitVector getAltInstrMask(ArrayRef<Value *> VL, unsigned Opcode0,
+                               unsigned Opcode1) {
+  SmallBitVector OpcodeMask(VL.size(), false);
+  for (unsigned Lane : seq<unsigned>(VL.size()))
+    if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
+      OpcodeMask.set(Lane);
+  return OpcodeMask;
+}
+
 namespace llvm {
 
 static void inversePermutation(ArrayRef<unsigned> Indices,
@@ -5093,11 +5104,7 @@ void BoUpSLP::reorderTopToBottom() {
           FixedVectorType::get(TE->Scalars[0]->getType(), TE->Scalars.size());
       unsigned Opcode0 = TE->getOpcode();
       unsigned Opcode1 = TE->getAltOpcode();
-      // The opcode mask selects between the two opcodes.
-      SmallBitVector OpcodeMask(TE->Scalars.size(), false);
-      for (unsigned Lane : seq<unsigned>(0, TE->Scalars.size()))
-        if (cast<Instruction>(TE->Scalars[Lane])->getOpcode() == Opcode1)
-          OpcodeMask.set(Lane);
+      SmallBitVector OpcodeMask(getAltInstrMask(TE->Scalars, Opcode0, Opcode1));
       // If this pattern is supported by the target then we consider the order.
       if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {
         VFToOrderedEntries[TE->getVectorFactor()].insert(TE.get());
@@ -6009,11 +6016,7 @@ bool BoUpSLP::areAltOperandsProfitable(const InstructionsState &S,
                                        ArrayRef<Value *> VL) const {
   unsigned Opcode0 = S.getOpcode();
   unsigned Opcode1 = S.getAltOpcode();
-  // The opcode mask selects between the two opcodes.
-  SmallBitVector OpcodeMask(VL.size(), false);
-  for (unsigned Lane : seq<unsigned>(0, VL.size()))
-    if (cast<Instruction>(VL[Lane])->getOpcode() == Opcode1)
-      OpcodeMask.set(Lane);
+  SmallBitVector OpcodeMask(getAltInstrMask(VL, Opcode0, Opcode1));
   // If this pattern is supported by the target then consider it profitable.
   if (TTI->isLegalAltInstr(FixedVectorType::get(S.MainOp->getType(), VL.size()),
                            Opcode0, Opcode1, OpcodeMask))
@@ -9744,11 +9747,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef<Value *> VectorizedVals,
       // order.
       unsigned Opcode0 = E->getOpcode();
       unsigned Opcode1 = E->getAltOpcode();
-      // The opcode mask selects between the two opcodes.
-      SmallBitVector OpcodeMask(E->Scalars.size(), false);
-      for (unsigned Lane : seq<unsigned>(0, E->Scalars.size()))
-        if (cast<Instruction>(E->Scalars[Lane])->getOpcode() == Opcode1)
-          OpcodeMask.set(Lane);
+      SmallBitVector OpcodeMask(getAltInstrMask(E->Scalars, Opcode0, Opcode1));
       // If this pattern is supported by the target then we consider the
       // order.
       if (TTIRef.isLegalAltInstr(VecTy, Opcode0, Opcode1, OpcodeMask)) {

From aaf50bf34f3a2007221c644384d238666cfc2bc3 Mon Sep 17 00:00:00 2001
From: Jay Foad <jay.foad@amd.com>
Date: Tue, 25 Jun 2024 17:43:00 +0100
Subject: [PATCH 27/30] [AMDGPU] Disallow negative s_load offsets in
 isLegalAddressingMode (#91327)

---
 llvm/lib/Target/AMDGPU/SIISelLowering.cpp     | 10 +++
 .../AMDGPU/cgp-addressing-modes-smem.ll       | 89 +++++++++++++------
 2 files changed, 72 insertions(+), 27 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 35774e44aba2e9..b8ff5ed35ac800 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -1610,6 +1610,16 @@ bool SITargetLowering::isLegalAddressingMode(const DataLayout &DL,
         return false;
     }
 
+    if ((AS == AMDGPUAS::CONSTANT_ADDRESS ||
+         AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT) &&
+        AM.BaseOffs < 0) {
+      // Scalar (non-buffer) loads can only use a negative offset if
+      // soffset+offset is non-negative. Since the compiler can only prove that
+      // in a few special cases, it is safer to claim that negative offsets are
+      // not supported.
+      return false;
+    }
+
     if (AM.Scale == 0) // r + i or just i, depending on HasBaseReg.
       return true;
 
diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
index 41d2360dd5e1e6..c7f7f30a5e6bd3 100644
--- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
+++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-smem.ll
@@ -279,33 +279,19 @@ end:
 }
 
 define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
-; GFX678-LABEL: test_sink_smem_offset_neg400:
-; GFX678:       ; %bb.0: ; %entry
-; GFX678-NEXT:    s_add_u32 s0, s0, 0xfffffe70
-; GFX678-NEXT:    s_addc_u32 s1, s1, -1
-; GFX678-NEXT:  .LBB5_1: ; %loop
-; GFX678-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX678-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX678-NEXT:    s_load_dword s3, s[0:1], 0x0
-; GFX678-NEXT:    s_add_i32 s2, s2, -1
-; GFX678-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX678-NEXT:    s_cbranch_scc1 .LBB5_1
-; GFX678-NEXT:  ; %bb.2: ; %end
-; GFX678-NEXT:    s_endpgm
-;
-; GFX9-LABEL: test_sink_smem_offset_neg400:
-; GFX9:       ; %bb.0: ; %entry
-; GFX9-NEXT:  .LBB5_1: ; %loop
-; GFX9-NEXT:    ; =>This Inner Loop Header: Depth=1
-; GFX9-NEXT:    s_add_i32 s2, s2, -1
-; GFX9-NEXT:    s_add_u32 s4, s0, 0xfffffe70
-; GFX9-NEXT:    s_addc_u32 s5, s1, -1
-; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX9-NEXT:    s_load_dword s3, s[4:5], 0x0
-; GFX9-NEXT:    s_cmp_lg_u32 s2, 0
-; GFX9-NEXT:    s_cbranch_scc1 .LBB5_1
-; GFX9-NEXT:  ; %bb.2: ; %end
-; GFX9-NEXT:    s_endpgm
+; GFX6789-LABEL: test_sink_smem_offset_neg400:
+; GFX6789:       ; %bb.0: ; %entry
+; GFX6789-NEXT:    s_add_u32 s0, s0, 0xfffffe70
+; GFX6789-NEXT:    s_addc_u32 s1, s1, -1
+; GFX6789-NEXT:  .LBB5_1: ; %loop
+; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6789-NEXT:    s_load_dword s3, s[0:1], 0x0
+; GFX6789-NEXT:    s_add_i32 s2, s2, -1
+; GFX6789-NEXT:    s_cmp_lg_u32 s2, 0
+; GFX6789-NEXT:    s_cbranch_scc1 .LBB5_1
+; GFX6789-NEXT:  ; %bb.2: ; %end
+; GFX6789-NEXT:    s_endpgm
 ;
 ; GFX12-LABEL: test_sink_smem_offset_neg400:
 ; GFX12:       ; %bb.0: ; %entry
@@ -337,3 +323,52 @@ loop:
 end:
   ret void
 }
+
+; Same for address space 6, constant 32-bit.
+define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
+; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
+; GFX6789:       ; %bb.0: ; %entry
+; GFX6789-NEXT:    s_add_i32 s2, s0, 0xfffffe70
+; GFX6789-NEXT:    s_mov_b32 s3, 0
+; GFX6789-NEXT:  .LBB6_1: ; %loop
+; GFX6789-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX6789-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6789-NEXT:    s_load_dword s0, s[2:3], 0x0
+; GFX6789-NEXT:    s_add_i32 s1, s1, -1
+; GFX6789-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX6789-NEXT:    s_cbranch_scc1 .LBB6_1
+; GFX6789-NEXT:  ; %bb.2: ; %end
+; GFX6789-NEXT:    s_endpgm
+;
+; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
+; GFX12:       ; %bb.0: ; %entry
+; GFX12-NEXT:    s_add_co_i32 s2, s0, 0xfffffe70
+; GFX12-NEXT:    s_mov_b32 s3, 0
+; GFX12-NEXT:  .LBB6_1: ; %loop
+; GFX12-NEXT:    ; =>This Inner Loop Header: Depth=1
+; GFX12-NEXT:    s_wait_kmcnt 0x0
+; GFX12-NEXT:    s_load_b32 s0, s[2:3], 0x0
+; GFX12-NEXT:    s_add_co_i32 s1, s1, -1
+; GFX12-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
+; GFX12-NEXT:    s_cmp_lg_u32 s1, 0
+; GFX12-NEXT:    s_cbranch_scc1 .LBB6_1
+; GFX12-NEXT:  ; %bb.2: ; %end
+; GFX12-NEXT:    s_endpgm
+entry:
+  %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
+  br label %loop
+
+loop:
+  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
+  %dec = sub i32 %count, 1
+  %load = load volatile i32, ptr addrspace(6) %gep
+  %cond = icmp eq i32 %dec, 0
+  br i1 %cond, label %end, label %loop
+
+end:
+  ret void
+}
+
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX678: {{.*}}
+; GFX9: {{.*}}

From 5e2beed9a1643cd6358fb8b43feb893543d90bf3 Mon Sep 17 00:00:00 2001
From: Aaron Ballman <aaron@aaronballman.com>
Date: Tue, 25 Jun 2024 12:45:41 -0400
Subject: [PATCH 28/30] [C23] Move WG14 N2931 to the TS18661 section

This paper only matters for TS18661-3 integration.
---
 clang/www/c_status.html | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/clang/www/c_status.html b/clang/www/c_status.html
index 06209eb0b7bed5..5b7ba7686f9572 100644
--- a/clang/www/c_status.html
+++ b/clang/www/c_status.html
@@ -658,7 +658,7 @@ <h2 id="c2x">C23 implementation status</h2>
       <td class="full" align="center">Clang 9</td>
     </tr>
     <tr id="TS18661">
-      <td rowspan="9">TS 18661 Integration</td>
+      <td rowspan="10">TS 18661 Integration</td>
     </tr>
       <tr> <!-- Pre-Oct 2019 -->
         <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2314.pdf">N2314</a></td>
@@ -692,6 +692,10 @@ <h2 id="c2x">C23 implementation status</h2>
         <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2755.htm">N2755</a></td>
         <td class="unknown" align="center">Unknown</td>
       </tr>
+      <tr> <!-- Feb 2022 -->
+        <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2931.pdf">N2931</a></td>
+        <td class="unknown" align="center">Unknown</td>
+      </tr>
     <tr>
       <td>Preprocessor line numbers unspecified</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2322.htm">N2322</a></td>
@@ -1091,11 +1095,6 @@ <h2 id="c2x">C23 implementation status</h2>
         <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2930.pdf">N2930</a></td>
         <td class="full" align="center">Clang 16</td>
       </tr>
-    <tr>
-      <td>Type annex tgmath narrowing macros with integer args v2</td>
-      <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2931.pdf">N2931</a></td>
-      <td class="unknown" align="center">Unknown</td>
-    </tr>
     <tr>
       <td>Revise spelling of keywords v7</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2934.pdf">N2934</a></td>

From 0b049ce64653a8cbdeedaa2bfe6cc79fc6af5d40 Mon Sep 17 00:00:00 2001
From: Vitaly Buka <vitalybuka@google.com>
Date: Tue, 25 Jun 2024 09:58:32 -0700
Subject: [PATCH 29/30] [tsan] Test `__tsan_test_only_on_fork` only on Mac
 (#96597)

According to https://reviews.llvm.org/D114250
this was to handle Mac specific issue, however
the test is Linux only.

The test effectively prevents to lock main allocator
on fork, but we do that on Linux for other
sanitizers for years, and need to do the same
for TSAN to avoid deadlocks.
---
 compiler-rt/lib/tsan/rtl/tsan_rtl.cpp                     | 4 ++++
 compiler-rt/test/tsan/{Linux => Darwin}/fork_deadlock.cpp | 0
 2 files changed, 4 insertions(+)
 rename compiler-rt/test/tsan/{Linux => Darwin}/fork_deadlock.cpp (100%)

diff --git a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
index fd9441dfcb53cb..2d5992b703a6ab 100644
--- a/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
+++ b/compiler-rt/lib/tsan/rtl/tsan_rtl.cpp
@@ -35,8 +35,10 @@ extern "C" void __tsan_resume() {
   __tsan_resumed = 1;
 }
 
+#if SANITIZER_APPLE
 SANITIZER_WEAK_DEFAULT_IMPL
 void __tsan_test_only_on_fork() {}
+#endif
 
 namespace __tsan {
 
@@ -828,7 +830,9 @@ void ForkBefore(ThreadState* thr, uptr pc) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
   // Disables memory write in OnUserAlloc/Free.
   thr->ignore_reads_and_writes++;
 
+#  if SANITIZER_APPLE
   __tsan_test_only_on_fork();
+#  endif
 }
 
 static void ForkAfter(ThreadState* thr) SANITIZER_NO_THREAD_SAFETY_ANALYSIS {
diff --git a/compiler-rt/test/tsan/Linux/fork_deadlock.cpp b/compiler-rt/test/tsan/Darwin/fork_deadlock.cpp
similarity index 100%
rename from compiler-rt/test/tsan/Linux/fork_deadlock.cpp
rename to compiler-rt/test/tsan/Darwin/fork_deadlock.cpp

From 4c87212d63c3b45e9371434239553ef007216106 Mon Sep 17 00:00:00 2001
From: "Nick Desaulniers (paternity leave)"
 <nickdesaulniers@users.noreply.github.com>
Date: Tue, 25 Jun 2024 09:58:50 -0700
Subject: [PATCH 30/30] [libc][thumb] support syscalls from thumb mode (#96558)

r7 is reserved in thumb2 (typically for the frame pointer, as opposed to r11 in
ARM mode), so assigning to a variable with explicit register storage in r7 will
produce an error.

But r7 is where the Linux kernel expects the syscall number to be placed. We
can use a temporary to get the register allocator to pick a temporary, which we
save+restore the previous value of r7 in.

Fixes: #93738
---
 libc/src/__support/OSUtil/linux/arm/syscall.h | 26 ++++++++++++++-----
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/libc/src/__support/OSUtil/linux/arm/syscall.h b/libc/src/__support/OSUtil/linux/arm/syscall.h
index af100747832750..9674ee45a49e91 100644
--- a/libc/src/__support/OSUtil/linux/arm/syscall.h
+++ b/libc/src/__support/OSUtil/linux/arm/syscall.h
@@ -12,14 +12,29 @@
 #include "src/__support/common.h"
 
 #ifdef __thumb__
-#error "The arm syscall implementation does not yet support thumb flavor."
-#endif // __thumb__
+#define R7 long r7 = number
+#define SYSCALL_INSTR(input_constraint)                                        \
+  int temp;                                                                    \
+  LIBC_INLINE_ASM(R"(
+    mov %[temp], r7
+    mov r7, %2
+    svc #0
+    mov r7, %[temp]
+  )"                                                          \
+                  : "=r"(r0), [temp] "=&r"(temp)                               \
+                  : input_constraint                                           \
+                  : "memory", "cc")
+#else
+#define R7 register long r7 asm("r7") = number
+#define SYSCALL_INSTR(input_constraint)                                        \
+  LIBC_INLINE_ASM("svc 0" : "=r"(r0) : input_constraint : "memory", "cc")
+#endif
 
 #define REGISTER_DECL_0                                                        \
-  register long r7 __asm__("r7") = number;                                     \
+  R7;                                                                          \
   register long r0 __asm__("r0");
 #define REGISTER_DECL_1                                                        \
-  register long r7 __asm__("r7") = number;                                     \
+  R7;                                                                          \
   register long r0 __asm__("r0") = arg1;
 #define REGISTER_DECL_2                                                        \
   REGISTER_DECL_1                                                              \
@@ -45,9 +60,6 @@
 #define REGISTER_CONSTRAINT_5 REGISTER_CONSTRAINT_4, "r"(r4)
 #define REGISTER_CONSTRAINT_6 REGISTER_CONSTRAINT_5, "r"(r5)
 
-#define SYSCALL_INSTR(input_constraint)                                        \
-  LIBC_INLINE_ASM("svc 0" : "=r"(r0) : input_constraint : "memory", "cc")
-
 namespace LIBC_NAMESPACE {
 
 LIBC_INLINE long syscall_impl(long number) {