From ad9c284cc3186f9ca603b1a99a0723b45a07ba09 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 13 Apr 2020 12:24:09 -0500 Subject: [PATCH 001/216] [Attributor][NFC] Run the verifier only on functions and under EXPENSIVE_CHECKS Running the verifier is expensive so we want to avoid it even in runs that enable assertions. As we move closer to enabling the Attributor this code will be executed by some buildbots but not cause overhead for most people. --- llvm/lib/Transforms/IPO/Attributor.cpp | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 647518522c36..4cf2812d0dcc 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1248,6 +1248,14 @@ ChangeStatus Attributor::run() { "specified iterations!"); } +#ifdef EXPENSIVE_CHECKS + for (Function *F : Functions) { + if (ToBeDeletedFunctions.count(F)) + continue; + assert(!verifyFunction(*F, &errs()) && "Module verification failed!"); + } +#endif + return ManifestChange; } @@ -1982,10 +1990,7 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, A.identifyDefaultAbstractAttributes(*F); } - Module &M = *Functions.front()->getParent(); - (void)M; ChangeStatus Changed = A.run(); - assert(!verifyModule(M, &errs()) && "Module verification failed!"); LLVM_DEBUG(dbgs() << "[Attributor] Done with " << Functions.size() << " functions, result: " << Changed << ".\n"); return Changed == ChangeStatus::CHANGED; From 253d6be0f6fa7d96f78127a6da527d38f02e81d8 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Tue, 14 Apr 2020 18:49:59 -0500 Subject: [PATCH 002/216] [Attributor][FIX] Properly check for accesses to globals The check if globals were accessed was not always working because two bits are set for NO_GLOBAL_MEM. The new check works also if only on kind of globals (internal/external) is accessed. --- .../Transforms/IPO/AttributorAttributes.cpp | 7 +-- .../Transforms/Attributor/memory_locations.ll | 44 +++++++++++++++++++ .../test/Transforms/Attributor/nocapture-1.ll | 4 +- 3 files changed, 50 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 8cf45fd42222..be54fb3eb536 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -6251,8 +6251,9 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, nullptr, Changed); } - // Now handle global memory if it might be accessed. - bool HasGlobalAccesses = !(ICSAssumedNotAccessedLocs & NO_GLOBAL_MEM); + // Now handle global memory if it might be accessed. This is slightly tricky + // as NO_GLOBAL_MEM has multiple bits set. + bool HasGlobalAccesses = ((~ICSAssumedNotAccessedLocs) & NO_GLOBAL_MEM); if (HasGlobalAccesses) { auto AccessPred = [&](const Instruction *, const Value *Ptr, AccessKind Kind, MemoryLocationsKind MLK) { @@ -6270,7 +6271,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n"); // Now handle argument memory if it might be accessed. 
- bool HasArgAccesses = !(ICSAssumedNotAccessedLocs & NO_ARGUMENT_MEM); + bool HasArgAccesses = ((~ICSAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); if (HasArgAccesses) { for (unsigned ArgNo = 0, e = ICS.getNumArgOperands(); ArgNo < e; ++ArgNo) { diff --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll index c3222367360a..237617d6a9cc 100644 --- a/llvm/test/Transforms/Attributor/memory_locations.ll +++ b/llvm/test/Transforms/Attributor/memory_locations.ll @@ -397,3 +397,47 @@ define void @callerE(i8* %arg) { ret void } +@G = external dso_local global i32, align 4 + +; CHECK: Function Attrs: +; CHECK-SAME: writeonly +define void @write_global() { +; CHECK-LABEL: define {{[^@]+}}@write_global() +; CHECK-NEXT: store i32 0, i32* @G, align 4 +; CHECK-NEXT: ret void +; + store i32 0, i32* @G, align 4 + ret void +} +; CHECK: Function Attrs: argmemonly +; CHECK-SAME: writeonly +define void @write_global_via_arg(i32* %GPtr) { +; CHECK-LABEL: define {{[^@]+}}@write_global_via_arg +; CHECK-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[GPTR:%.*]]) +; CHECK-NEXT: store i32 0, i32* [[GPTR]], align 4 +; CHECK-NEXT: ret void +; + store i32 0, i32* %GPtr, align 4 + ret void +} + +; CHECK: Function Attrs: +; CHECK-SAME: writeonly +define void @writeonly_global() { +; CHECK-LABEL: define {{[^@]+}}@writeonly_global() +; CHECK-NEXT: call void @write_global() +; CHECK-NEXT: ret void +; + call void @write_global() + ret void +} +; CHECK: Function Attrs: +; CHECK-SAME: writeonly +define void @writeonly_global_via_arg() { +; CHECK-LABEL: define {{[^@]+}}@writeonly_global_via_arg() +; CHECK-NEXT: call void @write_global_via_arg(i32* nofree nonnull writeonly align 4 dereferenceable(4) @G) +; CHECK-NEXT: ret void +; + call void @write_global_via_arg(i32* @G) + ret void +} diff --git a/llvm/test/Transforms/Attributor/nocapture-1.ll b/llvm/test/Transforms/Attributor/nocapture-1.ll index 9a16f5e8ad3d..24018b289579 100644 --- a/llvm/test/Transforms/Attributor/nocapture-1.ll +++ b/llvm/test/Transforms/Attributor/nocapture-1.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes -; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM -; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM +; RUN: opt -attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM +; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-disable=false -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM ; RUN: opt -attributor-cgscc 
-attributor-manifest-internal -attributor-disable=false -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-disable=false -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM From 8ad19ffa7bd2ce50b90f3214b6012ec2d880cf52 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 11 Apr 2020 12:36:24 -0500 Subject: [PATCH 003/216] [MustExecute][NFC] Copy function_ref instead of passing a reference --- llvm/include/llvm/Analysis/MustExecute.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/MustExecute.h b/llvm/include/llvm/Analysis/MustExecute.h index aa54db55b04f..f40fa1f8bf9d 100644 --- a/llvm/include/llvm/Analysis/MustExecute.h +++ b/llvm/include/llvm/Analysis/MustExecute.h @@ -468,7 +468,7 @@ struct MustBeExecutedContextExplorer { /// This method will evaluate \p Pred and return /// true if \p Pred holds in every instruction. bool checkForAllContext(const Instruction *PP, - const function_ref &Pred) { + function_ref Pred) { for (auto EIt = begin(PP), EEnd = end(PP); EIt != EEnd; ++EIt) if (!Pred(*EIt)) return false; From ea7f17ee387062fcb74af2ba27c426a68c6bc8ad Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 12 Apr 2020 12:41:56 -0500 Subject: [PATCH 004/216] [InstCombine] Simplify calls with casted `returned` attribute The handling of the `returned` attribute in D75815 did miss the case where the argument is (bit)casted to a different type. This is explicitly allowed by the language reference and exposed by the Attributor. 
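For example (this mirrors the @returned_ptr_arg_casted test added to
llvm/test/Transforms/InstCombine/call-returned.ll below; @passthru_p8_from_p32
is a test-only declaration used for illustration, not a real API):

  ; the i32* argument is marked `returned`, but the call result type is i8*
  declare i8* @passthru_p8_from_p32(i32* returned)

  define i8* @returned_ptr_arg_casted(i32* %a) {
    %x = call i8* @passthru_p8_from_p32(i32* %a)
    ret i8* %x
  }

With this change, when the returned argument type (here i32*) differs from the
call result type (i8*) but can be losslessly bit- or pointer-cast to it,
InstCombine emits that cast and uses it in place of the call result, so the
`ret` above returns a cast of %a instead of %x.
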
Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D77977 --- .../InstCombine/InstCombineCalls.cpp | 9 +++- .../Transforms/InstCombine/call-returned.ll | 43 +++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 28236ad4933d..26fe9e659c7e 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4654,8 +4654,13 @@ Instruction *InstCombiner::visitCallBase(CallBase &Call) { } if (!Call.use_empty() && !Call.isMustTailCall()) - if (Value *ReturnedArg = Call.getReturnedArgOperand()) - return replaceInstUsesWith(Call, ReturnedArg); + if (Value *ReturnedArg = Call.getReturnedArgOperand()) { + Type *CallTy = Call.getType(); + Type *RetArgTy = ReturnedArg->getType(); + if (RetArgTy->canLosslesslyBitCastTo(CallTy)) + return replaceInstUsesWith( + Call, Builder.CreateBitOrPointerCast(ReturnedArg, CallTy)); + } if (isAllocLikeFn(&Call, &TLI)) return visitAllocSite(Call); diff --git a/llvm/test/Transforms/InstCombine/call-returned.ll b/llvm/test/Transforms/InstCombine/call-returned.ll index 24d95a316c42..bf442b0e9679 100644 --- a/llvm/test/Transforms/InstCombine/call-returned.ll +++ b/llvm/test/Transforms/InstCombine/call-returned.ll @@ -3,6 +3,8 @@ declare i32 @passthru_i32(i32 returned) declare i8* @passthru_p8(i8* returned) +declare i8* @passthru_p8_from_p32(i32* returned) +declare <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> returned) define i32 @returned_const_int_arg() { ; CHECK-LABEL: @returned_const_int_arg( @@ -22,6 +24,46 @@ define i8* @returned_const_ptr_arg() { ret i8* %x } +define i8* @returned_const_ptr_arg_casted() { +; CHECK-LABEL: @returned_const_ptr_arg_casted( +; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8_from_p32(i32* null) +; CHECK-NEXT: ret i8* null +; + %x = call i8* @passthru_p8_from_p32(i32* null) + ret i8* %x +} + +define i8* @returned_ptr_arg_casted(i32* %a) { +; CHECK-LABEL: @returned_ptr_arg_casted( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[A:%.*]] to i8* +; CHECK-NEXT: [[X:%.*]] = call i8* @passthru_p8_from_p32(i32* [[A]]) +; CHECK-NEXT: ret i8* [[TMP1]] +; + %x = call i8* @passthru_p8_from_p32(i32* %a) + ret i8* %x +} + +@GV = constant <2 x i32> zeroinitializer +define <8 x i8> @returned_const_vec_arg_casted() { +; CHECK-LABEL: @returned_const_vec_arg_casted( +; CHECK-NEXT: [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> zeroinitializer) +; CHECK-NEXT: ret <8 x i8> zeroinitializer +; + %v = load <2 x i32>, <2 x i32>* @GV + %x = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> %v) + ret <8 x i8> %x +} + +define <8 x i8> @returned_vec_arg_casted(<2 x i32> %a) { +; CHECK-LABEL: @returned_vec_arg_casted( +; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[A:%.*]] to <8 x i8> +; CHECK-NEXT: [[X:%.*]] = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> [[A]]) +; CHECK-NEXT: ret <8 x i8> [[TMP1]] +; + %x = call <8 x i8> @passthru_8i8v_from_2i32v(<2 x i32> %a) + ret <8 x i8> %x +} + define i32 @returned_var_arg(i32 %arg) { ; CHECK-LABEL: @returned_var_arg( ; CHECK-NEXT: [[X:%.*]] = call i32 @passthru_i32(i32 [[ARG:%.*]]) @@ -48,3 +90,4 @@ define i32 @returned_var_arg_musttail(i32 %arg) { %x = musttail call i32 @passthru_i32(i32 %arg) ret i32 %x } + From 0741dec27b049111094aec22f547651bb42a27ce Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 10 Apr 2020 23:41:59 -0500 Subject: [PATCH 005/216] [Attributor][FIX] Handle 
droppable uses when replacing values Since we use the fact that some uses are droppable in the Attributor we need to handle them explicitly when we replace uses. As an example, an assumed dead value can have live droppable users. In those we cannot replace the value simply by an undef. Instead, we either drop the uses (via `dropDroppableUses`) or keep them as they are. In this patch we do both, depending on the situation. For values that are dead but not necessarily removed we keep droppable uses around because they contain information we might be able to use later. For values that are removed we drop droppable uses explicitly to avoid replacement with undef. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 9 ++++++--- llvm/lib/Transforms/IPO/Attributor.cpp | 1 + llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 9 +++++++-- llvm/test/Transforms/Attributor/dereferenceable-1.ll | 8 ++++---- llvm/test/Transforms/Attributor/nonnull.ll | 2 +- 5 files changed, 19 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index cc4390f41d49..a4b0c6a605e7 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -875,11 +875,14 @@ struct Attributor { } /// Helper function to replace all uses of \p V with \p NV. Return true if - /// there is any change. - bool changeValueAfterManifest(Value &V, Value &NV) { + /// there is any change. The flag \p ChangeDroppable indicates if dropppable + /// uses should be changed too. + bool changeValueAfterManifest(Value &V, Value &NV, + bool ChangeDroppable = true) { bool Changed = false; for (auto &U : V.uses()) - Changed |= changeUseAfterManifest(U, NV); + if (ChangeDroppable || !U.getUser()->isDroppable()) + Changed |= changeUseAfterManifest(U, NV); return Changed; } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 4cf2812d0dcc..e8e8aed8d31b 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1169,6 +1169,7 @@ ChangeStatus Attributor::run() { for (auto &V : ToBeDeletedInsts) { if (Instruction *I = dyn_cast_or_null(V)) { + I->dropDroppableUses(); CGModifiedFunctions.insert(I->getFunction()); if (!I->getType()->isVoidTy()) I->replaceAllUsesWith(UndefValue::get(I->getType())); diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index be54fb3eb536..21cb9ed9d9aa 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -2675,8 +2675,11 @@ struct AAIsDeadFloating : public AAIsDeadValueImpl { if (C.hasValue() && C.getValue()) return ChangeStatus::UNCHANGED; + // Replace the value with undef as it is dead but keep droppable uses around + // as they provide information we don't want to give up on just yet. UndefValue &UV = *UndefValue::get(V.getType()); - bool AnyChange = A.changeValueAfterManifest(V, UV); + bool AnyChange = + A.changeValueAfterManifest(V, UV, /* ChangeDropppable */ false); return AnyChange ? 
ChangeStatus::CHANGED : ChangeStatus::UNCHANGED; } @@ -2703,8 +2706,10 @@ struct AAIsDeadArgument : public AAIsDeadFloating { if (A.registerFunctionSignatureRewrite( Arg, /* ReplacementTypes */ {}, Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{}, - Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) + Attributor::ArgumentReplacementInfo::ACSRepairCBTy{})) { + Arg.dropDroppableUses(); return ChangeStatus::CHANGED; + } return Changed; } diff --git a/llvm/test/Transforms/Attributor/dereferenceable-1.ll b/llvm/test/Transforms/Attributor/dereferenceable-1.ll index a1da5fbe2d7c..9036667630eb 100644 --- a/llvm/test/Transforms/Attributor/dereferenceable-1.ll +++ b/llvm/test/Transforms/Attributor/dereferenceable-1.ll @@ -623,19 +623,19 @@ define void @nonnull_assume_pos(i8* %arg1, i8* %arg2, i8* %arg3, i8* %arg4) { ; ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@nonnull_assume_pos ; IS__TUNIT_OPM-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) -; IS__TUNIT_OPM-NEXT: call void @llvm.assume(i1 true) #6 [ "nonnull"(i8* undef), "dereferenceable"(i8* undef, i64 1), "dereferenceable"(i8* undef, i64 2), "dereferenceable"(i8* undef, i64 101), "dereferenceable_or_null"(i8* undef, i64 31), "dereferenceable_or_null"(i8* undef, i64 42) ] +; IS__TUNIT_OPM-NEXT: call void @llvm.assume(i1 true) #6 [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] ; IS__TUNIT_OPM-NEXT: call void @unknown() ; IS__TUNIT_OPM-NEXT: ret void ; ; IS________NPM-LABEL: define {{[^@]+}}@nonnull_assume_pos ; IS________NPM-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) -; IS________NPM-NEXT: call void @llvm.assume(i1 true) #7 [ "nonnull"(i8* undef), "dereferenceable"(i8* undef, i64 1), "dereferenceable"(i8* undef, i64 2), "dereferenceable"(i8* undef, i64 101), "dereferenceable_or_null"(i8* undef, i64 31), "dereferenceable_or_null"(i8* undef, i64 42) ] +; IS________NPM-NEXT: call void @llvm.assume(i1 true) #7 [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] ; IS________NPM-NEXT: call void @unknown() ; IS________NPM-NEXT: ret void ; ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@nonnull_assume_pos ; IS__CGSCC_OPM-SAME: (i8* nocapture nofree nonnull readnone dereferenceable(101) [[ARG1:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(31) [[ARG2:%.*]], i8* nocapture nofree nonnull readnone [[ARG3:%.*]], i8* nocapture nofree readnone dereferenceable_or_null(42) [[ARG4:%.*]]) -; IS__CGSCC_OPM-NEXT: call void @llvm.assume(i1 true) #8 [ "nonnull"(i8* undef), "dereferenceable"(i8* undef, i64 1), "dereferenceable"(i8* undef, i64 2), "dereferenceable"(i8* undef, i64 101), "dereferenceable_or_null"(i8* undef, i64 31), "dereferenceable_or_null"(i8* undef, i64 42) ] +; IS__CGSCC_OPM-NEXT: call void 
@llvm.assume(i1 true) #8 [ "nonnull"(i8* [[ARG3]]), "dereferenceable"(i8* [[ARG1]], i64 1), "dereferenceable"(i8* [[ARG1]], i64 2), "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable_or_null"(i8* [[ARG2]], i64 31), "dereferenceable_or_null"(i8* [[ARG4]], i64 42) ] ; IS__CGSCC_OPM-NEXT: call void @unknown() ; IS__CGSCC_OPM-NEXT: ret void ; @@ -653,7 +653,7 @@ define void @nonnull_assume_neg(i8* %arg1, i8* %arg2, i8* %arg3) { ; CHECK-LABEL: define {{[^@]+}}@nonnull_assume_neg ; CHECK-SAME: (i8* nocapture nofree readnone [[ARG1:%.*]], i8* nocapture nofree readnone [[ARG2:%.*]], i8* nocapture nofree readnone [[ARG3:%.*]]) ; CHECK-NEXT: call void @unknown() -; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i8* undef, i64 101), "dereferenceable"(i8* undef, i64 -2), "dereferenceable_or_null"(i8* undef, i64 31) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "dereferenceable"(i8* [[ARG1]], i64 101), "dereferenceable"(i8* [[ARG2]], i64 -2), "dereferenceable_or_null"(i8* [[ARG3]], i64 31) ] ; CHECK-NEXT: ret void ; call void @unknown() diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index e0bdd5155ab1..ce8626ab5943 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -185,7 +185,7 @@ declare void @llvm.assume(i1) define i8* @test10(i8* %a, i64 %n) { ; CHECK-LABEL: define {{[^@]+}}@test10 ; CHECK-SAME: (i8* nofree readnone "no-capture-maybe-returned" [[A:%.*]], i64 [[N:%.*]]) -; CHECK-NEXT: call void @llvm.assume(i1 undef) +; CHECK-NEXT: call void @llvm.assume(i1 true) ; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds i8, i8* [[A]], i64 [[N]] ; CHECK-NEXT: ret i8* [[B]] ; From 83d5131d87a6f929b21b54e3fc0f9636ff64c808 Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Mon, 19 Aug 2019 23:28:04 +0200 Subject: [PATCH 006/216] [LifetimeAnalysis] Add [[gsl::Pointer]] to llvm::StringRef Summary: This detected the bugs fixed in https://reviews.llvm.org/D66442 and https://reviews.llvm.org/D66440 The warning itself was implemented in https://reviews.llvm.org/D63954 https://reviews.llvm.org/D64256 https://reviews.llvm.org/D65120 https://reviews.llvm.org/D65127 https://reviews.llvm.org/D66152 Reviewers: zturner, mehdi_amini, gribozavr Subscribers: dexonsmith, Szelethus, xazax.hun, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D66443 --- llvm/include/llvm/ADT/StringRef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index ad31517a1ea7..337efd641135 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -54,7 +54,7 @@ namespace llvm { /// situations where the character data resides in some other buffer, whose /// lifetime extends past that of the StringRef. For this reason, it is not in /// general safe to store a StringRef. - class StringRef { + class [[gsl::Pointer]] StringRef { public: static const size_t npos = ~size_t(0); From e882ac7c04a47e5fadc6cd151149e2192c4e0a7e Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 15 Apr 2020 23:28:03 -0700 Subject: [PATCH 007/216] [CallSite removal][TargetLowering] Remove ArgListEntry::setAttributes signature that took an ImmutableCallSite. NFC There's another signature that takes a CallBase. The uses of the ImmutableCallSite version were removed in previous patches. 
--- llvm/include/llvm/CodeGen/TargetLowering.h | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index 98df5214d815..ef907d663f99 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -37,7 +37,6 @@ #include "llvm/CodeGen/TargetCallingConv.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" @@ -287,10 +286,6 @@ class TargetLoweringBase { IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {} void setAttributes(const CallBase *Call, unsigned ArgIdx); - - void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) { - return setAttributes(cast(CS->getInstruction()), ArgIdx); - } }; using ArgListTy = std::vector; From e5d666d76837f7b808452b24dce646bf18ee6e40 Mon Sep 17 00:00:00 2001 From: Dominik Montada Date: Thu, 16 Apr 2020 09:02:46 +0200 Subject: [PATCH 008/216] Revert "Revert "[GlobalISel] Fix invalid combine of unmerge(merge) with intermediate cast"" This reverts commit 1265899c5f7d34034a8c1f67e69a5ab6087310e7. --- .../GlobalISel/LegalizationArtifactCombiner.h | 2 +- .../artifact-combiner-unmerge-values.mir | 87 +- .../AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll | 9 + .../AMDGPU/GlobalISel/cvt_f32_ubyte.ll | 34 - .../AMDGPU/GlobalISel/legalize-bitcast.mir | 49 +- .../GlobalISel/legalize-merge-values.mir | 1910 +++++++++-------- .../GlobalISel/legalize-unmerge-values.mir | 30 +- .../AMDGPU/GlobalISel/zextload-xfail.ll | 9 + .../CodeGen/AMDGPU/GlobalISel/zextload.ll | 38 - 9 files changed, 1173 insertions(+), 995 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h index b27263b659a6..6a0157f9243e 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h @@ -364,7 +364,7 @@ class LegalizationArtifactCombiner { // That is not done yet. 
if (ConvertOp == 0) return true; - return !DestTy.isVector(); + return !DestTy.isVector() && OpTy.isVector(); case TargetOpcode::G_CONCAT_VECTORS: { if (ConvertOp == 0) return true; diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir index 114c660135a1..e3985bb4b536 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/artifact-combiner-unmerge-values.mir @@ -910,10 +910,10 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s32) = G_TRUNC [[COPY1]](s64) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s32) = G_TRUNC [[COPY2]](s64) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s32), implicit [[TRUNC1]](s32), implicit [[TRUNC2]](s32) + ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV]](s192) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96) + ; CHECK: S_ENDPGM 0, implicit [[UV]](s32), implicit [[UV1]](s32), implicit [[UV2]](s32) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -933,16 +933,20 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 - ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s64) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV3]](s32) - ; CHECK: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY2]](s64) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[UV4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV5]](s32) - ; CHECK: S_ENDPGM 0, implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16) + ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV]](s192) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[UV1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV1]], [[C]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[UV2]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) + ; CHECK: S_ENDPGM 0, implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16), implicit [[TRUNC6]](s16) %0:_(s64) = COPY $vgpr0_vgpr1 
%1:_(s64) = COPY $vgpr2_vgpr3 %2:_(s64) = COPY $vgpr4_vgpr5 @@ -968,11 +972,15 @@ body: | ; CHECK: [[MV:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) ; CHECK: [[MV1:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY4]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[COPY5]](s32) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY1]], [[C]](s32) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY2]], [[C]](s32) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) ; CHECK: S_ENDPGM 0, implicit [[MV]](s192), implicit [[MV1]](s96), implicit [[TRUNC]](s16), implicit [[TRUNC1]](s16), implicit [[TRUNC2]](s16), implicit [[TRUNC3]](s16), implicit [[TRUNC4]](s16), implicit [[TRUNC5]](s16) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 @@ -986,3 +994,44 @@ body: | S_ENDPGM 0, implicit %6, implicit %7, implicit %8, implicit %9, implicit %10, implicit %11, implicit %12, implicit %13 ... + +--- +name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64 +body: | + bb.0: + ; CHECK-LABEL: name: test_unmerge_values_s64_anyext_s128_of_merge_values_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: $vgpr0_vgpr1 = COPY [[MV]](s64) + ; CHECK: $vgpr2_vgpr3 = COPY [[DEF]](s64) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s64) = G_MERGE_VALUES %0, %1 + %3:_(s128) = G_ANYEXT %2 + %4:_(s64), %5:_(s64) = G_UNMERGE_VALUES %3 + $vgpr0_vgpr1 = COPY %4 + $vgpr2_vgpr3 = COPY %5 + +... + +--- +name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128 +body: | + bb.0: + ; CHECK-LABEL: name: test_unmerge_values_s32_trunc_s64_of_merge_values_s128 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: $vgpr0 = COPY [[UV]](s32) + ; CHECK: $vgpr1 = COPY [[UV1]](s32) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s128) = G_MERGE_VALUES %0, %1 + %3:_(s64) = G_TRUNC %2 + %4:_(s32), %5:_(s32) = G_UNMERGE_VALUES %3 + $vgpr0 = COPY %4 + $vgpr1 = COPY %5 + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll new file mode 100644 index 000000000000..95ddf2045648 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte-xfail.ll @@ -0,0 +1,9 @@ +; RUN: not --crash llc -global-isel -mtriple=amdgcn-- -mcpu=tahiti -verify-machineinstrs < %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn-- -mcpu=tonga -verify-machineinstrs < %s + +define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind { + %trunc = trunc i32 %arg0 to i24 + %val = bitcast i24 %trunc to <3 x i8> + %cvt = uitofp <3 x i8> %val to <3 x float> + ret <3 x float> %cvt +} \ No newline at end of file diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll index 09e2d1ad0826..4bfc7e3e6744 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/cvt_f32_ubyte.ll @@ -191,40 +191,6 @@ define <2 x float> @v_uitofp_v2i8_to_v2f32(i16 %arg0) nounwind { ret <2 x float> %cvt } -define <3 x float> @v_uitofp_v3i8_to_v3f32(i32 %arg0) nounwind { -; SI-LABEL: v_uitofp_v3i8_to_v3f32: -; SI: ; %bb.0: -; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0 -; SI-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SI-NEXT: s_movk_i32 s4, 0xff -; SI-NEXT: v_lshrrev_b32_e32 v1, 8, v1 -; SI-NEXT: v_and_b32_e32 v0, s4, v0 -; SI-NEXT: v_and_b32_e32 v1, s4, v1 -; SI-NEXT: v_and_b32_e32 v2, s4, v2 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v0, v0 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 -; SI-NEXT: v_cvt_f32_ubyte0_e32 v2, v2 -; SI-NEXT: s_setpc_b64 s[30:31] -; -; VI-LABEL: v_uitofp_v3i8_to_v3f32: -; VI: ; %bb.0: -; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; VI-NEXT: s_movk_i32 s4, 0xff -; VI-NEXT: v_mov_b32_e32 v2, s4 -; VI-NEXT: v_and_b32_sdwa v1, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 -; VI-NEXT: v_and_b32_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; VI-NEXT: v_cvt_f32_ubyte0_e32 v2, v0 -; VI-NEXT: v_cvt_f32_ubyte0_e32 v1, v1 -; VI-NEXT: v_mov_b32_e32 v0, v3 -; VI-NEXT: s_setpc_b64 s[30:31] - %trunc = trunc i32 %arg0 to i24 - %val = bitcast i24 %trunc to <3 x i8> - %cvt = uitofp <3 x i8> %val to <3 x float> - ret <3 x float> %cvt -} - define <4 x float> @v_uitofp_v4i8_to_v4f32(i32 %arg0) nounwind { ; SI-LABEL: v_uitofp_v4i8_to_v4f32: ; SI: ; %bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir index 75ac2668d904..a26a5669ea32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-bitcast.mir @@ -292,23 +292,25 @@ body: | ; CHECK-LABEL: name: test_bitcast_s24_to_v3s8 ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[DEF]](s32) ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF1]](s64) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) + ; CHECK: [[UV:%[0-9]+]]:_(s32), 
[[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY]], [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 ; CHECK: [[LSHR1:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC1]], [[C1]](s16) ; CHECK: [[LSHR2:%[0-9]+]]:_(s16) = G_LSHR [[TRUNC2]], [[C1]](s16) - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[LSHR1]](s16) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[COPY3]](s32) - ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[COPY1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[ANYEXT]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[COPY2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR]](<3 x s32>) %0:_(s32) = COPY $vgpr0 %1:_(s24) = G_TRUNC %0 @@ -326,21 +328,24 @@ body: | ; CHECK-LABEL: name: test_bitcast_s48_to_v3s16 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) - ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]] ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[C]](s32) ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND]], [[SHL]] ; CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY3]], [[C1]] ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C2]], 
[[C]](s32) ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[SHL1]] @@ -349,10 +354,10 @@ body: | ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF1]], [[EXTRACT]](<3 x s16>), 0 - ; CHECK: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) - ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[UV5:%[0-9]+]]:_(<2 x s16>), [[UV6:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<4 x s16>) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV5]](<2 x s16>) ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) - ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV6]](<2 x s16>) ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir index 8dbf083da85d..b5af07f04a33 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-merge-values.mir @@ -606,913 +606,1085 @@ body: | ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 ; CHECK: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 ; CHECK: [[C4:%[0-9]+]]:_(s16) = G_CONSTANT i16 0 - ; CHECK: [[COPY:%[0-9]+]]:_(s16) = COPY [[C4]](s16) ; CHECK: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[COPY1:%[0-9]+]]:_(s16) = COPY [[C4]](s16) - ; CHECK: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF - ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[COPY1]], [[C5]](s32) + ; CHECK: [[OR:%[0-9]+]]:_(s32) = G_OR [[COPY]], [[SHL]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[C]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[COPY3]], [[COPY2]](s32) - ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[COPY5]], [[COPY4]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[COPY7]], [[COPY6]](s32) - ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 - ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[COPY9]], [[COPY8]](s32) - ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 + ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[COPY3]], [[C5]](s32) + ; CHECK: [[OR1:%[0-9]+]]:_(s32) = G_OR [[COPY2]], [[SHL1]] + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR]](s32), [[OR1]](s32) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV1:%[0-9]+]]:_(s1088) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: 
[[TRUNC:%[0-9]+]]:_(s544) = G_TRUNC [[MV1]](s1088) + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s544) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[UV]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C5]](s32) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; CHECK: [[C6:%[0-9]+]]:_(s16) = G_CONSTANT i16 1 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C7]] + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY4]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY7]], [[C7]] + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY6]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY9]], [[C7]] + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[COPY8]](s32) + ; CHECK: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 4 ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[COPY11]], [[COPY10]](s32) - ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY11]], [[C7]] + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND3]], [[COPY10]](s32) + ; CHECK: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[COPY13]], [[COPY12]](s32) - ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY13]], [[C7]] + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[AND4]], [[COPY12]](s32) + ; CHECK: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 6 ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[COPY15]], [[COPY14]](s32) - ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY15]], [[C7]] + ; CHECK: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[AND5]], [[COPY14]](s32) + ; CHECK: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[COPY17]], [[COPY16]](s32) - ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY17]], [[C7]] + ; CHECK: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[AND6]], [[COPY16]](s32) + ; CHECK: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: 
[[COPY19:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[COPY19]], [[COPY18]](s32) - ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY19]], [[C7]] + ; CHECK: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[AND7]], [[COPY18]](s32) + ; CHECK: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[COPY21]], [[COPY20]](s32) - ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY21]], [[C7]] + ; CHECK: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[AND8]], [[COPY20]](s32) + ; CHECK: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 10 ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[COPY23]], [[COPY22]](s32) - ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY23]], [[C7]] + ; CHECK: [[LSHR10:%[0-9]+]]:_(s32) = G_LSHR [[AND9]], [[COPY22]](s32) + ; CHECK: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 11 ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[COPY25]], [[COPY24]](s32) - ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY25]], [[C7]] + ; CHECK: [[LSHR11:%[0-9]+]]:_(s32) = G_LSHR [[AND10]], [[COPY24]](s32) + ; CHECK: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[COPY27]], [[COPY26]](s32) - ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY27]], [[C7]] + ; CHECK: [[LSHR12:%[0-9]+]]:_(s32) = G_LSHR [[AND11]], [[COPY26]](s32) + ; CHECK: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[COPY29]], [[COPY28]](s32) - ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY29]], [[C7]] + ; CHECK: [[LSHR13:%[0-9]+]]:_(s32) = G_LSHR [[AND12]], [[COPY28]](s32) + ; CHECK: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 14 ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[COPY31]], [[COPY30]](s32) - ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[COPY33]], [[COPY32]](s32) - ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[COPY35]], [[COPY34]](s32) - ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[COPY37]], 
[[COPY36]](s32) - ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[COPY39]], [[COPY38]](s32) + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY31]], [[C7]] + ; CHECK: [[LSHR14:%[0-9]+]]:_(s32) = G_LSHR [[AND13]], [[COPY30]](s32) + ; CHECK: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 15 + ; CHECK: [[COPY32:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY33:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY33]], [[C7]] + ; CHECK: [[LSHR15:%[0-9]+]]:_(s32) = G_LSHR [[AND14]], [[COPY32]](s32) + ; CHECK: [[COPY34:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY35:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY35]], [[C7]] + ; CHECK: [[LSHR16:%[0-9]+]]:_(s32) = G_LSHR [[AND15]], [[COPY34]](s32) + ; CHECK: [[COPY36:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY37:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND16:%[0-9]+]]:_(s32) = G_AND [[COPY37]], [[C7]] + ; CHECK: [[LSHR17:%[0-9]+]]:_(s32) = G_LSHR [[AND16]], [[COPY36]](s32) + ; CHECK: [[COPY38:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY39:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY39]], [[C7]] + ; CHECK: [[LSHR18:%[0-9]+]]:_(s32) = G_LSHR [[AND17]], [[COPY38]](s32) ; CHECK: [[COPY40:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[COPY41]], [[COPY40]](s32) + ; CHECK: [[COPY41:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY41]], [[C7]] + ; CHECK: [[LSHR19:%[0-9]+]]:_(s32) = G_LSHR [[AND18]], [[COPY40]](s32) ; CHECK: [[COPY42:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[COPY43]], [[COPY42]](s32) + ; CHECK: [[COPY43:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY43]], [[C7]] + ; CHECK: [[LSHR20:%[0-9]+]]:_(s32) = G_LSHR [[AND19]], [[COPY42]](s32) ; CHECK: [[COPY44:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[COPY45]], [[COPY44]](s32) + ; CHECK: [[COPY45:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY45]], [[C7]] + ; CHECK: [[LSHR21:%[0-9]+]]:_(s32) = G_LSHR [[AND20]], [[COPY44]](s32) ; CHECK: [[COPY46:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[COPY47]], [[COPY46]](s32) + ; CHECK: [[COPY47:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY47]], [[C7]] + ; CHECK: [[LSHR22:%[0-9]+]]:_(s32) = G_LSHR [[AND21]], [[COPY46]](s32) ; CHECK: [[COPY48:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[COPY49]], [[COPY48]](s32) + ; CHECK: [[COPY49:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY49]], [[C7]] + ; CHECK: [[LSHR23:%[0-9]+]]:_(s32) = G_LSHR [[AND22]], [[COPY48]](s32) ; CHECK: [[COPY50:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY51:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[COPY51]], [[COPY50]](s32) + ; CHECK: [[COPY51:%[0-9]+]]:_(s32) 
= COPY [[LSHR]](s32) + ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY51]], [[C7]] + ; CHECK: [[LSHR24:%[0-9]+]]:_(s32) = G_LSHR [[AND23]], [[COPY50]](s32) ; CHECK: [[COPY52:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[COPY53]], [[COPY52]](s32) + ; CHECK: [[COPY53:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY53]], [[C7]] + ; CHECK: [[LSHR25:%[0-9]+]]:_(s32) = G_LSHR [[AND24]], [[COPY52]](s32) ; CHECK: [[COPY54:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[COPY55]], [[COPY54]](s32) + ; CHECK: [[COPY55:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY55]], [[C7]] + ; CHECK: [[LSHR26:%[0-9]+]]:_(s32) = G_LSHR [[AND25]], [[COPY54]](s32) ; CHECK: [[COPY56:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[COPY57]], [[COPY56]](s32) + ; CHECK: [[COPY57:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY57]], [[C7]] + ; CHECK: [[LSHR27:%[0-9]+]]:_(s32) = G_LSHR [[AND26]], [[COPY56]](s32) ; CHECK: [[COPY58:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[COPY59]], [[COPY58]](s32) + ; CHECK: [[COPY59:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY59]], [[C7]] + ; CHECK: [[LSHR28:%[0-9]+]]:_(s32) = G_LSHR [[AND27]], [[COPY58]](s32) ; CHECK: [[COPY60:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[COPY61]], [[COPY60]](s32) - ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[COPY63]], [[COPY62]](s32) - ; CHECK: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR31:%[0-9]+]]:_(s32) = G_LSHR [[COPY65]], [[COPY64]](s32) - ; CHECK: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR32:%[0-9]+]]:_(s32) = G_LSHR [[COPY67]], [[COPY66]](s32) - ; CHECK: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY69:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR33:%[0-9]+]]:_(s32) = G_LSHR [[COPY69]], [[COPY68]](s32) - ; CHECK: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY71:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR34:%[0-9]+]]:_(s32) = G_LSHR [[COPY71]], [[COPY70]](s32) - ; CHECK: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY73:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR35:%[0-9]+]]:_(s32) = G_LSHR [[COPY73]], [[COPY72]](s32) - ; CHECK: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY75:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR36:%[0-9]+]]:_(s32) = G_LSHR [[COPY75]], [[COPY74]](s32) - ; CHECK: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY77:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR37:%[0-9]+]]:_(s32) = G_LSHR [[COPY77]], [[COPY76]](s32) - ; CHECK: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY79:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR38:%[0-9]+]]:_(s32) = G_LSHR [[COPY79]], [[COPY78]](s32) - ; CHECK: 
[[COPY80:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY81:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR39:%[0-9]+]]:_(s32) = G_LSHR [[COPY81]], [[COPY80]](s32) - ; CHECK: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY83:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR40:%[0-9]+]]:_(s32) = G_LSHR [[COPY83]], [[COPY82]](s32) - ; CHECK: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY85:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR41:%[0-9]+]]:_(s32) = G_LSHR [[COPY85]], [[COPY84]](s32) - ; CHECK: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY87:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR42:%[0-9]+]]:_(s32) = G_LSHR [[COPY87]], [[COPY86]](s32) - ; CHECK: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY89:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR43:%[0-9]+]]:_(s32) = G_LSHR [[COPY89]], [[COPY88]](s32) - ; CHECK: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY91:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[LSHR44:%[0-9]+]]:_(s32) = G_LSHR [[COPY91]], [[COPY90]](s32) - ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR44]](s32) - ; CHECK: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY93:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR45:%[0-9]+]]:_(s32) = G_LSHR [[COPY93]], [[COPY92]](s32) - ; CHECK: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY95:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR46:%[0-9]+]]:_(s32) = G_LSHR [[COPY95]], [[COPY94]](s32) - ; CHECK: [[COPY96:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY97:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR47:%[0-9]+]]:_(s32) = G_LSHR [[COPY97]], [[COPY96]](s32) - ; CHECK: [[COPY98:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY99:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR48:%[0-9]+]]:_(s32) = G_LSHR [[COPY99]], [[COPY98]](s32) - ; CHECK: [[COPY100:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY101:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR49:%[0-9]+]]:_(s32) = G_LSHR [[COPY101]], [[COPY100]](s32) - ; CHECK: [[COPY102:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY103:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR50:%[0-9]+]]:_(s32) = G_LSHR [[COPY103]], [[COPY102]](s32) - ; CHECK: [[COPY104:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY105:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR51:%[0-9]+]]:_(s32) = G_LSHR [[COPY105]], [[COPY104]](s32) - ; CHECK: [[COPY106:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY107:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR52:%[0-9]+]]:_(s32) = G_LSHR [[COPY107]], [[COPY106]](s32) - ; CHECK: [[COPY108:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY109:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR53:%[0-9]+]]:_(s32) = G_LSHR [[COPY109]], [[COPY108]](s32) - ; CHECK: [[COPY110:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY111:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR54:%[0-9]+]]:_(s32) = G_LSHR [[COPY111]], [[COPY110]](s32) - ; CHECK: [[COPY112:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY113:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR55:%[0-9]+]]:_(s32) = G_LSHR [[COPY113]], [[COPY112]](s32) - ; CHECK: [[COPY114:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY115:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR56:%[0-9]+]]:_(s32) = G_LSHR [[COPY115]], [[COPY114]](s32) - ; CHECK: [[COPY116:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY117:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; 
CHECK: [[LSHR57:%[0-9]+]]:_(s32) = G_LSHR [[COPY117]], [[COPY116]](s32) - ; CHECK: [[COPY118:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY119:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR58:%[0-9]+]]:_(s32) = G_LSHR [[COPY119]], [[COPY118]](s32) - ; CHECK: [[COPY120:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY121:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR59:%[0-9]+]]:_(s32) = G_LSHR [[COPY121]], [[COPY120]](s32) - ; CHECK: [[COPY122:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY123:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR60:%[0-9]+]]:_(s32) = G_LSHR [[COPY123]], [[COPY122]](s32) - ; CHECK: [[COPY124:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY125:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR61:%[0-9]+]]:_(s32) = G_LSHR [[COPY125]], [[COPY124]](s32) - ; CHECK: [[COPY126:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY127:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR62:%[0-9]+]]:_(s32) = G_LSHR [[COPY127]], [[COPY126]](s32) - ; CHECK: [[COPY128:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY129:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR63:%[0-9]+]]:_(s32) = G_LSHR [[COPY129]], [[COPY128]](s32) - ; CHECK: [[COPY130:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY131:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR64:%[0-9]+]]:_(s32) = G_LSHR [[COPY131]], [[COPY130]](s32) - ; CHECK: [[COPY132:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY133:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR65:%[0-9]+]]:_(s32) = G_LSHR [[COPY133]], [[COPY132]](s32) - ; CHECK: [[COPY134:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY135:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR66:%[0-9]+]]:_(s32) = G_LSHR [[COPY135]], [[COPY134]](s32) - ; CHECK: [[COPY136:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY137:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR67:%[0-9]+]]:_(s32) = G_LSHR [[COPY137]], [[COPY136]](s32) - ; CHECK: [[COPY138:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY139:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR68:%[0-9]+]]:_(s32) = G_LSHR [[COPY139]], [[COPY138]](s32) - ; CHECK: [[COPY140:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY141:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR69:%[0-9]+]]:_(s32) = G_LSHR [[COPY141]], [[COPY140]](s32) - ; CHECK: [[COPY142:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY143:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR70:%[0-9]+]]:_(s32) = G_LSHR [[COPY143]], [[COPY142]](s32) - ; CHECK: [[COPY144:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY145:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR71:%[0-9]+]]:_(s32) = G_LSHR [[COPY145]], [[COPY144]](s32) - ; CHECK: [[COPY146:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY147:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR72:%[0-9]+]]:_(s32) = G_LSHR [[COPY147]], [[COPY146]](s32) - ; CHECK: [[COPY148:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY149:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR73:%[0-9]+]]:_(s32) = G_LSHR [[COPY149]], [[COPY148]](s32) - ; CHECK: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR73]](s32) - ; CHECK: [[COPY150:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY151:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[LSHR74:%[0-9]+]]:_(s32) = G_LSHR [[COPY151]], [[COPY150]](s32) - ; CHECK: [[COPY152:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY153:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR75:%[0-9]+]]:_(s32) = G_LSHR [[COPY153]], 
[[COPY152]](s32) - ; CHECK: [[COPY154:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY155:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR76:%[0-9]+]]:_(s32) = G_LSHR [[COPY155]], [[COPY154]](s32) - ; CHECK: [[COPY156:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY157:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR77:%[0-9]+]]:_(s32) = G_LSHR [[COPY157]], [[COPY156]](s32) - ; CHECK: [[COPY158:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY159:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR78:%[0-9]+]]:_(s32) = G_LSHR [[COPY159]], [[COPY158]](s32) - ; CHECK: [[COPY160:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY161:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR79:%[0-9]+]]:_(s32) = G_LSHR [[COPY161]], [[COPY160]](s32) - ; CHECK: [[COPY162:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY163:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR80:%[0-9]+]]:_(s32) = G_LSHR [[COPY163]], [[COPY162]](s32) - ; CHECK: [[COPY164:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY165:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR81:%[0-9]+]]:_(s32) = G_LSHR [[COPY165]], [[COPY164]](s32) - ; CHECK: [[COPY166:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY167:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR82:%[0-9]+]]:_(s32) = G_LSHR [[COPY167]], [[COPY166]](s32) - ; CHECK: [[COPY168:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY169:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR83:%[0-9]+]]:_(s32) = G_LSHR [[COPY169]], [[COPY168]](s32) - ; CHECK: [[COPY170:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY171:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR84:%[0-9]+]]:_(s32) = G_LSHR [[COPY171]], [[COPY170]](s32) - ; CHECK: [[COPY172:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY173:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR85:%[0-9]+]]:_(s32) = G_LSHR [[COPY173]], [[COPY172]](s32) - ; CHECK: [[COPY174:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY175:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR86:%[0-9]+]]:_(s32) = G_LSHR [[COPY175]], [[COPY174]](s32) - ; CHECK: [[COPY176:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY177:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR87:%[0-9]+]]:_(s32) = G_LSHR [[COPY177]], [[COPY176]](s32) - ; CHECK: [[COPY178:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY179:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR88:%[0-9]+]]:_(s32) = G_LSHR [[COPY179]], [[COPY178]](s32) - ; CHECK: [[COPY180:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY181:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR89:%[0-9]+]]:_(s32) = G_LSHR [[COPY181]], [[COPY180]](s32) - ; CHECK: [[COPY182:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY183:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR90:%[0-9]+]]:_(s32) = G_LSHR [[COPY183]], [[COPY182]](s32) - ; CHECK: [[COPY184:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY185:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR91:%[0-9]+]]:_(s32) = G_LSHR [[COPY185]], [[COPY184]](s32) - ; CHECK: [[COPY186:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY187:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR92:%[0-9]+]]:_(s32) = G_LSHR [[COPY187]], [[COPY186]](s32) - ; CHECK: [[COPY188:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY189:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR93:%[0-9]+]]:_(s32) = G_LSHR [[COPY189]], [[COPY188]](s32) - ; CHECK: [[COPY190:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY191:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; 
CHECK: [[LSHR94:%[0-9]+]]:_(s32) = G_LSHR [[COPY191]], [[COPY190]](s32) - ; CHECK: [[COPY192:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY193:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR95:%[0-9]+]]:_(s32) = G_LSHR [[COPY193]], [[COPY192]](s32) - ; CHECK: [[COPY194:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY195:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR96:%[0-9]+]]:_(s32) = G_LSHR [[COPY195]], [[COPY194]](s32) - ; CHECK: [[COPY196:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY197:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR97:%[0-9]+]]:_(s32) = G_LSHR [[COPY197]], [[COPY196]](s32) - ; CHECK: [[COPY198:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY199:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR98:%[0-9]+]]:_(s32) = G_LSHR [[COPY199]], [[COPY198]](s32) - ; CHECK: [[COPY200:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY201:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR99:%[0-9]+]]:_(s32) = G_LSHR [[COPY201]], [[COPY200]](s32) - ; CHECK: [[COPY202:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY203:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR100:%[0-9]+]]:_(s32) = G_LSHR [[COPY203]], [[COPY202]](s32) - ; CHECK: [[COPY204:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY205:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR101:%[0-9]+]]:_(s32) = G_LSHR [[COPY205]], [[COPY204]](s32) - ; CHECK: [[COPY206:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY207:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR102:%[0-9]+]]:_(s32) = G_LSHR [[COPY207]], [[COPY206]](s32) - ; CHECK: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR102]](s32) - ; CHECK: [[COPY208:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY209:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR103:%[0-9]+]]:_(s32) = G_LSHR [[COPY209]], [[COPY208]](s32) - ; CHECK: [[COPY210:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY211:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[LSHR104:%[0-9]+]]:_(s32) = G_LSHR [[COPY211]], [[COPY210]](s32) - ; CHECK: [[COPY212:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY213:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR105:%[0-9]+]]:_(s32) = G_LSHR [[COPY213]], [[COPY212]](s32) - ; CHECK: [[COPY214:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY215:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR106:%[0-9]+]]:_(s32) = G_LSHR [[COPY215]], [[COPY214]](s32) - ; CHECK: [[COPY216:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY217:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR107:%[0-9]+]]:_(s32) = G_LSHR [[COPY217]], [[COPY216]](s32) - ; CHECK: [[COPY218:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY219:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR108:%[0-9]+]]:_(s32) = G_LSHR [[COPY219]], [[COPY218]](s32) - ; CHECK: [[COPY220:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY221:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR109:%[0-9]+]]:_(s32) = G_LSHR [[COPY221]], [[COPY220]](s32) - ; CHECK: [[COPY222:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY223:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR110:%[0-9]+]]:_(s32) = G_LSHR [[COPY223]], [[COPY222]](s32) - ; CHECK: [[COPY224:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY225:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR111:%[0-9]+]]:_(s32) = G_LSHR [[COPY225]], [[COPY224]](s32) - ; CHECK: [[COPY226:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY227:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR112:%[0-9]+]]:_(s32) = G_LSHR [[COPY227]], 
[[COPY226]](s32) - ; CHECK: [[COPY228:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY229:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR113:%[0-9]+]]:_(s32) = G_LSHR [[COPY229]], [[COPY228]](s32) - ; CHECK: [[COPY230:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY231:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR114:%[0-9]+]]:_(s32) = G_LSHR [[COPY231]], [[COPY230]](s32) - ; CHECK: [[COPY232:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY233:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR115:%[0-9]+]]:_(s32) = G_LSHR [[COPY233]], [[COPY232]](s32) - ; CHECK: [[COPY234:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY235:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR116:%[0-9]+]]:_(s32) = G_LSHR [[COPY235]], [[COPY234]](s32) - ; CHECK: [[COPY236:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY237:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR117:%[0-9]+]]:_(s32) = G_LSHR [[COPY237]], [[COPY236]](s32) - ; CHECK: [[COPY238:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY239:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR118:%[0-9]+]]:_(s32) = G_LSHR [[COPY239]], [[COPY238]](s32) - ; CHECK: [[COPY240:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY241:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[LSHR119:%[0-9]+]]:_(s32) = G_LSHR [[COPY241]], [[COPY240]](s32) - ; CHECK: [[COPY242:%[0-9]+]]:_(s16) = COPY [[COPY]](s16) - ; CHECK: [[AND:%[0-9]+]]:_(s16) = G_AND [[COPY242]], [[C6]] - ; CHECK: [[COPY243:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY244:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY244]], [[C1]] - ; CHECK: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[AND1]], [[COPY243]](s32) - ; CHECK: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[SHL]](s32) - ; CHECK: [[OR:%[0-9]+]]:_(s16) = G_OR [[AND]], [[TRUNC3]] - ; CHECK: [[COPY245:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY246:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) - ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY246]], [[C1]] - ; CHECK: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[AND2]], [[COPY245]](s32) - ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[SHL1]](s32) - ; CHECK: [[OR1:%[0-9]+]]:_(s16) = G_OR [[OR]], [[TRUNC4]] - ; CHECK: [[COPY247:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY248:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) - ; CHECK: [[AND3:%[0-9]+]]:_(s32) = G_AND [[COPY248]], [[C1]] - ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[AND3]], [[COPY247]](s32) - ; CHECK: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[SHL2]](s32) - ; CHECK: [[OR2:%[0-9]+]]:_(s16) = G_OR [[OR1]], [[TRUNC5]] - ; CHECK: [[COPY249:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY250:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CHECK: [[AND4:%[0-9]+]]:_(s32) = G_AND [[COPY250]], [[C1]] - ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[AND4]], [[COPY249]](s32) - ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[SHL3]](s32) - ; CHECK: [[OR3:%[0-9]+]]:_(s16) = G_OR [[OR2]], [[TRUNC6]] - ; CHECK: [[COPY251:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY252:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) - ; CHECK: [[AND5:%[0-9]+]]:_(s32) = G_AND [[COPY252]], [[C1]] - ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND5]], [[COPY251]](s32) - ; CHECK: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[SHL4]](s32) - ; CHECK: [[OR4:%[0-9]+]]:_(s16) = G_OR [[OR3]], [[TRUNC7]] - ; CHECK: [[COPY253:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY254:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) - ; CHECK: [[AND6:%[0-9]+]]:_(s32) = G_AND [[COPY254]], [[C1]] - ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL 
[[AND6]], [[COPY253]](s32) - ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[SHL5]](s32) - ; CHECK: [[OR5:%[0-9]+]]:_(s16) = G_OR [[OR4]], [[TRUNC8]] - ; CHECK: [[COPY255:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY256:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) - ; CHECK: [[AND7:%[0-9]+]]:_(s32) = G_AND [[COPY256]], [[C1]] - ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[AND7]], [[COPY255]](s32) - ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL6]](s32) - ; CHECK: [[OR6:%[0-9]+]]:_(s16) = G_OR [[OR5]], [[TRUNC9]] - ; CHECK: [[COPY257:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY258:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) - ; CHECK: [[AND8:%[0-9]+]]:_(s32) = G_AND [[COPY258]], [[C1]] - ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[AND8]], [[COPY257]](s32) - ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL7]](s32) - ; CHECK: [[OR7:%[0-9]+]]:_(s16) = G_OR [[OR6]], [[TRUNC10]] - ; CHECK: [[COPY259:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY260:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) - ; CHECK: [[AND9:%[0-9]+]]:_(s32) = G_AND [[COPY260]], [[C1]] - ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND9]], [[COPY259]](s32) - ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) - ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[OR7]], [[TRUNC11]] - ; CHECK: [[COPY261:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY262:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) - ; CHECK: [[AND10:%[0-9]+]]:_(s32) = G_AND [[COPY262]], [[C1]] - ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND10]], [[COPY261]](s32) - ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) - ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[OR8]], [[TRUNC12]] - ; CHECK: [[COPY263:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY264:%[0-9]+]]:_(s32) = COPY [[LSHR10]](s32) - ; CHECK: [[AND11:%[0-9]+]]:_(s32) = G_AND [[COPY264]], [[C1]] - ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[COPY263]](s32) - ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) - ; CHECK: [[OR10:%[0-9]+]]:_(s16) = G_OR [[OR9]], [[TRUNC13]] - ; CHECK: [[COPY265:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY266:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) - ; CHECK: [[AND12:%[0-9]+]]:_(s32) = G_AND [[COPY266]], [[C1]] - ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND12]], [[COPY265]](s32) - ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) - ; CHECK: [[OR11:%[0-9]+]]:_(s16) = G_OR [[OR10]], [[TRUNC14]] - ; CHECK: [[COPY267:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY268:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) - ; CHECK: [[AND13:%[0-9]+]]:_(s32) = G_AND [[COPY268]], [[C1]] - ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND13]], [[COPY267]](s32) - ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) - ; CHECK: [[OR12:%[0-9]+]]:_(s16) = G_OR [[OR11]], [[TRUNC15]] - ; CHECK: [[COPY269:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY270:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) - ; CHECK: [[AND14:%[0-9]+]]:_(s32) = G_AND [[COPY270]], [[C1]] - ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND14]], [[COPY269]](s32) - ; CHECK: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) - ; CHECK: [[OR13:%[0-9]+]]:_(s16) = G_OR [[OR12]], [[TRUNC16]] - ; CHECK: [[COPY271:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY272:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) - ; CHECK: [[AND15:%[0-9]+]]:_(s32) = G_AND [[COPY272]], [[C1]] - ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND15]], [[COPY271]](s32) - ; CHECK: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) - ; CHECK: [[OR14:%[0-9]+]]:_(s16) = G_OR [[OR13]], 
[[TRUNC17]] - ; CHECK: [[COPY273:%[0-9]+]]:_(s16) = COPY [[COPY1]](s16) - ; CHECK: [[AND16:%[0-9]+]]:_(s16) = G_AND [[COPY273]], [[C6]] - ; CHECK: [[COPY274:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY275:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[AND17:%[0-9]+]]:_(s32) = G_AND [[COPY275]], [[C1]] - ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND17]], [[COPY274]](s32) - ; CHECK: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) - ; CHECK: [[OR15:%[0-9]+]]:_(s16) = G_OR [[AND16]], [[TRUNC18]] - ; CHECK: [[COPY276:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY277:%[0-9]+]]:_(s32) = COPY [[LSHR30]](s32) - ; CHECK: [[AND18:%[0-9]+]]:_(s32) = G_AND [[COPY277]], [[C1]] - ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND18]], [[COPY276]](s32) - ; CHECK: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) - ; CHECK: [[OR16:%[0-9]+]]:_(s16) = G_OR [[OR15]], [[TRUNC19]] - ; CHECK: [[COPY278:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY279:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) - ; CHECK: [[AND19:%[0-9]+]]:_(s32) = G_AND [[COPY279]], [[C1]] - ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND19]], [[COPY278]](s32) - ; CHECK: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[SHL17]](s32) - ; CHECK: [[OR17:%[0-9]+]]:_(s16) = G_OR [[OR16]], [[TRUNC20]] - ; CHECK: [[COPY280:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY281:%[0-9]+]]:_(s32) = COPY [[LSHR32]](s32) - ; CHECK: [[AND20:%[0-9]+]]:_(s32) = G_AND [[COPY281]], [[C1]] - ; CHECK: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND20]], [[COPY280]](s32) - ; CHECK: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) - ; CHECK: [[OR18:%[0-9]+]]:_(s16) = G_OR [[OR17]], [[TRUNC21]] - ; CHECK: [[COPY282:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY283:%[0-9]+]]:_(s32) = COPY [[LSHR33]](s32) - ; CHECK: [[AND21:%[0-9]+]]:_(s32) = G_AND [[COPY283]], [[C1]] - ; CHECK: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND21]], [[COPY282]](s32) - ; CHECK: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) - ; CHECK: [[OR19:%[0-9]+]]:_(s16) = G_OR [[OR18]], [[TRUNC22]] - ; CHECK: [[COPY284:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY285:%[0-9]+]]:_(s32) = COPY [[LSHR34]](s32) - ; CHECK: [[AND22:%[0-9]+]]:_(s32) = G_AND [[COPY285]], [[C1]] - ; CHECK: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND22]], [[COPY284]](s32) - ; CHECK: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) - ; CHECK: [[OR20:%[0-9]+]]:_(s16) = G_OR [[OR19]], [[TRUNC23]] - ; CHECK: [[COPY286:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY287:%[0-9]+]]:_(s32) = COPY [[LSHR35]](s32) - ; CHECK: [[AND23:%[0-9]+]]:_(s32) = G_AND [[COPY287]], [[C1]] - ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND23]], [[COPY286]](s32) - ; CHECK: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) - ; CHECK: [[OR21:%[0-9]+]]:_(s16) = G_OR [[OR20]], [[TRUNC24]] - ; CHECK: [[COPY288:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY289:%[0-9]+]]:_(s32) = COPY [[LSHR36]](s32) - ; CHECK: [[AND24:%[0-9]+]]:_(s32) = G_AND [[COPY289]], [[C1]] - ; CHECK: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND24]], [[COPY288]](s32) - ; CHECK: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL22]](s32) - ; CHECK: [[OR22:%[0-9]+]]:_(s16) = G_OR [[OR21]], [[TRUNC25]] - ; CHECK: [[COPY290:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY291:%[0-9]+]]:_(s32) = COPY [[LSHR37]](s32) - ; CHECK: [[AND25:%[0-9]+]]:_(s32) = G_AND [[COPY291]], [[C1]] - ; CHECK: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND25]], [[COPY290]](s32) - ; CHECK: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[SHL23]](s32) - ; CHECK: [[OR23:%[0-9]+]]:_(s16) = G_OR [[OR22]], 
[[TRUNC26]] - ; CHECK: [[COPY292:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY293:%[0-9]+]]:_(s32) = COPY [[LSHR38]](s32) - ; CHECK: [[AND26:%[0-9]+]]:_(s32) = G_AND [[COPY293]], [[C1]] - ; CHECK: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND26]], [[COPY292]](s32) - ; CHECK: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL24]](s32) - ; CHECK: [[OR24:%[0-9]+]]:_(s16) = G_OR [[OR23]], [[TRUNC27]] - ; CHECK: [[COPY294:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY295:%[0-9]+]]:_(s32) = COPY [[LSHR39]](s32) - ; CHECK: [[AND27:%[0-9]+]]:_(s32) = G_AND [[COPY295]], [[C1]] - ; CHECK: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND27]], [[COPY294]](s32) - ; CHECK: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[SHL25]](s32) - ; CHECK: [[OR25:%[0-9]+]]:_(s16) = G_OR [[OR24]], [[TRUNC28]] - ; CHECK: [[COPY296:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY297:%[0-9]+]]:_(s32) = COPY [[LSHR40]](s32) - ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY297]], [[C1]] - ; CHECK: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND28]], [[COPY296]](s32) - ; CHECK: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL26]](s32) - ; CHECK: [[OR26:%[0-9]+]]:_(s16) = G_OR [[OR25]], [[TRUNC29]] - ; CHECK: [[COPY298:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY299:%[0-9]+]]:_(s32) = COPY [[LSHR41]](s32) - ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY299]], [[C1]] - ; CHECK: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND29]], [[COPY298]](s32) - ; CHECK: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[SHL27]](s32) - ; CHECK: [[OR27:%[0-9]+]]:_(s16) = G_OR [[OR26]], [[TRUNC30]] - ; CHECK: [[COPY300:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY301:%[0-9]+]]:_(s32) = COPY [[LSHR42]](s32) - ; CHECK: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY301]], [[C1]] - ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND30]], [[COPY300]](s32) - ; CHECK: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL28]](s32) - ; CHECK: [[OR28:%[0-9]+]]:_(s16) = G_OR [[OR27]], [[TRUNC31]] - ; CHECK: [[COPY302:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY303:%[0-9]+]]:_(s32) = COPY [[LSHR43]](s32) - ; CHECK: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY303]], [[C1]] - ; CHECK: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND31]], [[COPY302]](s32) - ; CHECK: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[SHL29]](s32) - ; CHECK: [[OR29:%[0-9]+]]:_(s16) = G_OR [[OR28]], [[TRUNC32]] - ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR14]](s16) - ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR29]](s16) - ; CHECK: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) - ; CHECK: [[OR30:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL30]] - ; CHECK: [[COPY304:%[0-9]+]]:_(s16) = COPY [[TRUNC]](s16) - ; CHECK: [[AND32:%[0-9]+]]:_(s16) = G_AND [[COPY304]], [[C6]] - ; CHECK: [[COPY305:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY306:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY306]], [[C1]] - ; CHECK: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[AND33]], [[COPY305]](s32) - ; CHECK: [[TRUNC33:%[0-9]+]]:_(s16) = G_TRUNC [[SHL31]](s32) - ; CHECK: [[OR31:%[0-9]+]]:_(s16) = G_OR [[AND32]], [[TRUNC33]] - ; CHECK: [[COPY307:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY308:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[AND34:%[0-9]+]]:_(s32) = G_AND [[COPY308]], [[C1]] - ; CHECK: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[AND34]], [[COPY307]](s32) - ; CHECK: [[TRUNC34:%[0-9]+]]:_(s16) = G_TRUNC [[SHL32]](s32) - ; CHECK: [[OR32:%[0-9]+]]:_(s16) = G_OR [[OR31]], [[TRUNC34]] - ; CHECK: [[COPY309:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY310:%[0-9]+]]:_(s32) = COPY [[LSHR60]](s32) - ; 
CHECK: [[AND35:%[0-9]+]]:_(s32) = G_AND [[COPY310]], [[C1]] - ; CHECK: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[AND35]], [[COPY309]](s32) - ; CHECK: [[TRUNC35:%[0-9]+]]:_(s16) = G_TRUNC [[SHL33]](s32) - ; CHECK: [[OR33:%[0-9]+]]:_(s16) = G_OR [[OR32]], [[TRUNC35]] - ; CHECK: [[COPY311:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY312:%[0-9]+]]:_(s32) = COPY [[LSHR61]](s32) - ; CHECK: [[AND36:%[0-9]+]]:_(s32) = G_AND [[COPY312]], [[C1]] - ; CHECK: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[AND36]], [[COPY311]](s32) - ; CHECK: [[TRUNC36:%[0-9]+]]:_(s16) = G_TRUNC [[SHL34]](s32) - ; CHECK: [[OR34:%[0-9]+]]:_(s16) = G_OR [[OR33]], [[TRUNC36]] - ; CHECK: [[COPY313:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY314:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) - ; CHECK: [[AND37:%[0-9]+]]:_(s32) = G_AND [[COPY314]], [[C1]] - ; CHECK: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[AND37]], [[COPY313]](s32) - ; CHECK: [[TRUNC37:%[0-9]+]]:_(s16) = G_TRUNC [[SHL35]](s32) - ; CHECK: [[OR35:%[0-9]+]]:_(s16) = G_OR [[OR34]], [[TRUNC37]] - ; CHECK: [[COPY315:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY316:%[0-9]+]]:_(s32) = COPY [[LSHR63]](s32) - ; CHECK: [[AND38:%[0-9]+]]:_(s32) = G_AND [[COPY316]], [[C1]] - ; CHECK: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[AND38]], [[COPY315]](s32) - ; CHECK: [[TRUNC38:%[0-9]+]]:_(s16) = G_TRUNC [[SHL36]](s32) - ; CHECK: [[OR36:%[0-9]+]]:_(s16) = G_OR [[OR35]], [[TRUNC38]] - ; CHECK: [[COPY317:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY318:%[0-9]+]]:_(s32) = COPY [[LSHR64]](s32) - ; CHECK: [[AND39:%[0-9]+]]:_(s32) = G_AND [[COPY318]], [[C1]] - ; CHECK: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[AND39]], [[COPY317]](s32) - ; CHECK: [[TRUNC39:%[0-9]+]]:_(s16) = G_TRUNC [[SHL37]](s32) - ; CHECK: [[OR37:%[0-9]+]]:_(s16) = G_OR [[OR36]], [[TRUNC39]] - ; CHECK: [[COPY319:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY320:%[0-9]+]]:_(s32) = COPY [[LSHR65]](s32) - ; CHECK: [[AND40:%[0-9]+]]:_(s32) = G_AND [[COPY320]], [[C1]] - ; CHECK: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[AND40]], [[COPY319]](s32) - ; CHECK: [[TRUNC40:%[0-9]+]]:_(s16) = G_TRUNC [[SHL38]](s32) - ; CHECK: [[OR38:%[0-9]+]]:_(s16) = G_OR [[OR37]], [[TRUNC40]] - ; CHECK: [[COPY321:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY322:%[0-9]+]]:_(s32) = COPY [[LSHR66]](s32) - ; CHECK: [[AND41:%[0-9]+]]:_(s32) = G_AND [[COPY322]], [[C1]] - ; CHECK: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[AND41]], [[COPY321]](s32) - ; CHECK: [[TRUNC41:%[0-9]+]]:_(s16) = G_TRUNC [[SHL39]](s32) - ; CHECK: [[OR39:%[0-9]+]]:_(s16) = G_OR [[OR38]], [[TRUNC41]] - ; CHECK: [[COPY323:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY324:%[0-9]+]]:_(s32) = COPY [[LSHR67]](s32) - ; CHECK: [[AND42:%[0-9]+]]:_(s32) = G_AND [[COPY324]], [[C1]] - ; CHECK: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[AND42]], [[COPY323]](s32) - ; CHECK: [[TRUNC42:%[0-9]+]]:_(s16) = G_TRUNC [[SHL40]](s32) - ; CHECK: [[OR40:%[0-9]+]]:_(s16) = G_OR [[OR39]], [[TRUNC42]] - ; CHECK: [[COPY325:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY326:%[0-9]+]]:_(s32) = COPY [[LSHR68]](s32) - ; CHECK: [[AND43:%[0-9]+]]:_(s32) = G_AND [[COPY326]], [[C1]] - ; CHECK: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[AND43]], [[COPY325]](s32) - ; CHECK: [[TRUNC43:%[0-9]+]]:_(s16) = G_TRUNC [[SHL41]](s32) - ; CHECK: [[OR41:%[0-9]+]]:_(s16) = G_OR [[OR40]], [[TRUNC43]] - ; CHECK: [[COPY327:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY328:%[0-9]+]]:_(s32) = COPY [[LSHR69]](s32) - ; CHECK: [[AND44:%[0-9]+]]:_(s32) = G_AND [[COPY328]], [[C1]] - ; CHECK: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[AND44]], 
[[COPY327]](s32) - ; CHECK: [[TRUNC44:%[0-9]+]]:_(s16) = G_TRUNC [[SHL42]](s32) - ; CHECK: [[OR42:%[0-9]+]]:_(s16) = G_OR [[OR41]], [[TRUNC44]] - ; CHECK: [[COPY329:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY330:%[0-9]+]]:_(s32) = COPY [[LSHR70]](s32) - ; CHECK: [[AND45:%[0-9]+]]:_(s32) = G_AND [[COPY330]], [[C1]] - ; CHECK: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[AND45]], [[COPY329]](s32) - ; CHECK: [[TRUNC45:%[0-9]+]]:_(s16) = G_TRUNC [[SHL43]](s32) - ; CHECK: [[OR43:%[0-9]+]]:_(s16) = G_OR [[OR42]], [[TRUNC45]] - ; CHECK: [[COPY331:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY332:%[0-9]+]]:_(s32) = COPY [[LSHR71]](s32) - ; CHECK: [[AND46:%[0-9]+]]:_(s32) = G_AND [[COPY332]], [[C1]] - ; CHECK: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[AND46]], [[COPY331]](s32) - ; CHECK: [[TRUNC46:%[0-9]+]]:_(s16) = G_TRUNC [[SHL44]](s32) - ; CHECK: [[OR44:%[0-9]+]]:_(s16) = G_OR [[OR43]], [[TRUNC46]] - ; CHECK: [[COPY333:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY334:%[0-9]+]]:_(s32) = COPY [[LSHR72]](s32) - ; CHECK: [[AND47:%[0-9]+]]:_(s32) = G_AND [[COPY334]], [[C1]] - ; CHECK: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[AND47]], [[COPY333]](s32) - ; CHECK: [[TRUNC47:%[0-9]+]]:_(s16) = G_TRUNC [[SHL45]](s32) - ; CHECK: [[OR45:%[0-9]+]]:_(s16) = G_OR [[OR44]], [[TRUNC47]] - ; CHECK: [[COPY335:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) - ; CHECK: [[AND48:%[0-9]+]]:_(s16) = G_AND [[COPY335]], [[C6]] - ; CHECK: [[COPY336:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY337:%[0-9]+]]:_(s32) = COPY [[LSHR74]](s32) - ; CHECK: [[AND49:%[0-9]+]]:_(s32) = G_AND [[COPY337]], [[C1]] - ; CHECK: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[AND49]], [[COPY336]](s32) - ; CHECK: [[TRUNC48:%[0-9]+]]:_(s16) = G_TRUNC [[SHL46]](s32) - ; CHECK: [[OR46:%[0-9]+]]:_(s16) = G_OR [[AND48]], [[TRUNC48]] - ; CHECK: [[COPY338:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY339:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND50:%[0-9]+]]:_(s32) = G_AND [[COPY339]], [[C1]] - ; CHECK: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[AND50]], [[COPY338]](s32) - ; CHECK: [[TRUNC49:%[0-9]+]]:_(s16) = G_TRUNC [[SHL47]](s32) - ; CHECK: [[OR47:%[0-9]+]]:_(s16) = G_OR [[OR46]], [[TRUNC49]] - ; CHECK: [[COPY340:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY341:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[AND51:%[0-9]+]]:_(s32) = G_AND [[COPY341]], [[C1]] - ; CHECK: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[AND51]], [[COPY340]](s32) - ; CHECK: [[TRUNC50:%[0-9]+]]:_(s16) = G_TRUNC [[SHL48]](s32) - ; CHECK: [[OR48:%[0-9]+]]:_(s16) = G_OR [[OR47]], [[TRUNC50]] - ; CHECK: [[COPY342:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY343:%[0-9]+]]:_(s32) = COPY [[LSHR90]](s32) - ; CHECK: [[AND52:%[0-9]+]]:_(s32) = G_AND [[COPY343]], [[C1]] - ; CHECK: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[AND52]], [[COPY342]](s32) - ; CHECK: [[TRUNC51:%[0-9]+]]:_(s16) = G_TRUNC [[SHL49]](s32) - ; CHECK: [[OR49:%[0-9]+]]:_(s16) = G_OR [[OR48]], [[TRUNC51]] - ; CHECK: [[COPY344:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY345:%[0-9]+]]:_(s32) = COPY [[LSHR91]](s32) - ; CHECK: [[AND53:%[0-9]+]]:_(s32) = G_AND [[COPY345]], [[C1]] - ; CHECK: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[AND53]], [[COPY344]](s32) - ; CHECK: [[TRUNC52:%[0-9]+]]:_(s16) = G_TRUNC [[SHL50]](s32) - ; CHECK: [[OR50:%[0-9]+]]:_(s16) = G_OR [[OR49]], [[TRUNC52]] - ; CHECK: [[COPY346:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY347:%[0-9]+]]:_(s32) = COPY [[LSHR92]](s32) - ; CHECK: [[AND54:%[0-9]+]]:_(s32) = G_AND [[COPY347]], [[C1]] - ; CHECK: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[AND54]], 
[[COPY346]](s32) - ; CHECK: [[TRUNC53:%[0-9]+]]:_(s16) = G_TRUNC [[SHL51]](s32) - ; CHECK: [[OR51:%[0-9]+]]:_(s16) = G_OR [[OR50]], [[TRUNC53]] - ; CHECK: [[COPY348:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY349:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) - ; CHECK: [[AND55:%[0-9]+]]:_(s32) = G_AND [[COPY349]], [[C1]] - ; CHECK: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[AND55]], [[COPY348]](s32) - ; CHECK: [[TRUNC54:%[0-9]+]]:_(s16) = G_TRUNC [[SHL52]](s32) - ; CHECK: [[OR52:%[0-9]+]]:_(s16) = G_OR [[OR51]], [[TRUNC54]] - ; CHECK: [[COPY350:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY351:%[0-9]+]]:_(s32) = COPY [[LSHR94]](s32) - ; CHECK: [[AND56:%[0-9]+]]:_(s32) = G_AND [[COPY351]], [[C1]] - ; CHECK: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[AND56]], [[COPY350]](s32) - ; CHECK: [[TRUNC55:%[0-9]+]]:_(s16) = G_TRUNC [[SHL53]](s32) - ; CHECK: [[OR53:%[0-9]+]]:_(s16) = G_OR [[OR52]], [[TRUNC55]] - ; CHECK: [[COPY352:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY353:%[0-9]+]]:_(s32) = COPY [[LSHR95]](s32) - ; CHECK: [[AND57:%[0-9]+]]:_(s32) = G_AND [[COPY353]], [[C1]] - ; CHECK: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[AND57]], [[COPY352]](s32) - ; CHECK: [[TRUNC56:%[0-9]+]]:_(s16) = G_TRUNC [[SHL54]](s32) - ; CHECK: [[OR54:%[0-9]+]]:_(s16) = G_OR [[OR53]], [[TRUNC56]] - ; CHECK: [[COPY354:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY355:%[0-9]+]]:_(s32) = COPY [[LSHR96]](s32) - ; CHECK: [[AND58:%[0-9]+]]:_(s32) = G_AND [[COPY355]], [[C1]] - ; CHECK: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[AND58]], [[COPY354]](s32) - ; CHECK: [[TRUNC57:%[0-9]+]]:_(s16) = G_TRUNC [[SHL55]](s32) - ; CHECK: [[OR55:%[0-9]+]]:_(s16) = G_OR [[OR54]], [[TRUNC57]] - ; CHECK: [[COPY356:%[0-9]+]]:_(s32) = COPY [[C14]](s32) - ; CHECK: [[COPY357:%[0-9]+]]:_(s32) = COPY [[LSHR97]](s32) - ; CHECK: [[AND59:%[0-9]+]]:_(s32) = G_AND [[COPY357]], [[C1]] - ; CHECK: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[AND59]], [[COPY356]](s32) - ; CHECK: [[TRUNC58:%[0-9]+]]:_(s16) = G_TRUNC [[SHL56]](s32) - ; CHECK: [[OR56:%[0-9]+]]:_(s16) = G_OR [[OR55]], [[TRUNC58]] - ; CHECK: [[COPY358:%[0-9]+]]:_(s32) = COPY [[C15]](s32) - ; CHECK: [[COPY359:%[0-9]+]]:_(s32) = COPY [[LSHR98]](s32) - ; CHECK: [[AND60:%[0-9]+]]:_(s32) = G_AND [[COPY359]], [[C1]] - ; CHECK: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[AND60]], [[COPY358]](s32) - ; CHECK: [[TRUNC59:%[0-9]+]]:_(s16) = G_TRUNC [[SHL57]](s32) - ; CHECK: [[OR57:%[0-9]+]]:_(s16) = G_OR [[OR56]], [[TRUNC59]] - ; CHECK: [[COPY360:%[0-9]+]]:_(s32) = COPY [[C16]](s32) - ; CHECK: [[COPY361:%[0-9]+]]:_(s32) = COPY [[LSHR99]](s32) - ; CHECK: [[AND61:%[0-9]+]]:_(s32) = G_AND [[COPY361]], [[C1]] - ; CHECK: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[AND61]], [[COPY360]](s32) - ; CHECK: [[TRUNC60:%[0-9]+]]:_(s16) = G_TRUNC [[SHL58]](s32) - ; CHECK: [[OR58:%[0-9]+]]:_(s16) = G_OR [[OR57]], [[TRUNC60]] - ; CHECK: [[COPY362:%[0-9]+]]:_(s32) = COPY [[C17]](s32) - ; CHECK: [[COPY363:%[0-9]+]]:_(s32) = COPY [[LSHR100]](s32) - ; CHECK: [[AND62:%[0-9]+]]:_(s32) = G_AND [[COPY363]], [[C1]] - ; CHECK: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[AND62]], [[COPY362]](s32) - ; CHECK: [[TRUNC61:%[0-9]+]]:_(s16) = G_TRUNC [[SHL59]](s32) - ; CHECK: [[OR59:%[0-9]+]]:_(s16) = G_OR [[OR58]], [[TRUNC61]] - ; CHECK: [[COPY364:%[0-9]+]]:_(s32) = COPY [[C18]](s32) - ; CHECK: [[COPY365:%[0-9]+]]:_(s32) = COPY [[LSHR101]](s32) - ; CHECK: [[AND63:%[0-9]+]]:_(s32) = G_AND [[COPY365]], [[C1]] - ; CHECK: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[AND63]], [[COPY364]](s32) - ; CHECK: [[TRUNC62:%[0-9]+]]:_(s16) = G_TRUNC [[SHL60]](s32) - ; CHECK: [[OR60:%[0-9]+]]:_(s16) 
= G_OR [[OR59]], [[TRUNC62]] - ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR45]](s16) - ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR60]](s16) - ; CHECK: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) - ; CHECK: [[OR61:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL61]] - ; CHECK: [[COPY366:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) - ; CHECK: [[AND64:%[0-9]+]]:_(s16) = G_AND [[COPY366]], [[C6]] - ; CHECK: [[COPY367:%[0-9]+]]:_(s32) = COPY [[C1]](s32) - ; CHECK: [[COPY368:%[0-9]+]]:_(s32) = COPY [[LSHR103]](s32) - ; CHECK: [[AND65:%[0-9]+]]:_(s32) = G_AND [[COPY368]], [[C1]] - ; CHECK: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[AND65]], [[COPY367]](s32) - ; CHECK: [[TRUNC63:%[0-9]+]]:_(s16) = G_TRUNC [[SHL62]](s32) - ; CHECK: [[OR62:%[0-9]+]]:_(s16) = G_OR [[AND64]], [[TRUNC63]] - ; CHECK: [[COPY369:%[0-9]+]]:_(s32) = COPY [[C2]](s32) - ; CHECK: [[COPY370:%[0-9]+]]:_(s32) = COPY [[LSHR104]](s32) - ; CHECK: [[AND66:%[0-9]+]]:_(s32) = G_AND [[COPY370]], [[C1]] - ; CHECK: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[AND66]], [[COPY369]](s32) - ; CHECK: [[TRUNC64:%[0-9]+]]:_(s16) = G_TRUNC [[SHL63]](s32) - ; CHECK: [[OR63:%[0-9]+]]:_(s16) = G_OR [[OR62]], [[TRUNC64]] - ; CHECK: [[COPY371:%[0-9]+]]:_(s32) = COPY [[C3]](s32) - ; CHECK: [[COPY372:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[AND67:%[0-9]+]]:_(s32) = G_AND [[COPY372]], [[C1]] - ; CHECK: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[AND67]], [[COPY371]](s32) - ; CHECK: [[TRUNC65:%[0-9]+]]:_(s16) = G_TRUNC [[SHL64]](s32) - ; CHECK: [[OR64:%[0-9]+]]:_(s16) = G_OR [[OR63]], [[TRUNC65]] - ; CHECK: [[COPY373:%[0-9]+]]:_(s32) = COPY [[C7]](s32) - ; CHECK: [[COPY374:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[COPY374]], [[COPY373]](s32) - ; CHECK: [[TRUNC66:%[0-9]+]]:_(s16) = G_TRUNC [[SHL65]](s32) - ; CHECK: [[OR65:%[0-9]+]]:_(s16) = G_OR [[OR64]], [[TRUNC66]] - ; CHECK: [[COPY375:%[0-9]+]]:_(s32) = COPY [[C8]](s32) - ; CHECK: [[COPY376:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[COPY376]], [[COPY375]](s32) - ; CHECK: [[TRUNC67:%[0-9]+]]:_(s16) = G_TRUNC [[SHL66]](s32) - ; CHECK: [[OR66:%[0-9]+]]:_(s16) = G_OR [[OR65]], [[TRUNC67]] - ; CHECK: [[COPY377:%[0-9]+]]:_(s32) = COPY [[C9]](s32) - ; CHECK: [[COPY378:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[COPY378]], [[COPY377]](s32) - ; CHECK: [[TRUNC68:%[0-9]+]]:_(s16) = G_TRUNC [[SHL67]](s32) - ; CHECK: [[OR67:%[0-9]+]]:_(s16) = G_OR [[OR66]], [[TRUNC68]] - ; CHECK: [[COPY379:%[0-9]+]]:_(s32) = COPY [[C10]](s32) - ; CHECK: [[COPY380:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[COPY380]], [[COPY379]](s32) - ; CHECK: [[TRUNC69:%[0-9]+]]:_(s16) = G_TRUNC [[SHL68]](s32) - ; CHECK: [[OR68:%[0-9]+]]:_(s16) = G_OR [[OR67]], [[TRUNC69]] - ; CHECK: [[COPY381:%[0-9]+]]:_(s32) = COPY [[C11]](s32) - ; CHECK: [[COPY382:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[COPY382]], [[COPY381]](s32) - ; CHECK: [[TRUNC70:%[0-9]+]]:_(s16) = G_TRUNC [[SHL69]](s32) - ; CHECK: [[OR69:%[0-9]+]]:_(s16) = G_OR [[OR68]], [[TRUNC70]] - ; CHECK: [[COPY383:%[0-9]+]]:_(s32) = COPY [[C12]](s32) - ; CHECK: [[COPY384:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[COPY384]], [[COPY383]](s32) - ; CHECK: [[TRUNC71:%[0-9]+]]:_(s16) = G_TRUNC [[SHL70]](s32) - ; CHECK: [[OR70:%[0-9]+]]:_(s16) = G_OR [[OR69]], [[TRUNC71]] - ; CHECK: [[COPY385:%[0-9]+]]:_(s32) = COPY [[C13]](s32) - ; CHECK: [[COPY386:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: 
[[SHL71:%[0-9]+]]:_(s32) = G_SHL [[COPY386]], [[COPY385]](s32) - ; CHECK: [[TRUNC72:%[0-9]+]]:_(s16) = G_TRUNC [[SHL71]](s32) - ; CHECK: [[OR71:%[0-9]+]]:_(s16) = G_OR [[OR70]], [[TRUNC72]] - ; CHECK: [[COPY387:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY61:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND28:%[0-9]+]]:_(s32) = G_AND [[COPY61]], [[C7]] + ; CHECK: [[LSHR29:%[0-9]+]]:_(s32) = G_LSHR [[AND28]], [[COPY60]](s32) + ; CHECK: [[COPY62:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY63:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[AND29:%[0-9]+]]:_(s32) = G_AND [[COPY63]], [[C7]] + ; CHECK: [[LSHR30:%[0-9]+]]:_(s32) = G_LSHR [[AND29]], [[COPY62]](s32) + ; CHECK: [[COPY64:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY65:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[COPY65]], [[C5]](s32) + ; CHECK: [[OR2:%[0-9]+]]:_(s32) = G_OR [[COPY64]], [[SHL2]] + ; CHECK: [[COPY66:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[COPY67:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[COPY67]], [[C5]](s32) + ; CHECK: [[OR3:%[0-9]+]]:_(s32) = G_OR [[COPY66]], [[SHL3]] + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR2]](s32), [[OR3]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s1088) = G_MERGE_VALUES [[MV2]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(s544) = G_TRUNC [[MV3]](s1088) + ; CHECK: [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC3]](s544) + ; CHECK: [[LSHR31:%[0-9]+]]:_(s32) = G_LSHR [[UV17]], [[C5]](s32) + ; CHECK: [[COPY68:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY69:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND30:%[0-9]+]]:_(s32) = G_AND [[COPY69]], [[C7]] + ; CHECK: [[LSHR32:%[0-9]+]]:_(s32) = G_LSHR [[AND30]], [[COPY68]](s32) + ; CHECK: [[COPY70:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY71:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND31:%[0-9]+]]:_(s32) = G_AND [[COPY71]], [[C7]] + ; CHECK: [[LSHR33:%[0-9]+]]:_(s32) = G_LSHR [[AND31]], [[COPY70]](s32) + ; CHECK: [[COPY72:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY73:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND32:%[0-9]+]]:_(s32) = G_AND [[COPY73]], [[C7]] + ; CHECK: [[LSHR34:%[0-9]+]]:_(s32) = G_LSHR [[AND32]], [[COPY72]](s32) + ; CHECK: [[COPY74:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY75:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND33:%[0-9]+]]:_(s32) = G_AND [[COPY75]], [[C7]] + ; CHECK: [[LSHR35:%[0-9]+]]:_(s32) = G_LSHR [[AND33]], [[COPY74]](s32) + ; CHECK: [[COPY76:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY77:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND34:%[0-9]+]]:_(s32) = G_AND [[COPY77]], [[C7]] + ; CHECK: [[LSHR36:%[0-9]+]]:_(s32) = G_LSHR [[AND34]], [[COPY76]](s32) + ; CHECK: [[COPY78:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY79:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND35:%[0-9]+]]:_(s32) = G_AND 
[[COPY79]], [[C7]] + ; CHECK: [[LSHR37:%[0-9]+]]:_(s32) = G_LSHR [[AND35]], [[COPY78]](s32) + ; CHECK: [[COPY80:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY81:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND36:%[0-9]+]]:_(s32) = G_AND [[COPY81]], [[C7]] + ; CHECK: [[LSHR38:%[0-9]+]]:_(s32) = G_LSHR [[AND36]], [[COPY80]](s32) + ; CHECK: [[COPY82:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY83:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND37:%[0-9]+]]:_(s32) = G_AND [[COPY83]], [[C7]] + ; CHECK: [[LSHR39:%[0-9]+]]:_(s32) = G_LSHR [[AND37]], [[COPY82]](s32) + ; CHECK: [[COPY84:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY85:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND38:%[0-9]+]]:_(s32) = G_AND [[COPY85]], [[C7]] + ; CHECK: [[LSHR40:%[0-9]+]]:_(s32) = G_LSHR [[AND38]], [[COPY84]](s32) + ; CHECK: [[COPY86:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY87:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND39:%[0-9]+]]:_(s32) = G_AND [[COPY87]], [[C7]] + ; CHECK: [[LSHR41:%[0-9]+]]:_(s32) = G_LSHR [[AND39]], [[COPY86]](s32) + ; CHECK: [[COPY88:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY89:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND40:%[0-9]+]]:_(s32) = G_AND [[COPY89]], [[C7]] + ; CHECK: [[LSHR42:%[0-9]+]]:_(s32) = G_LSHR [[AND40]], [[COPY88]](s32) + ; CHECK: [[COPY90:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY91:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND41:%[0-9]+]]:_(s32) = G_AND [[COPY91]], [[C7]] + ; CHECK: [[LSHR43:%[0-9]+]]:_(s32) = G_LSHR [[AND41]], [[COPY90]](s32) + ; CHECK: [[COPY92:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY93:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND42:%[0-9]+]]:_(s32) = G_AND [[COPY93]], [[C7]] + ; CHECK: [[LSHR44:%[0-9]+]]:_(s32) = G_LSHR [[AND42]], [[COPY92]](s32) + ; CHECK: [[COPY94:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY95:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND43:%[0-9]+]]:_(s32) = G_AND [[COPY95]], [[C7]] + ; CHECK: [[LSHR45:%[0-9]+]]:_(s32) = G_LSHR [[AND43]], [[COPY94]](s32) + ; CHECK: [[COPY96:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY97:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND44:%[0-9]+]]:_(s32) = G_AND [[COPY97]], [[C7]] + ; CHECK: [[LSHR46:%[0-9]+]]:_(s32) = G_LSHR [[AND44]], [[COPY96]](s32) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR46]](s32) + ; CHECK: [[COPY98:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY99:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND45:%[0-9]+]]:_(s32) = G_AND [[COPY99]], [[C7]] + ; CHECK: [[LSHR47:%[0-9]+]]:_(s32) = G_LSHR [[AND45]], [[COPY98]](s32) + ; CHECK: [[COPY100:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY101:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND46:%[0-9]+]]:_(s32) = G_AND [[COPY101]], [[C7]] + ; CHECK: [[LSHR48:%[0-9]+]]:_(s32) = G_LSHR [[AND46]], [[COPY100]](s32) + ; CHECK: [[COPY102:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY103:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND47:%[0-9]+]]:_(s32) = G_AND [[COPY103]], [[C7]] + ; CHECK: [[LSHR49:%[0-9]+]]:_(s32) = G_LSHR [[AND47]], [[COPY102]](s32) + ; CHECK: [[COPY104:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY105:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND48:%[0-9]+]]:_(s32) = G_AND [[COPY105]], [[C7]] + ; CHECK: [[LSHR50:%[0-9]+]]:_(s32) = G_LSHR [[AND48]], [[COPY104]](s32) + ; CHECK: [[COPY106:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY107:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + 
; CHECK: [[AND49:%[0-9]+]]:_(s32) = G_AND [[COPY107]], [[C7]] + ; CHECK: [[LSHR51:%[0-9]+]]:_(s32) = G_LSHR [[AND49]], [[COPY106]](s32) + ; CHECK: [[COPY108:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY109:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND50:%[0-9]+]]:_(s32) = G_AND [[COPY109]], [[C7]] + ; CHECK: [[LSHR52:%[0-9]+]]:_(s32) = G_LSHR [[AND50]], [[COPY108]](s32) + ; CHECK: [[COPY110:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY111:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND51:%[0-9]+]]:_(s32) = G_AND [[COPY111]], [[C7]] + ; CHECK: [[LSHR53:%[0-9]+]]:_(s32) = G_LSHR [[AND51]], [[COPY110]](s32) + ; CHECK: [[COPY112:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY113:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND52:%[0-9]+]]:_(s32) = G_AND [[COPY113]], [[C7]] + ; CHECK: [[LSHR54:%[0-9]+]]:_(s32) = G_LSHR [[AND52]], [[COPY112]](s32) + ; CHECK: [[COPY114:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY115:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND53:%[0-9]+]]:_(s32) = G_AND [[COPY115]], [[C7]] + ; CHECK: [[LSHR55:%[0-9]+]]:_(s32) = G_LSHR [[AND53]], [[COPY114]](s32) + ; CHECK: [[COPY116:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY117:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND54:%[0-9]+]]:_(s32) = G_AND [[COPY117]], [[C7]] + ; CHECK: [[LSHR56:%[0-9]+]]:_(s32) = G_LSHR [[AND54]], [[COPY116]](s32) + ; CHECK: [[COPY118:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY119:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND55:%[0-9]+]]:_(s32) = G_AND [[COPY119]], [[C7]] + ; CHECK: [[LSHR57:%[0-9]+]]:_(s32) = G_LSHR [[AND55]], [[COPY118]](s32) + ; CHECK: [[COPY120:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY121:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND56:%[0-9]+]]:_(s32) = G_AND [[COPY121]], [[C7]] + ; CHECK: [[LSHR58:%[0-9]+]]:_(s32) = G_LSHR [[AND56]], [[COPY120]](s32) + ; CHECK: [[COPY122:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY123:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND57:%[0-9]+]]:_(s32) = G_AND [[COPY123]], [[C7]] + ; CHECK: [[LSHR59:%[0-9]+]]:_(s32) = G_LSHR [[AND57]], [[COPY122]](s32) + ; CHECK: [[COPY124:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY125:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND58:%[0-9]+]]:_(s32) = G_AND [[COPY125]], [[C7]] + ; CHECK: [[LSHR60:%[0-9]+]]:_(s32) = G_LSHR [[AND58]], [[COPY124]](s32) + ; CHECK: [[COPY126:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY127:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND59:%[0-9]+]]:_(s32) = G_AND [[COPY127]], [[C7]] + ; CHECK: [[LSHR61:%[0-9]+]]:_(s32) = G_LSHR [[AND59]], [[COPY126]](s32) + ; CHECK: [[COPY128:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY129:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[COPY129]], [[C5]](s32) + ; CHECK: [[OR4:%[0-9]+]]:_(s32) = G_OR [[COPY128]], [[SHL4]] + ; CHECK: [[COPY130:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[COPY131:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL5:%[0-9]+]]:_(s32) = G_SHL [[COPY131]], [[C5]](s32) + ; CHECK: [[OR5:%[0-9]+]]:_(s32) = G_OR [[COPY130]], [[SHL5]] + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR4]](s32), [[OR5]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s1088) = G_MERGE_VALUES [[MV4]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), 
[[DEF]](s64) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(s544) = G_TRUNC [[MV5]](s1088) + ; CHECK: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC5]](s544) + ; CHECK: [[LSHR62:%[0-9]+]]:_(s32) = G_LSHR [[UV34]], [[C5]](s32) + ; CHECK: [[COPY132:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY133:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND60:%[0-9]+]]:_(s32) = G_AND [[COPY133]], [[C7]] + ; CHECK: [[LSHR63:%[0-9]+]]:_(s32) = G_LSHR [[AND60]], [[COPY132]](s32) + ; CHECK: [[COPY134:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY135:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND61:%[0-9]+]]:_(s32) = G_AND [[COPY135]], [[C7]] + ; CHECK: [[LSHR64:%[0-9]+]]:_(s32) = G_LSHR [[AND61]], [[COPY134]](s32) + ; CHECK: [[COPY136:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY137:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND62:%[0-9]+]]:_(s32) = G_AND [[COPY137]], [[C7]] + ; CHECK: [[LSHR65:%[0-9]+]]:_(s32) = G_LSHR [[AND62]], [[COPY136]](s32) + ; CHECK: [[COPY138:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY139:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND63:%[0-9]+]]:_(s32) = G_AND [[COPY139]], [[C7]] + ; CHECK: [[LSHR66:%[0-9]+]]:_(s32) = G_LSHR [[AND63]], [[COPY138]](s32) + ; CHECK: [[COPY140:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY141:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND64:%[0-9]+]]:_(s32) = G_AND [[COPY141]], [[C7]] + ; CHECK: [[LSHR67:%[0-9]+]]:_(s32) = G_LSHR [[AND64]], [[COPY140]](s32) + ; CHECK: [[COPY142:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY143:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND65:%[0-9]+]]:_(s32) = G_AND [[COPY143]], [[C7]] + ; CHECK: [[LSHR68:%[0-9]+]]:_(s32) = G_LSHR [[AND65]], [[COPY142]](s32) + ; CHECK: [[COPY144:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY145:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND66:%[0-9]+]]:_(s32) = G_AND [[COPY145]], [[C7]] + ; CHECK: [[LSHR69:%[0-9]+]]:_(s32) = G_LSHR [[AND66]], [[COPY144]](s32) + ; CHECK: [[COPY146:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY147:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND67:%[0-9]+]]:_(s32) = G_AND [[COPY147]], [[C7]] + ; CHECK: [[LSHR70:%[0-9]+]]:_(s32) = G_LSHR [[AND67]], [[COPY146]](s32) + ; CHECK: [[COPY148:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY149:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND68:%[0-9]+]]:_(s32) = G_AND [[COPY149]], [[C7]] + ; CHECK: [[LSHR71:%[0-9]+]]:_(s32) = G_LSHR [[AND68]], [[COPY148]](s32) + ; CHECK: [[COPY150:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY151:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND69:%[0-9]+]]:_(s32) = G_AND [[COPY151]], [[C7]] + ; CHECK: [[LSHR72:%[0-9]+]]:_(s32) = G_LSHR [[AND69]], [[COPY150]](s32) + ; CHECK: [[COPY152:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY153:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND70:%[0-9]+]]:_(s32) = G_AND [[COPY153]], [[C7]] + ; CHECK: [[LSHR73:%[0-9]+]]:_(s32) = G_LSHR [[AND70]], [[COPY152]](s32) + ; CHECK: [[COPY154:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY155:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: 
[[AND71:%[0-9]+]]:_(s32) = G_AND [[COPY155]], [[C7]] + ; CHECK: [[LSHR74:%[0-9]+]]:_(s32) = G_LSHR [[AND71]], [[COPY154]](s32) + ; CHECK: [[COPY156:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY157:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND72:%[0-9]+]]:_(s32) = G_AND [[COPY157]], [[C7]] + ; CHECK: [[LSHR75:%[0-9]+]]:_(s32) = G_LSHR [[AND72]], [[COPY156]](s32) + ; CHECK: [[COPY158:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY159:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND73:%[0-9]+]]:_(s32) = G_AND [[COPY159]], [[C7]] + ; CHECK: [[LSHR76:%[0-9]+]]:_(s32) = G_LSHR [[AND73]], [[COPY158]](s32) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR76]](s32) + ; CHECK: [[COPY160:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY161:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND74:%[0-9]+]]:_(s32) = G_AND [[COPY161]], [[C7]] + ; CHECK: [[LSHR77:%[0-9]+]]:_(s32) = G_LSHR [[AND74]], [[COPY160]](s32) + ; CHECK: [[COPY162:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY163:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND75:%[0-9]+]]:_(s32) = G_AND [[COPY163]], [[C7]] + ; CHECK: [[LSHR78:%[0-9]+]]:_(s32) = G_LSHR [[AND75]], [[COPY162]](s32) + ; CHECK: [[COPY164:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY165:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND76:%[0-9]+]]:_(s32) = G_AND [[COPY165]], [[C7]] + ; CHECK: [[LSHR79:%[0-9]+]]:_(s32) = G_LSHR [[AND76]], [[COPY164]](s32) + ; CHECK: [[COPY166:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY167:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND77:%[0-9]+]]:_(s32) = G_AND [[COPY167]], [[C7]] + ; CHECK: [[LSHR80:%[0-9]+]]:_(s32) = G_LSHR [[AND77]], [[COPY166]](s32) + ; CHECK: [[COPY168:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY169:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND78:%[0-9]+]]:_(s32) = G_AND [[COPY169]], [[C7]] + ; CHECK: [[LSHR81:%[0-9]+]]:_(s32) = G_LSHR [[AND78]], [[COPY168]](s32) + ; CHECK: [[COPY170:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY171:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND79:%[0-9]+]]:_(s32) = G_AND [[COPY171]], [[C7]] + ; CHECK: [[LSHR82:%[0-9]+]]:_(s32) = G_LSHR [[AND79]], [[COPY170]](s32) + ; CHECK: [[COPY172:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY173:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND80:%[0-9]+]]:_(s32) = G_AND [[COPY173]], [[C7]] + ; CHECK: [[LSHR83:%[0-9]+]]:_(s32) = G_LSHR [[AND80]], [[COPY172]](s32) + ; CHECK: [[COPY174:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY175:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND81:%[0-9]+]]:_(s32) = G_AND [[COPY175]], [[C7]] + ; CHECK: [[LSHR84:%[0-9]+]]:_(s32) = G_LSHR [[AND81]], [[COPY174]](s32) + ; CHECK: [[COPY176:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY177:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND82:%[0-9]+]]:_(s32) = G_AND [[COPY177]], [[C7]] + ; CHECK: [[LSHR85:%[0-9]+]]:_(s32) = G_LSHR [[AND82]], [[COPY176]](s32) + ; CHECK: [[COPY178:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY179:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND83:%[0-9]+]]:_(s32) = G_AND [[COPY179]], [[C7]] + ; CHECK: [[LSHR86:%[0-9]+]]:_(s32) = G_LSHR [[AND83]], [[COPY178]](s32) + ; CHECK: [[COPY180:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY181:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND84:%[0-9]+]]:_(s32) = G_AND [[COPY181]], [[C7]] + ; CHECK: [[LSHR87:%[0-9]+]]:_(s32) = G_LSHR [[AND84]], [[COPY180]](s32) + ; CHECK: 
[[COPY182:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY183:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND85:%[0-9]+]]:_(s32) = G_AND [[COPY183]], [[C7]] + ; CHECK: [[LSHR88:%[0-9]+]]:_(s32) = G_LSHR [[AND85]], [[COPY182]](s32) + ; CHECK: [[COPY184:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY185:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND86:%[0-9]+]]:_(s32) = G_AND [[COPY185]], [[C7]] + ; CHECK: [[LSHR89:%[0-9]+]]:_(s32) = G_LSHR [[AND86]], [[COPY184]](s32) + ; CHECK: [[COPY186:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY187:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND87:%[0-9]+]]:_(s32) = G_AND [[COPY187]], [[C7]] + ; CHECK: [[LSHR90:%[0-9]+]]:_(s32) = G_LSHR [[AND87]], [[COPY186]](s32) + ; CHECK: [[COPY188:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY189:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND88:%[0-9]+]]:_(s32) = G_AND [[COPY189]], [[C7]] + ; CHECK: [[LSHR91:%[0-9]+]]:_(s32) = G_LSHR [[AND88]], [[COPY188]](s32) + ; CHECK: [[COPY190:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY191:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND89:%[0-9]+]]:_(s32) = G_AND [[COPY191]], [[C7]] + ; CHECK: [[LSHR92:%[0-9]+]]:_(s32) = G_LSHR [[AND89]], [[COPY190]](s32) + ; CHECK: [[COPY192:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY193:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL6:%[0-9]+]]:_(s32) = G_SHL [[COPY193]], [[C5]](s32) + ; CHECK: [[OR6:%[0-9]+]]:_(s32) = G_OR [[COPY192]], [[SHL6]] + ; CHECK: [[COPY194:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[COPY195:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL7:%[0-9]+]]:_(s32) = G_SHL [[COPY195]], [[C5]](s32) + ; CHECK: [[OR7:%[0-9]+]]:_(s32) = G_OR [[COPY194]], [[SHL7]] + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[OR6]](s32), [[OR7]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s1088) = G_MERGE_VALUES [[MV6]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(s544) = G_TRUNC [[MV7]](s1088) + ; CHECK: [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32), [[UV66:%[0-9]+]]:_(s32), [[UV67:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC7]](s544) + ; CHECK: [[LSHR93:%[0-9]+]]:_(s32) = G_LSHR [[UV51]], [[C5]](s32) + ; CHECK: [[COPY196:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY197:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND90:%[0-9]+]]:_(s32) = G_AND [[COPY197]], [[C7]] + ; CHECK: [[LSHR94:%[0-9]+]]:_(s32) = G_LSHR [[AND90]], [[COPY196]](s32) + ; CHECK: [[COPY198:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY199:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND91:%[0-9]+]]:_(s32) = G_AND [[COPY199]], [[C7]] + ; CHECK: [[LSHR95:%[0-9]+]]:_(s32) = G_LSHR [[AND91]], [[COPY198]](s32) + ; CHECK: [[COPY200:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY201:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND92:%[0-9]+]]:_(s32) = G_AND [[COPY201]], [[C7]] + ; CHECK: [[LSHR96:%[0-9]+]]:_(s32) = G_LSHR [[AND92]], [[COPY200]](s32) + ; CHECK: [[COPY202:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; 
CHECK: [[COPY203:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND93:%[0-9]+]]:_(s32) = G_AND [[COPY203]], [[C7]] + ; CHECK: [[LSHR97:%[0-9]+]]:_(s32) = G_LSHR [[AND93]], [[COPY202]](s32) + ; CHECK: [[COPY204:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY205:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND94:%[0-9]+]]:_(s32) = G_AND [[COPY205]], [[C7]] + ; CHECK: [[LSHR98:%[0-9]+]]:_(s32) = G_LSHR [[AND94]], [[COPY204]](s32) + ; CHECK: [[COPY206:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY207:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND95:%[0-9]+]]:_(s32) = G_AND [[COPY207]], [[C7]] + ; CHECK: [[LSHR99:%[0-9]+]]:_(s32) = G_LSHR [[AND95]], [[COPY206]](s32) + ; CHECK: [[COPY208:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY209:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND96:%[0-9]+]]:_(s32) = G_AND [[COPY209]], [[C7]] + ; CHECK: [[LSHR100:%[0-9]+]]:_(s32) = G_LSHR [[AND96]], [[COPY208]](s32) + ; CHECK: [[COPY210:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY211:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND97:%[0-9]+]]:_(s32) = G_AND [[COPY211]], [[C7]] + ; CHECK: [[LSHR101:%[0-9]+]]:_(s32) = G_LSHR [[AND97]], [[COPY210]](s32) + ; CHECK: [[COPY212:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY213:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND98:%[0-9]+]]:_(s32) = G_AND [[COPY213]], [[C7]] + ; CHECK: [[LSHR102:%[0-9]+]]:_(s32) = G_LSHR [[AND98]], [[COPY212]](s32) + ; CHECK: [[COPY214:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY215:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND99:%[0-9]+]]:_(s32) = G_AND [[COPY215]], [[C7]] + ; CHECK: [[LSHR103:%[0-9]+]]:_(s32) = G_LSHR [[AND99]], [[COPY214]](s32) + ; CHECK: [[COPY216:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY217:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND100:%[0-9]+]]:_(s32) = G_AND [[COPY217]], [[C7]] + ; CHECK: [[LSHR104:%[0-9]+]]:_(s32) = G_LSHR [[AND100]], [[COPY216]](s32) + ; CHECK: [[COPY218:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY219:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND101:%[0-9]+]]:_(s32) = G_AND [[COPY219]], [[C7]] + ; CHECK: [[LSHR105:%[0-9]+]]:_(s32) = G_LSHR [[AND101]], [[COPY218]](s32) + ; CHECK: [[COPY220:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY221:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND102:%[0-9]+]]:_(s32) = G_AND [[COPY221]], [[C7]] + ; CHECK: [[LSHR106:%[0-9]+]]:_(s32) = G_LSHR [[AND102]], [[COPY220]](s32) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR106]](s32) + ; CHECK: [[COPY222:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY223:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND103:%[0-9]+]]:_(s32) = G_AND [[COPY223]], [[C7]] + ; CHECK: [[LSHR107:%[0-9]+]]:_(s32) = G_LSHR [[AND103]], [[COPY222]](s32) + ; CHECK: [[COPY224:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY225:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND104:%[0-9]+]]:_(s32) = G_AND [[COPY225]], [[C7]] + ; CHECK: [[LSHR108:%[0-9]+]]:_(s32) = G_LSHR [[AND104]], [[COPY224]](s32) + ; CHECK: [[COPY226:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY227:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND105:%[0-9]+]]:_(s32) = G_AND [[COPY227]], [[C7]] + ; CHECK: [[LSHR109:%[0-9]+]]:_(s32) = G_LSHR [[AND105]], [[COPY226]](s32) + ; CHECK: [[COPY228:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY229:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND106:%[0-9]+]]:_(s32) = G_AND [[COPY229]], [[C7]] + ; CHECK: 
[[LSHR110:%[0-9]+]]:_(s32) = G_LSHR [[AND106]], [[COPY228]](s32) + ; CHECK: [[COPY230:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY231:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND107:%[0-9]+]]:_(s32) = G_AND [[COPY231]], [[C7]] + ; CHECK: [[LSHR111:%[0-9]+]]:_(s32) = G_LSHR [[AND107]], [[COPY230]](s32) + ; CHECK: [[COPY232:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY233:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND108:%[0-9]+]]:_(s32) = G_AND [[COPY233]], [[C7]] + ; CHECK: [[LSHR112:%[0-9]+]]:_(s32) = G_LSHR [[AND108]], [[COPY232]](s32) + ; CHECK: [[COPY234:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY235:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND109:%[0-9]+]]:_(s32) = G_AND [[COPY235]], [[C7]] + ; CHECK: [[LSHR113:%[0-9]+]]:_(s32) = G_LSHR [[AND109]], [[COPY234]](s32) + ; CHECK: [[COPY236:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY237:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND110:%[0-9]+]]:_(s32) = G_AND [[COPY237]], [[C7]] + ; CHECK: [[LSHR114:%[0-9]+]]:_(s32) = G_LSHR [[AND110]], [[COPY236]](s32) + ; CHECK: [[COPY238:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY239:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND111:%[0-9]+]]:_(s32) = G_AND [[COPY239]], [[C7]] + ; CHECK: [[LSHR115:%[0-9]+]]:_(s32) = G_LSHR [[AND111]], [[COPY238]](s32) + ; CHECK: [[COPY240:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY241:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND112:%[0-9]+]]:_(s32) = G_AND [[COPY241]], [[C7]] + ; CHECK: [[LSHR116:%[0-9]+]]:_(s32) = G_LSHR [[AND112]], [[COPY240]](s32) + ; CHECK: [[COPY242:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY243:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND113:%[0-9]+]]:_(s32) = G_AND [[COPY243]], [[C7]] + ; CHECK: [[LSHR117:%[0-9]+]]:_(s32) = G_LSHR [[AND113]], [[COPY242]](s32) + ; CHECK: [[COPY244:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY245:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND114:%[0-9]+]]:_(s32) = G_AND [[COPY245]], [[C7]] + ; CHECK: [[LSHR118:%[0-9]+]]:_(s32) = G_LSHR [[AND114]], [[COPY244]](s32) + ; CHECK: [[COPY246:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY247:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND115:%[0-9]+]]:_(s32) = G_AND [[COPY247]], [[C7]] + ; CHECK: [[LSHR119:%[0-9]+]]:_(s32) = G_LSHR [[AND115]], [[COPY246]](s32) + ; CHECK: [[COPY248:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY249:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND116:%[0-9]+]]:_(s32) = G_AND [[COPY249]], [[C7]] + ; CHECK: [[LSHR120:%[0-9]+]]:_(s32) = G_LSHR [[AND116]], [[COPY248]](s32) + ; CHECK: [[COPY250:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY251:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND117:%[0-9]+]]:_(s32) = G_AND [[COPY251]], [[C7]] + ; CHECK: [[LSHR121:%[0-9]+]]:_(s32) = G_LSHR [[AND117]], [[COPY250]](s32) + ; CHECK: [[COPY252:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY253:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND118:%[0-9]+]]:_(s32) = G_AND [[COPY253]], [[C7]] + ; CHECK: [[LSHR122:%[0-9]+]]:_(s32) = G_LSHR [[AND118]], [[COPY252]](s32) + ; CHECK: [[COPY254:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY255:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND119:%[0-9]+]]:_(s32) = G_AND [[COPY255]], [[C7]] + ; CHECK: [[LSHR123:%[0-9]+]]:_(s32) = G_LSHR [[AND119]], [[COPY254]](s32) + ; CHECK: [[COPY256:%[0-9]+]]:_(s16) = COPY [[TRUNC1]](s16) + ; CHECK: [[AND120:%[0-9]+]]:_(s16) = G_AND 
[[COPY256]], [[C6]] + ; CHECK: [[COPY257:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY258:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[AND121:%[0-9]+]]:_(s32) = G_AND [[COPY258]], [[C1]] + ; CHECK: [[SHL8:%[0-9]+]]:_(s32) = G_SHL [[AND121]], [[COPY257]](s32) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[SHL8]](s32) + ; CHECK: [[OR8:%[0-9]+]]:_(s16) = G_OR [[AND120]], [[TRUNC9]] + ; CHECK: [[COPY259:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY260:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[AND122:%[0-9]+]]:_(s32) = G_AND [[COPY260]], [[C1]] + ; CHECK: [[SHL9:%[0-9]+]]:_(s32) = G_SHL [[AND122]], [[COPY259]](s32) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[SHL9]](s32) + ; CHECK: [[OR9:%[0-9]+]]:_(s16) = G_OR [[OR8]], [[TRUNC10]] + ; CHECK: [[COPY261:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY262:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[AND123:%[0-9]+]]:_(s32) = G_AND [[COPY262]], [[C1]] + ; CHECK: [[SHL10:%[0-9]+]]:_(s32) = G_SHL [[AND123]], [[COPY261]](s32) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[SHL10]](s32) + ; CHECK: [[OR10:%[0-9]+]]:_(s16) = G_OR [[OR9]], [[TRUNC11]] + ; CHECK: [[COPY263:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY264:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[AND124:%[0-9]+]]:_(s32) = G_AND [[COPY264]], [[C1]] + ; CHECK: [[SHL11:%[0-9]+]]:_(s32) = G_SHL [[AND124]], [[COPY263]](s32) + ; CHECK: [[TRUNC12:%[0-9]+]]:_(s16) = G_TRUNC [[SHL11]](s32) + ; CHECK: [[OR11:%[0-9]+]]:_(s16) = G_OR [[OR10]], [[TRUNC12]] + ; CHECK: [[COPY265:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY266:%[0-9]+]]:_(s32) = COPY [[LSHR5]](s32) + ; CHECK: [[AND125:%[0-9]+]]:_(s32) = G_AND [[COPY266]], [[C1]] + ; CHECK: [[SHL12:%[0-9]+]]:_(s32) = G_SHL [[AND125]], [[COPY265]](s32) + ; CHECK: [[TRUNC13:%[0-9]+]]:_(s16) = G_TRUNC [[SHL12]](s32) + ; CHECK: [[OR12:%[0-9]+]]:_(s16) = G_OR [[OR11]], [[TRUNC13]] + ; CHECK: [[COPY267:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY268:%[0-9]+]]:_(s32) = COPY [[LSHR6]](s32) + ; CHECK: [[AND126:%[0-9]+]]:_(s32) = G_AND [[COPY268]], [[C1]] + ; CHECK: [[SHL13:%[0-9]+]]:_(s32) = G_SHL [[AND126]], [[COPY267]](s32) + ; CHECK: [[TRUNC14:%[0-9]+]]:_(s16) = G_TRUNC [[SHL13]](s32) + ; CHECK: [[OR13:%[0-9]+]]:_(s16) = G_OR [[OR12]], [[TRUNC14]] + ; CHECK: [[COPY269:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY270:%[0-9]+]]:_(s32) = COPY [[LSHR7]](s32) + ; CHECK: [[AND127:%[0-9]+]]:_(s32) = G_AND [[COPY270]], [[C1]] + ; CHECK: [[SHL14:%[0-9]+]]:_(s32) = G_SHL [[AND127]], [[COPY269]](s32) + ; CHECK: [[TRUNC15:%[0-9]+]]:_(s16) = G_TRUNC [[SHL14]](s32) + ; CHECK: [[OR14:%[0-9]+]]:_(s16) = G_OR [[OR13]], [[TRUNC15]] + ; CHECK: [[COPY271:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY272:%[0-9]+]]:_(s32) = COPY [[LSHR8]](s32) + ; CHECK: [[AND128:%[0-9]+]]:_(s32) = G_AND [[COPY272]], [[C1]] + ; CHECK: [[SHL15:%[0-9]+]]:_(s32) = G_SHL [[AND128]], [[COPY271]](s32) + ; CHECK: [[TRUNC16:%[0-9]+]]:_(s16) = G_TRUNC [[SHL15]](s32) + ; CHECK: [[OR15:%[0-9]+]]:_(s16) = G_OR [[OR14]], [[TRUNC16]] + ; CHECK: [[COPY273:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY274:%[0-9]+]]:_(s32) = COPY [[LSHR9]](s32) + ; CHECK: [[AND129:%[0-9]+]]:_(s32) = G_AND [[COPY274]], [[C1]] + ; CHECK: [[SHL16:%[0-9]+]]:_(s32) = G_SHL [[AND129]], [[COPY273]](s32) + ; CHECK: [[TRUNC17:%[0-9]+]]:_(s16) = G_TRUNC [[SHL16]](s32) + ; CHECK: [[OR16:%[0-9]+]]:_(s16) = G_OR [[OR15]], [[TRUNC17]] + ; CHECK: [[COPY275:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY276:%[0-9]+]]:_(s32) = 
COPY [[LSHR10]](s32) + ; CHECK: [[AND130:%[0-9]+]]:_(s32) = G_AND [[COPY276]], [[C1]] + ; CHECK: [[SHL17:%[0-9]+]]:_(s32) = G_SHL [[AND130]], [[COPY275]](s32) + ; CHECK: [[TRUNC18:%[0-9]+]]:_(s16) = G_TRUNC [[SHL17]](s32) + ; CHECK: [[OR17:%[0-9]+]]:_(s16) = G_OR [[OR16]], [[TRUNC18]] + ; CHECK: [[COPY277:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY278:%[0-9]+]]:_(s32) = COPY [[LSHR11]](s32) + ; CHECK: [[AND131:%[0-9]+]]:_(s32) = G_AND [[COPY278]], [[C1]] + ; CHECK: [[SHL18:%[0-9]+]]:_(s32) = G_SHL [[AND131]], [[COPY277]](s32) + ; CHECK: [[TRUNC19:%[0-9]+]]:_(s16) = G_TRUNC [[SHL18]](s32) + ; CHECK: [[OR18:%[0-9]+]]:_(s16) = G_OR [[OR17]], [[TRUNC19]] + ; CHECK: [[COPY279:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY280:%[0-9]+]]:_(s32) = COPY [[LSHR12]](s32) + ; CHECK: [[AND132:%[0-9]+]]:_(s32) = G_AND [[COPY280]], [[C1]] + ; CHECK: [[SHL19:%[0-9]+]]:_(s32) = G_SHL [[AND132]], [[COPY279]](s32) + ; CHECK: [[TRUNC20:%[0-9]+]]:_(s16) = G_TRUNC [[SHL19]](s32) + ; CHECK: [[OR19:%[0-9]+]]:_(s16) = G_OR [[OR18]], [[TRUNC20]] + ; CHECK: [[COPY281:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY282:%[0-9]+]]:_(s32) = COPY [[LSHR13]](s32) + ; CHECK: [[AND133:%[0-9]+]]:_(s32) = G_AND [[COPY282]], [[C1]] + ; CHECK: [[SHL20:%[0-9]+]]:_(s32) = G_SHL [[AND133]], [[COPY281]](s32) + ; CHECK: [[TRUNC21:%[0-9]+]]:_(s16) = G_TRUNC [[SHL20]](s32) + ; CHECK: [[OR20:%[0-9]+]]:_(s16) = G_OR [[OR19]], [[TRUNC21]] + ; CHECK: [[COPY283:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY284:%[0-9]+]]:_(s32) = COPY [[LSHR14]](s32) + ; CHECK: [[AND134:%[0-9]+]]:_(s32) = G_AND [[COPY284]], [[C1]] + ; CHECK: [[SHL21:%[0-9]+]]:_(s32) = G_SHL [[AND134]], [[COPY283]](s32) + ; CHECK: [[TRUNC22:%[0-9]+]]:_(s16) = G_TRUNC [[SHL21]](s32) + ; CHECK: [[OR21:%[0-9]+]]:_(s16) = G_OR [[OR20]], [[TRUNC22]] + ; CHECK: [[COPY285:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY286:%[0-9]+]]:_(s32) = COPY [[LSHR15]](s32) + ; CHECK: [[AND135:%[0-9]+]]:_(s32) = G_AND [[COPY286]], [[C1]] + ; CHECK: [[SHL22:%[0-9]+]]:_(s32) = G_SHL [[AND135]], [[COPY285]](s32) + ; CHECK: [[TRUNC23:%[0-9]+]]:_(s16) = G_TRUNC [[SHL22]](s32) + ; CHECK: [[OR22:%[0-9]+]]:_(s16) = G_OR [[OR21]], [[TRUNC23]] + ; CHECK: [[COPY287:%[0-9]+]]:_(s16) = COPY [[TRUNC2]](s16) + ; CHECK: [[AND136:%[0-9]+]]:_(s16) = G_AND [[COPY287]], [[C6]] + ; CHECK: [[COPY288:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY289:%[0-9]+]]:_(s32) = COPY [[UV17]](s32) + ; CHECK: [[AND137:%[0-9]+]]:_(s32) = G_AND [[COPY289]], [[C1]] + ; CHECK: [[SHL23:%[0-9]+]]:_(s32) = G_SHL [[AND137]], [[COPY288]](s32) + ; CHECK: [[TRUNC24:%[0-9]+]]:_(s16) = G_TRUNC [[SHL23]](s32) + ; CHECK: [[OR23:%[0-9]+]]:_(s16) = G_OR [[AND136]], [[TRUNC24]] + ; CHECK: [[COPY290:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY291:%[0-9]+]]:_(s32) = COPY [[LSHR32]](s32) + ; CHECK: [[AND138:%[0-9]+]]:_(s32) = G_AND [[COPY291]], [[C1]] + ; CHECK: [[SHL24:%[0-9]+]]:_(s32) = G_SHL [[AND138]], [[COPY290]](s32) + ; CHECK: [[TRUNC25:%[0-9]+]]:_(s16) = G_TRUNC [[SHL24]](s32) + ; CHECK: [[OR24:%[0-9]+]]:_(s16) = G_OR [[OR23]], [[TRUNC25]] + ; CHECK: [[COPY292:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY293:%[0-9]+]]:_(s32) = COPY [[LSHR33]](s32) + ; CHECK: [[AND139:%[0-9]+]]:_(s32) = G_AND [[COPY293]], [[C1]] + ; CHECK: [[SHL25:%[0-9]+]]:_(s32) = G_SHL [[AND139]], [[COPY292]](s32) + ; CHECK: [[TRUNC26:%[0-9]+]]:_(s16) = G_TRUNC [[SHL25]](s32) + ; CHECK: [[OR25:%[0-9]+]]:_(s16) = G_OR [[OR24]], [[TRUNC26]] + ; CHECK: [[COPY294:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: 
[[COPY295:%[0-9]+]]:_(s32) = COPY [[LSHR34]](s32) + ; CHECK: [[AND140:%[0-9]+]]:_(s32) = G_AND [[COPY295]], [[C1]] + ; CHECK: [[SHL26:%[0-9]+]]:_(s32) = G_SHL [[AND140]], [[COPY294]](s32) + ; CHECK: [[TRUNC27:%[0-9]+]]:_(s16) = G_TRUNC [[SHL26]](s32) + ; CHECK: [[OR26:%[0-9]+]]:_(s16) = G_OR [[OR25]], [[TRUNC27]] + ; CHECK: [[COPY296:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY297:%[0-9]+]]:_(s32) = COPY [[LSHR35]](s32) + ; CHECK: [[AND141:%[0-9]+]]:_(s32) = G_AND [[COPY297]], [[C1]] + ; CHECK: [[SHL27:%[0-9]+]]:_(s32) = G_SHL [[AND141]], [[COPY296]](s32) + ; CHECK: [[TRUNC28:%[0-9]+]]:_(s16) = G_TRUNC [[SHL27]](s32) + ; CHECK: [[OR27:%[0-9]+]]:_(s16) = G_OR [[OR26]], [[TRUNC28]] + ; CHECK: [[COPY298:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY299:%[0-9]+]]:_(s32) = COPY [[LSHR36]](s32) + ; CHECK: [[AND142:%[0-9]+]]:_(s32) = G_AND [[COPY299]], [[C1]] + ; CHECK: [[SHL28:%[0-9]+]]:_(s32) = G_SHL [[AND142]], [[COPY298]](s32) + ; CHECK: [[TRUNC29:%[0-9]+]]:_(s16) = G_TRUNC [[SHL28]](s32) + ; CHECK: [[OR28:%[0-9]+]]:_(s16) = G_OR [[OR27]], [[TRUNC29]] + ; CHECK: [[COPY300:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY301:%[0-9]+]]:_(s32) = COPY [[LSHR37]](s32) + ; CHECK: [[AND143:%[0-9]+]]:_(s32) = G_AND [[COPY301]], [[C1]] + ; CHECK: [[SHL29:%[0-9]+]]:_(s32) = G_SHL [[AND143]], [[COPY300]](s32) + ; CHECK: [[TRUNC30:%[0-9]+]]:_(s16) = G_TRUNC [[SHL29]](s32) + ; CHECK: [[OR29:%[0-9]+]]:_(s16) = G_OR [[OR28]], [[TRUNC30]] + ; CHECK: [[COPY302:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY303:%[0-9]+]]:_(s32) = COPY [[LSHR38]](s32) + ; CHECK: [[AND144:%[0-9]+]]:_(s32) = G_AND [[COPY303]], [[C1]] + ; CHECK: [[SHL30:%[0-9]+]]:_(s32) = G_SHL [[AND144]], [[COPY302]](s32) + ; CHECK: [[TRUNC31:%[0-9]+]]:_(s16) = G_TRUNC [[SHL30]](s32) + ; CHECK: [[OR30:%[0-9]+]]:_(s16) = G_OR [[OR29]], [[TRUNC31]] + ; CHECK: [[COPY304:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY305:%[0-9]+]]:_(s32) = COPY [[LSHR39]](s32) + ; CHECK: [[AND145:%[0-9]+]]:_(s32) = G_AND [[COPY305]], [[C1]] + ; CHECK: [[SHL31:%[0-9]+]]:_(s32) = G_SHL [[AND145]], [[COPY304]](s32) + ; CHECK: [[TRUNC32:%[0-9]+]]:_(s16) = G_TRUNC [[SHL31]](s32) + ; CHECK: [[OR31:%[0-9]+]]:_(s16) = G_OR [[OR30]], [[TRUNC32]] + ; CHECK: [[COPY306:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY307:%[0-9]+]]:_(s32) = COPY [[LSHR40]](s32) + ; CHECK: [[AND146:%[0-9]+]]:_(s32) = G_AND [[COPY307]], [[C1]] + ; CHECK: [[SHL32:%[0-9]+]]:_(s32) = G_SHL [[AND146]], [[COPY306]](s32) + ; CHECK: [[TRUNC33:%[0-9]+]]:_(s16) = G_TRUNC [[SHL32]](s32) + ; CHECK: [[OR32:%[0-9]+]]:_(s16) = G_OR [[OR31]], [[TRUNC33]] + ; CHECK: [[COPY308:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY309:%[0-9]+]]:_(s32) = COPY [[LSHR41]](s32) + ; CHECK: [[AND147:%[0-9]+]]:_(s32) = G_AND [[COPY309]], [[C1]] + ; CHECK: [[SHL33:%[0-9]+]]:_(s32) = G_SHL [[AND147]], [[COPY308]](s32) + ; CHECK: [[TRUNC34:%[0-9]+]]:_(s16) = G_TRUNC [[SHL33]](s32) + ; CHECK: [[OR33:%[0-9]+]]:_(s16) = G_OR [[OR32]], [[TRUNC34]] + ; CHECK: [[COPY310:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY311:%[0-9]+]]:_(s32) = COPY [[LSHR42]](s32) + ; CHECK: [[AND148:%[0-9]+]]:_(s32) = G_AND [[COPY311]], [[C1]] + ; CHECK: [[SHL34:%[0-9]+]]:_(s32) = G_SHL [[AND148]], [[COPY310]](s32) + ; CHECK: [[TRUNC35:%[0-9]+]]:_(s16) = G_TRUNC [[SHL34]](s32) + ; CHECK: [[OR34:%[0-9]+]]:_(s16) = G_OR [[OR33]], [[TRUNC35]] + ; CHECK: [[COPY312:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY313:%[0-9]+]]:_(s32) = COPY [[LSHR43]](s32) + ; CHECK: [[AND149:%[0-9]+]]:_(s32) = G_AND 
[[COPY313]], [[C1]] + ; CHECK: [[SHL35:%[0-9]+]]:_(s32) = G_SHL [[AND149]], [[COPY312]](s32) + ; CHECK: [[TRUNC36:%[0-9]+]]:_(s16) = G_TRUNC [[SHL35]](s32) + ; CHECK: [[OR35:%[0-9]+]]:_(s16) = G_OR [[OR34]], [[TRUNC36]] + ; CHECK: [[COPY314:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY315:%[0-9]+]]:_(s32) = COPY [[LSHR44]](s32) + ; CHECK: [[AND150:%[0-9]+]]:_(s32) = G_AND [[COPY315]], [[C1]] + ; CHECK: [[SHL36:%[0-9]+]]:_(s32) = G_SHL [[AND150]], [[COPY314]](s32) + ; CHECK: [[TRUNC37:%[0-9]+]]:_(s16) = G_TRUNC [[SHL36]](s32) + ; CHECK: [[OR36:%[0-9]+]]:_(s16) = G_OR [[OR35]], [[TRUNC37]] + ; CHECK: [[COPY316:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY317:%[0-9]+]]:_(s32) = COPY [[LSHR45]](s32) + ; CHECK: [[AND151:%[0-9]+]]:_(s32) = G_AND [[COPY317]], [[C1]] + ; CHECK: [[SHL37:%[0-9]+]]:_(s32) = G_SHL [[AND151]], [[COPY316]](s32) + ; CHECK: [[TRUNC38:%[0-9]+]]:_(s16) = G_TRUNC [[SHL37]](s32) + ; CHECK: [[OR37:%[0-9]+]]:_(s16) = G_OR [[OR36]], [[TRUNC38]] + ; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[OR22]](s16) + ; CHECK: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[OR37]](s16) + ; CHECK: [[SHL38:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C5]](s32) + ; CHECK: [[OR38:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL38]] + ; CHECK: [[COPY318:%[0-9]+]]:_(s16) = COPY [[TRUNC4]](s16) + ; CHECK: [[AND152:%[0-9]+]]:_(s16) = G_AND [[COPY318]], [[C6]] + ; CHECK: [[COPY319:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY320:%[0-9]+]]:_(s32) = COPY [[LSHR31]](s32) + ; CHECK: [[AND153:%[0-9]+]]:_(s32) = G_AND [[COPY320]], [[C1]] + ; CHECK: [[SHL39:%[0-9]+]]:_(s32) = G_SHL [[AND153]], [[COPY319]](s32) + ; CHECK: [[TRUNC39:%[0-9]+]]:_(s16) = G_TRUNC [[SHL39]](s32) + ; CHECK: [[OR39:%[0-9]+]]:_(s16) = G_OR [[AND152]], [[TRUNC39]] + ; CHECK: [[COPY321:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY322:%[0-9]+]]:_(s32) = COPY [[UV34]](s32) + ; CHECK: [[AND154:%[0-9]+]]:_(s32) = G_AND [[COPY322]], [[C1]] + ; CHECK: [[SHL40:%[0-9]+]]:_(s32) = G_SHL [[AND154]], [[COPY321]](s32) + ; CHECK: [[TRUNC40:%[0-9]+]]:_(s16) = G_TRUNC [[SHL40]](s32) + ; CHECK: [[OR40:%[0-9]+]]:_(s16) = G_OR [[OR39]], [[TRUNC40]] + ; CHECK: [[COPY323:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY324:%[0-9]+]]:_(s32) = COPY [[LSHR63]](s32) + ; CHECK: [[AND155:%[0-9]+]]:_(s32) = G_AND [[COPY324]], [[C1]] + ; CHECK: [[SHL41:%[0-9]+]]:_(s32) = G_SHL [[AND155]], [[COPY323]](s32) + ; CHECK: [[TRUNC41:%[0-9]+]]:_(s16) = G_TRUNC [[SHL41]](s32) + ; CHECK: [[OR41:%[0-9]+]]:_(s16) = G_OR [[OR40]], [[TRUNC41]] + ; CHECK: [[COPY325:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY326:%[0-9]+]]:_(s32) = COPY [[LSHR64]](s32) + ; CHECK: [[AND156:%[0-9]+]]:_(s32) = G_AND [[COPY326]], [[C1]] + ; CHECK: [[SHL42:%[0-9]+]]:_(s32) = G_SHL [[AND156]], [[COPY325]](s32) + ; CHECK: [[TRUNC42:%[0-9]+]]:_(s16) = G_TRUNC [[SHL42]](s32) + ; CHECK: [[OR42:%[0-9]+]]:_(s16) = G_OR [[OR41]], [[TRUNC42]] + ; CHECK: [[COPY327:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY328:%[0-9]+]]:_(s32) = COPY [[LSHR65]](s32) + ; CHECK: [[AND157:%[0-9]+]]:_(s32) = G_AND [[COPY328]], [[C1]] + ; CHECK: [[SHL43:%[0-9]+]]:_(s32) = G_SHL [[AND157]], [[COPY327]](s32) + ; CHECK: [[TRUNC43:%[0-9]+]]:_(s16) = G_TRUNC [[SHL43]](s32) + ; CHECK: [[OR43:%[0-9]+]]:_(s16) = G_OR [[OR42]], [[TRUNC43]] + ; CHECK: [[COPY329:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY330:%[0-9]+]]:_(s32) = COPY [[LSHR66]](s32) + ; CHECK: [[AND158:%[0-9]+]]:_(s32) = G_AND [[COPY330]], [[C1]] + ; CHECK: [[SHL44:%[0-9]+]]:_(s32) = G_SHL [[AND158]], [[COPY329]](s32) + ; CHECK: 
[[TRUNC44:%[0-9]+]]:_(s16) = G_TRUNC [[SHL44]](s32) + ; CHECK: [[OR44:%[0-9]+]]:_(s16) = G_OR [[OR43]], [[TRUNC44]] + ; CHECK: [[COPY331:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY332:%[0-9]+]]:_(s32) = COPY [[LSHR67]](s32) + ; CHECK: [[AND159:%[0-9]+]]:_(s32) = G_AND [[COPY332]], [[C1]] + ; CHECK: [[SHL45:%[0-9]+]]:_(s32) = G_SHL [[AND159]], [[COPY331]](s32) + ; CHECK: [[TRUNC45:%[0-9]+]]:_(s16) = G_TRUNC [[SHL45]](s32) + ; CHECK: [[OR45:%[0-9]+]]:_(s16) = G_OR [[OR44]], [[TRUNC45]] + ; CHECK: [[COPY333:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY334:%[0-9]+]]:_(s32) = COPY [[LSHR68]](s32) + ; CHECK: [[AND160:%[0-9]+]]:_(s32) = G_AND [[COPY334]], [[C1]] + ; CHECK: [[SHL46:%[0-9]+]]:_(s32) = G_SHL [[AND160]], [[COPY333]](s32) + ; CHECK: [[TRUNC46:%[0-9]+]]:_(s16) = G_TRUNC [[SHL46]](s32) + ; CHECK: [[OR46:%[0-9]+]]:_(s16) = G_OR [[OR45]], [[TRUNC46]] + ; CHECK: [[COPY335:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY336:%[0-9]+]]:_(s32) = COPY [[LSHR69]](s32) + ; CHECK: [[AND161:%[0-9]+]]:_(s32) = G_AND [[COPY336]], [[C1]] + ; CHECK: [[SHL47:%[0-9]+]]:_(s32) = G_SHL [[AND161]], [[COPY335]](s32) + ; CHECK: [[TRUNC47:%[0-9]+]]:_(s16) = G_TRUNC [[SHL47]](s32) + ; CHECK: [[OR47:%[0-9]+]]:_(s16) = G_OR [[OR46]], [[TRUNC47]] + ; CHECK: [[COPY337:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY338:%[0-9]+]]:_(s32) = COPY [[LSHR70]](s32) + ; CHECK: [[AND162:%[0-9]+]]:_(s32) = G_AND [[COPY338]], [[C1]] + ; CHECK: [[SHL48:%[0-9]+]]:_(s32) = G_SHL [[AND162]], [[COPY337]](s32) + ; CHECK: [[TRUNC48:%[0-9]+]]:_(s16) = G_TRUNC [[SHL48]](s32) + ; CHECK: [[OR48:%[0-9]+]]:_(s16) = G_OR [[OR47]], [[TRUNC48]] + ; CHECK: [[COPY339:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY340:%[0-9]+]]:_(s32) = COPY [[LSHR71]](s32) + ; CHECK: [[AND163:%[0-9]+]]:_(s32) = G_AND [[COPY340]], [[C1]] + ; CHECK: [[SHL49:%[0-9]+]]:_(s32) = G_SHL [[AND163]], [[COPY339]](s32) + ; CHECK: [[TRUNC49:%[0-9]+]]:_(s16) = G_TRUNC [[SHL49]](s32) + ; CHECK: [[OR49:%[0-9]+]]:_(s16) = G_OR [[OR48]], [[TRUNC49]] + ; CHECK: [[COPY341:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY342:%[0-9]+]]:_(s32) = COPY [[LSHR72]](s32) + ; CHECK: [[AND164:%[0-9]+]]:_(s32) = G_AND [[COPY342]], [[C1]] + ; CHECK: [[SHL50:%[0-9]+]]:_(s32) = G_SHL [[AND164]], [[COPY341]](s32) + ; CHECK: [[TRUNC50:%[0-9]+]]:_(s16) = G_TRUNC [[SHL50]](s32) + ; CHECK: [[OR50:%[0-9]+]]:_(s16) = G_OR [[OR49]], [[TRUNC50]] + ; CHECK: [[COPY343:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY344:%[0-9]+]]:_(s32) = COPY [[LSHR73]](s32) + ; CHECK: [[AND165:%[0-9]+]]:_(s32) = G_AND [[COPY344]], [[C1]] + ; CHECK: [[SHL51:%[0-9]+]]:_(s32) = G_SHL [[AND165]], [[COPY343]](s32) + ; CHECK: [[TRUNC51:%[0-9]+]]:_(s16) = G_TRUNC [[SHL51]](s32) + ; CHECK: [[OR51:%[0-9]+]]:_(s16) = G_OR [[OR50]], [[TRUNC51]] + ; CHECK: [[COPY345:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY346:%[0-9]+]]:_(s32) = COPY [[LSHR74]](s32) + ; CHECK: [[AND166:%[0-9]+]]:_(s32) = G_AND [[COPY346]], [[C1]] + ; CHECK: [[SHL52:%[0-9]+]]:_(s32) = G_SHL [[AND166]], [[COPY345]](s32) + ; CHECK: [[TRUNC52:%[0-9]+]]:_(s16) = G_TRUNC [[SHL52]](s32) + ; CHECK: [[OR52:%[0-9]+]]:_(s16) = G_OR [[OR51]], [[TRUNC52]] + ; CHECK: [[COPY347:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY348:%[0-9]+]]:_(s32) = COPY [[LSHR75]](s32) + ; CHECK: [[AND167:%[0-9]+]]:_(s32) = G_AND [[COPY348]], [[C1]] + ; CHECK: [[SHL53:%[0-9]+]]:_(s32) = G_SHL [[AND167]], [[COPY347]](s32) + ; CHECK: [[TRUNC53:%[0-9]+]]:_(s16) = G_TRUNC [[SHL53]](s32) + ; CHECK: [[OR53:%[0-9]+]]:_(s16) = G_OR 
[[OR52]], [[TRUNC53]] + ; CHECK: [[COPY349:%[0-9]+]]:_(s16) = COPY [[TRUNC6]](s16) + ; CHECK: [[AND168:%[0-9]+]]:_(s16) = G_AND [[COPY349]], [[C6]] + ; CHECK: [[COPY350:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY351:%[0-9]+]]:_(s32) = COPY [[LSHR77]](s32) + ; CHECK: [[AND169:%[0-9]+]]:_(s32) = G_AND [[COPY351]], [[C1]] + ; CHECK: [[SHL54:%[0-9]+]]:_(s32) = G_SHL [[AND169]], [[COPY350]](s32) + ; CHECK: [[TRUNC54:%[0-9]+]]:_(s16) = G_TRUNC [[SHL54]](s32) + ; CHECK: [[OR54:%[0-9]+]]:_(s16) = G_OR [[AND168]], [[TRUNC54]] + ; CHECK: [[COPY352:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY353:%[0-9]+]]:_(s32) = COPY [[LSHR62]](s32) + ; CHECK: [[AND170:%[0-9]+]]:_(s32) = G_AND [[COPY353]], [[C1]] + ; CHECK: [[SHL55:%[0-9]+]]:_(s32) = G_SHL [[AND170]], [[COPY352]](s32) + ; CHECK: [[TRUNC55:%[0-9]+]]:_(s16) = G_TRUNC [[SHL55]](s32) + ; CHECK: [[OR55:%[0-9]+]]:_(s16) = G_OR [[OR54]], [[TRUNC55]] + ; CHECK: [[COPY354:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY355:%[0-9]+]]:_(s32) = COPY [[UV51]](s32) + ; CHECK: [[AND171:%[0-9]+]]:_(s32) = G_AND [[COPY355]], [[C1]] + ; CHECK: [[SHL56:%[0-9]+]]:_(s32) = G_SHL [[AND171]], [[COPY354]](s32) + ; CHECK: [[TRUNC56:%[0-9]+]]:_(s16) = G_TRUNC [[SHL56]](s32) + ; CHECK: [[OR56:%[0-9]+]]:_(s16) = G_OR [[OR55]], [[TRUNC56]] + ; CHECK: [[COPY356:%[0-9]+]]:_(s32) = COPY [[C8]](s32) + ; CHECK: [[COPY357:%[0-9]+]]:_(s32) = COPY [[LSHR94]](s32) + ; CHECK: [[AND172:%[0-9]+]]:_(s32) = G_AND [[COPY357]], [[C1]] + ; CHECK: [[SHL57:%[0-9]+]]:_(s32) = G_SHL [[AND172]], [[COPY356]](s32) + ; CHECK: [[TRUNC57:%[0-9]+]]:_(s16) = G_TRUNC [[SHL57]](s32) + ; CHECK: [[OR57:%[0-9]+]]:_(s16) = G_OR [[OR56]], [[TRUNC57]] + ; CHECK: [[COPY358:%[0-9]+]]:_(s32) = COPY [[C9]](s32) + ; CHECK: [[COPY359:%[0-9]+]]:_(s32) = COPY [[LSHR95]](s32) + ; CHECK: [[AND173:%[0-9]+]]:_(s32) = G_AND [[COPY359]], [[C1]] + ; CHECK: [[SHL58:%[0-9]+]]:_(s32) = G_SHL [[AND173]], [[COPY358]](s32) + ; CHECK: [[TRUNC58:%[0-9]+]]:_(s16) = G_TRUNC [[SHL58]](s32) + ; CHECK: [[OR58:%[0-9]+]]:_(s16) = G_OR [[OR57]], [[TRUNC58]] + ; CHECK: [[COPY360:%[0-9]+]]:_(s32) = COPY [[C10]](s32) + ; CHECK: [[COPY361:%[0-9]+]]:_(s32) = COPY [[LSHR96]](s32) + ; CHECK: [[AND174:%[0-9]+]]:_(s32) = G_AND [[COPY361]], [[C1]] + ; CHECK: [[SHL59:%[0-9]+]]:_(s32) = G_SHL [[AND174]], [[COPY360]](s32) + ; CHECK: [[TRUNC59:%[0-9]+]]:_(s16) = G_TRUNC [[SHL59]](s32) + ; CHECK: [[OR59:%[0-9]+]]:_(s16) = G_OR [[OR58]], [[TRUNC59]] + ; CHECK: [[COPY362:%[0-9]+]]:_(s32) = COPY [[C11]](s32) + ; CHECK: [[COPY363:%[0-9]+]]:_(s32) = COPY [[LSHR97]](s32) + ; CHECK: [[AND175:%[0-9]+]]:_(s32) = G_AND [[COPY363]], [[C1]] + ; CHECK: [[SHL60:%[0-9]+]]:_(s32) = G_SHL [[AND175]], [[COPY362]](s32) + ; CHECK: [[TRUNC60:%[0-9]+]]:_(s16) = G_TRUNC [[SHL60]](s32) + ; CHECK: [[OR60:%[0-9]+]]:_(s16) = G_OR [[OR59]], [[TRUNC60]] + ; CHECK: [[COPY364:%[0-9]+]]:_(s32) = COPY [[C12]](s32) + ; CHECK: [[COPY365:%[0-9]+]]:_(s32) = COPY [[LSHR98]](s32) + ; CHECK: [[AND176:%[0-9]+]]:_(s32) = G_AND [[COPY365]], [[C1]] + ; CHECK: [[SHL61:%[0-9]+]]:_(s32) = G_SHL [[AND176]], [[COPY364]](s32) + ; CHECK: [[TRUNC61:%[0-9]+]]:_(s16) = G_TRUNC [[SHL61]](s32) + ; CHECK: [[OR61:%[0-9]+]]:_(s16) = G_OR [[OR60]], [[TRUNC61]] + ; CHECK: [[COPY366:%[0-9]+]]:_(s32) = COPY [[C13]](s32) + ; CHECK: [[COPY367:%[0-9]+]]:_(s32) = COPY [[LSHR99]](s32) + ; CHECK: [[AND177:%[0-9]+]]:_(s32) = G_AND [[COPY367]], [[C1]] + ; CHECK: [[SHL62:%[0-9]+]]:_(s32) = G_SHL [[AND177]], [[COPY366]](s32) + ; CHECK: [[TRUNC62:%[0-9]+]]:_(s16) = G_TRUNC [[SHL62]](s32) + ; CHECK: 
[[OR62:%[0-9]+]]:_(s16) = G_OR [[OR61]], [[TRUNC62]] + ; CHECK: [[COPY368:%[0-9]+]]:_(s32) = COPY [[C14]](s32) + ; CHECK: [[COPY369:%[0-9]+]]:_(s32) = COPY [[LSHR100]](s32) + ; CHECK: [[AND178:%[0-9]+]]:_(s32) = G_AND [[COPY369]], [[C1]] + ; CHECK: [[SHL63:%[0-9]+]]:_(s32) = G_SHL [[AND178]], [[COPY368]](s32) + ; CHECK: [[TRUNC63:%[0-9]+]]:_(s16) = G_TRUNC [[SHL63]](s32) + ; CHECK: [[OR63:%[0-9]+]]:_(s16) = G_OR [[OR62]], [[TRUNC63]] + ; CHECK: [[COPY370:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[COPY371:%[0-9]+]]:_(s32) = COPY [[LSHR101]](s32) + ; CHECK: [[AND179:%[0-9]+]]:_(s32) = G_AND [[COPY371]], [[C1]] + ; CHECK: [[SHL64:%[0-9]+]]:_(s32) = G_SHL [[AND179]], [[COPY370]](s32) + ; CHECK: [[TRUNC64:%[0-9]+]]:_(s16) = G_TRUNC [[SHL64]](s32) + ; CHECK: [[OR64:%[0-9]+]]:_(s16) = G_OR [[OR63]], [[TRUNC64]] + ; CHECK: [[COPY372:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[COPY373:%[0-9]+]]:_(s32) = COPY [[LSHR102]](s32) + ; CHECK: [[AND180:%[0-9]+]]:_(s32) = G_AND [[COPY373]], [[C1]] + ; CHECK: [[SHL65:%[0-9]+]]:_(s32) = G_SHL [[AND180]], [[COPY372]](s32) + ; CHECK: [[TRUNC65:%[0-9]+]]:_(s16) = G_TRUNC [[SHL65]](s32) + ; CHECK: [[OR65:%[0-9]+]]:_(s16) = G_OR [[OR64]], [[TRUNC65]] + ; CHECK: [[COPY374:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[COPY375:%[0-9]+]]:_(s32) = COPY [[LSHR103]](s32) + ; CHECK: [[AND181:%[0-9]+]]:_(s32) = G_AND [[COPY375]], [[C1]] + ; CHECK: [[SHL66:%[0-9]+]]:_(s32) = G_SHL [[AND181]], [[COPY374]](s32) + ; CHECK: [[TRUNC66:%[0-9]+]]:_(s16) = G_TRUNC [[SHL66]](s32) + ; CHECK: [[OR66:%[0-9]+]]:_(s16) = G_OR [[OR65]], [[TRUNC66]] + ; CHECK: [[COPY376:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[COPY377:%[0-9]+]]:_(s32) = COPY [[LSHR104]](s32) + ; CHECK: [[AND182:%[0-9]+]]:_(s32) = G_AND [[COPY377]], [[C1]] + ; CHECK: [[SHL67:%[0-9]+]]:_(s32) = G_SHL [[AND182]], [[COPY376]](s32) + ; CHECK: [[TRUNC67:%[0-9]+]]:_(s16) = G_TRUNC [[SHL67]](s32) + ; CHECK: [[OR67:%[0-9]+]]:_(s16) = G_OR [[OR66]], [[TRUNC67]] + ; CHECK: [[COPY378:%[0-9]+]]:_(s32) = COPY [[C19]](s32) + ; CHECK: [[COPY379:%[0-9]+]]:_(s32) = COPY [[LSHR105]](s32) + ; CHECK: [[AND183:%[0-9]+]]:_(s32) = G_AND [[COPY379]], [[C1]] + ; CHECK: [[SHL68:%[0-9]+]]:_(s32) = G_SHL [[AND183]], [[COPY378]](s32) + ; CHECK: [[TRUNC68:%[0-9]+]]:_(s16) = G_TRUNC [[SHL68]](s32) + ; CHECK: [[OR68:%[0-9]+]]:_(s16) = G_OR [[OR67]], [[TRUNC68]] + ; CHECK: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[OR53]](s16) + ; CHECK: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[OR68]](s16) + ; CHECK: [[SHL69:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C5]](s32) + ; CHECK: [[OR69:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL69]] + ; CHECK: [[COPY380:%[0-9]+]]:_(s16) = COPY [[TRUNC8]](s16) + ; CHECK: [[AND184:%[0-9]+]]:_(s16) = G_AND [[COPY380]], [[C6]] + ; CHECK: [[COPY381:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[COPY382:%[0-9]+]]:_(s32) = COPY [[LSHR107]](s32) + ; CHECK: [[AND185:%[0-9]+]]:_(s32) = G_AND [[COPY382]], [[C1]] + ; CHECK: [[SHL70:%[0-9]+]]:_(s32) = G_SHL [[AND185]], [[COPY381]](s32) + ; CHECK: [[TRUNC69:%[0-9]+]]:_(s16) = G_TRUNC [[SHL70]](s32) + ; CHECK: [[OR70:%[0-9]+]]:_(s16) = G_OR [[AND184]], [[TRUNC69]] + ; CHECK: [[COPY383:%[0-9]+]]:_(s32) = COPY [[C2]](s32) + ; CHECK: [[COPY384:%[0-9]+]]:_(s32) = COPY [[LSHR108]](s32) + ; CHECK: [[AND186:%[0-9]+]]:_(s32) = G_AND [[COPY384]], [[C1]] + ; CHECK: [[SHL71:%[0-9]+]]:_(s32) = G_SHL [[AND186]], [[COPY383]](s32) + ; CHECK: [[TRUNC70:%[0-9]+]]:_(s16) = G_TRUNC [[SHL71]](s32) + ; CHECK: [[OR71:%[0-9]+]]:_(s16) = G_OR [[OR70]], [[TRUNC70]] + ; CHECK: [[COPY385:%[0-9]+]]:_(s32) = COPY 
[[C3]](s32) + ; CHECK: [[COPY386:%[0-9]+]]:_(s32) = COPY [[LSHR93]](s32) + ; CHECK: [[AND187:%[0-9]+]]:_(s32) = G_AND [[COPY386]], [[C1]] + ; CHECK: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[AND187]], [[COPY385]](s32) + ; CHECK: [[TRUNC71:%[0-9]+]]:_(s16) = G_TRUNC [[SHL72]](s32) + ; CHECK: [[OR72:%[0-9]+]]:_(s16) = G_OR [[OR71]], [[TRUNC71]] + ; CHECK: [[COPY387:%[0-9]+]]:_(s32) = COPY [[C8]](s32) ; CHECK: [[COPY388:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL72:%[0-9]+]]:_(s32) = G_SHL [[COPY388]], [[COPY387]](s32) - ; CHECK: [[TRUNC73:%[0-9]+]]:_(s16) = G_TRUNC [[SHL72]](s32) - ; CHECK: [[OR72:%[0-9]+]]:_(s16) = G_OR [[OR71]], [[TRUNC73]] - ; CHECK: [[COPY389:%[0-9]+]]:_(s32) = COPY [[C15]](s32) + ; CHECK: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY388]], [[COPY387]](s32) + ; CHECK: [[TRUNC72:%[0-9]+]]:_(s16) = G_TRUNC [[SHL73]](s32) + ; CHECK: [[OR73:%[0-9]+]]:_(s16) = G_OR [[OR72]], [[TRUNC72]] + ; CHECK: [[COPY389:%[0-9]+]]:_(s32) = COPY [[C9]](s32) ; CHECK: [[COPY390:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL73:%[0-9]+]]:_(s32) = G_SHL [[COPY390]], [[COPY389]](s32) - ; CHECK: [[TRUNC74:%[0-9]+]]:_(s16) = G_TRUNC [[SHL73]](s32) - ; CHECK: [[OR73:%[0-9]+]]:_(s16) = G_OR [[OR72]], [[TRUNC74]] - ; CHECK: [[COPY391:%[0-9]+]]:_(s32) = COPY [[C16]](s32) + ; CHECK: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY390]], [[COPY389]](s32) + ; CHECK: [[TRUNC73:%[0-9]+]]:_(s16) = G_TRUNC [[SHL74]](s32) + ; CHECK: [[OR74:%[0-9]+]]:_(s16) = G_OR [[OR73]], [[TRUNC73]] + ; CHECK: [[COPY391:%[0-9]+]]:_(s32) = COPY [[C10]](s32) ; CHECK: [[COPY392:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL74:%[0-9]+]]:_(s32) = G_SHL [[COPY392]], [[COPY391]](s32) - ; CHECK: [[TRUNC75:%[0-9]+]]:_(s16) = G_TRUNC [[SHL74]](s32) - ; CHECK: [[OR74:%[0-9]+]]:_(s16) = G_OR [[OR73]], [[TRUNC75]] - ; CHECK: [[COPY393:%[0-9]+]]:_(s32) = COPY [[C17]](s32) + ; CHECK: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY392]], [[COPY391]](s32) + ; CHECK: [[TRUNC74:%[0-9]+]]:_(s16) = G_TRUNC [[SHL75]](s32) + ; CHECK: [[OR75:%[0-9]+]]:_(s16) = G_OR [[OR74]], [[TRUNC74]] + ; CHECK: [[COPY393:%[0-9]+]]:_(s32) = COPY [[C11]](s32) ; CHECK: [[COPY394:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL75:%[0-9]+]]:_(s32) = G_SHL [[COPY394]], [[COPY393]](s32) - ; CHECK: [[TRUNC76:%[0-9]+]]:_(s16) = G_TRUNC [[SHL75]](s32) - ; CHECK: [[OR75:%[0-9]+]]:_(s16) = G_OR [[OR74]], [[TRUNC76]] - ; CHECK: [[COPY395:%[0-9]+]]:_(s32) = COPY [[C18]](s32) + ; CHECK: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY394]], [[COPY393]](s32) + ; CHECK: [[TRUNC75:%[0-9]+]]:_(s16) = G_TRUNC [[SHL76]](s32) + ; CHECK: [[OR76:%[0-9]+]]:_(s16) = G_OR [[OR75]], [[TRUNC75]] + ; CHECK: [[COPY395:%[0-9]+]]:_(s32) = COPY [[C12]](s32) ; CHECK: [[COPY396:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL76:%[0-9]+]]:_(s32) = G_SHL [[COPY396]], [[COPY395]](s32) - ; CHECK: [[TRUNC77:%[0-9]+]]:_(s16) = G_TRUNC [[SHL76]](s32) - ; CHECK: [[OR76:%[0-9]+]]:_(s16) = G_OR [[OR75]], [[TRUNC77]] - ; CHECK: [[COPY397:%[0-9]+]]:_(s32) = COPY [[C1]](s32) + ; CHECK: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY396]], [[COPY395]](s32) + ; CHECK: [[TRUNC76:%[0-9]+]]:_(s16) = G_TRUNC [[SHL77]](s32) + ; CHECK: [[OR77:%[0-9]+]]:_(s16) = G_OR [[OR76]], [[TRUNC76]] + ; CHECK: [[COPY397:%[0-9]+]]:_(s32) = COPY [[C13]](s32) ; CHECK: [[COPY398:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL77:%[0-9]+]]:_(s32) = G_SHL [[COPY398]], [[COPY397]](s32) - ; CHECK: [[TRUNC78:%[0-9]+]]:_(s16) = G_TRUNC [[SHL77]](s32) - ; CHECK: [[OR77:%[0-9]+]]:_(s16) = G_OR [[C4]], [[TRUNC78]] - ; CHECK: [[COPY399:%[0-9]+]]:_(s32) = COPY 
[[C2]](s32) + ; CHECK: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY398]], [[COPY397]](s32) + ; CHECK: [[TRUNC77:%[0-9]+]]:_(s16) = G_TRUNC [[SHL78]](s32) + ; CHECK: [[OR78:%[0-9]+]]:_(s16) = G_OR [[OR77]], [[TRUNC77]] + ; CHECK: [[COPY399:%[0-9]+]]:_(s32) = COPY [[C14]](s32) ; CHECK: [[COPY400:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL78:%[0-9]+]]:_(s32) = G_SHL [[COPY400]], [[COPY399]](s32) - ; CHECK: [[TRUNC79:%[0-9]+]]:_(s16) = G_TRUNC [[SHL78]](s32) - ; CHECK: [[OR78:%[0-9]+]]:_(s16) = G_OR [[OR77]], [[TRUNC79]] - ; CHECK: [[COPY401:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY400]], [[COPY399]](s32) + ; CHECK: [[TRUNC78:%[0-9]+]]:_(s16) = G_TRUNC [[SHL79]](s32) + ; CHECK: [[OR79:%[0-9]+]]:_(s16) = G_OR [[OR78]], [[TRUNC78]] + ; CHECK: [[COPY401:%[0-9]+]]:_(s32) = COPY [[C15]](s32) ; CHECK: [[COPY402:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL79:%[0-9]+]]:_(s32) = G_SHL [[COPY402]], [[COPY401]](s32) - ; CHECK: [[TRUNC80:%[0-9]+]]:_(s16) = G_TRUNC [[SHL79]](s32) - ; CHECK: [[OR79:%[0-9]+]]:_(s16) = G_OR [[OR78]], [[TRUNC80]] - ; CHECK: [[COPY403:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY403]], [[C7]](s32) - ; CHECK: [[TRUNC81:%[0-9]+]]:_(s16) = G_TRUNC [[SHL80]](s32) - ; CHECK: [[OR80:%[0-9]+]]:_(s16) = G_OR [[OR79]], [[TRUNC81]] + ; CHECK: [[SHL80:%[0-9]+]]:_(s32) = G_SHL [[COPY402]], [[COPY401]](s32) + ; CHECK: [[TRUNC79:%[0-9]+]]:_(s16) = G_TRUNC [[SHL80]](s32) + ; CHECK: [[OR80:%[0-9]+]]:_(s16) = G_OR [[OR79]], [[TRUNC79]] + ; CHECK: [[COPY403:%[0-9]+]]:_(s32) = COPY [[C16]](s32) ; CHECK: [[COPY404:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY404]], [[C8]](s32) - ; CHECK: [[TRUNC82:%[0-9]+]]:_(s16) = G_TRUNC [[SHL81]](s32) - ; CHECK: [[OR81:%[0-9]+]]:_(s16) = G_OR [[OR80]], [[TRUNC82]] - ; CHECK: [[COPY405:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY405]], [[C9]](s32) - ; CHECK: [[TRUNC83:%[0-9]+]]:_(s16) = G_TRUNC [[SHL82]](s32) - ; CHECK: [[OR82:%[0-9]+]]:_(s16) = G_OR [[OR81]], [[TRUNC83]] + ; CHECK: [[SHL81:%[0-9]+]]:_(s32) = G_SHL [[COPY404]], [[COPY403]](s32) + ; CHECK: [[TRUNC80:%[0-9]+]]:_(s16) = G_TRUNC [[SHL81]](s32) + ; CHECK: [[OR81:%[0-9]+]]:_(s16) = G_OR [[OR80]], [[TRUNC80]] + ; CHECK: [[COPY405:%[0-9]+]]:_(s32) = COPY [[C17]](s32) ; CHECK: [[COPY406:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY406]], [[C10]](s32) - ; CHECK: [[TRUNC84:%[0-9]+]]:_(s16) = G_TRUNC [[SHL83]](s32) - ; CHECK: [[OR83:%[0-9]+]]:_(s16) = G_OR [[OR82]], [[TRUNC84]] - ; CHECK: [[COPY407:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY407]], [[C11]](s32) - ; CHECK: [[TRUNC85:%[0-9]+]]:_(s16) = G_TRUNC [[SHL84]](s32) - ; CHECK: [[OR84:%[0-9]+]]:_(s16) = G_OR [[OR83]], [[TRUNC85]] + ; CHECK: [[SHL82:%[0-9]+]]:_(s32) = G_SHL [[COPY406]], [[COPY405]](s32) + ; CHECK: [[TRUNC81:%[0-9]+]]:_(s16) = G_TRUNC [[SHL82]](s32) + ; CHECK: [[OR82:%[0-9]+]]:_(s16) = G_OR [[OR81]], [[TRUNC81]] + ; CHECK: [[COPY407:%[0-9]+]]:_(s32) = COPY [[C18]](s32) ; CHECK: [[COPY408:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY408]], [[C12]](s32) - ; CHECK: [[TRUNC86:%[0-9]+]]:_(s16) = G_TRUNC [[SHL85]](s32) - ; CHECK: [[OR85:%[0-9]+]]:_(s16) = G_OR [[OR84]], [[TRUNC86]] - ; CHECK: [[COPY409:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY409]], [[C13]](s32) - ; CHECK: [[TRUNC87:%[0-9]+]]:_(s16) = 
G_TRUNC [[SHL86]](s32) - ; CHECK: [[OR86:%[0-9]+]]:_(s16) = G_OR [[OR85]], [[TRUNC87]] + ; CHECK: [[SHL83:%[0-9]+]]:_(s32) = G_SHL [[COPY408]], [[COPY407]](s32) + ; CHECK: [[TRUNC82:%[0-9]+]]:_(s16) = G_TRUNC [[SHL83]](s32) + ; CHECK: [[OR83:%[0-9]+]]:_(s16) = G_OR [[OR82]], [[TRUNC82]] + ; CHECK: [[COPY409:%[0-9]+]]:_(s32) = COPY [[C19]](s32) ; CHECK: [[COPY410:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY410]], [[C14]](s32) - ; CHECK: [[TRUNC88:%[0-9]+]]:_(s16) = G_TRUNC [[SHL87]](s32) - ; CHECK: [[OR87:%[0-9]+]]:_(s16) = G_OR [[OR86]], [[TRUNC88]] - ; CHECK: [[COPY411:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY411]], [[C15]](s32) - ; CHECK: [[TRUNC89:%[0-9]+]]:_(s16) = G_TRUNC [[SHL88]](s32) - ; CHECK: [[OR88:%[0-9]+]]:_(s16) = G_OR [[OR87]], [[TRUNC89]] + ; CHECK: [[SHL84:%[0-9]+]]:_(s32) = G_SHL [[COPY410]], [[COPY409]](s32) + ; CHECK: [[TRUNC83:%[0-9]+]]:_(s16) = G_TRUNC [[SHL84]](s32) + ; CHECK: [[OR84:%[0-9]+]]:_(s16) = G_OR [[OR83]], [[TRUNC83]] + ; CHECK: [[COPY411:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CHECK: [[COPY412:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY412]], [[C16]](s32) - ; CHECK: [[TRUNC90:%[0-9]+]]:_(s16) = G_TRUNC [[SHL89]](s32) - ; CHECK: [[OR89:%[0-9]+]]:_(s16) = G_OR [[OR88]], [[TRUNC90]] - ; CHECK: [[COPY413:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY413]], [[C17]](s32) - ; CHECK: [[TRUNC91:%[0-9]+]]:_(s16) = G_TRUNC [[SHL90]](s32) - ; CHECK: [[OR90:%[0-9]+]]:_(s16) = G_OR [[OR89]], [[TRUNC91]] + ; CHECK: [[SHL85:%[0-9]+]]:_(s32) = G_SHL [[COPY412]], [[COPY411]](s32) + ; CHECK: [[TRUNC84:%[0-9]+]]:_(s16) = G_TRUNC [[SHL85]](s32) + ; CHECK: [[OR85:%[0-9]+]]:_(s16) = G_OR [[C4]], [[TRUNC84]] + ; CHECK: [[COPY413:%[0-9]+]]:_(s32) = COPY [[C2]](s32) ; CHECK: [[COPY414:%[0-9]+]]:_(s32) = COPY [[C]](s32) - ; CHECK: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY414]], [[C18]](s32) - ; CHECK: [[TRUNC92:%[0-9]+]]:_(s16) = G_TRUNC [[SHL91]](s32) - ; CHECK: [[OR91:%[0-9]+]]:_(s16) = G_OR [[OR90]], [[TRUNC92]] - ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR76]](s16) - ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR91]](s16) - ; CHECK: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) - ; CHECK: [[OR92:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL92]] - ; CHECK: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR30]](s32), [[OR61]](s32), [[OR92]](s32) - ; CHECK: [[TRUNC93:%[0-9]+]]:_(s68) = G_TRUNC [[MV]](s96) - ; CHECK: S_NOP 0, implicit [[TRUNC93]](s68) + ; CHECK: [[SHL86:%[0-9]+]]:_(s32) = G_SHL [[COPY414]], [[COPY413]](s32) + ; CHECK: [[TRUNC85:%[0-9]+]]:_(s16) = G_TRUNC [[SHL86]](s32) + ; CHECK: [[OR86:%[0-9]+]]:_(s16) = G_OR [[OR85]], [[TRUNC85]] + ; CHECK: [[COPY415:%[0-9]+]]:_(s32) = COPY [[C3]](s32) + ; CHECK: [[COPY416:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL87:%[0-9]+]]:_(s32) = G_SHL [[COPY416]], [[COPY415]](s32) + ; CHECK: [[TRUNC86:%[0-9]+]]:_(s16) = G_TRUNC [[SHL87]](s32) + ; CHECK: [[OR87:%[0-9]+]]:_(s16) = G_OR [[OR86]], [[TRUNC86]] + ; CHECK: [[COPY417:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL88:%[0-9]+]]:_(s32) = G_SHL [[COPY417]], [[C8]](s32) + ; CHECK: [[TRUNC87:%[0-9]+]]:_(s16) = G_TRUNC [[SHL88]](s32) + ; CHECK: [[OR88:%[0-9]+]]:_(s16) = G_OR [[OR87]], [[TRUNC87]] + ; CHECK: [[COPY418:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL89:%[0-9]+]]:_(s32) = G_SHL [[COPY418]], [[C9]](s32) + ; CHECK: [[TRUNC88:%[0-9]+]]:_(s16) = G_TRUNC [[SHL89]](s32) + ; CHECK: 
[[OR89:%[0-9]+]]:_(s16) = G_OR [[OR88]], [[TRUNC88]] + ; CHECK: [[COPY419:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL90:%[0-9]+]]:_(s32) = G_SHL [[COPY419]], [[C10]](s32) + ; CHECK: [[TRUNC89:%[0-9]+]]:_(s16) = G_TRUNC [[SHL90]](s32) + ; CHECK: [[OR90:%[0-9]+]]:_(s16) = G_OR [[OR89]], [[TRUNC89]] + ; CHECK: [[COPY420:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL91:%[0-9]+]]:_(s32) = G_SHL [[COPY420]], [[C11]](s32) + ; CHECK: [[TRUNC90:%[0-9]+]]:_(s16) = G_TRUNC [[SHL91]](s32) + ; CHECK: [[OR91:%[0-9]+]]:_(s16) = G_OR [[OR90]], [[TRUNC90]] + ; CHECK: [[COPY421:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL92:%[0-9]+]]:_(s32) = G_SHL [[COPY421]], [[C12]](s32) + ; CHECK: [[TRUNC91:%[0-9]+]]:_(s16) = G_TRUNC [[SHL92]](s32) + ; CHECK: [[OR92:%[0-9]+]]:_(s16) = G_OR [[OR91]], [[TRUNC91]] + ; CHECK: [[COPY422:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL93:%[0-9]+]]:_(s32) = G_SHL [[COPY422]], [[C13]](s32) + ; CHECK: [[TRUNC92:%[0-9]+]]:_(s16) = G_TRUNC [[SHL93]](s32) + ; CHECK: [[OR93:%[0-9]+]]:_(s16) = G_OR [[OR92]], [[TRUNC92]] + ; CHECK: [[COPY423:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL94:%[0-9]+]]:_(s32) = G_SHL [[COPY423]], [[C14]](s32) + ; CHECK: [[TRUNC93:%[0-9]+]]:_(s16) = G_TRUNC [[SHL94]](s32) + ; CHECK: [[OR94:%[0-9]+]]:_(s16) = G_OR [[OR93]], [[TRUNC93]] + ; CHECK: [[COPY424:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL95:%[0-9]+]]:_(s32) = G_SHL [[COPY424]], [[C15]](s32) + ; CHECK: [[TRUNC94:%[0-9]+]]:_(s16) = G_TRUNC [[SHL95]](s32) + ; CHECK: [[OR95:%[0-9]+]]:_(s16) = G_OR [[OR94]], [[TRUNC94]] + ; CHECK: [[COPY425:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL96:%[0-9]+]]:_(s32) = G_SHL [[COPY425]], [[C16]](s32) + ; CHECK: [[TRUNC95:%[0-9]+]]:_(s16) = G_TRUNC [[SHL96]](s32) + ; CHECK: [[OR96:%[0-9]+]]:_(s16) = G_OR [[OR95]], [[TRUNC95]] + ; CHECK: [[COPY426:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL97:%[0-9]+]]:_(s32) = G_SHL [[COPY426]], [[C17]](s32) + ; CHECK: [[TRUNC96:%[0-9]+]]:_(s16) = G_TRUNC [[SHL97]](s32) + ; CHECK: [[OR97:%[0-9]+]]:_(s16) = G_OR [[OR96]], [[TRUNC96]] + ; CHECK: [[COPY427:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL98:%[0-9]+]]:_(s32) = G_SHL [[COPY427]], [[C18]](s32) + ; CHECK: [[TRUNC97:%[0-9]+]]:_(s16) = G_TRUNC [[SHL98]](s32) + ; CHECK: [[OR98:%[0-9]+]]:_(s16) = G_OR [[OR97]], [[TRUNC97]] + ; CHECK: [[COPY428:%[0-9]+]]:_(s32) = COPY [[C]](s32) + ; CHECK: [[SHL99:%[0-9]+]]:_(s32) = G_SHL [[COPY428]], [[C19]](s32) + ; CHECK: [[TRUNC98:%[0-9]+]]:_(s16) = G_TRUNC [[SHL99]](s32) + ; CHECK: [[OR99:%[0-9]+]]:_(s16) = G_OR [[OR98]], [[TRUNC98]] + ; CHECK: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[OR84]](s16) + ; CHECK: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[OR99]](s16) + ; CHECK: [[SHL100:%[0-9]+]]:_(s32) = G_SHL [[ZEXT5]], [[C5]](s32) + ; CHECK: [[OR100:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL100]] + ; CHECK: [[MV8:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[OR38]](s32), [[OR69]](s32), [[OR100]](s32) + ; CHECK: [[TRUNC99:%[0-9]+]]:_(s68) = G_TRUNC [[MV8]](s96) + ; CHECK: S_NOP 0, implicit [[TRUNC99]](s68) %0:_(s17) = G_CONSTANT i17 0 %1:_(s17) = G_CONSTANT i17 1 %2:_(s17) = G_CONSTANT i17 2 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir index aa52a681c156..6ebc76f723e8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-unmerge-values.mir @@ -233,29 +233,32 @@ body: | ; CHECK-LABEL: name: test_unmerge_s8_s48 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = 
COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CHECK: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; CHECK: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C2]] ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[AND]], [[COPY1]](s32) ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[C1]](s32) ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY4]], [[C2]] ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[AND1]], [[COPY3]](s32) - ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; CHECK: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY5]], [[C2]] ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[AND2]], [[C1]](s32) - ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) - ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) ; CHECK: $vgpr0 = COPY [[COPY6]](s32) ; CHECK: $vgpr1 = COPY [[COPY7]](s32) @@ -288,14 +291,17 @@ body: | ; CHECK-LABEL: name: test_unmerge_s16_s48 ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s64) + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF - ; CHECK: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s192) = G_MERGE_VALUES [[MV]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[MV1]](s192) + ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s96) ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV]], [[C]](s32) - ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[TRUNC]], [[C]](s32) - ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV]](s32) + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV2]], [[C]](s32) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[UV2]](s32) ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) - ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[TRUNC]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[UV3]](s32) ; CHECK: $vgpr0 = COPY [[COPY1]](s32) ; 
CHECK: $vgpr1 = COPY [[COPY2]](s32) ; CHECK: $vgpr2 = COPY [[COPY3]](s32) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll new file mode 100644 index 000000000000..dc5f9e4a29f2 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload-xfail.ll @@ -0,0 +1,9 @@ +; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s +; RUN: not --crash llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s + +define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { + %load = load i32, i32 addrspace(1)* %ptr + %ext = zext i32 %load to i96 + ret i96 %ext +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll index f7fe19a436b7..df52912c7013 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/zextload.ll @@ -134,44 +134,6 @@ define i64 @zextload_global_i32_to_i64(i32 addrspace(1)* %ptr) { ret i64 %ext } -define i96 @zextload_global_i32_to_i96(i32 addrspace(1)* %ptr) { -; GFX9-LABEL: zextload_global_i32_to_i96: -; GFX9: ; %bb.0: -; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-NEXT: global_load_dword v0, v[0:1], off -; GFX9-NEXT: s_mov_b32 s4, 0 -; GFX9-NEXT: v_mov_b32_e32 v1, s4 -; GFX9-NEXT: v_mov_b32_e32 v2, s4 -; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: s_setpc_b64 s[30:31] -; -; GFX8-LABEL: zextload_global_i32_to_i96: -; GFX8: ; %bb.0: -; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-NEXT: flat_load_dword v0, v[0:1] -; GFX8-NEXT: s_mov_b32 s4, 0 -; GFX8-NEXT: v_mov_b32_e32 v1, s4 -; GFX8-NEXT: v_mov_b32_e32 v2, s4 -; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; GFX8-NEXT: s_setpc_b64 s[30:31] -; -; GFX6-LABEL: zextload_global_i32_to_i96: -; GFX6: ; %bb.0: -; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX6-NEXT: s_mov_b32 s6, 0 -; GFX6-NEXT: s_mov_b32 s7, 0xf000 -; GFX6-NEXT: s_mov_b64 s[4:5], 0 -; GFX6-NEXT: buffer_load_dword v0, v[0:1], s[4:7], 0 addr64 -; GFX6-NEXT: s_mov_b32 s8, 0 -; GFX6-NEXT: v_mov_b32_e32 v1, s8 -; GFX6-NEXT: v_mov_b32_e32 v2, s8 -; GFX6-NEXT: s_waitcnt vmcnt(0) -; GFX6-NEXT: s_setpc_b64 s[30:31] - %load = load i32, i32 addrspace(1)* %ptr - %ext = zext i32 %load to i96 - ret i96 %ext -} - define i128 @zextload_global_i32_to_i128(i32 addrspace(1)* %ptr) { ; GFX9-LABEL: zextload_global_i32_to_i128: ; GFX9: ; %bb.0: From d18bb247492face84966f6c1c32e479e5e026f1d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 15 Apr 2020 02:14:56 -0500 Subject: [PATCH 009/216] [Attributor][NFC] Do not create temporary maps during lookup The AAMap.lookup() call created a temporary value if the key was not present. Since the value was another map it was not free to create it. Instead of a lookup we now use find and compare the result against the end iterator explicitly. The result is the same but we never need to create a temporary map. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index a4b0c6a605e7..2363a74d211b 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -1195,9 +1195,11 @@ struct Attributor { // Lookup the abstract attribute of type AAType. 
If found, return it after // registering a dependence of QueryingAA on the one returned attribute. - const auto &KindToAbstractAttributeMap = AAMap.lookup(IRP); + auto KindToAbstractAttributeMapIt = AAMap.find(IRP); + if ( KindToAbstractAttributeMapIt == AAMap.end()) + return nullptr; if (AAType *AA = static_cast( - KindToAbstractAttributeMap.lookup(&AAType::ID))) { + KindToAbstractAttributeMapIt->second.lookup(&AAType::ID))) { // Do not register a dependence on an attribute with an invalid state. if (TrackDependence && AA->getState().isValidState()) recordDependence(*AA, const_cast(*QueryingAA), From c4d3188adb5bc306b3e9f52ba261fa31f724ea5b Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 15 Apr 2020 11:59:50 -0500 Subject: [PATCH 010/216] [Attributor][NFC] Reduce indention for call site attribute seeding Also added a TODO to remind us that indirect calls could be optimized as well. --- llvm/lib/Transforms/IPO/Attributor.cpp | 72 ++++++++++++++------------ 1 file changed, 38 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index e8e8aed8d31b..365f9bfd6542 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1807,54 +1807,58 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { // users. The return value might be dead if there are no live users. getOrCreateAAFor(CSRetPos); - if (Function *Callee = CS.getCalledFunction()) { - // Skip declerations except if annotations on their call sites were - // explicitly requested. - if (!AnnotateDeclarationCallSites && Callee->isDeclaration() && - !Callee->hasMetadata(LLVMContext::MD_callback)) - return true; + Function *Callee = CS.getCalledFunction(); + // TODO: Even if the callee is not known now we might be able to simplify + // the call/callee. + if (!Callee) + return true; - if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) { + // Skip declarations except if annotations on their call sites were + // explicitly requested. + if (!AnnotateDeclarationCallSites && Callee->isDeclaration() && + !Callee->hasMetadata(LLVMContext::MD_callback)) + return true; - IRPosition CSRetPos = IRPosition::callsite_returned(CS); + if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) { - // Call site return integer values might be limited by a constant range. - if (Callee->getReturnType()->isIntegerTy()) - getOrCreateAAFor(CSRetPos); - } + IRPosition CSRetPos = IRPosition::callsite_returned(CS); + + // Call site return integer values might be limited by a constant range. + if (Callee->getReturnType()->isIntegerTy()) + getOrCreateAAFor(CSRetPos); + } - for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) { + for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) { - IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); - // Every call site argument might be dead. - getOrCreateAAFor(CSArgPos); + // Every call site argument might be dead. + getOrCreateAAFor(CSArgPos); - // Call site argument might be simplified. - getOrCreateAAFor(CSArgPos); + // Call site argument might be simplified. + getOrCreateAAFor(CSArgPos); - if (!CS.getArgument(i)->getType()->isPointerTy()) - continue; + if (!CS.getArgument(i)->getType()->isPointerTy()) + continue; - // Call site argument attribute "non-null". - getOrCreateAAFor(CSArgPos); + // Call site argument attribute "non-null". + getOrCreateAAFor(CSArgPos); - // Call site argument attribute "no-alias". 
- getOrCreateAAFor(CSArgPos); + // Call site argument attribute "no-alias". + getOrCreateAAFor(CSArgPos); - // Call site argument attribute "dereferenceable". - getOrCreateAAFor(CSArgPos); + // Call site argument attribute "dereferenceable". + getOrCreateAAFor(CSArgPos); - // Call site argument attribute "align". - getOrCreateAAFor(CSArgPos); + // Call site argument attribute "align". + getOrCreateAAFor(CSArgPos); - // Call site argument attribute - // "readnone/readonly/writeonly/..." - getOrCreateAAFor(CSArgPos); + // Call site argument attribute + // "readnone/readonly/writeonly/..." + getOrCreateAAFor(CSArgPos); - // Call site argument attribute "nofree". - getOrCreateAAFor(CSArgPos); - } + // Call site argument attribute "nofree". + getOrCreateAAFor(CSArgPos); } return true; }; From c2f628e46c32dce91be0d23f9b603a11a74173f6 Mon Sep 17 00:00:00 2001 From: Pierre Oechsel Date: Thu, 16 Apr 2020 10:24:48 +0200 Subject: [PATCH 011/216] [MLIR] [EDSC] Add folded_xxxx handles for common std instructions. Differential Revision: https://reviews.llvm.org/D77729 --- .../Dialect/StandardOps/EDSC/Intrinsics.h | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h index 9df53d356a86..50dc0a0050f0 100644 --- a/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/StandardOps/EDSC/Intrinsics.h @@ -142,6 +142,28 @@ using folded_std_constant_int = folded::ValueBuilder; using folded_std_constant = folded::ValueBuilder; using folded_std_dim = folded::ValueBuilder; using folded_std_muli = folded::ValueBuilder; +using folded_std_addi = folded::ValueBuilder; +using folded_std_addf = folded::ValueBuilder; +using folded_std_alloc = folded::ValueBuilder; +using folded_std_constant = folded::ValueBuilder; +using folded_std_constant_float = folded::ValueBuilder; +using folded_std_constant_index = folded::ValueBuilder; +using folded_std_constant_int = folded::ValueBuilder; +using folded_std_dim = folded::ValueBuilder; +using folded_std_extract_element = folded::ValueBuilder; +using folded_std_index_cast = folded::ValueBuilder; +using folded_std_muli = folded::ValueBuilder; +using folded_std_mulf = folded::ValueBuilder; +using folded_std_memref_cast = folded::ValueBuilder; +using folded_std_select = folded::ValueBuilder; +using folded_std_load = folded::ValueBuilder; +using folded_std_subi = folded::ValueBuilder; +using folded_std_sub_view = folded::ValueBuilder; +using folded_std_tanh = folded::ValueBuilder; +using folded_std_tensor_load = folded::ValueBuilder; +using folded_std_view = folded::ValueBuilder; +using folded_std_zero_extendi = folded::ValueBuilder; +using folded_std_sign_extendi = folded::ValueBuilder; } // namespace intrinsics } // namespace edsc } // namespace mlir From 997f33cfeec9cd8c5bc913cf862794e986a4bd39 Mon Sep 17 00:00:00 2001 From: Ulysse Beaugnon Date: Thu, 16 Apr 2020 10:28:02 +0200 Subject: [PATCH 012/216] [MLIR] Add IndexAttr to primitive attributes kinds in tablegen. Summary: OpBase.td defined attributes kind for all integer types expect index. This commit fixes that by adding an IndexAttr attribute kind. 
Differential Revision: https://reviews.llvm.org/D78195 --- mlir/include/mlir/IR/OpBase.td | 10 ++++++++++ mlir/test/IR/attribute.mlir | 2 ++ mlir/test/lib/Dialect/Test/TestOps.td | 1 + 3 files changed, 13 insertions(+) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index e552279e8a8e..14ef45ed4c7f 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -845,6 +845,16 @@ def BoolAttr : Attr()">, "bool attribute"> { let constBuilderCall = "$_builder.getBoolAttr($0)"; } +// Index attribute. +def IndexAttr : + TypedAttrBase< + Index, "IntegerAttr", + And<[CPred<"$_self.isa()">, + CPred<"$_self.cast().getType().isa()">]>, + "index attribute"> { + let returnType = [{ APInt }]; +} + // Base class for any integer (regardless of signedness semantics) attributes // of fixed width. class AnyIntegerAttrBase : diff --git a/mlir/test/IR/attribute.mlir b/mlir/test/IR/attribute.mlir index 31804b274a55..32b8f8c25180 100644 --- a/mlir/test/IR/attribute.mlir +++ b/mlir/test/IR/attribute.mlir @@ -8,6 +8,8 @@ func @int_attrs_pass() { "test.int_attrs"() { // CHECK: any_i32_attr = 5 : ui32 any_i32_attr = 5 : ui32, + // CHECK-SAME: index_attr = 8 : index + index_attr = 8 : index, // CHECK-SAME: si32_attr = 7 : si32 si32_attr = 7 : si32, // CHECK-SAME: ui32_attr = 6 : ui32 diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 6f1ef4a50f67..524780b89552 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -199,6 +199,7 @@ def I64EnumAttrOp : TEST_Op<"i64_enum_attr"> { def IntAttrOp : TEST_Op<"int_attrs"> { let arguments = (ins AnyI32Attr:$any_i32_attr, + IndexAttr:$index_attr, UI32Attr:$ui32_attr, SI32Attr:$si32_attr ); From a60fdd2ba487ea65f041930df4b7b0596bf28977 Mon Sep 17 00:00:00 2001 From: Lorenzo Chelini Date: Thu, 16 Apr 2020 10:30:11 +0200 Subject: [PATCH 013/216] [MLIR] NFC after commit D77478. Remove leftovers 'applyPatternsGreedily' from the codebase. Differential Revision: https://reviews.llvm.org/D78274 --- mlir/lib/Transforms/Inliner.cpp | 3 ++- mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Transforms/Inliner.cpp b/mlir/lib/Transforms/Inliner.cpp index 9826f5ca187f..96a098819972 100644 --- a/mlir/lib/Transforms/Inliner.cpp +++ b/mlir/lib/Transforms/Inliner.cpp @@ -518,7 +518,8 @@ static void canonicalizeSCC(CallGraph &cg, CGUseList &useList, // We also won't apply canonicalizations for nodes that are not // isolated. This avoids potentially mutating the regions of nodes defined - // above, this is also a stipulation of the 'applyPatternsGreedily' driver. + // above, this is also a stipulation of the 'applyPatternsAndFoldGreedily' + // driver. auto *region = node->getCallableRegion(); if (!region->getParentOp()->isKnownIsolatedFromAbove()) continue; diff --git a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp index 256c1340d0c6..2ebf1d6a47d7 100644 --- a/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp +++ b/mlir/lib/Transforms/Utils/GreedyPatternRewriteDriver.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements mlir::applyPatternsGreedily. +// This file implements mlir::applyPatternsAndFoldGreedily. 
// //===----------------------------------------------------------------------===// From be9c3bdc44baddfd1ed0efeb4db249198a21b20d Mon Sep 17 00:00:00 2001 From: Alexander Belyaev Date: Thu, 16 Apr 2020 10:39:42 +0200 Subject: [PATCH 014/216] [MLIR] Fix fusion of linalg.indexed_generic producer into tiled (Indexed)GenericOp. Differential Revision: https://reviews.llvm.org/D78209 --- mlir/include/mlir/Transforms/LoopUtils.h | 5 + mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 26 ++- mlir/lib/Transforms/Utils/LoopUtils.cpp | 20 +- mlir/test/Dialect/Linalg/fusion.mlir | 105 ---------- .../Linalg/fusion_indexed_generic.mlir | 186 ++++++++++++++++++ 5 files changed, 221 insertions(+), 121 deletions(-) create mode 100644 mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir diff --git a/mlir/include/mlir/Transforms/LoopUtils.h b/mlir/include/mlir/Transforms/LoopUtils.h index 3bc34f1444e0..1d0e8d39bd61 100644 --- a/mlir/include/mlir/Transforms/LoopUtils.h +++ b/mlir/include/mlir/Transforms/LoopUtils.h @@ -287,6 +287,11 @@ LogicalResult separateFullTiles(MutableArrayRef nest, SmallVectorImpl *fullTileNest = nullptr); +/// Replaces all uses of `orig` with `replacement` except if the user is listed +/// in `exceptions`. +void replaceAllUsesExcept(Value orig, Value replacement, + const SmallPtrSetImpl &exceptions); + } // end namespace mlir #endif // MLIR_TRANSFORMS_LOOP_UTILS_H diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 5c3763523cdd..96cbdab5ac47 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -24,6 +24,7 @@ #include "mlir/IR/PatternMatch.h" #include "mlir/Support/LLVM.h" #include "mlir/Transforms/FoldUtils.h" +#include "mlir/Transforms/LoopUtils.h" #include "llvm/ADT/SetVector.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -97,7 +98,26 @@ static LinalgOp cloneWithLoopRanges(OpBuilder &b, Location loc, LinalgOp op, } auto operands = getAssumedNonViewOperands(op); clonedViews.append(operands.begin(), operands.end()); - return op.clone(b, loc, clonedViews); + + Operation *clonedOp = op.clone(b, loc, clonedViews); + // When the producer is an IndexedGenercOp, we have to transform its block + // IV arguments according to the tiling of the consumer, i.e. offset them by + // the values computed in `loopRanges`. + if (auto indexedGenericOp = dyn_cast(clonedOp)) { + auto &block = indexedGenericOp.region().front(); + + OpBuilder::InsertionGuard g(b); + b.setInsertionPointToStart(&block); + for (unsigned i = 0, e = indexedGenericOp.getNumLoops(); i < e; ++i) { + Value oldIndex = block.getArgument(i); + Value newIndex = b.create(indexedGenericOp.getLoc(), oldIndex, + loopRanges[i].offset); + replaceAllUsesExcept( + oldIndex, newIndex, + SmallPtrSet{newIndex.getDefiningOp()}); + } + } + return clonedOp; } struct ViewDimension { @@ -284,10 +304,6 @@ fuseProducerOfDep(OpBuilder &b, LinalgOp consumer, unsigned consumerIdx, LLVM_DEBUG(dbgs() << "\n***Consider producer:\t" << *dependence.dependentOpView.op << "\n"); auto producer = cast(dependence.dependentOpView.op); - if (isa(dependence.dependentOpView.op)) { - LLVM_DEBUG(dbgs() << "Not fusing indexed_generic producer"); - continue; - } // Check that the dependence is indeed on the input `consumerIdx` view. 
auto consumedView = dependence.indexingView; diff --git a/mlir/lib/Transforms/Utils/LoopUtils.cpp b/mlir/lib/Transforms/Utils/LoopUtils.cpp index 3aebc83678f7..c03cf6cfc282 100644 --- a/mlir/lib/Transforms/Utils/LoopUtils.cpp +++ b/mlir/lib/Transforms/Utils/LoopUtils.cpp @@ -1158,17 +1158,6 @@ TileLoops mlir::extractFixedOuterLoops(loop::ForOp rootForOp, return tileLoops; } -// Replaces all uses of `orig` with `replacement` except if the user is listed -// in `exceptions`. -static void -replaceAllUsesExcept(Value orig, Value replacement, - const SmallPtrSetImpl &exceptions) { - for (auto &use : llvm::make_early_inc_range(orig.getUses())) { - if (exceptions.count(use.getOwner()) == 0) - use.set(replacement); - } -} - /// Return the new lower bound, upper bound, and step in that order. Insert any /// additional bounds calculations before the given builder and any additional /// conversion back to the original loop induction value inside the given Block. @@ -2382,3 +2371,12 @@ mlir::separateFullTiles(MutableArrayRef inputNest, return success(); } + +void mlir::replaceAllUsesExcept( + Value orig, Value replacement, + const SmallPtrSetImpl &exceptions) { + for (auto &use : llvm::make_early_inc_range(orig.getUses())) { + if (exceptions.count(use.getOwner()) == 0) + use.set(replacement); + } +} diff --git a/mlir/test/Dialect/Linalg/fusion.mlir b/mlir/test/Dialect/Linalg/fusion.mlir index 82ef196d0d97..14a12840d1d0 100644 --- a/mlir/test/Dialect/Linalg/fusion.mlir +++ b/mlir/test/Dialect/Linalg/fusion.mlir @@ -604,111 +604,6 @@ func @pointwise_no_view(%M: index, %N: index) { // CHECK: linalg.generic // CHECK: mulf -// ----- - -#map5 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> -#map6 = affine_map<(d0, d1) -> (d0, d1)> -#id_2d = affine_map<(i, j) -> (i, j)> -#pointwise_2d_trait = { - args_in = 2, - args_out = 1, - indexing_maps = [#id_2d, #id_2d, #id_2d], - iterator_types = ["parallel", "parallel"] -} -func @indexed_generic_test(%A: memref, - %B: memref, - %C: memref, - %D: memref) { - linalg.generic #pointwise_2d_trait %A, %B, %C { - ^bb0(%e: f32, %arg5: f32, %arg6: f32): // no predecessors - %2 = addf %e, %arg5 : f32 - linalg.yield %2 : f32 - }: memref, memref, memref - %c1 = constant 1 : index - %c0 = constant 0 : index - %c25 = constant 25 : index - %c10 = constant 10 : index - %0 = dim %C, 0 : memref - %1 = dim %C, 1 : memref - %2 = dim %D, 0 : memref - %3 = dim %D, 1 : memref - loop.for %arg2 = %c0 to %0 step %c10 { - loop.for %arg3 = %c0 to %1 step %c25 { - %4 = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : - memref to memref - %5 = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : - memref to memref - linalg.indexed_generic { - indexing_maps = [#map6, #map6], - iterator_types = ["parallel", "parallel"], - args_in = 1, - args_out = 1 - } %4, %5 { - ^bb0(%arg4: index, %arg5: index, %arg6: f32, %arg7: f32): - %6 = addi %arg4, %arg2 : index - %7 = addi %arg5, %arg3 : index - %8 = index_cast %6 : index to i32 - %9 = sitofp %8 : i32 to f32 - %10 = index_cast %7 : index to i32 - %11 = sitofp %10 : i32 to f32 - %12 = addf %9, %11 : f32 - linalg.yield %12 : f32 - }: memref, memref - } - } - return -} -// CHECK-LABEL: func @indexed_generic_test -// CHECK: loop.for -// CHECK: loop.for -// CHECK-NOT: loop.for -// CHECK: linalg.generic -// CHECK: addf -// CHECK: linalg.indexed_generic -// CHECK: index_cast - -// ----- - -// -// We should not be fusing indexed_generic into a generic yet. 
-// https://bugs.llvm.org/show_bug.cgi?id=44875 -// - -#map0 = affine_map<(d0)[s0,s1] -> (d0 * s1 + s0)> -#pointwise_map = affine_map<(d0) -> (d0)> -#pointwise_1d_trait = { - args_in = 1, - args_out = 1, - indexing_maps = [#pointwise_map, #pointwise_map], - iterator_types = ["parallel"] -} - -func @nofuse_indexed_generic(%A: memref, %B: memref, %C: memref) { - linalg.indexed_generic #pointwise_1d_trait %A, %B { - ^bb0(%i: index, %a: f32, %b: f32): - linalg.yield %a : f32 - }: memref, memref - - %c0 = constant 0 : index - %c1 = constant 1 : index - %c10 = constant 10 : index - %dB = dim %B, 0 : memref - loop.for %i = %c0 to %dB step %c10 { - %subB = subview %B[%i][%c10][%c1] : memref to memref - %subC = subview %C[%i][%c10][%c1] : memref to memref - linalg.generic #pointwise_1d_trait %subB, %subC { - ^bb0(%b: f32, %c: f32): - linalg.yield %b : f32 - }: memref, memref - } - return -} -// CHECK-LABEL: func @nofuse_indexed_generic -// CHECK-NOT: loop.for -// CHECK: linalg.indexed_generic -// CHECK: loop.for -// CHECK-NOT: linalg.indexed_generic -// CHECK: linalg.generic // ----- diff --git a/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir new file mode 100644 index 000000000000..eaef27b2f3de --- /dev/null +++ b/mlir/test/Dialect/Linalg/fusion_indexed_generic.mlir @@ -0,0 +1,186 @@ +// RUN: mlir-opt %s -linalg-fusion -split-input-file | FileCheck %s --dump-input-on-failure + +#map = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> +#id_2d = affine_map<(d0, d1) -> (d0, d1)> +#pointwise_2d_trait = { + args_in = 2, + args_out = 1, + indexing_maps = [#id_2d, #id_2d, #id_2d], + iterator_types = ["parallel", "parallel"] +} +func @fuse_indexed_generic_consumer(%A: memref, + %B: memref, + %C: memref, + %D: memref) { + linalg.generic #pointwise_2d_trait %A, %B, %C { + ^bb0(%e: f32, %arg5: f32, %arg6: f32): // no predecessors + %2 = addf %e, %arg5 : f32 + linalg.yield %2 : f32 + }: memref, memref, memref + %c1 = constant 1 : index + %c0 = constant 0 : index + %c25 = constant 25 : index + %c10 = constant 10 : index + %0 = dim %C, 0 : memref + %1 = dim %C, 1 : memref + %2 = dim %D, 0 : memref + %3 = dim %D, 1 : memref + loop.for %arg2 = %c0 to %0 step %c10 { + loop.for %arg3 = %c0 to %1 step %c25 { + %4 = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : + memref to memref + %5 = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : + memref to memref + linalg.indexed_generic { + indexing_maps = [#id_2d, #id_2d], + iterator_types = ["parallel", "parallel"], + args_in = 1, + args_out = 1 + } %4, %5 { + ^bb0(%arg4: index, %arg5: index, %arg6: f32, %arg7: f32): + %6 = addi %arg4, %arg2 : index + %7 = addi %arg5, %arg3 : index + %8 = index_cast %6 : index to i32 + %9 = sitofp %8 : i32 to f32 + %10 = index_cast %7 : index to i32 + %11 = sitofp %10 : i32 to f32 + %12 = addf %9, %11 : f32 + linalg.yield %12 : f32 + }: memref, memref + } + } + return +} +// CHECK-LABEL: func @fuse_indexed_generic_consumer +// CHECK: loop.for +// CHECK: loop.for +// CHECK-NOT: loop.for +// CHECK: linalg.generic +// CHECK-NOT: addi +// CHECK: addf +// CHECK: linalg.indexed_generic +// CHECK: index_cast + +// ----- + +#map = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> +#id_2d = affine_map<(d0, d1) -> (d0, d1)> +#pointwise_2d_trait = { + args_in = 2, + args_out = 1, + indexing_maps = [#id_2d, #id_2d, #id_2d], + iterator_types = ["parallel", "parallel"] +} +func @fuse_indexed_generic_producer(%A: memref, + %B: memref, + %C: memref, + %D: memref) 
{ + %c1 = constant 1 : index + %c0 = constant 0 : index + %c25 = constant 25 : index + %c10 = constant 10 : index + linalg.indexed_generic #pointwise_2d_trait %A, %B, %C { + ^bb0(%i: index, %j: index, %a: f32, %b: f32, %c: f32): // no predecessors + %i_int = index_cast %i: index to i32 + %i_float = sitofp %i_int : i32 to f32 + %ab = addf %a, %b : f32 + %out = addf %ab, %i_float : f32 + linalg.yield %out : f32 + }: memref, memref, memref + %C_X = dim %C, 0 : memref + %C_Y = dim %C, 1 : memref + %D_X = dim %D, 0 : memref + %D_Y = dim %D, 1 : memref + loop.parallel (%arg2, %arg3) = (%c0, %c0) to (%C_X, %C_Y) step (%c10, %c25) { + %C_view = std.subview %C[%arg2, %arg3][%c10, %c25][%c1, %c1] : + memref to memref + %D_view = std.subview %D[%arg2, %arg3][%c10, %c25][%c1, %c1] : + memref to memref + linalg.generic { + indexing_maps = [#id_2d, #id_2d], + iterator_types = ["parallel", "parallel"], + args_in = 1, + args_out = 1 + } %C_view, %D_view { + ^bb0( %a: f32, %b: f32): + %ab = addf %a, %b : f32 + linalg.yield %ab : f32 + }: memref, memref + } + return +} +// CHECK-LABEL: func @fuse_indexed_generic_producer +// CHECK: loop.parallel ([[I:%.*]], [[J:%.*]]) = +// CHECK-NOT: loop.parallel +// CHECK: linalg.indexed_generic +// CHECK: ^bb0([[i:%.*]]: index, [[j:%.*]]: index +// CHECK: [[i_new:%.*]] = addi [[i]], [[I]] : index +// CHECK: [[j_new:%.*]] = addi [[j]], [[J]] : index +// CHECK: {{.*}} = index_cast [[i_new]] : index to i32 +// CHECK: linalg.generic +// CHECK: addf + +// ----- + +#map = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)> +#id_2d = affine_map<(d0, d1) -> (d0, d1)> +#pointwise_2d_trait = { + args_in = 2, + args_out = 1, + indexing_maps = [#id_2d, #id_2d, #id_2d], + iterator_types = ["parallel", "parallel"] +} +func @fuse_indexed_generic_producer_tile_second_dim_only(%A: memref, + %B: memref, + %C: memref, + %D: memref) { + %c1 = constant 1 : index + %c3 = constant 3 : index + %c0 = constant 0 : index + linalg.indexed_generic #pointwise_2d_trait %A, %B, %C { + ^bb0(%i: index, %j: index, %a: f32, %b: f32, %c: f32): // no predecessors + %j_int = index_cast %j: index to i32 + %j_float = sitofp %j_int : i32 to f32 + %ab = addf %a, %b : f32 + %out = addf %ab, %j_float : f32 + linalg.yield %out : f32 + }: memref, memref, memref + %C_X = dim %C, 0 : memref + %C_Y = dim %C, 1 : memref + %D_X = dim %D, 0 : memref + %D_Y = dim %D, 1 : memref + %3 = linalg.range %c0 : %C_Y : %c3 : !linalg.range + loop.parallel (%j) = (%c0) to (%C_Y) step (%c3) { + %0 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)>(%c3, %C_Y, %j) + %C_view = subview %C[%c0, %j] [%C_X, %0] [%c1, %c1] : + memref to memref + + %1 = affine.min affine_map<(d0, d1, d2) -> (d0, d1 - d2)>(%c3, %D_Y, %j) + %D_view = subview %D[%c0, %j] [%D_X, %1] [%c1, %c1] : + memref to memref + + linalg.generic { + indexing_maps = [#id_2d, #id_2d], + iterator_types = ["parallel", "parallel"], + args_in = 1, + args_out = 1 + } %C_view, %D_view { + ^bb0( %a: f32, %b: f32): + %ab = addf %a, %b : f32 + linalg.yield %ab : f32 + }: memref, memref + loop.yield + } + return +} +// CHECK-LABEL: func @fuse_indexed_generic_producer_tile_second_dim_only +// CHECK: [[C0:%.*]] = constant 0 : index +// CHECK: loop.parallel ([[J:%.*]]) = +// CHECK-NOT: loop.parallel +// CHECK: linalg.indexed_generic +// CHECK: ^bb0([[i:%.*]]: index, [[j:%.*]]: index +// CHECK: [[i_new:%.*]] = addi [[i]], [[C0]] : index +// CHECK: [[j_new:%.*]] = addi [[j]], [[J]] : index +// CHECK: {{.*}} = index_cast [[j_new]] : index to i32 +// CHECK: linalg.generic +// CHECK: 
addf From 7b9c6c16c33deb52e7081f94ad51e3910ca592c9 Mon Sep 17 00:00:00 2001 From: Stephan Dollberg Date: Thu, 16 Apr 2020 11:17:10 +0200 Subject: [PATCH 015/216] Also look for devtoolset-9 gcc toolchain devtoolset-9 has been out for a while so also look for it. Differential Revision: https://reviews.llvm.org/D77420 --- clang/lib/Driver/ToolChains/Gnu.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index d20d62987589..0ea33fc20e86 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -1977,6 +1977,7 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( // Non-Solaris is much simpler - most systems just go with "/usr". if (SysRoot.empty() && TargetTriple.getOS() == llvm::Triple::Linux) { // Yet, still look for RHEL devtoolsets. + Prefixes.push_back("/opt/rh/devtoolset-9/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-8/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-7/root/usr"); Prefixes.push_back("/opt/rh/devtoolset-6/root/usr"); From c8d6fa5134ae66f3fb8e0b8caac5de4f737c8bef Mon Sep 17 00:00:00 2001 From: Jeremy Morse Date: Thu, 16 Apr 2020 10:24:47 +0100 Subject: [PATCH 016/216] [LiveDebugValues] Terminate open ranges on DBG_VALUE $noreg In D68209, LiveDebugValues::transferDebugValue had a call to OpenRanges.erase shifted, and by accident this led to a code path where DBG_VALUEs of $noreg would not have their open range terminated, allowing variable locations to extend past blocks where they were terminated. This patch correctly terminates the open range, if present, when such a DBG_VAUE is encountered, and adds a test for this behaviour. Differential Revision: https://reviews.llvm.org/D78218 --- llvm/lib/CodeGen/LiveDebugValues.cpp | 4 +- .../X86/livedebugvalues_loop_terminated.mir | 67 +++++++++++++++++++ 2 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir diff --git a/llvm/lib/CodeGen/LiveDebugValues.cpp b/llvm/lib/CodeGen/LiveDebugValues.cpp index 530da523e554..fdea70237ef0 100644 --- a/llvm/lib/CodeGen/LiveDebugValues.cpp +++ b/llvm/lib/CodeGen/LiveDebugValues.cpp @@ -968,9 +968,11 @@ void LiveDebugValues::transferDebugValue(const MachineInstr &MI, } else if (MI.hasOneMemOperand()) { llvm_unreachable("DBG_VALUE with mem operand encountered after regalloc?"); } else { - // This must be an undefined location. We should leave OpenRanges closed. + // This must be an undefined location. If it has an open range, erase it. assert(MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == 0 && "Unexpected non-undef DBG_VALUE encountered"); + VarLoc VL(MI, LS); + OpenRanges.erase(VL); } } diff --git a/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir new file mode 100644 index 000000000000..bbe1d4ceda60 --- /dev/null +++ b/llvm/test/DebugInfo/MIR/X86/livedebugvalues_loop_terminated.mir @@ -0,0 +1,67 @@ +--- | + ; RUN: llc %s -march=x86-64 -run-pass=livedebugvalues -o - | FileCheck %s -implicit-check-not=DBG_VALUE + + ; Check that DBG_VALUE instructions are not propagated into a loop that + ; explicitly terminates its location. 
+ + ; CHECK: ![[VARNO:[0-9]+]] = !DILocalVariable(name: "myVar" + ; CHECK-LABEL: bb.0.entry: + ; CHECK: DBG_VALUE $ebx, $noreg, ![[VARNO]], !DIExpression(), + ; CHECK-LABEL: bb.2.bb2: + ; CHECK: DBG_VALUE $noreg, $noreg, ![[VARNO]], !DIExpression(), + + define i32 @_Z8bb_to_bb() local_unnamed_addr !dbg !12 { + entry: + br label %bb1, !dbg !17 + bb1: + br label %bb2, !dbg !17 + bb2: + br label %bb3, !dbg !17 + bb3: + ret i32 0, !dbg !17 + } + + !llvm.dbg.cu = !{!0} + !llvm.module.flags = !{!7, !8, !9, !10} + !llvm.ident = !{!11} + !0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 10.0.0)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, globals: !3, debugInfoForProfiling: true, nameTableKind: None) + !1 = !DIFile(filename: "main.cpp", directory: "F:\test") + !2 = !{} + !3 = !{!4} + !4 = !DIGlobalVariableExpression(var: !5, expr: !DIExpression()) + !5 = distinct !DIGlobalVariable(name: "start", scope: !0, file: !1, line: 4, type: !6, isLocal: false, isDefinition: true) + !6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) + !7 = !{i32 2, !"Dwarf Version", i32 4} + !8 = !{i32 2, !"Debug Info Version", i32 3} + !9 = !{i32 1, !"wchar_size", i32 2} + !10 = !{i32 7, !"PIC Level", i32 2} + !11 = !{!"clang version 10.0.0"} + !12 = distinct !DISubprogram(name: "bb_to_bb", linkageName: "bb_to_bb", scope: !1, file: !1, line: 6, type: !13, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !15) + !13 = !DISubroutineType(types: !14) + !14 = !{!6, !6} + !15 = !{!16} + !16 = !DILocalVariable(name: "myVar", scope: !12, file: !1, line: 7, type: !6) + !17 = !DILocation(line: 10, scope: !12) + +... +--- +name: _Z8bb_to_bb +body: | + bb.0.entry: + successors: %bb.1 + $ebx = MOV32ri 0, debug-location !17 + DBG_VALUE $ebx, $noreg, !16, !DIExpression(), debug-location !17 + + bb.1.bb1: + successors: %bb.2 + $eax = MOV32ri 0, debug-location !17 + + bb.2.bb2: + successors: %bb.1, %bb.3 + $ecx = MOV32ri 1, debug-location !17 + DBG_VALUE $noreg, $noreg, !16, !DIExpression(), debug-location !17 + JCC_1 %bb.1, 4, implicit killed $eflags + + bb.3.bb3: + RETQ $eax, debug-location !17 +... From 3ee1ec0b9dd6ee2350f39ae8a418bf3ce28d06cf Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 16 Apr 2020 11:45:02 +0200 Subject: [PATCH 017/216] LangOptions cannot depend on ASTContext, make it not use ASTContext directly Fixes a layering violation introduced in 2ba4e3a4598b165245c581c506a813cd4a7dce33. --- clang/include/clang/AST/Expr.h | 40 +++++-------------------- clang/include/clang/Basic/LangOptions.h | 6 ++-- clang/lib/AST/ASTImporter.cpp | 10 +++---- clang/lib/AST/Expr.cpp | 40 +++++++++++++++++++++++-- clang/lib/Basic/LangOptions.cpp | 9 +++--- clang/lib/CodeGen/CGExprScalar.cpp | 4 +-- clang/lib/Sema/SemaExprCXX.cpp | 2 +- clang/lib/Sema/TreeTransform.h | 4 +-- 8 files changed, 62 insertions(+), 53 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 1fdfe926eb71..fab84f6a6ecd 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -3494,19 +3494,7 @@ class BinaryOperator : public Expr { /// allocated for the trailing objects when needed. 
BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, - SourceLocation opLoc, FPOptions FPFeatures) - : Expr(BinaryOperatorClass, ResTy, VK, OK) { - BinaryOperatorBits.Opc = opc; - assert(!isCompoundAssignmentOp() && - "Use CompoundAssignOperator for compound assignments"); - BinaryOperatorBits.OpLoc = opLoc; - SubExprs[LHS] = lhs; - SubExprs[RHS] = rhs; - BinaryOperatorBits.HasFPFeatures = FPFeatures.requiresTrailingStorage(Ctx); - if (BinaryOperatorBits.HasFPFeatures) - *getTrailingFPFeatures() = FPFeatures; - setDependence(computeDependence(this)); - } + SourceLocation opLoc, FPOptions FPFeatures); /// Construct an empty binary operator. explicit BinaryOperator(EmptyShell Empty) : Expr(BinaryOperatorClass, Empty) { @@ -3678,40 +3666,28 @@ class BinaryOperator : public Expr { // Get the FP features status of this operator. Only meaningful for // operations on floating point types. - FPOptions getFPFeatures(const ASTContext &C) const { + FPOptions getFPFeatures(const LangOptions &LO) const { if (BinaryOperatorBits.HasFPFeatures) return getStoredFPFeatures(); - return FPOptions::defaultWithoutTrailingStorage(C); + return FPOptions::defaultWithoutTrailingStorage(LO); } // Get the FP contractability status of this operator. Only meaningful for // operations on floating point types. - bool isFPContractableWithinStatement(const ASTContext &C) const { - return getFPFeatures(C).allowFPContractWithinStatement(); + bool isFPContractableWithinStatement(const LangOptions &LO) const { + return getFPFeatures(LO).allowFPContractWithinStatement(); } // Get the FENV_ACCESS status of this operator. Only meaningful for // operations on floating point types. - bool isFEnvAccessOn(const ASTContext &C) const { - return getFPFeatures(C).allowFEnvAccess(); + bool isFEnvAccessOn(const LangOptions &LO) const { + return getFPFeatures(LO).allowFEnvAccess(); } protected: BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, - SourceLocation opLoc, FPOptions FPFeatures, bool dead2) - : Expr(CompoundAssignOperatorClass, ResTy, VK, OK) { - BinaryOperatorBits.Opc = opc; - assert(isCompoundAssignmentOp() && - "Use CompoundAssignOperator for compound assignments"); - BinaryOperatorBits.OpLoc = opLoc; - SubExprs[LHS] = lhs; - SubExprs[RHS] = rhs; - BinaryOperatorBits.HasFPFeatures = FPFeatures.requiresTrailingStorage(Ctx); - if (BinaryOperatorBits.HasFPFeatures) - *getTrailingFPFeatures() = FPFeatures; - setDependence(computeDependence(this)); - } + SourceLocation opLoc, FPOptions FPFeatures, bool dead2); /// Construct an empty BinaryOperator, SC is CompoundAssignOperator. BinaryOperator(StmtClass SC, EmptyShell Empty) : Expr(SC, Empty) { diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index c33f8bf8c8ef..76ddd7051fd3 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -27,8 +27,6 @@ namespace clang { -class ASTContext; - /// Bitfields of LangOptions, split out from LangOptions in order to ensure that /// this large collection of bitfields is a trivial class type. class LangOptionsBase { @@ -403,11 +401,11 @@ class FPOptions { /// Return the default value of FPOptions that's used when trailing /// storage isn't required. 
- static FPOptions defaultWithoutTrailingStorage(const ASTContext &C); + static FPOptions defaultWithoutTrailingStorage(const LangOptions &LO); /// Does this FPOptions require trailing storage when stored in various /// AST nodes, or can it be recreated using `defaultWithoutTrailingStorage`? - bool requiresTrailingStorage(const ASTContext &C); + bool requiresTrailingStorage(const LangOptions &LO); bool allowFPContractWithinStatement() const { return fp_contract == LangOptions::FPC_On; diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index e303701cf5d3..5cdf1de4c96a 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6703,10 +6703,10 @@ ExpectedStmt ASTNodeImporter::VisitBinaryOperator(BinaryOperator *E) { if (Err) return std::move(Err); - return BinaryOperator::Create(Importer.getToContext(), ToLHS, ToRHS, - E->getOpcode(), ToType, E->getValueKind(), - E->getObjectKind(), ToOperatorLoc, - E->getFPFeatures(Importer.getFromContext())); + return BinaryOperator::Create( + Importer.getToContext(), ToLHS, ToRHS, E->getOpcode(), ToType, + E->getValueKind(), E->getObjectKind(), ToOperatorLoc, + E->getFPFeatures(Importer.getFromContext().getLangOpts())); } ExpectedStmt ASTNodeImporter::VisitConditionalOperator(ConditionalOperator *E) { @@ -6817,7 +6817,7 @@ ASTNodeImporter::VisitCompoundAssignOperator(CompoundAssignOperator *E) { return CompoundAssignOperator::Create( Importer.getToContext(), ToLHS, ToRHS, E->getOpcode(), ToType, E->getValueKind(), E->getObjectKind(), ToOperatorLoc, - E->getFPFeatures(Importer.getFromContext()), + E->getFPFeatures(Importer.getFromContext().getLangOpts()), importChecked(Err, ToComputationLHSType), importChecked(Err, ToComputationResultType)); } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index a5c634e298de..bc6fadc71609 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4347,6 +4347,42 @@ ParenListExpr *ParenListExpr::CreateEmpty(const ASTContext &Ctx, return new (Mem) ParenListExpr(EmptyShell(), NumExprs); } +BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, + Opcode opc, QualType ResTy, ExprValueKind VK, + ExprObjectKind OK, SourceLocation opLoc, + FPOptions FPFeatures) + : Expr(BinaryOperatorClass, ResTy, VK, OK) { + BinaryOperatorBits.Opc = opc; + assert(!isCompoundAssignmentOp() && + "Use CompoundAssignOperator for compound assignments"); + BinaryOperatorBits.OpLoc = opLoc; + SubExprs[LHS] = lhs; + SubExprs[RHS] = rhs; + BinaryOperatorBits.HasFPFeatures = + FPFeatures.requiresTrailingStorage(Ctx.getLangOpts()); + if (BinaryOperatorBits.HasFPFeatures) + *getTrailingFPFeatures() = FPFeatures; + setDependence(computeDependence(this)); +} + +BinaryOperator::BinaryOperator(const ASTContext &Ctx, Expr *lhs, Expr *rhs, + Opcode opc, QualType ResTy, ExprValueKind VK, + ExprObjectKind OK, SourceLocation opLoc, + FPOptions FPFeatures, bool dead2) + : Expr(CompoundAssignOperatorClass, ResTy, VK, OK) { + BinaryOperatorBits.Opc = opc; + assert(isCompoundAssignmentOp() && + "Use CompoundAssignOperator for compound assignments"); + BinaryOperatorBits.OpLoc = opLoc; + SubExprs[LHS] = lhs; + SubExprs[RHS] = rhs; + BinaryOperatorBits.HasFPFeatures = + FPFeatures.requiresTrailingStorage(Ctx.getLangOpts()); + if (BinaryOperatorBits.HasFPFeatures) + *getTrailingFPFeatures() = FPFeatures; + setDependence(computeDependence(this)); +} + BinaryOperator *BinaryOperator::CreateEmpty(const ASTContext &C, bool HasFPFeatures) { unsigned Extra = 
sizeOfTrailingObjects(HasFPFeatures); @@ -4360,7 +4396,7 @@ BinaryOperator *BinaryOperator::Create(const ASTContext &C, Expr *lhs, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, FPOptions FPFeatures) { - bool HasFPFeatures = FPFeatures.requiresTrailingStorage(C); + bool HasFPFeatures = FPFeatures.requiresTrailingStorage(C.getLangOpts()); unsigned Extra = sizeOfTrailingObjects(HasFPFeatures); void *Mem = C.Allocate(sizeof(BinaryOperator) + Extra, alignof(BinaryOperator)); @@ -4380,7 +4416,7 @@ CompoundAssignOperator *CompoundAssignOperator::Create( const ASTContext &C, Expr *lhs, Expr *rhs, Opcode opc, QualType ResTy, ExprValueKind VK, ExprObjectKind OK, SourceLocation opLoc, FPOptions FPFeatures, QualType CompLHSType, QualType CompResultType) { - bool HasFPFeatures = FPFeatures.requiresTrailingStorage(C); + bool HasFPFeatures = FPFeatures.requiresTrailingStorage(C.getLangOpts()); unsigned Extra = sizeOfTrailingObjects(HasFPFeatures); void *Mem = C.Allocate(sizeof(CompoundAssignOperator) + Extra, alignof(CompoundAssignOperator)); diff --git a/clang/lib/Basic/LangOptions.cpp b/clang/lib/Basic/LangOptions.cpp index 6e12bda65a42..a74efdc460bf 100644 --- a/clang/lib/Basic/LangOptions.cpp +++ b/clang/lib/Basic/LangOptions.cpp @@ -11,7 +11,6 @@ //===----------------------------------------------------------------------===// #include "clang/Basic/LangOptions.h" -#include "clang/AST/ASTContext.h" using namespace clang; @@ -49,11 +48,11 @@ VersionTuple LangOptions::getOpenCLVersionTuple() const { return VersionTuple(Ver / 100, (Ver % 100) / 10); } -FPOptions FPOptions::defaultWithoutTrailingStorage(const ASTContext &C) { - FPOptions result(C.getLangOpts()); +FPOptions FPOptions::defaultWithoutTrailingStorage(const LangOptions &LO) { + FPOptions result(LO); return result; } -bool FPOptions::requiresTrailingStorage(const ASTContext &C) { - return getAsOpaqueInt() != defaultWithoutTrailingStorage(C).getAsOpaqueInt(); +bool FPOptions::requiresTrailingStorage(const LangOptions &LO) { + return getAsOpaqueInt() != defaultWithoutTrailingStorage(LO).getAsOpaqueInt(); } diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 75be18e23e2f..97e96941ec2f 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -2929,7 +2929,7 @@ BinOpInfo ScalarExprEmitter::EmitBinOps(const BinaryOperator *E) { Result.RHS = Visit(E->getRHS()); Result.Ty = E->getType(); Result.Opcode = E->getOpcode(); - Result.FPFeatures = E->getFPFeatures(CGF.getContext()); + Result.FPFeatures = E->getFPFeatures(CGF.getLangOpts()); Result.E = E; return Result; } @@ -2949,7 +2949,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( OpInfo.RHS = Visit(E->getRHS()); OpInfo.Ty = E->getComputationResultType(); OpInfo.Opcode = E->getOpcode(); - OpInfo.FPFeatures = E->getFPFeatures(CGF.getContext()); + OpInfo.FPFeatures = E->getFPFeatures(CGF.getLangOpts()); OpInfo.E = E; // Load/convert the LHS. 
LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index 1fe1515bed10..0761f02066fe 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -6994,7 +6994,7 @@ ExprResult Sema::ActOnDecltypeExpression(Expr *E) { return BinaryOperator::Create(Context, BO->getLHS(), RHS.get(), BO_Comma, BO->getType(), BO->getValueKind(), BO->getObjectKind(), BO->getOperatorLoc(), - BO->getFPFeatures(getASTContext())); + BO->getFPFeatures(getLangOpts())); } } diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index de7892f64257..e79969e9a87a 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10267,7 +10267,7 @@ TreeTransform::TransformBinaryOperator(BinaryOperator *E) { return getDerived().RebuildBinaryOperator( E->getOperatorLoc(), E->getOpcode(), LHS.get(), RHS.get()); Sema::FPFeaturesStateRAII FPFeaturesState(getSema()); - getSema().FPFeatures = E->getFPFeatures(getSema().getASTContext()); + getSema().FPFeatures = E->getFPFeatures(getSema().getLangOpts()); return getDerived().RebuildBinaryOperator(E->getOperatorLoc(), E->getOpcode(), LHS.get(), RHS.get()); @@ -10322,7 +10322,7 @@ ExprResult TreeTransform::TransformCompoundAssignOperator( CompoundAssignOperator *E) { Sema::FPFeaturesStateRAII FPFeaturesState(getSema()); - getSema().FPFeatures = E->getFPFeatures(getSema().getASTContext()); + getSema().FPFeatures = E->getFPFeatures(getSema().getLangOpts()); return getDerived().TransformBinaryOperator(E); } From d5c26f871b7ee81e7bc6cc17cfddc9d08befe971 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Thu, 16 Apr 2020 11:57:40 +0200 Subject: [PATCH 018/216] [lldb/unittests] Better error messages when creating sockets fails We get failures in SocketTestUtilities on the pre-merge bots. This might give us a clue as to what's wrong. 
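The change replaces gtest's non-fatal EXPECT checks with fatal ASSERTs that
also print the underlying llvm::Error text, so a failing setup step reports
why it failed instead of a bare boolean mismatch. A rough, hedged sketch of
the pattern (openResource is a hypothetical stand-in, not lldb's socket code):

    #include "llvm/Support/Error.h"
    #include "llvm/Testing/Support/Error.h"
    #include "gtest/gtest.h"

    // Stand-in for a setup step such as listening on or connecting a socket.
    static llvm::Error openResource() { return llvm::Error::success(); }

    TEST(Example, SetupErrorsAreDescriptive) {
      // EXPECT_FALSE(error.Fail()) would only report "true != false" and let
      // the test keep running; ASSERT_THAT_ERROR prints the llvm::Error
      // message and aborts the test before later checks touch a
      // half-initialized fixture.
      ASSERT_THAT_ERROR(openResource(), llvm::Succeeded());
    }
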
--- lldb/unittests/Host/SocketTestUtilities.cpp | 22 ++++++++++----------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/lldb/unittests/Host/SocketTestUtilities.cpp b/lldb/unittests/Host/SocketTestUtilities.cpp index 858d64f9b4fc..ab883531bdf2 100644 --- a/lldb/unittests/Host/SocketTestUtilities.cpp +++ b/lldb/unittests/Host/SocketTestUtilities.cpp @@ -33,10 +33,10 @@ void lldb_private::CreateConnectedSockets( Status error; std::unique_ptr listen_socket_up( new SocketType(true, child_processes_inherit)); - EXPECT_FALSE(error.Fail()); + ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); error = listen_socket_up->Listen(listen_remote_address, 5); - EXPECT_FALSE(error.Fail()); - EXPECT_TRUE(listen_socket_up->IsValid()); + ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); + ASSERT_TRUE(listen_socket_up->IsValid()); Status accept_error; Socket *accept_socket; @@ -47,21 +47,19 @@ void lldb_private::CreateConnectedSockets( std::string connect_remote_address = get_connect_addr(*listen_socket_up); std::unique_ptr connect_socket_up( new SocketType(true, child_processes_inherit)); - EXPECT_FALSE(error.Fail()); + ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); error = connect_socket_up->Connect(connect_remote_address); - EXPECT_FALSE(error.Fail()); - EXPECT_TRUE(connect_socket_up->IsValid()); + ASSERT_THAT_ERROR(error.ToError(), llvm::Succeeded()); + ASSERT_TRUE(connect_socket_up->IsValid()); a_up->swap(connect_socket_up); - EXPECT_TRUE(error.Success()); - EXPECT_NE(nullptr, a_up->get()); - EXPECT_TRUE((*a_up)->IsValid()); + ASSERT_TRUE((*a_up)->IsValid()); accept_thread.join(); b_up->reset(static_cast(accept_socket)); - EXPECT_TRUE(accept_error.Success()); - EXPECT_NE(nullptr, b_up->get()); - EXPECT_TRUE((*b_up)->IsValid()); + ASSERT_THAT_ERROR(accept_error.ToError(), llvm::Succeeded()); + ASSERT_NE(nullptr, b_up->get()); + ASSERT_TRUE((*b_up)->IsValid()); listen_socket_up.reset(); } From 61b96704564b121210a3b83107f7867c9f2d89b3 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 16 Apr 2020 11:54:48 +0200 Subject: [PATCH 019/216] [clang] Const correct ComputePreambleBounds --- clang/include/clang/Frontend/PrecompiledPreamble.h | 2 +- clang/lib/Frontend/PrecompiledPreamble.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/include/clang/Frontend/PrecompiledPreamble.h b/clang/include/clang/Frontend/PrecompiledPreamble.h index 5ae77735576c..0d95ee683eee 100644 --- a/clang/include/clang/Frontend/PrecompiledPreamble.h +++ b/clang/include/clang/Frontend/PrecompiledPreamble.h @@ -38,7 +38,7 @@ class PCHContainerOperations; /// Runs lexer to compute suggested preamble bounds. 
PreambleBounds ComputePreambleBounds(const LangOptions &LangOpts, - llvm::MemoryBuffer *Buffer, + const llvm::MemoryBuffer *Buffer, unsigned MaxLines); class PreambleCallbacks; diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index 3657ccf8ecea..442ad63cee0e 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -228,7 +228,7 @@ template bool moveOnNoError(llvm::ErrorOr Val, T &Output) { } // namespace PreambleBounds clang::ComputePreambleBounds(const LangOptions &LangOpts, - llvm::MemoryBuffer *Buffer, + const llvm::MemoryBuffer *Buffer, unsigned MaxLines) { return Lexer::ComputePreamble(Buffer->getBuffer(), LangOpts, MaxLines); } From 921009e66798f8a40b59d4e7c63545adaead0f54 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Thu, 16 Apr 2020 01:01:11 +0200 Subject: [PATCH 020/216] [MSan] Enable for SystemZ Summary: This patch adds runtime support, adjusts tests and enables MSan. Like for ASan and UBSan, compile the tests with -mbackchain. Reviewers: eugenis, uweigand, jonpa, vitalybuka Reviewed By: eugenis, vitalybuka Subscribers: vitalybuka, mgorny, hiraditya, #sanitizers, stefansf, Andreas-Krebbel Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D76358 --- compiler-rt/cmake/config-ix.cmake | 2 +- compiler-rt/lib/msan/msan.h | 14 ++++++++++++++ compiler-rt/lib/msan/msan_allocator.cpp | 14 ++++++++++++++ compiler-rt/test/msan/backtrace.cpp | 8 +++++++- compiler-rt/test/msan/lit.cfg.py | 3 +++ compiler-rt/test/msan/mmap.cpp | 3 +++ compiler-rt/test/msan/mmap_below_shadow.cpp | 3 +++ compiler-rt/test/msan/param_tls_limit.cpp | 6 ++++++ compiler-rt/test/msan/strlen_of_shadow.cpp | 2 ++ 9 files changed, 53 insertions(+), 2 deletions(-) diff --git a/compiler-rt/cmake/config-ix.cmake b/compiler-rt/cmake/config-ix.cmake index 8261bc9fcdd5..ef62d701dee2 100644 --- a/compiler-rt/cmake/config-ix.cmake +++ b/compiler-rt/cmake/config-ix.cmake @@ -305,7 +305,7 @@ if(APPLE) else() set(ALL_LSAN_SUPPORTED_ARCH ${X86} ${X86_64} ${MIPS64} ${ARM64} ${ARM32} ${PPC64}) endif() -set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64}) +set(ALL_MSAN_SUPPORTED_ARCH ${X86_64} ${MIPS64} ${ARM64} ${PPC64} ${S390X}) set(ALL_HWASAN_SUPPORTED_ARCH ${X86_64} ${ARM64}) set(ALL_PROFILE_SUPPORTED_ARCH ${X86} ${X86_64} ${ARM32} ${ARM64} ${PPC64} ${MIPS32} ${MIPS64} ${S390X} ${SPARC} ${SPARCV9}) diff --git a/compiler-rt/lib/msan/msan.h b/compiler-rt/lib/msan/msan.h index 12aeaa43519a..e794c7c15f89 100644 --- a/compiler-rt/lib/msan/msan.h +++ b/compiler-rt/lib/msan/msan.h @@ -181,6 +181,20 @@ const MappingDesc kMemoryLayout[] = { #define MEM_TO_SHADOW(mem) (LINEARIZE_MEM((mem)) + 0x080000000000ULL) #define SHADOW_TO_ORIGIN(shadow) (((uptr)(shadow)) + 0x140000000000ULL) +#elif SANITIZER_LINUX && SANITIZER_S390_64 +const MappingDesc kMemoryLayout[] = { + {0x000000000000ULL, 0x040000000000ULL, MappingDesc::APP, "low memory"}, + {0x040000000000ULL, 0x080000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x080000000000ULL, 0x180000000000ULL, MappingDesc::SHADOW, "shadow"}, + {0x180000000000ULL, 0x1C0000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x1C0000000000ULL, 0x2C0000000000ULL, MappingDesc::ORIGIN, "origin"}, + {0x2C0000000000ULL, 0x440000000000ULL, MappingDesc::INVALID, "invalid"}, + {0x440000000000ULL, 0x500000000000ULL, MappingDesc::APP, "high memory"}}; + +#define MEM_TO_SHADOW(mem) \ + ((((uptr)(mem)) & ~0xC00000000000ULL) + 0x080000000000ULL) +#define SHADOW_TO_ORIGIN(shadow) 
(((uptr)(shadow)) + 0x140000000000ULL) + #elif SANITIZER_FREEBSD && SANITIZER_WORDSIZE == 64 // Low memory: main binary, MAP_32BIT mappings and modules diff --git a/compiler-rt/lib/msan/msan_allocator.cpp b/compiler-rt/lib/msan/msan_allocator.cpp index a08c1a00d2e5..68be794106b1 100644 --- a/compiler-rt/lib/msan/msan_allocator.cpp +++ b/compiler-rt/lib/msan/msan_allocator.cpp @@ -92,6 +92,20 @@ struct AP64 { // Allocator64 parameters. Deliberately using a short name. using AddressSpaceView = LocalAddressSpaceView; }; +typedef SizeClassAllocator64 PrimaryAllocator; +#elif defined(__s390x__) +static const uptr kMaxAllowedMallocSize = 2UL << 30; // 2G + +struct AP64 { // Allocator64 parameters. Deliberately using a short name. + static const uptr kSpaceBeg = 0x440000000000; + static const uptr kSpaceSize = 0x020000000000; // 2T. + static const uptr kMetadataSize = sizeof(Metadata); + typedef DefaultSizeClassMap SizeClassMap; + typedef MsanMapUnmapCallback MapUnmapCallback; + static const uptr kFlags = 0; + using AddressSpaceView = LocalAddressSpaceView; +}; + typedef SizeClassAllocator64 PrimaryAllocator; #elif defined(__aarch64__) static const uptr kMaxAllowedMallocSize = 2UL << 30; // 2G diff --git a/compiler-rt/test/msan/backtrace.cpp b/compiler-rt/test/msan/backtrace.cpp index cde4e8fc1c9e..7a8e041b3396 100644 --- a/compiler-rt/test/msan/backtrace.cpp +++ b/compiler-rt/test/msan/backtrace.cpp @@ -12,8 +12,14 @@ void f() { int sz = backtrace(buf, sizeof(buf) / sizeof(*buf)); assert(sz > 0); for (int i = 0; i < sz; ++i) - if (!buf[i]) + if (!buf[i]) { +#if defined(__s390x__) + // backtrace() may return a bogus trailing NULL on s390x. + if (i == sz - 1) + continue; +#endif exit(1); + } char **s = backtrace_symbols(buf, sz); assert(s != 0); for (int i = 0; i < sz; ++i) diff --git a/compiler-rt/test/msan/lit.cfg.py b/compiler-rt/test/msan/lit.cfg.py index 0b43d0810d89..8ec1614be130 100644 --- a/compiler-rt/test/msan/lit.cfg.py +++ b/compiler-rt/test/msan/lit.cfg.py @@ -18,6 +18,9 @@ # Some Msan tests leverage backtrace() which requires libexecinfo on FreeBSD. if config.host_os == 'FreeBSD': clang_msan_cflags += ["-lexecinfo", "-fPIC"] +# On SystemZ we need -mbackchain to make the fast unwinder work. +if config.target_arch == 's390x': + clang_msan_cflags.append("-mbackchain") clang_msan_cxxflags = config.cxx_mode_flags + clang_msan_cflags # Flags for KMSAN invocation. This is C-only, we're not interested in C++. 
diff --git a/compiler-rt/test/msan/mmap.cpp b/compiler-rt/test/msan/mmap.cpp index d83423735211..2e7e883c863a 100644 --- a/compiler-rt/test/msan/mmap.cpp +++ b/compiler-rt/test/msan/mmap.cpp @@ -24,6 +24,9 @@ bool AddrIsApp(void *p) { addr >= 0xe200000000ULL; #elif defined(__powerpc64__) return addr < 0x000100000000ULL || addr >= 0x300000000000ULL; +#elif defined(__s390x__) + return addr < 0x040000000000ULL || + (addr >= 0x440000000000ULL && addr < 0x500000000000); #elif defined(__aarch64__) struct AddrMapping { diff --git a/compiler-rt/test/msan/mmap_below_shadow.cpp b/compiler-rt/test/msan/mmap_below_shadow.cpp index 806b19da8ca6..46d948c9a5eb 100644 --- a/compiler-rt/test/msan/mmap_below_shadow.cpp +++ b/compiler-rt/test/msan/mmap_below_shadow.cpp @@ -27,6 +27,9 @@ int main(void) { #elif defined (__powerpc64__) uintptr_t hint = 0x2f0000000000ULL; const uintptr_t app_start = 0x300000000000ULL; +#elif defined(__s390x__) + uintptr_t hint = 0x07f000000000ULL; + const uintptr_t app_start = 0x020000000000ULL; #elif defined (__aarch64__) uintptr_t hint = 0x4f0000000ULL; const uintptr_t app_start = 0x7000000000ULL; diff --git a/compiler-rt/test/msan/param_tls_limit.cpp b/compiler-rt/test/msan/param_tls_limit.cpp index d34376a1f0c4..43e66858e26f 100644 --- a/compiler-rt/test/msan/param_tls_limit.cpp +++ b/compiler-rt/test/msan/param_tls_limit.cpp @@ -8,6 +8,12 @@ // AArch64 fails with: // void f801(S<801>): Assertion `__msan_test_shadow(&s, sizeof(s)) == -1' failed // XFAIL: aarch64 +// When passing huge structs by value, SystemZ uses pointers, therefore this +// test in its present form is unfortunately not applicable. +// ABI says: "A struct or union of any other size . Replace such an +// argument by a pointer to the object, or to a copy where necessary to enforce +// call-by-value semantics." +// XFAIL: s390x #include #include diff --git a/compiler-rt/test/msan/strlen_of_shadow.cpp b/compiler-rt/test/msan/strlen_of_shadow.cpp index 718cc08dc1fd..5e7c89c7b59f 100644 --- a/compiler-rt/test/msan/strlen_of_shadow.cpp +++ b/compiler-rt/test/msan/strlen_of_shadow.cpp @@ -21,6 +21,8 @@ const char *mem_to_shadow(const char *p) { #define LINEARIZE_MEM(mem) \ (((uintptr_t)(mem) & ~0x200000000000ULL) ^ 0x100000000000ULL) return (char *)(LINEARIZE_MEM(p) + 0x080000000000ULL); +#elif defined(__s390x__) + return (char *)(((uintptr_t)p & ~0xC00000000000ULL) + 0x080000000000ULL); #elif defined(__aarch64__) return (char *)((uintptr_t)p ^ 0x6000000000ULL); #endif From 03a9526fe5adae909f1d5fd2736703e69fc46e09 Mon Sep 17 00:00:00 2001 From: Ehud Katz Date: Thu, 16 Apr 2020 13:26:23 +0300 Subject: [PATCH 021/216] [CGExprAgg] Fix infinite loop in `findPeephole` Simplify the function using IgnoreParenNoopCasts. Fix PR45476 Differential Revision: https://reviews.llvm.org/D78098 --- clang/lib/CodeGen/CGExprAgg.cpp | 19 ++++++++----------- clang/test/CodeGen/pr45476.cpp | 19 +++++++++++++++++++ 2 files changed, 27 insertions(+), 11 deletions(-) create mode 100644 clang/test/CodeGen/pr45476.cpp diff --git a/clang/lib/CodeGen/CGExprAgg.cpp b/clang/lib/CodeGen/CGExprAgg.cpp index fa2d228b7eeb..90d4f7e4e096 100644 --- a/clang/lib/CodeGen/CGExprAgg.cpp +++ b/clang/lib/CodeGen/CGExprAgg.cpp @@ -677,17 +677,13 @@ AggExprEmitter::VisitCompoundLiteralExpr(CompoundLiteralExpr *E) { /// Attempt to look through various unimportant expressions to find a /// cast of the given kind. 
-static Expr *findPeephole(Expr *op, CastKind kind) { - while (true) { - op = op->IgnoreParens(); - if (CastExpr *castE = dyn_cast(op)) { - if (castE->getCastKind() == kind) - return castE->getSubExpr(); - if (castE->getCastKind() == CK_NoOp) - continue; - } - return nullptr; +static Expr *findPeephole(Expr *op, CastKind kind, const ASTContext &ctx) { + op = op->IgnoreParenNoopCasts(ctx); + if (auto castE = dyn_cast(op)) { + if (castE->getCastKind() == kind) + return castE->getSubExpr(); } + return nullptr; } void AggExprEmitter::VisitCastExpr(CastExpr *E) { @@ -776,7 +772,8 @@ void AggExprEmitter::VisitCastExpr(CastExpr *E) { (isToAtomic ? CK_AtomicToNonAtomic : CK_NonAtomicToAtomic); // These two cases are reverses of each other; try to peephole them. - if (Expr *op = findPeephole(E->getSubExpr(), peepholeTarget)) { + if (Expr *op = + findPeephole(E->getSubExpr(), peepholeTarget, CGF.getContext())) { assert(CGF.getContext().hasSameUnqualifiedType(op->getType(), E->getType()) && "peephole significantly changed types?"); diff --git a/clang/test/CodeGen/pr45476.cpp b/clang/test/CodeGen/pr45476.cpp new file mode 100644 index 000000000000..61f3f3649986 --- /dev/null +++ b/clang/test/CodeGen/pr45476.cpp @@ -0,0 +1,19 @@ +// RUN: %clang_cc1 -emit-llvm %s -o - | FileCheck %s +// PR45476 + +// This test used to get into an infinite loop, +// which, in turn, caused clang to never finish execution. + +struct s3 { + char a, b, c; +}; + +_Atomic struct s3 a; + +extern "C" void foo() { + // CHECK-LABEL: @foo + // CHECK: store atomic i32 + + a = s3{1, 2, 3}; +} + From 27e63d9b0eb7db38f41084063f77768df7b95913 Mon Sep 17 00:00:00 2001 From: David Zarzycki Date: Thu, 16 Apr 2020 06:32:17 -0400 Subject: [PATCH 022/216] Fix -Wdocumentation-html warning --- llvm/include/llvm/ADT/STLExtras.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ADT/STLExtras.h b/llvm/include/llvm/ADT/STLExtras.h index 5895cae84a71..e3b9e405f69a 100644 --- a/llvm/include/llvm/ADT/STLExtras.h +++ b/llvm/include/llvm/ADT/STLExtras.h @@ -112,7 +112,7 @@ using is_invocable = is_detected; /// This class provides various trait information about a callable object. /// * To access the number of arguments: Traits::num_args -/// * To access the type of an argument: Traits::arg_t +/// * To access the type of an argument: Traits::arg_t /// * To access the type of the result: Traits::result_t template ::value> struct function_traits : public function_traits {}; @@ -127,8 +127,8 @@ struct function_traits { using result_t = ReturnType; /// The type of an argument to this function. - template - using arg_t = typename std::tuple_element>::type; + template + using arg_t = typename std::tuple_element>::type; }; /// Overload for class function types. template From 94d6dd01ba439ffcef7f7873622cf6ae99bcf5cb Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 16 Apr 2020 10:38:54 +0200 Subject: [PATCH 023/216] [AST] Fix an undefine behavior when creating an empty recovery expr. Summary: We forgot to initialize the NumExpr member in one of the constructors, which leads crashes in preamble serialization. 
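The hazard is ordinary C++: a constructor that omits a member from its
init-list leaves it indeterminate, and anything that later reads it (here,
preamble serialization of the empty node) is undefined behaviour. A minimal,
self-contained sketch of the bug pattern and the fix, using illustrative
names rather than Clang's classes:

    #include <cstdio>

    struct Node {
      unsigned NumExprs; // must be set by every constructor

      explicit Node(unsigned N) : NumExprs(N) {}

      // Buggy "empty shell" constructor: NumExprs stays uninitialized, so
      // reading it later would be undefined behaviour.
      //   Node() {}

      // Fixed shape, analogous to the patch: thread the count through.
      Node(unsigned NumSubExprs, bool /*EmptyShell*/) : NumExprs(NumSubExprs) {}

      void serialize() const { std::printf("NumExprs = %u\n", NumExprs); }
    };

    int main() {
      Node N(/*NumSubExprs=*/3, /*EmptyShell=*/true);
      N.serialize(); // well-defined once NumExprs is always initialized
      return 0;
    }
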
Reviewers: sammccall Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78284 --- clang/include/clang/AST/Expr.h | 3 ++- clang/lib/AST/Expr.cpp | 2 +- clang/test/PCH/cxx-recovery-expr.cpp | 13 +++++++++++++ 3 files changed, 16 insertions(+), 2 deletions(-) create mode 100644 clang/test/PCH/cxx-recovery-expr.cpp diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index fab84f6a6ecd..4d89234b0da7 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -6080,7 +6080,8 @@ class RecoveryExpr final : public Expr, private: RecoveryExpr(ASTContext &Ctx, SourceLocation BeginLoc, SourceLocation EndLoc, ArrayRef SubExprs); - RecoveryExpr(EmptyShell Empty) : Expr(RecoveryExprClass, Empty) {} + RecoveryExpr(EmptyShell Empty, unsigned NumSubExprs) + : Expr(RecoveryExprClass, Empty), NumExprs(NumSubExprs) {} size_t numTrailingObjects(OverloadToken) const { return NumExprs; } diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index bc6fadc71609..f108b49ceac1 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -4652,7 +4652,7 @@ RecoveryExpr *RecoveryExpr::Create(ASTContext &Ctx, SourceLocation BeginLoc, RecoveryExpr *RecoveryExpr::CreateEmpty(ASTContext &Ctx, unsigned NumSubExprs) { void *Mem = Ctx.Allocate(totalSizeToAlloc(NumSubExprs), alignof(RecoveryExpr)); - return new (Mem) RecoveryExpr(EmptyShell()); + return new (Mem) RecoveryExpr(EmptyShell(), NumSubExprs); } void OMPArrayShapingExpr::setDimensions(ArrayRef Dims) { diff --git a/clang/test/PCH/cxx-recovery-expr.cpp b/clang/test/PCH/cxx-recovery-expr.cpp new file mode 100644 index 000000000000..e0d58c119c54 --- /dev/null +++ b/clang/test/PCH/cxx-recovery-expr.cpp @@ -0,0 +1,13 @@ +// Test with pch. +// RUN: %clang_cc1 -emit-pch -frecovery-ast -fallow-pch-with-compiler-errors -o %t %s +// RUN: %clang_cc1 -include-pch %t -fno-validate-pch -emit-llvm -o - %s + +#ifndef HEADER +#define HEADER + +int func(int); +int s = func(); + +#else + +#endif From 43e2460a89abf6aace35973c682e1723d5f16f10 Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Thu, 16 Apr 2020 19:57:55 +0900 Subject: [PATCH 024/216] [LiveIntervals] Replace handleMoveIntoBundle Summary: The current handleMoveIntoBundle implementation is unusable, it attempts to access the slot indexes of bundled instructions. It also leaves bundled instructions with slot indexes assigned. Replace handleMoveIntoBundle this with a more explicit handleMoveIntoNewBundle function which recalculates the live intervals for all instructions moved into a newly formed bundle, and removes slot indexes from these instructions. Reviewers: arsenm, MaskRay, kariddi, tpr, qcolombet Reviewed By: qcolombet Subscribers: MatzeB, wdng, hiraditya, arphaman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77969 --- llvm/include/llvm/CodeGen/LiveIntervals.h | 14 +-- llvm/include/llvm/CodeGen/SlotIndexes.h | 12 ++- llvm/lib/CodeGen/LiveIntervals.cpp | 44 +++++++-- llvm/lib/CodeGen/SlotIndexes.cpp | 9 +- llvm/unittests/MI/LiveIntervalTest.cpp | 111 ++++++++++++++++++++++ 5 files changed, 169 insertions(+), 21 deletions(-) diff --git a/llvm/include/llvm/CodeGen/LiveIntervals.h b/llvm/include/llvm/CodeGen/LiveIntervals.h index 34e88045f6ef..1d89e6f38c72 100644 --- a/llvm/include/llvm/CodeGen/LiveIntervals.h +++ b/llvm/include/llvm/CodeGen/LiveIntervals.h @@ -310,16 +310,16 @@ class VirtRegMap; /// \param UpdateFlags Update live intervals for nonallocatable physregs. 
void handleMove(MachineInstr &MI, bool UpdateFlags = false); - /// Update intervals for operands of \p MI so that they begin/end on the - /// SlotIndex for \p BundleStart. + /// Update intervals of operands of all instructions in the newly + /// created bundle specified by \p BundleStart. /// /// \param UpdateFlags Update live intervals for nonallocatable physregs. /// - /// Requires MI and BundleStart to have SlotIndexes, and assumes - /// existing liveness is accurate. BundleStart should be the first - /// instruction in the Bundle. - void handleMoveIntoBundle(MachineInstr &MI, MachineInstr &BundleStart, - bool UpdateFlags = false); + /// Assumes existing liveness is accurate. + /// \pre BundleStart should be the first instruction in the Bundle. + /// \pre BundleStart should not have a have SlotIndex as one will be assigned. + void handleMoveIntoNewBundle(MachineInstr &BundleStart, + bool UpdateFlags = false); /// Update live intervals for instructions in a range of iterators. It is /// intended for use after target hooks that may insert or remove diff --git a/llvm/include/llvm/CodeGen/SlotIndexes.h b/llvm/include/llvm/CodeGen/SlotIndexes.h index fb833806ca8e..85bd7a404f9b 100644 --- a/llvm/include/llvm/CodeGen/SlotIndexes.h +++ b/llvm/include/llvm/CodeGen/SlotIndexes.h @@ -382,13 +382,15 @@ class raw_ostream; } /// Returns the base index for the given instruction. - SlotIndex getInstructionIndex(const MachineInstr &MI) const { + SlotIndex getInstructionIndex(const MachineInstr &MI, + bool IgnoreBundle = false) const { // Instructions inside a bundle have the same number as the bundle itself. auto BundleStart = getBundleStart(MI.getIterator()); auto BundleEnd = getBundleEnd(MI.getIterator()); // Use the first non-debug instruction in the bundle to get SlotIndex. const MachineInstr &BundleNonDebug = - *skipDebugInstructionsForward(BundleStart, BundleEnd); + IgnoreBundle ? MI + : *skipDebugInstructionsForward(BundleStart, BundleEnd); assert(!BundleNonDebug.isDebugInstr() && "Could not use a debug instruction to query mi2iMap."); Mi2IndexMap::const_iterator itr = mi2iMap.find(&BundleNonDebug); @@ -573,7 +575,11 @@ class raw_ostream; /// Removes machine instruction (bundle) \p MI from the mapping. /// This should be called before MachineInstr::eraseFromParent() is used to /// remove a whole bundle or an unbundled instruction. - void removeMachineInstrFromMaps(MachineInstr &MI); + /// If \p AllowBundled is set then this can be used on a bundled + /// instruction; however, this exists to support handleMoveIntoBundle, + /// and in general removeSingleMachineInstrFromMaps should be used instead. + void removeMachineInstrFromMaps(MachineInstr &MI, + bool AllowBundled = false); /// Removes a single machine instruction \p MI from the mapping. 
    /// This should be called before MachineInstr::eraseFromBundle() is used to
diff --git a/llvm/lib/CodeGen/LiveIntervals.cpp b/llvm/lib/CodeGen/LiveIntervals.cpp
index 70f131e406ef..b830c93d43f4 100644
--- a/llvm/lib/CodeGen/LiveIntervals.cpp
+++ b/llvm/lib/CodeGen/LiveIntervals.cpp
@@ -1478,13 +1478,43 @@ void LiveIntervals::handleMove(MachineInstr &MI, bool UpdateFlags) {
   HME.updateAllRanges(&MI);
 }
 
-void LiveIntervals::handleMoveIntoBundle(MachineInstr &MI,
-                                         MachineInstr &BundleStart,
-                                         bool UpdateFlags) {
-  SlotIndex OldIndex = Indexes->getInstructionIndex(MI);
-  SlotIndex NewIndex = Indexes->getInstructionIndex(BundleStart);
-  HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
-  HME.updateAllRanges(&MI);
+void LiveIntervals::handleMoveIntoNewBundle(MachineInstr &BundleStart,
+                                            bool UpdateFlags) {
+  assert((BundleStart.getOpcode() == TargetOpcode::BUNDLE) &&
+         "Bundle start is not a bundle");
+  SmallVector<SlotIndex, 16> ToProcess;
+  const SlotIndex NewIndex = Indexes->insertMachineInstrInMaps(BundleStart);
+  auto BundleEnd = getBundleEnd(BundleStart.getIterator());
+
+  auto I = BundleStart.getIterator();
+  I++;
+  while (I != BundleEnd) {
+    if (!Indexes->hasIndex(*I))
+      continue;
+    SlotIndex OldIndex = Indexes->getInstructionIndex(*I, true);
+    ToProcess.push_back(OldIndex);
+    Indexes->removeMachineInstrFromMaps(*I, true);
+    I++;
+  }
+  for (SlotIndex OldIndex : ToProcess) {
+    HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags);
+    HME.updateAllRanges(&BundleStart);
+  }
+
+  // Fix up dead defs
+  const SlotIndex Index = getInstructionIndex(BundleStart);
+  for (unsigned Idx = 0, E = BundleStart.getNumOperands(); Idx != E; ++Idx) {
+    MachineOperand &MO = BundleStart.getOperand(Idx);
+    if (!MO.isReg())
+      continue;
+    Register Reg = MO.getReg();
+    if (Reg.isVirtual() && hasInterval(Reg) && !MO.isUndef()) {
+      LiveInterval &LI = getInterval(Reg);
+      LiveQueryResult LRQ = LI.Query(Index);
+      if (LRQ.isDeadDef())
+        MO.setIsDead();
+    }
+  }
 }
 
 void LiveIntervals::repairOldRegInRange(const MachineBasicBlock::iterator Begin,
diff --git a/llvm/lib/CodeGen/SlotIndexes.cpp b/llvm/lib/CodeGen/SlotIndexes.cpp
index 6664b58eccf8..d2bfdc663edb 100644
--- a/llvm/lib/CodeGen/SlotIndexes.cpp
+++ b/llvm/lib/CodeGen/SlotIndexes.cpp
@@ -112,9 +112,10 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
   return false;
 }
 
-void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI) {
-  assert(!MI.isBundledWithPred() &&
-         "Use removeSingleMachineInstrFromMaps() instread");
+void SlotIndexes::removeMachineInstrFromMaps(MachineInstr &MI,
+                                             bool AllowBundled) {
+  assert((AllowBundled || !MI.isBundledWithPred()) &&
+         "Use removeSingleMachineInstrFromMaps() instead");
   Mi2IndexMap::iterator mi2iItr = mi2iMap.find(&MI);
   if (mi2iItr == mi2iMap.end())
     return;
@@ -141,7 +142,7 @@ void SlotIndexes::removeSingleMachineInstrFromMaps(MachineInstr &MI) {
   // instruction.
   if (MI.isBundledWithSucc()) {
     // Only the first instruction of a bundle should have an index assigned.
- assert(!MI.isBundledWithPred() && "Should have first bundle isntruction"); + assert(!MI.isBundledWithPred() && "Should be first bundle instruction"); MachineBasicBlock::instr_iterator Next = std::next(MI.getIterator()); MachineInstr &NextMI = *Next; diff --git a/llvm/unittests/MI/LiveIntervalTest.cpp b/llvm/unittests/MI/LiveIntervalTest.cpp index 835d3f91c66e..f0be9709332b 100644 --- a/llvm/unittests/MI/LiveIntervalTest.cpp +++ b/llvm/unittests/MI/LiveIntervalTest.cpp @@ -128,6 +128,27 @@ static void testHandleMove(MachineFunction &MF, LiveIntervals &LIS, LIS.handleMove(FromInstr, true); } +/** + * Move instructions numbered \p From inclusive through instruction number + * \p To into a newly formed bundle and update affected liveness intervals + * with LiveIntervalAnalysis::handleMoveIntoNewBundle(). + */ +static void testHandleMoveIntoNewBundle(MachineFunction &MF, LiveIntervals &LIS, + unsigned From, unsigned To, + unsigned BlockNum = 0) { + MachineInstr &FromInstr = getMI(MF, From, BlockNum); + MachineInstr &ToInstr = getMI(MF, To, BlockNum); + MachineBasicBlock &MBB = *FromInstr.getParent(); + MachineBasicBlock::instr_iterator I = FromInstr.getIterator(); + + // Build bundle + finalizeBundle(MBB, I, std::next(ToInstr.getIterator())); + + // Update LiveIntervals + MachineBasicBlock::instr_iterator BundleStart = std::prev(I); + LIS.handleMoveIntoNewBundle(*BundleStart, true); +} + static void liveIntervalTest(StringRef MIRFunc, LiveIntervalTest T) { LLVMContext Context; std::unique_ptr TM = createTargetMachine(); @@ -462,6 +483,96 @@ TEST(LiveIntervalTest, TestMoveSubRegDefAcrossUseDefMulti) { testHandleMove(MF, LIS, 4, 1, 1); }); } + +TEST(LiveIntervalTest, BundleUse) { + liveIntervalTest(R"MIR( + %0 = IMPLICIT_DEF + S_NOP 0 + S_NOP 0, implicit %0 + S_NOP 0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 1, 2); + }); +} + +TEST(LiveIntervalTest, BundleDef) { + liveIntervalTest(R"MIR( + %0 = IMPLICIT_DEF + S_NOP 0 + S_NOP 0, implicit %0 + S_NOP 0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 0, 1); + }); +} + +TEST(LiveIntervalTest, BundleRedef) { + liveIntervalTest(R"MIR( + %0 = IMPLICIT_DEF + S_NOP 0 + %0 = IMPLICIT_DEF implicit %0(tied-def 0) + S_NOP 0, implicit %0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 1, 2); + }); +} + +TEST(LiveIntervalTest, BundleInternalUse) { + liveIntervalTest(R"MIR( + %0 = IMPLICIT_DEF + S_NOP 0 + S_NOP 0, implicit %0 + S_NOP 0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 0, 2); + }); +} + +TEST(LiveIntervalTest, BundleUndefUse) { + liveIntervalTest(R"MIR( + %0 = IMPLICIT_DEF + S_NOP 0 + S_NOP 0, implicit undef %0 + S_NOP 0 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 1, 2); + }); +} + +TEST(LiveIntervalTest, BundleSubRegUse) { + liveIntervalTest(R"MIR( + successors: %bb.1, %bb.2 + undef %0.sub0 = IMPLICIT_DEF + %0.sub1 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc + S_BRANCH %bb.1 + bb.1: + S_NOP 0 + S_NOP 0, implicit %0.sub1 + bb.2: + S_NOP 0, implicit %0.sub1 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 0, 1, 1); + }); +} + +TEST(LiveIntervalTest, BundleSubRegDef) { + liveIntervalTest(R"MIR( + successors: %bb.1, %bb.2 + undef %0.sub0 = IMPLICIT_DEF + %0.sub1 = IMPLICIT_DEF + S_CBRANCH_VCCNZ %bb.2, implicit undef $vcc + S_BRANCH %bb.1 + bb.1: + S_NOP 0 + 
S_NOP 0, implicit %0.sub1 + bb.2: + S_NOP 0, implicit %0.sub1 +)MIR", [](MachineFunction &MF, LiveIntervals &LIS) { + testHandleMoveIntoNewBundle(MF, LIS, 0, 1, 0); + }); +} + int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); initLLVM(); From 1a3e89aa2bd26ad05b25635457bad28f46427eeb Mon Sep 17 00:00:00 2001 From: Konstantin Schwarz Date: Tue, 14 Apr 2020 09:24:40 +0200 Subject: [PATCH 025/216] [MIR] Add comments to INLINEASM immediate flag MachineOperands Summary: The INLINEASM MIR instructions use immediate operands to encode the values of some operands. The MachineInstr pretty printer function already handles those operands and prints human readable annotations instead of the immediates. This patch adds similar annotations to the output of the MIRPrinter, however uses the new MIROperandComment feature. Reviewers: SjoerdMeijer, arsenm, efriedma Reviewed By: arsenm Subscribers: qcolombet, sdardis, jvesely, wdng, nhaehnle, hiraditya, jrtc27, atanasyan, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78088 --- llvm/include/llvm/CodeGen/TargetInstrInfo.h | 8 +- llvm/include/llvm/IR/InlineAsm.h | 91 +++++++++++++++++++ llvm/lib/CodeGen/MIRPrinter.cpp | 2 +- llvm/lib/CodeGen/MachineInstr.cpp | 35 +------ llvm/lib/CodeGen/TargetInstrInfo.cpp | 56 ++++++++++++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 15 ++- llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 7 +- llvm/test/CodeGen/AArch64/seqpairspill.mir | 4 +- llvm/test/CodeGen/AMDGPU/endpgm-dce.mir | 4 +- .../AMDGPU/rename-independent-subregs.mir | 2 +- ...ssert-dead-def-subreg-use-other-subreg.mir | 2 +- ...dleMoveUp-subreg-def-across-subreg-def.mir | 10 +- ...ubreg-undef-def-with-other-subreg-defs.mir | 12 +-- .../AMDGPU/vccz-corrupt-bug-workaround.mir | 4 +- .../ARM/ifcvt-diamond-unanalyzable-common.mir | 4 +- .../MIR/X86/early-clobber-register-flag.mir | 2 +- .../CodeGen/MIR/X86/inline-asm-registers.mir | 4 +- .../longbranch/branch-limits-fp-micromips.mir | 8 +- .../branch-limits-fp-micromipsr6.mir | 8 +- .../Mips/longbranch/branch-limits-fp-mips.mir | 8 +- .../longbranch/branch-limits-fp-mipsr6.mir | 8 +- .../Mips/longbranch/branch-limits-msa.mir | 40 ++++---- llvm/test/CodeGen/Thumb2/high-reg-spill.mir | 2 +- .../X86/inline-asm-avx512f-x-constraint.ll | 2 +- .../X86/inline-asm-default-clobbers.ll | 2 +- llvm/test/CodeGen/X86/stack-folding-adx.mir | 8 +- llvm/test/CodeGen/X86/stack-folding-bmi2.mir | 4 +- .../X86/stack-folding-fp-nofpexcept.mir | 2 +- 28 files changed, 239 insertions(+), 115 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h index 58ea804d2747..7792738f4ec9 100644 --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -1310,11 +1310,9 @@ class TargetInstrInfo : public MCInstrInfo { virtual bool isPredicated(const MachineInstr &MI) const { return false; } // Returns a MIRPrinter comment for this machine operand. - virtual std::string createMIROperandComment(const MachineInstr &MI, - const MachineOperand &Op, - unsigned OpIdx) const { - return std::string(); - }; + virtual std::string + createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, + unsigned OpIdx, const TargetRegisterInfo *TRI) const; /// Returns true if the instruction is a /// terminator instruction that has not been predicated. 
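The default createMIROperandComment() declared above derives its text entirely from the immediate flag word, using a few static InlineAsm helpers that the next file introduces. As a rough standalone sketch (not part of the patch; the helper name decodeAsmFlag is made up for illustration), the same decoding looks like this:

#include "llvm/IR/InlineAsm.h"
#include "llvm/Support/raw_ostream.h"
#include <string>

// Sketch: turn an INLINEASM operand-descriptor immediate into the comment
// text the MIRPrinter now emits, using only the InlineAsm helpers below.
static std::string decodeAsmFlag(unsigned Flag) {
  std::string Str;
  llvm::raw_string_ostream OS(Str);
  OS << llvm::InlineAsm::getKindName(llvm::InlineAsm::getKind(Flag));
  unsigned RCID = 0;
  if (!llvm::InlineAsm::isImmKind(Flag) && !llvm::InlineAsm::isMemKind(Flag) &&
      llvm::InlineAsm::hasRegClassConstraint(Flag, RCID))
    OS << ":RC" << RCID; // a TargetRegisterInfo can map this ID to a name
  if (llvm::InlineAsm::isMemKind(Flag))
    OS << ":" << llvm::InlineAsm::getMemConstraintName(
                     llvm::InlineAsm::getMemoryConstraintID(Flag));
  unsigned TiedTo = 0;
  if (llvm::InlineAsm::isUseOperandTiedToDef(Flag, TiedTo))
    OS << " tiedto:$" << TiedTo;
  return OS.str();
}

For example, decodeAsmFlag(2147483657) yields "reguse tiedto:$0", which is the annotation the updated tests below expect next to that immediate.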
diff --git a/llvm/include/llvm/IR/InlineAsm.h b/llvm/include/llvm/IR/InlineAsm.h index 72d8ad1501ae..b6f377093337 100644 --- a/llvm/include/llvm/IR/InlineAsm.h +++ b/llvm/include/llvm/IR/InlineAsm.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/IR/Value.h" +#include "llvm/Support/ErrorHandling.h" #include #include #include @@ -359,6 +360,96 @@ class InlineAsm final : public Value { RC = High - 1; return true; } + + static std::vector getExtraInfoNames(unsigned ExtraInfo) { + std::vector Result; + if (ExtraInfo & InlineAsm::Extra_HasSideEffects) + Result.push_back("sideeffect"); + if (ExtraInfo & InlineAsm::Extra_MayLoad) + Result.push_back("mayload"); + if (ExtraInfo & InlineAsm::Extra_MayStore) + Result.push_back("maystore"); + if (ExtraInfo & InlineAsm::Extra_IsConvergent) + Result.push_back("isconvergent"); + if (ExtraInfo & InlineAsm::Extra_IsAlignStack) + Result.push_back("alignstack"); + + AsmDialect Dialect = + InlineAsm::AsmDialect((ExtraInfo & InlineAsm::Extra_AsmDialect)); + + if (Dialect == InlineAsm::AD_ATT) + Result.push_back("attdialect"); + if (Dialect == InlineAsm::AD_Intel) + Result.push_back("inteldialect"); + + return Result; + } + + static StringRef getKindName(unsigned Kind) { + switch (Kind) { + case InlineAsm::Kind_RegUse: + return "reguse"; + case InlineAsm::Kind_RegDef: + return "regdef"; + case InlineAsm::Kind_RegDefEarlyClobber: + return "regdef-ec"; + case InlineAsm::Kind_Clobber: + return "clobber"; + case InlineAsm::Kind_Imm: + return "imm"; + case InlineAsm::Kind_Mem: + return "mem"; + default: + llvm_unreachable("Unknown operand kind"); + } + } + + static StringRef getMemConstraintName(unsigned Constraint) { + switch (Constraint) { + case InlineAsm::Constraint_es: + return "es"; + case InlineAsm::Constraint_i: + return "i"; + case InlineAsm::Constraint_m: + return "m"; + case InlineAsm::Constraint_o: + return "o"; + case InlineAsm::Constraint_v: + return "v"; + case InlineAsm::Constraint_Q: + return "Q"; + case InlineAsm::Constraint_R: + return "R"; + case InlineAsm::Constraint_S: + return "S"; + case InlineAsm::Constraint_T: + return "T"; + case InlineAsm::Constraint_Um: + return "Um"; + case InlineAsm::Constraint_Un: + return "Un"; + case InlineAsm::Constraint_Uq: + return "Uq"; + case InlineAsm::Constraint_Us: + return "Us"; + case InlineAsm::Constraint_Ut: + return "Ut"; + case InlineAsm::Constraint_Uv: + return "Uv"; + case InlineAsm::Constraint_Uy: + return "Uy"; + case InlineAsm::Constraint_X: + return "X"; + case InlineAsm::Constraint_Z: + return "Z"; + case InlineAsm::Constraint_ZC: + return "ZC"; + case InlineAsm::Constraint_Zy: + return "Zy"; + default: + llvm_unreachable("Unknown memory constraint"); + } + } }; } // end namespace llvm diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index 550448027915..5e01af25bdd8 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -860,7 +860,7 @@ void MIPrinter::print(const MachineInstr &MI, unsigned OpIdx, bool ShouldPrintRegisterTies, LLT TypeToPrint, bool PrintDef) { const MachineOperand &Op = MI.getOperand(OpIdx); - std::string MOComment = TII->createMIROperandComment(MI, Op, OpIdx); + std::string MOComment = TII->createMIROperandComment(MI, Op, OpIdx, TRI); switch (Op.getType()) { case MachineOperand::MO_Immediate: diff --git a/llvm/lib/CodeGen/MachineInstr.cpp b/llvm/lib/CodeGen/MachineInstr.cpp index 542dc220ad30..8ee85c6229b6 100644 --- a/llvm/lib/CodeGen/MachineInstr.cpp +++ b/llvm/lib/CodeGen/MachineInstr.cpp @@ 
-1669,15 +1669,8 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, // Pretty print the inline asm operand descriptor. OS << '$' << AsmOpCount++; unsigned Flag = MO.getImm(); - switch (InlineAsm::getKind(Flag)) { - case InlineAsm::Kind_RegUse: OS << ":[reguse"; break; - case InlineAsm::Kind_RegDef: OS << ":[regdef"; break; - case InlineAsm::Kind_RegDefEarlyClobber: OS << ":[regdef-ec"; break; - case InlineAsm::Kind_Clobber: OS << ":[clobber"; break; - case InlineAsm::Kind_Imm: OS << ":[imm"; break; - case InlineAsm::Kind_Mem: OS << ":[mem"; break; - default: OS << ":[??" << InlineAsm::getKind(Flag); break; - } + OS << ":["; + OS << InlineAsm::getKindName(InlineAsm::getKind(Flag)); unsigned RCID = 0; if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) && @@ -1690,29 +1683,7 @@ void MachineInstr::print(raw_ostream &OS, ModuleSlotTracker &MST, if (InlineAsm::isMemKind(Flag)) { unsigned MCID = InlineAsm::getMemoryConstraintID(Flag); - switch (MCID) { - case InlineAsm::Constraint_es: OS << ":es"; break; - case InlineAsm::Constraint_i: OS << ":i"; break; - case InlineAsm::Constraint_m: OS << ":m"; break; - case InlineAsm::Constraint_o: OS << ":o"; break; - case InlineAsm::Constraint_v: OS << ":v"; break; - case InlineAsm::Constraint_Q: OS << ":Q"; break; - case InlineAsm::Constraint_R: OS << ":R"; break; - case InlineAsm::Constraint_S: OS << ":S"; break; - case InlineAsm::Constraint_T: OS << ":T"; break; - case InlineAsm::Constraint_Um: OS << ":Um"; break; - case InlineAsm::Constraint_Un: OS << ":Un"; break; - case InlineAsm::Constraint_Uq: OS << ":Uq"; break; - case InlineAsm::Constraint_Us: OS << ":Us"; break; - case InlineAsm::Constraint_Ut: OS << ":Ut"; break; - case InlineAsm::Constraint_Uv: OS << ":Uv"; break; - case InlineAsm::Constraint_Uy: OS << ":Uy"; break; - case InlineAsm::Constraint_X: OS << ":X"; break; - case InlineAsm::Constraint_Z: OS << ":Z"; break; - case InlineAsm::Constraint_ZC: OS << ":ZC"; break; - case InlineAsm::Constraint_Zy: OS << ":Zy"; break; - default: OS << ":?"; break; - } + OS << ":" << InlineAsm::getMemConstraintName(MCID); } unsigned TiedTo = 0; diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp index 2e515094cf6c..0c91cc166f57 100644 --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -1322,4 +1322,60 @@ bool TargetInstrInfo::getInsertSubregInputs( return true; } +// Returns a MIRPrinter comment for this machine operand. +std::string TargetInstrInfo::createMIROperandComment( + const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, + const TargetRegisterInfo *TRI) const { + + if (!MI.isInlineAsm()) + return ""; + + std::string Flags; + raw_string_ostream OS(Flags); + + if (OpIdx == InlineAsm::MIOp_ExtraInfo) { + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack + unsigned ExtraInfo = Op.getImm(); + bool First = true; + for (StringRef Info : InlineAsm::getExtraInfoNames(ExtraInfo)) { + if (!First) + OS << " "; + First = false; + OS << Info; + } + + return OS.str(); + } + + int FlagIdx = MI.findInlineAsmFlagIdx(OpIdx); + if (FlagIdx < 0 || (unsigned)FlagIdx != OpIdx) + return ""; + + assert(Op.isImm() && "Expected flag operand to be an immediate"); + // Pretty print the inline asm operand descriptor. 
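+  // The low bits of the flag give the operand kind (reguse, regdef,
+  // clobber, ...); the upper half optionally carries a register-class
+  // constraint, a memory-constraint ID, or the index of the tied
+  // definition. Each piece is decoded below.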
+ unsigned Flag = Op.getImm(); + unsigned Kind = InlineAsm::getKind(Flag); + OS << InlineAsm::getKindName(Kind); + + unsigned RCID = 0; + if (!InlineAsm::isImmKind(Flag) && !InlineAsm::isMemKind(Flag) && + InlineAsm::hasRegClassConstraint(Flag, RCID)) { + if (TRI) { + OS << ':' << TRI->getRegClassName(TRI->getRegClass(RCID)); + } else + OS << ":RC" << RCID; + } + + if (InlineAsm::isMemKind(Flag)) { + unsigned MCID = InlineAsm::getMemoryConstraintID(Flag); + OS << ":" << InlineAsm::getMemConstraintName(MCID); + } + + unsigned TiedTo = 0; + if (InlineAsm::isUseOperandTiedToDef(Flag, TiedTo)) + OS << " tiedto:$" << TiedTo; + + return OS.str(); +} + TargetInstrInfo::PipelinerLoopInfo::~PipelinerLoopInfo() {} diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index fc4e7182bf11..63bea53a72fa 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -495,10 +495,17 @@ bool ARMBaseInstrInfo::isPredicated(const MachineInstr &MI) const { return PIdx != -1 && MI.getOperand(PIdx).getImm() != ARMCC::AL; } -std::string ARMBaseInstrInfo::createMIROperandComment(const MachineInstr &MI, - const MachineOperand &Op, - unsigned OpIdx) const { - // Only support immediates for now. +std::string ARMBaseInstrInfo::createMIROperandComment( + const MachineInstr &MI, const MachineOperand &Op, unsigned OpIdx, + const TargetRegisterInfo *TRI) const { + + // First, let's see if there is a generic comment for this operand + std::string GenericComment = + TargetInstrInfo::createMIROperandComment(MI, Op, OpIdx, TRI); + if (!GenericComment.empty()) + return GenericComment; + + // If not, check if we have an immediate operand. if (Op.getType() != MachineOperand::MO_Immediate) return std::string(); diff --git a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 9f23483e595c..173b57c62a2f 100644 --- a/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -152,9 +152,10 @@ class ARMBaseInstrInfo : public ARMGenInstrInfo { bool isPredicated(const MachineInstr &MI) const override; // MIR printer helper function to annotate Operands with a comment. 
- std::string createMIROperandComment(const MachineInstr &MI, - const MachineOperand &Op, - unsigned OpIdx) const override; + std::string + createMIROperandComment(const MachineInstr &MI, const MachineOperand &Op, + unsigned OpIdx, + const TargetRegisterInfo *TRI) const override; ARMCC::CondCodes getPredicate(const MachineInstr &MI) const { int PIdx = MI.findFirstPredOperandIdx(); diff --git a/llvm/test/CodeGen/AArch64/seqpairspill.mir b/llvm/test/CodeGen/AArch64/seqpairspill.mir index fdcb3dc61181..12748378e678 100644 --- a/llvm/test/CodeGen/AArch64/seqpairspill.mir +++ b/llvm/test/CodeGen/AArch64/seqpairspill.mir @@ -16,7 +16,7 @@ body: | %1 : xseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF %0 = CASPALX %0, %1, %2 - INLINEASM &" ", 0, 0, implicit def dead $x0, implicit def dead $x1, implicit def dead $x2, implicit def dead $x3, implicit def dead $x4, implicit def dead $x5, implicit def dead $x6, implicit def dead $x7, implicit def dead $x8, implicit def dead $x9, implicit def dead $x10, implicit def dead $x11, implicit def dead $x12, implicit def dead $x13, implicit def dead $x14, implicit def dead $x15, implicit def dead $x16, implicit def dead $x17, implicit def dead $x18, implicit def dead $x19, implicit def dead $x20, implicit def dead $x21, implicit def dead $x22, implicit def dead $x23, implicit def dead $x24, implicit def dead $x25, implicit def dead $x26, implicit def dead $x27, implicit def dead $x28, implicit def dead $fp, implicit def dead $lr + INLINEASM &" ", 0, 12, implicit def dead $x0, implicit def dead $x1, implicit def dead $x2, implicit def dead $x3, implicit def dead $x4, implicit def dead $x5, implicit def dead $x6, implicit def dead $x7, implicit def dead $x8, implicit def dead $x9, implicit def dead $x10, implicit def dead $x11, implicit def dead $x12, implicit def dead $x13, implicit def dead $x14, implicit def dead $x15, implicit def dead $x16, implicit def dead $x17, implicit def dead $x18, implicit def dead $x19, implicit def dead $x20, implicit def dead $x21, implicit def dead $x22, implicit def dead $x23, implicit def dead $x24, implicit def dead $x25, implicit def dead $x26, implicit def dead $x27, implicit def dead $x28, implicit def dead $fp, implicit def dead $lr $xzr = COPY %0.sube64 $xzr = COPY %0.subo64 ... 
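A note on the INLINEASM operand changes in this and the following tests: the immediate after the extra-info word is an operand-descriptor flag, and 0 does not name any operand kind under the new decoding, so these descriptors now use 12, which decodes as a clobber and is what the printer annotates as "12 /* clobber */" later in this patch. A quick, illustrative-only check (not part of the patch) of why 12 is the well-formed value:

#include "llvm/IR/InlineAsm.h"
#include <cassert>

// Illustrative only: 12 is the descriptor getFlagWord() builds for a
// clobber with a single register operand.
static void checkClobberDescriptor() {
  unsigned Flag =
      llvm::InlineAsm::getFlagWord(llvm::InlineAsm::Kind_Clobber, 1);
  assert(Flag == 12 && "kind 4 (clobber) | (1 operand << 3)");
  assert(llvm::InlineAsm::getKindName(llvm::InlineAsm::getKind(Flag)) ==
         "clobber");
}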
@@ -36,7 +36,7 @@ body: | %1 : wseqpairsclass = IMPLICIT_DEF %2 : gpr64common = IMPLICIT_DEF %0 = CASPALW %0, %1, %2 - INLINEASM &" ", 0, 0, implicit def dead $x0, implicit def dead $x1, implicit def dead $x2, implicit def dead $x3, implicit def dead $x4, implicit def dead $x5, implicit def dead $x6, implicit def dead $x7, implicit def dead $x8, implicit def dead $x9, implicit def dead $x10, implicit def dead $x11, implicit def dead $x12, implicit def dead $x13, implicit def dead $x14, implicit def dead $x15, implicit def dead $x16, implicit def dead $x17, implicit def dead $x18, implicit def dead $x19, implicit def dead $x20, implicit def dead $x21, implicit def dead $x22, implicit def dead $x23, implicit def dead $x24, implicit def dead $x25, implicit def dead $x26, implicit def dead $x27, implicit def dead $x28, implicit def dead $fp, implicit def dead $lr + INLINEASM &" ", 0, 12, implicit def dead $x0, implicit def dead $x1, implicit def dead $x2, implicit def dead $x3, implicit def dead $x4, implicit def dead $x5, implicit def dead $x6, implicit def dead $x7, implicit def dead $x8, implicit def dead $x9, implicit def dead $x10, implicit def dead $x11, implicit def dead $x12, implicit def dead $x13, implicit def dead $x14, implicit def dead $x15, implicit def dead $x16, implicit def dead $x17, implicit def dead $x18, implicit def dead $x19, implicit def dead $x20, implicit def dead $x21, implicit def dead $x22, implicit def dead $x23, implicit def dead $x24, implicit def dead $x25, implicit def dead $x26, implicit def dead $x27, implicit def dead $x28, implicit def dead $fp, implicit def dead $lr $xzr = COPY %0.sube32 $xzr = COPY %0.subo32 ... diff --git a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir index 7733b0487cf7..baa54b492f61 100644 --- a/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir +++ b/llvm/test/CodeGen/AMDGPU/endpgm-dce.mir @@ -331,7 +331,7 @@ body: | %1 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc %2:sreg_64 = IMPLICIT_DEF - INLINEASM &"", 0, 0 + INLINEASM &"", 0 S_ENDPGM 0 ... @@ -353,6 +353,6 @@ body: | %1 = IMPLICIT_DEF $sgpr0_sgpr1 = S_OR_B64 $exec, killed $vcc, implicit-def $scc %2:sreg_64 = IMPLICIT_DEF - INLINEASM &"", 1, 0 + INLINEASM &"", 1 S_ENDPGM 0 ... diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir index 789b15556455..134b2a0fb589 100644 --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -73,7 +73,7 @@ body: | # (1) %0.sub0 + %0.sub0 and (2) %0.sub1 + %0.sub1 # Check that renaming (2) does not inadvertently rename (1). 
# CHECK-LABEL: name: test2 -# CHECK: INLINEASM &"", 32, 327690, def undef %0.sub0, 327690, def dead %1.sub1, 2147483657, undef %0.sub0(tied-def 3), 2147549193, %1.sub1(tied-def 5) +# CHECK: INLINEASM &"", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_XEXEC_with_sub0 */, def undef %0.sub0, 327690 /* regdef:SReg_1_XEXEC_with_sub0 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) name: test2 body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir index e12cff942bc6..53c4544c0bf9 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -33,7 +33,7 @@ body: | ; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1 - ; CHECK: INLINEASM &"", 1, 851978, def dead [[COPY1]], 851978, def dead [[COPY]].sub1, 2147483657, [[COPY1]], 2147549193, [[COPY]].sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead [[COPY1]], 851978 /* regdef:VRegOrLds_32 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir index bd0423c5457c..f43289ffee2a 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -36,18 +36,18 @@ body: | ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: INLINEASM &"", 1, 851978, def dead %11 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead %11 ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) - ; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %15, 851978, def %16 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %15, 851978 /* regdef:VRegOrLds_32 */, def %16 ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - ; CHECK: INLINEASM &"def $0 $1", 1, 851978, def %21, 851978, def %22 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %21, 851978 /* regdef:VRegOrLds_32 */, def %22 ; CHECK: 
[[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec - ; CHECK: INLINEASM &"", 1, 851978, def dead [[V_MOV_B32_e32_2]], 851978, def dead [[V_MOV_B32_e32_3]], 851977, [[DS_READ_B64_gfx9_]].sub0, 2147483657, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193, [[V_MOV_B32_e32_3]](tied-def 5), 851977, %15, 851977, %16, 851977, [[DS_READ_B32_gfx9_1]], 851977, [[DS_READ_B32_gfx9_]], 851977, [[DS_READ_B32_gfx9_3]], 851977, [[DS_READ_B32_gfx9_2]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:VRegOrLds_32 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:VRegOrLds_32 */, %15, 851977 /* reguse:VRegOrLds_32 */, %16, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_2]] ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) @@ -69,7 +69,7 @@ body: | ; CHECK: undef %42.sub0:sgpr_64 = V_READFIRSTLANE_B32 %38.sub0, implicit $exec ; CHECK: %42.sub1:sgpr_64 = V_READFIRSTLANE_B32 %40.sub1, implicit $exec ; CHECK: [[S_LOAD_DWORD_IMM:%[0-9]+]]:sreg_32_xm0_xexec = S_LOAD_DWORD_IMM %42, 0, 0, 0 :: (load 4, addrspace 1) - ; CHECK: INLINEASM &"", 1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ ; CHECK: [[DS_READ_B32_gfx9_4:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %45:vgpr_32, 0, 0, implicit $exec :: (load 4, addrspace 3) ; CHECK: GLOBAL_STORE_DWORD undef %46:vreg_64, [[DS_READ_B32_gfx9_4]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; CHECK: %31.sub0:vreg_64 = COPY [[S_LOAD_DWORD_IMM]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir index 3a574977e4d8..522f9a0385c6 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -25,9 +25,9 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1, 851978, def undef %0.sub0, 851978, def undef %0.sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub0, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub1 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 @@ -63,9 +63,9 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 
[[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1, 851978, def %0, 2147549193, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1, 851977, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1, 851978, def undef %0.sub1, 851978, def undef %0.sub0 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub1, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub0 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir index 686ff6f3b0c9..20c21aae6c75 100644 --- a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir +++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -134,7 +134,7 @@ body: | # instructions to fix vccz. # CHECK-LABEL: name: inlineasm_def_vcc_lo -# CHECK: INLINEASM &"; def vcc_lo", 1, 10, implicit-def $vcc_lo +# CHECK: INLINEASM &"; def vcc_lo", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc_lo # SI: $vcc = S_MOV_B64 $vcc # GFX9: $vcc = S_MOV_B64 $vcc # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc @@ -152,7 +152,7 @@ body: | # inserted to fix vccz. # CHECK-LABEL: name: inlineasm_def_vcc -# CHECK: INLINEASM &"; def vcc", 1, 10, implicit-def $vcc +# CHECK: INLINEASM &"; def vcc", 1 /* sideeffect attdialect */, 10 /* regdef */, implicit-def $vcc # CHECK-NEXT: S_CBRANCH_VCCNZ %bb.1, implicit killed $vcc name: inlineasm_def_vcc diff --git a/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir b/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir index 466cf6a9862c..227544961b2a 100644 --- a/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir +++ b/llvm/test/CodeGen/ARM/ifcvt-diamond-unanalyzable-common.mir @@ -26,10 +26,10 @@ body: | ; CHECK: $r0 = t2MOVi 2, 1 /* CC::ne */, $cpsr, $noreg ; CHECK: $r0 = t2MOVi 3, 0 /* CC::eq */, killed $cpsr, $noreg, implicit killed $r0 ; CHECK: tBL 14 /* CC::al */, $noreg, @fn2, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp, implicit-def dead $r0 - ; CHECK: INLINEASM_BR &"", 9, 13, 0, 13, blockaddress(@fn1, %ir-block.l_yes) + ; CHECK: INLINEASM_BR &"", 9 /* sideeffect mayload attdialect */, 13 /* imm */, 0, 13 /* imm */, blockaddress(@fn1, %ir-block.l_yes) ; CHECK: t2B %bb.1, 14 /* CC::al */, $noreg ; CHECK: bb.1: - ; CHECK: INLINEASM &"", 1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */ ; CHECK: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $pc ; CHECK: bb.2.l_yes (address-taken): ; CHECK: $sp = t2LDMIA_RET $sp, 14 /* CC::al */, $noreg, def $r4, def $pc diff --git a/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir b/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir index 3829489e2a9a..87ea82623ee9 100644 --- a/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir +++ b/llvm/test/CodeGen/MIR/X86/early-clobber-register-flag.mir @@ -35,7 +35,7 @@ body: | CFI_INSTRUCTION def_cfa_offset 16 $ecx = COPY $edi $ecx = ADD32rr killed $ecx, killed $esi, implicit-def dead $eflags - ; CHECK: INLINEASM &nop, 1, 12, implicit-def dead early-clobber $ax, 12, implicit-def dead early-clobber $di + ; 
CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $ax, 12 /* clobber */, implicit-def dead early-clobber $di INLINEASM &nop, 1, 12, implicit-def dead early-clobber $ax, 12, implicit-def dead early-clobber $di $edi = COPY killed $ecx CALL64pcrel32 @foo, csr_64, implicit $rsp, implicit $edi, implicit-def $rsp diff --git a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir index 3403ac867379..fd1e7aaa3209 100644 --- a/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir +++ b/llvm/test/CodeGen/MIR/X86/inline-asm-registers.mir @@ -28,7 +28,7 @@ body: | liveins: $rdi, $rsi ; CHECK-LABEL: name: test - ; CHECK: INLINEASM &foo, 0, 2818058, def $rsi, 2818058, def dead $rdi, + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 2818058 /* regdef:GR32_TC */, def $rsi, 2818058 /* regdef:GR32_TC */, def dead $rdi, INLINEASM &foo, 0, 2818058, def $rsi, 2818058, def dead $rdi, 2147549193, killed $rdi, 2147483657, killed $rsi, 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RETQ killed $rax @@ -45,7 +45,7 @@ body: | ; Verify that the register ties are preserved. ; CHECK-LABEL: name: test2 - ; CHECK: INLINEASM &foo, 0, 2818058, def $rsi, 2818058, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &foo, 0 /* attdialect */, 2818058 /* regdef:GR32_TC */, def $rsi, 2818058 /* regdef:GR32_TC */, def dead $rdi, 2147549193 /* reguse tiedto:$1 */, killed $rdi(tied-def 5), 2147483657 /* reguse tiedto:$0 */, killed $rsi(tied-def 3), 12 /* clobber */, implicit-def dead early-clobber $eflags INLINEASM &foo, 0, 2818058, def $rsi, 2818058, def dead $rdi, 2147549193, killed $rdi(tied-def 5), 2147483657, killed $rsi(tied-def 3), 12, implicit-def dead early-clobber $eflags $rax = MOV64rr killed $rsi RETQ killed $rax diff --git a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromips.mir b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromips.mir index 2411f65b1758..7224d0ddc5ff 100644 --- a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromips.mir +++ b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromips.mir @@ -81,7 +81,7 @@ body: | ; MM: NOP ; MM: } ; MM: bb.2.if.then: - ; MM: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; MM: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MM: $v0 = LI16_MM 0 ; MM: JRC16_MM undef $ra, implicit killed $v0 ; MM: bb.3.return: @@ -110,7 +110,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: $v0 = LI16_MM 0 ; PIC: JRC16_MM undef $ra, implicit killed $v0 ; PIC: bb.4.return: @@ -179,7 +179,7 @@ body: | ; MM: $v0 = LI16_MM 1 ; MM: JRC16_MM undef $ra, implicit killed $v0 ; MM: bb.2.if.then: - ; MM: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; MM: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MM: $v0 = LI16_MM 0 ; MM: JRC16_MM undef $ra, implicit killed $v0 ; PIC-LABEL: name: b @@ -193,7 +193,7 @@ body: | ; PIC: $v0 = LI16_MM 1 ; PIC: JRC16_MM undef $ra, implicit killed $v0 ; PIC: bb.2.if.then: - ; PIC: INLINEASM &".space 310680", 1, 
12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: $v0 = LI16_MM 0 ; PIC: JRC16_MM undef $ra, implicit killed $v0 bb.0.entry: diff --git a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromipsr6.mir b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromipsr6.mir index 9d6713480502..aa6dfed58d5a 100644 --- a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromipsr6.mir +++ b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-micromipsr6.mir @@ -77,7 +77,7 @@ body: | ; MM: successors: %bb.3(0x80000000) ; MM: BC_MMR6 %bb.3 ; MM: bb.2.if.then: - ; MM: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MM: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MM: $v0 = LI16_MM 0 ; MM: JRC16_MM undef $ra, implicit $v0 ; MM: bb.3.return: @@ -102,7 +102,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: JIC_MMR6 $at, 0, implicit-def $at ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: $v0 = LI16_MM 0 ; PIC: JRC16_MM undef $ra, implicit $v0 ; PIC: bb.4.return: @@ -169,7 +169,7 @@ body: | ; MM: successors: %bb.3(0x80000000) ; MM: BC_MMR6 %bb.3 ; MM: bb.2.if.then: - ; MM: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MM: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MM: $v0 = LI16_MM 0 ; MM: JRC16_MM undef $ra, implicit $v0 ; MM: bb.3.return: @@ -194,7 +194,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: JIC_MMR6 $at, 0, implicit-def $at ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: $v0 = LI16_MM 0 ; PIC: JRC16_MM undef $ra, implicit $v0 ; PIC: bb.4.return: diff --git a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mips.mir b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mips.mir index 802acab0619c..78d8c073ea72 100644 --- a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mips.mir +++ b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mips.mir @@ -80,7 +80,7 @@ body: | ; MIPS: NOP ; MIPS: } ; MIPS: bb.2.if.then: - ; MIPS: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; MIPS: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MIPS: PseudoReturn undef $ra, implicit killed $v0 { ; MIPS: $v0 = ADDiu $zero, 0 ; MIPS: } @@ -111,7 +111,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn undef $ra, implicit killed $v0 { ; PIC: $v0 = ADDiu $zero, 0 ; PIC: } @@ -184,7 +184,7 @@ body: | ; MIPS: NOP ; MIPS: } ; MIPS: bb.2.if.then: - ; MIPS: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; MIPS: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MIPS: PseudoReturn undef $ra, implicit killed $v0 { ; MIPS: $v0 = ADDiu $zero, 
0 ; MIPS: } @@ -215,7 +215,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn undef $ra, implicit killed $v0 { ; PIC: $v0 = ADDiu $zero, 0 ; PIC: } diff --git a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mipsr6.mir b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mipsr6.mir index 5356ea43485e..e472da1a93de 100644 --- a/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mipsr6.mir +++ b/llvm/test/CodeGen/Mips/longbranch/branch-limits-fp-mipsr6.mir @@ -80,7 +80,7 @@ body: | ; R6: successors: %bb.3(0x80000000) ; R6: BC %bb.3 ; R6: bb.2.if.then: - ; R6: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; R6: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; R6: PseudoReturn undef $ra, implicit killed $v0 { ; R6: $v0 = ADDiu $zero, 0 ; R6: } @@ -109,7 +109,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: JIC $at, 0, implicit-def $at ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn undef $ra, implicit killed $v0 { ; PIC: $v0 = ADDiu $zero, 0 ; PIC: } @@ -180,7 +180,7 @@ body: | ; R6: successors: %bb.3(0x80000000) ; R6: BC %bb.3 ; R6: bb.2.if.then: - ; R6: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; R6: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; R6: PseudoReturn undef $ra, implicit killed $v0 { ; R6: $v0 = ADDiu $zero, 0 ; R6: } @@ -209,7 +209,7 @@ body: | ; PIC: $sp = ADDiu $sp, 8 ; PIC: JIC $at, 0, implicit-def $at ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 310680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 310680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn undef $ra, implicit killed $v0 { ; PIC: $v0 = ADDiu $zero, 0 ; PIC: } diff --git a/llvm/test/CodeGen/Mips/longbranch/branch-limits-msa.mir b/llvm/test/CodeGen/Mips/longbranch/branch-limits-msa.mir index a2ceff05b857..9b497fce0a98 100644 --- a/llvm/test/CodeGen/Mips/longbranch/branch-limits-msa.mir +++ b/llvm/test/CodeGen/Mips/longbranch/branch-limits-msa.mir @@ -271,7 +271,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -307,7 +307,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -387,7 +387,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect 
attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -422,7 +422,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -501,7 +501,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -536,7 +536,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -614,7 +614,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -648,7 +648,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -725,7 +725,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -759,7 +759,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -838,7 +838,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -874,7 +874,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead 
early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -954,7 +954,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -989,7 +989,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -1068,7 +1068,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -1103,7 +1103,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -1181,7 +1181,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -1215,7 +1215,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } @@ -1292,7 +1292,7 @@ body: | ; MSA: NOP ; MSA: } ; MSA: bb.2.if.then: - ; MSA: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; MSA: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; MSA: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; MSA: renamable $v0 = ADDiu $zero, 1 ; MSA: } @@ -1326,7 +1326,7 @@ body: | ; PIC: $sp_64 = DADDiu $sp_64, 16 ; PIC: } ; PIC: bb.3.if.then: - ; PIC: INLINEASM &".space 810680", 1, 12, implicit-def dead early-clobber $at + ; PIC: INLINEASM &".space 810680", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def dead early-clobber $at ; PIC: PseudoReturn64 undef $ra_64, implicit killed $v0 { ; PIC: renamable $v0 = ADDiu $zero, 1 ; PIC: } diff --git a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir index dc04b82cc0ca..ace7a38ec10b 100644 --- a/llvm/test/CodeGen/Thumb2/high-reg-spill.mir +++ b/llvm/test/CodeGen/Thumb2/high-reg-spill.mir @@ -41,7 +41,7 @@ body: | ; CHECK: renamable $r12 = COPY killed renamable 
$r0 ; CHECK: t2STRi12 killed $r12, %stack.1, 0, 14 /* CC::al */, $noreg :: (store 4 into %stack.1) ; CHECK: $r8 = t2LDRi12 %stack.1, 0, 14 /* CC::al */, $noreg :: (load 4 from %stack.1) - ; CHECK: INLINEASM &"@ $0", 1, 589833, renamable $r8, 12, implicit-def early-clobber $r12 + ; CHECK: INLINEASM &"@ $0", 1 /* sideeffect attdialect */, 589833 /* reguse:GPRnopc */, renamable $r8, 12 /* clobber */, implicit-def early-clobber $r12 ; CHECK: tBX_RET 14 /* CC::al */, $noreg %1:tgpr = tLDRspi %stack.0.i, 0, 14, $noreg :: (dereferenceable load 4 from %ir.i) %0:hgpr = COPY %1 diff --git a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll index 47bbff877c56..cc647d2ba771 100644 --- a/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll +++ b/llvm/test/CodeGen/X86/inline-asm-avx512f-x-constraint.ll @@ -2,7 +2,7 @@ ; CHECK: %[[REG1:.*]]:vr512_0_15 = COPY %1 ; CHECK: %[[REG2:.*]]:vr512_0_15 = COPY %2 -; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags +; CHECK: INLINEASM &"vpaddq\09$3, $2, $0 {$1}", 0 /* attdialect */, {{.*}}, def %{{.*}}, {{.*}}, %{{.*}}, {{.*}}, %[[REG1]], {{.*}}, %[[REG2]], 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags define <8 x i64> @mask_Yk_i8(i8 signext %msk, <8 x i64> %x, <8 x i64> %y) { entry: diff --git a/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll b/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll index fae8c73cf083..4a8906af95c4 100644 --- a/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll +++ b/llvm/test/CodeGen/X86/inline-asm-default-clobbers.ll @@ -1,6 +1,6 @@ ; RUN: llc < %s -mtriple=i686 -stop-after=finalize-isel | FileCheck %s -; CHECK: INLINEASM &"", 1, 12, implicit-def early-clobber $df, 12, implicit-def early-clobber $fpsw, 12, implicit-def early-clobber $eflags +; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 12 /* clobber */, implicit-def early-clobber $df, 12 /* clobber */, implicit-def early-clobber $fpsw, 12 /* clobber */, implicit-def early-clobber $eflags define void @foo() { entry: call void asm sideeffect "", "~{dirflag},~{fpsr},~{flags}"() diff --git a/llvm/test/CodeGen/X86/stack-folding-adx.mir b/llvm/test/CodeGen/X86/stack-folding-adx.mir index 99e24cb12d1b..902d4b84e0c0 100644 --- a/llvm/test/CodeGen/X86/stack-folding-adx.mir +++ b/llvm/test/CodeGen/X86/stack-folding-adx.mir @@ -88,7 +88,7 @@ body: | ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: (store 4 into %stack.1) ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.2) ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) - ; CHECK: INLINEASM &nop, 1, 3145738, def dead %4, 12, implicit-def dead early-clobber $rax, 12, implicit-def dead early-clobber $rbx, 12, implicit-def dead early-clobber $rcx, 12, implicit-def dead early-clobber $rdx, 12, implicit-def dead early-clobber $rsi, 12, implicit-def dead early-clobber $rdi, 12, implicit-def dead early-clobber $rbp, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 
12, implicit-def dead early-clobber $r15 + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 3145738 /* regdef:GR32_CB */, def dead %4, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, -1, implicit-def $eflags ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load 4 from %stack.2) @@ -140,7 +140,7 @@ body: | ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdx :: (store 8 into %stack.1) ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.2) ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) - ; CHECK: INLINEASM &nop, 1, 3145738, def dead %4, 12, implicit-def dead early-clobber $rax, 12, implicit-def dead early-clobber $rbx, 12, implicit-def dead early-clobber $rcx, 12, implicit-def dead early-clobber $rdx, 12, implicit-def dead early-clobber $rsi, 12, implicit-def dead early-clobber $rdi, 12, implicit-def dead early-clobber $rbp, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15 + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 3145738 /* regdef:GR32_CB */, def dead %4, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, -1, implicit-def $eflags ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) @@ -192,7 +192,7 @@ body: | ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edx :: 
(store 4 into %stack.1) ; CHECK: MOV32mr %stack.2, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.2) ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) - ; CHECK: INLINEASM &nop, 1, 3145738, def dead %4, 12, implicit-def dead early-clobber $rax, 12, implicit-def dead early-clobber $rbx, 12, implicit-def dead early-clobber $rcx, 12, implicit-def dead early-clobber $rdx, 12, implicit-def dead early-clobber $rsi, 12, implicit-def dead early-clobber $rdi, 12, implicit-def dead early-clobber $rbp, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15 + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 3145738 /* regdef:GR32_CB */, def dead %4, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, 127, implicit-def $eflags ; CHECK: [[MOV32rm1:%[0-9]+]]:gr32 = MOV32rm %stack.2, 1, $noreg, 0, $noreg :: (load 4 from %stack.2) @@ -244,7 +244,7 @@ body: | ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdx :: (store 8 into %stack.1) ; CHECK: MOV64mr %stack.2, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.2) ; CHECK: MOV32mr %stack.3, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.3) - ; CHECK: INLINEASM &nop, 1, 3145738, def dead %4, 12, implicit-def dead early-clobber $rax, 12, implicit-def dead early-clobber $rbx, 12, implicit-def dead early-clobber $rcx, 12, implicit-def dead early-clobber $rdx, 12, implicit-def dead early-clobber $rsi, 12, implicit-def dead early-clobber $rdi, 12, implicit-def dead early-clobber $rbp, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15 + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 3145738 /* regdef:GR32_CB */, def dead %4, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* 
clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 ; CHECK: [[MOV32rm:%[0-9]+]]:gr32 = MOV32rm %stack.3, 1, $noreg, 0, $noreg :: (load 4 from %stack.3) ; CHECK: dead [[MOV32rm]].sub_8bit:gr32 = ADD8ri [[MOV32rm]].sub_8bit, 127, implicit-def $eflags ; CHECK: [[MOV64rm:%[0-9]+]]:gr64 = MOV64rm %stack.2, 1, $noreg, 0, $noreg :: (load 8 from %stack.2) diff --git a/llvm/test/CodeGen/X86/stack-folding-bmi2.mir b/llvm/test/CodeGen/X86/stack-folding-bmi2.mir index 604f7bdacdf1..8f8d074eb1b7 100644 --- a/llvm/test/CodeGen/X86/stack-folding-bmi2.mir +++ b/llvm/test/CodeGen/X86/stack-folding-bmi2.mir @@ -52,7 +52,7 @@ body: | ; CHECK: liveins: $edi, $esi ; CHECK: MOV32mr %stack.0, 1, $noreg, 0, $noreg, $esi :: (store 4 into %stack.0) ; CHECK: MOV32mr %stack.1, 1, $noreg, 0, $noreg, $edi :: (store 4 into %stack.1) - ; CHECK: INLINEASM &nop, 1, 4063242, def dead %2, 12, implicit-def dead early-clobber $rax, 12, implicit-def dead early-clobber $rbx, 12, implicit-def dead early-clobber $rcx, 12, implicit-def dead early-clobber $rdx, 12, implicit-def dead early-clobber $rsi, 12, implicit-def dead early-clobber $rdi, 12, implicit-def dead early-clobber $rbp, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15 + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 4063242 /* regdef:LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit */, def dead %2, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 ; CHECK: $edx = MOV32rm %stack.1, 1, $noreg, 0, $noreg :: (load 4 from %stack.1) ; CHECK: %3:gr32, dead %4:gr32 = MULX32rm %stack.0, 1, $noreg, 0, $noreg, implicit $edx :: (load 4 from %stack.0) ; CHECK: $eax = COPY %3 @@ -87,7 +87,7 @@ body: | ; CHECK: liveins: $rdi, $rsi ; CHECK: MOV64mr %stack.0, 1, $noreg, 0, $noreg, $rsi :: (store 8 into %stack.0) ; CHECK: MOV64mr %stack.1, 1, $noreg, 0, $noreg, $rdi :: (store 8 into %stack.1) - ; CHECK: INLINEASM &nop, 1, 4063242, def dead %2, 12, implicit-def dead early-clobber $rax, 12, implicit-def dead early-clobber $rbx, 12, implicit-def dead early-clobber $rcx, 12, implicit-def dead early-clobber $rdx, 12, implicit-def dead early-clobber 
$rsi, 12, implicit-def dead early-clobber $rdi, 12, implicit-def dead early-clobber $rbp, 12, implicit-def dead early-clobber $r8, 12, implicit-def dead early-clobber $r9, 12, implicit-def dead early-clobber $r10, 12, implicit-def dead early-clobber $r11, 12, implicit-def dead early-clobber $r12, 12, implicit-def dead early-clobber $r13, 12, implicit-def dead early-clobber $r14, 12, implicit-def dead early-clobber $r15 + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 4063242 /* regdef:LOW32_ADDR_ACCESS_RBP_with_sub_8bit_with_sub_32bit */, def dead %2, 12 /* clobber */, implicit-def dead early-clobber $rax, 12 /* clobber */, implicit-def dead early-clobber $rbx, 12 /* clobber */, implicit-def dead early-clobber $rcx, 12 /* clobber */, implicit-def dead early-clobber $rdx, 12 /* clobber */, implicit-def dead early-clobber $rsi, 12 /* clobber */, implicit-def dead early-clobber $rdi, 12 /* clobber */, implicit-def dead early-clobber $rbp, 12 /* clobber */, implicit-def dead early-clobber $r8, 12 /* clobber */, implicit-def dead early-clobber $r9, 12 /* clobber */, implicit-def dead early-clobber $r10, 12 /* clobber */, implicit-def dead early-clobber $r11, 12 /* clobber */, implicit-def dead early-clobber $r12, 12 /* clobber */, implicit-def dead early-clobber $r13, 12 /* clobber */, implicit-def dead early-clobber $r14, 12 /* clobber */, implicit-def dead early-clobber $r15 ; CHECK: $rdx = MOV64rm %stack.1, 1, $noreg, 0, $noreg :: (load 8 from %stack.1) ; CHECK: %3:gr64, dead %4:gr64 = MULX64rm %stack.0, 1, $noreg, 0, $noreg, implicit $rdx :: (load 8 from %stack.0) ; CHECK: $rax = COPY %3 diff --git a/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir b/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir index 479c5d45a194..f3f945a8e54f 100644 --- a/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir +++ b/llvm/test/CodeGen/X86/stack-folding-fp-nofpexcept.mir @@ -38,7 +38,7 @@ body: | ; CHECK: liveins: $xmm0, $xmm1 ; CHECK: MOVAPSmr %stack.0, 1, $noreg, 0, $noreg, $xmm1 :: (store 16 into %stack.0) ; CHECK: [[COPY:%[0-9]+]]:vr128 = COPY $xmm0 - ; CHECK: INLINEASM &nop, 1, 7405578, def dead %2, 12, implicit-def dead early-clobber $xmm2, 12, implicit-def dead early-clobber $xmm3, 12, implicit-def dead early-clobber $xmm4, 12, implicit-def dead early-clobber $xmm5, 12, implicit-def dead early-clobber $xmm6, 12, implicit-def dead early-clobber $xmm7, 12, implicit-def dead early-clobber $xmm8, 12, implicit-def dead early-clobber $xmm9, 12, implicit-def dead early-clobber $xmm10, 12, implicit-def dead early-clobber $xmm11, 12, implicit-def dead early-clobber $xmm12, 12, implicit-def dead early-clobber $xmm13, 12, implicit-def dead early-clobber $xmm14, 12, implicit-def dead early-clobber $xmm15, 12, implicit-def dead early-clobber $eflags + ; CHECK: INLINEASM &nop, 1 /* sideeffect attdialect */, 7405578 /* regdef:VR128 */, def dead %2, 12 /* clobber */, implicit-def dead early-clobber $xmm2, 12 /* clobber */, implicit-def dead early-clobber $xmm3, 12 /* clobber */, implicit-def dead early-clobber $xmm4, 12 /* clobber */, implicit-def dead early-clobber $xmm5, 12 /* clobber */, implicit-def dead early-clobber $xmm6, 12 /* clobber */, implicit-def dead early-clobber $xmm7, 12 /* clobber */, implicit-def dead early-clobber $xmm8, 12 /* clobber */, implicit-def dead early-clobber $xmm9, 12 /* clobber */, implicit-def dead early-clobber $xmm10, 12 /* clobber */, implicit-def dead early-clobber $xmm11, 12 /* clobber */, implicit-def dead early-clobber $xmm12, 12 /* clobber */, 
implicit-def dead early-clobber $xmm13, 12 /* clobber */, implicit-def dead early-clobber $xmm14, 12 /* clobber */, implicit-def dead early-clobber $xmm15, 12 /* clobber */, implicit-def dead early-clobber $eflags ; CHECK: [[COPY]]:vr128 = nofpexcept ADDPDrm [[COPY]], %stack.0, 1, $noreg, 0, $noreg, implicit $mxcsr :: (load 16 from %stack.0) ; CHECK: $xmm0 = COPY [[COPY]] ; CHECK: RET 0, $xmm0 From cee80c0489e96c36269388b2aacd4da1c5714a66 Mon Sep 17 00:00:00 2001 From: Kirill Bobyrev Date: Thu, 16 Apr 2020 11:10:03 +0200 Subject: [PATCH 026/216] [clangd] Pull installed gRPC and introduce clangd-remote-(server|client) Summary: This patch allows using installed gRPC to build two simple tools which currently provide the functionality of looking up the symbol by name. remote-index-client is a simplified version of dexp which connects to remote-index-server passes lookup requests. I also significantly reduced the scope of this patch to prevent large changelist and more bugs. The next steps would be: * Extending Protocol for deep copies of Symbol and inherit RemoteIndex from Index to unify the interfaces * Make remote-index-server more generic and merge the remote index client with dexp * Modify Clangd to allow using remote index instead of the local one for all global index requests Reviewers: sammccall Reviewed By: sammccall Subscribers: mgorny, ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D77794 --- clang-tools-extra/clangd/CMakeLists.txt | 9 ++ .../clangd/index/remote/CMakeLists.txt | 7 ++ .../clangd/index/remote/Index.proto | 19 ++++ .../clangd/index/remote/README.md | 59 ++++++++++ .../clangd/index/remote/client/CMakeLists.txt | 19 ++++ .../clangd/index/remote/client/Client.cpp | 91 ++++++++++++++++ .../clangd/index/remote/server/CMakeLists.txt | 20 ++++ .../clangd/index/remote/server/Server.cpp | 102 ++++++++++++++++++ llvm/cmake/modules/FindGRPC.cmake | 50 +++++++++ 9 files changed, 376 insertions(+) create mode 100644 clang-tools-extra/clangd/index/remote/CMakeLists.txt create mode 100644 clang-tools-extra/clangd/index/remote/Index.proto create mode 100644 clang-tools-extra/clangd/index/remote/README.md create mode 100644 clang-tools-extra/clangd/index/remote/client/CMakeLists.txt create mode 100644 clang-tools-extra/clangd/index/remote/client/Client.cpp create mode 100644 clang-tools-extra/clangd/index/remote/server/CMakeLists.txt create mode 100644 clang-tools-extra/clangd/index/remote/server/Server.cpp create mode 100644 llvm/cmake/modules/FindGRPC.cmake diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index 7a9a4f7932ae..1c2cbf398b77 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -153,3 +153,12 @@ if(CLANG_INCLUDE_TESTS) add_subdirectory(test) add_subdirectory(unittests) endif() + +# FIXME(kirillbobyrev): Document this in the LLVM docs once remote index is stable. 
+option(CLANGD_ENABLE_REMOTE "Use gRPC library to enable remote index support for Clangd" OFF) +set(GRPC_INSTALL_PATH "" CACHE PATH "Path to gRPC library manual installation.") + +if (CLANGD_ENABLE_REMOTE) + include(FindGRPC) + add_subdirectory(index/remote) +endif() diff --git a/clang-tools-extra/clangd/index/remote/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/CMakeLists.txt new file mode 100644 index 000000000000..b946958f3c5f --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/CMakeLists.txt @@ -0,0 +1,7 @@ +generate_grpc_protos(RemoteIndexProtos "Index.proto") + +include_directories("${CMAKE_CURRENT_BINARY_DIR}") +include_directories(${CMAKE_CURRENT_SOURCE_DIR}/../../) + +add_subdirectory(client) +add_subdirectory(server) diff --git a/clang-tools-extra/clangd/index/remote/Index.proto b/clang-tools-extra/clangd/index/remote/Index.proto new file mode 100644 index 000000000000..399036ed72b7 --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/Index.proto @@ -0,0 +1,19 @@ +//===--- Index.proto - Remote index Protocol Buffers definition -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +syntax = "proto3"; + +package clang.clangd.remote; + +service Index { + rpc Lookup(LookupRequest) returns (stream LookupReply) {} +} + +message LookupRequest { string id = 1; } + +message LookupReply { string symbol_yaml = 1; } diff --git a/clang-tools-extra/clangd/index/remote/README.md b/clang-tools-extra/clangd/index/remote/README.md new file mode 100644 index 000000000000..b56b2fc1011e --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/README.md @@ -0,0 +1,59 @@ +# Clangd remote index + +Clangd uses a global index for project-wide code completion, navigation and +other features. For large projects, building this can take many hours and +keeping it loaded uses a lot of memory. + +To relieve that burden, we're building remote index — a global index +served on a different machine and shared between developers. This directory +contains code that is used as Proof of Concept for the upcoming remote index +feature. + +## Building + +This feature uses gRPC and Protobuf libraries, so you will need to install them. +There are two ways of doing that. + +However you install dependencies, to enable this feature and build remote index +tools you will need to set this CMake flag — `-DCLANGD_ENABLE_REMOTE=On`. + +### System-installed libraries + +On Debian-like systems gRPC and Protobuf can be installed from apt: + +```bash +apt install libgrpc++-dev libprotobuf-dev protobuf-compiler protobuf-compiler-grpc +``` + +### Building from sources + +Another way of installing gRPC and Protobuf is building from sources using +CMake. The easiest way of doing that would be to choose a directory where you +want to install so that the installation files are not copied to system root and +you can uninstall gRPC or use different versions of the library. + +```bash +# Get source code. +$ git clone -b v1.28.1 https://github.com/grpc/grpc +$ cd grpc +$ git submodule update --init +# Choose directory where you want gRPC installation to live. 
+$ export GRPC_INSTALL_PATH=/where/you/want/grpc/to/be/installed +# Build and install gRPC to ${GRPC_INSTALL_PATH} +$ mkdir build; cd build +$ cmake -DgRPC_INSTALL=ON -DCMAKE_INSTALL_PREFIX=${GRPC_INSTALL_PATH} -DCMAKE_BUILD_TYPE=Release .. +$ make install +``` + +This [guide](https://github.com/grpc/grpc/blob/master/BUILDING.md) goes into +more detail on how to build gRPC from sources. + +By default, CMake will look for system-installed libraries when building remote +index tools so you will have to adjust LLVM's CMake invocation. The following +flag will inform build system that you chose this option — +`-DGRPC_INSTALL_PATH=${GRPC_INSTALL_PATH}`. + +## Running + +The remote index isn't usable with Clangd yet, but you can try the +proof-of-concept tools in `client/` and `server/` subdirectories. diff --git a/clang-tools-extra/clangd/index/remote/client/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/client/CMakeLists.txt new file mode 100644 index 000000000000..18bca1b04436 --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/client/CMakeLists.txt @@ -0,0 +1,19 @@ +set(LLVM_LINK_COMPONENTS + LineEditor + Support + ) +add_clang_executable(clangd-index-client + Client.cpp + ) +target_compile_definitions(clangd-index-client PRIVATE -DGOOGLE_PROTOBUF_NO_RTTI=1) +clang_target_link_libraries(clangd-index-client + PRIVATE + clangDaemon + ) +target_link_libraries(clangd-index-client + PRIVATE + RemoteIndexProtos + + protobuf + grpc++ + ) diff --git a/clang-tools-extra/clangd/index/remote/client/Client.cpp b/clang-tools-extra/clangd/index/remote/client/Client.cpp new file mode 100644 index 000000000000..5e888c5e0fa7 --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/client/Client.cpp @@ -0,0 +1,91 @@ +//===--- Client.cpp - Remote Index Client -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a simple interactive tool which can be used to manually +// evaluate symbol search quality of Clangd index. +// +//===----------------------------------------------------------------------===// + +#include "SourceCode.h" +#include "index/Serialization.h" +#include "index/dex/Dex.h" +#include "llvm/ADT/ScopeExit.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/LineEditor/LineEditor.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Signals.h" + +#include "grpcpp/grpcpp.h" + +#include "Index.grpc.pb.h" + +namespace clang { +namespace clangd { +namespace { + +llvm::cl::opt + ServerAddress("server-address", + llvm::cl::desc("Address of remote index server to use."), + llvm::cl::init("0.0.0.0:50051")); + +static const std::string Overview = R"( +This is an **experimental** interactive tool to process user-provided search +queries over given symbol collection obtained via clangd-indexer with the help +of remote index server. The client will connect to remote index server and pass +it lookup queries. 
+)"; + +class RemoteIndexClient { +public: + RemoteIndexClient(std::shared_ptr Channel) + : Stub(remote::Index::NewStub(Channel)) {} + + void lookup(llvm::StringRef ID) { + llvm::outs() << "Lookup of symbol with ID " << ID << '\n'; + remote::LookupRequest Proto; + Proto.set_id(ID.str()); + + grpc::ClientContext Context; + remote::LookupReply Reply; + std::unique_ptr> Reader( + Stub->Lookup(&Context, Proto)); + while (Reader->Read(&Reply)) { + llvm::outs() << Reply.symbol_yaml(); + } + grpc::Status Status = Reader->Finish(); + if (Status.ok()) { + llvm::outs() << "lookupRequest rpc succeeded.\n"; + } else { + llvm::outs() << "lookupRequest rpc failed.\n"; + } + } + +private: + std::unique_ptr Stub; +}; + +} // namespace +} // namespace clangd +} // namespace clang + +int main(int argc, const char *argv[]) { + using namespace clang::clangd; + + llvm::cl::ParseCommandLineOptions(argc, argv, Overview); + llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands. + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + + RemoteIndexClient IndexClient( + grpc::CreateChannel(ServerAddress, grpc::InsecureChannelCredentials())); + + llvm::LineEditor LE("remote-index-client"); + while (llvm::Optional Request = LE.readLine()) + IndexClient.lookup(std::move(*Request)); +} diff --git a/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt b/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt new file mode 100644 index 000000000000..7493be1a444f --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/server/CMakeLists.txt @@ -0,0 +1,20 @@ +set(LLVM_LINK_COMPONENTS + LineEditor + Support + ) +add_clang_executable(clangd-index-server + Server.cpp + ) +target_compile_definitions(clangd-index-server PRIVATE -DGOOGLE_PROTOBUF_NO_RTTI=1) +clang_target_link_libraries(clangd-index-server + PRIVATE + clangDaemon + ) +target_link_libraries(clangd-index-server + PRIVATE + RemoteIndexProtos + + protobuf + grpc++ + clangDaemon + ) diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp new file mode 100644 index 000000000000..b7a54b79b6c3 --- /dev/null +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -0,0 +1,102 @@ +//===--- Server.cpp - gRPC-based Remote Index Server ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "index/Index.h" +#include "index/Serialization.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/LineEditor/LineEditor.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/Signals.h" + +#include "grpcpp/grpcpp.h" +#include "grpcpp/health_check_service_interface.h" + +#include "Index.grpc.pb.h" + +namespace clang { +namespace clangd { +namespace { + +static const std::string Overview = R"( +This is an experimental remote index implementation. The server opens Dex and +awaits gRPC lookup requests from the client. 
+)"; + +llvm::cl::opt IndexPath(llvm::cl::desc(""), + llvm::cl::Positional, llvm::cl::Required); + +llvm::cl::opt ServerAddress("server-address", + llvm::cl::init("0.0.0.0:50051")); + +std::unique_ptr openIndex(llvm::StringRef Index) { + return loadIndex(Index, /*UseIndex=*/true); +} + +class RemoteIndexServer final : public remote::Index::Service { +public: + RemoteIndexServer(std::unique_ptr Index) + : Index(std::move(Index)) {} + +private: + grpc::Status Lookup(grpc::ServerContext *Context, + const remote::LookupRequest *Request, + grpc::ServerWriter *Reply) override { + llvm::outs() << "Lookup of symbol with ID " << Request->id() << '\n'; + LookupRequest Req; + auto SID = SymbolID::fromStr(Request->id()); + if (!SID) { + llvm::outs() << llvm::toString(SID.takeError()) << "\n"; + return grpc::Status::CANCELLED; + } + Req.IDs.insert(*SID); + Index->lookup(Req, [&](const Symbol &Sym) { + remote::LookupReply NextSymbol; + NextSymbol.set_symbol_yaml(toYAML(Sym)); + Reply->Write(NextSymbol); + }); + return grpc::Status::OK; + } + + std::unique_ptr Index; +}; + +void runServer(std::unique_ptr Index, + const std::string &ServerAddress) { + RemoteIndexServer Service(std::move(Index)); + + grpc::EnableDefaultHealthCheckService(true); + grpc::ServerBuilder Builder; + Builder.AddListeningPort(ServerAddress, grpc::InsecureServerCredentials()); + Builder.RegisterService(&Service); + std::unique_ptr Server(Builder.BuildAndStart()); + llvm::outs() << "Server listening on " << ServerAddress << '\n'; + + Server->Wait(); +} + +} // namespace +} // namespace clangd +} // namespace clang + +int main(int argc, char *argv[]) { + using namespace clang::clangd; + llvm::cl::ParseCommandLineOptions(argc, argv, clang::clangd::Overview); + llvm::sys::PrintStackTraceOnErrorSignal(argv[0]); + + std::unique_ptr Index = openIndex(IndexPath); + + if (!Index) { + llvm::outs() << "Failed to open the index.\n"; + return -1; + } + + runServer(std::move(Index), ServerAddress); +} diff --git a/llvm/cmake/modules/FindGRPC.cmake b/llvm/cmake/modules/FindGRPC.cmake new file mode 100644 index 000000000000..b70356696298 --- /dev/null +++ b/llvm/cmake/modules/FindGRPC.cmake @@ -0,0 +1,50 @@ +# This setup requires gRPC to be built from sources using CMake and installed to +# ${GRPC_INSTALL_PATH} via -DCMAKE_INSTALL_PREFIX=${GRPC_INSTALL_PATH}. +if (GRPC_INSTALL_PATH) + set(protobuf_MODULE_COMPATIBLE TRUE) + find_package(Protobuf CONFIG REQUIRED HINTS ${GRPC_INSTALL_PATH}) + message(STATUS "Using protobuf ${protobuf_VERSION}") + find_package(gRPC CONFIG REQUIRED HINTS ${GRPC_INSTALL_PATH}) + message(STATUS "Using gRPC ${gRPC_VERSION}") + + include_directories(${Protobuf_INCLUDE_DIRS}) + + # gRPC CMake CONFIG gives the libraries slightly odd names, make them match + # the conventional system-installed names. + set_target_properties(protobuf::libprotobuf PROPERTIES IMPORTED_GLOBAL TRUE) + add_library(protobuf ALIAS protobuf::libprotobuf) + set_target_properties(gRPC::grpc++ PROPERTIES IMPORTED_GLOBAL TRUE) + add_library(grpc++ ALIAS gRPC::grpc++) + + set(GRPC_CPP_PLUGIN $) + set(PROTOC ${Protobuf_PROTOC_EXECUTABLE}) +else() + find_program(GRPC_CPP_PLUGIN grpc_cpp_plugin) + find_program(PROTOC protoc) +endif() + +# Proto headers are generated in ${CMAKE_CURRENT_BINARY_DIR}. +# Libraries that use these headers should adjust the include path. +# FIXME(kirillbobyrev): Allow optional generation of gRPC code and give callers +# control over it via additional parameters. 
+function(generate_grpc_protos LibraryName ProtoFile) + get_filename_component(ProtoSourceAbsolutePath "${CMAKE_CURRENT_SOURCE_DIR}/${ProtoFile}" ABSOLUTE) + get_filename_component(ProtoSourcePath ${ProtoSourceAbsolutePath} PATH) + + set(GeneratedProtoSource "${CMAKE_CURRENT_BINARY_DIR}/Index.pb.cc") + set(GeneratedProtoHeader "${CMAKE_CURRENT_BINARY_DIR}/Index.pb.h") + set(GeneratedGRPCSource "${CMAKE_CURRENT_BINARY_DIR}/Index.grpc.pb.cc") + set(GeneratedGRPCHeader "${CMAKE_CURRENT_BINARY_DIR}/Index.grpc.pb.h") + add_custom_command( + OUTPUT "${GeneratedProtoSource}" "${GeneratedProtoHeader}" "${GeneratedGRPCSource}" "${GeneratedGRPCHeader}" + COMMAND ${PROTOC} + ARGS --grpc_out="${CMAKE_CURRENT_BINARY_DIR}" + --cpp_out="${CMAKE_CURRENT_BINARY_DIR}" + --proto_path="${ProtoSourcePath}" + --plugin=protoc-gen-grpc="${GRPC_CPP_PLUGIN}" + "${ProtoSourceAbsolutePath}" + DEPENDS "${ProtoSourceAbsolutePath}") + + add_library(${LibraryName} ${GeneratedProtoSource} ${GeneratedGRPCSource}) + target_link_libraries(${LibraryName} grpc++ protobuf) +endfunction() From 65a2de7e6c986193a630e691686c527b08f292d5 Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Mon, 13 Apr 2020 18:10:53 +0300 Subject: [PATCH 027/216] [FileCheck] - Fix the false positive when -implicit-check-not is used with an unknown -check-prefix. Imagine we have the following invocation: `FileCheck -check-prefix=UNKNOWN-PREFIX -implicit-check-not=something` When the check prefix does not exist it does not fail. This patch fixes the issue. Differential revision: https://reviews.llvm.org/D78024 --- ...ch-implicit-conversions-basics-negatives.c | 2 +- llvm/include/llvm/Support/FileCheck.h | 1 + llvm/lib/Support/FileCheck.cpp | 30 ++++++++++++------- llvm/test/FileCheck/implicit-check-not.txt | 12 ++++++++ .../tools/llvm-objcopy/MachO/strip-debug.test | 2 +- 5 files changed, 34 insertions(+), 13 deletions(-) diff --git a/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c b/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c index 2e060cfcddef..e8f09975a26e 100644 --- a/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c +++ b/clang/test/CodeGen/catch-implicit-conversions-basics-negatives.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" --check-prefixes=CHECK +// RUN: %clang_cc1 -fsanitize=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -fsanitize-recover=implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change -emit-llvm %s -o - -triple x86_64-linux-gnu | FileCheck %s -implicit-check-not="call void @__ubsan_handle_implicit_conversion" // If we have an enum, it will be promoted to an unsigned integer. // But both types are unsigned, and have same bitwidth. 
diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h index 429e36cfcbb5..d218ef042257 100644 --- a/llvm/include/llvm/Support/FileCheck.h +++ b/llvm/include/llvm/Support/FileCheck.h @@ -31,6 +31,7 @@ struct FileCheckRequest { bool AllowEmptyInput = false; bool MatchFullLines = false; bool IgnoreCase = false; + bool IsDefaultCheckPrefix = false; bool EnableVarScope = false; bool AllowDeprecatedDagOverlap = false; bool Verbose = false; diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index 0913b97fcdd0..71b1e8356137 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -1305,6 +1305,7 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, // found. unsigned LineNumber = 1; + bool FoundUsedPrefix = false; while (1) { Check::FileCheckType CheckTy; @@ -1315,6 +1316,8 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, FindFirstMatchingPrefix(PrefixRE, Buffer, LineNumber, CheckTy); if (UsedPrefix.empty()) break; + FoundUsedPrefix = true; + assert(UsedPrefix.data() == Buffer.data() && "Failed to move Buffer's start forward, or pointed prefix outside " "of the buffer!"); @@ -1398,16 +1401,10 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, DagNotMatches = ImplicitNegativeChecks; } - // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, - // and use the first prefix as a filler for the error message. - if (!DagNotMatches.empty()) { - CheckStrings->emplace_back( - Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), - *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); - std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); - } - - if (CheckStrings->empty()) { + // When there are no used prefixes we report an error except in the case that + // no prefix is specified explicitly but -implicit-check-not is specified. + if (!FoundUsedPrefix && + (ImplicitNegativeChecks.empty() || !Req.IsDefaultCheckPrefix)) { errs() << "error: no check strings found with prefix" << (Req.CheckPrefixes.size() > 1 ? "es " : " "); auto I = Req.CheckPrefixes.begin(); @@ -1423,6 +1420,15 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, return true; } + // Add an EOF pattern for any trailing --implicit-check-not/CHECK-DAG/-NOTs, + // and use the first prefix as a filler for the error message. + if (!DagNotMatches.empty()) { + CheckStrings->emplace_back( + Pattern(Check::CheckEOF, PatternContext.get(), LineNumber + 1), + *Req.CheckPrefixes.begin(), SMLoc::getFromPointer(Buffer.data())); + std::swap(DagNotMatches, CheckStrings->back().DagNotStrings); + } + return false; } @@ -1888,8 +1894,10 @@ bool FileCheck::ValidateCheckPrefixes() { Regex FileCheck::buildCheckPrefixRegex() { // I don't think there's a way to specify an initial value for cl::list, // so if nothing was specified, add the default - if (Req.CheckPrefixes.empty()) + if (Req.CheckPrefixes.empty()) { Req.CheckPrefixes.push_back("CHECK"); + Req.IsDefaultCheckPrefix = true; + } // We already validated the contents of CheckPrefixes so just concatenate // them as alternatives. 
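(Illustration, not part of the patch: with this change, the invocation sketched in the commit message now fails up front instead of passing silently. Assuming a check file that contains no "UNKNOWN-PREFIX:" lines:

  FileCheck -check-prefix=UNKNOWN-PREFIX -implicit-check-not=something <check-file>
  error: no check strings found with prefix 'UNKNOWN-PREFIX:'

Previously the EOF pattern synthesized for --implicit-check-not meant the list of check strings was never empty, so the unknown prefix went unnoticed. Using -implicit-check-not without any explicit -check-prefix is still accepted, because the default CHECK prefix then counts as implicit; the test updates below exercise both cases.)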
diff --git a/llvm/test/FileCheck/implicit-check-not.txt b/llvm/test/FileCheck/implicit-check-not.txt index 3aea712d6506..95dd9fa782df 100644 --- a/llvm/test/FileCheck/implicit-check-not.txt +++ b/llvm/test/FileCheck/implicit-check-not.txt @@ -1,4 +1,16 @@ ; RUN: sed 's#^;.*##' %s | FileCheck -check-prefix=CHECK-PASS -implicit-check-not=warning: %s + +; Check we report an error when an unknown prefix is used together with `-implicit-check-not`. +; RUN: sed 's#^;.*##' %s | %ProtectFileCheckOutput not FileCheck -check-prefix=UNKNOWN-PREFIX -implicit-check-not=abc %s 2>&1 | FileCheck %s -DPREFIX=UNKNOWN-PREFIX -check-prefix CHECK-PREFIX-ERROR +; CHECK-PREFIX-ERROR: error: no check strings found with prefix '[[PREFIX]]:' + +; Check we report an error when the "CHECK" prefix is used explicitly with `-implicit-check-not`, but not present in the input. +; RUN: sed 's#^;.*##' %s | %ProtectFileCheckOutput not FileCheck -check-prefix=CHECK -implicit-check-not=abc %s 2>&1 | FileCheck %s -DPREFIX=CHECK -check-prefix CHECK-PREFIX-ERROR + +; Check we allow using `-implicit-check-not` when there is no `-check-prefix` specified and there +; is no default `CHECK` line in an input. +; RUN: sed 's#^;.*##' %s | FileCheck -implicit-check-not="unique_string" %s + ; RUN: sed 's#^;.*##' %s | %ProtectFileCheckOutput not FileCheck -check-prefix=CHECK-FAIL1 -implicit-check-not=warning: %s 2>&1 | FileCheck %s -check-prefix CHECK-ERROR1 ; RUN: sed 's#^;.*##' %s | %ProtectFileCheckOutput not FileCheck -check-prefix=CHECK-FAIL2 -implicit-check-not=warning: %s 2>&1 | FileCheck %s -check-prefix CHECK-ERROR2 ; RUN: sed 's#^;.*##' %s | %ProtectFileCheckOutput not FileCheck -check-prefix=CHECK-FAIL3 -implicit-check-not=warning: %s 2>&1 | FileCheck %s -check-prefix CHECK-ERROR3 diff --git a/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test b/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test index ff99b97f1047..817ca0ecb561 100644 --- a/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test +++ b/llvm/test/tools/llvm-objcopy/MachO/strip-debug.test @@ -3,7 +3,7 @@ # RUN: yaml2obj %p/Inputs/strip-all-with-dwarf.yaml -o %t # RUN: llvm-objcopy --strip-debug %t %t.stripped -# RUN: llvm-readobj --sections %t.stripped | FileCheck /dev/null --check-prefix=NODWARF \ +# RUN: llvm-readobj --sections %t.stripped | FileCheck /dev/null \ # RUN: --implicit-check-not='Name: __debug' --implicit-check-not='Name: __apple' ## Make sure that all symbols are kept. From 5cef31074ff5ff63a38e0142783849987c598ef8 Mon Sep 17 00:00:00 2001 From: Sergej Jaskiewicz Date: Mon, 13 Apr 2020 14:26:35 +0300 Subject: [PATCH 028/216] Introduce llvm::sys::Process::getProcessId() and adopt it Differential Revision: https://reviews.llvm.org/D78022 --- llvm/include/llvm/Support/Process.h | 5 +++++ .../PerfJITEvents/PerfJITEventListener.cpp | 8 ++++---- llvm/lib/Support/CodeGenCoverage.cpp | 17 ++--------------- llvm/lib/Support/LockFileManager.cpp | 8 ++------ llvm/lib/Support/Unix/Process.inc | 6 ++++++ llvm/lib/Support/Windows/Process.inc | 6 ++++++ llvm/unittests/Support/ProcessTest.cpp | 10 ++++++++++ 7 files changed, 35 insertions(+), 25 deletions(-) diff --git a/llvm/include/llvm/Support/Process.h b/llvm/include/llvm/Support/Process.h index bb5c33dfb38d..0ba6d58ba287 100644 --- a/llvm/include/llvm/Support/Process.h +++ b/llvm/include/llvm/Support/Process.h @@ -42,6 +42,11 @@ namespace sys { /// current executing process. class Process { public: + using Pid = int32_t; + + /// Get the process's identifier. 
+ static Pid getProcessId(); + /// Get the process's page size. /// This may fail if the underlying syscall returns an error. In most cases, /// page size information is used for optimization, and this error can be diff --git a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp index ba9e7476e294..d4c715cc59f6 100644 --- a/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp +++ b/llvm/lib/ExecutionEngine/PerfJITEvents/PerfJITEventListener.cpp @@ -34,9 +34,8 @@ #include #include // mmap() -#include // getpid() #include // clock_gettime(), time(), localtime_r() */ -#include // for getpid(), read(), close() +#include // for read(), close() using namespace llvm; using namespace llvm::object; @@ -81,7 +80,7 @@ class PerfJITEventListener : public JITEventListener { void NotifyDebug(uint64_t CodeAddr, DILineInfoTable Lines); // cache lookups - pid_t Pid; + sys::Process::Pid Pid; // base directory for output data std::string JitPath; @@ -177,7 +176,8 @@ static inline uint64_t perf_get_timestamp(void) { return timespec_to_ns(&ts); } -PerfJITEventListener::PerfJITEventListener() : Pid(::getpid()) { +PerfJITEventListener::PerfJITEventListener() + : Pid(sys::Process::getProcessId()) { // check if clock-source is supported if (!perf_get_timestamp()) { errs() << "kernel does not support CLOCK_MONOTONIC\n"; diff --git a/llvm/lib/Support/CodeGenCoverage.cpp b/llvm/lib/Support/CodeGenCoverage.cpp index 2db4193ce382..93f386b6e23d 100644 --- a/llvm/lib/Support/CodeGenCoverage.cpp +++ b/llvm/lib/Support/CodeGenCoverage.cpp @@ -11,20 +11,14 @@ #include "llvm/Support/CodeGenCoverage.h" -#include "llvm/Config/llvm-config.h" #include "llvm/Support/Endian.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/Process.h" #include "llvm/Support/ScopedPrinter.h" #include "llvm/Support/ToolOutputFile.h" -#if LLVM_ON_UNIX -#include -#elif defined(_WIN32) -#include -#endif - using namespace llvm; static sys::SmartMutex OutputMutex; @@ -89,14 +83,7 @@ bool CodeGenCoverage::emit(StringRef CoveragePrefix, // We can handle locking within a process easily enough but we don't want to // manage it between multiple processes. Use the process ID to ensure no // more than one process is ever writing to the same file at the same time. 
- std::string Pid = -#if LLVM_ON_UNIX - llvm::to_string(::getpid()); -#elif defined(_WIN32) - llvm::to_string(::GetCurrentProcessId()); -#else - ""; -#endif + std::string Pid = llvm::to_string(sys::Process::getProcessId()); std::string CoverageFilename = (CoveragePrefix + Pid).str(); diff --git a/llvm/lib/Support/LockFileManager.cpp b/llvm/lib/Support/LockFileManager.cpp index 88489a658953..a2b56ab295c4 100644 --- a/llvm/lib/Support/LockFileManager.cpp +++ b/llvm/lib/Support/LockFileManager.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/ErrorOr.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/raw_ostream.h" #include @@ -195,12 +196,7 @@ LockFileManager::LockFileManager(StringRef FileName) } raw_fd_ostream Out(UniqueLockFileID, /*shouldClose=*/true); - Out << HostID << ' '; -#if LLVM_ON_UNIX - Out << getpid(); -#else - Out << "1"; -#endif + Out << HostID << ' ' << sys::Process::getProcessId(); Out.close(); if (Out.has_error()) { diff --git a/llvm/lib/Support/Unix/Process.inc b/llvm/lib/Support/Unix/Process.inc index a68b30a546c8..24f16b51af7b 100644 --- a/llvm/lib/Support/Unix/Process.inc +++ b/llvm/lib/Support/Unix/Process.inc @@ -66,6 +66,12 @@ static std::pair getRUsage #endif } +Process::Pid Process::getProcessId() { + static_assert(sizeof(Pid) >= sizeof(pid_t), + "Process::Pid should be big enough to store pid_t"); + return Pid(::getpid()); +} + // On Cygwin, getpagesize() returns 64k(AllocationGranularity) and // offset in mmap(3) should be aligned to the AllocationGranularity. Expected Process::getPageSize() { diff --git a/llvm/lib/Support/Windows/Process.inc b/llvm/lib/Support/Windows/Process.inc index 6eb4a5eb7457..8064d4e17b29 100644 --- a/llvm/lib/Support/Windows/Process.inc +++ b/llvm/lib/Support/Windows/Process.inc @@ -43,6 +43,12 @@ using namespace llvm; +Process::Pid Process::getProcessId() { + static_assert(sizeof(Pid) >= sizeof(DWORD), + "Process::Pid should be big enough to store DWORD"); + return Pid(::GetCurrentProcessId()); +} + // This function retrieves the page size using GetNativeSystemInfo() and is // present solely so it can be called once to initialize the self_process member // below. diff --git a/llvm/unittests/Support/ProcessTest.cpp b/llvm/unittests/Support/ProcessTest.cpp index 83be3a910f0d..86208d4d731a 100644 --- a/llvm/unittests/Support/ProcessTest.cpp +++ b/llvm/unittests/Support/ProcessTest.cpp @@ -21,6 +21,16 @@ namespace { using namespace llvm; using namespace sys; +TEST(ProcessTest, GetProcessIdTest) { + const Process::Pid pid = Process::getProcessId(); + +#ifdef _WIN32 + EXPECT_EQ(pid, ::GetCurrentProcessId()); +#else + EXPECT_EQ(pid, ::getpid()); +#endif +} + TEST(ProcessTest, GetRandomNumberTest) { const unsigned r1 = Process::GetRandomNumber(); const unsigned r2 = Process::GetRandomNumber(); From bac85ab3b55d02f0a1e824712f185af42cd1ea04 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 16 Apr 2020 14:05:29 +0200 Subject: [PATCH 029/216] Revert "[LifetimeAnalysis] Add [[gsl::Pointer]] to llvm::StringRef" This reverts commit 83d5131d87a6f929b21b54e3fc0f9636ff64c808. 
Spams llvm/ADT/StringRef.h:57:11: warning: unknown attribute 'Pointer' ignored [-Wunknown-attributes] --- llvm/include/llvm/ADT/StringRef.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index 337efd641135..ad31517a1ea7 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -54,7 +54,7 @@ namespace llvm { /// situations where the character data resides in some other buffer, whose /// lifetime extends past that of the StringRef. For this reason, it is not in /// general safe to store a StringRef. - class [[gsl::Pointer]] StringRef { + class StringRef { public: static const size_t npos = ~size_t(0); From 38ca7b11db2d22e0fdfbff3f19276f9796f747d3 Mon Sep 17 00:00:00 2001 From: Henry Jen Date: Thu, 16 Apr 2020 07:58:02 -0400 Subject: [PATCH 030/216] Expose AtomicType in the libclang C API. --- clang/include/clang-c/Index.h | 12 ++++++++++-- clang/test/Index/print-type.c | 7 ++++++- clang/tools/c-index-test/c-index-test.c | 6 ++++++ clang/tools/libclang/CXType.cpp | 12 ++++++++++++ clang/tools/libclang/libclang.exports | 1 + 5 files changed, 35 insertions(+), 3 deletions(-) diff --git a/clang/include/clang-c/Index.h b/clang/include/clang-c/Index.h index 0acd50021ed8..8e367b617bd3 100644 --- a/clang/include/clang-c/Index.h +++ b/clang/include/clang-c/Index.h @@ -33,7 +33,7 @@ * compatible, thus CINDEX_VERSION_MAJOR is expected to remain stable. */ #define CINDEX_VERSION_MAJOR 0 -#define CINDEX_VERSION_MINOR 59 +#define CINDEX_VERSION_MINOR 60 #define CINDEX_VERSION_ENCODE(major, minor) (((major)*10000) + ((minor)*1)) @@ -3342,7 +3342,8 @@ enum CXTypeKind { CXType_OCLIntelSubgroupAVCImeDualRefStreamin = 175, - CXType_ExtVector = 176 + CXType_ExtVector = 176, + CXType_Atomic = 177 }; /** @@ -3932,6 +3933,13 @@ CINDEX_LINKAGE long long clang_Type_getOffsetOf(CXType T, const char *S); */ CINDEX_LINKAGE CXType clang_Type_getModifiedType(CXType T); +/** + * Gets the type contained by this atomic type. + * + * If a non-atomic type is passed in, an invalid type is returned. + */ +CINDEX_LINKAGE CXType clang_Type_getValueType(CXType CT); + /** * Return the offset of the field represented by the Cursor. 
* diff --git a/clang/test/Index/print-type.c b/clang/test/Index/print-type.c index 9bf058840825..b58a76937626 100644 --- a/clang/test/Index/print-type.c +++ b/clang/test/Index/print-type.c @@ -22,13 +22,15 @@ struct { struct { struct { - int x; + _Atomic int x; int y; }; } bar; void fun(struct { int x; int y; } *param); +_Atomic(unsigned long) aul; + // RUN: c-index-test -test-print-type %s | FileCheck %s // CHECK: FunctionDecl=f:3:6 (Definition) [type=int *(int *, char *, FooType, int *, void (*)(int))] [typekind=FunctionProto] [canonicaltype=int *(int *, char *, int, int *, void (*)(int))] [canonicaltypekind=FunctionProto] [resulttype=int *] [resulttypekind=Pointer] [args= [int *] [Pointer] [char *] [Pointer] [FooType] [Typedef] [int [5]] [ConstantArray] [void (*)(int)] [Pointer]] [isPOD=0] // CHECK: ParmDecl=p:3:13 (Definition) [type=int *] [typekind=Pointer] [isPOD=1] [pointeetype=int] [pointeekind=Int] @@ -70,4 +72,7 @@ void fun(struct { int x; int y; } *param); // CHECK: StructDecl=:18:1 (Definition) [type=struct (anonymous at {{.*}}print-type.c:18:1)] [typekind=Record] [isPOD=1] [nbFields=2] [isAnon=1] [isAnonRecDecl=0] // CHECK: StructDecl=:23:1 (Definition) [type=struct (anonymous at {{.*}}print-type.c:23:1)] [typekind=Record] [isPOD=1] [nbFields=1] [isAnon=1] [isAnonRecDecl=0] // CHECK: StructDecl=:24:3 (Definition) [type=struct (anonymous at {{.*}}print-type.c:24:3)] [typekind=Record] [isPOD=1] [nbFields=2] [isAnon=1] [isAnonRecDecl=1] +// CHECK: FieldDecl=x:25:17 (Definition) [type=_Atomic(int)] [typekind=Atomic] [valuetype=int] [valuetypekind=Int] [isPOD=0] [isAnonRecDecl=0] +// CHECK: FieldDecl=y:26:9 (Definition) [type=int] [typekind=Int] [isPOD=1] [isAnonRecDecl=0] // CHECK: StructDecl=:30:10 (Definition) [type=struct (anonymous at {{.*}}print-type.c:30:10)] [typekind=Record] [isPOD=1] [nbFields=2] [isAnon=1] [isAnonRecDecl=0] +// CHECK: VarDecl=aul:32:24 [type=_Atomic(unsigned long)] [typekind=Atomic] [valuetype=unsigned long] [valuetypekind=ULong] [isPOD=0] [isAnonRecDecl=0] diff --git a/clang/tools/c-index-test/c-index-test.c b/clang/tools/c-index-test/c-index-test.c index d4de743f2e38..6e82bf9999f6 100644 --- a/clang/tools/c-index-test/c-index-test.c +++ b/clang/tools/c-index-test/c-index-test.c @@ -1579,6 +1579,12 @@ static enum CXChildVisitResult PrintType(CXCursor cursor, CXCursor p, PrintTypeTemplateArgs(CT, " [canonicaltemplateargs/%d="); } } + /* Print the value type if it exists. */ + { + CXType VT = clang_Type_getValueType(T); + if (VT.kind != CXType_Invalid) + PrintTypeAndTypeKind(VT, " [valuetype=%s] [valuetypekind=%s]"); + } /* Print the modified type if it exists. 
*/ { CXType MT = clang_Type_getModifiedType(T); diff --git a/clang/tools/libclang/CXType.cpp b/clang/tools/libclang/CXType.cpp index acecf87d0cda..42da867ac4af 100644 --- a/clang/tools/libclang/CXType.cpp +++ b/clang/tools/libclang/CXType.cpp @@ -115,6 +115,7 @@ static CXTypeKind GetTypeKind(QualType T) { TKCASE(Elaborated); TKCASE(Pipe); TKCASE(Attributed); + TKCASE(Atomic); default: return CXType_Unexposed; } @@ -616,6 +617,7 @@ CXString clang_getTypeKindSpelling(enum CXTypeKind K) { TKIND(OCLEvent); TKIND(OCLQueue); TKIND(OCLReserveID); + TKIND(Atomic); } #undef TKIND return cxstring::createRef(s); @@ -1318,3 +1320,13 @@ enum CXTypeNullabilityKind clang_Type_getNullability(CXType CT) { } return CXTypeNullability_Invalid; } + +CXType clang_Type_getValueType(CXType CT) { + QualType T = GetQualType(CT); + + if (T.isNull() || !T->isAtomicType()) + return MakeCXType(QualType(), GetTU(CT)); + + const auto *AT = T->castAs(); + return MakeCXType(AT->getValueType(), GetTU(CT)); +} diff --git a/clang/tools/libclang/libclang.exports b/clang/tools/libclang/libclang.exports index 9408c02083fd..defbaa91a488 100644 --- a/clang/tools/libclang/libclang.exports +++ b/clang/tools/libclang/libclang.exports @@ -109,6 +109,7 @@ clang_Type_getNumObjCTypeArgs clang_Type_getObjCTypeArg clang_Type_getModifiedType clang_Type_getNullability +clang_Type_getValueType clang_VerbatimBlockLineComment_getText clang_VerbatimLineComment_getText clang_HTMLTagComment_getAsString From 6d2f73f821ed5ea584869924b150ac2e6e65c12e Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Thu, 16 Apr 2020 14:10:06 +0200 Subject: [PATCH 031/216] Revert "Revert "[LifetimeAnalysis] Add [[gsl::Pointer]] to llvm::StringRef"" This reverts commit bac85ab3b55d02f0a1e824712f185af42cd1ea04. --- llvm/include/llvm/ADT/StringRef.h | 2 +- llvm/include/llvm/Support/Compiler.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ADT/StringRef.h b/llvm/include/llvm/ADT/StringRef.h index ad31517a1ea7..add5d37f89f7 100644 --- a/llvm/include/llvm/ADT/StringRef.h +++ b/llvm/include/llvm/ADT/StringRef.h @@ -54,7 +54,7 @@ namespace llvm { /// situations where the character data resides in some other buffer, whose /// lifetime extends past that of the StringRef. For this reason, it is not in /// general safe to store a StringRef. - class StringRef { + class LLVM_GSL_POINTER StringRef { public: static const size_t npos = ~size_t(0); diff --git a/llvm/include/llvm/Support/Compiler.h b/llvm/include/llvm/Support/Compiler.h index 34d8ed8baf2d..a8356ccf812d 100644 --- a/llvm/include/llvm/Support/Compiler.h +++ b/llvm/include/llvm/Support/Compiler.h @@ -289,6 +289,14 @@ #define LLVM_REQUIRE_CONSTANT_INITIALIZATION #endif +/// LLVM_GSL_POINTER - Apply this to non-owning classes like +/// StringRef to enable lifetime warnings. +#if LLVM_HAS_CPP_ATTRIBUTE(gsl::Pointer) +#define LLVM_GSL_POINTER [[gsl::Pointer]] +#else +#define LLVM_GSL_POINTER +#endif + /// LLVM_EXTENSION - Support compilers where we have a keyword to suppress /// pedantic diagnostics. #ifdef __GNUC__ From ee66b5b0da7e54124b87268d33d9fad7a4a93b87 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 12:08:10 +0100 Subject: [PATCH 032/216] Pass.h/cpp - cleanup includes and forward declaration. NFC. 
Remove unused BasicBlock forward declaration from Pass.h and Attributes/BasicBlock includes from Pass.cpp Add BasicBlock forward declaration to UnifyFunctionExitNodes.h which was relying on Pass.h --- llvm/include/llvm/Pass.h | 1 - llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h | 2 ++ llvm/lib/IR/Pass.cpp | 2 -- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/Pass.h b/llvm/include/llvm/Pass.h index 5ed4ca4f6157..b0af15a7dab2 100644 --- a/llvm/include/llvm/Pass.h +++ b/llvm/include/llvm/Pass.h @@ -34,7 +34,6 @@ namespace llvm { class AnalysisResolver; class AnalysisUsage; -class BasicBlock; class Function; class ImmutablePass; class Module; diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h index b8a4fe72ea25..f1789ed2a1b1 100644 --- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h +++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h @@ -22,6 +22,8 @@ namespace llvm { +class BasicBlock; + struct UnifyFunctionExitNodes : public FunctionPass { BasicBlock *ReturnBlock = nullptr; BasicBlock *UnwindBlock = nullptr; diff --git a/llvm/lib/IR/Pass.cpp b/llvm/lib/IR/Pass.cpp index dbdbbf4cf35e..893cf2fe0b6b 100644 --- a/llvm/lib/IR/Pass.cpp +++ b/llvm/lib/IR/Pass.cpp @@ -14,8 +14,6 @@ #include "llvm/Pass.h" #include "llvm/Config/llvm-config.h" -#include "llvm/IR/Attributes.h" -#include "llvm/IR/BasicBlock.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRPrintingPasses.h" #include "llvm/IR/LLVMContext.h" From b9a8e787ae4e178b0fb0ac578ba6b7c96d419ba9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 12:37:46 +0100 Subject: [PATCH 033/216] Parser.h/cpp - cleanup includes and forward declaration. NFC. Parser.h - Reduce MemoryBuffer.h include to just the necessary StringRef.h include and MemoryBufferRef forward declaration Parser.cpp - Remove unused raw_ostream.h include --- llvm/include/llvm/AsmParser/Parser.h | 4 +++- llvm/lib/AsmParser/Parser.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h index b0c603497805..28a0a479cc3a 100644 --- a/llvm/include/llvm/AsmParser/Parser.h +++ b/llvm/include/llvm/AsmParser/Parser.h @@ -13,12 +13,14 @@ #ifndef LLVM_ASMPARSER_PARSER_H #define LLVM_ASMPARSER_PARSER_H -#include "llvm/Support/MemoryBuffer.h" +#include "llvm/ADT/StringRef.h" +#include namespace llvm { class Constant; class LLVMContext; +class MemoryBufferRef; class Module; class ModuleSummaryIndex; struct SlotMapping; diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp index b7f552a6fccb..7f4283218ab3 100644 --- a/llvm/lib/AsmParser/Parser.cpp +++ b/llvm/lib/AsmParser/Parser.cpp @@ -17,9 +17,9 @@ #include "llvm/IR/ModuleSummaryIndex.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" -#include "llvm/Support/raw_ostream.h" #include #include + using namespace llvm; bool llvm::parseAssemblyInto(MemoryBufferRef F, Module *M, From da207407866a683563f6243e2b5f7502783b9842 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 12:58:44 +0100 Subject: [PATCH 034/216] yaml2obj.h - cleanup includes and forward declaration. NFC. 
Reduce StringRef.h/Error.h includes to just the necessary STLExtras.h include and StringRef/Twine forward declarations Remove unused Expected<> forward declaration --- llvm/include/llvm/ObjectYAML/yaml2obj.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/ObjectYAML/yaml2obj.h b/llvm/include/llvm/ObjectYAML/yaml2obj.h index 386551337d86..cf8020d7800b 100644 --- a/llvm/include/llvm/ObjectYAML/yaml2obj.h +++ b/llvm/include/llvm/ObjectYAML/yaml2obj.h @@ -11,14 +11,14 @@ #ifndef LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H #define LLVM_TOOLS_YAML2OBJ_YAML2OBJ_H -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Error.h" +#include "llvm/ADT/STLExtras.h" #include namespace llvm { class raw_ostream; template class SmallVectorImpl; -template class Expected; +class StringRef; +class Twine; namespace object { class ObjectFile; From 69040d5b0bfa59edacc2ad10d517b4270bf76845 Mon Sep 17 00:00:00 2001 From: Stephan Herhut Date: Thu, 16 Apr 2020 13:14:43 +0200 Subject: [PATCH 035/216] [MLIR] Allow for multiple gpu modules during translation. This change makes the ModuleTranslation threadsafe by locking on the LLVMContext. Furthermore, we now clone the llvm module into a new context when compiling to PTX similar to what the OrcJit does. Differential Revision: https://reviews.llvm.org/D78207 --- .../include/mlir/Dialect/LLVMIR/LLVMDialect.h | 10 +++++++ .../include/mlir/Dialect/LLVMIR/LLVMOpBase.td | 1 + .../mlir/Target/LLVMIR/ModuleTranslation.h | 3 +- .../GPUToCUDA/ConvertKernelFuncToCubin.cpp | 10 ++++++- .../ConvertLaunchFuncToCudaCalls.cpp | 12 ++++---- mlir/lib/Dialect/LLVMIR/CMakeLists.txt | 2 ++ mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp | 18 ++++++++++++ mlir/lib/ExecutionEngine/CMakeLists.txt | 2 -- mlir/lib/ExecutionEngine/ExecutionEngine.cpp | 16 ++--------- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 11 +++++++- mlir/test/mlir-cuda-runner/two-modules.mlir | 28 +++++++++++++++++++ 11 files changed, 90 insertions(+), 23 deletions(-) create mode 100644 mlir/test/mlir-cuda-runner/two-modules.mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h index b33b38971249..c081a3df29cb 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMDialect.h @@ -32,6 +32,10 @@ namespace llvm { class Type; class LLVMContext; +namespace sys { +template +class SmartMutex; +} // end namespace sys } // end namespace llvm namespace mlir { @@ -216,6 +220,12 @@ Value createGlobalString(Location loc, OpBuilder &builder, StringRef name, /// function confirms that the Operation has the desired properties. bool satisfiesLLVMModule(Operation *op); +/// Clones the given module into the provided context. This is implemented by +/// transforming the module into bitcode and then reparsing the bitcode in the +/// provided context. 
+std::unique_ptr +cloneModuleIntoNewContext(llvm::LLVMContext *context, llvm::Module *module); + } // end namespace LLVM } // end namespace mlir diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td index 20ed573ab8bd..48eecb4eed87 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOpBase.td @@ -24,6 +24,7 @@ def LLVM_Dialect : Dialect { ~LLVMDialect(); llvm::LLVMContext &getLLVMContext(); llvm::Module &getLLVMModule(); + llvm::sys::SmartMutex &getLLVMContextMutex(); private: friend LLVMType; diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 7ba4a7d21adf..e7223bf7349a 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -106,7 +106,6 @@ class ModuleTranslation { /// Original and translated module. Operation *mlirModule; std::unique_ptr llvmModule; - /// A converter for translating debug information. std::unique_ptr debugTranslation; @@ -114,6 +113,8 @@ class ModuleTranslation { std::unique_ptr ompBuilder; /// Precomputed pointer to OpenMP dialect. const Dialect *ompDialect; + /// Pointer to the llvmDialect; + LLVMDialect *llvmDialect; /// Mappings between llvm.mlir.global definitions and corresponding globals. DenseMap globalsMapping; diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp index 38820f174d98..7cdb0dda4454 100644 --- a/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/ConvertKernelFuncToCubin.cpp @@ -15,6 +15,7 @@ #include "mlir/Conversion/GPUToCUDA/GPUToCUDAPass.h" #include "mlir/Dialect/GPU/GPUDialect.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Attributes.h" #include "mlir/IR/Builders.h" #include "mlir/IR/Function.h" @@ -98,12 +99,19 @@ std::string GpuKernelToCubinPass::translateModuleToPtx( llvm::Module &module, llvm::TargetMachine &target_machine) { std::string ptx; { + // Clone the llvm module into a new context to enable concurrent compilation + // with multiple threads. + // TODO(zinenko): Reevaluate model of ownership of LLVMContext in + // LLVMDialect. 
+ llvm::LLVMContext llvmContext; + auto clone = LLVM::cloneModuleIntoNewContext(&llvmContext, &module); + llvm::raw_string_ostream stream(ptx); llvm::buffer_ostream pstream(stream); llvm::legacy::PassManager codegen_passes; target_machine.addPassesToEmitFile(codegen_passes, pstream, nullptr, llvm::CGFT_AssemblyFile); - codegen_passes.run(module); + codegen_passes.run(*clone); } return ptx; diff --git a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp index 134ca5d6c6e7..bdd9bb66f617 100644 --- a/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp +++ b/mlir/lib/Conversion/GPUToCUDA/ConvertLaunchFuncToCudaCalls.cpp @@ -116,8 +116,8 @@ class GpuLaunchFuncToCudaCallsPass void addParamToList(OpBuilder &builder, Location loc, Value param, Value list, unsigned pos, Value one); Value setupParamsArray(gpu::LaunchFuncOp launchOp, OpBuilder &builder); - Value generateKernelNameConstant(StringRef name, Location loc, - OpBuilder &builder); + Value generateKernelNameConstant(StringRef moduleName, StringRef name, + Location loc, OpBuilder &builder); void translateGpuLaunchCalls(mlir::gpu::LaunchFuncOp launchOp); public: @@ -345,12 +345,13 @@ Value GpuLaunchFuncToCudaCallsPass::setupParamsArray(gpu::LaunchFuncOp launchOp, // %2 = llvm.getelementptr %0[%1, %1] : !llvm<"i8*"> // } Value GpuLaunchFuncToCudaCallsPass::generateKernelNameConstant( - StringRef name, Location loc, OpBuilder &builder) { + StringRef moduleName, StringRef name, Location loc, OpBuilder &builder) { // Make sure the trailing zero is included in the constant. std::vector kernelName(name.begin(), name.end()); kernelName.push_back('\0'); - std::string globalName = std::string(llvm::formatv("{0}_kernel_name", name)); + std::string globalName = + std::string(llvm::formatv("{0}_{1}_kernel_name", moduleName, name)); return LLVM::createGlobalString( loc, builder, globalName, StringRef(kernelName.data(), kernelName.size()), LLVM::Linkage::Internal, llvmDialect); @@ -415,7 +416,8 @@ void GpuLaunchFuncToCudaCallsPass::translateGpuLaunchCalls( // the kernel function. 
auto cuOwningModuleRef = builder.create(loc, getPointerType(), cuModule); - auto kernelName = generateKernelNameConstant(launchOp.kernel(), loc, builder); + auto kernelName = generateKernelNameConstant(launchOp.getKernelModuleName(), + launchOp.kernel(), loc, builder); auto cuFunction = allocatePointer(builder, loc); auto cuModuleGetFunction = getOperation().lookupSymbol(cuModuleGetFunctionName); diff --git a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt index 148bc4bef3e8..833438a70cb9 100644 --- a/mlir/lib/Dialect/LLVMIR/CMakeLists.txt +++ b/mlir/lib/Dialect/LLVMIR/CMakeLists.txt @@ -13,6 +13,8 @@ add_mlir_dialect_library(MLIRLLVMIR target_link_libraries(MLIRLLVMIR PUBLIC LLVMAsmParser + LLVMBitReader + LLVMBitWriter LLVMCore LLVMSupport LLVMFrontendOpenMP diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp index 9ad878006e09..7ce591de3802 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMDialect.cpp @@ -20,6 +20,8 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/AsmParser/Parser.h" +#include "llvm/Bitcode/BitcodeReader.h" +#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Function.h" #include "llvm/IR/Type.h" @@ -1682,6 +1684,9 @@ LLVMDialect::~LLVMDialect() {} llvm::LLVMContext &LLVMDialect::getLLVMContext() { return impl->llvmContext; } llvm::Module &LLVMDialect::getLLVMModule() { return impl->module; } +llvm::sys::SmartMutex &LLVMDialect::getLLVMContextMutex() { + return impl->mutex; +} /// Parse a type registered to this dialect. Type LLVMDialect::parseType(DialectAsmParser &parser) const { @@ -1971,3 +1976,16 @@ bool mlir::LLVM::satisfiesLLVMModule(Operation *op) { return op->hasTrait() && op->hasTrait(); } + +std::unique_ptr +mlir::LLVM::cloneModuleIntoNewContext(llvm::LLVMContext *context, + llvm::Module *module) { + SmallVector buffer; + { + llvm::raw_svector_ostream os(buffer); + WriteBitcodeToFile(*module, os); + } + llvm::MemoryBufferRef bufferRef(StringRef(buffer.data(), buffer.size()), + "cloned module buffer"); + return cantFail(parseBitcodeFile(bufferRef, *context)); +} diff --git a/mlir/lib/ExecutionEngine/CMakeLists.txt b/mlir/lib/ExecutionEngine/CMakeLists.txt index a30f987dbe98..df3268a49d59 100644 --- a/mlir/lib/ExecutionEngine/CMakeLists.txt +++ b/mlir/lib/ExecutionEngine/CMakeLists.txt @@ -17,8 +17,6 @@ target_link_libraries(MLIRExecutionEngine PUBLIC MLIRLLVMIR MLIRTargetLLVMIR - LLVMBitReader - LLVMBitWriter LLVMExecutionEngine LLVMObject LLVMOrcJIT diff --git a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp index 2314dba09f59..25bd45f15885 100644 --- a/mlir/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/mlir/lib/ExecutionEngine/ExecutionEngine.cpp @@ -11,13 +11,12 @@ // //===----------------------------------------------------------------------===// #include "mlir/ExecutionEngine/ExecutionEngine.h" +#include "mlir/Dialect/LLVMIR/LLVMDialect.h" #include "mlir/IR/Function.h" #include "mlir/IR/Module.h" #include "mlir/Support/FileUtilities.h" #include "mlir/Target/LLVMIR.h" -#include "llvm/Bitcode/BitcodeReader.h" -#include "llvm/Bitcode/BitcodeWriter.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/Orc/CompileUtils.h" @@ -211,17 +210,8 @@ Expected> ExecutionEngine::create( // Clone module in a new LLVMContext since translateModuleToLLVMIR buries // ownership too deeply. 
// TODO(zinenko): Reevaluate model of ownership of LLVMContext in LLVMDialect. - SmallVector buffer; - { - llvm::raw_svector_ostream os(buffer); - WriteBitcodeToFile(*llvmModule, os); - } - llvm::MemoryBufferRef bufferRef(StringRef(buffer.data(), buffer.size()), - "cloned module buffer"); - auto expectedModule = parseBitcodeFile(bufferRef, *ctx); - if (!expectedModule) - return expectedModule.takeError(); - std::unique_ptr deserModule = std::move(*expectedModule); + std::unique_ptr deserModule = + LLVM::cloneModuleIntoNewContext(ctx.get(), llvmModule.get()); auto dataLayout = deserModule->getDataLayout(); // Callback to create the object layer with symbol resolution to current diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 78458e88ed8d..4cfa05ee33a3 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -301,7 +301,8 @@ ModuleTranslation::ModuleTranslation(Operation *module, debugTranslation( std::make_unique(module, *this->llvmModule)), ompDialect( - module->getContext()->getRegisteredDialect()) { + module->getContext()->getRegisteredDialect()), + llvmDialect(module->getContext()->getRegisteredDialect()) { assert(satisfiesLLVMModule(mlirModule) && "mlirModule should honor LLVM's module semantics."); } @@ -495,6 +496,9 @@ LogicalResult ModuleTranslation::convertBlock(Block &bb, bool ignoreArguments) { /// Create named global variables that correspond to llvm.mlir.global /// definitions. LogicalResult ModuleTranslation::convertGlobals() { + // Lock access to the llvm context. + llvm::sys::SmartScopedLock scopedLock( + llvmDialect->getLLVMContextMutex()); for (auto op : getModuleBody(mlirModule).getOps()) { llvm::Type *type = op.getType().getUnderlyingType(); llvm::Constant *cst = llvm::UndefValue::get(type); @@ -754,6 +758,9 @@ LogicalResult ModuleTranslation::checkSupportedModuleOps(Operation *m) { } LogicalResult ModuleTranslation::convertFunctions() { + // Lock access to the llvm context. + llvm::sys::SmartScopedLock scopedLock( + llvmDialect->getLLVMContextMutex()); // Declare all functions first because there may be function calls that form a // call graph with cycles. for (auto function : getModuleBody(mlirModule).getOps()) { @@ -798,6 +805,8 @@ std::unique_ptr ModuleTranslation::prepareLLVMModule(Operation *m) { auto *dialect = m->getContext()->getRegisteredDialect(); assert(dialect && "LLVM dialect must be registered"); + // Lock the LLVM context as we might create new types here. 
+ llvm::sys::SmartScopedLock scopedLock(dialect->getLLVMContextMutex()); auto llvmModule = llvm::CloneModule(dialect->getLLVMModule()); if (!llvmModule) diff --git a/mlir/test/mlir-cuda-runner/two-modules.mlir b/mlir/test/mlir-cuda-runner/two-modules.mlir new file mode 100644 index 000000000000..0f01b36f5cee --- /dev/null +++ b/mlir/test/mlir-cuda-runner/two-modules.mlir @@ -0,0 +1,28 @@ +// RUN: mlir-cuda-runner %s --print-ir-after-all --shared-libs=%cuda_wrapper_library_dir/libcuda-runtime-wrappers%shlibext,%linalg_test_lib_dir/libmlir_runner_utils%shlibext --entry-point-result=void | FileCheck %s --dump-input=always + +// CHECK: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] +func @main() { + %arg = alloc() : memref<13xi32> + %dst = memref_cast %arg : memref<13xi32> to memref + %one = constant 1 : index + %sx = dim %dst, 0 : memref + call @mcuMemHostRegisterMemRef1dInt32(%dst) : (memref) -> () + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one) + threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { + %t0 = index_cast %tx : index to i32 + store %t0, %dst[%tx] : memref + gpu.terminator + } + gpu.launch blocks(%bx, %by, %bz) in (%grid_x = %one, %grid_y = %one, %grid_z = %one) + threads(%tx, %ty, %tz) in (%block_x = %sx, %block_y = %one, %block_z = %one) { + %t0 = index_cast %tx : index to i32 + store %t0, %dst[%tx] : memref + gpu.terminator + } + %U = memref_cast %dst : memref to memref<*xi32> + call @print_memref_i32(%U) : (memref<*xi32>) -> () + return +} + +func @mcuMemHostRegisterMemRef1dInt32(%ptr : memref) +func @print_memref_i32(%ptr : memref<*xi32>) From d9c7fc658d2befdca9cc147801abfe50cb364767 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 16 Apr 2020 14:28:18 +0200 Subject: [PATCH 036/216] Revert "[MLIR] Add IndexAttr to primitive attributes kinds in tablegen." This reverts commit 997f33cfeec9cd8c5bc913cf862794e986a4bd39. 
Breaks check-mlir ******************** TEST 'MLIR :: IR/attribute.mlir' FAILED ******************** Script: -- : 'RUN: at line 1'; mlir-opt llvm-project/mlir/test/IR/attribute.mlir -split-input-file -verify-diagnostics | /FileCheck llvm-project/mlir/test/IR/attribute.mlir -- Exit Code: 1 Command Output (stderr): -- llvm-project/mlir/test/IR/attribute.mlir split at line #1:19:3: error: unexpected error: 'test.int_attrs' op requires attribute 'index_attr' "test.int_attrs"() { ^ llvm-project/mlir/test/IR/attribute.mlir split at line #120:6:3: error: unexpected error: 'test.int_attrs' op requires attribute 'index_attr' "test.int_attrs"() { ^ llvm-project/mlir/test/IR/attribute.mlir split at line #120:5:6: error: expected error "'si32_attr' failed to satisfy constraint: 32-bit signed integer attribute" was not produced // expected-error @+1 {{'si32_attr' failed to satisfy constraint: 32-bit signed integer attribute}} ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ llvm-project/mlir/test/IR/attribute.mlir split at line #133:5:3: error: unexpected error: 'test.int_attrs' op requires attribute 'index_attr' "test.int_attrs"() { ^ llvm-project/mlir/test/IR/attribute.mlir split at line #133:4:6: error: expected error "'ui32_attr' failed to satisfy constraint: 32-bit unsigned integer attribute" was not produced // expected-error @+1 {{'ui32_attr' failed to satisfy constraint: 32-bit unsigned integer attribute}} ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ llvm-project/mlir/test/IR/attribute.mlir:9:12: error: CHECK: expected string not found in input // CHECK: any_i32_attr = 5 : ui32 ^ :3:1: note: scanning from here module { ^ :21:28: note: possible intended match here "test.non_negative_int_attr"() {i32attr = 5 : i32, i64attr = 10 : i64} : () -> () --- mlir/include/mlir/IR/OpBase.td | 10 ---------- mlir/test/IR/attribute.mlir | 2 -- mlir/test/lib/Dialect/Test/TestOps.td | 1 - 3 files changed, 13 deletions(-) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 14ef45ed4c7f..e552279e8a8e 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -845,16 +845,6 @@ def BoolAttr : Attr()">, "bool attribute"> { let constBuilderCall = "$_builder.getBoolAttr($0)"; } -// Index attribute. -def IndexAttr : - TypedAttrBase< - Index, "IntegerAttr", - And<[CPred<"$_self.isa()">, - CPred<"$_self.cast().getType().isa()">]>, - "index attribute"> { - let returnType = [{ APInt }]; -} - // Base class for any integer (regardless of signedness semantics) attributes // of fixed width. 
class AnyIntegerAttrBase : diff --git a/mlir/test/IR/attribute.mlir b/mlir/test/IR/attribute.mlir index 32b8f8c25180..31804b274a55 100644 --- a/mlir/test/IR/attribute.mlir +++ b/mlir/test/IR/attribute.mlir @@ -8,8 +8,6 @@ func @int_attrs_pass() { "test.int_attrs"() { // CHECK: any_i32_attr = 5 : ui32 any_i32_attr = 5 : ui32, - // CHECK-SAME: index_attr = 8 : index - index_attr = 8 : index, // CHECK-SAME: si32_attr = 7 : si32 si32_attr = 7 : si32, // CHECK-SAME: ui32_attr = 6 : ui32 diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 524780b89552..6f1ef4a50f67 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -199,7 +199,6 @@ def I64EnumAttrOp : TEST_Op<"i64_enum_attr"> { def IntAttrOp : TEST_Op<"int_attrs"> { let arguments = (ins AnyI32Attr:$any_i32_attr, - IndexAttr:$index_attr, UI32Attr:$ui32_attr, SI32Attr:$si32_attr ); From 48d64f56549f170ccaf32503338cf0efbffd6af1 Mon Sep 17 00:00:00 2001 From: "Kazushi (Jam) Marukawa" Date: Thu, 16 Apr 2020 14:34:53 +0200 Subject: [PATCH 037/216] [VE] Update logical operation instructions Summary: Changing all mnemonic to match assembly instructions to simplify mnemonic naming rules. This time update all fixed-point arithmetic instructions. This also corrects bswp operand type. Reviewed By: simoll Differential Revision: https://reviews.llvm.org/D78177 --- llvm/lib/Target/VE/VEInstrInfo.td | 210 +++++++++++++++--------------- 1 file changed, 103 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td index 8efdc37b0127..2cdb6c15384f 100644 --- a/llvm/lib/Target/VE/VEInstrInfo.td +++ b/llvm/lib/Target/VE/VEInstrInfo.td @@ -24,6 +24,10 @@ include "VEInstrFormats.td" // Instruction Pattern Stuff //===----------------------------------------------------------------------===// +// uimm1 - Generic immediate value. +def uimm1 : Operand, PatLeaf<(imm), [{ + return isUInt<1>(N->getZExtValue()); }]>; + // uimm6 - Generic immediate value. def uimm6 : Operand, PatLeaf<(imm), [{ return isUInt<6>(N->getZExtValue()); }]>; @@ -484,63 +488,55 @@ multiclass RRImopc, } } -// Multiclass for RR type instructions -// Used by cmov instruction - -let Constraints = "$sx = $sd", DisableEncoding = "$sd" in -multiclass RRCMOVmopc, - RegisterClass RC, ValueType Ty, Operand immOp, Operand immOp2> { - def rr : RR< - opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, I64:$sz, I64:$sd), - !strconcat(opcStr, " $sx, $sz, $sy")> { - let cy = 1; - let cz = 1; - let hasSideEffects = 0; - } - def rm0 : RR< - opc, (outs I64:$sx), (ins CCOp:$cf, RC:$sy, immOp2:$sz, I64:$sd), - !strconcat(opcStr, " $sx, (${sz})0, $sy")> { - let cy = 1; - let cz = 0; - let sz{6} = 1; - let hasSideEffects = 0; - } +// Generic RR multiclass with an argument. +// e.g. LDZ, PCNT, and BRV +let cy = 0, sy = 0, hasSideEffects = 0 in +multiclass RRI1mopc, RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag> { + def r : RR; + let cz = 0 in + def m : RR; } -// Multiclass for RR type instructions with only 2 operands -// Used by pcnt, brv +// Special RR multiclass for BSWP instruction. +// e.g. 
BSWP let hasSideEffects = 0 in -multiclass RRI2mopc, RegisterClass RC, ValueType Ty, - Operand immOp2, SDPatternOperator OpNode=null_frag> { - def r : RR< - opc, (outs RC:$sx), (ins RC:$sz), - !strconcat(opcStr, " $sx, $sz"), - [(set Ty:$sx, (OpNode Ty:$sz))]> { - let cy = 1; - let cz = 1; - } - def i : RR< - opc, (outs RC:$sx), (ins RC:$sz), - !strconcat(opcStr, " $sx, $sz"), - [(set Ty:$sx, (OpNode Ty:$sz))]> { - let cy = 0; - let cz = 1; - } - def m0 : RR< - opc, (outs RC:$sx), (ins immOp2:$sz), - !strconcat(opcStr, " $sx, (${sz})0")> { - let cy = 1; - let cz = 0; - let sz{6} = 1; - } - def m1 : RR< - opc, (outs RC:$sx), (ins immOp2:$sz), - !strconcat(opcStr, " $sx, (${sz})1")> { - let cy = 1; - let cz = 0; - } +multiclass RRSWPmopc, + RegisterClass RC, ValueType Ty, + SDPatternOperator OpNode = null_frag> { + let cy = 0 in + def ri : RR; + let cy = 0, cz = 0 in + def mi : RR; } +// Multiclass for CMOV instructions. +// e.g. CMOVL, CMOVW, CMOVD, and etc. +let Constraints = "$sx = $sd", DisableEncoding = "$sd", hasSideEffects = 0, + cfw = ? in +multiclass RRCMOVmopc, RegisterClass RC, ValueType Ty> { + def rr : RR; + let cy = 0 in + def ir : RR; + let cz = 0 in + def rm : RR; + let cy = 0, cz = 0 in + def im : RR; +} // Branch multiclass let isBranch = 1, isTerminator = 1, hasDelaySlot = 1 in @@ -792,19 +788,6 @@ defm ST1B : STOREm<"st1b", 0x15, I32, i32, truncstorei8>; // Section 8.3.1 - FENCE (Fence) // Section 8.3.2 - SVOB (Set Vector Out-of-order memory access Boundary) -// CMOV instructions -let cx = 0, cw = 0, cw2 = 0 in -defm CMOVL : RRCMOVm<"cmov.l.${cf}", 0x3B, I64, i64, simm7, uimm6>; - -let cx = 0, cw = 1, cw2 = 0 in -defm CMOVW : RRCMOVm<"cmov.w.${cf}", 0x3B, I32, i32, simm7, uimm6>; - -let cx = 0, cw = 0, cw2 = 1 in -defm CMOVD : RRCMOVm<"cmov.d.${cf}", 0x3B, I64, f64, simm7, uimm6>; - -let cx = 0, cw = 1, cw2 = 1 in -defm CMOVS : RRCMOVm<"cmov.s.${cf}", 0x3B, F32, f32, simm7, uimm6>; - //----------------------------------------------------------------------------- // Section 8.4 - Fixed-point Operation Instructions //----------------------------------------------------------------------------- @@ -877,30 +860,43 @@ let cx = 1, cw = 1 in defm MINSWZX : RRm<"mins.w.zx", 0x78, I32, i32>; defm MAXSL : RRm<"maxs.l", 0x68, I64, i64>; let cw = 1 in defm MINSL : RRm<"mins.l", 0x68, I64, i64>; +//----------------------------------------------------------------------------- +// Section 8.5 - Logical Operation Instructions +//----------------------------------------------------------------------------- + +// Section 8.5.1 - AND (AND) +defm AND : RRm<"and", 0x44, I64, i64, and>; +let isCodeGenOnly = 1 in defm AND32 : RRm<"and", 0x44, I32, i32, and>; -// 5.3.2.3. 
Logical Arithmetic Operation Instructions +// Section 8.5.2 - OR (OR) +defm OR : RRm<"or", 0x45, I64, i64, or>; +let isCodeGenOnly = 1 in defm OR32 : RRm<"or", 0x45, I32, i32, or>; -let cx = 0 in { - defm AND : RRm<"and", 0x44, I64, i64, and>; - defm OR : RRm<"or", 0x45, I64, i64, or>; - defm XOR : RRm<"xor", 0x46, I64, i64, xor>; - let isCodeGenOnly = 1 in { - defm AND32 : RRm<"and", 0x44, I32, i32, and>; - defm OR32 : RRm<"or", 0x45, I32, i32, or>; - defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>; - } -} +// Section 8.5.3 - XOR (Exclusive OR) +defm XOR : RRm<"xor", 0x46, I64, i64, xor>; +let isCodeGenOnly = 1 in defm XOR32 : RRm<"xor", 0x46, I32, i32, xor>; -// Bits operations +// Section 8.5.4 - EQV (Equivalence) +// Section 8.5.5 - NND (Negate AND) +// Section 8.5.6 - MRG (Merge) -let cx = 0 in { - defm PCNT : RRI2m<"pcnt", 0x38, I64, i64, uimm6, ctpop>; - defm BRV : RRI2m<"brv", 0x39, I64, i64, uimm6, bitreverse>; - defm LDZ : RRI2m<"ldz", 0x67, I64, i64, uimm6, ctlz>; - defm BSWP : RRIm<"bswp", 0x2B, I64, i64, simm7, uimm6>; -} +// Section 8.5.7 - LDZ (Leading Zero Count) +defm LDZ : RRI1m<"ldz", 0x67, I64, i64, ctlz>; + +// Section 8.5.8 - PCNT (Population Count) +defm PCNT : RRI1m<"pcnt", 0x38, I64, i64, ctpop>; + +// Section 8.5.9 - BRV (Bit Reverse) +defm BRV : RRI1m<"brv", 0x39, I64, i64, bitreverse>; +// Section 8.5.10 - BSWP (Byte Swap) +defm BSWP : RRSWPm<"bswp", 0x2B, I64, i64>; +// Section 8.5.11 - CMOV (Conditional Move) +let cw = 0, cw2 = 0 in defm CMOVL : RRCMOVm<"cmov.l.${cfw}", 0x3B, I64, i64>; +let cw = 1, cw2 = 0 in defm CMOVW : RRCMOVm<"cmov.w.${cfw}", 0x3B, I32, i32>; +let cw = 0, cw2 = 1 in defm CMOVD : RRCMOVm<"cmov.d.${cfw}", 0x3B, I64, f64>; +let cw = 1, cw2 = 1 in defm CMOVS : RRCMOVm<"cmov.s.${cfw}", 0x3B, F32, f32>; // 5.3.2.4 Shift Instructions @@ -1326,45 +1322,45 @@ def EXTEND_STACK_GUARD : Pseudo<(outs), (ins), def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCSIOp:$cond)), (EXTRACT_SUBREG - (CMOVLrm0 (icond2cc $cond), - (CMPSLrr i64:$LHS, i64:$RHS), - 63, - (ORim 0, 0)), sub_i32)>; + (CMOVLrm (icond2cc $cond), + (CMPSLrr i64:$LHS, i64:$RHS), + !add(63, 64), + (ORim 0, 0)), sub_i32)>; def : Pat<(i32 (setcc i64:$LHS, i64:$RHS, CCUIOp:$cond)), (EXTRACT_SUBREG - (CMOVLrm0 (icond2cc $cond), - (CMPULrr i64:$LHS, i64:$RHS), - 63, - (ORim 0, 0)), sub_i32)>; + (CMOVLrm (icond2cc $cond), + (CMPULrr i64:$LHS, i64:$RHS), + !add(63, 64), + (ORim 0, 0)), sub_i32)>; def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCSIOp:$cond)), (EXTRACT_SUBREG - (CMOVWrm0 (icond2cc $cond), - (CMPSWSXrr i32:$LHS, i32:$RHS), - 63, - (ORim 0, 0)), sub_i32)>; + (CMOVWrm (icond2cc $cond), + (CMPSWSXrr i32:$LHS, i32:$RHS), + !add(63, 64), + (ORim 0, 0)), sub_i32)>; def : Pat<(i32 (setcc i32:$LHS, i32:$RHS, CCUIOp:$cond)), (EXTRACT_SUBREG - (CMOVWrm0 (icond2cc $cond), - (CMPUWrr i32:$LHS, i32:$RHS), - 63, - (ORim 0, 0)), sub_i32)>; + (CMOVWrm (icond2cc $cond), + (CMPUWrr i32:$LHS, i32:$RHS), + !add(63, 64), + (ORim 0, 0)), sub_i32)>; def : Pat<(i32 (setcc f64:$LHS, f64:$RHS, cond:$cond)), (EXTRACT_SUBREG - (CMOVDrm0 (fcond2cc $cond), - (FCPrr f64:$LHS, f64:$RHS), - 63, - (ORim 0, 0)), sub_i32)>; + (CMOVDrm (fcond2cc $cond), + (FCPrr f64:$LHS, f64:$RHS), + !add(63, 64), + (ORim 0, 0)), sub_i32)>; def : Pat<(i32 (setcc f32:$LHS, f32:$RHS, cond:$cond)), (EXTRACT_SUBREG - (CMOVSrm0 (fcond2cc $cond), - (FCPSrr f32:$LHS, f32:$RHS), - 63, - (ORim 0, 0)), sub_i32)>; + (CMOVSrm (fcond2cc $cond), + (FCPSrr f32:$LHS, f32:$RHS), + !add(63, 64), + (ORim 0, 0)), sub_i32)>; // Special SELECTCC pattern matches // Use 
min/max for better performance. From 11f093fab4a38a652563cde52fcfa65ebcdc65e4 Mon Sep 17 00:00:00 2001 From: Ulysse Beaugnon Date: Thu, 16 Apr 2020 10:28:02 +0200 Subject: [PATCH 038/216] [MLIR] Add IndexAttr to primitive attributes kinds in tablegen. OpBase.td defined attributes kind for all integer types expect index. This commit fixes that by adding an IndexAttr attribute kind. Update the respective tests. Differential Revision: https://reviews.llvm.org/D78195 --- mlir/include/mlir/IR/OpBase.td | 10 ++++++++++ mlir/test/IR/attribute.mlir | 6 ++++++ mlir/test/lib/Dialect/Test/TestOps.td | 1 + 3 files changed, 17 insertions(+) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index e552279e8a8e..14ef45ed4c7f 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -845,6 +845,16 @@ def BoolAttr : Attr()">, "bool attribute"> { let constBuilderCall = "$_builder.getBoolAttr($0)"; } +// Index attribute. +def IndexAttr : + TypedAttrBase< + Index, "IntegerAttr", + And<[CPred<"$_self.isa()">, + CPred<"$_self.cast().getType().isa()">]>, + "index attribute"> { + let returnType = [{ APInt }]; +} + // Base class for any integer (regardless of signedness semantics) attributes // of fixed width. class AnyIntegerAttrBase : diff --git a/mlir/test/IR/attribute.mlir b/mlir/test/IR/attribute.mlir index 31804b274a55..81edebd796b4 100644 --- a/mlir/test/IR/attribute.mlir +++ b/mlir/test/IR/attribute.mlir @@ -8,6 +8,8 @@ func @int_attrs_pass() { "test.int_attrs"() { // CHECK: any_i32_attr = 5 : ui32 any_i32_attr = 5 : ui32, + // CHECK-SAME: index_attr = 8 : index + index_attr = 8 : index, // CHECK-SAME: si32_attr = 7 : si32 si32_attr = 7 : si32, // CHECK-SAME: ui32_attr = 6 : ui32 @@ -17,6 +19,7 @@ func @int_attrs_pass() { "test.int_attrs"() { // CHECK: any_i32_attr = 5 : si32 any_i32_attr = 5 : si32, + index_attr = 8 : index, si32_attr = 7 : si32, ui32_attr = 6 : ui32 } : () -> () @@ -24,6 +27,7 @@ func @int_attrs_pass() { "test.int_attrs"() { // CHECK: any_i32_attr = 5 : i32 any_i32_attr = 5 : i32, + index_attr = 8 : index, si32_attr = 7 : si32, ui32_attr = 6 : ui32 } : () -> () @@ -122,6 +126,7 @@ func @wrong_int_attrs_signedness_fail() { // expected-error @+1 {{'si32_attr' failed to satisfy constraint: 32-bit signed integer attribute}} "test.int_attrs"() { any_i32_attr = 5 : i32, + index_attr = 8 : index, si32_attr = 7 : ui32, ui32_attr = 6 : ui32 } : () -> () @@ -134,6 +139,7 @@ func @wrong_int_attrs_signedness_fail() { // expected-error @+1 {{'ui32_attr' failed to satisfy constraint: 32-bit unsigned integer attribute}} "test.int_attrs"() { any_i32_attr = 5 : i32, + index_attr = 8 : index, si32_attr = 7 : si32, ui32_attr = 6 : si32 } : () -> () diff --git a/mlir/test/lib/Dialect/Test/TestOps.td b/mlir/test/lib/Dialect/Test/TestOps.td index 6f1ef4a50f67..524780b89552 100644 --- a/mlir/test/lib/Dialect/Test/TestOps.td +++ b/mlir/test/lib/Dialect/Test/TestOps.td @@ -199,6 +199,7 @@ def I64EnumAttrOp : TEST_Op<"i64_enum_attr"> { def IntAttrOp : TEST_Op<"int_attrs"> { let arguments = (ins AnyI32Attr:$any_i32_attr, + IndexAttr:$index_attr, UI32Attr:$ui32_attr, SI32Attr:$si32_attr ); From 07c1978b15b4e9daefbf358e6fd185b5aa269f98 Mon Sep 17 00:00:00 2001 From: Jonathan Coe Date: Thu, 16 Apr 2020 14:20:51 +0100 Subject: [PATCH 039/216] [clang-format] Do not interpret C# deconstruction in a foreach as a cast Reviewers: krasimir Reviewed By: krasimir Subscribers: cfe-commits, MyDeveloperDay Tags: #clang-format, #clang Differential Revision: 
https://reviews.llvm.org/D78295 --- clang/lib/Format/TokenAnnotator.cpp | 4 ++++ clang/unittests/Format/FormatTestCSharp.cpp | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index 8204623645a4..6532f8108f08 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1775,6 +1775,10 @@ class AnnotatingParser { if (Tok.Next->is(tok::question)) return false; + // `foreach((A a, B b) in someList)` should not be seen as a cast. + if (Tok.Next->is(Keywords.kw_in) && Style.isCSharp()) + return false; + // Functions which end with decorations like volatile, noexcept are unlikely // to be casts. if (Tok.Next->isOneOf(tok::kw_noexcept, tok::kw_volatile, tok::kw_const, diff --git a/clang/unittests/Format/FormatTestCSharp.cpp b/clang/unittests/Format/FormatTestCSharp.cpp index b0e4e76cefe7..67571d3909bf 100644 --- a/clang/unittests/Format/FormatTestCSharp.cpp +++ b/clang/unittests/Format/FormatTestCSharp.cpp @@ -624,6 +624,7 @@ TEST_F(FormatTestCSharp, CSharpSpaces) { Style.SpaceBeforeCpp11BracedList = true; Style.Cpp11BracedListStyle = false; Style.SpacesInContainerLiterals = false; + Style.SpaceAfterCStyleCast = false; verifyFormat(R"(new Car { "Door", 0.1 })", Style); verifyFormat(R"(new Car { 0.1, "Door" })", Style); @@ -642,6 +643,12 @@ TEST_F(FormatTestCSharp, CSharpSpaces) { verifyFormat(R"(char[,,] rawCharArray = MakeCharacterGrid();)", Style); + // Not seen as a C-style cast. + verifyFormat(R"(// +foreach ((A a, B b) in someList) { +})", + Style); + Style.SpacesInSquareBrackets = true; verifyFormat(R"(private float[ , ] Values;)", Style); verifyFormat(R"(string dirPath = args?[ 0 ];)", Style); From fdf9bad573c11760a4c83586bb48dbc3cd9d96c7 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Wed, 15 Apr 2020 13:32:31 +0200 Subject: [PATCH 040/216] [Float2Int] Stop passing around a reference to the class member Roots. NFC The Float2IntPass got a class member called Roots, but Roots was also passed around to member function as a reference. This patch simply remove those references. --- llvm/include/llvm/Transforms/Scalar/Float2Int.h | 5 ++--- llvm/lib/Transforms/Scalar/Float2Int.cpp | 9 ++++----- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h index f04b98a19d82..d7f36456fc2b 100644 --- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h +++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h @@ -30,13 +30,12 @@ class Float2IntPass : public PassInfoMixin { bool runImpl(Function &F, const DominatorTree &DT); private: - void findRoots(Function &F, const DominatorTree &DT, - SmallPtrSet &Roots); + void findRoots(Function &F, const DominatorTree &DT); void seen(Instruction *I, ConstantRange R); ConstantRange badRange(); ConstantRange unknownRange(); ConstantRange validateRange(ConstantRange R); - void walkBackwards(const SmallPtrSetImpl &Roots); + void walkBackwards(); void walkForwards(); bool validateAndTransform(); Value *convert(Instruction *I, Type *ToTy); diff --git a/llvm/lib/Transforms/Scalar/Float2Int.cpp b/llvm/lib/Transforms/Scalar/Float2Int.cpp index 33b0712bfe49..83f4c402ed4d 100644 --- a/llvm/lib/Transforms/Scalar/Float2Int.cpp +++ b/llvm/lib/Transforms/Scalar/Float2Int.cpp @@ -120,8 +120,7 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { // Find the roots - instructions that convert from the FP domain to // integer domain. 
-void Float2IntPass::findRoots(Function &F, const DominatorTree &DT, - SmallPtrSet &Roots) { +void Float2IntPass::findRoots(Function &F, const DominatorTree &DT) { for (BasicBlock &BB : F) { // Unreachable code can take on strange forms that we are not prepared to // handle. For example, an instruction may have itself as an operand. @@ -184,7 +183,7 @@ ConstantRange Float2IntPass::validateRange(ConstantRange R) { // Breadth-first walk of the use-def graph; determine the set of nodes // we care about and eagerly determine if some of them are poisonous. -void Float2IntPass::walkBackwards(const SmallPtrSetImpl &Roots) { +void Float2IntPass::walkBackwards() { std::deque Worklist(Roots.begin(), Roots.end()); while (!Worklist.empty()) { Instruction *I = Worklist.back(); @@ -525,9 +524,9 @@ bool Float2IntPass::runImpl(Function &F, const DominatorTree &DT) { Ctx = &F.getParent()->getContext(); - findRoots(F, DT, Roots); + findRoots(F, DT); - walkBackwards(Roots); + walkBackwards(); walkForwards(); bool Modified = validateAndTransform(); From 2ec5520a54ef9b359c6154adf857ba690bc117f1 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 16 Apr 2020 09:27:37 -0400 Subject: [PATCH 041/216] Disallow [[nodiscard]] on a function pointer declaration. This is not allowed by [dcl.attr.nodiscard]p1 for the standard attribute, but is still supported for the [[clang::warn_unused_result]] spelling. --- clang/lib/Sema/SemaDeclAttr.cpp | 6 ++++++ clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p2.cpp | 2 +- clang/test/SemaCXX/warn-unused-result.cpp | 8 ++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1a8a73660cf4..3205b4472db2 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -2826,6 +2826,12 @@ static void handleWarnUnusedResult(Sema &S, Decl *D, const ParsedAttr &AL) { StringRef Str; if ((AL.isCXX11Attribute() || AL.isC2xAttribute()) && !AL.getScopeName()) { + // The standard attribute cannot be applied to variable declarations such + // as a function pointer. + if (isa(D)) + S.Diag(AL.getLoc(), diag::warn_attribute_wrong_decl_type_str) + << AL << "functions, classes, or enumerations"; + // If this is spelled as the standard C++17 attribute, but not in C++17, // warn about using it as an extension. If there are attribute arguments, // then claim it's a C++2a extension instead. 
diff --git a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p2.cpp b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p2.cpp index 3d3223cda756..e2397c12e2e9 100644 --- a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p2.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.nodiscard/p2.cpp @@ -26,7 +26,7 @@ void f() { (void)get_e(); } -[[nodiscard]] volatile char &(*fp)(); +[[nodiscard]] volatile char &(*fp)(); // expected-warning {{'nodiscard' attribute only applies to functions, classes, or enumerations}} void g() { fp(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} diff --git a/clang/test/SemaCXX/warn-unused-result.cpp b/clang/test/SemaCXX/warn-unused-result.cpp index f1de4618a741..d0bb4c9317dd 100644 --- a/clang/test/SemaCXX/warn-unused-result.cpp +++ b/clang/test/SemaCXX/warn-unused-result.cpp @@ -246,3 +246,11 @@ void g() { f(b); // expected-warning {{ignoring return value}} } } // namespace PR39837 + +namespace PR45520 { +[[nodiscard]] bool (*f)(); // expected-warning {{'nodiscard' attribute only applies to functions, classes, or enumerations}} +[[clang::warn_unused_result]] bool (*g)(); +__attribute__((warn_unused_result)) bool (*h)(); + +void i([[nodiscard]] bool (*fp)()); // expected-warning {{'nodiscard' attribute only applies to functions, classes, or enumerations}} +} From f54312277cdbc9e52657ec904ca4c1c333208c43 Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 16 Apr 2020 09:46:00 -0400 Subject: [PATCH 042/216] [mlir][Linalg] Drop function attribute from generic ops. The function attribute in generic ops is not paying for itself. A region is the more standardized way of specifying a custom computation. If needed this region can call a function directly. This is deemed more natural than managing a dedicated function attribute. This also simplifies named ops generation by trimming unnecessary complexity. 
Differential Revision: https://reviews.llvm.org/D78266 --- .../Dialect/Linalg/IR/LinalgStructuredOps.td | 78 ++---- .../mlir/Dialect/Utils/StructuredOpsUtils.h | 4 - mlir/lib/Dialect/Linalg/EDSC/Builders.cpp | 1 - mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 121 ++------- mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 4 +- .../Linalg/Transforms/LinalgToLoops.cpp | 29 --- mlir/test/Dialect/Linalg/invalid.mlir | 232 +++++------------- mlir/test/Dialect/Linalg/loops.mlir | 86 ------- mlir/test/Dialect/Linalg/roundtrip.mlir | 95 ++++--- .../Dialect/Linalg/transform-patterns.mlir | 82 ++++--- .../TestLinalgTransformPatterns.td | 6 +- 11 files changed, 206 insertions(+), 532 deletions(-) diff --git a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td index 0ff455391cb4..61d909139f1b 100644 --- a/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td +++ b/mlir/include/mlir/Dialect/Linalg/IR/LinalgStructuredOps.td @@ -523,7 +523,6 @@ class GenericOpBase : LinalgStructuredBase_Op { AffineMapArrayAttr:$indexing_maps, ArrayAttr:$iterator_types, OptionalAttr:$doc, - OptionalAttr:$fun, OptionalAttr:$library_call); let results = (outs Variadic:$output_tensors); let regions = (region AnyRegion:$region); @@ -531,7 +530,7 @@ class GenericOpBase : LinalgStructuredBase_Op { SmallVector linalgTraitAttrNames() { return SmallVector{ getArgsInAttrName(), getArgsOutAttrName(), getDocAttrName(), - getFunAttrName(), getIndexingMapsAttrName(), getLibraryCallAttrName(), + getIndexingMapsAttrName(), getLibraryCallAttrName(), getIteratorTypesAttrName() }; } @@ -540,12 +539,6 @@ class GenericOpBase : LinalgStructuredBase_Op { unsigned getNumOutputs() { return args_out().getSExtValue(); } - FuncOp getFunction() { - auto moduleOp = getParentOfType(); - return fun().hasValue() ? - moduleOp.lookupSymbol(fun().getValue()) : FuncOp(); - } - StringRef getLibraryCallName() { return library_call().hasValue() ? library_call().getValue() : ""; } @@ -581,13 +574,6 @@ def GenericOp : GenericOpBase<"generic"> { - args_in: an I64Attr representing the number of input (readonly) views - args_out: an I64Attr representing the number of output (readwrite) views - doc [optional]: a documentation string - - fun: a FlatSymbolRefAttr that must resolve to an existing function - symbol. To support inplace updates in a generic fashion, the signature - of the function must be: - ``` - fun([input views element types], [output views element types]) - -> ([output views element types]) - ``` - indexing_maps: a list of AffineMapAttr, one AffineMapAttr per each input and output view. Such AffineMapAttr specifies the mapping between the loops and the indexing within each view. 
@@ -604,11 +590,6 @@ def GenericOp : GenericOpBase<"generic"> { Example: Defining a #matmul_trait attribute in MLIR can be done as follows: ```mlir - func @fma(%a: f32, %b: f32, %c: f32) -> f32 { - %d = mulf %a, %b: f32 - %e = addf %c, %d: f32 - return %e: f32 - } #matmul_accesses = [ (m, n, k) -> (m, k), (m, n, k) -> (k, n), @@ -616,7 +597,6 @@ def GenericOp : GenericOpBase<"generic"> { ] #matmul_trait = { doc = "C(m, n) += A(m, k) * B(k, n)", - fun = @fma, indexing_maps = #matmul_accesses, library_call = "linalg_matmul", n_views = [2, 1], @@ -626,10 +606,14 @@ def GenericOp : GenericOpBase<"generic"> { And can be reused in multiple places as: ```mlir - linalg.generic #matmul_trait %A, %B, %C [other-attributes] : - memref, - memref, - memref + linalg.generic #matmul_trait %A, %B, %C [other-attributes] { + (%a: f32, %b: f32, %c: f32) : + %d = mulf %a, %b: f32 + %e = addf %c, %d: f32 + linalg_yield %e : f32 + } : memref, + memref, + memref ``` This may lower to either: @@ -649,9 +633,9 @@ def GenericOp : GenericOpBase<"generic"> { %a = load %A[%m, %k] : memref %b = load %B[%k, %n] : memref %c = load %C[%m, %n] : memref - %d = call @func_of_elements(%a, %b, %c) - : (f32, f32, f32) -> (f32) - store %d, %C[%m, %n] : memref + %d = mulf %a, %b: f32 + %e = addf %c, %d: f32 + store %e, %C[%m, %n] : memref } } } @@ -662,7 +646,7 @@ def GenericOp : GenericOpBase<"generic"> { mixing input and output ranked tensor values with input and output memrefs. ```mlir - %C = linalg.generic #trait_attribute %A, %B {other-attributes} : + %C = linalg.generic #trait_attribute %A, %B {other-attributes} {region} : tensor, memref -> (tensor) @@ -708,13 +692,6 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { - args_in: an I64Attr representing the number of input (readonly) views - args_out: an I64Attr representing the number of output (readwrite) views - doc [optional]: a documentation string - - fun: a FlatSymbolRefAttr that must resolve to an existing function - symbol. To support inplace updates in a generic fashion, the signature - of the function must be: - ``` - fun([index types of induction variables], [input views element types], - [output views element types]) -> ([output views element types]) - ``` - indexing_maps: a list of AffineMapAttr, one AffineMapAttr per each input and output view. Such AffineMapAttr specifies the mapping between the loops and the indexing within each view. 
@@ -732,15 +709,6 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { Defining a #matmul_trait attribute in MLIR can be done as follows: ```mlir - func @fma(%offset_m: index, %offset_n: index, %offset_k: index, - %a: f32, %b: f32, %c: f32) - -> f32 - { - "some_optional_condition"(%offset_m, %offset_n, %offset_k) - %d = mulf %a, %b: f32 - %e = addf %c, %d: f32 - return %e: f32 - } #matmul_accesses = [ (m, n, k) -> (m, k), (m, n, k) -> (k, n), @@ -748,7 +716,6 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { ] #matmul_trait = { doc = "C(m, n) += A(m, k) * B(k, n)", - fun = @fma, indexing_maps = #matmul_accesses, library_call = "linalg_matmul", n_views = [2, 1], @@ -759,10 +726,16 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { And can be reused in multiple places as: ```mlir - linalg.indexed_generic #matmul_trait %A, %B, %C [other-attributes] : - memref, - memref, - memref + linalg.indexed_generic #matmul_trait %A, %B, %C [other-attributes] { + (%offset_m: index, %offset_n: index, %offset_k: index, + %a: f32, %b: f32, %c: f32) : + "some_optional_computation"(%offset_m, %offset_n, %offset_k) + %d = mulf %a, %b: f32 + %e = addf %c, %d: f32 + linalg_yield %e : f32 + } : memref, + memref, + memref ``` This may lower to either: @@ -784,8 +757,9 @@ def IndexedGenericOp : GenericOpBase<"indexed_generic"> { %a = load %A[%m, %k] : memref %b = load %B[%k, %n] : memref %c = load %C[%m, %n] : memref - %d = call @func_of_elements_and_indices(%m, %n, %k, %a, %b, %c) - : (index, index, index, f32, f32, f32) -> (f32) + "some_optional_computation"(%m, %n, %k) + %d = mulf %a, %b: f32 + %e = addf %c, %d: f32 store %d, %C[%m, %n] : memref } } diff --git a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h index 6262e7757c6c..5a36aabfab75 100644 --- a/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h +++ b/mlir/include/mlir/Dialect/Utils/StructuredOpsUtils.h @@ -66,10 +66,6 @@ constexpr StringRef getArgsOutAttrName() { return "args_out"; } /// string of the structured op. constexpr StringRef getDocAttrName() { return "doc"; } -/// Attribute name for the StrArrayAttr which encodes the SymbolAttr for the -/// MLIR function that implements the body of the structured op. -constexpr StringRef getFunAttrName() { return "fun"; } - /// Attribute name for the StrArrayAttr which encodes the external library /// function that implements the structured op. 
constexpr StringRef getLibraryCallAttrName() { return "library_call"; } diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp index 59a565c5e395..2fa09b7422a9 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp @@ -177,7 +177,6 @@ Operation *mlir::edsc::makeGenericLinalgOp( builder.getAffineMapArrayAttr(maps), builder.getStrArrayAttr(iteratorStrTypes), StringAttr() /*doc*/, - FlatSymbolRefAttr() /*fun*/, StringAttr() /*library_call*/ /* TODO: other attributes in op */ ) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index df1a957d344c..9f664586453a 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -133,10 +133,11 @@ static void printGenericOp(OpAsmPrinter &p, GenericOpType op) { attrs.push_back(attr); auto dictAttr = DictionaryAttr::get(attrs, op.getContext()); - p << op.getOperationName() << " " << dictAttr << " " << op.getOperands(); + p << op.getOperationName() << " " << dictAttr; + p.printOptionalAttrDict(op.getAttrs(), attrNames); + p << " " << op.getOperands(); if (!op.region().empty()) p.printRegion(op.region()); - p.printOptionalAttrDict(op.getAttrs(), attrNames); p << ": " << op.getOperandTypes(); auto outputTensorTypes = op.getResultTypes(); if (!outputTensorTypes.empty()) @@ -156,21 +157,21 @@ static ParseResult parseGenericOp(OpAsmParser &parser, OperationState &result) { // The name is unimportant as we will overwrite result.attributes. // The core linalg traits must contain the information necessary to pass the // verifier. - if (parser.parseAttribute(dictAttr, "_", result.attributes) || - parser.parseOperandList(operandsInfo)) + if (parser.parseAttribute(dictAttr, "_", result.attributes)) return failure(); result.attributes.assign(dictAttr.getValue().begin(), dictAttr.getValue().end()); + // Optional attributes may be added. + if (parser.parseOptionalAttrDict(result.attributes) || + parser.parseOperandList(operandsInfo)) + return failure(); + Region ®ion = *result.addRegion(); SmallVector operandTypes, regionTypes; - // Optional attributes may be added. - // Either Optional getFunAttrName() attribute or region must be specified. - if (!dictAttr.get(getFunAttrName()) && - parser.parseOptionalRegion(region, regionOperandsInfo, regionTypes)) + if (parser.parseRegion(region, regionOperandsInfo, regionTypes)) return failure(); - if (parser.parseOptionalAttrDict(result.attributes) || - parser.parseColonTypeList(operandTypes)) + if (parser.parseColonTypeList(operandTypes)) return failure(); // Generic ops may specify that a subset of its outputs are tensors. Such // outputs are specified in the result type. 
@@ -183,10 +184,7 @@ static ParseResult parseGenericOp(OpAsmParser &parser, OperationState &result) { parser.getCurrentLocation(), result.operands); } -template -static LogicalResult verifyBlockArgs(GenericOpType op, Block &block); - -template <> LogicalResult verifyBlockArgs(GenericOp op, Block &block) { +LogicalResult verifyBlockArgs(GenericOp op, Block &block) { auto nOperands = op.getNumOperands(); if (block.getNumArguments() != nOperands) return op.emitOpError("expected number of block arguments to match number " @@ -205,7 +203,7 @@ template <> LogicalResult verifyBlockArgs(GenericOp op, Block &block) { return success(); } -template <> LogicalResult verifyBlockArgs(IndexedGenericOp op, Block &block) { +LogicalResult verifyBlockArgs(IndexedGenericOp op, Block &block) { auto nInputViews = op.getNumInputs(); auto nLoops = op.getNumLoops(); auto nOperands = op.getNumOperands(); @@ -234,81 +232,6 @@ template <> LogicalResult verifyBlockArgs(IndexedGenericOp op, Block &block) { return success(); } -template -static LogicalResult verifyFuncArgs(GenericOpType op, FunctionType funType); - -template -static LogicalResult verifyFuncArgsGeneric(GenericOpType op, - FunctionType funType) { - auto res = verifyFuncArgs(op, funType); - if (failed(res)) - return res; - - auto nInputs = op.getNumInputs(); - auto nOutputs = op.getNumOutputs(); - // linalg.generic output element types are exactly the function results. - for (unsigned idx = 0; idx < nOutputs; ++idx) { - ShapedType shapedType = op.getShapedType(nInputs + idx); - if (funType.getResult(idx) != shapedType.getElementType()) - return op.emitOpError("expected function result ") - << (idx + 1) << " of the same type as elemental type " - << shapedType.getElementType() << " of output " << (idx + 1); - } - return success(); -} - -template <> LogicalResult verifyFuncArgs(GenericOp op, FunctionType funType) { - auto nOperands = op.getNumOperands(); - if (funType.getNumInputs() != nOperands) - return op.emitOpError( - "expected function arguments to match number of operands"); - if (funType.getNumResults() != op.getNumOutputs()) - return op.emitOpError("expected function results(") - << funType.getNumResults() << ") to match number of outputs(" - << op.getNumOutputs() << ")"; - - // linalg.generic operands element types are exactly the first function - // arguments. - for (unsigned idx = 0; idx < nOperands; ++idx) { - ShapedType shapedType = op.getShapedType(idx); - if (funType.getInput(idx) != shapedType.getElementType()) - return op.emitOpError("expected function argument ") - << (idx + 1) << " of the same type as elemental type " - << shapedType.getElementType() << " of operand " << (idx + 1); - } - - return success(); -} - -template <> -LogicalResult verifyFuncArgs(IndexedGenericOp op, FunctionType funType) { - auto nLoops = op.getNumLoops(); - auto nOutputs = op.getNumOutputs(); - auto nOperands = op.getNumOperands(); - if (funType.getNumInputs() != nOperands + nLoops) - return op.emitOpError("expected function arguments to match number of " - "loops + number of operands"); - if (funType.getNumResults() != nOutputs) - return op.emitOpError( - "expected function results to match number of outputs"); - for (unsigned i = 0; i < nLoops; ++i) - if (!funType.getInput(i).isIndex()) - return op.emitOpError("expected function argument ") - << (i + 1) << " to be an index"; - - // linalg.generic operands element types are exactly the first function - // arguments. 
- for (unsigned idx = 0; idx < nOperands; ++idx) { - ShapedType shapedType = op.getShapedType(idx); - if (funType.getInput(idx + nLoops) != shapedType.getElementType()) - return op.emitOpError("expected function argument ") - << (idx + nLoops + 1) << " of the same type as elemental type " - << shapedType.getElementType() << " of input " << (idx + 1); - } - - return success(); -} - template static LogicalResult verifyGenericOp(GenericOpType op) { auto nInputViews = op.getNumInputs(); @@ -320,20 +243,10 @@ static LogicalResult verifyGenericOp(GenericOpType op) { << " inputs (tensor or buffer) and output buffer operands"; auto ®ion = op.region(); - auto funOp = op.getFunction(); - auto funType = funOp ? funOp.getType() : FunctionType(); - if (!region.empty()) { - if (region.getBlocks().size() != 1) - return op.emitOpError("expected region with 1 block"); - if (failed(verifyBlockArgs(op, region.getBlocks().front()))) - return failure(); - } else { - if (!funOp || !funOp.getType()) - return op.emitOpError( - "expected function attribute to refer to a defined symbol"); - if (failed(verifyFuncArgsGeneric(op, funType))) - return failure(); - } + if (region.getBlocks().size() != 1) + return op.emitOpError("expected region with 1 block"); + if (failed(verifyBlockArgs(op, region.getBlocks().front()))) + return failure(); SmallVector indexingMaps; indexingMaps.reserve(op.indexing_maps().size()); diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index 96cbdab5ac47..a5f4cd9e4592 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -382,8 +382,7 @@ static bool areTensorOpsFusible(LinalgOp producer, LinalgOp consumer, // - only handle ops that use regions for specifying the scalar operations. if (!producerOp || !consumerOp || producerOp.getNumOutputs() != 1 || producerOp.getResult(0) != consumerOp.getOperand(consumerIdx) || - producerOp.getNumParallelLoops() != producerOp.getNumLoops() || - producerOp.fun() || consumerOp.fun()) + producerOp.getNumParallelLoops() != producerOp.getNumLoops()) return false; // Get the consumer index map. The number of results of the consumer index map @@ -472,7 +471,6 @@ Optional mlir::linalg::fuseTensorOps(OpBuilder &b, LinalgOp producer, b.getI64IntegerAttr(fusedArgsIn), b.getI64IntegerAttr(fusedArgsOut), b.getArrayAttr(fusedIndexingMapAttrs), consumerOp.iterator_types(), /*doc=*/nullptr, - /*fun=*/nullptr, /*library_call=*/nullptr); // Build the region of the fused op. diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp index 529448497728..07a2c370a152 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp @@ -400,21 +400,6 @@ class LinalgScopedEmitter { indexedValues[nInputs + i] = std_load(output, indexing); } - auto funcOp = genericOp.getFunction(); - if (funcOp) { - // 2. Emit call. - Operation *callOp = std_call(funcOp, indexedValues); - assert(callOp->getNumResults() == genericOp.getNumOutputs()); - - // 3. Emit std_store. - for (unsigned i = 0; i < nOutputs; ++i) { - Value output = genericOp.getOutputBuffer(i); - ValueHandleArray indexing(makeCanonicalAffineApplies( - b, loc, genericOp.getOutputIndexingMap(i), allIvs)); - std_store(callOp->getResult(i), output, indexing); - } - return; - } // TODO(ntv): When a region inliner exists, use it. // 2. Inline region, currently only works for a single basic block. // 3. 
Emit std_store. @@ -495,20 +480,6 @@ class LinalgScopedEmitter { indexedValues[nLoops + nInputs + i] = std_load(output, indexing); } - if (auto funcOp = indexedGenericOp.getFunction()) { - // 2. Emit call. - Operation *callOp = std_call(funcOp, indexedValues); - assert(callOp->getNumResults() == indexedGenericOp.getNumOutputs()); - - // 3. Emit std_store. - for (unsigned i = 0; i < nOutputs; ++i) { - Value output = indexedGenericOp.getOutputBuffer(i); - ValueHandleArray indexing(makeCanonicalAffineApplies( - b, loc, indexedGenericOp.getOutputIndexingMap(i), allIvs)); - std_store(callOp->getResult(i), output, indexing); - } - return; - } // TODO(ntv): When a region inliner exists, use it. // 2. Inline region, currently only works for a single basic block. // 3. Emit std_store. diff --git a/mlir/test/Dialect/Linalg/invalid.mlir b/mlir/test/Dialect/Linalg/invalid.mlir index 0041f97d7eea..e6414a0fbd78 100644 --- a/mlir/test/Dialect/Linalg/invalid.mlir +++ b/mlir/test/Dialect/Linalg/invalid.mlir @@ -54,200 +54,131 @@ func @yield_parent(%arg0: memref(off + i)>>) { // ----- -func @generic_at_least_2_operands(%arg0: memref) { - // expected-error @+1 {{op expected 2 or more operands}} +func @generic_no_region(%arg0: memref) { + // expected-error @+6 {{expected '{' to begin a region}} linalg.generic { args_in = 1, args_out = 1, - fun = @foo, indexing_maps = [ affine_map<() -> (0)> ], iterator_types = [] - } %arg0: memref + } %arg0 : memref } // ----- -func @generic_exactly_2_views(%arg0: memref) { - // expected-error @+1 {{op expected exactly 2 inputs (tensor or buffer) and output buffer operands}} +func @generic_at_least_2_operands(%arg0: memref) { + // expected-error @+1 {{op expected 2 or more operands}} linalg.generic { args_in = 1, args_out = 1, - fun = @foo, indexing_maps = [ affine_map<() -> (0)> ], iterator_types = [] - } %arg0, %arg0, %arg0: memref, memref, memref + } %arg0 {} : memref } // ----- -func @generic_undefined_fun(%arg0: memref) { - // expected-error @+1 {{op expected function attribute to refer to a defined symbol}} +func @generic_exactly_2_views(%arg0: memref) { + // expected-error @+1 {{op expected exactly 2 inputs (tensor or buffer) and output buffer operands}} linalg.generic { args_in = 1, args_out = 1, - fun = @foo, - indexing_maps = [ affine_map<() -> (0)> ], - iterator_types = [] - } %arg0, %arg0: memref, memref -} - -// ----- - -func @foo() { return } - -func @generic_mismatched_num_arguments(%arg0: memref) { - // expected-error @+1 {{op expected function arguments to match number of operands}} - linalg.generic { - args_in = 0, - args_out = 1, - fun = @foo, indexing_maps = [ affine_map<() -> (0)> ], iterator_types = [] - } %arg0: memref + } %arg0, %arg0, %arg0 {}: memref, memref, memref } // ----- -func @foo(%0: i32) { return } - func @generic_mismatched_num_returns(%arg0: memref) { - // expected-error @+1 {{op expected function results(0) to match number of outputs(1)}} + // expected-error @+8 {{op expected number of yield values (1) to match the number of operands of the enclosing linalg.generic op (0)}} linalg.generic { args_in = 0, args_out = 1, - fun = @foo, - indexing_maps = [ affine_map<() -> (0)> ], - iterator_types = [] - } %arg0: memref -} - -// ----- - -func @foo(%0: i32, %1: i32, %2: i32) { return } - -func @generic_mismatched_num_returns(%0: memref, %1: memref) { - // expected-error @+1 {{op expected function argument 2 of the same type as elemental type 'f32' of operand 2}} - linalg.generic { - args_in = 3, - args_out = 0, - fun = @foo, - indexing_maps = [ 
affine_map<() -> (0)> ], - iterator_types = [] - } %0, %1, %1: memref, memref, memref -} - -// ----- - -func @foo(%0: i32, %1: i32, %2: f32) -> i32 { return %1: i32} - -func @generic_mismatched_num_returns(%0: memref, %1: memref) { - // expected-error @+1 {{op expected function result 1 of the same type as elemental type 'f32' of output 1}} - linalg.generic { - args_in = 2, - args_out = 1, - fun = @foo, - indexing_maps = [ affine_map<() -> (0)> ], + indexing_maps = [ affine_map<() -> ()> ], iterator_types = [] - } %0, %0, %1: memref, memref, memref + } %arg0 { + ^bb(%0: f32): + linalg.yield + }: memref } // ----- -func @foo(%0: i32) -> i32 { return %0: i32 } - func @generic_symbol_in_map(%arg0: memref) { // expected-error @+1 {{op expected indexing_map #0 to have no symbols}} linalg.generic { args_in = 0, args_out = 1, - fun = @foo, indexing_maps = [ affine_map<()[N] -> (0)> ], iterator_types = ["parallel"] - } %arg0: memref + } %arg0 { + ^bb(%i : i32): + }: memref } // ----- -func @foo(%0: i32) -> i32 { return %0: i32 } - func @generic_wrong_dim_in_map(%arg0: memref<1xi32>) { // expected-error @+1 {{op expected indexing_map #0 to have 1 dim(s) to match the number of loops}} linalg.generic { args_in = 0, args_out = 1, - fun = @foo, indexing_maps = [ affine_map<() -> (0)> ], iterator_types = ["parallel"] - } %arg0: memref<1xi32> + } %arg0 { + ^bb(%i : i32): + }: memref<1xi32> } // ----- -func @foo(%0: f32) -> f32 { return %0: f32 } - func @generic_one_d_view(%arg0: memref(off + i)>>) { // expected-error @+1 {{op expected indexing_map #0 results to match view rank: 'memref (d0 + s0)>>'}} linalg.generic { args_in = 0, args_out = 1, - fun = @foo, indexing_maps = [ affine_map<() -> (0, 0)> ], iterator_types = [] - } %arg0: memref(off + i)>> -} - -// ----- - -func @foo(%0: i32) -> f32 { - %1 = constant 0.0: f32 - return %1: f32 -} - -func @generic_fun_arg_0_element_type(%arg0: memref(off + i)>>) { - // expected-error @+1 {{op expected function argument 1 of the same type as elemental type 'f32' of operand 1}} - linalg.generic { - args_in = 0, - args_out = 1, - fun = @foo, - indexing_maps = [ affine_map<() -> (0)> ], - iterator_types = [] - } %arg0: memref(off + i)>> + } %arg0 { + ^bb(%f : f32): + linalg.yield %f: f32 + }: memref(off + i)>> } // ----- -func @foo(%0: f32) -> i4 { - %1 = constant 1: i4 - return %1: i4 -} - -func @generic_fun_result_0_element_type(%arg0: memref(off + i)>>) { - // expected-error @+1 {{op expected function result 1 of the same type as elemental type 'f32' of output 1}} +func @generic_result_0_element_type(%arg0: memref(off + i)>>) { + // expected-error @+9 {{'linalg.yield' op type of yield operand 1 ('i4') doesn't match the element type of the enclosing linalg.generic op ('f32')}} linalg.generic { args_in = 0, args_out = 1, - fun = @foo, - indexing_maps = [ affine_map<() -> (0)> ], - iterator_types = [] - } %arg0: memref(off + i)>> + indexing_maps = [ affine_map<(i) -> (i)> ], + iterator_types = ["parallel"] + } %arg0 { + ^bb(%0: f32): + %1 = constant 1: i4 + linalg.yield %1: i4 + }: memref(off + i)>> } // ----- -func @foo(%0: f32, %1: f32) -> f32 { return %1: f32 } - func @generic_singular_maps(%arg0: memref(off + i)>>, %arg1: memref(off + i)>>) { // expected-error @+1 {{op expected the concatenation of maps in indexing_map to be invertible}} linalg.generic { args_in = 1, args_out = 1, - fun = @foo, indexing_maps = [ affine_map<(i, j) -> (i + j)>, affine_map<(i, j) -> (i + j)> ], iterator_types = ["parallel","parallel"] - } %arg0, %arg1: memref(off + i)>>, memref(off 
+ i)>> + } %arg0, %arg1 { + ^bb(%0: f32, %1: f32): + linalg.yield %1: f32 + }: memref(off + i)>>, + memref(off + i)>> } //////////////////////////////////////////////////////////////////////////////// @@ -341,88 +272,53 @@ func @indexed_generic_block_arg_type(%arg0: memref) { // ----- -func @foo(%f: f32) -> (f32) { - return %f : f32 -} -func @indexed_generic_fun_arg_count(%arg0: memref) { - // expected-error @+1 {{op expected function arguments to match number of loops + number of operands}} - linalg.indexed_generic { - args_in = 0, - args_out = 1, - indexing_maps = [ affine_map<(d0) -> (d0)> ], - iterator_types = ["parallel"], - fun = @foo - } %arg0: memref -} - -// ----- - -func @foo(%i: i32, %val: f32) -> (f32) { - return %val : f32 -} -func @indexed_generic_fun_induction_var_arg_type(%arg0: memref) { - // expected-error @+1 {{op expected function argument 1 to be an index}} - linalg.indexed_generic { - args_in = 0, - args_out = 1, - iterator_types = ["parallel"], - indexing_maps = [ affine_map<(i) -> (i)> ], - fun = @foo - } %arg0 : memref -} - -// ----- - -func @foo(%i: index, %val: i1) -> (i1) { - return %val : i1 -} -func @indexed_generic_fun_arg_type(%arg0: memref) { - // expected-error @+1 {{op expected function argument 2 of the same type as elemental type 'f32' of input 1}} +func @indexed_generic_arg_count(%arg0: memref) { + // expected-error @+1 {{op expected number of block arguments to match number of operands + number of loops}} linalg.indexed_generic { args_in = 0, args_out = 1, - indexing_maps = [ affine_map<(d0) -> (d0)> ], - iterator_types = ["parallel"], - fun = @foo - } %arg0: memref + indexing_maps = [ affine_map<()[] -> ()> ], + iterator_types = [] + } %arg0 { + ^bb(%0: index, %1: f32): + linalg.yield %1: f32 + } : memref + return } // ----- -func @foo(%i: index, %val: i1) -> (i1, i1) { - return %val, %val : i1, i1 -} -func @indexed_generic_fun_result_count(%arg0: memref) { - // expected-error @+1 {{op expected function results to match number of outputs}} +func @indexed_generic_induction_var_arg_type(%arg0: memref) { + // expected-error @+1 {{op expected block argument 1 to be an index}} linalg.indexed_generic { args_in = 0, args_out = 1, - indexing_maps = [ affine_map<(d0) -> (d0)> ], iterator_types = ["parallel"], - fun = @foo - } %arg0: memref + indexing_maps = [ affine_map<(i) -> (i)> ] + } %arg0 { + ^bb(%0: i32, %1: f32): + linalg.yield %1: f32 + } : memref } // ----- -func @foo(%i: index, %val: i32) -> (f32) { - %val_float = sitofp %val : i32 to f32 - return %val_float : f32 -} -func @indexed_generic_fun_result_count(%arg0: memref) { - // expected-error @+1 {{op expected function result 1 of the same type as elemental type 'i32' of output 1}} +func @indexed_generic_result_count(%arg0: memref) { + // expected-error @+8 {{op expected number of yield values (1) to match the number of operands of the enclosing linalg.generic op (2)}} linalg.indexed_generic { args_in = 0, args_out = 1, indexing_maps = [ affine_map<(d0) -> (d0)> ], - iterator_types = ["parallel"], - fun = @foo - } %arg0: memref + iterator_types = ["parallel"] + } %arg0 { + ^bb(%i: index, %val: f32): + linalg.yield %val, %val: f32, f32 + }: memref } // ----- -func @generic_fun_result_0_element_type(%arg0: memref(off + i)>>) { +func @generic_result_0_element_type(%arg0: memref(off + i)>>) { // expected-error @+9 {{type of yield operand 1 ('i1') doesn't match the element type of the enclosing linalg.generic op ('f32')}} linalg.generic { args_in = 0, @@ -453,7 +349,7 @@ func 
@generic_result_tensor_type(%arg0: memref(off // ----- -func @generic_fun_result_0_element_type(%arg0: memref) { +func @generic_result_0_element_type(%arg0: memref) { // expected-error @+1 {{'linalg.dot' op expected 3 operands, but found 2}} linalg.dot(%arg0, %arg0): memref, memref } diff --git a/mlir/test/Dialect/Linalg/loops.mlir b/mlir/test/Dialect/Linalg/loops.mlir index 48e4b6ecd10d..3751c105f310 100644 --- a/mlir/test/Dialect/Linalg/loops.mlir +++ b/mlir/test/Dialect/Linalg/loops.mlir @@ -533,51 +533,11 @@ func @pooling_sum(%arg0: memref, // CHECKPARALLEL: %[[RES:.*]] = addf %[[LHS]], %[[RHS]] : f32 // CHECKPARALLEL: store %[[RES]], %{{.*}}[%{{.*}}, %{{.*}}] : memref -func @foo(%0: f32, %1: f32, %2: f32) -> (f32, f32) { - %f0 = constant 0.0 : f32 - return %f0, %f0 : f32, f32 -} #accesses = [ affine_map<(i, j, k) -> (i, j)>, affine_map<(i, j, k) -> (i, j, k)>, affine_map<(i, j, k) -> (i, k, j)> ] -#trait = { - args_in = 1, - args_out = 2, - iterator_types = ["parallel", "parallel", "parallel"], - indexing_maps = #accesses, - fun = @foo, - library_call = "some_external_function_name_1", - doc = "B(i,j,k), C(i,k,j) = foo(A(i, j), B(i,j,k), C(i,k,j))" -} -func @generic_function(%arg0: memref, %arg1: memref, %arg2: memref) { - linalg.generic #trait %arg0, %arg1, %arg2: - memref, memref, memref - return -} -// CHECKLOOP-LABEL: @foo -// CHECKLOOP-LABEL: @generic_function -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECKLOOP: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) -// CHECKLOOP: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref - -// CHECKPARALLEL-LABEL: @foo -// CHECKPARALLEL-LABEL: @generic_function -// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECKPARALLEL: %[[res:.*]]:2 = call @foo(%[[a]], %[[b]], %[[c]]) : (f32, f32, f32) -> (f32, f32) -// CHECKPARALLEL: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref - #trait2 = { args_in = 1, args_out = 2, @@ -617,52 +577,6 @@ func @generic_region(%arg0: memref, %arg1: // CHECKPARALLEL: store %[[d]], %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref // CHECKPARALLEL: store %[[e]], %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -func @indexed_foo(%i: index, %j: index, %k: index, %0: f32, %1: f32, %2: f32) -> (f32, f32) { - %i_int = index_cast %i: index to i32 - %i_float = sitofp %i_int : i32 to f32 - return %i_float, %i_float : f32, f32 -} -#trait3 = { - args_in = 1, - args_out = 2, - iterator_types = ["parallel", "parallel", "parallel"], - indexing_maps = #accesses, - fun = @indexed_foo, - library_call = "some_external_function_name_1", - doc = "b(i,j,k), c(i,k,j) = foo(a(i, j), b(i,j,k), c(i,k,j))" -} -func @indexed_generic_function( - %arg0: memref, - %arg1: memref, - %arg2: memref) { - linalg.indexed_generic #trait3 %arg0, %arg1, %arg2: - memref, - memref, - memref - return -} -// 
CHECKLOOP-LABEL: @indexed_foo -// CHECKLOOP-LABEL: @indexed_generic_function -// CHECKLOOP: loop.for %[[i:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[j:.*]] = {{.*}} -// CHECKLOOP: loop.for %[[k:.*]] = {{.*}} -// CHECKLOOP: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKLOOP: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECKLOOP: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) -// CHECKLOOP: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKLOOP: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref - -// CHECKPARALLEL-LABEL: @indexed_foo -// CHECKPARALLEL-LABEL: @indexed_generic_function -// CHECKPARALLEL: loop.parallel (%[[i:[a-zA-Z0-9_]*]], %[[j:[a-zA-Z0-9_]*]], %[[k:[a-zA-Z0-9_]*]]) -// CHECKPARALLEL: %[[a:.*]] = load %{{.*}}[%[[i]], %[[j]]] : memref -// CHECKPARALLEL: %[[b:.*]] = load %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: %[[c:.*]] = load %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref -// CHECKPARALLEL: %[[res:.*]]:2 = call @indexed_foo(%[[i]], %[[j]], %[[k]], %[[a]], %[[b]], %[[c]]) : (index, index, index, f32, f32, f32) -> (f32, f32) -// CHECKPARALLEL: store %[[res]]#0, %{{.*}}[%[[i]], %[[j]], %[[k]]] : memref -// CHECKPARALLEL: store %[[res]]#1, %{{.*}}[%[[i]], %[[k]], %[[j]]] : memref - #trait4 = { args_in = 1, args_out = 2, diff --git a/mlir/test/Dialect/Linalg/roundtrip.mlir b/mlir/test/Dialect/Linalg/roundtrip.mlir index c28c671d2885..89b910e7b04a 100644 --- a/mlir/test/Dialect/Linalg/roundtrip.mlir +++ b/mlir/test/Dialect/Linalg/roundtrip.mlir @@ -289,11 +289,6 @@ func @pooling_sum(%arg0: memref, // CHECK-DAG: #[[strided2D:.*]] = affine_map<(d0, d1)[s0, s1] -> (d0 * s1 + s0 + d1)> // CHECK-DAG: #[[strided3D:.*]] = affine_map<(d0, d1, d2)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2 + d2)> -func @foo(%0: vector<3x4xi4>, %1: f32) -> f32 { - %f0 = constant 0.0 : f32 - return %f0 : f32 -} - #accesses = [ affine_map<(i, j, k) -> (j, i)>, affine_map<(i, j, k) -> (i, k, i + j)> @@ -304,46 +299,45 @@ func @foo(%0: vector<3x4xi4>, %1: f32) -> f32 { args_out = 1, indexing_maps = #accesses, iterator_types = ["parallel", "parallel", "parallel"], - fun = @foo, library_call = "some_external_function_name_1" } func @generic(%arg0: memref, offset: ?, strides: [?, 1]>, %arg1: memref) { - linalg.generic #trait %arg0, %arg1 {foo = 1} : - memref, offset: ?, strides: [?, 1]>, - memref + linalg.generic #trait {foo = 1} %arg0, %arg1 { + ^bb(%0: vector<3x4xi4>, %1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } : memref, offset: ?, strides: [?, 1]>, + memref return } -// CHECK-LABEL: func @foo // CHECK-LABEL: func @generic -// CHECK: linalg.generic {args_in = 1 : i64, args_out = 1 : i64, fun = @foo, +// CHECK: linalg.generic {args_in = 1 : i64, args_out = 1 : i64, // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_1" -// CHECK-SAME: {foo = 1 : i64}: -// CHECK-SAME: memref, #[[strided2D]]>, memref +// CHECK-SAME: {foo = 1 : i64} +// CHECK: memref, #[[strided2D]]>, memref func @generic_with_tensor_input(%arg0: tensor>, %arg1: memref) { - linalg.generic #trait %arg0, %arg1 {foo = 1} : - tensor>, - memref + linalg.generic #trait {foo = 1} %arg0, %arg1 { + ^bb(%0: vector<3x4xi4>, %1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } : tensor>, + 
memref return } // CHECK-LABEL: func @generic_with_tensor_input -// CHECK: linalg.generic {args_in = 1 : i64, args_out = 1 : i64, fun = @foo, +// CHECK: linalg.generic {args_in = 1 : i64, args_out = 1 : i64, // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_1"} -// CHECK-SAME: {foo = 1 : i64}: -// CHECK-SAME: tensor>, memref +// CHECK-SAME: {foo = 1 : i64} +// CHECK: tensor>, memref // ----- -func @foo(%0: vector<3x4xi4>, %1: f32) -> f32 { - %f0 = constant 0.0 : f32 - return %f0 : f32 -} - #accesses = [ affine_map<(i, j, k) -> (j, i)>, affine_map<(i, j, k) -> (i, k, i + j)> @@ -354,31 +348,30 @@ func @foo(%0: vector<3x4xi4>, %1: f32) -> f32 { args_out = 1, indexing_maps = #accesses, iterator_types = ["parallel", "parallel", "parallel"], - fun = @foo, library_call = "some_external_function_name_1" } func @generic_with_tensor_input_and_output( %arg0: tensor>, %arg1: tensor) -> (tensor) { - %0 = linalg.generic #trait2 %arg0, %arg1 {foo = 1} : - tensor>, tensor -> tensor + %0 = linalg.generic #trait2 {foo = 1} %arg0, %arg1 { + ^bb(%0: vector<3x4xi4>, %1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } : tensor>, tensor -> tensor return %0 : tensor } // CHECK-LABEL: func @generic_with_tensor_input_and_output -// CHECK: linalg.generic {args_in = 2 : i64, args_out = 1 : i64, fun = @foo, +// CHECK: linalg.generic {args_in = 2 : i64, args_out = 1 : i64, // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], -// CHECK-SAME: library_call = "some_external_function_name_1"} %{{.*}}, %{{.*}} {foo = 1 : i64}: -// CHECK-SAME: tensor>, tensor -> tensor +// CHECK-SAME: library_call = "some_external_function_name_1"} +// CHECK-SAME: {foo = 1 : i64} +// CHECK-SAME: %{{.*}}, %{{.*}} +// CHECK: tensor>, tensor -> tensor // CHECK: return {{.*}} : tensor // ----- -func @foo(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32) -> f32 { - %f0 = constant 0.0 : f32 - return %f0 : f32 -} - #accesses = [ affine_map<(i, j, k) -> (j, i)>, affine_map<(i, j, k) -> (i, k, i + j)> @@ -389,22 +382,26 @@ func @foo(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32) -> f32 { args_out = 1, indexing_maps = #accesses, iterator_types = ["parallel", "parallel", "parallel"], - fun = @foo, library_call = "some_external_function_name_1" } func @indexed_generic_with_tensor_input_and_output( %arg0: tensor>, %arg1: tensor) -> (tensor) { - %0 = linalg.indexed_generic #trait2 %arg0, %arg1 {foo = 1} : - tensor>, tensor -> tensor + %0 = linalg.indexed_generic #trait2 {foo = 1} %arg0, %arg1 { + ^bb(%i: index, %j: index, %k: index, %0: vector<3x4xi4>, %1: f32) : + %f0 = constant 0.0 : f32 + linalg.yield %f0 : f32 + } : tensor>, tensor -> tensor return %0 : tensor } // CHECK-LABEL: func @indexed_generic_with_tensor_input_and_output -// CHECK: linalg.indexed_generic {args_in = 2 : i64, args_out = 1 : i64, fun = @foo, +// CHECK: linalg.indexed_generic {args_in = 2 : i64, args_out = 1 : i64, // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], -// CHECK-SAME: library_call = "some_external_function_name_1"} %{{.*}}, %{{.*}} {foo = 1 : i64}: -// CHECK-SAME: tensor>, tensor -> tensor +// CHECK-SAME: library_call = "some_external_function_name_1"} +// CHECK-SAME: {foo = 1 : i64} +// CHECK-SAME: %{{.*}}, %{{.*}} +// CHECK: tensor>, tensor -> tensor // CHECK: return {{.*}} : tensor // ----- @@ -460,10 
+457,10 @@ func @indexed_generic_op_zero_rank(%arg0: tensor) -> (tensor<3x4xf32>) func @generic_region(%arg0: memref, offset: ?, strides: [?, 1]>, %arg1: memref) { - linalg.generic #trait3 %arg0, %arg1 { + linalg.generic #trait3 {foo = 1} %arg0, %arg1 { ^bb(%a: vector<3x4xi4>, %b: f32) : linalg.yield %b : f32 - } {foo = 1}: memref, offset: ?, strides: [?, 1]>, + } : memref, offset: ?, strides: [?, 1]>, memref return } @@ -471,17 +468,18 @@ func @generic_region(%arg0: memref, offset: ?, strides: [?, 1 // CHECK: linalg.generic {args_in = 1 : i64, args_out = 1 : i64, // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_2" +// CHECK-SAME: {foo = 1 : i64} // CHECK: ^{{.*}}(%{{.*}}: vector<3x4xi4>, %{{.*}}: f32): // CHECK: linalg.yield %{{.*}} : f32 -// CHECK: } {foo = 1 : i64}: memref, #[[strided2D]]>, -// CHECK-SAME: memref +// CHECK: memref, #[[strided2D]]>, +// CHECK-SAME: memref func @indexed_generic(%arg0: memref, offset: ?, strides: [?, 1]>, %arg1: memref) { - linalg.indexed_generic #trait3 %arg0, %arg1 { + linalg.indexed_generic #trait3 {foo = 1} %arg0, %arg1 { ^bb(%i: index, %j: index, %k: index, %a: vector<3x4xi4>, %b: f32) : linalg.yield %b : f32 - } {foo = 1}: memref, offset: ?, strides: [?, 1]>, + }: memref, offset: ?, strides: [?, 1]>, memref return } @@ -489,9 +487,10 @@ func @indexed_generic(%arg0: memref, offset: ?, strides: [?, // CHECK: linalg.indexed_generic {args_in = 1 : i64, args_out = 1 : i64, // CHECK-SAME: indexing_maps = [#{{.*}}, #{{.*}}], iterator_types = ["parallel", "parallel", "parallel"], // CHECK-SAME: library_call = "some_external_function_name_2" +// CHECK-SAME: {foo = 1 : i64} // CHECK: ^{{.*}}(%{{.*}}: index, %{{.*}}: index, %{{.*}}: index, %{{.*}}: vector<3x4xi4>, %{{.*}}: f32): // CHECK: linalg.yield %{{.*}} : f32 -// CHECK: } {foo = 1 : i64}: memref, #[[strided2D]]>, +// CHECK: }: memref, #[[strided2D]]>, // CHECK-SAME: memref // ----- diff --git a/mlir/test/Dialect/Linalg/transform-patterns.mlir b/mlir/test/Dialect/Linalg/transform-patterns.mlir index a0a7b74d4257..7f76819b0849 100644 --- a/mlir/test/Dialect/Linalg/transform-patterns.mlir +++ b/mlir/test/Dialect/Linalg/transform-patterns.mlir @@ -212,57 +212,71 @@ func @test_vectorize_fill(%A : memref<8x16xf32>, %arg0 : f32) { // CHECK-LABEL: func @test_vectorize_fill // CHECK: vector.broadcast {{.*}} : f32 to vector<8x16xf32> -func @fma(%a: f32, %b: f32, %c: f32) -> f32 { - %d = mulf %a, %b: f32 - %e = addf %c, %d: f32 - return %e: f32 - } #matmul_accesses = [ - affine_map<(m, n, k) -> (m, k)>, - affine_map<(m, n, k) -> (k, n)>, - affine_map<(m, n, k) -> (m, n)> + affine_map<(m, n, k) -> (m, k)>, + affine_map<(m, n, k) -> (k, n)>, + affine_map<(m, n, k) -> (m, n)> ] #generic_matmul_trait = { - args_in = 2, - args_out = 1, - fun = @fma, - indexing_maps = #matmul_accesses, - library_call = "linalg_matmul", - iterator_types = ["parallel", "parallel", "reduction"] - } + args_in = 2, + args_out = 1, + indexing_maps = #matmul_accesses, + library_call = "linalg_matmul", + iterator_types = ["parallel", "parallel", "reduction"] +} func @permute_generic(%A: memref, %B: memref, %C: memref) { - linalg.generic #generic_matmul_trait %A, %B, %C : memref, memref, memref - + linalg.generic #generic_matmul_trait %A, %B, %C { + ^bb(%a: f32, %b: f32, %c: f32): + %d = mulf %a, %b: f32 + %e = addf %c, %d: f32 + linalg.yield %e: f32 + }: memref, + memref, + memref return } // CHECK-LABEL : func @fma // CHECK-LABEL : 
func @permute_generic -// CHECK : linalg.generic {args_in = 2, args_out = 1, fun = @fma, indexing_maps = [#[[kn]], #[[nm]], #[[km]]], iterator_types = ["parallel", "reduction", "parallel"], library_call = "linalg_matmul"} %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref +// CHECK : linalg.generic {args_in = 2, args_out = 1, +// CHECK-SAME : indexing_maps = [#[[kn]], #[[nm]], #[[km]]], +// CHECK-SAME : iterator_types = ["parallel", "reduction", "parallel"], +// CHECK-SAME : library_call = "linalg_matmul"} %{{.*}}, %{{.*}}, %{{.*}} +// CHECK : memref, +// CHECK-SAME : memref, +// CHECK-SAME : memref -func @fma_indexed(%i: index, %j: index, %k: index, %a: f32, %b: f32, %c: f32) -> f32 { - %d = mulf %a, %b: f32 - %e = addf %c, %d: f32 - return %e: f32 -} #indexed_matmul_trait = { - args_in = 2, - args_out = 1, - fun = @fma_indexed, - indexing_maps = #matmul_accesses, - library_call = "linalg_matmul_indexed", - iterator_types = ["parallel", "parallel", "reduction"] + args_in = 2, + args_out = 1, + indexing_maps = #matmul_accesses, + library_call = "linalg_matmul_indexed", + iterator_types = ["parallel", "parallel", "reduction"] } -func @permute_generic_indexed(%A: memref, - %B: memref, - %C: memref) { - linalg.indexed_generic #indexed_matmul_trait %A, %B, %C : memref, memref, memref +func @permute_generic_indexed( + %A: memref, + %B: memref, + %C: memref) { + linalg.indexed_generic #indexed_matmul_trait %A, %B, %C { + ^bb(%i: index, %j: index, %k: index, %a: f32, %b: f32, %c: f32): + %d = mulf %a, %b: f32 + %e = addf %c, %d: f32 + linalg.yield %e: f32 + } : memref, + memref, + memref return } // CHECK-LABEL : func @fma_indexed // CHECK-LABEL : func @permute_generic_indexed -// CHECK : linalg.indexed_generic {args_in = 2, args_out = 1, fun = @fma, indexing_maps = [#[[kn]], #[[nm]], #[[km]]], iterator_types = ["parallel", "reduction", "parallel"], library_call = "linalg_matmul_indexed"} %{{.*}}, %{{.*}}, %{{.*}} : memref, memref, memref +// CHECK : linalg.indexed_generic {args_in = 2, args_out = 1, +// CHECK-SAME : indexing_maps = [#[[kn]], #[[nm]], #[[km]]], +// CHECK-SAME : iterator_types = ["parallel", "reduction", "parallel"], +// CHECK-SAME : library_call = "linalg_matmul_indexed"} %{{.*}}, %{{.*}}, %{{.*}} : +// CHECK : memref, +// CHECK-SAME : memref, +// CHECK-SAME : memref func @dot_perm(%x: memref, %y: memref, diff --git a/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td b/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td index 795247ad2dff..a55cdbffbdb6 100644 --- a/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td +++ b/mlir/test/lib/DeclarativeTransforms/TestLinalgTransformPatterns.td @@ -111,7 +111,7 @@ def : Pattern<(FillOp:$op $_, $_), HasLinalgTransformMarker<"VECTORIZE">, PreconditionVectorizeLinalgOp ]>>)]>; -def : Pattern<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_, $_), +def : Pattern<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_), [(VectorizeLinalgOp)], [(Constraint, @@ -122,7 +122,7 @@ def : Pattern<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_, $_), //===----------------------------------------------------------------------===// // Linalg generic permutation patterns. 
//===----------------------------------------------------------------------===// -def : Pat<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_, $_), +def : Pat<(GenericOp:$op $_, $_, $_, $_, $_, $_, $_), (PermuteGenericLinalgOp<[1, 2, 0], "PERMUTE"> $op), [(Constraint ]>>)]>; -def : Pat<(IndexedGenericOp:$op $_, $_, $_, $_, $_, $_, $_, $_), +def : Pat<(IndexedGenericOp:$op $_, $_, $_, $_, $_, $_, $_), (PermuteGenericLinalgOp<[1, 2, 0], "PERMUTE"> $op), [(Constraint Date: Thu, 16 Apr 2020 09:52:44 -0400 Subject: [PATCH 043/216] [libc++] Support arbitrary .sh.X extensions in the new format This allows writing all kinds of ShTests, for example .sh.py tests for testing Python code. --- libcxx/utils/libcxx/test/newformat.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libcxx/utils/libcxx/test/newformat.py b/libcxx/utils/libcxx/test/newformat.py index 10c6ff325402..5e8e90706eaf 100644 --- a/libcxx/utils/libcxx/test/newformat.py +++ b/libcxx/utils/libcxx/test/newformat.py @@ -9,6 +9,7 @@ import lit import os import pipes +import re class CxxStandardLibraryTest(lit.formats.TestFormat): """ @@ -29,8 +30,7 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): FOO.link.pass.cpp - Compiles and links successfully, run not attempted FOO.link.fail.cpp - Compiles successfully, but fails to link - FOO.sh.cpp - A builtin lit Shell test - FOO.sh.s - A builtin lit Shell test + FOO.sh. - A builtin Lit Shell test FOO.verify.cpp - Compiles with clang-verify @@ -87,12 +87,12 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): - It is unknown how well it works on Windows yet. """ def getTestsInDirectory(self, testSuite, pathInSuite, litConfig, localConfig): - SUPPORTED_SUFFIXES = ['.pass.cpp', '.pass.mm', '.run.fail.cpp', - '.compile.pass.cpp', '.compile.fail.cpp', - '.link.pass.cpp', '.link.fail.cpp', - '.sh.cpp', '.sh.s', - '.verify.cpp', - '.fail.cpp'] + SUPPORTED_SUFFIXES = ['[.]pass[.]cpp$', '[.]pass[.]mm$', '[.]run[.]fail[.]cpp$', + '[.]compile[.]pass[.]cpp$', '[.]compile[.]fail[.]cpp$', + '[.]link[.]pass[.]cpp$', '[.]link[.]fail[.]cpp$', + '[.]sh[.][^.]+$', + '[.]verify[.]cpp$', + '[.]fail[.]cpp$'] sourcePath = testSuite.getSourcePath(pathInSuite) for filename in os.listdir(sourcePath): # Ignore dot files and excluded tests. @@ -101,7 +101,7 @@ def getTestsInDirectory(self, testSuite, pathInSuite, litConfig, localConfig): filepath = os.path.join(sourcePath, filename) if not os.path.isdir(filepath): - if any([filename.endswith(ext) for ext in SUPPORTED_SUFFIXES]): + if any([re.search(ext, filename) for ext in SUPPORTED_SUFFIXES]): yield lit.Test.Test(testSuite, pathInSuite + (filename,), localConfig) def _checkSubstitutions(self, substitutions): @@ -136,7 +136,7 @@ def execute(self, test, litConfig): if '-fmodules' in test.config.available_features and self._disableWithModules(test, litConfig): return lit.Test.Result(lit.Test.UNSUPPORTED, 'Test {} is unsupported when modules are enabled') - if filename.endswith('.sh.cpp') or filename.endswith('.sh.s'): + if re.search('[.]sh[.][^.]+$', filename): steps = [ ] # The steps are already in the script return self._executeShTest(test, litConfig, steps) elif filename.endswith('.compile.pass.cpp'): From d9e81aab103a63292d122d36ca5c87181254d384 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 14:09:20 +0100 Subject: [PATCH 044/216] WasmEHFuncInfo.h - reduce BasicBlock.h/MachineBasicBlock.h includes to just forward declarations. NFC. 
--- llvm/include/llvm/CodeGen/WasmEHFuncInfo.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h index 887a1467b3e4..41f8856f31f2 100644 --- a/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h +++ b/llvm/include/llvm/CodeGen/WasmEHFuncInfo.h @@ -15,11 +15,13 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PointerUnion.h" -#include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/IR/BasicBlock.h" namespace llvm { +class BasicBlock; +class Function; +class MachineBasicBlock; + enum EventTag { CPP_EXCEPTION = 0, C_LONGJMP = 1 }; using BBOrMBB = PointerUnion; From 1cbd6a58880e40647462739907c6c86395dec6c0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 14:55:39 +0100 Subject: [PATCH 045/216] MCObjectWriter.h - remove unnecessary includes. NFC The EndianStream.h/raw_ostream.h headers should be removed as well but we have a lot of other files that are implicitly relying on them being present. --- llvm/include/llvm/MC/MCObjectWriter.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h index 2547b2b7c9c1..0d742019321c 100644 --- a/llvm/include/llvm/MC/MCObjectWriter.h +++ b/llvm/include/llvm/MC/MCObjectWriter.h @@ -9,13 +9,10 @@ #ifndef LLVM_MC_MCOBJECTWRITER_H #define LLVM_MC_MCOBJECTWRITER_H -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" #include "llvm/Support/Endian.h" #include "llvm/Support/EndianStream.h" #include "llvm/Support/raw_ostream.h" -#include #include namespace llvm { From 5d3a400463c1586f17bd190639142ec468c62396 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 14:59:31 +0100 Subject: [PATCH 046/216] AntiDepBreaker.h - remove unused MachineOperand.h include. NFC. --- llvm/include/llvm/CodeGen/AntiDepBreaker.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/AntiDepBreaker.h b/llvm/include/llvm/CodeGen/AntiDepBreaker.h index 813a3c5a35e7..d9dfcd34ded1 100644 --- a/llvm/include/llvm/CodeGen/AntiDepBreaker.h +++ b/llvm/include/llvm/CodeGen/AntiDepBreaker.h @@ -17,7 +17,6 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/Support/Compiler.h" From e033ec291a1b72f307ab14569ca99822c127610b Mon Sep 17 00:00:00 2001 From: Gabor Marton Date: Thu, 16 Apr 2020 15:48:13 +0200 Subject: [PATCH 047/216] [ASTImporter] Fix bug introduced in 2ba4e3a4598b 2ba4e3a4598b (Move FPFeatures from BinaryOperator bitfields to Trailing storage, D76384) introduced an assertion failure during CTU analysis. The reason is that in ASTNodeImporter::VisitCompoundAssignOperator the LHSType and the ResultType have been imported twice. Details: clang: ../../git/llvm-project/clang/lib/Basic/SourceManager.cpp:918: clang::FileID clang::SourceManager::getFileIDLoaded(unsigned int) const: Assertion `0 && "Invalid SLocOffset or bad function choice"' failed. 
clang::SourceManager::getDecomposedExpansionLoc(clang::SourceLocation) const clang::SourceManager::getPresumedLoc(clang::SourceLocation, bool) const clang::ASTImporter::Import(clang::SourceLocation) llvm::Error clang::ASTImporter::importInto(clang::SourceLocation&, clang::SourceLocation const&) clang::ASTNodeImporter::ImportDeclParts(clang::NamedDecl*, clang::DeclContext*&, clang::DeclContext*&, clang::DeclarationName&, clang::NamedDecl*&, clang::SourceLocation&) clang::ASTNodeImporter::VisitRecordDecl(clang::RecordDecl*) clang::declvisitor::Base >::Visit(clang::Decl*) clang::ASTImporter::Import(clang::Decl*) clang::ASTNodeImporter::VisitRecordType(clang::RecordType const*) clang::TypeVisitor >::Visit(clang::Type const*) clang::ASTImporter::Import(clang::QualType) clang::ASTNodeImporter::VisitElaboratedType(clang::ElaboratedType const*) clang::TypeVisitor >::Visit(clang::Type const*) clang::ASTImporter::Import(clang::QualType) clang::ASTNodeImporter::VisitPointerType(clang::PointerType const*) clang::TypeVisitor >::Visit(clang::Type const*) clang::ASTImporter::Import(clang::QualType) clang::QualType clang::ASTNodeImporter::importChecked(llvm::Error&, clang::QualType const&) clang::ASTNodeImporter::VisitCompoundAssignOperator(clang::CompoundAssignOperator*)
--- clang/lib/AST/ASTImporter.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 5cdf1de4c96a..afd35e0137b6 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -6818,8 +6818,7 @@ ASTNodeImporter::VisitCompoundAssignOperator(CompoundAssignOperator *E) { Importer.getToContext(), ToLHS, ToRHS, E->getOpcode(), ToType, E->getValueKind(), E->getObjectKind(), ToOperatorLoc, E->getFPFeatures(Importer.getFromContext().getLangOpts()), - importChecked(Err, ToComputationLHSType), - importChecked(Err, ToComputationResultType)); + ToComputationLHSType, ToComputationResultType); } Expected
From f701d8fa5ff939235dc5cc2a3340bef021c07423 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 15:18:10 +0100 Subject: [PATCH 048/216] MCValue.h - cleanup include and forward declaration. NFC. Remove MCSymbol.h include Remove unused MCAsmInfo forward declaration
--- llvm/include/llvm/MC/MCValue.h | 2 -- 1 file changed, 2 deletions(-)
diff --git a/llvm/include/llvm/MC/MCValue.h b/llvm/include/llvm/MC/MCValue.h index 0be7ce7055c5..37feee4c9ea8 100644 --- a/llvm/include/llvm/MC/MCValue.h +++ b/llvm/include/llvm/MC/MCValue.h @@ -14,12 +14,10 @@ #define LLVM_MC_MCVALUE_H #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/Support/DataTypes.h" #include namespace llvm { -class MCAsmInfo; class raw_ostream; /// This represents an "assembler immediate".
From 30d5946db95fa465d7ee6caceb2b1ff191e3727c Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Sat, 21 Mar 2020 13:52:51 -0700 Subject: [PATCH 049/216] [clang][AST] Support AST files larger than 512M MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit
Summary: Clang uses 32-bit integers for storing bit offsets from the beginning of the file, which results in a 512M limit on AST files. This diff replaces absolute offsets with relative offsets from the beginning of the corresponding data structure where possible, and uses 64-bit offsets for DeclOffsets and TypeOffsets because those AST sections alone may easily exceed 512M. This diff breaks AST file format compatibility, so VERSION_MAJOR is bumped.
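The core of the encoding change is easy to state in isolation: a 64-bit bit offset is stored as two 32-bit halves so that the serialized record keeps 32-bit alignment and contains no padding (padding bytes would feed undefined values into the AST hash). A minimal standalone sketch of those accessors, mirroring the DeclOffset hunk further down (names shortened here, not a drop-in copy):

  #include <cstdint>

  struct OffsetSketch {
    uint32_t BitOffsetLow = 0;   // low 32 bits of the 64-bit bit offset
    uint32_t BitOffsetHigh = 0;  // high 32 bits

    void setBitOffset(uint64_t Offset) {
      BitOffsetLow = static_cast<uint32_t>(Offset);
      BitOffsetHigh = static_cast<uint32_t>(Offset >> 32);
    }

    uint64_t getBitOffset() const {
      return BitOffsetLow | (uint64_t(BitOffsetHigh) << 32);
    }
  };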
Test Plan: Existing clang AST serialization tests Tested on clangd with ~700M and ~900M preamble files Reviewers: rsmith, dexonsmith Subscribers: ilya-biryukov, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76594 --- .../include/clang/Serialization/ASTBitCodes.h | 27 +++++++++--- clang/include/clang/Serialization/ASTReader.h | 7 ++-- clang/include/clang/Serialization/ASTWriter.h | 10 +++-- .../include/clang/Serialization/ModuleFile.h | 10 ++++- clang/lib/Serialization/ASTReader.cpp | 20 +++++---- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 41 ++++++++++++------- clang/lib/Serialization/ASTWriterDecl.cpp | 4 +- 8 files changed, 81 insertions(+), 40 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 323edfbf8126..198d8e3b4fed 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 9; + const unsigned VERSION_MAJOR = 10; /// AST file minor version number supported by this version of /// Clang. @@ -181,7 +181,7 @@ namespace serialization { /// Raw source location of end of range. unsigned End; - /// Offset in the AST file. + /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. uint32_t BitOffset; PPEntityOffset(SourceRange R, uint32_t BitOffset) @@ -221,12 +221,18 @@ namespace serialization { /// Raw source location. unsigned Loc = 0; - /// Offset in the AST file. - uint32_t BitOffset = 0; + /// Offset in the AST file. Split 64-bit integer into low/high parts + /// to keep structure alignment 32-bit and don't have padding gap. + /// This structure is serialized "as is" to the AST file and undefined + /// value in the padding affects AST hash. + uint32_t BitOffsetLow = 0; + uint32_t BitOffsetHigh = 0; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint32_t BitOffset) - : Loc(Loc.getRawEncoding()), BitOffset(BitOffset) {} + DeclOffset(SourceLocation Loc, uint64_t BitOffset) { + setLocation(Loc); + setBitOffset(BitOffset); + } void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); @@ -235,6 +241,15 @@ namespace serialization { SourceLocation getLocation() const { return SourceLocation::getFromRawEncoding(Loc); } + + void setBitOffset(uint64_t Offset) { + BitOffsetLow = Offset; + BitOffsetHigh = Offset >> 32; + } + + uint64_t getBitOffset() const { + return BitOffsetLow | (uint64_t(BitOffsetHigh) << 32); + } }; /// The number of predefined preprocessed entity IDs. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 94645fff9f93..11a537fad5d5 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -723,9 +723,10 @@ class ASTReader struct PendingMacroInfo { ModuleFile *M; - uint64_t MacroDirectivesOffset; + /// Offset relative to ModuleFile::MacroOffsetsBase. 
+ uint32_t MacroDirectivesOffset; - PendingMacroInfo(ModuleFile *M, uint64_t MacroDirectivesOffset) + PendingMacroInfo(ModuleFile *M, uint32_t MacroDirectivesOffset) : M(M), MacroDirectivesOffset(MacroDirectivesOffset) {} }; @@ -2205,7 +2206,7 @@ class ASTReader /// \param MacroDirectivesOffset Offset of the serialized macro directive /// history. void addPendingMacro(IdentifierInfo *II, ModuleFile *M, - uint64_t MacroDirectivesOffset); + uint32_t MacroDirectivesOffset); /// Read the set of macros defined by this external macro source. void ReadDefinedMacros() override; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index c0a943adf2c7..9413a8d50446 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -243,7 +243,7 @@ class ASTWriter : public ASTDeserializationListener, /// Offset of each type in the bitstream, indexed by /// the type's ID. - std::vector TypeOffsets; + std::vector TypeOffsets; /// The first ID number we can use for our own identifiers. serialization::IdentID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS; @@ -277,7 +277,8 @@ class ASTWriter : public ASTDeserializationListener, /// The macro infos to emit. std::vector MacroInfosToEmit; - llvm::DenseMap IdentMacroDirectivesOffsetMap; + llvm::DenseMap + IdentMacroDirectivesOffsetMap; /// @name FlushStmt Caches /// @{ @@ -464,7 +465,8 @@ class ASTWriter : public ASTDeserializationListener, const Preprocessor &PP); void WritePreprocessor(const Preprocessor &PP, bool IsModule); void WriteHeaderSearch(const HeaderSearch &HS); - void WritePreprocessorDetail(PreprocessingRecord &PPRec); + void WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase); void WriteSubmodules(Module *WritingModule); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, @@ -588,7 +590,7 @@ class ASTWriter : public ASTDeserializationListener, /// Determine the ID of an already-emitted macro. serialization::MacroID getMacroID(MacroInfo *MI); - uint64_t getMacroDirectivesOffset(const IdentifierInfo *Name); + uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. void AddTypeRef(QualType T, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 90d2745e080c..98d7f46fd8f8 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -251,6 +251,10 @@ class ModuleFile { /// The base offset in the source manager's view of this module. unsigned SLocEntryBaseOffset = 0; + /// Base file offset for the offsets in SLocEntryOffsets. Real file offset + /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. + uint64_t SLocEntryOffsetsBase = 0; + /// Offsets for all of the source location entries in the /// AST file. const uint32_t *SLocEntryOffsets = nullptr; @@ -302,6 +306,10 @@ class ModuleFile { /// The number of macros in this AST file. unsigned LocalNumMacros = 0; + /// Base file offset for the offsets in MacroOffsets. Real file offset for + /// the entry is MacroOffsetsBase + MacroOffsets[i]. + uint64_t MacroOffsetsBase = 0; + /// Offsets of macros in the preprocessor block. /// /// This array is indexed by the macro ID (-1), and provides @@ -450,7 +458,7 @@ class ModuleFile { /// Offset of each type within the bitstream, indexed by the /// type ID, or the representation of a Type*. 
- const uint32_t *TypeOffsets = nullptr; + const uint64_t *TypeOffsets = nullptr; /// Base type ID for types local to this module as represented in /// the global type ID space. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 2afd2b764fa6..cc69488c4f64 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1470,6 +1470,7 @@ bool ASTReader::ReadSLocEntry(int ID) { ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( + F->SLocEntryOffsetsBase + F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { Error(std::move(Err)); return true; @@ -1932,9 +1933,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, return HFI; } -void ASTReader::addPendingMacro(IdentifierInfo *II, - ModuleFile *M, - uint64_t MacroDirectivesOffset) { +void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M, + uint32_t MacroDirectivesOffset) { assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard"); PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset)); } @@ -2099,7 +2099,8 @@ void ASTReader::resolvePendingMacro(IdentifierInfo *II, BitstreamCursor &Cursor = M.MacroCursor; SavedStreamPosition SavedPosition(Cursor); - if (llvm::Error Err = Cursor.JumpToBit(PMInfo.MacroDirectivesOffset)) { + if (llvm::Error Err = + Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) { Error(std::move(Err)); return; } @@ -3098,7 +3099,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { Error("duplicate TYPE_OFFSET record in AST file"); return Failure; } - F.TypeOffsets = (const uint32_t *)Blob.data(); + F.TypeOffsets = reinterpret_cast(Blob.data()); F.LocalNumTypes = Record[0]; unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); @@ -3376,6 +3377,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; + F.SLocEntryOffsetsBase = Record[2]; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3694,6 +3696,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; + F.MacroOffsetsBase = Record[2]; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -5907,8 +5910,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { } SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); - if (llvm::Error Err = - M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset)) { + if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit( + M.MacroOffsetsBase + PPOffs.BitOffset)) { Error(std::move(Err)); return nullptr; } @@ -8427,7 +8430,8 @@ MacroInfo *ASTReader::getMacro(MacroID ID) { assert(I != GlobalMacroMap.end() && "Corrupted global macro map"); ModuleFile *M = I->second; unsigned Index = ID - M->BaseMacroID; - MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]); + MacrosLoaded[ID] = + ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]); if (DeserializationListener) DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 
fce4be133220..0a278c7506e1 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2870,7 +2870,7 @@ ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) { const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.BitOffset); + return RecordLocation(M, DOffs.getBitOffset()); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 7f59a2e8695b..05ec8feffb26 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1893,6 +1893,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, // Write out the source location entry table. We skip the first // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; + uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); RecordData PreloadSLocs; SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); @@ -1903,7 +1904,9 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, assert(&SourceMgr.getSLocEntry(FID) == SLoc); // Record the offset of this source-location entry. - SLocEntryOffsets.push_back(Stream.GetCurrentBitNo()); + uint64_t Offset = Stream.GetCurrentBitNo() - SLocEntryOffsetsBase; + assert((Offset >> 32) == 0 && "SLocEntry offset too large"); + SLocEntryOffsets.push_back(Offset); // Figure out which record code to use. unsigned Code; @@ -2011,12 +2014,14 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), - SourceMgr.getNextLocalOffset() - 1 /* skip dummy */}; + SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, + SLocEntryOffsetsBase}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2093,9 +2098,11 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule, /// Writes the block containing the serialized form of the /// preprocessor. void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { + uint64_t MacroOffsetsBase = Stream.GetCurrentBitNo(); + PreprocessingRecord *PPRec = PP.getPreprocessingRecord(); if (PPRec) - WritePreprocessorDetail(*PPRec); + WritePreprocessorDetail(*PPRec, MacroOffsetsBase); RecordData Record; RecordData ModuleMacroRecord; @@ -2156,7 +2163,8 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // identifier they belong to. for (const IdentifierInfo *Name : MacroIdentifiers) { MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name); - auto StartOffset = Stream.GetCurrentBitNo(); + uint64_t StartOffset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((StartOffset >> 32) == 0 && "Macro identifiers offset too large"); // Emit the macro directives in reverse source order. 
for (; MD; MD = MD->getPrevious()) { @@ -2229,14 +2237,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // Record the local offset of this macro. unsigned Index = ID - FirstMacroID; - if (Index == MacroOffsets.size()) - MacroOffsets.push_back(Stream.GetCurrentBitNo()); - else { - if (Index > MacroOffsets.size()) - MacroOffsets.resize(Index + 1); + if (Index >= MacroOffsets.size()) + MacroOffsets.resize(Index + 1); - MacroOffsets[Index] = Stream.GetCurrentBitNo(); - } + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Macro offset too large"); + MacroOffsets[Index] = Offset; AddIdentifierRef(Name, Record); AddSourceLocation(MI->getDefinitionLoc(), Record); @@ -2287,17 +2293,20 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), - FirstMacroID - NUM_PREDEF_MACRO_IDS}; + FirstMacroID - NUM_PREDEF_MACRO_IDS, + MacroOffsetsBase}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } -void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { +void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase) { if (PPRec.local_begin() == PPRec.local_end()) return; @@ -2334,8 +2343,10 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) { Record.clear(); + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); PreprocessedEntityOffsets.push_back( - PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo())); + PPEntityOffset((*E)->getSourceRange(), Offset)); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -5144,7 +5155,7 @@ MacroID ASTWriter::getMacroID(MacroInfo *MI) { return MacroIDs[MI]; } -uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { +uint32_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { return IdentMacroDirectivesOffsetMap.lookup(Name); } diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index e847180435ec..8c5be6cacac0 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2434,12 +2434,12 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.push_back(DeclOffset(Loc, Offset)); + DeclOffsets.emplace_back(Loc, Offset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? 
DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(Loc); - DeclOffsets[Index].BitOffset = Offset; + DeclOffsets[Index].setBitOffset(Offset); } else { llvm_unreachable("declarations should be emitted in ID order"); } From 2d6b9dbfef55364fc762682cd8ab93045582944a Mon Sep 17 00:00:00 2001 From: Mehdi Chinoune Date: Thu, 16 Apr 2020 13:34:17 +0100 Subject: [PATCH 050/216] [flang] Use the Flang cmake-functions to add targets. Summary: It also removes the cycle-dependency between FortranSemantics and FortranEvaluate. Reviewers: #flang, jdoerfert, sscalpone Reviewed By: #flang, sscalpone Subscribers: DavidTruby, schweitz, tskeith, mgorny, aartbik, llvm-commits Tags: #flang, #llvm Differential Revision: https://reviews.llvm.org/D78215 --- flang/cmake/modules/AddFlang.cmake | 1 - flang/lib/Common/CMakeLists.txt | 11 +++-------- flang/lib/Decimal/CMakeLists.txt | 10 +--------- flang/lib/Evaluate/CMakeLists.txt | 14 ++------------ flang/lib/Lower/CMakeLists.txt | 14 ++++---------- flang/lib/Optimizer/Dialect/CMakeLists.txt | 20 +++++++++----------- flang/lib/Optimizer/Support/CMakeLists.txt | 12 +++++------- flang/lib/Parser/CMakeLists.txt | 15 ++++----------- flang/lib/Semantics/CMakeLists.txt | 15 ++++----------- flang/runtime/CMakeLists.txt | 9 ++------- flang/tools/f18-parse-demo/CMakeLists.txt | 8 ++------ flang/tools/f18/CMakeLists.txt | 4 +--- flang/tools/tco/CMakeLists.txt | 3 +-- 13 files changed, 38 insertions(+), 98 deletions(-) diff --git a/flang/cmake/modules/AddFlang.cmake b/flang/cmake/modules/AddFlang.cmake index 84610a633a04..7fe8b7e9f406 100644 --- a/flang/cmake/modules/AddFlang.cmake +++ b/flang/cmake/modules/AddFlang.cmake @@ -109,7 +109,6 @@ macro(add_flang_tool name) endif() add_flang_executable(${name} ${ARGN}) - add_dependencies(${name} flang-resource-headers) if (FLANG_BUILD_TOOLS) set(export_to_flangtargets) diff --git a/flang/lib/Common/CMakeLists.txt b/flang/lib/Common/CMakeLists.txt index f1be58f0e6d0..7865eb582307 100644 --- a/flang/lib/Common/CMakeLists.txt +++ b/flang/lib/Common/CMakeLists.txt @@ -1,15 +1,10 @@ -add_library(FortranCommon +add_flang_library(FortranCommon Fortran.cpp Fortran-features.cpp default-kinds.cpp idioms.cpp -) - -target_compile_features(FortranCommon PUBLIC cxx_std_17) -install (TARGETS FortranCommon - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin + LINK_COMPONENTS + Support ) diff --git a/flang/lib/Decimal/CMakeLists.txt b/flang/lib/Decimal/CMakeLists.txt index 92f87621fc05..18655f096f0d 100644 --- a/flang/lib/Decimal/CMakeLists.txt +++ b/flang/lib/Decimal/CMakeLists.txt @@ -1,13 +1,5 @@ -add_library(FortranDecimal +add_flang_library(FortranDecimal binary-to-decimal.cpp decimal-to-binary.cpp ) - -target_compile_features(FortranDecimal PUBLIC cxx_std_17) - -install (TARGETS FortranDecimal - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin -) diff --git a/flang/lib/Evaluate/CMakeLists.txt b/flang/lib/Evaluate/CMakeLists.txt index a508bec10a63..8426995d948b 100644 --- a/flang/lib/Evaluate/CMakeLists.txt +++ b/flang/lib/Evaluate/CMakeLists.txt @@ -1,5 +1,5 @@ -add_library(FortranEvaluate +add_flang_library(FortranEvaluate call.cpp characteristics.cpp check-expression.cpp @@ -25,23 +25,13 @@ add_library(FortranEvaluate tools.cpp type.cpp variable.cpp -) - -target_compile_features(FortranEvaluate PUBLIC cxx_std_17) -target_link_libraries(FortranEvaluate + LINK_LIBS FortranCommon FortranDecimal - FortranSemantics FortranParser ) -install (TARGETS FortranEvaluate - ARCHIVE DESTINATION 
lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin -) - if (LIBPGMATH_DIR) # If pgmath library is found, it can be used for constant folding. find_library(LIBPGMATH pgmath PATHS ${LIBPGMATH_DIR}) diff --git a/flang/lib/Lower/CMakeLists.txt b/flang/lib/Lower/CMakeLists.txt index 87131cd9fa53..6cbcfc3b630e 100644 --- a/flang/lib/Lower/CMakeLists.txt +++ b/flang/lib/Lower/CMakeLists.txt @@ -1,13 +1,7 @@ -add_library(FortranLower - PFTBuilder.cpp -) -target_link_libraries(FortranLower - LLVMSupport -) +add_flang_library(FortranLower + PFTBuilder.cpp -install (TARGETS FortranLower - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin + LINK_COMPONENTS + Support ) diff --git a/flang/lib/Optimizer/Dialect/CMakeLists.txt b/flang/lib/Optimizer/Dialect/CMakeLists.txt index 711fc64c5a2a..462039ebf6a3 100644 --- a/flang/lib/Optimizer/Dialect/CMakeLists.txt +++ b/flang/lib/Optimizer/Dialect/CMakeLists.txt @@ -1,13 +1,14 @@ -add_llvm_library(FIRDialect +add_flang_library(FIRDialect FIRAttr.cpp FIRDialect.cpp FIROps.cpp FIRType.cpp -) -add_dependencies(FIRDialect FIROpsIncGen) + DEPENDS + FIROpsIncGen -target_link_libraries(FIRDialect + LINK_LIBS + FIRSupport MLIRTargetLLVMIR MLIRTargetLLVMIRModuleTranslation MLIREDSC @@ -16,12 +17,9 @@ target_link_libraries(FIRDialect MLIRSupport MLIRStandardToLLVM MLIRTransforms - LLVMAsmParser - LLVMAsmPrinter - LLVMRemarks -) -install (TARGETS FIRDialect - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib + LINK_COMPONENTS + AsmParser + AsmPrinter + Remarks ) diff --git a/flang/lib/Optimizer/Support/CMakeLists.txt b/flang/lib/Optimizer/Support/CMakeLists.txt index 88a1fc78a5f6..d8e270f5e73e 100644 --- a/flang/lib/Optimizer/Support/CMakeLists.txt +++ b/flang/lib/Optimizer/Support/CMakeLists.txt @@ -1,10 +1,8 @@ -add_llvm_library(FIRSupport - KindMapping.cpp -) +get_property(dialect_libs GLOBAL PROPERTY MLIR_DIALECT_LIBS) -target_link_libraries(FIRSupport FIRDialect) +add_flang_library(FIRSupport + KindMapping.cpp -install (TARGETS FIRSupport - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib + LINK_LIBS + ${dialect_libs} ) diff --git a/flang/lib/Parser/CMakeLists.txt b/flang/lib/Parser/CMakeLists.txt index 9dc6480a2e9d..cc691de35b48 100644 --- a/flang/lib/Parser/CMakeLists.txt +++ b/flang/lib/Parser/CMakeLists.txt @@ -1,5 +1,5 @@ -add_library(FortranParser +add_flang_library(FortranParser Fortran-parsers.cpp char-buffer.cpp char-block.cpp @@ -23,17 +23,10 @@ add_library(FortranParser tools.cpp unparse.cpp user-state.cpp -) - -target_compile_features(FortranParser PRIVATE cxx_std_17) -target_link_libraries(FortranParser + LINK_LIBS FortranCommon - LLVMSupport -) -install (TARGETS FortranParser - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin + LINK_COMPONENTS + Support ) diff --git a/flang/lib/Semantics/CMakeLists.txt b/flang/lib/Semantics/CMakeLists.txt index feedbab17860..04c9cf8b31fc 100644 --- a/flang/lib/Semantics/CMakeLists.txt +++ b/flang/lib/Semantics/CMakeLists.txt @@ -1,5 +1,5 @@ -add_library(FortranSemantics +add_flang_library(FortranSemantics assignment.cpp attr.cpp canonicalize-do.cpp @@ -35,18 +35,11 @@ add_library(FortranSemantics tools.cpp type.cpp unparse-with-symbols.cpp -) - -target_compile_features(FortranSemantics PUBLIC cxx_std_17) -target_link_libraries(FortranSemantics + LINK_LIBS FortranCommon FortranEvaluate - LLVMSupport -) -install (TARGETS FortranSemantics - ARCHIVE DESTINATION lib - LIBRARY DESTINATION lib - RUNTIME DESTINATION bin + LINK_COMPONENTS + Support ) diff --git 
a/flang/runtime/CMakeLists.txt b/flang/runtime/CMakeLists.txt index 9232d8095fc1..7da1a2128292 100644 --- a/flang/runtime/CMakeLists.txt +++ b/flang/runtime/CMakeLists.txt @@ -26,7 +26,7 @@ endif() configure_file(config.h.cmake config.h) -add_library(FortranRuntime +add_flang_library(FortranRuntime ISO_Fortran_binding.cpp allocatable.cpp buffer.cpp @@ -53,12 +53,7 @@ add_library(FortranRuntime type-code.cpp unit.cpp unit-map.cpp -) - -target_include_directories(FortranRuntime - PRIVATE ${CMAKE_CURRENT_BINARY_DIR} -) -target_link_libraries(FortranRuntime + LINK_LIBS FortranDecimal ) diff --git a/flang/tools/f18-parse-demo/CMakeLists.txt b/flang/tools/f18-parse-demo/CMakeLists.txt index fc64a3f0d904..ab13d602542f 100644 --- a/flang/tools/f18-parse-demo/CMakeLists.txt +++ b/flang/tools/f18-parse-demo/CMakeLists.txt @@ -1,13 +1,9 @@ -add_llvm_tool(f18-parse-demo +add_flang_tool(f18-parse-demo f18-parse-demo.cpp stub-evaluate.cpp - ) -set_property(TARGET f18-parse-demo PROPERTY CXX_STANDARD 17) -target_compile_features(f18-parse-demo PRIVATE cxx_std_17) +) target_link_libraries(f18-parse-demo PRIVATE FortranParser ) - -#install(TARGETS f18-parse-demo DESTINATION bin) diff --git a/flang/tools/f18/CMakeLists.txt b/flang/tools/f18/CMakeLists.txt index 8745f7c1caef..70e09ef55144 100644 --- a/flang/tools/f18/CMakeLists.txt +++ b/flang/tools/f18/CMakeLists.txt @@ -1,9 +1,7 @@ -add_llvm_tool(f18 +add_flang_tool(f18 dump.cpp f18.cpp ) -set_property(TARGET f18 PROPERTY CXX_STANDARD 17) -target_compile_features(f18 PRIVATE cxx_std_17) target_link_libraries(f18 PRIVATE diff --git a/flang/tools/tco/CMakeLists.txt b/flang/tools/tco/CMakeLists.txt index 33f410677705..451952ba5a3b 100644 --- a/flang/tools/tco/CMakeLists.txt +++ b/flang/tools/tco/CMakeLists.txt @@ -19,6 +19,5 @@ set(LIBS MLIRVectorToLLVM ) -add_llvm_tool(tco tco.cpp) -llvm_update_compile_flags(tco) +add_flang_tool(tco tco.cpp) target_link_libraries(tco PRIVATE ${LIBS}) From ea88dd821253103a07f335449416e55034e7d8b3 Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Thu, 16 Apr 2020 20:40:38 +0530 Subject: [PATCH 051/216] [MLIR] Fix MLIR build - add missing CMake dependency This will fix a failure when using a linker sensitive to the order in which libraries are passed. Differential Revision: https://reviews.llvm.org/D78303 --- mlir/lib/Dialect/Affine/Utils/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt index 64738c0cf369..ac9dae972488 100644 --- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt @@ -8,4 +8,5 @@ add_mlir_dialect_library(MLIRAffineUtils target_link_libraries(MLIRAffineUtils PUBLIC MLIRAffine + MLIRTransformUtils ) From 44c4ba34d001dcf538d7396007b5611d6f697f86 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 16 Apr 2020 15:23:49 +0100 Subject: [PATCH 052/216] [MachineSink] Fix for breaking phi edges with instructions with multiple defs BreakPHIEdge would be set based on whether the instruction needs to insert a new critical edge to allow sinking into a block where the uses are PHI nodes. But for instructions with multiple defs it would be reset on the second def, allowing the instruciton to sink where it should not. 
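The shape of the fix (the full context is in the MachineSink.cpp hunk below) is to compute the "all uses are PHIs in the candidate block" condition as a single query instead of toggling the flag while walking the uses, so a later def can no longer clear a decision made for an earlier one. Roughly:

  // Sketch only: MRI, Reg, MBB and DefMBB are the surrounding
  // MachineSinking state; see the real hunk below.
  bool AllUsesArePHIsInMBB =
      llvm::all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) {
        MachineInstr *UseInst = MO.getParent();
        unsigned OpNo = UseInst->getOperandNo(&MO);
        return UseInst->getParent() == MBB && UseInst->isPHI() &&
               UseInst->getOperand(OpNo + 1).getMBB() == DefMBB;
      });
  if (AllUsesArePHIsInMBB) {
    BreakPHIEdge = true;
    return true;
  }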
Fixes PR44981 Differential Revision: https://reviews.llvm.org/D78087 --- llvm/lib/CodeGen/MachineSink.cpp | 30 +++++----- .../test/CodeGen/ARM/machine-sink-multidef.ll | 56 +++++++++++++++++++ 2 files changed, 69 insertions(+), 17 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/machine-sink-multidef.ll diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index ac342babfb60..01a7be47b62e 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -269,30 +269,26 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // into and they are all PHI nodes. In this case, machine-sink must break // the critical edge first. e.g. // - // %bb.1: derived from LLVM BB %bb4.preheader + // %bb.1: // Predecessors according to CFG: %bb.0 // ... - // %reg16385 = DEC64_32r %reg16437, implicit-def dead %eflags + // %def = DEC64_32r %x, implicit-def dead %eflags // ... // JE_4 <%bb.37>, implicit %eflags // Successors according to CFG: %bb.37 %bb.2 // - // %bb.2: derived from LLVM BB %bb.nph - // Predecessors according to CFG: %bb.0 %bb.1 - // %reg16386 = PHI %reg16434, %bb.0, %reg16385, %bb.1 - BreakPHIEdge = true; - for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { - MachineInstr *UseInst = MO.getParent(); - unsigned OpNo = &MO - &UseInst->getOperand(0); - MachineBasicBlock *UseBlock = UseInst->getParent(); - if (!(UseBlock == MBB && UseInst->isPHI() && - UseInst->getOperand(OpNo+1).getMBB() == DefMBB)) { - BreakPHIEdge = false; - break; - } - } - if (BreakPHIEdge) + // %bb.2: + // %p = PHI %y, %bb.0, %def, %bb.1 + if (llvm::all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { + MachineInstr *UseInst = MO.getParent(); + unsigned OpNo = UseInst->getOperandNo(&MO); + MachineBasicBlock *UseBlock = UseInst->getParent(); + return UseBlock == MBB && UseInst->isPHI() && + UseInst->getOperand(OpNo + 1).getMBB() == DefMBB; + })) { + BreakPHIEdge = true; return true; + } for (MachineOperand &MO : MRI->use_nodbg_operands(Reg)) { // Determine the block of the use. diff --git a/llvm/test/CodeGen/ARM/machine-sink-multidef.ll b/llvm/test/CodeGen/ARM/machine-sink-multidef.ll new file mode 100644 index 000000000000..81be72836241 --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-sink-multidef.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=arm-none-eabi | FileCheck %s + +%struct.anon.1.19.23.27.35.49.55.57.59.61.89.95 = type { i32, i32 } + +@e = external constant [2 x %struct.anon.1.19.23.27.35.49.55.57.59.61.89.95], align 4 +@f = external global i32, align 4 + +define arm_aapcscc void @g() { +; CHECK-LABEL: g: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r11, lr} +; CHECK-NEXT: push {r11, lr} +; CHECK-NEXT: ldr r0, .LCPI0_0 +; CHECK-NEXT: mov r2, #0 +; CHECK-NEXT: ldr r1, .LCPI0_1 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: ldr r0, [r0] +; CHECK-NEXT: ldr r0, [r1, r0, lsl #3]! 
+; CHECK-NEXT: moveq r0, #0 +; CHECK-NEXT: cmp r2, #0 +; CHECK-NEXT: popne {r11, lr} +; CHECK-NEXT: movne pc, lr +; CHECK-NEXT: ldr r1, [r1, #4] +; CHECK-NEXT: bl k +; CHECK-NEXT: .p2align 2 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI0_0: +; CHECK-NEXT: .long f +; CHECK-NEXT: .LCPI0_1: +; CHECK-NEXT: .long e +entry: + %0 = load i32, i32* @f, align 4 + %c = getelementptr inbounds [2 x %struct.anon.1.19.23.27.35.49.55.57.59.61.89.95], [2 x %struct.anon.1.19.23.27.35.49.55.57.59.61.89.95]* @e, i32 0, i32 %0, i32 0 + %1 = load i32, i32* %c, align 4 + %d = getelementptr inbounds [2 x %struct.anon.1.19.23.27.35.49.55.57.59.61.89.95], [2 x %struct.anon.1.19.23.27.35.49.55.57.59.61.89.95]* @e, i32 0, i32 %0, i32 1 + %2 = load i32, i32* %d, align 4 + br i1 undef, label %land.lhs.true, label %if.end + +land.lhs.true: ; preds = %entry + br label %if.end + +if.end: ; preds = %land.lhs.true, %entry + %h.0 = phi i32 [ %1, %entry ], [ 0, %land.lhs.true ] + br i1 undef, label %if.end7, label %if.then5 + +if.then5: ; preds = %if.end + %call6 = call arm_aapcscc i32 bitcast (i32 (...)* @k to i32 (i32, i32)*)(i32 %h.0, i32 %2) + unreachable + +if.end7: ; preds = %if.end + ret void +} + +declare arm_aapcscc i32 @k(...) + From 5fedf7f42043f6a7d4562df2eab4a22b3346ac1a Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Mon, 23 Mar 2020 12:00:35 -0700 Subject: [PATCH 053/216] [libc] Move implementations of cosf, sinf, sincosf to src/math directory. NFC intended in the implementaton. Only mechanical changes to fit the LLVM libc implementation standard have been done. Math testing infrastructure has been added. This infrastructure compares the results produced by the libc with the high precision results from MPFR. Tests making use of this infrastructure have been added for cosf, sinf and sincosf. 
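The comparison idea can be sketched with plain MPFR calls. The helper below is only an illustration and is not the MPFRUtils API introduced by this patch; the 96-bit working precision mirrors the wrapper added below, while the one-ULP acceptance is a deliberate simplification of its tolerance handling:

  // Stand-alone sketch; build with: g++ example.cpp -lmpfr -lgmp
  #include <cmath>
  #include <cstdio>
  #include <limits>
  #include <mpfr.h>

  // Accept a float result if it is within one ULP of cos(x) computed by MPFR
  // at 96 bits of precision and then rounded back to float.
  static bool closeToMPFRCos(float x, float result) {
    mpfr_t hp;
    mpfr_init2(hp, 96);             // well above float's 24-bit significand
    mpfr_set_flt(hp, x, MPFR_RNDN); // exact: every float fits in 96 bits
    mpfr_cos(hp, hp, MPFR_RNDN);    // high-precision reference
    float ref = mpfr_get_flt(hp, MPFR_RNDN);
    mpfr_clear(hp);
    float inf = std::numeric_limits<float>::infinity();
    return result == ref || result == std::nextafter(ref, inf) ||
           result == std::nextafter(ref, -inf);
  }

  int main() {
    float x = 0.5f;
    std::printf("%s\n", closeToMPFRCos(x, std::cos(x)) ? "ok" : "mismatch");
    return 0;
  }

Such a check links against -lmpfr and -lgmp, which is also what the new CMake rules probe for before enabling the MPFR-based tests.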
Reviewers: abrachet, phosek Differential Revision: https://reviews.llvm.org/D76825 --- libc/AOR_v20.02/math/cosf.c | 64 -------- libc/AOR_v20.02/math/sincosf.c | 80 --------- libc/AOR_v20.02/math/sincosf.h | 154 ------------------ libc/AOR_v20.02/math/sincosf_data.c | 64 -------- libc/AOR_v20.02/math/sinf.c | 68 -------- .../math/test/testcases/directed/cosf.tst | 26 --- .../math/test/testcases/directed/sincosf.tst | 52 ------ .../math/test/testcases/directed/sinf.tst | 29 ---- .../math/test/testcases/random/float.tst | 4 - libc/config/linux/api.td | 4 + libc/lib/CMakeLists.txt | 3 + libc/src/__support/common.h.def | 4 + libc/src/math/CMakeLists.txt | 56 +++++++ libc/src/math/cosf.cpp | 64 ++++++++ libc/src/math/cosf.h | 18 ++ libc/src/math/math_utils.h | 49 ++++++ libc/src/math/sincosf.cpp | 76 +++++++++ libc/src/math/sincosf.h | 18 ++ libc/src/math/sincosf_data.cpp | 51 ++++++ libc/src/math/sincosf_utils.h | 142 ++++++++++++++++ libc/src/math/sinf.cpp | 68 ++++++++ libc/src/math/sinf.h | 18 ++ libc/test/src/CMakeLists.txt | 1 + libc/test/src/math/CMakeLists.txt | 80 +++++++++ libc/test/src/math/cosf_test.cpp | 103 ++++++++++++ libc/test/src/math/float.h | 49 ++++++ libc/test/src/math/sdcomp26094.h | 25 +++ libc/test/src/math/sincosf_test.cpp | 125 ++++++++++++++ libc/test/src/math/sinf_test.cpp | 110 +++++++++++++ libc/utils/CMakeLists.txt | 1 + libc/utils/MPFRWrapper/CMakeLists.txt | 17 ++ libc/utils/MPFRWrapper/MPFRUtils.cpp | 97 +++++++++++ libc/utils/MPFRWrapper/MPFRUtils.h | 51 ++++++ libc/utils/MPFRWrapper/check_mpfr.cpp | 8 + 34 files changed, 1238 insertions(+), 541 deletions(-) delete mode 100644 libc/AOR_v20.02/math/cosf.c delete mode 100644 libc/AOR_v20.02/math/sincosf.c delete mode 100644 libc/AOR_v20.02/math/sincosf.h delete mode 100644 libc/AOR_v20.02/math/sincosf_data.c delete mode 100644 libc/AOR_v20.02/math/sinf.c delete mode 100644 libc/AOR_v20.02/math/test/testcases/directed/cosf.tst delete mode 100644 libc/AOR_v20.02/math/test/testcases/directed/sincosf.tst delete mode 100644 libc/AOR_v20.02/math/test/testcases/directed/sinf.tst create mode 100644 libc/src/math/cosf.cpp create mode 100644 libc/src/math/cosf.h create mode 100644 libc/src/math/math_utils.h create mode 100644 libc/src/math/sincosf.cpp create mode 100644 libc/src/math/sincosf.h create mode 100644 libc/src/math/sincosf_data.cpp create mode 100644 libc/src/math/sincosf_utils.h create mode 100644 libc/src/math/sinf.cpp create mode 100644 libc/src/math/sinf.h create mode 100644 libc/test/src/math/CMakeLists.txt create mode 100644 libc/test/src/math/cosf_test.cpp create mode 100644 libc/test/src/math/float.h create mode 100644 libc/test/src/math/sdcomp26094.h create mode 100644 libc/test/src/math/sincosf_test.cpp create mode 100644 libc/test/src/math/sinf_test.cpp create mode 100644 libc/utils/MPFRWrapper/CMakeLists.txt create mode 100644 libc/utils/MPFRWrapper/MPFRUtils.cpp create mode 100644 libc/utils/MPFRWrapper/MPFRUtils.h create mode 100644 libc/utils/MPFRWrapper/check_mpfr.cpp diff --git a/libc/AOR_v20.02/math/cosf.c b/libc/AOR_v20.02/math/cosf.c deleted file mode 100644 index 1ab98a1dd60f..000000000000 --- a/libc/AOR_v20.02/math/cosf.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Single-precision cos function. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. 
- * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - */ - -#include -#include -#include "math_config.h" -#include "sincosf.h" - -/* Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative - error is 0.5303 * 2^-23. A single-step range reduction is used for - small values. Large inputs have their range reduced using fast integer - arithmetic. */ -float -cosf (float y) -{ - double x = y; - double s; - int n; - const sincos_t *p = &__sincosf_table[0]; - - if (abstop12 (y) < abstop12 (pio4)) - { - double x2 = x * x; - - if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) - return 1.0f; - - return sinf_poly (x, x2, p, 1); - } - else if (likely (abstop12 (y) < abstop12 (120.0f))) - { - x = reduce_fast (x, p, &n); - - /* Setup the signs for sin and cos. */ - s = p->sign[n & 3]; - - if (n & 2) - p = &__sincosf_table[1]; - - return sinf_poly (x * s, x * x, p, n ^ 1); - } - else if (abstop12 (y) < abstop12 (INFINITY)) - { - uint32_t xi = asuint (y); - int sign = xi >> 31; - - x = reduce_large (xi, &n); - - /* Setup signs for sin and cos - include original sign. */ - s = p->sign[(n + sign) & 3]; - - if ((n + sign) & 2) - p = &__sincosf_table[1]; - - return sinf_poly (x * s, x * x, p, n ^ 1); - } - else - return __math_invalidf (y); -} diff --git a/libc/AOR_v20.02/math/sincosf.c b/libc/AOR_v20.02/math/sincosf.c deleted file mode 100644 index 819b05b21080..000000000000 --- a/libc/AOR_v20.02/math/sincosf.c +++ /dev/null @@ -1,80 +0,0 @@ -/* - * Single-precision sin/cos function. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - */ - -#include -#include -#include "math_config.h" -#include "sincosf.h" - -/* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative - error is 0.5303 * 2^-23. A single-step range reduction is used for - small values. Large inputs have their range reduced using fast integer - arithmetic. */ -void -sincosf (float y, float *sinp, float *cosp) -{ - double x = y; - double s; - int n; - const sincos_t *p = &__sincosf_table[0]; - - if (abstop12 (y) < abstop12 (pio4)) - { - double x2 = x * x; - - if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) - { - if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) - /* Force underflow for tiny y. */ - force_eval_float (x2); - *sinp = y; - *cosp = 1.0f; - return; - } - - sincosf_poly (x, x2, p, 0, sinp, cosp); - } - else if (abstop12 (y) < abstop12 (120.0f)) - { - x = reduce_fast (x, p, &n); - - /* Setup the signs for sin and cos. */ - s = p->sign[n & 3]; - - if (n & 2) - p = &__sincosf_table[1]; - - sincosf_poly (x * s, x * x, p, n, sinp, cosp); - } - else if (likely (abstop12 (y) < abstop12 (INFINITY))) - { - uint32_t xi = asuint (y); - int sign = xi >> 31; - - x = reduce_large (xi, &n); - - /* Setup signs for sin and cos - include original sign. */ - s = p->sign[(n + sign) & 3]; - - if ((n + sign) & 2) - p = &__sincosf_table[1]; - - sincosf_poly (x * s, x * x, p, n, sinp, cosp); - } - else - { - /* Return NaN if Inf or NaN for both sin and cos. */ - *sinp = *cosp = y - y; -#if WANT_ERRNO - /* Needed to set errno for +-Inf, the add is a hack to work - around a gcc register allocation issue: just passing y - affects code generation in the fast path. 
*/ - __math_invalidf (y + y); -#endif - } -} diff --git a/libc/AOR_v20.02/math/sincosf.h b/libc/AOR_v20.02/math/sincosf.h deleted file mode 100644 index ef40d708acde..000000000000 --- a/libc/AOR_v20.02/math/sincosf.h +++ /dev/null @@ -1,154 +0,0 @@ -/* - * Header for sinf, cosf and sincosf. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - */ - -#include -#include -#include "math_config.h" - -/* 2PI * 2^-64. */ -static const double pi63 = 0x1.921FB54442D18p-62; -/* PI / 4. */ -static const double pio4 = 0x1.921FB54442D18p-1; - -/* The constants and polynomials for sine and cosine. */ -typedef struct -{ - double sign[4]; /* Sign of sine in quadrants 0..3. */ - double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */ - double hpi; /* PI / 2. */ - double c0, c1, c2, c3, c4; /* Cosine polynomial. */ - double s1, s2, s3; /* Sine polynomial. */ -} sincos_t; - -/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */ -extern const sincos_t __sincosf_table[2] HIDDEN; - -/* Table with 4/PI to 192 bit precision. */ -extern const uint32_t __inv_pio4[] HIDDEN; - -/* Top 12 bits of the float representation with the sign bit cleared. */ -static inline uint32_t -abstop12 (float x) -{ - return (asuint (x) >> 20) & 0x7ff; -} - -/* Compute the sine and cosine of inputs X and X2 (X squared), using the - polynomial P and store the results in SINP and COSP. N is the quadrant, - if odd the cosine and sine polynomials are swapped. */ -static inline void -sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp, - float *cosp) -{ - double x3, x4, x5, x6, s, c, c1, c2, s1; - - x4 = x2 * x2; - x3 = x2 * x; - c2 = p->c3 + x2 * p->c4; - s1 = p->s2 + x2 * p->s3; - - /* Swap sin/cos result based on quadrant. */ - float *tmp = (n & 1 ? cosp : sinp); - cosp = (n & 1 ? sinp : cosp); - sinp = tmp; - - c1 = p->c0 + x2 * p->c1; - x5 = x3 * x2; - x6 = x4 * x2; - - s = x + x3 * p->s1; - c = c1 + x4 * p->c2; - - *sinp = s + x5 * s1; - *cosp = c + x6 * c2; -} - -/* Return the sine of inputs X and X2 (X squared) using the polynomial P. - N is the quadrant, and if odd the cosine polynomial is used. */ -static inline float -sinf_poly (double x, double x2, const sincos_t *p, int n) -{ - double x3, x4, x6, x7, s, c, c1, c2, s1; - - if ((n & 1) == 0) - { - x3 = x * x2; - s1 = p->s2 + x2 * p->s3; - - x7 = x3 * x2; - s = x + x3 * p->s1; - - return s + x7 * s1; - } - else - { - x4 = x2 * x2; - c2 = p->c3 + x2 * p->c4; - c1 = p->c0 + x2 * p->c1; - - x6 = x4 * x2; - c = c1 + x4 * p->c2; - - return c + x6 * c2; - } -} - -/* Fast range reduction using single multiply-subtract. Return the modulo of - X as a value between -PI/4 and PI/4 and store the quadrant in NP. - The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double - is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, - the result is accurate for |X| <= 120.0. */ -static inline double -reduce_fast (double x, const sincos_t *p, int *np) -{ - double r; -#if TOINT_INTRINSICS - /* Use fast round and lround instructions when available. */ - r = x * p->hpi_inv; - *np = converttoint (r); - return x - roundtoint (r) * p->hpi; -#else - /* Use scaled float to int conversion with explicit rounding. - hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. - This avoids inaccuracies introduced by truncating negative values. 
*/ - r = x * p->hpi_inv; - int n = ((int32_t)r + 0x800000) >> 24; - *np = n; - return x - n * p->hpi; -#endif -} - -/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. - XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). - Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. - Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit - multiply computes the exact 2.62-bit fixed-point modulo. Since the result - can have at most 29 leading zeros after the binary point, the double - precision result is accurate to 33 bits. */ -static inline double -reduce_large (uint32_t xi, int *np) -{ - const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15]; - int shift = (xi >> 23) & 7; - uint64_t n, res0, res1, res2; - - xi = (xi & 0xffffff) | 0x800000; - xi <<= shift; - - res0 = xi * arr[0]; - res1 = (uint64_t)xi * arr[4]; - res2 = (uint64_t)xi * arr[8]; - res0 = (res2 >> 32) | (res0 << 32); - res0 += res1; - - n = (res0 + (1ULL << 61)) >> 62; - res0 -= n << 62; - double x = (int64_t)res0; - *np = n; - return x * pi63; -} diff --git a/libc/AOR_v20.02/math/sincosf_data.c b/libc/AOR_v20.02/math/sincosf_data.c deleted file mode 100644 index a3d5efda6f4c..000000000000 --- a/libc/AOR_v20.02/math/sincosf_data.c +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Data definition for sinf, cosf and sincosf. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - */ - -#include -#include -#include "math_config.h" -#include "sincosf.h" - -/* The constants and polynomials for sine and cosine. The 2nd entry - computes -cos (x) rather than cos (x) to get negation for free. */ -const sincos_t __sincosf_table[2] = -{ - { - { 1.0, -1.0, -1.0, 1.0 }, -#if TOINT_INTRINSICS - 0x1.45F306DC9C883p-1, -#else - 0x1.45F306DC9C883p+23, -#endif - 0x1.921FB54442D18p0, - 0x1p0, - -0x1.ffffffd0c621cp-2, - 0x1.55553e1068f19p-5, - -0x1.6c087e89a359dp-10, - 0x1.99343027bf8c3p-16, - -0x1.555545995a603p-3, - 0x1.1107605230bc4p-7, - -0x1.994eb3774cf24p-13 - }, - { - { 1.0, -1.0, -1.0, 1.0 }, -#if TOINT_INTRINSICS - 0x1.45F306DC9C883p-1, -#else - 0x1.45F306DC9C883p+23, -#endif - 0x1.921FB54442D18p0, - -0x1p0, - 0x1.ffffffd0c621cp-2, - -0x1.55553e1068f19p-5, - 0x1.6c087e89a359dp-10, - -0x1.99343027bf8c3p-16, - -0x1.555545995a603p-3, - 0x1.1107605230bc4p-7, - -0x1.994eb3774cf24p-13 - } -}; - -/* Table with 4/PI to 192 bit precision. To avoid unaligned accesses - only 8 new bits are added per entry, making the table 4 times larger. */ -const uint32_t __inv_pio4[24] = -{ - 0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e, - 0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529, - 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1, - 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, - 0x34ddc0db, 0xddc0db62, 0xc0db6295, 0xdb629599, - 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041 -}; diff --git a/libc/AOR_v20.02/math/sinf.c b/libc/AOR_v20.02/math/sinf.c deleted file mode 100644 index 644b82dd94da..000000000000 --- a/libc/AOR_v20.02/math/sinf.c +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Single-precision sin function. - * - * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. - * See https://llvm.org/LICENSE.txt for license information. - * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - */ - -#include -#include "math_config.h" -#include "sincosf.h" - -/* Fast sinf implementation. 
Worst-case ULP is 0.5607, maximum relative - error is 0.5303 * 2^-23. A single-step range reduction is used for - small values. Large inputs have their range reduced using fast integer - arithmetic. */ -float -sinf (float y) -{ - double x = y; - double s; - int n; - const sincos_t *p = &__sincosf_table[0]; - - if (abstop12 (y) < abstop12 (pio4)) - { - s = x * x; - - if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) - { - if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) - /* Force underflow for tiny y. */ - force_eval_float (s); - return y; - } - - return sinf_poly (x, s, p, 0); - } - else if (likely (abstop12 (y) < abstop12 (120.0f))) - { - x = reduce_fast (x, p, &n); - - /* Setup the signs for sin and cos. */ - s = p->sign[n & 3]; - - if (n & 2) - p = &__sincosf_table[1]; - - return sinf_poly (x * s, x * x, p, n); - } - else if (abstop12 (y) < abstop12 (INFINITY)) - { - uint32_t xi = asuint (y); - int sign = xi >> 31; - - x = reduce_large (xi, &n); - - /* Setup signs for sin and cos - include original sign. */ - s = p->sign[(n + sign) & 3]; - - if ((n + sign) & 2) - p = &__sincosf_table[1]; - - return sinf_poly (x * s, x * x, p, n); - } - else - return __math_invalidf (y); -} diff --git a/libc/AOR_v20.02/math/test/testcases/directed/cosf.tst b/libc/AOR_v20.02/math/test/testcases/directed/cosf.tst deleted file mode 100644 index 8c7621a4550c..000000000000 --- a/libc/AOR_v20.02/math/test/testcases/directed/cosf.tst +++ /dev/null @@ -1,26 +0,0 @@ -; cosf.tst - Directed test cases for SP cosine -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. -; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - -func=cosf op1=7fc00001 result=7fc00001 errno=0 -func=cosf op1=ffc00001 result=7fc00001 errno=0 -func=cosf op1=7f800001 result=7fc00001 errno=0 status=i -func=cosf op1=ff800001 result=7fc00001 errno=0 status=i -func=cosf op1=7f800000 result=7fc00001 errno=EDOM status=i -func=cosf op1=ff800000 result=7fc00001 errno=EDOM status=i -func=cosf op1=00000000 result=3f800000 errno=0 -func=cosf op1=80000000 result=3f800000 errno=0 -; SDCOMP-26094: check cosf in the cases for which the range reducer -; returns values furthest beyond its nominal upper bound of pi/4. -func=cosf op1=46427f1b result=3f34dc5c.565 error=0 -func=cosf op1=4647e568 result=3f34dc33.c1f error=0 -func=cosf op1=46428bac result=bf34dbf2.8e3 error=0 -func=cosf op1=4647f1f9 result=bf34dbc9.f9b error=0 -func=cosf op1=4647fe8a result=3f34db60.313 error=0 -func=cosf op1=45d8d7f1 result=bf35006a.7fd error=0 -func=cosf op1=45d371a4 result=3f350056.39b error=0 -func=cosf op1=45ce0b57 result=bf350041.f38 error=0 -func=cosf op1=45d35882 result=bf34ffec.868 error=0 -func=cosf op1=45cdf235 result=3f34ffd8.404 error=0 diff --git a/libc/AOR_v20.02/math/test/testcases/directed/sincosf.tst b/libc/AOR_v20.02/math/test/testcases/directed/sincosf.tst deleted file mode 100644 index d22fd9802694..000000000000 --- a/libc/AOR_v20.02/math/test/testcases/directed/sincosf.tst +++ /dev/null @@ -1,52 +0,0 @@ -; Directed test cases for SP sincos -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -func=sincosf_sinf op1=7fc00001 result=7fc00001 errno=0 -func=sincosf_sinf op1=ffc00001 result=7fc00001 errno=0 -func=sincosf_sinf op1=7f800001 result=7fc00001 errno=0 status=i -func=sincosf_sinf op1=ff800001 result=7fc00001 errno=0 status=i -func=sincosf_sinf op1=7f800000 result=7fc00001 errno=EDOM status=i -func=sincosf_sinf op1=ff800000 result=7fc00001 errno=EDOM status=i -func=sincosf_sinf op1=00000000 result=00000000 errno=0 -func=sincosf_sinf op1=80000000 result=80000000 errno=0 -func=sincosf_sinf op1=c70d39a1 result=be37fad5.7ed errno=0 -func=sincosf_sinf op1=46427f1b result=3f352d80.f9b error=0 -func=sincosf_sinf op1=4647e568 result=3f352da9.7be error=0 -func=sincosf_sinf op1=46428bac result=bf352dea.924 error=0 -func=sincosf_sinf op1=4647f1f9 result=bf352e13.146 error=0 -func=sincosf_sinf op1=4647fe8a result=3f352e7c.ac9 error=0 -func=sincosf_sinf op1=45d8d7f1 result=3f35097b.cb0 error=0 -func=sincosf_sinf op1=45d371a4 result=bf350990.102 error=0 -func=sincosf_sinf op1=45ce0b57 result=3f3509a4.554 error=0 -func=sincosf_sinf op1=45d35882 result=3f3509f9.bdb error=0 -func=sincosf_sinf op1=45cdf235 result=bf350a0e.02c error=0 - -func=sincosf_cosf op1=7fc00001 result=7fc00001 errno=0 -func=sincosf_cosf op1=ffc00001 result=7fc00001 errno=0 -func=sincosf_cosf op1=7f800001 result=7fc00001 errno=0 status=i -func=sincosf_cosf op1=ff800001 result=7fc00001 errno=0 status=i -func=sincosf_cosf op1=7f800000 result=7fc00001 errno=EDOM status=i -func=sincosf_cosf op1=ff800000 result=7fc00001 errno=EDOM status=i -func=sincosf_cosf op1=00000000 result=3f800000 errno=0 -func=sincosf_cosf op1=80000000 result=3f800000 errno=0 -func=sincosf_cosf op1=46427f1b result=3f34dc5c.565 error=0 -func=sincosf_cosf op1=4647e568 result=3f34dc33.c1f error=0 -func=sincosf_cosf op1=46428bac result=bf34dbf2.8e3 error=0 -func=sincosf_cosf op1=4647f1f9 result=bf34dbc9.f9b error=0 -func=sincosf_cosf op1=4647fe8a result=3f34db60.313 error=0 -func=sincosf_cosf op1=45d8d7f1 result=bf35006a.7fd error=0 -func=sincosf_cosf op1=45d371a4 result=3f350056.39b error=0 -func=sincosf_cosf op1=45ce0b57 result=bf350041.f38 error=0 -func=sincosf_cosf op1=45d35882 result=bf34ffec.868 error=0 -func=sincosf_cosf op1=45cdf235 result=3f34ffd8.404 error=0 - -; no underflow -func=sincosf_sinf op1=17800000 result=17800000.000 -func=sincosf_cosf op1=17800000 result=3f800000.000 -; underflow -func=sincosf_sinf op1=00400000 result=00400000.000 status=ux -func=sincosf_cosf op1=00400000 result=3f800000.000 status=ux diff --git a/libc/AOR_v20.02/math/test/testcases/directed/sinf.tst b/libc/AOR_v20.02/math/test/testcases/directed/sinf.tst deleted file mode 100644 index 022bf1424879..000000000000 --- a/libc/AOR_v20.02/math/test/testcases/directed/sinf.tst +++ /dev/null @@ -1,29 +0,0 @@ -; sinf.tst - Directed test cases for SP sine -; -; Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -; See https://llvm.org/LICENSE.txt for license information. 
-; SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception - - -func=sinf op1=7fc00001 result=7fc00001 errno=0 -func=sinf op1=ffc00001 result=7fc00001 errno=0 -func=sinf op1=7f800001 result=7fc00001 errno=0 status=i -func=sinf op1=ff800001 result=7fc00001 errno=0 status=i -func=sinf op1=7f800000 result=7fc00001 errno=EDOM status=i -func=sinf op1=ff800000 result=7fc00001 errno=EDOM status=i -func=sinf op1=00000000 result=00000000 errno=0 -func=sinf op1=80000000 result=80000000 errno=0 -; Directed test for a failure I found while developing mathbench -func=sinf op1=c70d39a1 result=be37fad5.7ed errno=0 -; SDCOMP-26094: check sinf in the cases for which the range reducer -; returns values furthest beyond its nominal upper bound of pi/4. -func=sinf op1=46427f1b result=3f352d80.f9b error=0 -func=sinf op1=4647e568 result=3f352da9.7be error=0 -func=sinf op1=46428bac result=bf352dea.924 error=0 -func=sinf op1=4647f1f9 result=bf352e13.146 error=0 -func=sinf op1=4647fe8a result=3f352e7c.ac9 error=0 -func=sinf op1=45d8d7f1 result=3f35097b.cb0 error=0 -func=sinf op1=45d371a4 result=bf350990.102 error=0 -func=sinf op1=45ce0b57 result=3f3509a4.554 error=0 -func=sinf op1=45d35882 result=3f3509f9.bdb error=0 -func=sinf op1=45cdf235 result=bf350a0e.02c error=0 diff --git a/libc/AOR_v20.02/math/test/testcases/random/float.tst b/libc/AOR_v20.02/math/test/testcases/random/float.tst index c142d63cd594..aadd70a336a1 100644 --- a/libc/AOR_v20.02/math/test/testcases/random/float.tst +++ b/libc/AOR_v20.02/math/test/testcases/random/float.tst @@ -4,10 +4,6 @@ !! See https://llvm.org/LICENSE.txt for license information. !! SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -test sinf 10000 -test cosf 10000 -test sincosf_sinf 5000 -test sincosf_cosf 5000 test tanf 10000 test expf 10000 test exp2f 10000 diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index fe3064ddf227..f176caee6a4e 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -1,5 +1,6 @@ include "config/public_api.td" +include "spec/gnu_ext.td" include "spec/linux.td" include "spec/posix.td" include "spec/stdc.td" @@ -123,7 +124,10 @@ def MathAPI : PublicAPI<"math.h"> { IsNanMacro, ]; let Functions = [ + "cosf", "round", + "sincosf", + "sinf", ]; } diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt index 79456c938e6e..bc245cdb481e 100644 --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -41,7 +41,10 @@ add_entrypoint_library( llvmlibm DEPENDS # math.h entrypoints + libc.src.math.cosf libc.src.math.round + libc.src.math.sincosf + libc.src.math.sinf ) add_redirector_library( diff --git a/libc/src/__support/common.h.def b/libc/src/__support/common.h.def index 3abd23674c97..a1bb78d5b00f 100644 --- a/libc/src/__support/common.h.def +++ b/libc/src/__support/common.h.def @@ -11,6 +11,10 @@ #define LIBC_INLINE_ASM __asm__ __volatile__ +#define likely(x) __builtin_expect (!!(x), 1) +#define unlikely(x) __builtin_expect (x, 0) +#define UNUSED __attribute__((unused)) + Include the platform specific definitions at build time. For example, that of entrypoint macro. 
%%include_file(${platform_defs}) diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index e7ad9d2a73fe..5ec98ddd18dd 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -1,3 +1,23 @@ +add_header_library( + math_utils + HDRS + math_utils.h + DEPENDS + libc.include.errno + libc.include.math + libc.src.errno.__errno_location +) + +add_object_library( + sincosf_utils + HDRS + sincosf_utils.h + SRCS + sincosf_data.cpp + DEPENDS + .math_utils +) + add_entrypoint_object( round REDIRECTED @@ -12,3 +32,39 @@ add_redirector_object( SRC round_redirector.cpp ) + +add_entrypoint_object( + cosf + SRCS + cosf.cpp + HDRS + cosf.h + DEPENDS + .sincosf_utils + libc.include.math + libc.src.errno.__errno_location +) + +add_entrypoint_object( + sinf + SRCS + sinf.cpp + HDRS + sinf.h + DEPENDS + .sincosf_utils + libc.include.math + libc.src.errno.__errno_location +) + +add_entrypoint_object( + sincosf + SRCS + sincosf.cpp + HDRS + sincosf.h + DEPENDS + .sincosf_utils + libc.include.math + libc.src.errno.__errno_location +) diff --git a/libc/src/math/cosf.cpp b/libc/src/math/cosf.cpp new file mode 100644 index 000000000000..db121b2cb396 --- /dev/null +++ b/libc/src/math/cosf.cpp @@ -0,0 +1,64 @@ +//===-- Single-precision cos function -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math_utils.h" +#include "sincosf_utils.h" + +#include "include/math.h" +#include "src/__support/common.h" + +#include + +namespace __llvm_libc { + +// Fast cosf implementation. Worst-case ULP is 0.5607, maximum relative +// error is 0.5303 * 2^-23. A single-step range reduction is used for +// small values. Large inputs have their range reduced using fast integer +// arithmetic. +float LLVM_LIBC_ENTRYPOINT(cosf)(float y) { + double x = y; + double s; + int n; + const sincos_t *p = &__sincosf_table[0]; + + if (abstop12(y) < abstop12(pio4)) { + double x2 = x * x; + + if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) + return 1.0f; + + return sinf_poly(x, x2, p, 1); + } else if (likely(abstop12(y) < abstop12(120.0f))) { + x = reduce_fast(x, p, &n); + + // Setup the signs for sin and cos. + s = p->sign[n & 3]; + + if (n & 2) + p = &__sincosf_table[1]; + + return sinf_poly(x * s, x * x, p, n ^ 1); + } else if (abstop12(y) < abstop12(INFINITY)) { + uint32_t xi = as_uint32_bits(y); + int sign = xi >> 31; + + x = reduce_large(xi, &n); + + // Setup signs for sin and cos - include original sign. + s = p->sign[(n + sign) & 3]; + + if ((n + sign) & 2) + p = &__sincosf_table[1]; + + return sinf_poly(x * s, x * x, p, n ^ 1); + } + + return invalidf(y); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/cosf.h b/libc/src/math/cosf.h new file mode 100644 index 000000000000..1aaabe900ba8 --- /dev/null +++ b/libc/src/math/cosf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for cosf --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_COSF_H +#define LLVM_LIBC_SRC_MATH_COSF_H + +namespace __llvm_libc { + +float cosf(float x); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_COSF_H diff --git a/libc/src/math/math_utils.h b/libc/src/math/math_utils.h new file mode 100644 index 000000000000..3553673486f9 --- /dev/null +++ b/libc/src/math/math_utils.h @@ -0,0 +1,49 @@ +//===-- Collection of utils for implementing math functions -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_MATH_UTILS_H +#define LLVM_LIBC_SRC_MATH_MATH_UTILS_H + +#include "include/errno.h" +#include "include/math.h" + +#include "src/__support/common.h" +#include "src/errno/llvmlibc_errno.h" + +#include + +namespace __llvm_libc { + +static inline float with_errnof(float x, int err) { + if (math_errhandling & MATH_ERRNO) + llvmlibc_errno = err; + return x; +} + +static inline uint32_t as_uint32_bits(float x) { + return *reinterpret_cast(&x); +} + +static inline float as_float(uint32_t x) { + return *reinterpret_cast(&x); +} + +static inline double as_double(uint64_t x) { + return *reinterpret_cast(&x); +} + +static inline constexpr float invalidf(float x) { + float y = (x - x) / (x - x); + return isnan(x) ? y : with_errnof(y, EDOM); +} + +static inline void force_eval_float(float x) { volatile float y UNUSED = x; } + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_MATH_UTILS_H diff --git a/libc/src/math/sincosf.cpp b/libc/src/math/sincosf.cpp new file mode 100644 index 000000000000..717feaa470d2 --- /dev/null +++ b/libc/src/math/sincosf.cpp @@ -0,0 +1,76 @@ +//===-- Single-precision sincos function ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math_utils.h" +#include "sincosf_utils.h" + +#include "include/math.h" +#include "src/__support/common.h" + +#include + +namespace __llvm_libc { + +// Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative +// error is 0.5303 * 2^-23. A single-step range reduction is used for +// small values. Large inputs have their range reduced using fast integer +// arithmetic. +void LLVM_LIBC_ENTRYPOINT(sincosf)(float y, float *sinp, float *cosp) { + double x = y; + double s; + int n; + const sincos_t *p = &__sincosf_table[0]; + + if (abstop12(y) < abstop12(pio4)) { + double x2 = x * x; + + if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) { + if (unlikely(abstop12(y) < abstop12(as_float(0x800000)))) + // Force underflow for tiny y. + force_eval_float(x2); + *sinp = y; + *cosp = 1.0f; + return; + } + + sincosf_poly(x, x2, p, 0, sinp, cosp); + } else if (abstop12(y) < abstop12(120.0f)) { + x = reduce_fast(x, p, &n); + + // Setup the signs for sin and cos. 
+ s = p->sign[n & 3]; + + if (n & 2) + p = &__sincosf_table[1]; + + sincosf_poly(x * s, x * x, p, n, sinp, cosp); + } else if (likely(abstop12(y) < abstop12(INFINITY))) { + uint32_t xi = as_uint32_bits(y); + int sign = xi >> 31; + + x = reduce_large(xi, &n); + + // Setup signs for sin and cos - include original sign. + s = p->sign[(n + sign) & 3]; + + if ((n + sign) & 2) + p = &__sincosf_table[1]; + + sincosf_poly(x * s, x * x, p, n, sinp, cosp); + } else { + // Return NaN if Inf or NaN for both sin and cos. + *sinp = *cosp = y - y; + + // Needed to set errno for +-Inf, the add is a hack to work + // around a gcc register allocation issue: just passing y + // affects code generation in the fast path. + invalidf(y + y); + } +} + +} // namespace __llvm_libc diff --git a/libc/src/math/sincosf.h b/libc/src/math/sincosf.h new file mode 100644 index 000000000000..47ef983f4385 --- /dev/null +++ b/libc/src/math/sincosf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for sincosf -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_SINCOSF_H +#define LLVM_LIBC_SRC_MATH_SINCOSF_H + +namespace __llvm_libc { + +void sincosf(float x, float *sinx, float *cosx); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_SINCOSF_H diff --git a/libc/src/math/sincosf_data.cpp b/libc/src/math/sincosf_data.cpp new file mode 100644 index 000000000000..50984570b55f --- /dev/null +++ b/libc/src/math/sincosf_data.cpp @@ -0,0 +1,51 @@ +//===-- sinf/cosf data tables ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math_utils.h" +#include "sincosf_utils.h" + +#include + +namespace __llvm_libc { + +// The constants and polynomials for sine and cosine. The 2nd entry +// computes -cos (x) rather than cos (x) to get negation for free. +const sincos_t __sincosf_table[2] = { + {{1.0, -1.0, -1.0, 1.0}, + as_double(0x41645f306dc9c883), + as_double(0x3ff921fb54442d18), + as_double(0x3ff0000000000000), + as_double(0xbfdffffffd0c621c), + as_double(0x3fa55553e1068f19), + as_double(0xbf56c087e89a359d), + as_double(0x3ef99343027bf8c3), + as_double(0xbfc555545995a603), + as_double(0x3f81107605230bc4), + as_double(0xbf2994eb3774cf24)}, + {{1.0, -1.0, -1.0, 1.0}, + as_double(0x41645f306dc9c883), + as_double(0x3ff921fb54442d18), + as_double(0xbff0000000000000), + as_double(0x3fdffffffd0c621c), + as_double(0xbfa55553e1068f19), + as_double(0x3f56c087e89a359d), + as_double(0xbef99343027bf8c3), + as_double(0xbfc555545995a603), + as_double(0x3f81107605230bc4), + as_double(0xbf2994eb3774cf24)}, +}; + +// Table with 4/PI to 192 bit precision. To avoid unaligned accesses +// only 8 new bits are added per entry, making the table 4 times larger. 
+const uint32_t __inv_pio4[24] = { + 0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e, 0xf9836e4e, 0x836e4e44, + 0x6e4e4415, 0x4e441529, 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1, + 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, 0x34ddc0db, 0xddc0db62, + 0xc0db6295, 0xdb629599, 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041}; + +} // namespace __llvm_libc diff --git a/libc/src/math/sincosf_utils.h b/libc/src/math/sincosf_utils.h new file mode 100644 index 000000000000..8c54cb9c1d90 --- /dev/null +++ b/libc/src/math/sincosf_utils.h @@ -0,0 +1,142 @@ +//===-- Collection of utils for cosf/sinf/sincosf ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H +#define LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H + +#include "math_utils.h" + +#include + +namespace __llvm_libc { + +// 2PI * 2^-64. +static const double pi63 = as_double(0x3c1921fb54442d18); +// PI / 4. +static const double pio4 = as_double(0x3fe921fb54442d18); + +// The constants and polynomials for sine and cosine. +typedef struct { + double sign[4]; // Sign of sine in quadrants 0..3. + double hpi_inv; // 2 / PI ( * 2^24 ). + double hpi; // PI / 2. + double c0, c1, c2, c3, c4; // Cosine polynomial. + double s1, s2, s3; // Sine polynomial. +} sincos_t; + +// Polynomial data (the cosine polynomial is negated in the 2nd entry). +extern const sincos_t __sincosf_table[2]; + +// Table with 4/PI to 192 bit precision. +extern const uint32_t __inv_pio4[]; + +// Top 12 bits of the float representation with the sign bit cleared. +static inline uint32_t abstop12(float x) { + return (as_uint32_bits(x) >> 20) & 0x7ff; +} + +// Compute the sine and cosine of inputs X and X2 (X squared), using the +// polynomial P and store the results in SINP and COSP. N is the quadrant, +// if odd the cosine and sine polynomials are swapped. +static inline void sincosf_poly(double x, double x2, const sincos_t *p, int n, + float *sinp, float *cosp) { + double x3, x4, x5, x6, s, c, c1, c2, s1; + + x4 = x2 * x2; + x3 = x2 * x; + c2 = p->c3 + x2 * p->c4; + s1 = p->s2 + x2 * p->s3; + + // Swap sin/cos result based on quadrant. + float *tmp = (n & 1 ? cosp : sinp); + cosp = (n & 1 ? sinp : cosp); + sinp = tmp; + + c1 = p->c0 + x2 * p->c1; + x5 = x3 * x2; + x6 = x4 * x2; + + s = x + x3 * p->s1; + c = c1 + x4 * p->c2; + + *sinp = s + x5 * s1; + *cosp = c + x6 * c2; +} + +// Return the sine of inputs X and X2 (X squared) using the polynomial P. +// N is the quadrant, and if odd the cosine polynomial is used. +static inline float sinf_poly(double x, double x2, const sincos_t *p, int n) { + double x3, x4, x6, x7, s, c, c1, c2, s1; + + if ((n & 1) == 0) { + x3 = x * x2; + s1 = p->s2 + x2 * p->s3; + + x7 = x3 * x2; + s = x + x3 * p->s1; + + return s + x7 * s1; + } else { + x4 = x2 * x2; + c2 = p->c3 + x2 * p->c4; + c1 = p->c0 + x2 * p->c1; + + x6 = x4 * x2; + c = c1 + x4 * p->c2; + + return c + x6 * c2; + } +} + +// Fast range reduction using single multiply-subtract. Return the modulo of +// X as a value between -PI/4 and PI/4 and store the quadrant in NP. +// The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double +// is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, +// the result is accurate for |X| <= 120.0. 
+static inline double reduce_fast(double x, const sincos_t *p, int *np) { + double r; + // Use scaled float to int conversion with explicit rounding. + // hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. + // This avoids inaccuracies introduced by truncating negative values. + r = x * p->hpi_inv; + int n = ((int32_t)r + 0x800000) >> 24; + *np = n; + return x - n * p->hpi; +} + +// Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. +// XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). +// Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. +// Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit +// multiply computes the exact 2.62-bit fixed-point modulo. Since the result +// can have at most 29 leading zeros after the binary point, the double +// precision result is accurate to 33 bits. +static inline double reduce_large(uint32_t xi, int *np) { + const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15]; + int shift = (xi >> 23) & 7; + uint64_t n, res0, res1, res2; + + xi = (xi & 0xffffff) | 0x800000; + xi <<= shift; + + res0 = xi * arr[0]; + res1 = (uint64_t)xi * arr[4]; + res2 = (uint64_t)xi * arr[8]; + res0 = (res2 >> 32) | (res0 << 32); + res0 += res1; + + n = (res0 + (1ULL << 61)) >> 62; + res0 -= n << 62; + double x = (int64_t)res0; + *np = n; + return x * pi63; +} + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_SINCOSF_UTILS_H diff --git a/libc/src/math/sinf.cpp b/libc/src/math/sinf.cpp new file mode 100644 index 000000000000..634e40c50f6b --- /dev/null +++ b/libc/src/math/sinf.cpp @@ -0,0 +1,68 @@ +//===-- Single-precision sin function -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "math_utils.h" +#include "sincosf_utils.h" + +#include "include/math.h" +#include "src/__support/common.h" + +#include + +namespace __llvm_libc { + +// Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative +// error is 0.5303 * 2^-23. A single-step range reduction is used for +// small values. Large inputs have their range reduced using fast integer +// arithmetic. +float LLVM_LIBC_ENTRYPOINT(sinf)(float y) { + double x = y; + double s; + int n; + const sincos_t *p = &__sincosf_table[0]; + + if (abstop12(y) < abstop12(pio4)) { + s = x * x; + + if (unlikely(abstop12(y) < abstop12(as_float(0x39800000)))) { + if (unlikely(abstop12(y) < abstop12(as_float(0x800000)))) + // Force underflow for tiny y. + force_eval_float(s); + return y; + } + + return sinf_poly(x, s, p, 0); + } else if (likely(abstop12(y) < abstop12(120.0f))) { + x = reduce_fast(x, p, &n); + + // Setup the signs for sin and cos. + s = p->sign[n & 3]; + + if (n & 2) + p = &__sincosf_table[1]; + + return sinf_poly(x * s, x * x, p, n); + } else if (abstop12(y) < abstop12(INFINITY)) { + uint32_t xi = as_uint32_bits(y); + int sign = xi >> 31; + + x = reduce_large(xi, &n); + + // Setup signs for sin and cos - include original sign. 
+ s = p->sign[(n + sign) & 3]; + + if ((n + sign) & 2) + p = &__sincosf_table[1]; + + return sinf_poly(x * s, x * x, p, n); + } + + return invalidf(y); +} + +} // namespace __llvm_libc diff --git a/libc/src/math/sinf.h b/libc/src/math/sinf.h new file mode 100644 index 000000000000..e63db04c51b5 --- /dev/null +++ b/libc/src/math/sinf.h @@ -0,0 +1,18 @@ +//===-- Implementation header for sinf --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_SINF_H +#define LLVM_LIBC_SRC_MATH_SINF_H + +namespace __llvm_libc { + +float sinf(float x); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_MATH_SINF_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index 209d00b6d7f9..f92a0463b359 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(assert) add_subdirectory(errno) +add_subdirectory(math) add_subdirectory(signal) add_subdirectory(stdio) add_subdirectory(stdlib) diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt new file mode 100644 index 000000000000..4ba3ff3f5d3a --- /dev/null +++ b/libc/test/src/math/CMakeLists.txt @@ -0,0 +1,80 @@ +add_libc_testsuite(libc_math_unittests) + +function(add_math_unittest name) + cmake_parse_arguments( + "MATH_UNITTEST" + "NEED_MPFR" # No optional arguments + "" # Single value arguments + "" # Multi-value arguments + ${ARGN} + ) + + if(MATH_UNITTEST_NEED_MPFR) + if(NOT LIBC_TESTS_CAN_USE_MPFR) + message("WARNING: Math test ${name} will be skipped as MPFR library is not available.") + return() + endif() + endif() + + add_libc_unittest(${name} ${MATH_UNITTEST_UNPARSED_ARGUMENTS}) + if(MATH_UNITTEST_NEED_MPFR) + get_fq_target_name(${name} fq_target_name) + target_link_libraries(${fq_target_name} PRIVATE libcMPFRWrapper -lmpfr -lgmp) + endif() +endfunction(add_math_unittest) + +add_header_library( + float_utils + HDRS + float.h +) + +# TODO(sivachandra): Remove the dependency on __errno_location as the tested +# entry points depend on the already. +add_math_unittest( + cosf_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + cosf_test.cpp + HDRS + sdcomp26094.h + DEPENDS + .float_utils + libc.src.errno.__errno_location + libc.src.math.cosf + libc.utils.CPP.standalone_cpp +) + +add_math_unittest( + sinf_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + sinf_test.cpp + HDRS + sdcomp26094.h + DEPENDS + .float_utils + libc.src.errno.__errno_location + libc.src.math.sinf + libc.utils.CPP.standalone_cpp +) + +add_math_unittest( + sincosf_test + NEED_MPFR + SUITE + libc_math_unittests + SRCS + sincosf_test.cpp + HDRS + sdcomp26094.h + DEPENDS + .float_utils + libc.src.errno.__errno_location + libc.src.math.sincosf + libc.utils.CPP.standalone_cpp +) diff --git a/libc/test/src/math/cosf_test.cpp b/libc/test/src/math/cosf_test.cpp new file mode 100644 index 000000000000..94c66cda1b0f --- /dev/null +++ b/libc/test/src/math/cosf_test.cpp @@ -0,0 +1,103 @@ +//===-- Unittests for cosf ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/errno/llvmlibc_errno.h" +#include "src/math/cosf.h" +#include "src/math/math_utils.h" +#include "test/src/math/float.h" +#include "test/src/math/sdcomp26094.h" +#include "utils/CPP/Array.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +#include + +using __llvm_libc::as_float; +using __llvm_libc::as_uint32_bits; + +using __llvm_libc::testing::FloatBits; +using __llvm_libc::testing::sdcomp26094Values; + +namespace mpfr = __llvm_libc::testing::mpfr; + +// 12 additional bits of precision over the base precision of a |float| +// value. +static constexpr mpfr::Tolerance tolerance{mpfr::Tolerance::floatPrecision, 12, + 3 * 0x1000 / 4}; + +TEST(CosfTest, SpecialNumbers) { + llvmlibc_errno = 0; + + EXPECT_TRUE(FloatBits::isQNan( + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::QNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_TRUE(FloatBits::isNegQNan( + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::NegQNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_TRUE(FloatBits::isQNan( + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::SNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_TRUE(FloatBits::isNegQNan( + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::NegSNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_EQ(FloatBits::One, + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::Zero)))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_EQ(FloatBits::One, + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::NegZero)))); + EXPECT_EQ(llvmlibc_errno, 0); + + llvmlibc_errno = 0; + EXPECT_TRUE(FloatBits::isQNan( + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::Inf))))); + EXPECT_EQ(llvmlibc_errno, EDOM); + + llvmlibc_errno = 0; + EXPECT_TRUE(FloatBits::isNegQNan( + as_uint32_bits(__llvm_libc::cosf(as_float(FloatBits::NegInf))))); + EXPECT_EQ(llvmlibc_errno, EDOM); +} + +TEST(CosfTest, InFloatRange) { + constexpr uint32_t count = 1000000; + constexpr uint32_t step = UINT32_MAX / count; + for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + float x = as_float(v); + if (isnan(x) || isinf(x)) + continue; + EXPECT_TRUE(mpfr::equalsCos(x, __llvm_libc::cosf(x), tolerance)); + } +} + +// For small values, cos(x) is 1. +TEST(CosfTest, SmallValues) { + float x = as_float(0x17800000); + float result = __llvm_libc::cosf(x); + EXPECT_TRUE(mpfr::equalsCos(x, result, tolerance)); + EXPECT_EQ(FloatBits::One, as_uint32_bits(result)); + + x = as_float(0x00400000); + result = __llvm_libc::cosf(x); + EXPECT_TRUE(mpfr::equalsCos(x, result, tolerance)); + EXPECT_EQ(FloatBits::One, as_uint32_bits(result)); +} + +// SDCOMP-26094: check cosf in the cases for which the range reducer +// returns values furthest beyond its nominal upper bound of pi/4. +TEST(CosfTest, SDCOMP_26094) { + for (uint32_t v : sdcomp26094Values) { + float x = as_float(v); + EXPECT_TRUE(mpfr::equalsCos(x, __llvm_libc::cosf(x), tolerance)); + } +} diff --git a/libc/test/src/math/float.h b/libc/test/src/math/float.h new file mode 100644 index 000000000000..bfa15a18ce3f --- /dev/null +++ b/libc/test/src/math/float.h @@ -0,0 +1,49 @@ +//===-- Single precision floating point test utils --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATH_FLOAT_H +#define LLVM_LIBC_TEST_SRC_MATH_FLOAT_H + +#include "src/math/math_utils.h" + +namespace __llvm_libc { +namespace testing { + +struct FloatBits { + // The various NaN bit patterns here are just one of the many possible + // patterns. The functions isQNan and isNegQNan can help understand why. + + static const uint32_t QNan = 0x7fc00000; + static const uint32_t NegQNan = 0xffc00000; + + static const uint32_t SNan = 0x7f800001; + static const uint32_t NegSNan = 0xff800001; + + static bool isQNan(float f) { + uint32_t bits = as_uint32_bits(f); + return ((0x7fc00000 & bits) != 0) && ((0x80000000 & bits) == 0); + } + + static bool isNegQNan(float f) { + uint32_t bits = as_uint32_bits(f); + return 0xffc00000 & bits; + } + + static constexpr uint32_t Zero = 0x0; + static constexpr uint32_t NegZero = 0x80000000; + + static constexpr uint32_t Inf = 0x7f800000; + static constexpr uint32_t NegInf = 0xff800000; + + static constexpr uint32_t One = 0x3f800000; +}; + +} // namespace testing +} // namespace __llvm_libc + +#endif // LLVM_LIBC_TEST_SRC_MATH_FLOAT_H diff --git a/libc/test/src/math/sdcomp26094.h b/libc/test/src/math/sdcomp26094.h new file mode 100644 index 000000000000..7ebcd60e4af2 --- /dev/null +++ b/libc/test/src/math/sdcomp26094.h @@ -0,0 +1,25 @@ +//===-- SDCOMP-26094 specific items -----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_TEST_SRC_MATH_SDCOMP26094_H +#define LLVM_LIBC_TEST_SRC_MATH_SDCOMP26094_H + +#include "utils/CPP/Array.h" + +namespace __llvm_libc { +namespace testing { + +static constexpr __llvm_libc::cpp::Array sdcomp26094Values{ + 0x46427f1b, 0x4647e568, 0x46428bac, 0x4647f1f9, 0x4647fe8a, + 0x45d8d7f1, 0x45d371a4, 0x45ce0b57, 0x45d35882, 0x45cdf235, +}; + +} // namespace testing +} // namespace __llvm_libc + +#endif // LLVM_LIBC_TEST_SRC_MATH_SDCOMP26094_H diff --git a/libc/test/src/math/sincosf_test.cpp b/libc/test/src/math/sincosf_test.cpp new file mode 100644 index 000000000000..36e6b4a129a7 --- /dev/null +++ b/libc/test/src/math/sincosf_test.cpp @@ -0,0 +1,125 @@ +//===-- Unittests for sincosf ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/errno/llvmlibc_errno.h" +#include "src/math/math_utils.h" +#include "src/math/sincosf.h" +#include "test/src/math/float.h" +#include "test/src/math/sdcomp26094.h" +#include "utils/CPP/Array.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +#include + +using __llvm_libc::as_float; +using __llvm_libc::as_uint32_bits; + +using __llvm_libc::testing::FloatBits; +using __llvm_libc::testing::sdcomp26094Values; + +namespace mpfr = __llvm_libc::testing::mpfr; +// 12 additional bits of precision over the base precision of a |float| +// value. 
+static constexpr mpfr::Tolerance tolerance{mpfr::Tolerance::floatPrecision, 12, + 3 * 0x1000 / 4}; + +TEST(SinCosfTest, SpecialNumbers) { + llvmlibc_errno = 0; + float sin, cos; + + __llvm_libc::sincosf(as_float(FloatBits::QNan), &sin, &cos); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(cos))); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(sin))); + EXPECT_EQ(llvmlibc_errno, 0); + + __llvm_libc::sincosf(as_float(FloatBits::NegQNan), &sin, &cos); + EXPECT_TRUE(FloatBits::isNegQNan(as_uint32_bits(cos))); + EXPECT_TRUE(FloatBits::isNegQNan(as_uint32_bits(sin))); + EXPECT_EQ(llvmlibc_errno, 0); + + __llvm_libc::sincosf(as_float(FloatBits::SNan), &sin, &cos); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(cos))); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(sin))); + EXPECT_EQ(llvmlibc_errno, 0); + + __llvm_libc::sincosf(as_float(FloatBits::NegSNan), &sin, &cos); + EXPECT_TRUE(FloatBits::isNegQNan(as_uint32_bits(cos))); + EXPECT_TRUE(FloatBits::isNegQNan(as_uint32_bits(sin))); + EXPECT_EQ(llvmlibc_errno, 0); + + __llvm_libc::sincosf(as_float(FloatBits::Zero), &sin, &cos); + EXPECT_EQ(FloatBits::One, as_uint32_bits(cos)); + EXPECT_EQ(FloatBits::Zero, as_uint32_bits(sin)); + EXPECT_EQ(llvmlibc_errno, 0); + + __llvm_libc::sincosf(as_float(FloatBits::NegZero), &sin, &cos); + EXPECT_EQ(FloatBits::One, as_uint32_bits(cos)); + EXPECT_EQ(FloatBits::NegZero, as_uint32_bits(sin)); + EXPECT_EQ(llvmlibc_errno, 0); + + llvmlibc_errno = 0; + __llvm_libc::sincosf(as_float(FloatBits::Inf), &sin, &cos); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(cos))); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(sin))); + EXPECT_EQ(llvmlibc_errno, EDOM); + + llvmlibc_errno = 0; + __llvm_libc::sincosf(as_float(FloatBits::NegInf), &sin, &cos); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(cos))); + EXPECT_TRUE(FloatBits::isQNan(as_uint32_bits(sin))); + EXPECT_EQ(llvmlibc_errno, EDOM); +} + +TEST(SinCosfTest, InFloatRange) { + constexpr uint32_t count = 1000000; + constexpr uint32_t step = UINT32_MAX / count; + for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + float x = as_float(v); + if (isnan(x) || isinf(x)) + continue; + + float sin, cos; + __llvm_libc::sincosf(x, &sin, &cos); + EXPECT_TRUE(mpfr::equalsCos(x, cos, tolerance)); + EXPECT_TRUE(mpfr::equalsSin(x, sin, tolerance)); + } +} + +// For small values, cos(x) is 1 and sin(x) is x. +TEST(SinCosfTest, SmallValues) { + uint32_t bits = 0x17800000; + float x = as_float(bits); + float result_cos, result_sin; + __llvm_libc::sincosf(x, &result_sin, &result_cos); + EXPECT_TRUE(mpfr::equalsCos(x, result_cos, tolerance)); + EXPECT_TRUE(mpfr::equalsSin(x, result_sin, tolerance)); + EXPECT_EQ(FloatBits::One, as_uint32_bits(result_cos)); + EXPECT_EQ(bits, as_uint32_bits(result_sin)); + + bits = 0x00400000; + x = as_float(bits); + __llvm_libc::sincosf(x, &result_sin, &result_cos); + EXPECT_TRUE(mpfr::equalsCos(x, result_cos, tolerance)); + EXPECT_TRUE(mpfr::equalsSin(x, result_sin, tolerance)); + EXPECT_EQ(FloatBits::One, as_uint32_bits(result_cos)); + EXPECT_EQ(bits, as_uint32_bits(result_sin)); +} + +// SDCOMP-26094: check sinf in the cases for which the range reducer +// returns values furthest beyond its nominal upper bound of pi/4. 
+TEST(SinCosfTest, SDCOMP_26094) { + for (uint32_t v : sdcomp26094Values) { + float x = as_float(v); + float sin, cos; + __llvm_libc::sincosf(x, &sin, &cos); + EXPECT_TRUE(mpfr::equalsCos(x, cos, tolerance)); + EXPECT_TRUE(mpfr::equalsSin(x, sin, tolerance)); + } +} diff --git a/libc/test/src/math/sinf_test.cpp b/libc/test/src/math/sinf_test.cpp new file mode 100644 index 000000000000..e4c6e818b57a --- /dev/null +++ b/libc/test/src/math/sinf_test.cpp @@ -0,0 +1,110 @@ +//===-- Unittests for sinf ------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/math.h" +#include "src/errno/llvmlibc_errno.h" +#include "src/math/math_utils.h" +#include "src/math/sinf.h" +#include "test/src/math/float.h" +#include "test/src/math/sdcomp26094.h" +#include "utils/CPP/Array.h" +#include "utils/MPFRWrapper/MPFRUtils.h" +#include "utils/UnitTest/Test.h" + +#include + +using __llvm_libc::as_float; +using __llvm_libc::as_uint32_bits; + +using __llvm_libc::testing::FloatBits; +using __llvm_libc::testing::sdcomp26094Values; + +namespace mpfr = __llvm_libc::testing::mpfr; + +// 12 additional bits of precision over the base precision of a |float| +// value. +static constexpr mpfr::Tolerance tolerance{mpfr::Tolerance::floatPrecision, 12, + 3 * 0x1000 / 4}; + +TEST(SinfTest, SpecialNumbers) { + llvmlibc_errno = 0; + + EXPECT_TRUE(FloatBits::isQNan( + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::QNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_TRUE(FloatBits::isNegQNan( + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::NegQNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_TRUE(FloatBits::isQNan( + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::SNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_TRUE(FloatBits::isNegQNan( + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::NegSNan))))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_EQ(FloatBits::Zero, + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::Zero)))); + EXPECT_EQ(llvmlibc_errno, 0); + + EXPECT_EQ(FloatBits::NegZero, + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::NegZero)))); + EXPECT_EQ(llvmlibc_errno, 0); + + llvmlibc_errno = 0; + EXPECT_TRUE(FloatBits::isQNan( + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::Inf))))); + EXPECT_EQ(llvmlibc_errno, EDOM); + + llvmlibc_errno = 0; + EXPECT_TRUE(FloatBits::isNegQNan( + as_uint32_bits(__llvm_libc::sinf(as_float(FloatBits::NegInf))))); + EXPECT_EQ(llvmlibc_errno, EDOM); +} + +TEST(SinfTest, InFloatRange) { + constexpr uint32_t count = 1000000; + constexpr uint32_t step = UINT32_MAX / count; + for (uint32_t i = 0, v = 0; i <= count; ++i, v += step) { + float x = as_float(v); + if (isnan(x) || isinf(x)) + continue; + EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance)); + } +} + +TEST(SinfTest, SpecificBitPatterns) { + float x = as_float(0xc70d39a1); + EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance)); +} + +// For small values, sin(x) is x. 
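+// (sin(x) = x - x^3/6 + ...; for the inputs below, 2^-80 and the subnormal
+// 2^-127, the cubic term is far below half an ulp of x, so the correctly
+// rounded float result is exactly x.)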
+TEST(SinfTest, SmallValues) { + uint32_t bits = 0x17800000; + float x = as_float(bits); + float result = __llvm_libc::sinf(x); + EXPECT_TRUE(mpfr::equalsSin(x, result, tolerance)); + EXPECT_EQ(bits, as_uint32_bits(result)); + + bits = 0x00400000; + x = as_float(bits); + result = __llvm_libc::sinf(x); + EXPECT_TRUE(mpfr::equalsSin(x, result, tolerance)); + EXPECT_EQ(bits, as_uint32_bits(result)); +} + +// SDCOMP-26094: check sinf in the cases for which the range reducer +// returns values furthest beyond its nominal upper bound of pi/4. +TEST(SinfTest, SDCOMP_26094) { + for (uint32_t v : sdcomp26094Values) { + float x = as_float(v); + EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance)); + } +} diff --git a/libc/utils/CMakeLists.txt b/libc/utils/CMakeLists.txt index e0aa20e19a3e..1e85d05507cc 100644 --- a/libc/utils/CMakeLists.txt +++ b/libc/utils/CMakeLists.txt @@ -1,5 +1,6 @@ add_subdirectory(CPP) add_subdirectory(HdrGen) +add_subdirectory(MPFRWrapper) add_subdirectory(testutils) add_subdirectory(UnitTest) add_subdirectory(benchmarks) diff --git a/libc/utils/MPFRWrapper/CMakeLists.txt b/libc/utils/MPFRWrapper/CMakeLists.txt new file mode 100644 index 000000000000..aa03724aee46 --- /dev/null +++ b/libc/utils/MPFRWrapper/CMakeLists.txt @@ -0,0 +1,17 @@ +try_compile( + LIBC_TESTS_CAN_USE_MPFR + ${CMAKE_CURRENT_BINARY_DIR} + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/check_mpfr.cpp + LINK_LIBRARIES + -lmpfr -lgmp +) + +if(LIBC_TESTS_CAN_USE_MPFR) + add_library(libcMPFRWrapper + MPFRUtils.cpp + MPFRUtils.h + ) +else() + message(WARNING "Math tests using MPFR will be skipped.") +endif() diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp new file mode 100644 index 000000000000..7bd849934fc7 --- /dev/null +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -0,0 +1,97 @@ +//===-- Utils which wrap MPFR ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "MPFRUtils.h" + +#include +#include + +namespace __llvm_libc { +namespace testing { +namespace mpfr { + +class MPFRNumber { + // A precision value which allows sufficiently large additional + // precision even compared to double precision floating point values. + static constexpr unsigned int mpfrPrecision = 96; + + mpfr_t value; + +public: + MPFRNumber() { mpfr_init2(value, mpfrPrecision); } + + explicit MPFRNumber(float x) { + mpfr_init2(value, mpfrPrecision); + mpfr_set_flt(value, x, MPFR_RNDN); + } + + MPFRNumber(const MPFRNumber &other) { + mpfr_set(value, other.value, MPFR_RNDN); + } + + ~MPFRNumber() { mpfr_clear(value); } + + // Returns true if |other| is within the tolerance value |t| of this + // number. 
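+  // The tolerance is materialized as an MPFR value by adding 2^exponent for
+  // every bit set in |t.bits|, starting at 2^(-t.basePrecision - 1) for the
+  // most significant of the |t.width| bits; the absolute difference
+  // |*this - other| is then compared against that sum.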
+ bool isEqual(const MPFRNumber &other, const Tolerance &t) { + MPFRNumber tolerance(0.0); + uint32_t bitMask = 1 << (t.width - 1); + for (int exponent = -t.basePrecision; bitMask > 0; bitMask >>= 1) { + --exponent; + if (t.bits & bitMask) { + MPFRNumber delta; + mpfr_set_ui_2exp(delta.value, 1, exponent, MPFR_RNDN); + mpfr_add(tolerance.value, tolerance.value, delta.value, MPFR_RNDN); + } + } + + MPFRNumber difference; + if (mpfr_cmp(value, other.value) >= 0) + mpfr_sub(difference.value, value, other.value, MPFR_RNDN); + else + mpfr_sub(difference.value, other.value, value, MPFR_RNDN); + + return mpfr_lessequal_p(difference.value, tolerance.value); + } + + // These functions are useful for debugging. + float asFloat() const { return mpfr_get_flt(value, MPFR_RNDN); } + double asDouble() const { return mpfr_get_d(value, MPFR_RNDN); } + void dump(const char *msg) const { mpfr_printf("%s%.128Rf\n", msg, value); } + +public: + static MPFRNumber cos(float x) { + MPFRNumber result; + MPFRNumber mpfrX(x); + mpfr_cos(result.value, mpfrX.value, MPFR_RNDN); + return result; + } + + static MPFRNumber sin(float x) { + MPFRNumber result; + MPFRNumber mpfrX(x); + mpfr_sin(result.value, mpfrX.value, MPFR_RNDN); + return result; + } +}; + +bool equalsCos(float input, float libcOutput, const Tolerance &t) { + MPFRNumber mpfrResult = MPFRNumber::cos(input); + MPFRNumber libcResult(libcOutput); + return mpfrResult.isEqual(libcResult, t); +} + +bool equalsSin(float input, float libcOutput, const Tolerance &t) { + MPFRNumber mpfrResult = MPFRNumber::sin(input); + MPFRNumber libcResult(libcOutput); + return mpfrResult.isEqual(libcResult, t); +} + +} // namespace mpfr +} // namespace testing +} // namespace __llvm_libc diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h new file mode 100644 index 000000000000..9f56ccc61fe6 --- /dev/null +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -0,0 +1,51 @@ +//===-- MPFRUtils.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H +#define LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H + +#include + +namespace __llvm_libc { +namespace testing { +namespace mpfr { + +struct Tolerance { + // Number of bits used to represent the fractional + // part of a value of type 'float'. + static constexpr unsigned int floatPrecision = 23; + + // Number of bits used to represent the fractional + // part of a value of type 'double'. + static constexpr unsigned int doublePrecision = 52; + + // The base precision of the number. For example, for values of + // type float, the base precision is the value |floatPrecision|. + unsigned int basePrecision; + + unsigned int width; // Number of valid LSB bits in |value|. + + // The bits in the tolerance value. The tolerance value will be + // sum(bits[width - i] * 2 ^ (- basePrecision - i)) for |i| in + // range [1, width]. + uint32_t bits; +}; + +// Return true if |libcOutput| is within the tolerance |t| of the cos(x) +// value as evaluated by MPFR. +bool equalsCos(float x, float libcOutput, const Tolerance &t); + +// Return true if |libcOutput| is within the tolerance |t| of the sin(x) +// value as evaluated by MPFR. 
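+// Typical use in a unit test:
+//   EXPECT_TRUE(mpfr::equalsSin(x, __llvm_libc::sinf(x), tolerance));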
+bool equalsSin(float x, float libcOutput, const Tolerance &t); + +} // namespace mpfr +} // namespace testing +} // namespace __llvm_libc + +#endif // LLVM_LIBC_UTILS_TESTUTILS_MPFRUTILS_H diff --git a/libc/utils/MPFRWrapper/check_mpfr.cpp b/libc/utils/MPFRWrapper/check_mpfr.cpp new file mode 100644 index 000000000000..6e6282457960 --- /dev/null +++ b/libc/utils/MPFRWrapper/check_mpfr.cpp @@ -0,0 +1,8 @@ +#include + +int main() { + mpfr_t x; + mpfr_init(x); + mpfr_clear(x); + return 0; +} From 8812b0cc5cc09f350d8e89bff99f185c5e1a5d4d Mon Sep 17 00:00:00 2001 From: Melanie Blower Date: Thu, 16 Apr 2020 08:45:26 -0700 Subject: [PATCH 054/216] [NFC] Rename Sema.FPFeatures to CurFPFeatures and accessor to getCurFPFeatures --- clang/include/clang/Sema/Sema.h | 8 ++++---- clang/lib/Sema/Sema.cpp | 2 +- clang/lib/Sema/SemaAttr.cpp | 14 +++++++------- clang/lib/Sema/SemaDeclCXX.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 8 ++++---- clang/lib/Sema/SemaOverload.cpp | 24 ++++++++++++------------ clang/lib/Sema/SemaPseudoObject.cpp | 10 +++++----- clang/lib/Sema/TreeTransform.h | 6 +++--- clang/lib/Serialization/ASTReader.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 2 +- 10 files changed, 39 insertions(+), 39 deletions(-) diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 889f4ee9031e..a1a0b854a85b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -392,7 +392,7 @@ class Sema final { typedef OpaquePtr TypeTy; OpenCLOptions OpenCLFeatures; - FPOptions FPFeatures; + FPOptions CurFPFeatures; const LangOptions &LangOpts; Preprocessor &PP; @@ -1354,8 +1354,8 @@ class Sema final { /// statements. class FPFeaturesStateRAII { public: - FPFeaturesStateRAII(Sema &S) : S(S), OldFPFeaturesState(S.FPFeatures) {} - ~FPFeaturesStateRAII() { S.FPFeatures = OldFPFeaturesState; } + FPFeaturesStateRAII(Sema &S) : S(S), OldFPFeaturesState(S.CurFPFeatures) {} + ~FPFeaturesStateRAII() { S.CurFPFeatures = OldFPFeaturesState; } private: Sema& S; @@ -1378,7 +1378,7 @@ class Sema final { const LangOptions &getLangOpts() const { return LangOpts; } OpenCLOptions &getOpenCLOptions() { return OpenCLFeatures; } - FPOptions &getFPOptions() { return FPFeatures; } + FPOptions &getCurFPFeatures() { return CurFPFeatures; } DiagnosticsEngine &getDiagnostics() const { return Diags; } SourceManager &getSourceManager() const { return SourceMgr; } diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index 5e5a90ad0143..405b6c33d280 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -150,7 +150,7 @@ const unsigned Sema::MaximumAlignment; Sema::Sema(Preprocessor &pp, ASTContext &ctxt, ASTConsumer &consumer, TranslationUnitKind TUKind, CodeCompleteConsumer *CodeCompleter) : ExternalSource(nullptr), isMultiplexExternalSource(false), - FPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp), + CurFPFeatures(pp.getLangOpts()), LangOpts(pp.getLangOpts()), PP(pp), Context(ctxt), Consumer(consumer), Diags(PP.getDiagnostics()), SourceMgr(PP.getSourceManager()), CollectStats(false), CodeCompleter(CodeCompleter), CurContext(nullptr), diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index 9141a28381ad..8633581b6880 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -929,32 +929,32 @@ void Sema::ActOnPragmaVisibility(const IdentifierInfo* VisType, void Sema::ActOnPragmaFPContract(LangOptions::FPContractModeKind FPC) { switch (FPC) { case LangOptions::FPC_On: - 
FPFeatures.setAllowFPContractWithinStatement(); + CurFPFeatures.setAllowFPContractWithinStatement(); break; case LangOptions::FPC_Fast: - FPFeatures.setAllowFPContractAcrossStatement(); + CurFPFeatures.setAllowFPContractAcrossStatement(); break; case LangOptions::FPC_Off: - FPFeatures.setDisallowFPContract(); + CurFPFeatures.setDisallowFPContract(); break; } } void Sema::setRoundingMode(llvm::RoundingMode FPR) { - FPFeatures.setRoundingMode(FPR); + CurFPFeatures.setRoundingMode(FPR); } void Sema::setExceptionMode(LangOptions::FPExceptionModeKind FPE) { - FPFeatures.setExceptionMode(FPE); + CurFPFeatures.setExceptionMode(FPE); } void Sema::ActOnPragmaFEnvAccess(LangOptions::FEnvAccessModeKind FPC) { switch (FPC) { case LangOptions::FEA_On: - FPFeatures.setAllowFEnvAccess(); + CurFPFeatures.setAllowFEnvAccess(); break; case LangOptions::FEA_Off: - FPFeatures.setDisallowFEnvAccess(); + CurFPFeatures.setDisallowFEnvAccess(); break; } } diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 381db055afae..8f04f5ae5dea 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -13698,7 +13698,7 @@ buildSingleCopyAssignRecursively(Sema &S, SourceLocation Loc, QualType T, Expr *Comparison = BinaryOperator::Create( S.Context, IterationVarRefRVal.build(S, Loc), IntegerLiteral::Create(S.Context, Upper, SizeType, Loc), BO_NE, - S.Context.BoolTy, VK_RValue, OK_Ordinary, Loc, S.FPFeatures); + S.Context.BoolTy, VK_RValue, OK_Ordinary, Loc, S.CurFPFeatures); // Create the pre-increment of the iteration variable. We can determine // whether the increment will overflow based on the value of the array diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 141c4b1fea1d..60d99db7ced9 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13693,9 +13693,9 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, if (CompResultTy.isNull()) { if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, false, - OpLoc, FPFeatures); + OpLoc, CurFPFeatures); return BinaryOperator::Create(Context, LHS.get(), RHS.get(), Opc, ResultTy, - VK, OK, OpLoc, FPFeatures); + VK, OK, OpLoc, CurFPFeatures); } // Handle compound assignments. @@ -13707,10 +13707,10 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true, - OpLoc, FPFeatures); + OpLoc, CurFPFeatures); return CompoundAssignOperator::Create(Context, LHS.get(), RHS.get(), Opc, - ResultTy, VK, OK, OpLoc, FPFeatures, + ResultTy, VK, OK, OpLoc, CurFPFeatures, CompLHSTy, CompResultTy); } diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 805bf667e861..2594e68a48b2 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -12975,7 +12975,7 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, /*ADL*/ true, IsOverloaded(Fns), Fns.begin(), Fns.end()); return CXXOperatorCallExpr::Create(Context, Op, Fn, ArgsArray, Context.DependentTy, VK_RValue, OpLoc, - FPFeatures); + CurFPFeatures); } // Build an empty overload set. 
@@ -13048,8 +13048,8 @@ Sema::CreateOverloadedUnaryOp(SourceLocation OpLoc, UnaryOperatorKind Opc, Args[0] = Input; CallExpr *TheCall = CXXOperatorCallExpr::Create( - Context, Op, FnExpr.get(), ArgsArray, ResultTy, VK, OpLoc, FPFeatures, - Best->IsADLCandidate); + Context, Op, FnExpr.get(), ArgsArray, ResultTy, VK, OpLoc, + CurFPFeatures, Best->IsADLCandidate); if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl)) return ExprError(); @@ -13220,10 +13220,10 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, if (Opc <= BO_Assign || Opc > BO_OrAssign) return BinaryOperator::Create(Context, Args[0], Args[1], Opc, Context.DependentTy, VK_RValue, - OK_Ordinary, OpLoc, FPFeatures); + OK_Ordinary, OpLoc, CurFPFeatures); return CompoundAssignOperator::Create( Context, Args[0], Args[1], Opc, Context.DependentTy, VK_LValue, - OK_Ordinary, OpLoc, FPFeatures, Context.DependentTy, + OK_Ordinary, OpLoc, CurFPFeatures, Context.DependentTy, Context.DependentTy); } @@ -13237,7 +13237,7 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, /*ADL*/ PerformADL, IsOverloaded(Fns), Fns.begin(), Fns.end()); return CXXOperatorCallExpr::Create(Context, Op, Fn, Args, Context.DependentTy, VK_RValue, OpLoc, - FPFeatures); + CurFPFeatures); } // Always do placeholder-like conversions on the RHS. @@ -13406,7 +13406,7 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create( Context, ChosenOp, FnExpr.get(), Args, ResultTy, VK, OpLoc, - FPFeatures, Best->IsADLCandidate); + CurFPFeatures, Best->IsADLCandidate); if (CheckCallReturnType(FnDecl->getReturnType(), OpLoc, TheCall, FnDecl)) @@ -13674,7 +13674,7 @@ ExprResult Sema::BuildSynthesizedThreeWayComparison( Expr *SyntacticForm = BinaryOperator::Create( Context, OrigLHS, OrigRHS, BO_Cmp, Result.get()->getType(), Result.get()->getValueKind(), Result.get()->getObjectKind(), OpLoc, - FPFeatures); + CurFPFeatures); Expr *SemanticForm[] = {LHS, RHS, Result.get()}; return PseudoObjectExpr::Create(Context, SyntacticForm, SemanticForm, 2); } @@ -13705,7 +13705,7 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, return CXXOperatorCallExpr::Create(Context, OO_Subscript, Fn, Args, Context.DependentTy, VK_RValue, RLoc, - FPFeatures); + CurFPFeatures); } // Handle placeholders on both operands. 
@@ -13780,7 +13780,7 @@ Sema::CreateOverloadedArraySubscriptExpr(SourceLocation LLoc, CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create(Context, OO_Subscript, FnExpr.get(), - Args, ResultTy, VK, RLoc, FPFeatures); + Args, ResultTy, VK, RLoc, CurFPFeatures); if (CheckCallReturnType(FnDecl->getReturnType(), LLoc, TheCall, FnDecl)) return ExprError(); @@ -14403,7 +14403,7 @@ Sema::BuildCallToObjectOfClassType(Scope *S, Expr *Obj, CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create(Context, OO_Call, NewFn.get(), MethodArgs, - ResultTy, VK, RParenLoc, FPFeatures); + ResultTy, VK, RParenLoc, CurFPFeatures); if (CheckCallReturnType(Method->getReturnType(), LParenLoc, TheCall, Method)) return true; @@ -14520,7 +14520,7 @@ Sema::BuildOverloadedArrowExpr(Scope *S, Expr *Base, SourceLocation OpLoc, ExprValueKind VK = Expr::getValueKindForType(ResultTy); ResultTy = ResultTy.getNonLValueExprType(Context); CXXOperatorCallExpr *TheCall = CXXOperatorCallExpr::Create( - Context, OO_Arrow, FnExpr.get(), Base, ResultTy, VK, OpLoc, FPFeatures); + Context, OO_Arrow, FnExpr.get(), Base, ResultTy, VK, OpLoc, CurFPFeatures); if (CheckCallReturnType(Method->getReturnType(), OpLoc, TheCall, Method)) return ExprError(); diff --git a/clang/lib/Sema/SemaPseudoObject.cpp b/clang/lib/Sema/SemaPseudoObject.cpp index 74077b33f945..4413b24fa958 100644 --- a/clang/lib/Sema/SemaPseudoObject.cpp +++ b/clang/lib/Sema/SemaPseudoObject.cpp @@ -450,7 +450,7 @@ PseudoOpBuilder::buildAssignmentOperation(Scope *Sc, SourceLocation opcLoc, result = semanticRHS; syntactic = BinaryOperator::Create( S.Context, syntacticLHS, capturedRHS, opcode, capturedRHS->getType(), - capturedRHS->getValueKind(), OK_Ordinary, opcLoc, S.FPFeatures); + capturedRHS->getValueKind(), OK_Ordinary, opcLoc, S.CurFPFeatures); } else { ExprResult opLHS = buildGet(); @@ -464,7 +464,7 @@ PseudoOpBuilder::buildAssignmentOperation(Scope *Sc, SourceLocation opcLoc, syntactic = CompoundAssignOperator::Create( S.Context, syntacticLHS, capturedRHS, opcode, result.get()->getType(), - result.get()->getValueKind(), OK_Ordinary, opcLoc, S.FPFeatures, + result.get()->getValueKind(), OK_Ordinary, opcLoc, S.CurFPFeatures, opLHS.get()->getType(), result.get()->getType()); } @@ -1583,7 +1583,7 @@ ExprResult Sema::checkPseudoObjectAssignment(Scope *S, SourceLocation opcLoc, if (LHS->isTypeDependent() || RHS->isTypeDependent()) return BinaryOperator::Create(Context, LHS, RHS, opcode, Context.DependentTy, VK_RValue, OK_Ordinary, - opcLoc, FPFeatures); + opcLoc, CurFPFeatures); // Filter out non-overload placeholder types in the RHS. 
if (RHS->getType()->isNonOverloadPlaceholderType()) { @@ -1646,7 +1646,7 @@ Expr *Sema::recreateSyntacticForm(PseudoObjectExpr *E) { return CompoundAssignOperator::Create( Context, lhs, rhs, cop->getOpcode(), cop->getType(), cop->getValueKind(), cop->getObjectKind(), cop->getOperatorLoc(), - FPFeatures, cop->getComputationLHSType(), + CurFPFeatures, cop->getComputationLHSType(), cop->getComputationResultType()); } else if (BinaryOperator *bop = dyn_cast(syntax)) { @@ -1655,7 +1655,7 @@ Expr *Sema::recreateSyntacticForm(PseudoObjectExpr *E) { return BinaryOperator::Create(Context, lhs, rhs, bop->getOpcode(), bop->getType(), bop->getValueKind(), bop->getObjectKind(), bop->getOperatorLoc(), - FPFeatures); + CurFPFeatures); } else if (isa(syntax)) { return syntax; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index e79969e9a87a..87b07897ec28 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -10267,7 +10267,7 @@ TreeTransform::TransformBinaryOperator(BinaryOperator *E) { return getDerived().RebuildBinaryOperator( E->getOperatorLoc(), E->getOpcode(), LHS.get(), RHS.get()); Sema::FPFeaturesStateRAII FPFeaturesState(getSema()); - getSema().FPFeatures = E->getFPFeatures(getSema().getLangOpts()); + getSema().CurFPFeatures = E->getFPFeatures(getSema().getLangOpts()); return getDerived().RebuildBinaryOperator(E->getOperatorLoc(), E->getOpcode(), LHS.get(), RHS.get()); @@ -10322,7 +10322,7 @@ ExprResult TreeTransform::TransformCompoundAssignOperator( CompoundAssignOperator *E) { Sema::FPFeaturesStateRAII FPFeaturesState(getSema()); - getSema().FPFeatures = E->getFPFeatures(getSema().getLangOpts()); + getSema().CurFPFeatures = E->getFPFeatures(getSema().getLangOpts()); return getDerived().TransformBinaryOperator(E); } @@ -10797,7 +10797,7 @@ TreeTransform::TransformCXXOperatorCallExpr(CXXOperatorCallExpr *E) { return SemaRef.MaybeBindToTemporary(E); Sema::FPFeaturesStateRAII FPFeaturesState(getSema()); - getSema().FPFeatures = E->getFPFeatures(); + getSema().CurFPFeatures = E->getFPFeatures(); return getDerived().RebuildCXXOperatorCallExpr(E->getOperator(), E->getOperatorLoc(), diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index cc69488c4f64..9f662c6e0d4c 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -7773,7 +7773,7 @@ void ASTReader::InitializeSema(Sema &S) { // FIXME: What happens if these are changed by a module import? 
if (!FPPragmaOptions.empty()) { assert(FPPragmaOptions.size() == 1 && "Wrong number of FP_PRAGMA_OPTIONS"); - SemaObj->FPFeatures = FPOptions(FPPragmaOptions[0]); + SemaObj->CurFPFeatures = FPOptions(FPPragmaOptions[0]); } SemaObj->OpenCLFeatures.copy(OpenCLExtensions); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 05ec8feffb26..a3ceeef99097 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -4727,7 +4727,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, WriteReferencedSelectorsPool(SemaRef); WriteLateParsedTemplates(SemaRef); WriteIdentifierTable(PP, SemaRef.IdResolver, isModule); - WriteFPPragmaOptions(SemaRef.getFPOptions()); + WriteFPPragmaOptions(SemaRef.getCurFPFeatures()); WriteOpenCLExtensions(SemaRef); WriteOpenCLExtensionTypes(SemaRef); WriteCUDAPragmas(SemaRef); From dfcc403b2d78e9b2e8aec788445b9add5ed0ac7a Mon Sep 17 00:00:00 2001 From: Uday Bondhugula Date: Thu, 16 Apr 2020 18:57:32 +0530 Subject: [PATCH 055/216] [MLIR] NFC use Operation::getParentWithTrait in alloca verifier Use recently added accessor Operation::getParentWithTrait in alloca verifier. Differential Revision: https://reviews.llvm.org/D78296 --- mlir/lib/Dialect/StandardOps/IR/Ops.cpp | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp index 53870e414c19..452dade61e32 100644 --- a/mlir/lib/Dialect/StandardOps/IR/Ops.cpp +++ b/mlir/lib/Dialect/StandardOps/IR/Ops.cpp @@ -324,14 +324,11 @@ static LogicalResult verify(AllocLikeOp op) { return success(); // An alloca op needs to have an ancestor with an allocation scope trait. - auto *parentOp = op.getParentOp(); - while (parentOp) { - if (parentOp->template hasTrait()) - return success(); - parentOp = parentOp->getParentOp(); - } - return op.emitOpError( - "requires an ancestor op with AutomaticAllocationScope trait"); + if (!op.template getParentWithTrait()) + return op.emitOpError( + "requires an ancestor op with AutomaticAllocationScope trait"); + + return success(); } namespace { From a8f85da9f538a400dfea00e4954e403bf5f3269c Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Thu, 16 Apr 2020 09:05:40 -0700 Subject: [PATCH 056/216] Revert "[clang][AST] Support AST files larger than 512M" Bitcode file alignment is only 32-bit so 64-bit offsets need special handling. 
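
As a sketch of the general technique only (a hypothetical helper, not the fix
adopted by this revert), a 64-bit offset can be read from blob data that is
merely 4-byte aligned by copying the bytes instead of dereferencing a
reinterpret_cast'ed pointer:

    #include <cstdint>
    #include <cstring>

    // Hypothetical helper: load a uint64_t from data with no alignment
    // guarantee. std::memcpy imposes no alignment requirement on its source.
    static inline uint64_t readUnaligned64(const char *Data) {
      uint64_t V;
      std::memcpy(&V, Data, sizeof(V));
      return V;
    }
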
/b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:6327:28: runtime error: load of misaligned address 0x7fca2bcfe54c for type 'const uint64_t' (aka 'const unsigned long'), which requires 8 byte alignment 0x7fca2bcfe54c: note: pointer points here 00 00 00 00 5a a6 01 00 00 00 00 00 19 a7 01 00 00 00 00 00 48 a7 01 00 00 00 00 00 7d a7 01 00 ^ #0 0x3be2fe4 in clang::ASTReader::TypeCursorForIndex(unsigned int) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:6327:28 #1 0x3be30a0 in clang::ASTReader::readTypeRecord(unsigned int) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:6348:24 #2 0x3bd3d4a in clang::ASTReader::GetType(unsigned int) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:6985:26 #3 0x3c5d9ae in clang::ASTDeclReader::Visit(clang::Decl*) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp:533:31 #4 0x3c91cac in clang::ASTReader::ReadDeclRecord(unsigned int) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReaderDecl.cpp:4045:10 #5 0x3bd4fb1 in clang::ASTReader::GetDecl(unsigned int) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:7352:5 #6 0x3bce2f9 in clang::ASTReader::ReadASTBlock(clang::serialization::ModuleFile&, unsigned int) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:3625:22 #7 0x3bd6d75 in clang::ASTReader::ReadAST(llvm::StringRef, clang::serialization::ModuleKind, clang::SourceLocation, unsigned int, llvm::SmallVectorImpl*) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Serialization/ASTReader.cpp:4230:32 #8 0x3a6b415 in clang::CompilerInstance::createPCHExternalASTSource(llvm::StringRef, llvm::StringRef, bool, bool, clang::Preprocessor&, clang::InMemoryModuleCache&, clang::ASTContext&, clang::PCHContainerReader const&, llvm::ArrayRef >, llvm::ArrayRef >, void*, bool, bool, bool) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:539:19 #9 0x3a6b00e in clang::CompilerInstance::createPCHExternalASTSource(llvm::StringRef, bool, bool, void*, bool) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:501:18 #10 0x3abac80 in clang::FrontendAction::BeginSourceFile(clang::CompilerInstance&, clang::FrontendInputFile const&) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Frontend/FrontendAction.cpp:865:12 #11 0x3a6e61c in clang::CompilerInstance::ExecuteAction(clang::FrontendAction&) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/Frontend/CompilerInstance.cpp:972:13 #12 0x3ba74bf in clang::ExecuteCompilerInvocation(clang::CompilerInstance*) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp:282:25 #13 0xa3f753 in cc1_main(llvm::ArrayRef, char const*, void*) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/tools/driver/cc1_main.cpp:240:15 #14 0xa3a68a in ExecuteCC1Tool(llvm::SmallVectorImpl&) /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/tools/driver/driver.cpp:330:12 #15 0xa37f31 in main /b/sanitizer-x86_64-linux-fast/build/llvm-project/clang/tools/driver/driver.cpp:407:12 #16 0x7fca2a7032e0 in __libc_start_main (/lib/x86_64-linux-gnu/libc.so.6+0x202e0) #17 0xa21029 in _start (/b/sanitizer-x86_64-linux-fast/build/llvm_build_ubsan/bin/clang-11+0xa21029) This reverts commit 
30d5946db95fa465d7ee6caceb2b1ff191e3727c. --- .../include/clang/Serialization/ASTBitCodes.h | 27 +++--------- clang/include/clang/Serialization/ASTReader.h | 7 ++-- clang/include/clang/Serialization/ASTWriter.h | 10 ++--- .../include/clang/Serialization/ModuleFile.h | 10 +---- clang/lib/Serialization/ASTReader.cpp | 20 ++++----- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 41 +++++++------------ clang/lib/Serialization/ASTWriterDecl.cpp | 4 +- 8 files changed, 40 insertions(+), 81 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 198d8e3b4fed..323edfbf8126 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 10; + const unsigned VERSION_MAJOR = 9; /// AST file minor version number supported by this version of /// Clang. @@ -181,7 +181,7 @@ namespace serialization { /// Raw source location of end of range. unsigned End; - /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. + /// Offset in the AST file. uint32_t BitOffset; PPEntityOffset(SourceRange R, uint32_t BitOffset) @@ -221,18 +221,12 @@ namespace serialization { /// Raw source location. unsigned Loc = 0; - /// Offset in the AST file. Split 64-bit integer into low/high parts - /// to keep structure alignment 32-bit and don't have padding gap. - /// This structure is serialized "as is" to the AST file and undefined - /// value in the padding affects AST hash. - uint32_t BitOffsetLow = 0; - uint32_t BitOffsetHigh = 0; + /// Offset in the AST file. + uint32_t BitOffset = 0; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint64_t BitOffset) { - setLocation(Loc); - setBitOffset(BitOffset); - } + DeclOffset(SourceLocation Loc, uint32_t BitOffset) + : Loc(Loc.getRawEncoding()), BitOffset(BitOffset) {} void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); @@ -241,15 +235,6 @@ namespace serialization { SourceLocation getLocation() const { return SourceLocation::getFromRawEncoding(Loc); } - - void setBitOffset(uint64_t Offset) { - BitOffsetLow = Offset; - BitOffsetHigh = Offset >> 32; - } - - uint64_t getBitOffset() const { - return BitOffsetLow | (uint64_t(BitOffsetHigh) << 32); - } }; /// The number of predefined preprocessed entity IDs. diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 11a537fad5d5..94645fff9f93 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -723,10 +723,9 @@ class ASTReader struct PendingMacroInfo { ModuleFile *M; - /// Offset relative to ModuleFile::MacroOffsetsBase. - uint32_t MacroDirectivesOffset; + uint64_t MacroDirectivesOffset; - PendingMacroInfo(ModuleFile *M, uint32_t MacroDirectivesOffset) + PendingMacroInfo(ModuleFile *M, uint64_t MacroDirectivesOffset) : M(M), MacroDirectivesOffset(MacroDirectivesOffset) {} }; @@ -2206,7 +2205,7 @@ class ASTReader /// \param MacroDirectivesOffset Offset of the serialized macro directive /// history. 
void addPendingMacro(IdentifierInfo *II, ModuleFile *M, - uint32_t MacroDirectivesOffset); + uint64_t MacroDirectivesOffset); /// Read the set of macros defined by this external macro source. void ReadDefinedMacros() override; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 9413a8d50446..c0a943adf2c7 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -243,7 +243,7 @@ class ASTWriter : public ASTDeserializationListener, /// Offset of each type in the bitstream, indexed by /// the type's ID. - std::vector TypeOffsets; + std::vector TypeOffsets; /// The first ID number we can use for our own identifiers. serialization::IdentID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS; @@ -277,8 +277,7 @@ class ASTWriter : public ASTDeserializationListener, /// The macro infos to emit. std::vector MacroInfosToEmit; - llvm::DenseMap - IdentMacroDirectivesOffsetMap; + llvm::DenseMap IdentMacroDirectivesOffsetMap; /// @name FlushStmt Caches /// @{ @@ -465,8 +464,7 @@ class ASTWriter : public ASTDeserializationListener, const Preprocessor &PP); void WritePreprocessor(const Preprocessor &PP, bool IsModule); void WriteHeaderSearch(const HeaderSearch &HS); - void WritePreprocessorDetail(PreprocessingRecord &PPRec, - uint64_t MacroOffsetsBase); + void WritePreprocessorDetail(PreprocessingRecord &PPRec); void WriteSubmodules(Module *WritingModule); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, @@ -590,7 +588,7 @@ class ASTWriter : public ASTDeserializationListener, /// Determine the ID of an already-emitted macro. serialization::MacroID getMacroID(MacroInfo *MI); - uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); + uint64_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. void AddTypeRef(QualType T, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 98d7f46fd8f8..90d2745e080c 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -251,10 +251,6 @@ class ModuleFile { /// The base offset in the source manager's view of this module. unsigned SLocEntryBaseOffset = 0; - /// Base file offset for the offsets in SLocEntryOffsets. Real file offset - /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. - uint64_t SLocEntryOffsetsBase = 0; - /// Offsets for all of the source location entries in the /// AST file. const uint32_t *SLocEntryOffsets = nullptr; @@ -306,10 +302,6 @@ class ModuleFile { /// The number of macros in this AST file. unsigned LocalNumMacros = 0; - /// Base file offset for the offsets in MacroOffsets. Real file offset for - /// the entry is MacroOffsetsBase + MacroOffsets[i]. - uint64_t MacroOffsetsBase = 0; - /// Offsets of macros in the preprocessor block. /// /// This array is indexed by the macro ID (-1), and provides @@ -458,7 +450,7 @@ class ModuleFile { /// Offset of each type within the bitstream, indexed by the /// type ID, or the representation of a Type*. - const uint64_t *TypeOffsets = nullptr; + const uint32_t *TypeOffsets = nullptr; /// Base type ID for types local to this module as represented in /// the global type ID space. 
diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 9f662c6e0d4c..7f114c069586 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1470,7 +1470,6 @@ bool ASTReader::ReadSLocEntry(int ID) { ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( - F->SLocEntryOffsetsBase + F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { Error(std::move(Err)); return true; @@ -1933,8 +1932,9 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, return HFI; } -void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M, - uint32_t MacroDirectivesOffset) { +void ASTReader::addPendingMacro(IdentifierInfo *II, + ModuleFile *M, + uint64_t MacroDirectivesOffset) { assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard"); PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset)); } @@ -2099,8 +2099,7 @@ void ASTReader::resolvePendingMacro(IdentifierInfo *II, BitstreamCursor &Cursor = M.MacroCursor; SavedStreamPosition SavedPosition(Cursor); - if (llvm::Error Err = - Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) { + if (llvm::Error Err = Cursor.JumpToBit(PMInfo.MacroDirectivesOffset)) { Error(std::move(Err)); return; } @@ -3099,7 +3098,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { Error("duplicate TYPE_OFFSET record in AST file"); return Failure; } - F.TypeOffsets = reinterpret_cast(Blob.data()); + F.TypeOffsets = (const uint32_t *)Blob.data(); F.LocalNumTypes = Record[0]; unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); @@ -3377,7 +3376,6 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; - F.SLocEntryOffsetsBase = Record[2]; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3696,7 +3694,6 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; - F.MacroOffsetsBase = Record[2]; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -5910,8 +5907,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { } SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); - if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit( - M.MacroOffsetsBase + PPOffs.BitOffset)) { + if (llvm::Error Err = + M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset)) { Error(std::move(Err)); return nullptr; } @@ -8430,8 +8427,7 @@ MacroInfo *ASTReader::getMacro(MacroID ID) { assert(I != GlobalMacroMap.end() && "Corrupted global macro map"); ModuleFile *M = I->second; unsigned Index = ID - M->BaseMacroID; - MacrosLoaded[ID] = - ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]); + MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]); if (DeserializationListener) DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index 0a278c7506e1..fce4be133220 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2870,7 +2870,7 @@ ASTReader::DeclCursorForID(DeclID ID, 
SourceLocation &Loc) { const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.getBitOffset()); + return RecordLocation(M, DOffs.BitOffset); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a3ceeef99097..de59dd280ba8 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1893,7 +1893,6 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, // Write out the source location entry table. We skip the first // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; - uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); RecordData PreloadSLocs; SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); @@ -1904,9 +1903,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, assert(&SourceMgr.getSLocEntry(FID) == SLoc); // Record the offset of this source-location entry. - uint64_t Offset = Stream.GetCurrentBitNo() - SLocEntryOffsetsBase; - assert((Offset >> 32) == 0 && "SLocEntry offset too large"); - SLocEntryOffsets.push_back(Offset); + SLocEntryOffsets.push_back(Stream.GetCurrentBitNo()); // Figure out which record code to use. unsigned Code; @@ -2014,14 +2011,12 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), - SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, - SLocEntryOffsetsBase}; + SourceMgr.getNextLocalOffset() - 1 /* skip dummy */}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2098,11 +2093,9 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule, /// Writes the block containing the serialized form of the /// preprocessor. void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { - uint64_t MacroOffsetsBase = Stream.GetCurrentBitNo(); - PreprocessingRecord *PPRec = PP.getPreprocessingRecord(); if (PPRec) - WritePreprocessorDetail(*PPRec, MacroOffsetsBase); + WritePreprocessorDetail(*PPRec); RecordData Record; RecordData ModuleMacroRecord; @@ -2163,8 +2156,7 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // identifier they belong to. for (const IdentifierInfo *Name : MacroIdentifiers) { MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name); - uint64_t StartOffset = Stream.GetCurrentBitNo() - MacroOffsetsBase; - assert((StartOffset >> 32) == 0 && "Macro identifiers offset too large"); + auto StartOffset = Stream.GetCurrentBitNo(); // Emit the macro directives in reverse source order. for (; MD; MD = MD->getPrevious()) { @@ -2237,12 +2229,14 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // Record the local offset of this macro. 
unsigned Index = ID - FirstMacroID; - if (Index >= MacroOffsets.size()) - MacroOffsets.resize(Index + 1); + if (Index == MacroOffsets.size()) + MacroOffsets.push_back(Stream.GetCurrentBitNo()); + else { + if (Index > MacroOffsets.size()) + MacroOffsets.resize(Index + 1); - uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; - assert((Offset >> 32) == 0 && "Macro offset too large"); - MacroOffsets[Index] = Offset; + MacroOffsets[Index] = Stream.GetCurrentBitNo(); + } AddIdentifierRef(Name, Record); AddSourceLocation(MI->getDefinitionLoc(), Record); @@ -2293,20 +2287,17 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID - Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), - FirstMacroID - NUM_PREDEF_MACRO_IDS, - MacroOffsetsBase}; + FirstMacroID - NUM_PREDEF_MACRO_IDS}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } -void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, - uint64_t MacroOffsetsBase) { +void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { if (PPRec.local_begin() == PPRec.local_end()) return; @@ -2343,10 +2334,8 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) { Record.clear(); - uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; - assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); PreprocessedEntityOffsets.push_back( - PPEntityOffset((*E)->getSourceRange(), Offset)); + PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo())); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -5155,7 +5144,7 @@ MacroID ASTWriter::getMacroID(MacroInfo *MI) { return MacroIDs[MI]; } -uint32_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { +uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { return IdentMacroDirectivesOffsetMap.lookup(Name); } diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index 8c5be6cacac0..e847180435ec 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2434,12 +2434,12 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.emplace_back(Loc, Offset); + DeclOffsets.push_back(DeclOffset(Loc, Offset)); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(Loc); - DeclOffsets[Index].setBitOffset(Offset); + DeclOffsets[Index].BitOffset = Offset; } else { llvm_unreachable("declarations should be emitted in ID order"); } From 56e70fe8b73bb59dda959731b66c24c3e88a6ab2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 16:08:22 +0100 Subject: [PATCH 057/216] MCAsmBackend.h - cleanup includes and forward declarations. NFC. 
Replace StringRef.h include to forward declaration Remove MCFragment/MCRelaxableFragment forward declarations - these are included in MCFragment.h --- llvm/include/llvm/MC/MCAsmBackend.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/MC/MCAsmBackend.h b/llvm/include/llvm/MC/MCAsmBackend.h index 9b67c920d15b..c24e7c948b84 100644 --- a/llvm/include/llvm/MC/MCAsmBackend.h +++ b/llvm/include/llvm/MC/MCAsmBackend.h @@ -11,7 +11,6 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCFixup.h" #include "llvm/MC/MCFragment.h" @@ -24,15 +23,14 @@ class MCAsmLayout; class MCAssembler; class MCCFIInstruction; struct MCFixupKindInfo; -class MCFragment; class MCInst; class MCObjectStreamer; class MCObjectTargetWriter; class MCObjectWriter; -class MCRelaxableFragment; class MCSubtargetInfo; class MCValue; class raw_pwrite_stream; +class StringRef; /// Generic interface to target specific assembler backends. class MCAsmBackend { From e1dc1ae01b9e3903f7ac6c420a5cff5978b6954e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 16:09:43 +0100 Subject: [PATCH 058/216] Wasm.h - remove unnecessary StringMap.h include. NFC --- llvm/include/llvm/Object/Wasm.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Object/Wasm.h b/llvm/include/llvm/Object/Wasm.h index 0ba83550bd7d..87d82558562a 100644 --- a/llvm/include/llvm/Object/Wasm.h +++ b/llvm/include/llvm/Object/Wasm.h @@ -17,7 +17,6 @@ #define LLVM_OBJECT_WASM_H #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" #include "llvm/BinaryFormat/Wasm.h" #include "llvm/Config/llvm-config.h" From e7fc356668b1b9ba31126155fd55c76189be31ff Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 17:08:57 +0100 Subject: [PATCH 059/216] MCInstrDesc.h - move MCSubtargetInfo forward declaration down to MCInstrInfo.h. NFC. Remove unused FeatureBitset forward declaration --- llvm/include/llvm/MC/MCInstrDesc.h | 5 ++--- llvm/include/llvm/MC/MCInstrInfo.h | 2 ++ 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/include/llvm/MC/MCInstrDesc.h b/llvm/include/llvm/MC/MCInstrDesc.h index fa620d21a120..17454e3134a2 100644 --- a/llvm/include/llvm/MC/MCInstrDesc.h +++ b/llvm/include/llvm/MC/MCInstrDesc.h @@ -19,9 +19,8 @@ #include namespace llvm { - class MCInst; - class MCSubtargetInfo; - class FeatureBitset; + +class MCInst; //===----------------------------------------------------------------------===// // Machine Operand Flags and Description diff --git a/llvm/include/llvm/MC/MCInstrInfo.h b/llvm/include/llvm/MC/MCInstrInfo.h index f5c944eed682..598e24257e5d 100644 --- a/llvm/include/llvm/MC/MCInstrInfo.h +++ b/llvm/include/llvm/MC/MCInstrInfo.h @@ -18,6 +18,8 @@ namespace llvm { +class MCSubtargetInfo; + //--------------------------------------------------------------------------- /// Interface to description of machine instruction set. class MCInstrInfo { From 490443f822bbf293e9009cc8e5d33db7438fe294 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 17:09:30 +0100 Subject: [PATCH 060/216] MCSchedule.h - replace ArrayRef.h include with forward declaration. NFC. 
--- llvm/include/llvm/MC/MCSchedule.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/MC/MCSchedule.h b/llvm/include/llvm/MC/MCSchedule.h index df3248ee6e86..66c5659af3a7 100644 --- a/llvm/include/llvm/MC/MCSchedule.h +++ b/llvm/include/llvm/MC/MCSchedule.h @@ -14,7 +14,6 @@ #ifndef LLVM_MC_MCSCHEDULE_H #define LLVM_MC_MCSCHEDULE_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/Config/llvm-config.h" #include "llvm/Support/DataTypes.h" @@ -22,6 +21,7 @@ namespace llvm { +template class ArrayRef; struct InstrItinerary; class MCSubtargetInfo; class MCInstrInfo; From 513976df2e6541a73876bac896e4d923e42413b9 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Thu, 16 Apr 2020 16:22:43 +0000 Subject: [PATCH 061/216] [PowerPC] Ignore implicit register operands for MCInst Summary: When doing the conversion: MachineInst -> MCInst, we should ignore the implicit operands, it will expose more opportunity for InstiAlias. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D77118 --- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 3 + .../NoCRFieldRedefWhenSpillingCRBIT.mir | 2 +- .../CodeGen/PowerPC/atomics-regression.ll | 80 +-- .../CodeGen/PowerPC/bitcasts-direct-move.ll | 8 +- llvm/test/CodeGen/PowerPC/bool-math.ll | 6 +- llvm/test/CodeGen/PowerPC/branch_coalesce.ll | 6 +- llvm/test/CodeGen/PowerPC/bswap64.ll | 2 +- .../CodeGen/PowerPC/build-vector-tests.ll | 112 ++-- .../PowerPC/canonical-merge-shuffles.ll | 2 +- .../convert-rr-to-ri-instrs-out-of-range.mir | 2 +- .../PowerPC/convert-rr-to-ri-instrs.mir | 2 +- llvm/test/CodeGen/PowerPC/crbits.ll | 2 +- llvm/test/CodeGen/PowerPC/dform-adjust.ll | 2 +- llvm/test/CodeGen/PowerPC/expand-isel.ll | 8 +- .../test/CodeGen/PowerPC/extract-and-store.ll | 8 +- llvm/test/CodeGen/PowerPC/f128-aggregates.ll | 2 +- .../fp-int-conversions-direct-moves.ll | 16 +- llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll | 18 +- llvm/test/CodeGen/PowerPC/funnel-shift.ll | 12 +- .../test/CodeGen/PowerPC/inlineasm-i64-reg.ll | 2 +- llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll | 2 +- llvm/test/CodeGen/PowerPC/load-and-splat.ll | 2 +- llvm/test/CodeGen/PowerPC/loop-comment.ll | 2 +- llvm/test/CodeGen/PowerPC/memcmp.ll | 4 +- llvm/test/CodeGen/PowerPC/optcmp.ll | 20 +- llvm/test/CodeGen/PowerPC/optimize-andiso.ll | 4 +- .../PowerPC/p8-scalar_vector_conversions.ll | 140 ++-- .../PowerPC/p9-xxinsertw-xxextractuw.ll | 16 +- llvm/test/CodeGen/PowerPC/popcnt-zext.ll | 18 +- llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll | 2 +- .../CodeGen/PowerPC/ppc-shrink-wrapping.ll | 26 +- llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll | 56 +- llvm/test/CodeGen/PowerPC/pr25080.ll | 40 +- llvm/test/CodeGen/PowerPC/pr33093.ll | 4 +- llvm/test/CodeGen/PowerPC/pr35688.ll | 4 +- llvm/test/CodeGen/PowerPC/pr45448.ll | 2 +- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll | 12 +- llvm/test/CodeGen/PowerPC/qpx-s-sel.ll | 2 +- llvm/test/CodeGen/PowerPC/qpx-sel.ll | 2 +- .../PowerPC/redundant-copy-after-tail-dup.ll | 2 +- llvm/test/CodeGen/PowerPC/sat-add.ll | 18 +- llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll | 20 +- llvm/test/CodeGen/PowerPC/select_const.ll | 6 +- llvm/test/CodeGen/PowerPC/setcc-logic.ll | 12 +- llvm/test/CodeGen/PowerPC/shift_mask.ll | 28 +- llvm/test/CodeGen/PowerPC/signbit-shift.ll | 8 +- .../CodeGen/PowerPC/simplifyConstCmpToISEL.ll | 2 +- llvm/test/CodeGen/PowerPC/sms-cpy-1.ll | 2 +- llvm/test/CodeGen/PowerPC/spill_p9_setb.ll | 4 +- llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll | 138 ++-- 
llvm/test/CodeGen/PowerPC/stack-realign.ll | 2 +- llvm/test/CodeGen/PowerPC/testBitReverse.ll | 4 +- .../CodeGen/PowerPC/testComparesi32gtu.ll | 4 +- .../CodeGen/PowerPC/testComparesi32leu.ll | 4 +- .../CodeGen/PowerPC/testComparesi32ltu.ll | 4 +- .../CodeGen/PowerPC/testComparesigesll.ll | 16 +- .../CodeGen/PowerPC/testComparesigeull.ll | 12 +- .../CodeGen/PowerPC/testComparesigtsll.ll | 8 +- .../CodeGen/PowerPC/testComparesilesll.ll | 16 +- .../CodeGen/PowerPC/testComparesileull.ll | 12 +- .../CodeGen/PowerPC/testComparesiltsll.ll | 12 +- .../CodeGen/PowerPC/testComparesllgesll.ll | 16 +- .../CodeGen/PowerPC/testComparesllgeull.ll | 12 +- .../CodeGen/PowerPC/testComparesllgtsll.ll | 8 +- .../CodeGen/PowerPC/testCompareslllesll.ll | 16 +- .../CodeGen/PowerPC/testComparesllleull.ll | 12 +- .../CodeGen/PowerPC/testComparesllltsll.ll | 12 +- .../test/CodeGen/PowerPC/tocSaveInPrologue.ll | 2 +- llvm/test/CodeGen/PowerPC/trunc-srl-load.ll | 2 +- llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll | 24 +- llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll | 248 +++---- llvm/test/CodeGen/PowerPC/vec-min-max.ll | 6 +- llvm/test/CodeGen/PowerPC/vec-trunc.ll | 2 +- .../CodeGen/PowerPC/vec_add_sub_doubleword.ll | 4 +- .../CodeGen/PowerPC/vec_add_sub_quadword.ll | 8 +- .../PowerPC/vec_conv_fp32_to_i16_elts.ll | 612 ++++++++--------- .../PowerPC/vec_conv_fp32_to_i64_elts.ll | 12 +- .../PowerPC/vec_conv_fp32_to_i8_elts.ll | 624 +++++++++--------- .../PowerPC/vec_conv_fp64_to_i16_elts.ll | 592 ++++++++--------- .../PowerPC/vec_conv_fp64_to_i32_elts.ll | 20 +- .../PowerPC/vec_conv_fp64_to_i8_elts.ll | 596 ++++++++--------- .../PowerPC/vec_conv_fp_to_i_4byte_elts.ll | 20 +- .../PowerPC/vec_conv_i16_to_fp32_elts.ll | 32 +- .../PowerPC/vec_conv_i16_to_fp64_elts.ll | 12 +- .../PowerPC/vec_conv_i32_to_fp64_elts.ll | 12 +- .../PowerPC/vec_conv_i64_to_fp32_elts.ll | 4 +- .../PowerPC/vec_conv_i8_to_fp32_elts.ll | 36 +- .../PowerPC/vec_conv_i8_to_fp64_elts.ll | 16 +- .../PowerPC/vec_conv_i_to_fp_4byte_elts.ll | 20 +- llvm/test/CodeGen/PowerPC/vsx.ll | 2 +- .../PowerPC/xray-conditional-return.ll | 8 +- 91 files changed, 1990 insertions(+), 1987 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 6c3cf77ed46d..0a0e168c0076 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -147,6 +147,9 @@ bool llvm::LowerPPCMachineOperandToMCOperand(const MachineOperand &MO, assert(MO.getReg() > PPC::NoRegister && MO.getReg() < PPC::NUM_TARGET_REGS && "Invalid register for this target!"); + // Ignore all implicit register operands. 
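+    // Returning false tells the caller not to emit an MCOperand for this
+    // machine operand, so implicit uses/defs never appear in the MCInst and
+    // more InstAlias patterns can match when printing (e.g. "cror a, b, b"
+    // prints as "crmove a, b").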
+ if (MO.isImplicit()) + return false; OutMO = MCOperand::createReg(MO.getReg()); return true; case MachineOperand::MO_Immediate: diff --git a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir index b95b066c169e..bcd51d31c6cf 100644 --- a/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir +++ b/llvm/test/CodeGen/PowerPC/NoCRFieldRedefWhenSpillingCRBIT.mir @@ -94,7 +94,7 @@ body: | renamable $cr5lt = CRNOR renamable $cr0lt, renamable $cr1gt, implicit killed $cr0 renamable $cr5gt = COPY renamable $cr1gt, implicit $cr1 ; CHECK: crnor 4*cr5+lt, lt, 4*cr1+gt - ; CHECK: cror 4*cr5+gt, 4*cr1+gt, 4*cr1+gt + ; CHECK: crmove 4*cr5+gt, 4*cr1+gt SPILL_CRBIT killed renamable $cr5lt, 0, %stack.0 :: (store 4 into %stack.0) renamable $cr1 = CMPW renamable $r4, renamable $r5, implicit killed $x5, implicit killed $x4 SPILL_CRBIT killed renamable $cr5gt, 0, %stack.1 :: (store 4 into %stack.1) diff --git a/llvm/test/CodeGen/PowerPC/atomics-regression.ll b/llvm/test/CodeGen/PowerPC/atomics-regression.ll index 1409a41e0783..778be9db18e9 100644 --- a/llvm/test/CodeGen/PowerPC/atomics-regression.ll +++ b/llvm/test/CodeGen/PowerPC/atomics-regression.ll @@ -400,7 +400,7 @@ define void @test39() { define void @test40(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test40: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB40_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -419,7 +419,7 @@ define void @test40(i8* %ptr, i8 %cmp, i8 %val) { define void @test41(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test41: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB41_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -441,7 +441,7 @@ define void @test41(i8* %ptr, i8 %cmp, i8 %val) { define void @test42(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test42: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB42_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -463,7 +463,7 @@ define void @test42(i8* %ptr, i8 %cmp, i8 %val) { define void @test43(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test43: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB43_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -483,7 +483,7 @@ define void @test43(i8* %ptr, i8 %cmp, i8 %val) { define void @test44(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test44: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB44_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -503,7 +503,7 @@ define void @test44(i8* %ptr, i8 %cmp, i8 %val) { define void @test45(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test45: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB45_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -526,7 +526,7 @@ define void @test45(i8* %ptr, i8 %cmp, i8 %val) { define void @test46(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test46: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB46_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -549,7 +549,7 @@ define void @test46(i8* %ptr, i8 %cmp, i8 %val) { define void @test47(i8* %ptr, i8 %cmp, i8 %val) { 
; PPC64LE-LABEL: test47: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB47_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -572,7 +572,7 @@ define void @test47(i8* %ptr, i8 %cmp, i8 %val) { define void @test48(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test48: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB48_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -595,7 +595,7 @@ define void @test48(i8* %ptr, i8 %cmp, i8 %val) { define void @test49(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test49: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB49_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -618,7 +618,7 @@ define void @test49(i8* %ptr, i8 %cmp, i8 %val) { define void @test50(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test50: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: .LBB50_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -637,7 +637,7 @@ define void @test50(i16* %ptr, i16 %cmp, i16 %val) { define void @test51(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test51: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: .LBB51_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -659,7 +659,7 @@ define void @test51(i16* %ptr, i16 %cmp, i16 %val) { define void @test52(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test52: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: .LBB52_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -681,7 +681,7 @@ define void @test52(i16* %ptr, i16 %cmp, i16 %val) { define void @test53(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test53: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB53_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -701,7 +701,7 @@ define void @test53(i16* %ptr, i16 %cmp, i16 %val) { define void @test54(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test54: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB54_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -721,7 +721,7 @@ define void @test54(i16* %ptr, i16 %cmp, i16 %val) { define void @test55(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test55: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB55_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -744,7 +744,7 @@ define void @test55(i16* %ptr, i16 %cmp, i16 %val) { define void @test56(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test56: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB56_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -767,7 +767,7 @@ define void @test56(i16* %ptr, i16 %cmp, i16 %val) { define void @test57(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test57: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB57_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -790,7 +790,7 @@ define void @test57(i16* %ptr, i16 %cmp, i16 %val) { define void @test58(i16* %ptr, i16 %cmp, i16 %val) { ; 
PPC64LE-LABEL: test58: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB58_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -813,7 +813,7 @@ define void @test58(i16* %ptr, i16 %cmp, i16 %val) { define void @test59(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test59: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB59_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1252,7 +1252,7 @@ define void @test79(i64* %ptr, i64 %cmp, i64 %val) { define void @test80(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test80: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB80_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1271,7 +1271,7 @@ define void @test80(i8* %ptr, i8 %cmp, i8 %val) { define void @test81(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test81: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB81_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1293,7 +1293,7 @@ define void @test81(i8* %ptr, i8 %cmp, i8 %val) { define void @test82(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test82: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB82_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1315,7 +1315,7 @@ define void @test82(i8* %ptr, i8 %cmp, i8 %val) { define void @test83(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test83: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB83_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1335,7 +1335,7 @@ define void @test83(i8* %ptr, i8 %cmp, i8 %val) { define void @test84(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test84: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB84_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1355,7 +1355,7 @@ define void @test84(i8* %ptr, i8 %cmp, i8 %val) { define void @test85(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test85: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB85_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1378,7 +1378,7 @@ define void @test85(i8* %ptr, i8 %cmp, i8 %val) { define void @test86(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test86: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB86_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1401,7 +1401,7 @@ define void @test86(i8* %ptr, i8 %cmp, i8 %val) { define void @test87(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test87: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB87_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1424,7 +1424,7 @@ define void @test87(i8* %ptr, i8 %cmp, i8 %val) { define void @test88(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test88: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB88_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1447,7 +1447,7 @@ define void @test88(i8* %ptr, i8 %cmp, i8 %val) { define void @test89(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test89: ; PPC64LE: # 
%bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB89_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 @@ -1470,7 +1470,7 @@ define void @test89(i8* %ptr, i8 %cmp, i8 %val) { define void @test90(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test90: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: .LBB90_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1489,7 +1489,7 @@ define void @test90(i16* %ptr, i16 %cmp, i16 %val) { define void @test91(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test91: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: .LBB91_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1511,7 +1511,7 @@ define void @test91(i16* %ptr, i16 %cmp, i16 %val) { define void @test92(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test92: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: .LBB92_1: ; PPC64LE-NEXT: lharx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 @@ -1533,7 +1533,7 @@ define void @test92(i16* %ptr, i16 %cmp, i16 %val) { define void @test93(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test93: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB93_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1553,7 +1553,7 @@ define void @test93(i16* %ptr, i16 %cmp, i16 %val) { define void @test94(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test94: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB94_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1573,7 +1573,7 @@ define void @test94(i16* %ptr, i16 %cmp, i16 %val) { define void @test95(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test95: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB95_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1596,7 +1596,7 @@ define void @test95(i16* %ptr, i16 %cmp, i16 %val) { define void @test96(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test96: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: lwsync ; PPC64LE-NEXT: .LBB96_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1619,7 +1619,7 @@ define void @test96(i16* %ptr, i16 %cmp, i16 %val) { define void @test97(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test97: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB97_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1642,7 +1642,7 @@ define void @test97(i16* %ptr, i16 %cmp, i16 %val) { define void @test98(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test98: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB98_1: ; PPC64LE-NEXT: lharx 6, 0, 3 @@ -1665,7 +1665,7 @@ define void @test98(i16* %ptr, i16 %cmp, i16 %val) { define void @test99(i16* %ptr, i16 %cmp, i16 %val) { ; PPC64LE-LABEL: test99: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 16, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 16 ; PPC64LE-NEXT: sync ; PPC64LE-NEXT: .LBB99_1: ; PPC64LE-NEXT: lharx 6, 0, 3 diff --git a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll index 0eafd86bbbef..26a0be8da407 
100644 --- a/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll +++ b/llvm/test/CodeGen/PowerPC/bitcasts-direct-move.ll @@ -11,7 +11,7 @@ entry: ; CHECK-P7: lwa 3, ; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1 ; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3 -; CHECK: mfvsrwz 3, [[SHIFTREG]] +; CHECK: mffprwz 3, [[SHIFTREG]] } define i64 @f64toi64(double %a) { @@ -29,7 +29,7 @@ entry: ret float %0 ; CHECK-P7: stw 3, ; CHECK-P7: lfs 1, -; CHECK: mtvsrd [[MOVEREG:[0-9]+]], 3 +; CHECK: mtfprd [[MOVEREG:[0-9]+]], 3 ; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[MOVEREG]], [[MOVEREG]], 1 ; CHECK: xscvspdpn 1, [[SHIFTREG]] } @@ -51,7 +51,7 @@ entry: ; CHECK-P7: lwz 3, ; CHECK: xscvdpspn [[CONVREG:[0-9]+]], 1 ; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[CONVREG]], [[CONVREG]], 3 -; CHECK: mfvsrwz 3, [[SHIFTREG]] +; CHECK: mffprwz 3, [[SHIFTREG]] } define i64 @f64toi64u(double %a) { @@ -69,7 +69,7 @@ entry: ret float %0 ; CHECK-P7: stw 3, ; CHECK-P7: lfs 1, -; CHECK: mtvsrd [[MOVEREG:[0-9]+]], 3 +; CHECK: mtfprd [[MOVEREG:[0-9]+]], 3 ; CHECK: xxsldwi [[SHIFTREG:[0-9]+]], [[MOVEREG]], [[MOVEREG]], 1 ; CHECK: xscvspdpn 1, [[SHIFTREG]] } diff --git a/llvm/test/CodeGen/PowerPC/bool-math.ll b/llvm/test/CodeGen/PowerPC/bool-math.ll index 7b9c8c4851d9..9ec3c7b4671a 100644 --- a/llvm/test/CodeGen/PowerPC/bool-math.ll +++ b/llvm/test/CodeGen/PowerPC/bool-math.ll @@ -44,7 +44,7 @@ define i8 @sub_zext_cmp_mask_narrower_result(i32 %x) { define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_same_size_result: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: clrlwi 3, 3, 31 ; CHECK-NEXT: subfic 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 @@ -57,7 +57,7 @@ define i8 @add_zext_cmp_mask_same_size_result(i8 %x) { define i32 @add_zext_cmp_mask_wider_result(i8 %x) { ; CHECK-LABEL: add_zext_cmp_mask_wider_result: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: clrlwi 3, 3, 31 ; CHECK-NEXT: subfic 3, 3, 27 ; CHECK-NEXT: blr %a = and i8 %x, 1 @@ -70,7 +70,7 @@ define i32 @add_zext_cmp_mask_wider_result(i8 %x) { define i8 @add_zext_cmp_mask_narrower_result(i32 %x) { ; CHECK-LABEL: add_zext_cmp_mask_narrower_result: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 31, 31 +; CHECK-NEXT: clrlwi 3, 3, 31 ; CHECK-NEXT: subfic 3, 3, 43 ; CHECK-NEXT: blr %a = and i32 %x, 1 diff --git a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll index fcec348f18fa..c0486015a3a3 100644 --- a/llvm/test/CodeGen/PowerPC/branch_coalesce.ll +++ b/llvm/test/CodeGen/PowerPC/branch_coalesce.ll @@ -7,8 +7,8 @@ define double @testBranchCoal(double %a, double %b, double %c, i32 %x) { ; CHECK-LABEL: @testBranchCoal -; CHECK: cmplwi [[CMPR:[0-7]+]], 6, 0 -; CHECK: beq [[CMPR]], .LBB[[LAB1:[0-9_]+]] +; CHECK: cmplwi 6, 0 +; CHECK: beq 0, .LBB[[LAB1:[0-9_]+]] ; CHECK-DAG: addis [[LD1REG:[0-9]+]], 2, .LCPI0_0@toc@ha ; CHECK-DAG: addis [[LD2REG:[0-9]+]], 2, .LCPI0_1@toc@ha ; CHECK-DAG: xxlxor 2, 2, 2 @@ -22,7 +22,7 @@ define double @testBranchCoal(double %a, double %b, double %c, i32 %x) { ; CHECK-NOCOALESCE-LABEL: testBranchCoal: ; CHECK-NOCOALESCE: # %bb.0: # %entry -; CHECK-NOCOALESCE-NEXT: cmplwi 0, 6, 0 +; CHECK-NOCOALESCE-NEXT: cmplwi 6, 0 ; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_5 ; CHECK-NOCOALESCE-NEXT: # %bb.1: # %entry ; CHECK-NOCOALESCE-NEXT: bne 0, .LBB0_6 diff --git a/llvm/test/CodeGen/PowerPC/bswap64.ll b/llvm/test/CodeGen/PowerPC/bswap64.ll index 816a55422c38..75a839a3b95f 100644 --- a/llvm/test/CodeGen/PowerPC/bswap64.ll +++ 
b/llvm/test/CodeGen/PowerPC/bswap64.ll @@ -11,7 +11,7 @@ define i64 @bswap64(i64 %x) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: mtvsrdd 34, 3, 3 ; CHECK-NEXT: xxbrd 0, 34 -; CHECK-NEXT: mfvsrd 3, 0 +; CHECK-NEXT: mffprd 3, 0 ; CHECK-NEXT: blr ; ; NO-ALTIVEC-LABEL: bswap64: diff --git a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll index aa081170de03..ee0cc41ea6bd 100644 --- a/llvm/test/CodeGen/PowerPC/build-vector-tests.ll +++ b/llvm/test/CodeGen/PowerPC/build-vector-tests.ll @@ -832,8 +832,8 @@ define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: rldimi r6, r5, 32, 0 ; P8BE-NEXT: rldimi r4, r3, 32, 0 -; P8BE-NEXT: mtvsrd f0, r6 -; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: mtfprd f0, r6 +; P8BE-NEXT: mtfprd f1, r4 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -841,8 +841,8 @@ define <4 x i32> @fromRegsi(i32 signext %a, i32 signext %b, i32 signext %c, i32 ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: rldimi r3, r4, 32, 0 ; P8LE-NEXT: rldimi r5, r6, 32, 0 -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: mtfprd f0, r3 +; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -1120,8 +1120,8 @@ define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) { ; P8BE-NEXT: lwz r3, 72(r3) ; P8BE-NEXT: rldimi r5, r4, 32, 0 ; P8BE-NEXT: rldimi r3, r6, 32, 0 -; P8BE-NEXT: mtvsrd f0, r5 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r5 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -1133,8 +1133,8 @@ define <4 x i32> @fromRandMemConsi(i32* nocapture readonly %arr) { ; P8LE-NEXT: lwz r3, 352(r3) ; P8LE-NEXT: rldimi r4, r5, 32, 0 ; P8LE-NEXT: rldimi r6, r3, 32, 0 -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -1190,8 +1190,8 @@ define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %ele ; P8BE-NEXT: lwz r3, 4(r3) ; P8BE-NEXT: rldimi r5, r4, 32, 0 ; P8BE-NEXT: rldimi r3, r6, 32, 0 -; P8BE-NEXT: mtvsrd f0, r5 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r5 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -1205,8 +1205,8 @@ define <4 x i32> @fromRandMemVari(i32* nocapture readonly %arr, i32 signext %ele ; P8LE-NEXT: lwz r3, 32(r3) ; P8LE-NEXT: rldimi r4, r5, 32, 0 ; P8LE-NEXT: rldimi r6, r3, 32, 0 -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -1246,13 +1246,13 @@ define <4 x i32> @spltRegVali(i32 signext %val) { ; ; P8BE-LABEL: spltRegVali: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mtvsrwz f0, r3 +; P8BE-NEXT: mtfprwz f0, r3 ; P8BE-NEXT: xxspltw v2, vs0, 1 ; P8BE-NEXT: blr ; ; P8LE-LABEL: spltRegVali: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: mtvsrwz f0, r3 +; P8LE-NEXT: mtfprwz f0, r3 ; P8LE-NEXT: xxspltw v2, vs0, 1 ; P8LE-NEXT: blr entry: @@ -2351,8 +2351,8 @@ define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 zeroext %c, i32 ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: rldimi r6, r5, 32, 0 ; P8BE-NEXT: rldimi r4, r3, 32, 0 -; P8BE-NEXT: mtvsrd f0, r6 -; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: mtfprd f0, r6 +; P8BE-NEXT: mtfprd f1, r4 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -2360,8 +2360,8 @@ define <4 x i32> @fromRegsui(i32 zeroext %a, i32 zeroext %b, i32 
zeroext %c, i32 ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: rldimi r3, r4, 32, 0 ; P8LE-NEXT: rldimi r5, r6, 32, 0 -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: mtfprd f0, r3 +; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -2639,8 +2639,8 @@ define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) { ; P8BE-NEXT: lwz r3, 72(r3) ; P8BE-NEXT: rldimi r5, r4, 32, 0 ; P8BE-NEXT: rldimi r3, r6, 32, 0 -; P8BE-NEXT: mtvsrd f0, r5 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r5 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -2652,8 +2652,8 @@ define <4 x i32> @fromRandMemConsui(i32* nocapture readonly %arr) { ; P8LE-NEXT: lwz r3, 352(r3) ; P8LE-NEXT: rldimi r4, r5, 32, 0 ; P8LE-NEXT: rldimi r6, r3, 32, 0 -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -2709,8 +2709,8 @@ define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %el ; P8BE-NEXT: lwz r3, 4(r3) ; P8BE-NEXT: rldimi r5, r4, 32, 0 ; P8BE-NEXT: rldimi r3, r6, 32, 0 -; P8BE-NEXT: mtvsrd f0, r5 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r5 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -2724,8 +2724,8 @@ define <4 x i32> @fromRandMemVarui(i32* nocapture readonly %arr, i32 signext %el ; P8LE-NEXT: lwz r3, 32(r3) ; P8LE-NEXT: rldimi r4, r5, 32, 0 ; P8LE-NEXT: rldimi r6, r3, 32, 0 -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -2765,13 +2765,13 @@ define <4 x i32> @spltRegValui(i32 zeroext %val) { ; ; P8BE-LABEL: spltRegValui: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mtvsrwz f0, r3 +; P8BE-NEXT: mtfprwz f0, r3 ; P8BE-NEXT: xxspltw v2, vs0, 1 ; P8BE-NEXT: blr ; ; P8LE-LABEL: spltRegValui: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: mtvsrwz f0, r3 +; P8LE-NEXT: mtfprwz f0, r3 ; P8LE-NEXT: xxspltw v2, vs0, 1 ; P8LE-NEXT: blr entry: @@ -3884,15 +3884,15 @@ define <2 x i64> @fromRegsll(i64 %a, i64 %b) { ; ; P8BE-LABEL: fromRegsll: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mtvsrd f0, r4 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r4 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; ; P8LE-LABEL: fromRegsll: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: mtfprd f0, r3 +; P8LE-NEXT: mtfprd f1, r4 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -4103,8 +4103,8 @@ define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) { ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: ld r4, 144(r3) ; P8BE-NEXT: ld r3, 32(r3) -; P8BE-NEXT: mtvsrd f0, r4 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r4 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -4112,8 +4112,8 @@ define <2 x i64> @fromRandMemConsll(i64* nocapture readonly %arr) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: ld r4, 32(r3) ; P8LE-NEXT: ld r3, 144(r3) -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r3 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -4151,8 +4151,8 @@ define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %el ; P8BE-NEXT: add r3, r3, r4 ; P8BE-NEXT: ld r4, 8(r3) ; P8BE-NEXT: ld r3, 32(r3) -; P8BE-NEXT: mtvsrd f0, r4 -; P8BE-NEXT: mtvsrd f1, 
r3 +; P8BE-NEXT: mtfprd f0, r4 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -4162,8 +4162,8 @@ define <2 x i64> @fromRandMemVarll(i64* nocapture readonly %arr, i32 signext %el ; P8LE-NEXT: add r3, r3, r4 ; P8LE-NEXT: ld r4, 32(r3) ; P8LE-NEXT: ld r3, 8(r3) -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r3 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -4193,13 +4193,13 @@ define <2 x i64> @spltRegValll(i64 %val) { ; ; P8BE-LABEL: spltRegValll: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: mtfprd f0, r3 ; P8BE-NEXT: xxspltd v2, vs0, 0 ; P8BE-NEXT: blr ; ; P8LE-LABEL: spltRegValll: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: xxspltd v2, vs0, 0 ; P8LE-NEXT: blr entry: @@ -5072,15 +5072,15 @@ define <2 x i64> @fromRegsull(i64 %a, i64 %b) { ; ; P8BE-LABEL: fromRegsull: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mtvsrd f0, r4 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r4 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; ; P8LE-LABEL: fromRegsull: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: mtfprd f0, r3 +; P8LE-NEXT: mtfprd f1, r4 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -5291,8 +5291,8 @@ define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) { ; P8BE: # %bb.0: # %entry ; P8BE-NEXT: ld r4, 144(r3) ; P8BE-NEXT: ld r3, 32(r3) -; P8BE-NEXT: mtvsrd f0, r4 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r4 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -5300,8 +5300,8 @@ define <2 x i64> @fromRandMemConsull(i64* nocapture readonly %arr) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: ld r4, 32(r3) ; P8LE-NEXT: ld r3, 144(r3) -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r3 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -5339,8 +5339,8 @@ define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %e ; P8BE-NEXT: add r3, r3, r4 ; P8BE-NEXT: ld r4, 8(r3) ; P8BE-NEXT: ld r3, 32(r3) -; P8BE-NEXT: mtvsrd f0, r4 -; P8BE-NEXT: mtvsrd f1, r3 +; P8BE-NEXT: mtfprd f0, r4 +; P8BE-NEXT: mtfprd f1, r3 ; P8BE-NEXT: xxmrghd v2, vs1, vs0 ; P8BE-NEXT: blr ; @@ -5350,8 +5350,8 @@ define <2 x i64> @fromRandMemVarull(i64* nocapture readonly %arr, i32 signext %e ; P8LE-NEXT: add r3, r3, r4 ; P8LE-NEXT: ld r4, 32(r3) ; P8LE-NEXT: ld r3, 8(r3) -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: mtvsrd f1, r3 +; P8LE-NEXT: mtfprd f0, r4 +; P8LE-NEXT: mtfprd f1, r3 ; P8LE-NEXT: xxmrghd v2, vs1, vs0 ; P8LE-NEXT: blr entry: @@ -5381,13 +5381,13 @@ define <2 x i64> @spltRegValull(i64 %val) { ; ; P8BE-LABEL: spltRegValull: ; P8BE: # %bb.0: # %entry -; P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: mtfprd f0, r3 ; P8BE-NEXT: xxspltd v2, vs0, 0 ; P8BE-NEXT: blr ; ; P8LE-LABEL: spltRegValull: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: xxspltd v2, vs0, 0 ; P8LE-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll index fee67162e019..2ffe98e1f694 100644 --- a/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll +++ b/llvm/test/CodeGen/PowerPC/canonical-merge-shuffles.ll @@ -217,7 +217,7 @@ define dso_local <8 x i16> @testmrglb3(<8 x i8>* nocapture readonly %a) local_un ; 
CHECK-P8-NEXT: ld r3, 0(r3) ; CHECK-P8-NEXT: addis r4, r2, .LCPI12_0@toc@ha ; CHECK-P8-NEXT: xxlxor v4, v4, v4 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI12_0@toc@l ; CHECK-P8-NEXT: lvx v3, 0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir index e14bc1f6ff10..b52e0a4103ad 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs-out-of-range.mir @@ -891,7 +891,7 @@ body: | %3 = LI -37 %4 = RLDCL_rec %0, killed %3, 0, implicit-def $cr0 ; CHECK: RLDICL_rec %0, 27, 0, implicit-def $cr0 - ; CHECK-LATE: rldicl. 5, 3, 27, 0 + ; CHECK-LATE: rotldi. 5, 3, 27 %5 = COPY killed $cr0 %6 = ISEL8 %2, %0, %5.sub_eq $x3 = COPY %6 diff --git a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir index c922312eae12..31968dcae360 100644 --- a/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir +++ b/llvm/test/CodeGen/PowerPC/convert-rr-to-ri-instrs.mir @@ -3728,7 +3728,7 @@ body: | %3 = LI 37 %4 = RLDCL_rec %0, killed %3, 0, implicit-def $cr0 ; CHECK: RLDICL_rec %0, 37, 0, implicit-def $cr0 - ; CHECK-LATE: rldicl. 5, 3, 37, 0 + ; CHECK-LATE: rotldi. 5, 3, 37 %5 = COPY killed $cr0 %6 = ISEL8 %2, %0, %5.sub_eq $x3 = COPY %6 diff --git a/llvm/test/CodeGen/PowerPC/crbits.ll b/llvm/test/CodeGen/PowerPC/crbits.ll index 18d149558635..6fc0babac6b4 100644 --- a/llvm/test/CodeGen/PowerPC/crbits.ll +++ b/llvm/test/CodeGen/PowerPC/crbits.ll @@ -145,7 +145,7 @@ entry: ret i32 %cond ; CHECK-LABEL: @exttest7 -; CHECK-DAG: cmpwi {{[0-9]+}}, 3, 5 +; CHECK-DAG: cmpwi 3, 5 ; CHECK-DAG: li [[REG1:[0-9]+]], 8 ; CHECK-DAG: li [[REG2:[0-9]+]], 7 ; CHECK: isel 3, [[REG2]], [[REG1]], diff --git a/llvm/test/CodeGen/PowerPC/dform-adjust.ll b/llvm/test/CodeGen/PowerPC/dform-adjust.ll index e34c4b81e2ff..c32655233d86 100644 --- a/llvm/test/CodeGen/PowerPC/dform-adjust.ll +++ b/llvm/test/CodeGen/PowerPC/dform-adjust.ll @@ -19,7 +19,7 @@ define dso_local i64 @test1(i8* nocapture readonly %p, i32 signext %count) local ; CHECK-NEXT: ldx 3, 3, 8 ; CHECK-NEXT: mffprd 8, 0 ; CHECK-NEXT: mfvsrld 10, 1 -; CHECK-NEXT: mfvsrd 11, 1 +; CHECK-NEXT: mffprd 11, 1 ; CHECK-NEXT: mulld 8, 9, 8 ; CHECK-NEXT: mulld 5, 8, 5 ; CHECK-NEXT: mulld 5, 5, 10 diff --git a/llvm/test/CodeGen/PowerPC/expand-isel.ll b/llvm/test/CodeGen/PowerPC/expand-isel.ll index 1d1c00e5f5a5..2e78c0e05b1c 100644 --- a/llvm/test/CodeGen/PowerPC/expand-isel.ll +++ b/llvm/test/CodeGen/PowerPC/expand-isel.ll @@ -11,7 +11,7 @@ entry: ; CHECK-LABEL: @testExpandISELToIfElse ; CHECK: addi r5, r3, 1 -; CHECK-NEXT: cmpwi cr0, r3, 0 +; CHECK-NEXT: cmpwi r3, 0 ; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] ; CHECK: ori r3, r4, 0 ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] @@ -101,7 +101,7 @@ entry: ret i32 %add ; CHECK-LABEL: @testExpandISELsTo2ORIs1ADDI -; CHECK: cmpwi cr0, r7, 0 +; CHECK: cmpwi r7, 0 ; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] ; CHECK: ori r3, r4, 0 ; CHECK-NEXT: ori r4, r6, 0 @@ -127,7 +127,7 @@ entry: ret i32 %add2 ; CHECK-LABEL: @testExpandISELsTo1ORI1ADDI -; CHECK: cmpwi cr0, r7, 0 +; CHECK: cmpwi r7, 0 ; CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] ; CHECK: ori r5, r6, 0 ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] @@ -154,7 +154,7 @@ entry: ret i32 %sub1 ; CHECK-LABEL: @testExpandISELsTo0ORI2ADDIs -; CHECK: cmpwi cr0, r7, 0 +; CHECK: cmpwi r7, 0 ; 
CHECK-NEXT: bc 12, gt, [[TRUE:.LBB[0-9]+]] ; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] ; CHECK-NEXT: [[TRUE]] diff --git a/llvm/test/CodeGen/PowerPC/extract-and-store.ll b/llvm/test/CodeGen/PowerPC/extract-and-store.ll index 2731ffd07125..fe1e56b839f8 100644 --- a/llvm/test/CodeGen/PowerPC/extract-and-store.ll +++ b/llvm/test/CodeGen/PowerPC/extract-and-store.ll @@ -645,7 +645,7 @@ define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-NEXT: mfvsrd r3, vs34 ; CHECK-NEXT: rldicl r6, r3, 32, 56 ; CHECK-NEXT: rldicl r3, r3, 56, 56 -; CHECK-NEXT: mfvsrd r4, f0 +; CHECK-NEXT: mffprd r4, f0 ; CHECK-NEXT: stb r6, 1(r5) ; CHECK-NEXT: stb r3, 2(r5) ; CHECK-NEXT: rldicl r6, r4, 32, 56 @@ -661,7 +661,7 @@ define void @test_5_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-BE-NEXT: xxswapd vs0, vs34 ; CHECK-BE-NEXT: mfvsrd r3, vs34 ; CHECK-BE-NEXT: rldicl r6, r3, 40, 56 -; CHECK-BE-NEXT: mfvsrd r4, f0 +; CHECK-BE-NEXT: mffprd r4, f0 ; CHECK-BE-NEXT: stb r6, 0(r5) ; CHECK-BE-NEXT: rldicl r6, r4, 40, 56 ; CHECK-BE-NEXT: rldicl r4, r4, 16, 56 @@ -734,7 +734,7 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-NEXT: rldicl r6, r3, 56, 56 ; CHECK-NEXT: stb r4, 1(r5) ; CHECK-NEXT: rldicl r4, r3, 40, 56 -; CHECK-NEXT: mfvsrd r7, f0 +; CHECK-NEXT: mffprd r7, f0 ; CHECK-NEXT: stb r6, 2(r5) ; CHECK-NEXT: rldicl r6, r3, 24, 56 ; CHECK-NEXT: stb r4, 6(r5) @@ -767,7 +767,7 @@ define void @test_13_consecutive_stores_of_bytes(<16 x i8> %a, i8* nocapture %b) ; CHECK-BE-NEXT: clrldi r6, r3, 56 ; CHECK-BE-NEXT: stb r4, 0(r5) ; CHECK-BE-NEXT: rldicl r4, r3, 56, 56 -; CHECK-BE-NEXT: mfvsrd r7, f0 +; CHECK-BE-NEXT: mffprd r7, f0 ; CHECK-BE-NEXT: stb r6, 3(r5) ; CHECK-BE-NEXT: rldicl r6, r3, 8, 56 ; CHECK-BE-NEXT: stb r4, 4(r5) diff --git a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll index 6e782c2b02a1..006ad745f607 100644 --- a/llvm/test/CodeGen/PowerPC/f128-aggregates.ll +++ b/llvm/test/CodeGen/PowerPC/f128-aggregates.ll @@ -343,7 +343,7 @@ define fp128 @sum_float128(i32 signext %count, ...) 
{ ; CHECK-DAG: std r7, 64(r1) ; CHECK-DAG: std r6, 56(r1) ; CHECK-DAG: std r4, 40(r1) -; CHECK-DAG: cmpwi cr0, r3, 1 +; CHECK-DAG: cmpwi r3, 1 ; CHECK-DAG: std r5, 48(r1) ; CHECK-DAG: addis [[REG:r[0-9]+]], r2, .LCPI17_0@toc@ha ; CHECK-DAG: addi [[REG1:r[0-9]+]], [[REG]], .LCPI17_0@toc@l diff --git a/llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll b/llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll index 3a6ed21f5029..268d89174216 100644 --- a/llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll +++ b/llvm/test/CodeGen/PowerPC/fp-int-conversions-direct-moves.ll @@ -10,7 +10,7 @@ define zeroext i8 @_Z6testcff(float %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -43,7 +43,7 @@ define zeroext i8 @_Z6testcdd(double %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -76,7 +76,7 @@ define zeroext i8 @_Z7testucff(float %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -109,7 +109,7 @@ define zeroext i8 @_Z7testucdd(double %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -208,7 +208,7 @@ define zeroext i16 @_Z7testusff(float %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -241,7 +241,7 @@ define zeroext i16 @_Z7testusdd(double %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpsxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -340,7 +340,7 @@ define zeroext i32 @_Z7testuiff(float %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpuxws f0, f1 ; CHECK-NEXT: stfs f1, -4(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: @@ -373,7 +373,7 @@ define zeroext i32 @_Z7testuidd(double %arg) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: xscvdpuxws f0, f1 ; CHECK-NEXT: stfd f1, -8(r1) -; CHECK-NEXT: mfvsrwz r3, f0 +; CHECK-NEXT: mffprwz r3, f0 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: blr entry: diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll index fbbd01faa012..525dc8624798 100644 --- a/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift-rot.ll @@ -18,7 +18,7 @@ declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) define i8 @rotl_i8_const_shift(i8 %x) { ; CHECK-LABEL: rotl_i8_const_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 3, 27, 0, 31 +; CHECK-NEXT: rotlwi 4, 3, 27 ; CHECK-NEXT: rlwimi 4, 3, 3, 0, 28 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr @@ -42,7 +42,7 @@ define i16 @rotl_i16(i16 %x, i16 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: neg 5, 4 ; CHECK-NEXT: clrlwi 6, 3, 16 -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 +; CHECK-NEXT: clrlwi 4, 4, 28 ; CHECK-NEXT: 
clrlwi 5, 5, 28 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: srw 4, 6, 5 @@ -55,7 +55,7 @@ define i16 @rotl_i16(i16 %x, i16 %z) { define i32 @rotl_i32(i32 %x, i32 %z) { ; CHECK-LABEL: rotl_i32: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwnm 3, 3, 4, 0, 31 +; CHECK-NEXT: rotlw 3, 3, 4 ; CHECK-NEXT: blr %f = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %z) ret i32 %f @@ -64,7 +64,7 @@ define i32 @rotl_i32(i32 %x, i32 %z) { define i64 @rotl_i64(i64 %x, i64 %z) { ; CHECK-LABEL: rotl_i64: ; CHECK: # %bb.0: -; CHECK-NEXT: rldcl 3, 3, 4, 0 +; CHECK-NEXT: rotld 3, 3, 4 ; CHECK-NEXT: blr %f = call i64 @llvm.fshl.i64(i64 %x, i64 %x, i64 %z) ret i64 %f @@ -98,7 +98,7 @@ define <4 x i32> @rotl_v4i32_const_shift(<4 x i32> %x) { define i8 @rotr_i8_const_shift(i8 %x) { ; CHECK-LABEL: rotr_i8_const_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 3, 29, 0, 31 +; CHECK-NEXT: rotlwi 4, 3, 29 ; CHECK-NEXT: rlwimi 4, 3, 5, 0, 26 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr @@ -109,7 +109,7 @@ define i8 @rotr_i8_const_shift(i8 %x) { define i32 @rotr_i32_const_shift(i32 %x) { ; CHECK-LABEL: rotr_i32_const_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 29, 0, 31 +; CHECK-NEXT: rotlwi 3, 3, 29 ; CHECK-NEXT: blr %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 3) ret i32 %f @@ -122,7 +122,7 @@ define i16 @rotr_i16(i16 %x, i16 %z) { ; CHECK: # %bb.0: ; CHECK-NEXT: neg 5, 4 ; CHECK-NEXT: clrlwi 6, 3, 16 -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 +; CHECK-NEXT: clrlwi 4, 4, 28 ; CHECK-NEXT: clrlwi 5, 5, 28 ; CHECK-NEXT: srw 4, 6, 4 ; CHECK-NEXT: slw 3, 3, 5 @@ -136,7 +136,7 @@ define i32 @rotr_i32(i32 %x, i32 %z) { ; CHECK-LABEL: rotr_i32: ; CHECK: # %bb.0: ; CHECK-NEXT: neg 4, 4 -; CHECK-NEXT: rlwnm 3, 3, 4, 0, 31 +; CHECK-NEXT: rotlw 3, 3, 4 ; CHECK-NEXT: blr %f = call i32 @llvm.fshr.i32(i32 %x, i32 %x, i32 %z) ret i32 %f @@ -146,7 +146,7 @@ define i64 @rotr_i64(i64 %x, i64 %z) { ; CHECK-LABEL: rotr_i64: ; CHECK: # %bb.0: ; CHECK-NEXT: neg 4, 4 -; CHECK-NEXT: rldcl 3, 3, 4, 0 +; CHECK-NEXT: rotld 3, 3, 4 ; CHECK-NEXT: blr %f = call i64 @llvm.fshr.i64(i64 %x, i64 %x, i64 %z) ret i64 %f diff --git a/llvm/test/CodeGen/PowerPC/funnel-shift.ll b/llvm/test/CodeGen/PowerPC/funnel-shift.ll index 8a26f773440a..8690e1e5d5c3 100644 --- a/llvm/test/CodeGen/PowerPC/funnel-shift.ll +++ b/llvm/test/CodeGen/PowerPC/funnel-shift.ll @@ -44,7 +44,7 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-NEXT: mulhdu 6, 5, 6 ; CHECK-NEXT: rldicl 6, 6, 59, 5 ; CHECK-NEXT: mulli 6, 6, 37 -; CHECK-NEXT: subf. 5, 6, 5 +; CHECK-NEXT: sub. 5, 5, 6 ; CHECK-NEXT: subfic 6, 5, 37 ; CHECK-NEXT: sld 5, 3, 5 ; CHECK-NEXT: srd 4, 4, 6 @@ -72,7 +72,7 @@ define i7 @fshl_i7_const_fold() { define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_i32_const_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 9, 0, 31 +; CHECK-NEXT: rotlwi 4, 4, 9 ; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr @@ -85,7 +85,7 @@ define i32 @fshl_i32_const_shift(i32 %x, i32 %y) { define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) { ; CHECK-LABEL: fshl_i32_const_overshift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 9, 0, 31 +; CHECK-NEXT: rotlwi 4, 4, 9 ; CHECK-NEXT: rlwimi 4, 3, 9, 0, 22 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr @@ -149,7 +149,7 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) { ; CHECK-NEXT: mulhdu 6, 5, 6 ; CHECK-NEXT: rldicl 6, 6, 59, 5 ; CHECK-NEXT: mulli 6, 6, 37 -; CHECK-NEXT: subf. 5, 6, 5 +; CHECK-NEXT: sub. 
5, 5, 6 ; CHECK-NEXT: clrldi 6, 4, 27 ; CHECK-NEXT: subfic 7, 5, 37 ; CHECK-NEXT: srd 5, 6, 5 @@ -178,7 +178,7 @@ define i7 @fshr_i7_const_fold() { define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { ; CHECK-LABEL: fshr_i32_const_shift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 23, 0, 31 +; CHECK-NEXT: rotlwi 4, 4, 23 ; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr @@ -191,7 +191,7 @@ define i32 @fshr_i32_const_shift(i32 %x, i32 %y) { define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) { ; CHECK-LABEL: fshr_i32_const_overshift: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 23, 0, 31 +; CHECK-NEXT: rotlwi 4, 4, 23 ; CHECK-NEXT: rlwimi 4, 3, 23, 0, 8 ; CHECK-NEXT: mr 3, 4 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll index aa944a8d4646..e4dfd6c58f0e 100644 --- a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll +++ b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll @@ -81,7 +81,7 @@ entry: ; CHECK: sc ; CHECK: #NO_APP -; CHECK: cmpwi {{[0-9]+}}, [[REG]], 1 +; CHECK: cmpwi [[REG]], 1 ; CHECK: blr diff --git a/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll b/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll index 7f8807d2f466..c0ba04598071 100644 --- a/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll +++ b/llvm/test/CodeGen/PowerPC/knowCRBitSpill.ll @@ -20,7 +20,7 @@ define dso_local signext i32 @spillCRSET(i32 signext %p1, i32 signext %p2) { ; CHECK-LABEL: spillCRSET: ; CHECK: # %bb.2: -; CHECK-DAG: crnor [[CREG:.*]]*cr5+lt, eq, eq +; CHECK-DAG: crnot [[CREG:.*]]*cr5+lt, eq ; CHECK-DAG: mfocrf [[REG2:.*]], [[CREG]] ; CHECK-DAG: rlwinm [[REG2]], [[REG2]] ; CHECK: .LBB0_3: diff --git a/llvm/test/CodeGen/PowerPC/load-and-splat.ll b/llvm/test/CodeGen/PowerPC/load-and-splat.ll index fc714cfe1e2d..f411712ba3fa 100644 --- a/llvm/test/CodeGen/PowerPC/load-and-splat.ll +++ b/llvm/test/CodeGen/PowerPC/load-and-splat.ll @@ -130,7 +130,7 @@ define <16 x i8> @adjusted_lxvwsx(i64* %s, i64* %t) { ; P8-LABEL: adjusted_lxvwsx: ; P8: # %bb.0: # %entry ; P8-NEXT: ld r3, 0(r3) -; P8-NEXT: mtvsrd f0, r3 +; P8-NEXT: mtfprd f0, r3 ; P8-NEXT: xxswapd v2, vs0 ; P8-NEXT: xxspltw v2, v2, 2 ; P8-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/loop-comment.ll b/llvm/test/CodeGen/PowerPC/loop-comment.ll index fbb4472fe4b5..5891fa4635c3 100644 --- a/llvm/test/CodeGen/PowerPC/loop-comment.ll +++ b/llvm/test/CodeGen/PowerPC/loop-comment.ll @@ -4,7 +4,7 @@ define void @test(i8* %ptr, i8 %cmp, i8 %val) { ; PPC64LE-LABEL: test: ; PPC64LE: # %bb.0: -; PPC64LE-NEXT: rlwinm 4, 4, 0, 24, 31 +; PPC64LE-NEXT: clrlwi 4, 4, 24 ; PPC64LE-NEXT: .LBB0_1: ; PPC64LE-NEXT: lbarx 6, 0, 3 ; PPC64LE-NEXT: cmpw 4, 6 diff --git a/llvm/test/CodeGen/PowerPC/memcmp.ll b/llvm/test/CodeGen/PowerPC/memcmp.ll index 4aa5b400dd7c..f471a1d47119 100644 --- a/llvm/test/CodeGen/PowerPC/memcmp.ll +++ b/llvm/test/CodeGen/PowerPC/memcmp.ll @@ -6,9 +6,9 @@ define signext i32 @memcmp8(i32* nocapture readonly %buffer1, i32* nocapture rea ; CHECK: # %bb.0: ; CHECK-NEXT: ldbrx 3, 0, 3 ; CHECK-NEXT: ldbrx 4, 0, 4 -; CHECK-NEXT: subfc 5, 3, 4 +; CHECK-NEXT: subc 5, 4, 3 ; CHECK-NEXT: subfe 5, 4, 4 -; CHECK-NEXT: subfc 4, 4, 3 +; CHECK-NEXT: subc 4, 3, 4 ; CHECK-NEXT: subfe 3, 3, 3 ; CHECK-NEXT: neg 4, 5 ; CHECK-NEXT: neg 3, 3 diff --git a/llvm/test/CodeGen/PowerPC/optcmp.ll b/llvm/test/CodeGen/PowerPC/optcmp.ll index 2d342acfb2ef..0bd55b717e50 100644 --- a/llvm/test/CodeGen/PowerPC/optcmp.ll +++ b/llvm/test/CodeGen/PowerPC/optcmp.ll @@ -70,14 +70,14 @@ entry: define i64 
@fool(i64 %a, i64 %b, i64* nocapture %c) #0 { ; CHECK-LABEL: fool: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subf. 6, 4, 3 +; CHECK-NEXT: sub. 6, 3, 4 ; CHECK-NEXT: isel 3, 3, 4, 1 ; CHECK-NEXT: std 6, 0(5) ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: fool: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: subf. 6, 4, 3 +; CHECK-NO-ISEL-NEXT: sub. 6, 3, 4 ; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB2_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry ; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 @@ -96,14 +96,14 @@ entry: define i64 @foolb(i64 %a, i64 %b, i64* nocapture %c) #0 { ; CHECK-LABEL: foolb: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subf. 6, 4, 3 +; CHECK-NEXT: sub. 6, 3, 4 ; CHECK-NEXT: isel 3, 4, 3, 1 ; CHECK-NEXT: std 6, 0(5) ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: foolb: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: subf. 6, 4, 3 +; CHECK-NO-ISEL-NEXT: sub. 6, 3, 4 ; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB3_1 ; CHECK-NO-ISEL-NEXT: b .LBB3_2 ; CHECK-NO-ISEL-NEXT: .LBB3_1: # %entry @@ -122,14 +122,14 @@ entry: define i64 @foolc(i64 %a, i64 %b, i64* nocapture %c) #0 { ; CHECK-LABEL: foolc: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subf. 6, 3, 4 +; CHECK-NEXT: sub. 6, 4, 3 ; CHECK-NEXT: isel 3, 3, 4, 0 ; CHECK-NEXT: std 6, 0(5) ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: foolc: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: subf. 6, 3, 4 +; CHECK-NO-ISEL-NEXT: sub. 6, 4, 3 ; CHECK-NO-ISEL-NEXT: bc 12, 0, .LBB4_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry ; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 @@ -148,14 +148,14 @@ entry: define i64 @foold(i64 %a, i64 %b, i64* nocapture %c) #0 { ; CHECK-LABEL: foold: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subf. 6, 3, 4 +; CHECK-NEXT: sub. 6, 4, 3 ; CHECK-NEXT: isel 3, 3, 4, 1 ; CHECK-NEXT: std 6, 0(5) ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: foold: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: subf. 6, 3, 4 +; CHECK-NO-ISEL-NEXT: sub. 6, 4, 3 ; CHECK-NO-ISEL-NEXT: bc 12, 1, .LBB5_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry ; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 @@ -174,14 +174,14 @@ entry: define i64 @foold2(i64 %a, i64 %b, i64* nocapture %c) #0 { ; CHECK-LABEL: foold2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subf. 6, 4, 3 +; CHECK-NEXT: sub. 6, 3, 4 ; CHECK-NEXT: isel 3, 3, 4, 0 ; CHECK-NEXT: std 6, 0(5) ; CHECK-NEXT: blr ; ; CHECK-NO-ISEL-LABEL: foold2: ; CHECK-NO-ISEL: # %bb.0: # %entry -; CHECK-NO-ISEL-NEXT: subf. 6, 4, 3 +; CHECK-NO-ISEL-NEXT: sub. 
6, 3, 4 ; CHECK-NO-ISEL-NEXT: bc 12, 0, .LBB6_2 ; CHECK-NO-ISEL-NEXT: # %bb.1: # %entry ; CHECK-NO-ISEL-NEXT: ori 3, 4, 0 diff --git a/llvm/test/CodeGen/PowerPC/optimize-andiso.ll b/llvm/test/CodeGen/PowerPC/optimize-andiso.ll index 83416d124084..24df97032491 100644 --- a/llvm/test/CodeGen/PowerPC/optimize-andiso.ll +++ b/llvm/test/CodeGen/PowerPC/optimize-andiso.ll @@ -15,8 +15,8 @@ define float @floatundisf(i64 %a) { ; CHECK-NEXT: li r4, 3 ; CHECK-NEXT: isel r4, r5, r4, eq ; CHECK-NEXT: srd r3, r3, r4 -; CHECK-NEXT: rlwinm r3, r3, 0, 9, 31 -; CHECK-NEXT: mtvsrd f0, r3 +; CHECK-NEXT: clrlwi r3, r3, 9 +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-NEXT: xscvspdpn f1, vs0 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 1b11bfd2b47e..e1f0e827b9f6 100644 --- a/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/llvm/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -21,7 +21,7 @@ entry: ; CHECK: sldi r3, r3, 56 ; CHECK: mtvsrd v2, r3 ; CHECK-LE-LABEL: buildc -; CHECK-LE: mtvsrd f0, r3 +; CHECK-LE: mtfprd f0, r3 ; CHECK-LE: xxswapd v2, vs0 } @@ -35,7 +35,7 @@ entry: ; CHECK: sldi r3, r3, 48 ; CHECK: mtvsrd v2, r3 ; CHECK-LE-LABEL: builds -; CHECK-LE: mtvsrd f0, r3 +; CHECK-LE: mtfprd f0, r3 ; CHECK-LE: xxswapd v2, vs0 } @@ -46,10 +46,10 @@ entry: %splat.splat = shufflevector <4 x i32> %splat.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer ret <4 x i32> %splat.splat ; CHECK-LABEL: buildi -; CHECK: mtvsrwz f0, r3 +; CHECK: mtfprwz f0, r3 ; CHECK: xxspltw v2, vs0, 1 ; CHECK-LE-LABEL: buildi -; CHECK-LE: mtvsrwz f0, r3 +; CHECK-LE: mtfprwz f0, r3 ; CHECK-LE: xxspltw v2, vs0, 1 } @@ -60,9 +60,9 @@ entry: %splat.splat = shufflevector <2 x i64> %splat.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer ret <2 x i64> %splat.splat ; CHECK-LABEL: buildl -; CHECK: mtvsrd f0, r3 +; CHECK: mtfprd f0, r3 ; CHECK-LE-LABEL: buildl -; CHECK-LE: mtvsrd f0, r3 +; CHECK-LE: mtfprd f0, r3 ; CHECK-LE: xxspltd v2, vs0, 0 } @@ -107,7 +107,7 @@ entry: ; CHECK: rldicl r3, r3, 8, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc0 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: clrldi r3, r3, 56 ; CHECK-LE: extsb r3, r3 } @@ -122,7 +122,7 @@ entry: ; CHECK: rldicl r3, r3, 16, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc1 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 56, 56 ; CHECK-LE: extsb r3, r3 } @@ -137,7 +137,7 @@ entry: ; CHECK: rldicl r3, r3, 24, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc2 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 48, 56 ; CHECK-LE: extsb r3, r3 } @@ -152,7 +152,7 @@ entry: ; CHECK: rldicl r3, r3, 32, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc3 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 40, 56 ; CHECK-LE: extsb r3, r3 } @@ -167,7 +167,7 @@ entry: ; CHECK: rldicl r3, r3, 40, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc4 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 32, 56 ; CHECK-LE: extsb r3, r3 } @@ -182,7 +182,7 @@ entry: ; CHECK: rldicl r3, r3, 48, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc5 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 24, 56 ; CHECK-LE: extsb r3, r3 } @@ -197,7 +197,7 @@ entry: ; CHECK: rldicl r3, r3, 56, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc6 -; CHECK-LE: mfvsrd r3, f0 +; 
CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 16, 56 ; CHECK-LE: extsb r3, r3 } @@ -212,7 +212,7 @@ entry: ; CHECK: clrldi r3, r3, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc7 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 8, 56 ; CHECK-LE: extsb r3, r3 } @@ -223,7 +223,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 8 ret i8 %vecext ; CHECK-LABEL: @getsc8 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 8, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc8 @@ -238,7 +238,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 9 ret i8 %vecext ; CHECK-LABEL: @getsc9 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 16, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc9 @@ -253,7 +253,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 10 ret i8 %vecext ; CHECK-LABEL: @getsc10 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 24, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc10 @@ -268,7 +268,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 11 ret i8 %vecext ; CHECK-LABEL: @getsc11 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 32, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc11 @@ -283,7 +283,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 12 ret i8 %vecext ; CHECK-LABEL: @getsc12 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 40, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc12 @@ -298,7 +298,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 13 ret i8 %vecext ; CHECK-LABEL: @getsc13 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 48, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc13 @@ -313,7 +313,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 14 ret i8 %vecext ; CHECK-LABEL: @getsc14 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 56, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc14 @@ -328,7 +328,7 @@ entry: %vecext = extractelement <16 x i8> %vsc, i32 15 ret i8 %vecext ; CHECK-LABEL: @getsc15 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: clrldi r3, r3, 56 ; CHECK: extsb r3, r3 ; CHECK-LE-LABEL: @getsc15 @@ -346,7 +346,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 8, 56 ; CHECK-LE-LABEL: @getuc0 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: clrldi r3, r3, 56 } @@ -359,7 +359,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 16, 56 ; CHECK-LE-LABEL: @getuc1 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 56, 56 } @@ -372,7 +372,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 24, 56 ; CHECK-LE-LABEL: @getuc2 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 48, 56 } @@ -385,7 +385,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 32, 56 ; CHECK-LE-LABEL: @getuc3 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 40, 56 } @@ -398,7 +398,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 40, 56 ; CHECK-LE-LABEL: @getuc4 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 32, 56 } @@ -411,7 +411,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 48, 56 ; CHECK-LE-LABEL: @getuc5 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 24, 56 } @@ -424,7 +424,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 56, 56 ; CHECK-LE-LABEL: @getuc6 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: 
mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 16, 56 } @@ -437,7 +437,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: clrldi r3, r3, 56 ; CHECK-LE-LABEL: @getuc7 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 8, 56 } @@ -447,7 +447,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 8 ret i8 %vecext ; CHECK-LABEL: @getuc8 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 8, 56 ; CHECK-LE-LABEL: @getuc8 ; CHECK-LE: mfvsrd r3, v2 @@ -460,7 +460,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 9 ret i8 %vecext ; CHECK-LABEL: @getuc9 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 16, 56 ; CHECK-LE-LABEL: @getuc9 ; CHECK-LE: mfvsrd r3, v2 @@ -473,7 +473,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 10 ret i8 %vecext ; CHECK-LABEL: @getuc10 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 24, 56 ; CHECK-LE-LABEL: @getuc10 ; CHECK-LE: mfvsrd r3, v2 @@ -486,7 +486,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 11 ret i8 %vecext ; CHECK-LABEL: @getuc11 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 32, 56 ; CHECK-LE-LABEL: @getuc11 ; CHECK-LE: mfvsrd r3, v2 @@ -499,7 +499,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 12 ret i8 %vecext ; CHECK-LABEL: @getuc12 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 40, 56 ; CHECK-LE-LABEL: @getuc12 ; CHECK-LE: mfvsrd r3, v2 @@ -512,7 +512,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 13 ret i8 %vecext ; CHECK-LABEL: @getuc13 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 48, 56 ; CHECK-LE-LABEL: @getuc13 ; CHECK-LE: mfvsrd r3, v2 @@ -525,7 +525,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 14 ret i8 %vecext ; CHECK-LABEL: @getuc14 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 56, 56 ; CHECK-LE-LABEL: @getuc14 ; CHECK-LE: mfvsrd r3, v2 @@ -538,7 +538,7 @@ entry: %vecext = extractelement <16 x i8> %vuc, i32 15 ret i8 %vecext ; CHECK-LABEL: @getuc15 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: clrldi r3, r3, 56 ; CHECK-LE-LABEL: @getuc15 ; CHECK-LE: mfvsrd r3, v2 @@ -611,7 +611,7 @@ entry: ; CHECK: rldicl r3, r3, 16, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss0 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: clrldi r3, r3, 48 ; CHECK-LE: extsh r3, r3 } @@ -626,7 +626,7 @@ entry: ; CHECK: rldicl r3, r3, 32, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss1 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 48, 48 ; CHECK-LE: extsh r3, r3 } @@ -641,7 +641,7 @@ entry: ; CHECK: rldicl r3, r3, 48, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss2 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 32, 48 ; CHECK-LE: extsh r3, r3 } @@ -656,7 +656,7 @@ entry: ; CHECK: clrldi r3, r3, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss3 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 16, 48 ; CHECK-LE: extsh r3, r3 } @@ -667,7 +667,7 @@ entry: %vecext = extractelement <8 x i16> %vss, i32 4 ret i16 %vecext ; CHECK-LABEL: @getss4 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 16, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss4 @@ -682,7 +682,7 @@ entry: %vecext = extractelement <8 x i16> %vss, i32 5 ret i16 %vecext ; CHECK-LABEL: @getss5 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 32, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss5 
@@ -697,7 +697,7 @@ entry: %vecext = extractelement <8 x i16> %vss, i32 6 ret i16 %vecext ; CHECK-LABEL: @getss6 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 48, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss6 @@ -712,7 +712,7 @@ entry: %vecext = extractelement <8 x i16> %vss, i32 7 ret i16 %vecext ; CHECK-LABEL: @getss7 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: clrldi r3, r3, 48 ; CHECK: extsh r3, r3 ; CHECK-LE-LABEL: @getss7 @@ -730,7 +730,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 16, 48 ; CHECK-LE-LABEL: @getus0 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: clrldi r3, r3, 48 } @@ -743,7 +743,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 32, 48 ; CHECK-LE-LABEL: @getus1 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 48, 48 } @@ -756,7 +756,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: rldicl r3, r3, 48, 48 ; CHECK-LE-LABEL: @getus2 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 32, 48 } @@ -769,7 +769,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK: clrldi r3, r3, 48 ; CHECK-LE-LABEL: @getus3 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 ; CHECK-LE: rldicl r3, r3, 16, 48 } @@ -779,7 +779,7 @@ entry: %vecext = extractelement <8 x i16> %vus, i32 4 ret i16 %vecext ; CHECK-LABEL: @getus4 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 16, 48 ; CHECK-LE-LABEL: @getus4 ; CHECK-LE: mfvsrd r3, v2 @@ -792,7 +792,7 @@ entry: %vecext = extractelement <8 x i16> %vus, i32 5 ret i16 %vecext ; CHECK-LABEL: @getus5 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 32, 48 ; CHECK-LE-LABEL: @getus5 ; CHECK-LE: mfvsrd r3, v2 @@ -805,7 +805,7 @@ entry: %vecext = extractelement <8 x i16> %vus, i32 6 ret i16 %vecext ; CHECK-LABEL: @getus6 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: rldicl r3, r3, 48, 48 ; CHECK-LE-LABEL: @getus6 ; CHECK-LE: mfvsrd r3, v2 @@ -818,7 +818,7 @@ entry: %vecext = extractelement <8 x i16> %vus, i32 7 ret i16 %vecext ; CHECK-LABEL: @getus7 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK: clrldi r3, r3, 48 ; CHECK-LE-LABEL: @getus7 ; CHECK-LE: mfvsrd r3, v2 @@ -892,11 +892,11 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getsi0 ; CHECK: xxsldwi vs0, v2, v2, 3 -; CHECK: mfvsrwz r3, f0 +; CHECK: mffprwz r3, f0 ; CHECK: extsw r3, r3 ; CHECK-LE-LABEL: @getsi0 ; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mfvsrwz r3, f0 +; CHECK-LE: mffprwz r3, f0 ; CHECK-LE: extsw r3, r3 } @@ -910,7 +910,7 @@ entry: ; CHECK: extsw r3, r3 ; CHECK-LE-LABEL: @getsi1 ; CHECK-LE: xxsldwi vs0, v2, v2, 1 -; CHECK-LE: mfvsrwz r3, f0 +; CHECK-LE: mffprwz r3, f0 ; CHECK-LE: extsw r3, r3 } @@ -921,7 +921,7 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getsi2 ; CHECK: xxsldwi vs0, v2, v2, 1 -; CHECK: mfvsrwz r3, f0 +; CHECK: mffprwz r3, f0 ; CHECK: extsw r3, r3 ; CHECK-LE-LABEL: @getsi2 ; CHECK-LE: mfvsrwz r3, v2 @@ -935,11 +935,11 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getsi3 ; CHECK: xxswapd vs0, v2 -; CHECK: mfvsrwz r3, f0 +; CHECK: mffprwz r3, f0 ; CHECK: extsw r3, r3 ; CHECK-LE-LABEL: @getsi3 ; CHECK-LE: xxsldwi vs0, v2, v2, 3 -; CHECK-LE: mfvsrwz r3, f0 +; CHECK-LE: mffprwz r3, f0 ; CHECK-LE: extsw r3, r3 } @@ -950,10 +950,10 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getui0 ; CHECK: xxsldwi vs0, v2, v2, 3 -; CHECK: mfvsrwz r3, f0 +; CHECK: mffprwz r3, f0 ; CHECK-LE-LABEL: @getui0 ; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mfvsrwz r3, f0 +; CHECK-LE: mffprwz r3, f0 } ; Function Attrs: norecurse 
nounwind readnone @@ -965,7 +965,7 @@ entry: ; CHECK: mfvsrwz r3, v2 ; CHECK-LE-LABEL: @getui1 ; CHECK-LE: xxsldwi vs0, v2, v2, 1 -; CHECK-LE: mfvsrwz r3, f0 +; CHECK-LE: mffprwz r3, f0 } ; Function Attrs: norecurse nounwind readnone @@ -975,7 +975,7 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getui2 ; CHECK: xxsldwi vs0, v2, v2, 1 -; CHECK: mfvsrwz r3, f0 +; CHECK: mffprwz r3, f0 ; CHECK-LE-LABEL: @getui2 ; CHECK-LE: mfvsrwz r3, v2 } @@ -987,10 +987,10 @@ entry: ret i32 %vecext ; CHECK-LABEL: @getui3 ; CHECK: xxswapd vs0, v2 -; CHECK: mfvsrwz r3, f0 +; CHECK: mffprwz r3, f0 ; CHECK-LE-LABEL: @getui3 ; CHECK-LE: xxsldwi vs0, v2, v2, 3 -; CHECK-LE: mfvsrwz r3, f0 +; CHECK-LE: mffprwz r3, f0 } ; Function Attrs: norecurse nounwind readnone @@ -1022,7 +1022,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK-LE-LABEL: @getsl0 ; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 } ; Function Attrs: norecurse nounwind readnone @@ -1032,7 +1032,7 @@ entry: ret i64 %vecext ; CHECK-LABEL: @getsl1 ; CHECK: xxswapd vs0, v2 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK-LE-LABEL: @getsl1 ; CHECK-LE: mfvsrd r3, v2 } @@ -1046,7 +1046,7 @@ entry: ; CHECK: mfvsrd r3, v2 ; CHECK-LE-LABEL: @getul0 ; CHECK-LE: xxswapd vs0, v2 -; CHECK-LE: mfvsrd r3, f0 +; CHECK-LE: mffprd r3, f0 } ; Function Attrs: norecurse nounwind readnone @@ -1056,7 +1056,7 @@ entry: ret i64 %vecext ; CHECK-LABEL: @getul1 ; CHECK: xxswapd vs0, v2 -; CHECK: mfvsrd r3, f0 +; CHECK: mffprd r3, f0 ; CHECK-LE-LABEL: @getul1 ; CHECK-LE: mfvsrd r3, v2 } diff --git a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll index 0e50b3a68cec..467aa422ec93 100644 --- a/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll +++ b/llvm/test/CodeGen/PowerPC/p9-xxinsertw-xxextractuw.ll @@ -561,10 +561,10 @@ entry: define <4 x i32> @_Z10testInsEltILj0EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { entry: ; CHECK-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_ -; CHECK: mtvsrwz 0, 5 +; CHECK: mtfprwz 0, 5 ; CHECK: xxinsertw 34, 0, 12 ; CHECK-BE-LABEL: _Z10testInsEltILj0EDv4_jjET0_S1_T1_ -; CHECK-BE: mtvsrwz 0, 5 +; CHECK-BE: mtfprwz 0, 5 ; CHECK-BE: xxinsertw 34, 0, 0 %vecins = insertelement <4 x i32> %a, i32 %b, i32 0 ret <4 x i32> %vecins @@ -573,10 +573,10 @@ entry: define <4 x i32> @_Z10testInsEltILj1EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { entry: ; CHECK-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_ -; CHECK: mtvsrwz 0, 5 +; CHECK: mtfprwz 0, 5 ; CHECK: xxinsertw 34, 0, 8 ; CHECK-BE-LABEL: _Z10testInsEltILj1EDv4_jjET0_S1_T1_ -; CHECK-BE: mtvsrwz 0, 5 +; CHECK-BE: mtfprwz 0, 5 ; CHECK-BE: xxinsertw 34, 0, 4 %vecins = insertelement <4 x i32> %a, i32 %b, i32 1 ret <4 x i32> %vecins @@ -585,10 +585,10 @@ entry: define <4 x i32> @_Z10testInsEltILj2EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { entry: ; CHECK-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_ -; CHECK: mtvsrwz 0, 5 +; CHECK: mtfprwz 0, 5 ; CHECK: xxinsertw 34, 0, 4 ; CHECK-BE-LABEL: _Z10testInsEltILj2EDv4_jjET0_S1_T1_ -; CHECK-BE: mtvsrwz 0, 5 +; CHECK-BE: mtfprwz 0, 5 ; CHECK-BE: xxinsertw 34, 0, 8 %vecins = insertelement <4 x i32> %a, i32 %b, i32 2 ret <4 x i32> %vecins @@ -597,10 +597,10 @@ entry: define <4 x i32> @_Z10testInsEltILj3EDv4_jjET0_S1_T1_(<4 x i32> %a, i32 zeroext %b) { entry: ; CHECK-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_ -; CHECK: mtvsrwz 0, 5 +; CHECK: mtfprwz 0, 5 ; CHECK: xxinsertw 34, 0, 0 ; CHECK-BE-LABEL: _Z10testInsEltILj3EDv4_jjET0_S1_T1_ -; CHECK-BE: mtvsrwz 0, 5 +; CHECK-BE: 
mtfprwz 0, 5 ; CHECK-BE: xxinsertw 34, 0, 12 %vecins = insertelement <4 x i32> %a, i32 %b, i32 3 ret <4 x i32> %vecins diff --git a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll index eab90bf2fbfe..43cbd376a3ab 100644 --- a/llvm/test/CodeGen/PowerPC/popcnt-zext.ll +++ b/llvm/test/CodeGen/PowerPC/popcnt-zext.ll @@ -12,7 +12,7 @@ define i16 @zpop_i8_i16(i8 %x) { ; SLOW-LABEL: zpop_i8_i16: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 24 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 85 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 @@ -48,7 +48,7 @@ define i16 @popz_i8_i16(i8 %x) { ; SLOW-LABEL: popz_i8_i16: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 24 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 85 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 @@ -77,14 +77,14 @@ define i16 @popz_i8_i16(i8 %x) { define i32 @zpop_i8_i32(i8 %x) { ; FAST-LABEL: zpop_i8_i32: ; FAST: # %bb.0: -; FAST-NEXT: rlwinm 3, 3, 0, 24, 31 +; FAST-NEXT: clrlwi 3, 3, 24 ; FAST-NEXT: popcntw 3, 3 ; FAST-NEXT: blr ; ; SLOW-LABEL: zpop_i8_i32: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 24 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 85 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 @@ -120,7 +120,7 @@ define i32 @popz_i8_32(i8 %x) { ; SLOW-LABEL: popz_i8_32: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 24 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 85 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 @@ -149,14 +149,14 @@ define i32 @popz_i8_32(i8 %x) { define i32 @zpop_i16_i32(i16 %x) { ; FAST-LABEL: zpop_i16_i32: ; FAST: # %bb.0: -; FAST-NEXT: rlwinm 3, 3, 0, 16, 31 +; FAST-NEXT: clrlwi 3, 3, 16 ; FAST-NEXT: popcntw 3, 3 ; FAST-NEXT: blr ; ; SLOW-LABEL: zpop_i16_i32: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 16 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 21845 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 @@ -192,7 +192,7 @@ define i32 @popz_i16_32(i16 %x) { ; SLOW-LABEL: popz_i16_32: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 16 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 21845 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 @@ -305,7 +305,7 @@ define i64 @popa_i16_i64(i16 %x) { ; SLOW-LABEL: popa_i16_i64: ; SLOW: # %bb.0: ; SLOW-NEXT: clrlwi 5, 3, 16 -; SLOW-NEXT: rlwinm 3, 3, 31, 0, 31 +; SLOW-NEXT: rotlwi 3, 3, 31 ; SLOW-NEXT: andi. 3, 3, 21845 ; SLOW-NEXT: lis 4, 13107 ; SLOW-NEXT: subf 3, 3, 5 diff --git a/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll b/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll index c5546254fe38..ba93b2926360 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-crbits-onoff.ll @@ -14,7 +14,7 @@ entry: ; CHECK-LABEL: @crbitsoff ; CHECK-NO-ISEL-LABEL: @crbitsoff -; CHECK-DAG: cmplwi {{[0-9]+}}, 3, 0 +; CHECK-DAG: cmplwi 3, 0 ; CHECK-DAG: li [[REG2:[0-9]+]], 1 ; CHECK-DAG: cntlzw [[REG3:[0-9]+]], ; CHECK: isel [[REG4:[0-9]+]], 0, [[REG2]] diff --git a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll index 0e9ea90ed143..004bf64e6953 100644 --- a/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-shrink-wrapping.ll @@ -14,7 +14,7 @@ ; ; Compare the arguments and return ; No prologue needed. 
-; ENABLE: cmpw 0, 3, 4 +; ENABLE: cmpw 3, 4 ; ENABLE-NEXT: bgelr 0 ; ; Prologue code. @@ -24,7 +24,7 @@ ; ; Compare the arguments and jump to exit. ; After the prologue is set. -; DISABLE: cmpw 0, 3, 4 +; DISABLE: cmpw 3, 4 ; DISABLE-NEXT: bge 0, .[[EXIT_LABEL:LBB[0-9_]+]] ; ; Store %a on the stack @@ -75,14 +75,14 @@ declare i32 @doSomething(i32, i32*) ; CHECK-LABEL: freqSaveAndRestoreOutsideLoop: ; ; Shrink-wrapping allows to skip the prologue in the else case. -; ENABLE: cmplwi 0, 3, 0 +; ENABLE: cmplwi 3, 0 ; ENABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; Make sure we save the link register ; CHECK: mflr {{[0-9]+}} ; -; DISABLE: cmplwi 0, 3, 0 +; DISABLE: cmplwi 3, 0 ; DISABLE: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Loop preheader @@ -202,7 +202,7 @@ for.end: ; preds = %for.body ; restore outside. ; CHECK-LABEL: loopInfoSaveOutsideLoop: ; -; ENABLE: cmplwi 0, 3, 0 +; ENABLE: cmplwi 3, 0 ; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. @@ -211,7 +211,7 @@ for.end: ; preds = %for.body ; ; DISABLE: std ; DISABLE-NEXT: std -; DISABLE: cmplwi 0, 3, 0 +; DISABLE: cmplwi 3, 0 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Loop preheader @@ -284,7 +284,7 @@ declare void @somethingElse(...) ; save outside. ; CHECK-LABEL: loopInfoRestoreOutsideLoop: ; -; ENABLE: cmplwi 0, 3, 0 +; ENABLE: cmplwi 3, 0 ; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. @@ -293,7 +293,7 @@ declare void @somethingElse(...) ; ; DISABLE: std ; DISABLE-NEXT: std -; DISABLE: cmplwi 0, 3, 0 +; DISABLE: cmplwi 3, 0 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; CHECK: bl somethingElse @@ -373,7 +373,7 @@ entry: ; Check that we handle inline asm correctly. ; CHECK-LABEL: inlineAsm: ; -; ENABLE: cmplwi 0, 3, 0 +; ENABLE: cmplwi 3, 0 ; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. @@ -381,7 +381,7 @@ entry: ; ENABLE-DAG: li [[IV:[0-9]+]], 10 ; ENABLE-DAG: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill ; -; DISABLE: cmplwi 0, 3, 0 +; DISABLE: cmplwi 3, 0 ; DISABLE-NEXT: std 14, -[[STACK_OFFSET:[0-9]+]](1) # 8-byte Folded Spill ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; DISABLE: li [[IV:[0-9]+]], 10 @@ -438,13 +438,13 @@ if.end: ; preds = %for.body, %if.else ; Check that we handle calls to variadic functions correctly. ; CHECK-LABEL: callVariadicFunc: ; -; ENABLE: cmplwi 0, 3, 0 +; ENABLE: cmplwi 3, 0 ; ENABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Prologue code. ; CHECK: mflr {{[0-9]+}} ; -; DISABLE: cmplwi 0, 3, 0 +; DISABLE: cmplwi 3, 0 ; DISABLE-NEXT: beq 0, .[[ELSE_LABEL:LBB[0-9_]+]] ; ; Setup of the varags. @@ -497,7 +497,7 @@ declare i32 @someVariadicFunc(i32, ...) 
; CHECK-LABEL: noreturn: ; DISABLE: mflr {{[0-9]+}} ; -; CHECK: cmplwi 0, 3, 0 +; CHECK: cmplwi 3, 0 ; CHECK-NEXT: bne{{[-]?}} 0, .[[ABORT:LBB[0-9_]+]] ; ; CHECK: li 3, 42 diff --git a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll index d141e4aae074..ace75a76a5dd 100644 --- a/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll +++ b/llvm/test/CodeGen/PowerPC/ppc64-P9-setb.ll @@ -123,7 +123,7 @@ define i64 @setb5(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: isel @@ -133,7 +133,7 @@ define i64 @setb5(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8: isel @@ -153,7 +153,7 @@ define i64 @setb6(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: isel @@ -163,7 +163,7 @@ define i64 @setb6(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8: isel @@ -183,7 +183,7 @@ define i64 @setb7(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: isel @@ -193,7 +193,7 @@ define i64 @setb7(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8: isel @@ -213,7 +213,7 @@ define i64 @setb8(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: isel @@ -223,7 +223,7 @@ define i64 @setb8(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8: isel @@ -347,7 +347,7 @@ define i64 @setb13(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: neg @@ -358,7 +358,7 @@ define i64 @setb13(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8-DAG: neg @@ -379,7 +379,7 @@ define i64 @setb14(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: neg @@ -390,7 +390,7 @@ define i64 @setb14(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8-DAG: neg @@ -411,7 +411,7 @@ define i64 @setb15(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: neg @@ -422,7 +422,7 @@ define i64 @setb15(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; 
CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8-DAG: neg @@ -443,7 +443,7 @@ define i64 @setb16(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: neg @@ -454,7 +454,7 @@ define i64 @setb16(i64 %a, i64 %b) { ; CHECK-PWR8-DAG: rldicl ; CHECK-PWR8-DAG: li ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8-DAG: xori ; CHECK-PWR8-DAG: neg @@ -769,14 +769,14 @@ define i64 @setb29(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpd {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: adde ; CHECK-NOT: xori ; CHECK-NOT: isel ; CHECK: blr ; CHECK-PWR8-LABEL: setb29 ; CHECK-PWR8-DAG: cmpd -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: adde ; CHECK-PWR8: isel ; CHECK-PWR8: blr @@ -1013,13 +1013,13 @@ define i64 @setbud1(i64 %a, i64 %b) { ; CHECK-NOT: li ; CHECK: cmpld {{c?r?(0, )?}}r3, r4 ; CHECK-NEXT: setb r3, cr0 -; CHECK-NOT: subfc +; CHECK-NOT: subc ; CHECK-NOT: subfe ; CHECK-NOT: neg ; CHECK-NOT: isel ; CHECK: blr ; CHECK-PWR8-LABEL: setbud1 -; CHECK-PWR8-DAG: subfc +; CHECK-PWR8-DAG: subc ; CHECK-PWR8-DAG: subfe ; CHECK-PWR8-DAG: cmpld ; CHECK-PWR8-DAG: neg @@ -1138,8 +1138,8 @@ define i64 @setbuh(i16 %a, i16 %b) { %t4 = select i1 %t1, i64 1, i64 %t3 ret i64 %t4 ; CHECK-LABEL: setbuh: -; CHECK-DAG: rlwinm [[RA:r[0-9]+]], r3, 0, 16, 31 -; CHECK-DAG: rlwinm [[RB:r[0-9]+]], r4, 0, 16, 31 +; CHECK-DAG: clrlwi [[RA:r[0-9]+]], r3, 16 +; CHECK-DAG: clrlwi [[RB:r[0-9]+]], r4, 16 ; CHECK-NOT: li ; CHECK-NOT: xor ; CHECK: cmplw {{c?r?(0, )?}}[[RA]], [[RB]] @@ -1151,8 +1151,8 @@ define i64 @setbuh(i16 %a, i16 %b) { ; CHECK-NOT: isel ; CHECK: blr ; CHECK-PWR8-LABEL: setbuh -; CHECK-PWR8: rlwinm -; CHECK-PWR8: rlwinm +; CHECK-PWR8: clrlwi +; CHECK-PWR8: clrlwi ; CHECK-PWR8-DAG: cmplw ; CHECK-PWR8-DAG: cntlzw ; CHECK-PWR8: srwi @@ -1170,8 +1170,8 @@ define i64 @setbuc(i8 %a, i8 %b) { %t4 = select i1 %t1, i64 1, i64 %t3 ret i64 %t4 ; CHECK-LABEL: setbuc: -; CHECK-DAG: rlwinm [[RA:r[0-9]+]], r3, 0, 24, 31 -; CHECK-DAG: rlwinm [[RB:r[0-9]+]], r4, 0, 24, 31 +; CHECK-DAG: clrlwi [[RA:r[0-9]+]], r3, 24 +; CHECK-DAG: clrlwi [[RB:r[0-9]+]], r4, 24 ; CHECK-NOT: li ; CHECK-NOT: clrldi ; CHECK: cmplw {{c?r?(0, )?}}[[RA]], [[RB]] @@ -1181,8 +1181,8 @@ define i64 @setbuc(i8 %a, i8 %b) { ; CHECK-NOT: isel ; CHECK: blr ; CHECK-PWR8-LABEL: setbuc -; CHECK-PWR8: rlwinm -; CHECK-PWR8: rlwinm +; CHECK-PWR8: clrlwi +; CHECK-PWR8: clrlwi ; CHECK-PWR8-DAG: clrldi ; CHECK-PWR8-DAG: clrldi ; CHECK-PWR8-DAG: cmplw diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll index 5edaff2c53c8..7a2fb76fd453 100644 --- a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -18,30 +18,30 @@ define <8 x i16> @pr25080(<8 x i32> %a) { ; LE-NEXT: xxsldwi 1, 34, 34, 1 ; LE-NEXT: mfvsrwz 4, 35 ; LE-NEXT: xxsldwi 4, 34, 34, 3 -; LE-NEXT: mtvsrd 2, 3 -; LE-NEXT: mfvsrwz 3, 0 +; LE-NEXT: mtfprd 2, 3 +; LE-NEXT: mffprwz 3, 0 ; LE-NEXT: xxswapd 0, 35 -; LE-NEXT: mtvsrd 3, 4 +; LE-NEXT: mtfprd 3, 4 ; LE-NEXT: xxsldwi 5, 35, 35, 1 -; LE-NEXT: mfvsrwz 4, 1 +; LE-NEXT: mffprwz 4, 1 ; LE-NEXT: xxsldwi 7, 35, 35, 3 -; LE-NEXT: mtvsrd 1, 3 +; LE-NEXT: mtfprd 1, 3 ; LE-NEXT: xxswapd 33, 3 -; LE-NEXT: mfvsrwz 3, 4 -; LE-NEXT: mtvsrd 4, 4 +; LE-NEXT: mffprwz 3, 4 +; 
LE-NEXT: mtfprd 4, 4 ; LE-NEXT: xxswapd 34, 1 -; LE-NEXT: mfvsrwz 4, 0 -; LE-NEXT: mtvsrd 0, 3 +; LE-NEXT: mffprwz 4, 0 +; LE-NEXT: mtfprd 0, 3 ; LE-NEXT: xxswapd 35, 4 -; LE-NEXT: mfvsrwz 3, 5 -; LE-NEXT: mtvsrd 6, 4 +; LE-NEXT: mffprwz 3, 5 +; LE-NEXT: mtfprd 6, 4 ; LE-NEXT: xxswapd 36, 0 -; LE-NEXT: mtvsrd 1, 3 -; LE-NEXT: mfvsrwz 3, 7 +; LE-NEXT: mtfprd 1, 3 +; LE-NEXT: mffprwz 3, 7 ; LE-NEXT: xxswapd 37, 6 ; LE-NEXT: vmrglh 2, 3, 2 ; LE-NEXT: xxswapd 35, 2 -; LE-NEXT: mtvsrd 2, 3 +; LE-NEXT: mtfprd 2, 3 ; LE-NEXT: xxswapd 32, 1 ; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha ; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l @@ -73,29 +73,29 @@ define <8 x i16> @pr25080(<8 x i32> %a) { ; BE-NEXT: mfvsrwz 3, 35 ; BE-NEXT: xxsldwi 1, 35, 35, 1 ; BE-NEXT: sldi 3, 3, 48 -; BE-NEXT: mfvsrwz 4, 0 +; BE-NEXT: mffprwz 4, 0 ; BE-NEXT: xxsldwi 0, 35, 35, 3 ; BE-NEXT: mtvsrd 36, 3 -; BE-NEXT: mfvsrwz 3, 1 +; BE-NEXT: mffprwz 3, 1 ; BE-NEXT: sldi 4, 4, 48 ; BE-NEXT: xxswapd 1, 34 ; BE-NEXT: mtvsrd 35, 4 ; BE-NEXT: mfvsrwz 4, 34 ; BE-NEXT: sldi 3, 3, 48 ; BE-NEXT: mtvsrd 37, 3 -; BE-NEXT: mfvsrwz 3, 0 +; BE-NEXT: mffprwz 3, 0 ; BE-NEXT: sldi 4, 4, 48 ; BE-NEXT: xxsldwi 0, 34, 34, 1 ; BE-NEXT: vmrghh 3, 5, 3 ; BE-NEXT: mtvsrd 37, 4 ; BE-NEXT: sldi 3, 3, 48 -; BE-NEXT: mfvsrwz 4, 1 +; BE-NEXT: mffprwz 4, 1 ; BE-NEXT: xxsldwi 1, 34, 34, 3 ; BE-NEXT: mtvsrd 34, 3 -; BE-NEXT: mfvsrwz 3, 0 +; BE-NEXT: mffprwz 3, 0 ; BE-NEXT: sldi 4, 4, 48 ; BE-NEXT: mtvsrd 32, 4 -; BE-NEXT: mfvsrwz 4, 1 +; BE-NEXT: mffprwz 4, 1 ; BE-NEXT: sldi 3, 3, 48 ; BE-NEXT: mtvsrd 33, 3 ; BE-NEXT: sldi 3, 4, 48 diff --git a/llvm/test/CodeGen/PowerPC/pr33093.ll b/llvm/test/CodeGen/PowerPC/pr33093.ll index 0853d2b6cedb..c301d42a3292 100644 --- a/llvm/test/CodeGen/PowerPC/pr33093.ll +++ b/llvm/test/CodeGen/PowerPC/pr33093.ll @@ -115,8 +115,8 @@ define i64 @ReverseBits64(i64 %n) { ; CHECK-NEXT: and 3, 3, 4 ; CHECK-NEXT: or 3, 3, 5 ; CHECK-NEXT: rldicl 4, 3, 32, 32 -; CHECK-NEXT: rlwinm 5, 3, 24, 0, 31 -; CHECK-NEXT: rlwinm 6, 4, 24, 0, 31 +; CHECK-NEXT: rotlwi 5, 3, 24 +; CHECK-NEXT: rotlwi 6, 4, 24 ; CHECK-NEXT: rlwimi 5, 3, 8, 8, 15 ; CHECK-NEXT: rlwimi 5, 3, 8, 24, 31 ; CHECK-NEXT: rlwimi 6, 4, 8, 8, 15 diff --git a/llvm/test/CodeGen/PowerPC/pr35688.ll b/llvm/test/CodeGen/PowerPC/pr35688.ll index 098573ec1b00..3f027abceeeb 100644 --- a/llvm/test/CodeGen/PowerPC/pr35688.ll +++ b/llvm/test/CodeGen/PowerPC/pr35688.ll @@ -13,7 +13,7 @@ define void @ec_GFp_nistp256_points_mul() { ; CHECK: subfze 6, 4 ; CHECK: sradi 7, 6, 63 ; CHECK: srad 6, 6, 3 -; CHECK: subfc 5, 5, 7 +; CHECK: subc 5, 7, 5 ; CHECK: subfe 5, 4, 6 ; CHECK: sradi 5, 5, 63 @@ -25,7 +25,7 @@ define void @ec_GFp_nistp256_points_mul() { ; MSSA: subfic 5, 3, 0 ; MSSA: subfze 5, 4 ; MSSA: sradi 5, 5, 63 -; MSSA: subfc 3, 3, 5 +; MSSA: subc 3, 5, 3 ; MSSA: subfe 3, 4, 5 ; MSSA: sradi 3, 3, 63 ; MSSA: std 3, 0(3) diff --git a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll index eb0a61cb0756..6ee0e3a00335 100644 --- a/llvm/test/CodeGen/PowerPC/pr45448.ll +++ b/llvm/test/CodeGen/PowerPC/pr45448.ll @@ -26,7 +26,7 @@ define hidden void @julia_tryparse_internal_45896() #0 { ; CHECK-NEXT: sradi r4, r3, 63 ; CHECK-NEXT: mulhdu r3, r3, r5 ; CHECK-NEXT: maddld r6, r4, r5, r3 -; CHECK-NEXT: crnor 4*cr5+gt, eq, eq +; CHECK-NEXT: crnot 4*cr5+gt, eq ; CHECK-NEXT: cmpld r6, r3 ; CHECK-NEXT: mulld r3, r4, r5 ; CHECK-NEXT: cmpldi cr1, r3, 0 diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll index 4d3bb80792cb..4c9137d86124 100644 --- 
a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -361,7 +361,7 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; CHECK-NEXT: lxsihzx v2, r6, r7 ; CHECK-NEXT: lxsihzx v4, r3, r4 ; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: mtvsrd f0, r6 +; CHECK-NEXT: mtfprd f0, r6 ; CHECK-NEXT: vsplth v4, v4, 3 ; CHECK-NEXT: xxswapd v3, vs0 ; CHECK-NEXT: vsplth v2, v2, 3 @@ -377,7 +377,7 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 ; CHECK-NEXT: vextuwrx r3, r3, v2 -; CHECK-NEXT: cmpw cr0, r3, r5 +; CHECK-NEXT: cmpw r3, r5 ; CHECK-NEXT: bgelr+ cr0 ; CHECK-NEXT: # %bb.1: # %if.then ; @@ -405,7 +405,7 @@ define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signe ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 ; P9BE-NEXT: vextuwlx r3, r3, v2 -; P9BE-NEXT: cmpw cr0, r3, r5 +; P9BE-NEXT: cmpw r3, r5 ; P9BE-NEXT: bgelr+ cr0 ; P9BE-NEXT: # %bb.1: # %if.then entry: @@ -446,7 +446,7 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; CHECK-NEXT: add r6, r3, r4 ; CHECK-NEXT: lxsibzx v2, r3, r4 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: mtvsrd f0, r3 +; CHECK-NEXT: mtfprd f0, r3 ; CHECK-NEXT: li r3, 8 ; CHECK-NEXT: lxsibzx v5, r6, r3 ; CHECK-NEXT: xxswapd v3, vs0 @@ -467,7 +467,7 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 ; CHECK-NEXT: vextuwrx r3, r3, v2 -; CHECK-NEXT: cmpw cr0, r3, r5 +; CHECK-NEXT: cmpw r3, r5 ; CHECK-NEXT: bgelr+ cr0 ; CHECK-NEXT: # %bb.1: # %if.then ; @@ -496,7 +496,7 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 ; P9BE-NEXT: vextuwlx r3, r3, v2 -; P9BE-NEXT: cmpw cr0, r3, r5 +; P9BE-NEXT: cmpw r3, r5 ; P9BE-NEXT: bgelr+ cr0 ; P9BE-NEXT: # %bb.1: # %if.then entry: diff --git a/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll b/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll index 83b444b044cd..5d42b9a52995 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll @@ -56,7 +56,7 @@ entry: ; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]], ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]] ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]] -; CHECK: qvflogical 1, 1, [[REG4]], 1 +; CHECK: qvfand 1, 1, [[REG4]] ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/qpx-sel.ll b/llvm/test/CodeGen/PowerPC/qpx-sel.ll index b027d602eb49..abc92c9e98b1 100644 --- a/llvm/test/CodeGen/PowerPC/qpx-sel.ll +++ b/llvm/test/CodeGen/PowerPC/qpx-sel.ll @@ -60,7 +60,7 @@ entry: ; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]], ; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]] ; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]] -; CHECK: qvflogical 1, 1, [[REG4]], 1 +; CHECK: qvfand 1, 1, [[REG4]] ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll index dd41abd093d6..f6506b3c87f8 100644 --- a/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll +++ b/llvm/test/CodeGen/PowerPC/redundant-copy-after-tail-dup.ll @@ -29,7 +29,7 @@ define dso_local i1 @t(%class.A* %this, i32 %color, i32 %vertex) local_unnamed_a ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: blr ; CHECK-P9-NEXT: .LBB0_4: # %lor.lhs.false -; CHECK-P9-NEXT: cmplwi cr0, r4, 0 +; CHECK-P9-NEXT: cmplwi r4, 0 ; 
CHECK-P9-NEXT: bne cr0, .LBB0_2 ; CHECK-P9-NEXT: .LBB0_5: # %cleanup16 ; CHECK-P9-NEXT: mr r3, r5 diff --git a/llvm/test/CodeGen/PowerPC/sat-add.ll b/llvm/test/CodeGen/PowerPC/sat-add.ll index 932a3f786a1e..64c3515ca5f1 100644 --- a/llvm/test/CodeGen/PowerPC/sat-add.ll +++ b/llvm/test/CodeGen/PowerPC/sat-add.ll @@ -24,7 +24,7 @@ define i8 @unsigned_sat_constant_i8_using_min(i8 %x) { define i8 @unsigned_sat_constant_i8_using_cmp_sum(i8 %x) { ; CHECK-LABEL: unsigned_sat_constant_i8_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: clrlwi 3, 3, 24 ; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: andi. 4, 3, 256 ; CHECK-NEXT: li 4, -1 @@ -69,7 +69,7 @@ define i16 @unsigned_sat_constant_i16_using_min(i16 %x) { define i16 @unsigned_sat_constant_i16_using_cmp_sum(i16 %x) { ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: clrlwi 3, 3, 16 ; CHECK-NEXT: addi 3, 3, 42 ; CHECK-NEXT: andis. 4, 3, 1 ; CHECK-NEXT: li 4, -1 @@ -115,7 +115,7 @@ define i32 @unsigned_sat_constant_i32_using_cmp_sum(i32 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: addi 5, 3, 42 ; CHECK-NEXT: li 4, -1 -; CHECK-NEXT: cmplw 0, 5, 3 +; CHECK-NEXT: cmplw 5, 3 ; CHECK-NEXT: isel 3, 4, 5, 0 ; CHECK-NEXT: blr %a = add i32 %x, 42 @@ -129,7 +129,7 @@ define i32 @unsigned_sat_constant_i32_using_cmp_notval(i32 %x) { ; CHECK: # %bb.0: ; CHECK-NEXT: li 4, -43 ; CHECK-NEXT: addi 5, 3, 42 -; CHECK-NEXT: cmplw 0, 3, 4 +; CHECK-NEXT: cmplw 3, 4 ; CHECK-NEXT: li 3, -1 ; CHECK-NEXT: isel 3, 3, 5, 1 ; CHECK-NEXT: blr @@ -202,8 +202,8 @@ define i8 @unsigned_sat_variable_i8_using_min(i8 %x, i8 %y) { define i8 @unsigned_sat_variable_i8_using_cmp_sum(i8 %x, i8 %y) { ; CHECK-LABEL: unsigned_sat_variable_i8_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 24, 31 -; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 +; CHECK-NEXT: clrlwi 4, 4, 24 +; CHECK-NEXT: clrlwi 3, 3, 24 ; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: andi. 4, 3, 256 ; CHECK-NEXT: li 4, -1 @@ -253,8 +253,8 @@ define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) { define i16 @unsigned_sat_variable_i16_using_cmp_sum(i16 %x, i16 %y) { ; CHECK-LABEL: unsigned_sat_variable_i16_using_cmp_sum: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 16, 31 -; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 +; CHECK-NEXT: clrlwi 4, 4, 16 +; CHECK-NEXT: clrlwi 3, 3, 16 ; CHECK-NEXT: add 3, 3, 4 ; CHECK-NEXT: andis. 
4, 3, 1 ; CHECK-NEXT: li 4, -1 @@ -304,7 +304,7 @@ define i32 @unsigned_sat_variable_i32_using_cmp_sum(i32 %x, i32 %y) { ; CHECK: # %bb.0: ; CHECK-NEXT: add 4, 3, 4 ; CHECK-NEXT: li 5, -1 -; CHECK-NEXT: cmplw 0, 4, 3 +; CHECK-NEXT: cmplw 4, 3 ; CHECK-NEXT: isel 3, 5, 4, 0 ; CHECK-NEXT: blr %a = add i32 %x, %y diff --git a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll index b6251c2bc462..80ac73315619 100644 --- a/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll +++ b/llvm/test/CodeGen/PowerPC/select-i1-vs-i1.ll @@ -18,7 +18,7 @@ entry: ; CHECK-LABEL: @testi32slt ; CHECK-NO-ISEL-LABEL: @testi32slt -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -40,7 +40,7 @@ entry: ret i32 %cond ; CHECK-NO-ISEL-LABEL: @testi32ult -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -63,7 +63,7 @@ entry: ; CHECK-LABEL: @testi32sle ; CHECK-NO-ISEL-LABEL: @testi32sle -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -86,7 +86,7 @@ entry: ; CHECK-LABEL: @testi32ule ; CHECK-NO-ISEL-LABEL: @testi32ule -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -109,7 +109,7 @@ entry: ; CHECK-LABEL: @testi32eq ; CHECK-NO-ISEL-LABEL: @testi32eq -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: creqv [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -132,7 +132,7 @@ entry: ; CHECK-LABEL: @testi32sge ; CHECK-NO-ISEL-LABEL: @testi32sge -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -155,7 +155,7 @@ entry: ; CHECK-LABEL: @testi32uge ; CHECK-NO-ISEL-LABEL: @testi32uge -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crorc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -178,7 +178,7 @@ entry: ; CHECK-LABEL: @testi32sgt ; CHECK-NO-ISEL-LABEL: @testi32sgt -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -201,7 +201,7 @@ entry: ; CHECK-LABEL: @testi32ugt ; CHECK-NO-ISEL-LABEL: @testi32ugt -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crandc [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] @@ -224,7 +224,7 @@ entry: ; CHECK-LABEL: @testi32ne ; CHECK-NO-ISEL-LABEL: @testi32ne -; CHECK-DAG: cmpw {{[0-9]+}}, 5, 6 +; CHECK-DAG: cmpw 5, 6 ; CHECK-DAG: cmpw {{[0-9]+}}, 3, 4 ; CHECK: crxor [[REG1:[0-9]+]], {{[0-9]+}}, {{[0-9]+}} ; CHECK: isel 3, 7, 8, [[REG1]] diff --git a/llvm/test/CodeGen/PowerPC/select_const.ll b/llvm/test/CodeGen/PowerPC/select_const.ll index d07d6d68c65d..d21170754b59 100644 --- a/llvm/test/CodeGen/PowerPC/select_const.ll +++ b/llvm/test/CodeGen/PowerPC/select_const.ll @@ -614,7 +614,7 @@ define i8 @sel_constants_shl_constant(i1 %cond) { 
define i8 @shl_constant_sel_constants(i1 %cond) { ; ALL-LABEL: shl_constant_sel_constants: ; ALL: # %bb.0: -; ALL-NEXT: rlwinm 3, 3, 0, 31, 31 +; ALL-NEXT: clrlwi 3, 3, 31 ; ALL-NEXT: li 4, 1 ; ALL-NEXT: subfic 3, 3, 3 ; ALL-NEXT: slw 3, 4, 3 @@ -651,7 +651,7 @@ define i8 @sel_constants_lshr_constant(i1 %cond) { define i8 @lshr_constant_sel_constants(i1 %cond) { ; ALL-LABEL: lshr_constant_sel_constants: ; ALL: # %bb.0: -; ALL-NEXT: rlwinm 3, 3, 0, 31, 31 +; ALL-NEXT: clrlwi 3, 3, 31 ; ALL-NEXT: li 4, 64 ; ALL-NEXT: subfic 3, 3, 3 ; ALL-NEXT: srw 3, 4, 3 @@ -676,7 +676,7 @@ define i8 @sel_constants_ashr_constant(i1 %cond) { define i8 @ashr_constant_sel_constants(i1 %cond) { ; ALL-LABEL: ashr_constant_sel_constants: ; ALL: # %bb.0: -; ALL-NEXT: rlwinm 3, 3, 0, 31, 31 +; ALL-NEXT: clrlwi 3, 3, 31 ; ALL-NEXT: li 4, -128 ; ALL-NEXT: subfic 3, 3, 3 ; ALL-NEXT: sraw 3, 4, 3 diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll index 05eaad8b50a1..2e23611bea0a 100644 --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -138,7 +138,7 @@ define i32 @all_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: all_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: cmpwi 0, 3, 0 +; CHECK-NEXT: cmpwi 3, 0 ; CHECK-NEXT: blt 0, .LBB9_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -163,7 +163,7 @@ define i32 @all_bits_set_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: all_bits_set_branch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 0, 3, -1 +; CHECK-NEXT: cmpwi 3, -1 ; CHECK-NEXT: bne 0, .LBB10_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -188,7 +188,7 @@ define i32 @all_sign_bits_set_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: all_sign_bits_set_branch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 0, 3, -1 +; CHECK-NEXT: cmpwi 3, -1 ; CHECK-NEXT: bgt 0, .LBB11_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -238,7 +238,7 @@ define i32 @any_sign_bits_set_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: any_sign_bits_set_branch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: or 3, 3, 4 -; CHECK-NEXT: cmpwi 0, 3, -1 +; CHECK-NEXT: cmpwi 3, -1 ; CHECK-NEXT: bgt 0, .LBB13_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -263,7 +263,7 @@ define i32 @any_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: any_bits_clear_branch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 0, 3, -1 +; CHECK-NEXT: cmpwi 3, -1 ; CHECK-NEXT: beq 0, .LBB14_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 @@ -288,7 +288,7 @@ define i32 @any_sign_bits_clear_branch(i32 %P, i32 %Q) { ; CHECK-LABEL: any_sign_bits_clear_branch: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: and 3, 3, 4 -; CHECK-NEXT: cmpwi 0, 3, 0 +; CHECK-NEXT: cmpwi 3, 0 ; CHECK-NEXT: blt 0, .LBB15_2 ; CHECK-NEXT: # %bb.1: # %bb1 ; CHECK-NEXT: li 3, 4 diff --git a/llvm/test/CodeGen/PowerPC/shift_mask.ll b/llvm/test/CodeGen/PowerPC/shift_mask.ll index 59382c615311..9bf60368b2a4 100644 --- a/llvm/test/CodeGen/PowerPC/shift_mask.ll +++ b/llvm/test/CodeGen/PowerPC/shift_mask.ll @@ -5,7 +5,7 @@ target triple = "powerpc64le-linux-gnu" define i8 @test000(i8 %a, i8 %b) { ; CHECK-LABEL: test000: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31 +; CHECK-NEXT: clrlwi 4, 4, 29 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i8 %b, 7 @@ -16,7 +16,7 @@ define i8 @test000(i8 %a, i8 %b) { define i16 @test001(i16 %a, i16 %b) { ; CHECK-LABEL: test001: ; 
CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 +; CHECK-NEXT: clrlwi 4, 4, 28 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i16 %b, 15 @@ -27,7 +27,7 @@ define i16 @test001(i16 %a, i16 %b) { define i32 @test002(i32 %a, i32 %b) { ; CHECK-LABEL: test002: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31 +; CHECK-NEXT: clrlwi 4, 4, 27 ; CHECK-NEXT: slw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i32 %b, 31 @@ -38,7 +38,7 @@ define i32 @test002(i32 %a, i32 %b) { define i64 @test003(i64 %a, i64 %b) { ; CHECK-LABEL: test003: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31 +; CHECK-NEXT: clrlwi 4, 4, 26 ; CHECK-NEXT: sld 3, 3, 4 ; CHECK-NEXT: blr %rem = and i64 %b, 63 @@ -89,8 +89,8 @@ define <2 x i64> @test013(<2 x i64> %a, <2 x i64> %b) { define i8 @test100(i8 %a, i8 %b) { ; CHECK-LABEL: test100: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 24, 31 -; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31 +; CHECK-NEXT: clrlwi 3, 3, 24 +; CHECK-NEXT: clrlwi 4, 4, 29 ; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i8 %b, 7 @@ -101,8 +101,8 @@ define i8 @test100(i8 %a, i8 %b) { define i16 @test101(i16 %a, i16 %b) { ; CHECK-LABEL: test101: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 3, 3, 0, 16, 31 -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 +; CHECK-NEXT: clrlwi 3, 3, 16 +; CHECK-NEXT: clrlwi 4, 4, 28 ; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i16 %b, 15 @@ -113,7 +113,7 @@ define i16 @test101(i16 %a, i16 %b) { define i32 @test102(i32 %a, i32 %b) { ; CHECK-LABEL: test102: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31 +; CHECK-NEXT: clrlwi 4, 4, 27 ; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i32 %b, 31 @@ -124,7 +124,7 @@ define i32 @test102(i32 %a, i32 %b) { define i64 @test103(i64 %a, i64 %b) { ; CHECK-LABEL: test103: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31 +; CHECK-NEXT: clrlwi 4, 4, 26 ; CHECK-NEXT: srd 3, 3, 4 ; CHECK-NEXT: blr %rem = and i64 %b, 63 @@ -176,7 +176,7 @@ define i8 @test200(i8 %a, i8 %b) { ; CHECK-LABEL: test200: ; CHECK: # %bb.0: ; CHECK-NEXT: extsb 3, 3 -; CHECK-NEXT: rlwinm 4, 4, 0, 29, 31 +; CHECK-NEXT: clrlwi 4, 4, 29 ; CHECK-NEXT: sraw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i8 %b, 7 @@ -188,7 +188,7 @@ define i16 @test201(i16 %a, i16 %b) { ; CHECK-LABEL: test201: ; CHECK: # %bb.0: ; CHECK-NEXT: extsh 3, 3 -; CHECK-NEXT: rlwinm 4, 4, 0, 28, 31 +; CHECK-NEXT: clrlwi 4, 4, 28 ; CHECK-NEXT: sraw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i16 %b, 15 @@ -199,7 +199,7 @@ define i16 @test201(i16 %a, i16 %b) { define i32 @test202(i32 %a, i32 %b) { ; CHECK-LABEL: test202: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 27, 31 +; CHECK-NEXT: clrlwi 4, 4, 27 ; CHECK-NEXT: sraw 3, 3, 4 ; CHECK-NEXT: blr %rem = and i32 %b, 31 @@ -210,7 +210,7 @@ define i32 @test202(i32 %a, i32 %b) { define i64 @test203(i64 %a, i64 %b) { ; CHECK-LABEL: test203: ; CHECK: # %bb.0: -; CHECK-NEXT: rlwinm 4, 4, 0, 26, 31 +; CHECK-NEXT: clrlwi 4, 4, 26 ; CHECK-NEXT: srad 3, 3, 4 ; CHECK-NEXT: blr %rem = and i64 %b, 63 diff --git a/llvm/test/CodeGen/PowerPC/signbit-shift.ll b/llvm/test/CodeGen/PowerPC/signbit-shift.ll index d66b7fc51619..50d7d5da34dc 100644 --- a/llvm/test/CodeGen/PowerPC/signbit-shift.ll +++ b/llvm/test/CodeGen/PowerPC/signbit-shift.ll @@ -46,7 +46,7 @@ define i32 @sel_ifpos_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_tval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: li 4, 41 -; CHECK-NEXT: cmpwi 0, 3, -1 +; CHECK-NEXT: cmpwi 3, -1 ; CHECK-NEXT: li 3, 42 ; CHECK-NEXT: isel 3, 3, 4, 1 ; CHECK-NEXT: blr @@ -98,7 +98,7 @@ define i32 
@sel_ifpos_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifpos_fval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: li 4, 42 -; CHECK-NEXT: cmpwi 0, 3, -1 +; CHECK-NEXT: cmpwi 3, -1 ; CHECK-NEXT: li 3, 41 ; CHECK-NEXT: isel 3, 3, 4, 1 ; CHECK-NEXT: blr @@ -135,7 +135,7 @@ define i32 @sel_ifneg_tval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifneg_tval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: li 4, 41 -; CHECK-NEXT: cmpwi 0, 3, 0 +; CHECK-NEXT: cmpwi 3, 0 ; CHECK-NEXT: li 3, 42 ; CHECK-NEXT: isel 3, 3, 4, 0 ; CHECK-NEXT: blr @@ -170,7 +170,7 @@ define i32 @sel_ifneg_fval_bigger(i32 %x) { ; CHECK-LABEL: sel_ifneg_fval_bigger: ; CHECK: # %bb.0: ; CHECK-NEXT: li 4, 42 -; CHECK-NEXT: cmpwi 0, 3, 0 +; CHECK-NEXT: cmpwi 3, 0 ; CHECK-NEXT: li 3, 41 ; CHECK-NEXT: isel 3, 3, 4, 0 ; CHECK-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/simplifyConstCmpToISEL.ll b/llvm/test/CodeGen/PowerPC/simplifyConstCmpToISEL.ll index bf5754dc9146..0dea7c5dec49 100644 --- a/llvm/test/CodeGen/PowerPC/simplifyConstCmpToISEL.ll +++ b/llvm/test/CodeGen/PowerPC/simplifyConstCmpToISEL.ll @@ -4,7 +4,7 @@ define void @test(i32 zeroext %parts) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: cmplwi 0, 3, 1 +; CHECK-NEXT: cmplwi 3, 1 ; CHECK-NEXT: bnelr+ 0 ; CHECK-NEXT: # %bb.1: # %test2.exit.us.unr-lcssa ; CHECK-NEXT: ld 3, 0(3) diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll index 7804b0a3f097..b2e04e32ddd4 100644 --- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll +++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll @@ -13,7 +13,7 @@ define void @print_res() nounwind { ; CHECK-NEXT: lwz 3, 0(3) ; CHECK-NEXT: addi 3, 3, -1 ; CHECK-NEXT: clrldi 4, 3, 32 -; CHECK-NEXT: cmplwi 0, 3, 1 +; CHECK-NEXT: cmplwi 3, 1 ; CHECK-NEXT: li 3, 1 ; CHECK-NEXT: isel 3, 4, 3, 1 ; CHECK-NEXT: li 4, 2 diff --git a/llvm/test/CodeGen/PowerPC/spill_p9_setb.ll b/llvm/test/CodeGen/PowerPC/spill_p9_setb.ll index ad3deca5bc9f..17d1b616088b 100644 --- a/llvm/test/CodeGen/PowerPC/spill_p9_setb.ll +++ b/llvm/test/CodeGen/PowerPC/spill_p9_setb.ll @@ -17,7 +17,7 @@ define void @p9_setb_spill() { ; CHECK-P9-LABEL: p9_setb_spill: ; CHECK-P9: # %bb.1: # %if.then -; CHECK-P9-DAG: crnor 4*cr[[CREG:.*]]+lt, eq, eq +; CHECK-P9-DAG: crnot 4*cr[[CREG:.*]]+lt, eq ; CHECK-P9-DAG: setb [[REG1:.*]], cr[[CREG]] ; CHECK-P9-DAG: stw [[REG1]] ; CHECK-P9: blr @@ -25,7 +25,7 @@ define void @p9_setb_spill() { ; ; CHECK-P8-LABEL: p9_setb_spill: ; CHECK-P8: # %bb.1: # %if.then -; CHECK-P8-DAG: crnor 4*cr[[CREG2:.*]]+lt, eq, eq +; CHECK-P8-DAG: crnot 4*cr[[CREG2:.*]]+lt, eq ; CHECK-P8-DAG: mfocrf [[REG2:.*]], ; CHECK-P8-DAG: rlwinm [[REG2]], [[REG2]] ; CHECK-P8-DAG: stw [[REG2]] diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll index d795f6b62fab..051e467cf39b 100644 --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -26,7 +26,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: lis r5, 31710 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -42,7 +42,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, -124 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -57,7 +57,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x 
i16> %x) { ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -72,7 +72,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 @@ -157,7 +157,7 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P8LE-NEXT: ori r4, r4, 33437 ; P8LE-NEXT: ori r9, r9, 63249 ; P8LE-NEXT: ori r11, r11, 37253 -; P8LE-NEXT: mfvsrd r5, f0 +; P8LE-NEXT: mffprd r5, f0 ; P8LE-NEXT: rldicl r3, r5, 32, 48 ; P8LE-NEXT: rldicl r6, r5, 16, 48 ; P8LE-NEXT: clrldi r7, r5, 48 @@ -201,13 +201,13 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; P8LE-NEXT: mulli r8, r8, -124 ; P8LE-NEXT: subf r3, r4, r3 ; P8LE-NEXT: subf r4, r9, r6 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r3, r10, r7 -; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: mtfprd f1, r4 ; P8LE-NEXT: subf r4, r8, r5 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: xxswapd v5, vs3 @@ -302,7 +302,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -316,7 +316,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -330,7 +330,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -345,7 +345,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 @@ -422,7 +422,7 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P8LE-NEXT: lis r4, -21386 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r4, r4, 37253 -; P8LE-NEXT: mfvsrd r5, f0 +; P8LE-NEXT: mffprd r5, f0 ; P8LE-NEXT: clrldi r3, r5, 48 ; P8LE-NEXT: rldicl r7, r5, 32, 48 ; P8LE-NEXT: extsh r8, r3 @@ -466,13 +466,13 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; P8LE-NEXT: mulli r4, r4, 95 ; P8LE-NEXT: subf r3, r8, r3 ; P8LE-NEXT: subf r6, r9, r6 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r3, r10, r7 ; P8LE-NEXT: subf r4, r4, r5 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: xxswapd v5, vs3 @@ -565,7 +565,7 @@ define <4 x i16> 
@combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r6, r4, 95 ; P9LE-NEXT: subf r3, r6, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r6, r3 @@ -579,7 +579,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: mulli r7, r6, 95 ; P9LE-NEXT: subf r3, r7, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r7, r3 @@ -593,7 +593,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: subf r3, r8, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r8, r3 @@ -608,18 +608,18 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P9LE-NEXT: subf r3, r8, r3 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: mtvsrd f0, r4 +; P9LE-NEXT: mtfprd f0, r4 ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r6 +; P9LE-NEXT: mtfprd f0, r6 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r7 +; P9LE-NEXT: mtfprd f0, r7 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r5 +; P9LE-NEXT: mtfprd f0, r5 ; P9LE-NEXT: xxswapd v5, vs0 ; P9LE-NEXT: vmrglh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 @@ -709,7 +709,7 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P8LE-NEXT: lis r5, -21386 ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r5, r5, 37253 -; P8LE-NEXT: mfvsrd r6, f0 +; P8LE-NEXT: mffprd r6, f0 ; P8LE-NEXT: clrldi r3, r6, 48 ; P8LE-NEXT: rldicl r4, r6, 48, 48 ; P8LE-NEXT: rldicl r7, r6, 32, 48 @@ -745,28 +745,28 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; P8LE-NEXT: add r9, r0, r9 ; P8LE-NEXT: mulli r0, r8, 95 ; P8LE-NEXT: add r10, r12, r10 -; P8LE-NEXT: mtvsrd f0, r8 +; P8LE-NEXT: mtfprd f0, r8 ; P8LE-NEXT: srwi r8, r5, 31 ; P8LE-NEXT: srawi r5, r5, 6 ; P8LE-NEXT: mulli r11, r9, 95 -; P8LE-NEXT: mtvsrd f1, r9 +; P8LE-NEXT: mtfprd f1, r9 ; P8LE-NEXT: mulli r9, r10, 95 ; P8LE-NEXT: add r5, r5, r8 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f2, r10 -; P8LE-NEXT: mtvsrd f3, r5 +; P8LE-NEXT: mtfprd f2, r10 +; P8LE-NEXT: mtfprd f3, r5 ; P8LE-NEXT: mulli r5, r5, 95 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: subf r3, r0, r3 ; P8LE-NEXT: xxswapd v1, vs2 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r4, r11, r4 ; P8LE-NEXT: xxswapd v6, vs3 ; P8LE-NEXT: subf r3, r9, r7 -; P8LE-NEXT: mtvsrd f1, r4 -; P8LE-NEXT: mtvsrd f4, r3 +; P8LE-NEXT: mtfprd f1, r4 +; P8LE-NEXT: mtfprd f4, r3 ; P8LE-NEXT: subf r3, r5, r6 -; P8LE-NEXT: mtvsrd f5, r3 +; P8LE-NEXT: mtfprd f5, r3 ; P8LE-NEXT: xxswapd v4, vs1 ; P8LE-NEXT: vmrglh v2, v3, v2 ; P8LE-NEXT: xxswapd v3, vs0 @@ -870,7 +870,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 6 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -879,7 +879,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: slwi r4, r4, 5 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; 
P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -896,7 +896,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -905,7 +905,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: slwi r4, r4, 3 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v2, v3 @@ -966,7 +966,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, -21386 ; P8LE-NEXT: ori r3, r3, 37253 -; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 ; P8LE-NEXT: clrldi r7, r4, 48 ; P8LE-NEXT: extsh r6, r5 @@ -982,7 +982,7 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: slwi r8, r8, 6 ; P8LE-NEXT: subf r7, r8, r7 ; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: mtvsrd f0, r7 +; P8LE-NEXT: mtfprd f0, r7 ; P8LE-NEXT: add r3, r3, r6 ; P8LE-NEXT: addze r6, r10 ; P8LE-NEXT: srwi r10, r3, 31 @@ -994,14 +994,14 @@ define <4 x i16> @dont_fold_srem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: subf r6, r6, r9 ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: srawi r8, r10, 3 -; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtfprd f1, r6 ; P8LE-NEXT: addze r7, r8 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: subf r3, r3, r5 ; P8LE-NEXT: slwi r5, r7, 3 ; P8LE-NEXT: subf r4, r5, r4 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f2, r3 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: vmrglh v2, v3, v2 ; P8LE-NEXT: xxswapd v5, vs3 @@ -1079,7 +1079,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE-NEXT: lis r5, -19946 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1095,7 +1095,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1110,7 +1110,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: vmrglh v3, v3, v4 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 @@ -1182,7 +1182,7 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P8LE-NEXT: xxlxor v5, v5, v5 ; P8LE-NEXT: ori r3, r3, 47143 ; P8LE-NEXT: ori r8, r8, 17097 -; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 ; P8LE-NEXT: rldicl r6, r4, 32, 48 ; P8LE-NEXT: rldicl r4, r4, 48, 48 @@ -1214,11 +1214,11 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; P8LE-NEXT: mulli r8, r8, 23 ; P8LE-NEXT: mulli r7, r7, 654 ; P8LE-NEXT: subf r3, r3, r5 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r3, r8, r6 ; P8LE-NEXT: subf r4, r7, r4 -; P8LE-NEXT: mtvsrd f1, r3 -; P8LE-NEXT: mtvsrd f2, r4 +; P8LE-NEXT: mtfprd f1, r3 +; P8LE-NEXT: 
mtfprd f2, r4 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 @@ -1304,7 +1304,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1318,7 +1318,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1327,7 +1327,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P9LE-NEXT: slwi r4, r4, 15 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxlxor v4, v4, v4 @@ -1393,7 +1393,7 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P8LE-NEXT: xxlxor v5, v5, v5 ; P8LE-NEXT: ori r6, r6, 47143 ; P8LE-NEXT: ori r7, r7, 17097 -; P8LE-NEXT: mfvsrd r3, f0 +; P8LE-NEXT: mffprd r3, f0 ; P8LE-NEXT: rldicl r4, r3, 16, 48 ; P8LE-NEXT: rldicl r5, r3, 32, 48 ; P8LE-NEXT: extsh r8, r4 @@ -1418,13 +1418,13 @@ define <4 x i16> @dont_fold_urem_i16_smax(<4 x i16> %x) { ; P8LE-NEXT: srawi r8, r8, 15 ; P8LE-NEXT: subf r4, r6, r4 ; P8LE-NEXT: addze r6, r8 -; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtfprd f0, r4 ; P8LE-NEXT: slwi r4, r6, 15 ; P8LE-NEXT: subf r5, r7, r5 ; P8LE-NEXT: subf r3, r4, r3 -; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: vmrglh v2, v2, v3 @@ -1588,7 +1588,7 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P8LE-NEXT: sldi r5, r5, 32 ; P8LE-NEXT: oris r3, r3, 58853 ; P8LE-NEXT: oris r4, r4, 22795 -; P8LE-NEXT: mfvsrd r8, f0 +; P8LE-NEXT: mffprd r8, f0 ; P8LE-NEXT: oris r5, r5, 1603 ; P8LE-NEXT: ori r3, r3, 6055 ; P8LE-NEXT: ori r4, r4, 8549 @@ -1610,13 +1610,13 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P8LE-NEXT: add r4, r4, r9 ; P8LE-NEXT: mulli r4, r4, 23 ; P8LE-NEXT: sub r3, r6, r3 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: sub r5, r7, r5 -; P8LE-NEXT: mtvsrd f1, r5 +; P8LE-NEXT: mtfprd f1, r5 ; P8LE-NEXT: sub r3, r8, r4 ; P8LE-NEXT: li r4, 0 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f2, r3 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxmrghd v3, vs0, vs2 ; P8LE-NEXT: xxmrghd v2, vs1, vs3 ; P8LE-NEXT: blr @@ -1637,11 +1637,11 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P8BE-NEXT: oris r4, r4, 22795 ; P8BE-NEXT: sldi r5, r5, 32 ; P8BE-NEXT: oris r3, r3, 58853 -; P8BE-NEXT: mfvsrd r7, f0 +; P8BE-NEXT: mffprd r7, f0 ; P8BE-NEXT: ori r4, r4, 8549 ; P8BE-NEXT: ori r3, r3, 6055 ; P8BE-NEXT: oris r5, r5, 1603 -; P8BE-NEXT: mfvsrd r8, f1 +; P8BE-NEXT: mffprd r8, f1 ; P8BE-NEXT: mulhd r4, r6, r4 ; P8BE-NEXT: mulhd r3, r7, r3 ; P8BE-NEXT: ori r5, r5, 21445 @@ -1661,12 +1661,12 @@ define <4 x i64> @dont_fold_srem_i64(<4 x i64> %x) { ; P8BE-NEXT: mulli r5, r5, 654 ; P8BE-NEXT: sub r3, r7, r3 ; P8BE-NEXT: sub r4, r6, r4 -; P8BE-NEXT: mtvsrd f0, r3 +; P8BE-NEXT: mtfprd f0, r3 ; P8BE-NEXT: sub r3, r8, r5 -; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: mtfprd f1, r4 ; P8BE-NEXT: li r4, 0 -; 
P8BE-NEXT: mtvsrd f2, r3 -; P8BE-NEXT: mtvsrd f3, r4 +; P8BE-NEXT: mtfprd f2, r3 +; P8BE-NEXT: mtfprd f3, r4 ; P8BE-NEXT: xxmrghd v3, vs1, vs0 ; P8BE-NEXT: xxmrghd v2, vs3, vs2 ; P8BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/stack-realign.ll b/llvm/test/CodeGen/PowerPC/stack-realign.ll index 21de346ab57e..6402a2597836 100644 --- a/llvm/test/CodeGen/PowerPC/stack-realign.ll +++ b/llvm/test/CodeGen/PowerPC/stack-realign.ll @@ -129,7 +129,7 @@ entry: ; CHECK-DAG: std 30, -16(1) ; CHECK-DAG: mr 30, 1 ; CHECK-DAG: std 0, 16(1) -; CHECK-DAG: subfc 0, [[REG3]], [[REG2]] +; CHECK-DAG: subc 0, [[REG2]], [[REG3]] ; CHECK: stdux 1, 1, 0 ; CHECK: .cfi_def_cfa_register r30 diff --git a/llvm/test/CodeGen/PowerPC/testBitReverse.ll b/llvm/test/CodeGen/PowerPC/testBitReverse.ll index b6c78b4c3151..d795879a7924 100644 --- a/llvm/test/CodeGen/PowerPC/testBitReverse.ll +++ b/llvm/test/CodeGen/PowerPC/testBitReverse.ll @@ -91,8 +91,8 @@ define i64 @testBitReverseIntrinsicI64(i64 %arg) { ; CHECK-NEXT: and 3, 3, 4 ; CHECK-NEXT: or 3, 3, 5 ; CHECK-NEXT: rldicl 4, 3, 32, 32 -; CHECK-NEXT: rlwinm 5, 3, 24, 0, 31 -; CHECK-NEXT: rlwinm 6, 4, 24, 0, 31 +; CHECK-NEXT: rotlwi 5, 3, 24 +; CHECK-NEXT: rotlwi 6, 4, 24 ; CHECK-NEXT: rlwimi 5, 3, 8, 8, 15 ; CHECK-NEXT: rlwimi 5, 3, 8, 24, 31 ; CHECK-NEXT: rlwimi 6, 4, 8, 8, 15 diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll index 855e175c6e90..c5d1e3ed1a29 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32gtu.ll @@ -72,8 +72,8 @@ entry: define signext i32 @testCompare2(i32 zeroext %a, i32 zeroext %b) { ; CHECK-LABEL: testCompare2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: rlwinm r3, r3, 0, 31, 31 -; CHECK-NEXT: rlwinm r4, r4, 0, 31, 31 +; CHECK-NEXT: clrlwi r3, r3, 31 +; CHECK-NEXT: clrlwi r4, r4, 31 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: sub r3, r4, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32leu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32leu.ll index 3ba967b51dad..cd22b238e84d 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesi32leu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32leu.ll @@ -9,8 +9,8 @@ define signext i32 @test(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: test: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: rlwinm r3, r3, 0, 31, 31 -; CHECK-NEXT: rlwinm r4, r4, 0, 31, 31 +; CHECK-NEXT: clrlwi r3, r3, 31 +; CHECK-NEXT: clrlwi r4, r4, 31 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: sub r3, r4, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll index 0c399ff42b95..603f8177c808 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesi32ltu.ll @@ -72,8 +72,8 @@ entry: define signext i32 @testCompare2(i32 zeroext %a, i32 zeroext %b) { ; CHECK-LABEL: testCompare2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: rlwinm r3, r3, 0, 31, 31 -; CHECK-NEXT: rlwinm r4, r4, 0, 31, 31 +; CHECK-NEXT: clrlwi r3, r3, 31 +; CHECK-NEXT: clrlwi r4, r4, 31 ; CHECK-NEXT: clrldi r3, r3, 32 ; CHECK-NEXT: clrldi r4, r4, 32 ; CHECK-NEXT: sub r3, r3, r4 diff --git a/llvm/test/CodeGen/PowerPC/testComparesigesll.ll b/llvm/test/CodeGen/PowerPC/testComparesigesll.ll index 541ee87da1ff..a8c5b8e54fa5 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesigesll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesigesll.ll @@ -19,7 +19,7 @@ define signext i32 @test_igesll(i64 %a, i64 
%b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r3, 63 ; CHECK-BE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: blr ; @@ -27,7 +27,7 @@ define signext i32 @test_igesll(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r3, 63 ; CHECK-LE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: blr entry: @@ -49,7 +49,7 @@ define signext i32 @test_igesll_sext(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r3, 63 ; CHECK-BE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: neg r3, r3 ; CHECK-BE-NEXT: blr @@ -58,7 +58,7 @@ define signext i32 @test_igesll_sext(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r3, 63 ; CHECK-LE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: neg r3, r3 ; CHECK-LE-NEXT: blr @@ -129,7 +129,7 @@ define void @test_igesll_store(i64 %a, i64 %b) { ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-BE-NEXT: sradi r6, r3, 63 ; CHECK-BE-NEXT: ld r5, .LC0@toc@l(r5) -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-BE-NEXT: adde r3, r6, r3 ; CHECK-BE-NEXT: std r3, 0(r5) @@ -139,7 +139,7 @@ define void @test_igesll_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r3, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: std r3, glob@toc@l(r5) @@ -166,7 +166,7 @@ define void @test_igesll_sext_store(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r6, r3, 63 ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-BE-NEXT: ld r4, .LC0@toc@l(r5) ; CHECK-BE-NEXT: adde r3, r6, r3 @@ -178,7 +178,7 @@ define void @test_igesll_sext_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r3, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: neg r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesigeull.ll b/llvm/test/CodeGen/PowerPC/testComparesigeull.ll index 4d430db0b68b..a2af8d931a99 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesigeull.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesigeull.ll @@ -14,7 +14,7 @@ define signext i32 @test_igeull(i64 %a, i64 %b) { ; CHECK-LABEL: test_igeull: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: subfe r3, r4, r4 ; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: blr @@ -28,7 +28,7 @@ entry: define signext i32 @test_igeull_sext(i64 %a, i64 %b) { ; CHECK-LABEL: test_igeull_sext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: subfe r3, r4, r4 ; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: blr @@ -67,7 +67,7 @@ define void @test_igeull_store(i64 %a, i64 %b) { ; BE-LABEL: test_igeull_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, 
.LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: ld r3, .LC0@toc@l(r5) ; BE-NEXT: subfe r4, r4, r4 ; BE-NEXT: addi r4, r4, 1 @@ -76,7 +76,7 @@ define void @test_igeull_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_igeull_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r4, r4 ; LE-NEXT: addi r3, r3, 1 @@ -94,7 +94,7 @@ define void @test_igeull_sext_store(i64 %a, i64 %b) { ; BE-LABEL: test_igeull_sext_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: ld r3, .LC0@toc@l(r5) ; BE-NEXT: subfe r4, r4, r4 ; BE-NEXT: not r4, r4 @@ -103,7 +103,7 @@ define void @test_igeull_sext_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_igeull_sext_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r4, r4 ; LE-NEXT: not r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll b/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll index f96a285142a3..0f5aa673282a 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesigtsll.ll @@ -14,7 +14,7 @@ define signext i32 @test_igtsll(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r4, 63 ; CHECK-NEXT: rldicl r6, r3, 1, 63 -; CHECK-NEXT: subfc r3, r3, r4 +; CHECK-NEXT: subc r3, r4, r3 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: blr @@ -30,7 +30,7 @@ define signext i32 @test_igtsll_sext(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r4, 63 ; CHECK-NEXT: rldicl r6, r3, 1, 63 -; CHECK-NEXT: subfc r3, r3, r4 +; CHECK-NEXT: subc r3, r4, r3 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: neg r3, r3 @@ -76,7 +76,7 @@ define void @test_igtsll_store(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r6, r4, 63 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: rldicl r3, r3, 1, 63 ; CHECK-NEXT: ld r4, .LC0@toc@l(r5) ; CHECK-NEXT: adde r3, r3, r6 @@ -97,7 +97,7 @@ define void @test_igtsll_sext_store(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r6, r4, 63 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: rldicl r3, r3, 1, 63 ; CHECK-NEXT: adde r3, r3, r6 ; CHECK-NEXT: ld r4, .LC0@toc@l(r5) diff --git a/llvm/test/CodeGen/PowerPC/testComparesilesll.ll b/llvm/test/CodeGen/PowerPC/testComparesilesll.ll index b1827661a935..9f4e92c6d2eb 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesilesll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesilesll.ll @@ -19,7 +19,7 @@ define signext i32 @test_ilesll(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r4, 63 ; CHECK-BE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-BE-NEXT: subfc r3, r3, r4 +; CHECK-BE-NEXT: subc r3, r4, r3 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: blr ; @@ -27,7 +27,7 @@ define signext i32 @test_ilesll(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r4, 63 ; CHECK-LE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-LE-NEXT: subfc r3, r3, r4 +; CHECK-LE-NEXT: subc r3, r4, r3 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: blr entry: @@ -49,7 +49,7 @@ define signext i32 @test_ilesll_sext(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r4, 63 ; 
CHECK-BE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-BE-NEXT: subfc r3, r3, r4 +; CHECK-BE-NEXT: subc r3, r4, r3 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: neg r3, r3 ; CHECK-BE-NEXT: blr @@ -58,7 +58,7 @@ define signext i32 @test_ilesll_sext(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r4, 63 ; CHECK-LE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-LE-NEXT: subfc r3, r3, r4 +; CHECK-LE-NEXT: subc r3, r4, r3 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: neg r3, r3 ; CHECK-LE-NEXT: blr @@ -135,7 +135,7 @@ define void @test_ilesll_store(i64 %a, i64 %b) { ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-BE-NEXT: sradi r6, r4, 63 ; CHECK-BE-NEXT: ld r5, .LC0@toc@l(r5) -; CHECK-BE-NEXT: subfc r4, r3, r4 +; CHECK-BE-NEXT: subc r4, r4, r3 ; CHECK-BE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-BE-NEXT: adde r3, r6, r3 ; CHECK-BE-NEXT: std r3, 0(r5) @@ -145,7 +145,7 @@ define void @test_ilesll_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r4, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r4, r3, r4 +; CHECK-LE-NEXT: subc r4, r4, r3 ; CHECK-LE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: std r3, glob@toc@l(r5) @@ -172,7 +172,7 @@ define void @test_ilesll_sext_store(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r6, r4, 63 ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-BE-NEXT: subfc r4, r3, r4 +; CHECK-BE-NEXT: subc r4, r4, r3 ; CHECK-BE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-BE-NEXT: ld r4, .LC0@toc@l(r5) ; CHECK-BE-NEXT: adde r3, r6, r3 @@ -184,7 +184,7 @@ define void @test_ilesll_sext_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r4, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r4, r3, r4 +; CHECK-LE-NEXT: subc r4, r4, r3 ; CHECK-LE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: neg r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesileull.ll b/llvm/test/CodeGen/PowerPC/testComparesileull.ll index 54ee2561acbd..28d828fa345b 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesileull.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesileull.ll @@ -14,7 +14,7 @@ define signext i32 @test_ileull(i64 %a, i64 %b) { ; CHECK-LABEL: test_ileull: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: subfe r3, r3, r3 ; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: blr @@ -28,7 +28,7 @@ entry: define signext i32 @test_ileull_sext(i64 %a, i64 %b) { ; CHECK-LABEL: test_ileull_sext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: subfe r3, r3, r3 ; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: blr @@ -69,7 +69,7 @@ define void @test_ileull_store(i64 %a, i64 %b) { ; BE-LABEL: test_ileull_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r4, r3, r4 +; BE-NEXT: subc r4, r4, r3 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: subfe r3, r3, r3 ; BE-NEXT: addi r3, r3, 1 @@ -78,7 +78,7 @@ define void @test_ileull_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_ileull_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r4, r3, r4 +; LE-NEXT: subc r4, r4, r3 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r3, r3 ; LE-NEXT: addi r3, r3, 1 @@ -96,7 +96,7 @@ define void @test_ileull_sext_store(i64 %a, i64 %b) { ; BE-LABEL: test_ileull_sext_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r4, r3, r4 +; BE-NEXT: subc r4, r4, r3 ; BE-NEXT: ld r4, 
.LC0@toc@l(r5) ; BE-NEXT: subfe r3, r3, r3 ; BE-NEXT: not r3, r3 @@ -105,7 +105,7 @@ define void @test_ileull_sext_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_ileull_sext_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r4, r3, r4 +; LE-NEXT: subc r4, r4, r3 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r3, r3 ; LE-NEXT: not r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll b/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll index 0a31d051e0e5..740eb33a550f 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesiltsll.ll @@ -16,7 +16,7 @@ define signext i32 @test_iltsll(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r3, 63 ; CHECK-NEXT: rldicl r6, r4, 1, 63 -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: blr @@ -32,7 +32,7 @@ define signext i32 @test_iltsll_sext(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r3, 63 ; CHECK-NEXT: rldicl r6, r4, 1, 63 -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: neg r3, r3 @@ -61,7 +61,7 @@ define void @test_iltsll_store(i64 %a, i64 %b) { ; BE: # %bb.0: # %entry ; BE-NEXT: sradi r6, r3, 63 ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: rldicl r3, r4, 1, 63 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: adde r3, r3, r6 @@ -73,7 +73,7 @@ define void @test_iltsll_store(i64 %a, i64 %b) { ; LE: # %bb.0: # %entry ; LE-NEXT: sradi r6, r3, 63 ; LE-NEXT: addis r5, r2, glob@toc@ha -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: rldicl r3, r4, 1, 63 ; LE-NEXT: adde r3, r3, r6 ; LE-NEXT: xori r3, r3, 1 @@ -93,7 +93,7 @@ define void @test_iltsll_sext_store(i64 %a, i64 %b) { ; BE: # %bb.0: # %entry ; BE-NEXT: sradi r6, r3, 63 ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: rldicl r3, r4, 1, 63 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: adde r3, r3, r6 @@ -106,7 +106,7 @@ define void @test_iltsll_sext_store(i64 %a, i64 %b) { ; LE: # %bb.0: # %entry ; LE-NEXT: sradi r6, r3, 63 ; LE-NEXT: addis r5, r2, glob@toc@ha -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: rldicl r3, r4, 1, 63 ; LE-NEXT: adde r3, r3, r6 ; LE-NEXT: xori r3, r3, 1 diff --git a/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll b/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll index 2b1dc4a689c9..4b344294aa09 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesllgesll.ll @@ -19,7 +19,7 @@ define i64 @test_llgesll(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r3, 63 ; CHECK-BE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: blr ; @@ -27,7 +27,7 @@ define i64 @test_llgesll(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r3, 63 ; CHECK-LE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: blr entry: @@ -49,7 +49,7 @@ define i64 @test_llgesll_sext(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r3, 63 ; CHECK-BE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: adde r3, r5, r6 ; 
CHECK-BE-NEXT: neg r3, r3 ; CHECK-BE-NEXT: blr @@ -58,7 +58,7 @@ define i64 @test_llgesll_sext(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r3, 63 ; CHECK-LE-NEXT: rldicl r6, r4, 1, 63 -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: neg r3, r3 ; CHECK-LE-NEXT: blr @@ -129,7 +129,7 @@ define void @test_llgesll_store(i64 %a, i64 %b) { ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-BE-NEXT: sradi r6, r3, 63 ; CHECK-BE-NEXT: ld r5, .LC0@toc@l(r5) -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-BE-NEXT: adde r3, r6, r3 ; CHECK-BE-NEXT: std r3, 0(r5) @@ -139,7 +139,7 @@ define void @test_llgesll_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r3, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: std r3, glob@toc@l(r5) @@ -166,7 +166,7 @@ define void @test_llgesll_sext_store(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r6, r3, 63 ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-BE-NEXT: subfc r3, r4, r3 +; CHECK-BE-NEXT: subc r3, r3, r4 ; CHECK-BE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-BE-NEXT: ld r4, .LC0@toc@l(r5) ; CHECK-BE-NEXT: adde r3, r6, r3 @@ -178,7 +178,7 @@ define void @test_llgesll_sext_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r3, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r3, r4, r3 +; CHECK-LE-NEXT: subc r3, r3, r4 ; CHECK-LE-NEXT: rldicl r3, r4, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: neg r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll b/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll index 68d908a4e805..580bf834106a 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesllgeull.ll @@ -14,7 +14,7 @@ define i64 @test_llgeull(i64 %a, i64 %b) { ; CHECK-LABEL: test_llgeull: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: subfe r3, r4, r4 ; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: blr @@ -28,7 +28,7 @@ entry: define i64 @test_llgeull_sext(i64 %a, i64 %b) { ; CHECK-LABEL: test_llgeull_sext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: subfe r3, r4, r4 ; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: blr @@ -67,7 +67,7 @@ define void @test_llgeull_store(i64 %a, i64 %b) { ; BE-LABEL: test_llgeull_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: ld r3, .LC0@toc@l(r5) ; BE-NEXT: subfe r4, r4, r4 ; BE-NEXT: addi r4, r4, 1 @@ -76,7 +76,7 @@ define void @test_llgeull_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_llgeull_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r4, r4 ; LE-NEXT: addi r3, r3, 1 @@ -94,7 +94,7 @@ define void @test_llgeull_sext_store(i64 %a, i64 %b) { ; BE-LABEL: test_llgeull_sext_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: ld r3, .LC0@toc@l(r5) ; BE-NEXT: subfe r4, r4, r4 ; BE-NEXT: not r4, r4 @@ -103,7 +103,7 @@ define void @test_llgeull_sext_store(i64 %a, i64 %b) { ; ; 
LE-LABEL: test_llgeull_sext_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r4, r4 ; LE-NEXT: not r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll b/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll index 6f4c9b00b83f..ae7c15e00cee 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesllgtsll.ll @@ -14,7 +14,7 @@ define i64 @test_llgtsll(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r4, 63 ; CHECK-NEXT: rldicl r6, r3, 1, 63 -; CHECK-NEXT: subfc r3, r3, r4 +; CHECK-NEXT: subc r3, r4, r3 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: blr @@ -30,7 +30,7 @@ define i64 @test_llgtsll_sext(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r4, 63 ; CHECK-NEXT: rldicl r6, r3, 1, 63 -; CHECK-NEXT: subfc r3, r3, r4 +; CHECK-NEXT: subc r3, r4, r3 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: neg r3, r3 @@ -76,7 +76,7 @@ define void @test_llgtsll_store(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r6, r4, 63 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: rldicl r3, r3, 1, 63 ; CHECK-NEXT: ld r4, .LC0@toc@l(r5) ; CHECK-NEXT: adde r3, r3, r6 @@ -97,7 +97,7 @@ define void @test_llgtsll_sext_store(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r6, r4, 63 ; CHECK-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: rldicl r3, r3, 1, 63 ; CHECK-NEXT: adde r3, r3, r6 ; CHECK-NEXT: ld r4, .LC0@toc@l(r5) diff --git a/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll b/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll index ca7ce5e56e3b..a323f075182e 100644 --- a/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll +++ b/llvm/test/CodeGen/PowerPC/testCompareslllesll.ll @@ -20,7 +20,7 @@ define i64 @test_lllesll(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r4, 63 ; CHECK-BE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-BE-NEXT: subfc r3, r3, r4 +; CHECK-BE-NEXT: subc r3, r4, r3 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: blr ; @@ -28,7 +28,7 @@ define i64 @test_lllesll(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r4, 63 ; CHECK-LE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-LE-NEXT: subfc r3, r3, r4 +; CHECK-LE-NEXT: subc r3, r4, r3 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: blr entry: @@ -51,7 +51,7 @@ define i64 @test_lllesll_sext(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r5, r4, 63 ; CHECK-BE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-BE-NEXT: subfc r3, r3, r4 +; CHECK-BE-NEXT: subc r3, r4, r3 ; CHECK-BE-NEXT: adde r3, r5, r6 ; CHECK-BE-NEXT: neg r3, r3 ; CHECK-BE-NEXT: blr @@ -60,7 +60,7 @@ define i64 @test_lllesll_sext(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r5, r4, 63 ; CHECK-LE-NEXT: rldicl r6, r3, 1, 63 -; CHECK-LE-NEXT: subfc r3, r3, r4 +; CHECK-LE-NEXT: subc r3, r4, r3 ; CHECK-LE-NEXT: adde r3, r5, r6 ; CHECK-LE-NEXT: neg r3, r3 ; CHECK-LE-NEXT: blr @@ -140,7 +140,7 @@ define void @test_lllesll_store(i64 %a, i64 %b) { ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha ; CHECK-BE-NEXT: sradi r6, r4, 63 ; CHECK-BE-NEXT: ld r5, .LC0@toc@l(r5) -; CHECK-BE-NEXT: subfc r4, r3, r4 +; CHECK-BE-NEXT: subc r4, r4, r3 ; CHECK-BE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-BE-NEXT: adde r3, r6, r3 ; 
CHECK-BE-NEXT: std r3, 0(r5) @@ -150,7 +150,7 @@ define void @test_lllesll_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r4, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r4, r3, r4 +; CHECK-LE-NEXT: subc r4, r4, r3 ; CHECK-LE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: std r3, glob@toc@l(r5) @@ -178,7 +178,7 @@ define void @test_lllesll_sext_store(i64 %a, i64 %b) { ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: sradi r6, r4, 63 ; CHECK-BE-NEXT: addis r5, r2, .LC0@toc@ha -; CHECK-BE-NEXT: subfc r4, r3, r4 +; CHECK-BE-NEXT: subc r4, r4, r3 ; CHECK-BE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-BE-NEXT: ld r4, .LC0@toc@l(r5) ; CHECK-BE-NEXT: adde r3, r6, r3 @@ -190,7 +190,7 @@ define void @test_lllesll_sext_store(i64 %a, i64 %b) { ; CHECK-LE: # %bb.0: # %entry ; CHECK-LE-NEXT: sradi r6, r4, 63 ; CHECK-LE-NEXT: addis r5, r2, glob@toc@ha -; CHECK-LE-NEXT: subfc r4, r3, r4 +; CHECK-LE-NEXT: subc r4, r4, r3 ; CHECK-LE-NEXT: rldicl r3, r3, 1, 63 ; CHECK-LE-NEXT: adde r3, r6, r3 ; CHECK-LE-NEXT: neg r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesllleull.ll b/llvm/test/CodeGen/PowerPC/testComparesllleull.ll index 412d26497444..05df4b399aaf 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesllleull.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesllleull.ll @@ -14,7 +14,7 @@ define i64 @test_llleull(i64 %a, i64 %b) { ; CHECK-LABEL: test_llleull: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: subfe r3, r3, r3 ; CHECK-NEXT: addi r3, r3, 1 ; CHECK-NEXT: blr @@ -28,7 +28,7 @@ entry: define i64 @test_llleull_sext(i64 %a, i64 %b) { ; CHECK-LABEL: test_llleull_sext: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subfc r4, r3, r4 +; CHECK-NEXT: subc r4, r4, r3 ; CHECK-NEXT: subfe r3, r3, r3 ; CHECK-NEXT: not r3, r3 ; CHECK-NEXT: blr @@ -69,7 +69,7 @@ define void @test_llleull_store(i64 %a, i64 %b) { ; BE-LABEL: test_llleull_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r4, r3, r4 +; BE-NEXT: subc r4, r4, r3 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: subfe r3, r3, r3 ; BE-NEXT: addi r3, r3, 1 @@ -78,7 +78,7 @@ define void @test_llleull_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_llleull_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r4, r3, r4 +; LE-NEXT: subc r4, r4, r3 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r3, r3 ; LE-NEXT: addi r3, r3, 1 @@ -96,7 +96,7 @@ define void @test_llleull_sext_store(i64 %a, i64 %b) { ; BE-LABEL: test_llleull_sext_store: ; BE: # %bb.0: # %entry ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r4, r3, r4 +; BE-NEXT: subc r4, r4, r3 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: subfe r3, r3, r3 ; BE-NEXT: not r3, r3 @@ -105,7 +105,7 @@ define void @test_llleull_sext_store(i64 %a, i64 %b) { ; ; LE-LABEL: test_llleull_sext_store: ; LE: # %bb.0: # %entry -; LE-NEXT: subfc r4, r3, r4 +; LE-NEXT: subc r4, r4, r3 ; LE-NEXT: addis r5, r2, glob@toc@ha ; LE-NEXT: subfe r3, r3, r3 ; LE-NEXT: not r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll b/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll index aeb1891122e3..fbaa5cb3d5db 100644 --- a/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll +++ b/llvm/test/CodeGen/PowerPC/testComparesllltsll.ll @@ -16,7 +16,7 @@ define i64 @test_llltsll(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r3, 63 ; CHECK-NEXT: rldicl r6, r4, 1, 63 -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: 
adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: blr @@ -32,7 +32,7 @@ define i64 @test_llltsll_sext(i64 %a, i64 %b) { ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: sradi r5, r3, 63 ; CHECK-NEXT: rldicl r6, r4, 1, 63 -; CHECK-NEXT: subfc r3, r4, r3 +; CHECK-NEXT: subc r3, r3, r4 ; CHECK-NEXT: adde r3, r6, r5 ; CHECK-NEXT: xori r3, r3, 1 ; CHECK-NEXT: neg r3, r3 @@ -61,7 +61,7 @@ define void @test_llltsll_store(i64 %a, i64 %b) { ; BE: # %bb.0: # %entry ; BE-NEXT: sradi r6, r3, 63 ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: rldicl r3, r4, 1, 63 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: adde r3, r3, r6 @@ -73,7 +73,7 @@ define void @test_llltsll_store(i64 %a, i64 %b) { ; LE: # %bb.0: # %entry ; LE-NEXT: sradi r6, r3, 63 ; LE-NEXT: addis r5, r2, glob@toc@ha -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: rldicl r3, r4, 1, 63 ; LE-NEXT: adde r3, r3, r6 ; LE-NEXT: xori r3, r3, 1 @@ -93,7 +93,7 @@ define void @test_llltsll_sext_store(i64 %a, i64 %b) { ; BE: # %bb.0: # %entry ; BE-NEXT: sradi r6, r3, 63 ; BE-NEXT: addis r5, r2, .LC0@toc@ha -; BE-NEXT: subfc r3, r4, r3 +; BE-NEXT: subc r3, r3, r4 ; BE-NEXT: rldicl r3, r4, 1, 63 ; BE-NEXT: ld r4, .LC0@toc@l(r5) ; BE-NEXT: adde r3, r3, r6 @@ -106,7 +106,7 @@ define void @test_llltsll_sext_store(i64 %a, i64 %b) { ; LE: # %bb.0: # %entry ; LE-NEXT: sradi r6, r3, 63 ; LE-NEXT: addis r5, r2, glob@toc@ha -; LE-NEXT: subfc r3, r4, r3 +; LE-NEXT: subc r3, r3, r4 ; LE-NEXT: rldicl r3, r4, 1, 63 ; LE-NEXT: adde r3, r3, r6 ; LE-NEXT: xori r3, r3, 1 diff --git a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll index 5235f3359a8f..18e47f131dd6 100644 --- a/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll +++ b/llvm/test/CodeGen/PowerPC/tocSaveInPrologue.ll @@ -22,7 +22,7 @@ define dso_local void @test(void (i32)* nocapture %fp, i32 signext %Arg, i32 sig ; CHECK-NEXT: cmpwi r29, 1 ; CHECK-NEXT: bc 12, lt, .LBB0_3 ; CHECK-NEXT: # %bb.1: # %entry -; CHECK-NEXT: cmpwi cr0, r4, 11 +; CHECK-NEXT: cmpwi r4, 11 ; CHECK-NEXT: bc 12, lt, .LBB0_3 ; CHECK-NEXT: .p2align 5 ; CHECK-NEXT: .LBB0_2: # %for.body.us diff --git a/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll b/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll index 5dc0534ef446..dcddbb135bae 100644 --- a/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll +++ b/llvm/test/CodeGen/PowerPC/trunc-srl-load.ll @@ -5,7 +5,7 @@ define dso_local fastcc void @trunc_srl_load(i32 zeroext %AttrArgNo) { ; CHECK-LABEL: trunc_srl_load: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: lhz 4, 2(0) -; CHECK-NEXT: cmplw 0, 4, 3 +; CHECK-NEXT: cmplw 4, 3 ; CHECK-NEXT: ble 0, .LBB0_2 ; CHECK-NEXT: # %bb.1: # %exit ; CHECK-NEXT: .LBB0_2: # %cond.false diff --git a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll index 4afd0e204bf6..2020833fd897 100644 --- a/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll +++ b/llvm/test/CodeGen/PowerPC/uint-to-fp-v4i32.ll @@ -16,11 +16,11 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mtfprwz f0, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mtfprwz f1, r3 ; P9BE-NEXT: xscvuxddp f0, f0 ; P9BE-NEXT: xscvuxddp f1, f1 @@ -31,11 +31,11 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; 
P9LE: # %bb.0: # %entry ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mtfprwz f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mtfprwz f1, r3 ; P9LE-NEXT: xscvuxddp f0, f0 ; P9LE-NEXT: xscvuxddp f1, f1 @@ -47,8 +47,8 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; P8BE-NEXT: mfvsrd r3, v2 ; P8BE-NEXT: rldicl r4, r3, 16, 48 ; P8BE-NEXT: rldicl r3, r3, 32, 48 -; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 -; P8BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P8BE-NEXT: clrlwi r4, r4, 16 +; P8BE-NEXT: clrlwi r3, r3, 16 ; P8BE-NEXT: mtfprwz f0, r4 ; P8BE-NEXT: mtfprwz f1, r3 ; P8BE-NEXT: xscvuxddp f0, f0 @@ -59,11 +59,11 @@ define dso_local <2 x double> @test1(<8 x i16> %a) { ; P8LE-LABEL: test1: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: xxswapd vs0, v2 -; P8LE-NEXT: mfvsrd r3, f0 +; P8LE-NEXT: mffprd r3, f0 ; P8LE-NEXT: clrldi r4, r3, 48 ; P8LE-NEXT: rldicl r3, r3, 48, 48 -; P8LE-NEXT: rlwinm r4, r4, 0, 16, 31 -; P8LE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P8LE-NEXT: clrlwi r4, r4, 16 +; P8LE-NEXT: clrlwi r3, r3, 16 ; P8LE-NEXT: mtfprwz f0, r4 ; P8LE-NEXT: mtfprwz f1, r3 ; P8LE-NEXT: xscvuxddp f0, f0 @@ -104,7 +104,7 @@ define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) { ; P8BE-NEXT: xxsldwi vs0, v2, v2, 3 ; P8BE-NEXT: mfvsrwz r4, v3 ; P8BE-NEXT: mtfprwz f1, r4 -; P8BE-NEXT: mfvsrwz r3, f0 +; P8BE-NEXT: mffprwz r3, f0 ; P8BE-NEXT: xscvuxddp f1, f1 ; P8BE-NEXT: mtfprwz f0, r3 ; P8BE-NEXT: xscvuxddp f0, f0 @@ -115,8 +115,8 @@ define dso_local <2 x double> @test2(<4 x i32> %a, <4 x i32> %b) { ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: xxsldwi vs1, v3, v3, 1 -; P8LE-NEXT: mfvsrwz r3, f0 -; P8LE-NEXT: mfvsrwz r4, f1 +; P8LE-NEXT: mffprwz r3, f0 +; P8LE-NEXT: mffprwz r4, f1 ; P8LE-NEXT: mtfprwz f0, r3 ; P8LE-NEXT: mtfprwz f1, r4 ; P8LE-NEXT: xscvuxddp f0, f0 diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll index e3d9027d9e98..ba568c5d153b 100644 --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -22,7 +22,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: rldicl r4, r4, 27, 37 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -33,7 +33,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 1003 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31 @@ -42,10 +42,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 124 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: lis r6, 22765 ; P9LE-NEXT: ori r6, r6, 8969 ; P9LE-NEXT: vmrglh v3, v4, v3 @@ -59,7 +59,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v4, 
v2 ; P9LE-NEXT: vmrglw v2, v3, v2 @@ -69,7 +69,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: lis r5, 16727 ; P9BE-NEXT: ori r5, r5, 2287 ; P9BE-NEXT: clrldi r4, r3, 32 @@ -83,7 +83,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r4, r3, 32 ; P9BE-NEXT: mulld r4, r4, r5 ; P9BE-NEXT: lis r5, 8456 @@ -108,7 +108,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r4, r3, 32 ; P9BE-NEXT: mulld r4, r4, r5 ; P9BE-NEXT: rldicl r4, r4, 32, 32 @@ -131,10 +131,10 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P8LE-NEXT: lis r8, 21399 ; P8LE-NEXT: ori r3, r3, 8969 ; P8LE-NEXT: ori r8, r8, 33437 -; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: clrldi r5, r4, 48 ; P8LE-NEXT: rldicl r9, r4, 32, 48 -; P8LE-NEXT: rlwinm r6, r5, 0, 16, 31 +; P8LE-NEXT: clrlwi r6, r5, 16 ; P8LE-NEXT: rldicl r10, r4, 16, 48 ; P8LE-NEXT: rlwinm r11, r9, 0, 16, 31 ; P8LE-NEXT: clrldi r7, r6, 32 @@ -163,13 +163,13 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P8LE-NEXT: mulli r8, r8, 124 ; P8LE-NEXT: subf r7, r7, r9 ; P8LE-NEXT: subf r6, r6, r10 -; P8LE-NEXT: mtvsrd f0, r7 +; P8LE-NEXT: mtfprd f0, r7 ; P8LE-NEXT: subf r3, r3, r5 ; P8LE-NEXT: subf r4, r8, r4 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: xxswapd v5, vs3 @@ -187,11 +187,11 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P8BE-NEXT: ori r9, r9, 2287 ; P8BE-NEXT: rldicl r5, r4, 16, 48 ; P8BE-NEXT: clrldi r6, r4, 48 -; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 +; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: rldicl r7, r4, 48, 48 -; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31 +; P8BE-NEXT: clrlwi r6, r6, 16 ; P8BE-NEXT: clrldi r8, r5, 32 -; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31 +; P8BE-NEXT: clrlwi r7, r7, 16 ; P8BE-NEXT: mulld r3, r8, r3 ; P8BE-NEXT: lis r8, 21399 ; P8BE-NEXT: clrldi r10, r6, 32 @@ -204,7 +204,7 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; P8BE-NEXT: ori r10, r10, 16913 ; P8BE-NEXT: rlwinm r11, r4, 30, 18, 31 ; P8BE-NEXT: rldicl r3, r3, 32, 32 -; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulld r10, r11, r10 ; P8BE-NEXT: subf r11, r3, r5 ; P8BE-NEXT: srwi r11, r11, 1 @@ -242,7 +242,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: lis r6, 22765 ; P9LE-NEXT: ori r6, r6, 8969 ; P9LE-NEXT: clrldi r5, r4, 32 @@ -254,10 +254,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: clrldi r5, r4, 32 ; P9LE-NEXT: mulld r5, 
r5, r6 ; P9LE-NEXT: rldicl r5, r5, 32, 32 @@ -268,10 +268,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: clrldi r5, r4, 32 ; P9LE-NEXT: mulld r5, r5, r6 ; P9LE-NEXT: rldicl r5, r5, 32, 32 @@ -282,10 +282,10 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: clrldi r5, r4, 32 ; P9LE-NEXT: mulld r5, r5, r6 ; P9LE-NEXT: rldicl r5, r5, 32, 32 @@ -297,7 +297,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 @@ -307,7 +307,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: lis r5, 22765 ; P9BE-NEXT: ori r5, r5, 8969 ; P9BE-NEXT: clrldi r4, r3, 32 @@ -323,7 +323,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r4, r3, 32 ; P9BE-NEXT: mulld r4, r4, r5 ; P9BE-NEXT: rldicl r4, r4, 32, 32 @@ -337,7 +337,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r4, r3, 32 ; P9BE-NEXT: mulld r4, r4, r5 ; P9BE-NEXT: rldicl r4, r4, 32, 32 @@ -352,7 +352,7 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r4, r3, 32 ; P9BE-NEXT: mulld r4, r4, r5 ; P9BE-NEXT: rldicl r4, r4, 32, 32 @@ -375,16 +375,16 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r4, r4, 8969 -; P8LE-NEXT: mfvsrd r5, f0 +; P8LE-NEXT: mffprd r5, f0 ; P8LE-NEXT: clrldi r3, r5, 48 ; P8LE-NEXT: rldicl r6, r5, 48, 48 -; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P8LE-NEXT: clrlwi r8, r3, 16 ; P8LE-NEXT: rldicl r7, r5, 32, 48 -; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31 +; P8LE-NEXT: clrlwi r9, r6, 16 ; P8LE-NEXT: rldicl r5, r5, 16, 48 ; P8LE-NEXT: clrldi r11, r8, 32 -; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31 -; P8LE-NEXT: rlwinm r12, r5, 0, 16, 31 +; P8LE-NEXT: clrlwi r10, r7, 16 +; P8LE-NEXT: clrlwi r12, r5, 16 ; P8LE-NEXT: mulld r11, r11, r4 ; P8LE-NEXT: clrldi r0, r9, 32 ; P8LE-NEXT: clrldi r30, r10, 32 @@ -420,13 +420,13 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P8LE-NEXT: mulli r4, r4, 95 ; P8LE-NEXT: subf r3, r8, r3 ; P8LE-NEXT: subf r6, r9, r6 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r3, r10, r7 ; P8LE-NEXT: 
subf r4, r4, r5 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: xxswapd v5, vs3 @@ -442,15 +442,15 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; P8BE-NEXT: ori r3, r3, 8969 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r6, r4, 48, 48 -; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 +; P8BE-NEXT: clrlwi r5, r5, 16 ; P8BE-NEXT: rldicl r7, r4, 32, 48 -; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31 +; P8BE-NEXT: clrlwi r6, r6, 16 ; P8BE-NEXT: clrldi r8, r5, 32 ; P8BE-NEXT: rldicl r4, r4, 16, 48 -; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31 +; P8BE-NEXT: clrlwi r7, r7, 16 ; P8BE-NEXT: clrldi r9, r6, 32 ; P8BE-NEXT: mulld r8, r8, r3 -; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: clrldi r10, r7, 32 ; P8BE-NEXT: mulld r9, r9, r3 ; P8BE-NEXT: clrldi r11, r4, 32 @@ -507,7 +507,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: lis r6, 22765 ; P9LE-NEXT: ori r6, r6, 8969 ; P9LE-NEXT: clrldi r5, r4, 32 @@ -519,10 +519,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r5, r4, 95 ; P9LE-NEXT: subf r3, r5, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r5, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r5, r3, 16 ; P9LE-NEXT: clrldi r7, r5, 32 ; P9LE-NEXT: mulld r7, r7, r6 ; P9LE-NEXT: rldicl r7, r7, 32, 32 @@ -533,10 +533,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: mulli r7, r5, 95 ; P9LE-NEXT: subf r3, r7, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r7, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r7, r3, 16 ; P9LE-NEXT: clrldi r8, r7, 32 ; P9LE-NEXT: mulld r8, r8, r6 ; P9LE-NEXT: rldicl r8, r8, 32, 32 @@ -547,10 +547,10 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: subf r3, r8, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r8, r3, 16 ; P9LE-NEXT: clrldi r9, r8, 32 ; P9LE-NEXT: mulld r6, r9, r6 ; P9LE-NEXT: rldicl r6, r6, 32, 32 @@ -562,18 +562,18 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9LE-NEXT: subf r3, r8, r3 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: mtvsrd f0, r4 +; P9LE-NEXT: mtfprd f0, r4 ; P9LE-NEXT: vmrglh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r5 +; P9LE-NEXT: mtfprd f0, r5 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r7 +; P9LE-NEXT: mtfprd f0, r7 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r6 +; P9LE-NEXT: mtfprd f0, r6 ; P9LE-NEXT: xxswapd v5, vs0 ; P9LE-NEXT: vmrglh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 @@ -584,7 +584,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r4, r3, 0, 16, 
31 +; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: lis r6, 22765 ; P9BE-NEXT: ori r6, r6, 8969 ; P9BE-NEXT: clrldi r5, r4, 32 @@ -600,7 +600,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r5, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r5, r3, 16 ; P9BE-NEXT: clrldi r7, r5, 32 ; P9BE-NEXT: mulld r7, r7, r6 ; P9BE-NEXT: rldicl r7, r7, 32, 32 @@ -614,7 +614,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r7, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r7, r3, 16 ; P9BE-NEXT: clrldi r8, r7, 32 ; P9BE-NEXT: mulld r8, r8, r6 ; P9BE-NEXT: rldicl r8, r8, 32, 32 @@ -629,7 +629,7 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r8, r3, 32 ; P9BE-NEXT: mulld r6, r8, r6 ; P9BE-NEXT: rldicl r6, r6, 32, 32 @@ -664,16 +664,16 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r5, r5, 8969 -; P8LE-NEXT: mfvsrd r6, f0 +; P8LE-NEXT: mffprd r6, f0 ; P8LE-NEXT: clrldi r3, r6, 48 ; P8LE-NEXT: rldicl r4, r6, 48, 48 ; P8LE-NEXT: rldicl r7, r6, 32, 48 -; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31 -; P8LE-NEXT: rlwinm r9, r4, 0, 16, 31 +; P8LE-NEXT: clrlwi r8, r3, 16 +; P8LE-NEXT: clrlwi r9, r4, 16 ; P8LE-NEXT: rldicl r6, r6, 16, 48 -; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31 +; P8LE-NEXT: clrlwi r10, r7, 16 ; P8LE-NEXT: clrldi r11, r8, 32 -; P8LE-NEXT: rlwinm r12, r6, 0, 16, 31 +; P8LE-NEXT: clrlwi r12, r6, 16 ; P8LE-NEXT: clrldi r0, r9, 32 ; P8LE-NEXT: clrldi r30, r10, 32 ; P8LE-NEXT: mulld r11, r11, r5 @@ -703,26 +703,26 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P8LE-NEXT: mulli r12, r8, 95 ; P8LE-NEXT: srwi r10, r10, 6 ; P8LE-NEXT: add r5, r11, r5 -; P8LE-NEXT: mtvsrd f0, r8 +; P8LE-NEXT: mtfprd f0, r8 ; P8LE-NEXT: mulli r8, r9, 95 -; P8LE-NEXT: mtvsrd f1, r9 +; P8LE-NEXT: mtfprd f1, r9 ; P8LE-NEXT: mulli r9, r10, 95 ; P8LE-NEXT: srwi r5, r5, 6 -; P8LE-NEXT: mtvsrd f3, r5 +; P8LE-NEXT: mtfprd f3, r5 ; P8LE-NEXT: mulli r5, r5, 95 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: mtvsrd f2, r10 +; P8LE-NEXT: mtfprd f2, r10 ; P8LE-NEXT: subf r3, r12, r3 ; P8LE-NEXT: xxswapd v6, vs3 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtfprd f0, r3 ; P8LE-NEXT: subf r3, r9, r7 ; P8LE-NEXT: subf r4, r8, r4 ; P8LE-NEXT: xxswapd v1, vs2 -; P8LE-NEXT: mtvsrd f4, r3 +; P8LE-NEXT: mtfprd f4, r3 ; P8LE-NEXT: subf r3, r5, r6 -; P8LE-NEXT: mtvsrd f1, r4 -; P8LE-NEXT: mtvsrd f5, r3 +; P8LE-NEXT: mtfprd f1, r4 +; P8LE-NEXT: mtfprd f5, r3 ; P8LE-NEXT: xxswapd v5, vs4 ; P8LE-NEXT: vmrglh v2, v3, v2 ; P8LE-NEXT: xxswapd v3, vs0 @@ -744,13 +744,13 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; P8BE-NEXT: ori r5, r5, 8969 ; P8BE-NEXT: clrldi r3, r6, 48 ; P8BE-NEXT: rldicl r4, r6, 48, 48 -; P8BE-NEXT: rlwinm r8, r3, 0, 16, 31 +; P8BE-NEXT: clrlwi r8, r3, 16 ; P8BE-NEXT: rldicl r7, r6, 32, 48 -; P8BE-NEXT: rlwinm r9, r4, 0, 16, 31 +; P8BE-NEXT: clrlwi r9, r4, 16 ; P8BE-NEXT: rldicl r6, r6, 16, 48 ; P8BE-NEXT: clrldi r11, r8, 32 -; P8BE-NEXT: rlwinm r10, r7, 0, 16, 31 -; P8BE-NEXT: rlwinm r6, r6, 0, 16, 31 +; P8BE-NEXT: clrlwi r10, r7, 16 +; P8BE-NEXT: clrlwi r6, r6, 16 ; P8BE-NEXT: clrldi r12, r9, 32 ; 
P8BE-NEXT: mulld r11, r11, r5 ; P8BE-NEXT: clrldi r0, r10, 32 @@ -823,16 +823,16 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 26, 31 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: clrlwi r3, r3, 26 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 27, 31 +; P9LE-NEXT: clrlwi r3, r3, 27 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 +; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: lis r6, 22765 ; P9LE-NEXT: ori r6, r6, 8969 ; P9LE-NEXT: xxswapd v4, vs0 @@ -846,12 +846,12 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: rlwinm r3, r3, 0, 29, 31 +; P9LE-NEXT: clrlwi r3, r3, 29 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v2, v3 @@ -861,17 +861,17 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 27, 31 +; P9BE-NEXT: clrlwi r3, r3, 27 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 26, 31 +; P9BE-NEXT: clrlwi r3, r3, 26 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: lis r5, 22765 ; P9BE-NEXT: ori r5, r5, 8969 ; P9BE-NEXT: vmrghh v3, v4, v3 @@ -888,7 +888,7 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v4, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 29, 31 +; P9BE-NEXT: clrlwi r3, r3, 29 ; P9BE-NEXT: sldi r3, r3, 48 ; P9BE-NEXT: mtvsrd v2, r3 ; P9BE-NEXT: vmrghh v2, v2, v4 @@ -900,14 +900,14 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r3, 22765 ; P8LE-NEXT: ori r3, r3, 8969 -; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r5, r4, 16, 48 -; P8LE-NEXT: rlwinm r6, r5, 0, 16, 31 +; P8LE-NEXT: clrlwi r6, r5, 16 ; P8LE-NEXT: clrldi r7, r6, 32 ; P8LE-NEXT: mulld r3, r7, r3 ; P8LE-NEXT: rldicl r7, r4, 48, 48 -; P8LE-NEXT: rlwinm r7, r7, 0, 27, 31 -; P8LE-NEXT: mtvsrd f1, r7 +; P8LE-NEXT: clrlwi r7, r7, 27 +; P8LE-NEXT: mtfprd f1, r7 ; P8LE-NEXT: rldicl r3, r3, 32, 32 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: subf r6, r3, r6 @@ -916,15 +916,15 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P8LE-NEXT: clrldi r6, r4, 48 ; P8LE-NEXT: srwi r3, r3, 6 ; P8LE-NEXT: rldicl r4, r4, 32, 48 -; P8LE-NEXT: rlwinm r6, r6, 0, 26, 31 +; P8LE-NEXT: clrlwi r6, r6, 26 ; P8LE-NEXT: mulli r3, r3, 95 -; P8LE-NEXT: rlwinm r4, r4, 0, 29, 31 -; P8LE-NEXT: mtvsrd f0, r6 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: clrlwi r4, r4, 29 +; P8LE-NEXT: mtfprd f0, r6 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: xxswapd v5, vs3 ; P8LE-NEXT: subf r3, r3, r5 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f2, r3 ; 
P8LE-NEXT: vmrglh v2, v3, v2 ; P8LE-NEXT: xxswapd v4, vs2 ; P8LE-NEXT: vmrglh v3, v4, v5 @@ -938,8 +938,8 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P8BE-NEXT: ori r3, r3, 8969 ; P8BE-NEXT: clrldi r5, r4, 48 ; P8BE-NEXT: rldicl r7, r4, 16, 48 -; P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 -; P8BE-NEXT: rlwinm r7, r7, 0, 26, 31 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: clrlwi r7, r7, 26 ; P8BE-NEXT: clrldi r6, r5, 32 ; P8BE-NEXT: mulld r3, r6, r3 ; P8BE-NEXT: rldicl r3, r3, 32, 32 @@ -949,10 +949,10 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; P8BE-NEXT: rldicl r6, r4, 32, 48 ; P8BE-NEXT: srwi r3, r3, 6 ; P8BE-NEXT: rldicl r4, r4, 48, 48 -; P8BE-NEXT: rlwinm r6, r6, 0, 27, 31 +; P8BE-NEXT: clrlwi r6, r6, 27 ; P8BE-NEXT: mulli r3, r3, 95 ; P8BE-NEXT: sldi r6, r6, 48 -; P8BE-NEXT: rlwinm r4, r4, 0, 29, 31 +; P8BE-NEXT: clrlwi r4, r4, 29 ; P8BE-NEXT: mtvsrd v2, r6 ; P8BE-NEXT: sldi r6, r7, 48 ; P8BE-NEXT: sldi r4, r4, 48 @@ -987,7 +987,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE-NEXT: rldicl r4, r4, 28, 36 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -996,7 +996,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31 @@ -1005,7 +1005,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: subf r3, r4, r3 ; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtfprd f0, r3 ; P9LE-NEXT: xxswapd v2, vs0 ; P9LE-NEXT: vmrglh v3, v4, v3 ; P9LE-NEXT: xxlxor v4, v4, v4 @@ -1017,7 +1017,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: lis r5, 24749 ; P9BE-NEXT: ori r5, r5, 47143 ; P9BE-NEXT: clrldi r4, r3, 32 @@ -1034,7 +1034,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P9BE-NEXT: mtvsrd v3, r3 ; P9BE-NEXT: li r3, 4 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: clrldi r4, r3, 32 ; P9BE-NEXT: mulld r4, r4, r6 ; P9BE-NEXT: rldicl r4, r4, 28, 36 @@ -1071,7 +1071,7 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P8LE-NEXT: oris r3, r3, 51306 ; P8LE-NEXT: ori r5, r5, 17097 ; P8LE-NEXT: ori r3, r3, 30865 -; P8LE-NEXT: mfvsrd r4, f0 +; P8LE-NEXT: mffprd r4, f0 ; P8LE-NEXT: rldicl r6, r4, 32, 48 ; P8LE-NEXT: rldicl r7, r4, 16, 48 ; P8LE-NEXT: rlwinm r9, r6, 0, 16, 31 @@ -1089,10 +1089,10 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P8LE-NEXT: mulli r3, r3, 654 ; P8LE-NEXT: subf r5, r5, r6 ; P8LE-NEXT: subf r6, r8, r7 -; P8LE-NEXT: mtvsrd f0, r5 +; P8LE-NEXT: mtfprd f0, r5 ; P8LE-NEXT: subf r3, r3, r4 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 +; P8LE-NEXT: mtfprd f1, r6 +; P8LE-NEXT: mtfprd f2, r3 ; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: xxswapd v4, vs2 @@ -1115,9 +1115,9 @@ define <4 x i16> @dont_fold_urem_one(<4 x i16> %x) { ; P8BE-NEXT: ori r3, r3, 17097 ; P8BE-NEXT: rldicl r4, r4, 48, 48 ; P8BE-NEXT: rlwinm r9, r5, 31, 17, 31 -; P8BE-NEXT: rlwinm r7, r7, 0, 16, 31 -; 
P8BE-NEXT: rlwinm r5, r5, 0, 16, 31 -; P8BE-NEXT: rlwinm r4, r4, 0, 16, 31 +; P8BE-NEXT: clrlwi r7, r7, 16 +; P8BE-NEXT: clrlwi r5, r5, 16 +; P8BE-NEXT: clrlwi r4, r4, 16 ; P8BE-NEXT: mulld r6, r9, r6 ; P8BE-NEXT: clrldi r9, r7, 32 ; P8BE-NEXT: mulld r8, r9, r8 @@ -1255,7 +1255,7 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P8LE-NEXT: sldi r3, r3, 32 ; P8LE-NEXT: sldi r4, r4, 32 ; P8LE-NEXT: oris r3, r3, 45590 -; P8LE-NEXT: mfvsrd r7, f0 +; P8LE-NEXT: mffprd r7, f0 ; P8LE-NEXT: sldi r5, r5, 32 ; P8LE-NEXT: oris r4, r4, 52170 ; P8LE-NEXT: ori r3, r3, 17097 @@ -1277,12 +1277,12 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P8LE-NEXT: mulli r3, r3, 23 ; P8LE-NEXT: sub r4, r8, r4 ; P8LE-NEXT: sub r5, r6, r5 -; P8LE-NEXT: mtvsrd f0, r4 +; P8LE-NEXT: mtfprd f0, r4 ; P8LE-NEXT: sub r3, r7, r3 ; P8LE-NEXT: li r4, 0 -; P8LE-NEXT: mtvsrd f1, r5 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: mtvsrd f3, r4 +; P8LE-NEXT: mtfprd f1, r5 +; P8LE-NEXT: mtfprd f2, r3 +; P8LE-NEXT: mtfprd f3, r4 ; P8LE-NEXT: xxmrghd v3, vs0, vs2 ; P8LE-NEXT: xxmrghd v2, vs1, vs3 ; P8LE-NEXT: blr @@ -1302,10 +1302,10 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P8BE-NEXT: sldi r4, r4, 32 ; P8BE-NEXT: oris r3, r3, 45590 ; P8BE-NEXT: sldi r5, r5, 32 -; P8BE-NEXT: mfvsrd r7, f0 +; P8BE-NEXT: mffprd r7, f0 ; P8BE-NEXT: oris r4, r4, 52170 ; P8BE-NEXT: ori r3, r3, 17097 -; P8BE-NEXT: mfvsrd r8, f1 +; P8BE-NEXT: mffprd r8, f1 ; P8BE-NEXT: oris r5, r5, 1603 ; P8BE-NEXT: ori r4, r4, 12109 ; P8BE-NEXT: mulhdu r3, r6, r3 @@ -1323,13 +1323,13 @@ define <4 x i64> @dont_fold_urem_i64(<4 x i64> %x) { ; P8BE-NEXT: mulli r5, r5, 654 ; P8BE-NEXT: mulli r3, r3, 23 ; P8BE-NEXT: sub r4, r7, r4 -; P8BE-NEXT: mtvsrd f0, r4 +; P8BE-NEXT: mtfprd f0, r4 ; P8BE-NEXT: sub r4, r8, r5 ; P8BE-NEXT: sub r3, r6, r3 -; P8BE-NEXT: mtvsrd f1, r4 +; P8BE-NEXT: mtfprd f1, r4 ; P8BE-NEXT: li r4, 0 -; P8BE-NEXT: mtvsrd f2, r3 -; P8BE-NEXT: mtvsrd f3, r4 +; P8BE-NEXT: mtfprd f2, r3 +; P8BE-NEXT: mtfprd f3, r4 ; P8BE-NEXT: xxmrghd v3, vs2, vs0 ; P8BE-NEXT: xxmrghd v2, vs3, vs1 ; P8BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec-min-max.ll b/llvm/test/CodeGen/PowerPC/vec-min-max.ll index e29ef336879a..23ab95d64559 100644 --- a/llvm/test/CodeGen/PowerPC/vec-min-max.ll +++ b/llvm/test/CodeGen/PowerPC/vec-min-max.ll @@ -246,9 +246,9 @@ define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) { ; CHECK-NEXT: xxswapd 1, 34 ; CHECK-NEXT: cmpld 4, 3 ; CHECK-NEXT: cmpd 1, 4, 3 -; CHECK-NEXT: mfvsrd 3, 0 +; CHECK-NEXT: mffprd 3, 0 ; CHECK-NEXT: crandc 20, 4, 2 -; CHECK-NEXT: mfvsrd 4, 1 +; CHECK-NEXT: mffprd 4, 1 ; CHECK-NEXT: cmpld 1, 4, 3 ; CHECK-NEXT: bc 12, 20, .LBB12_3 ; CHECK-NEXT: # %bb.1: @@ -259,7 +259,7 @@ define i128 @invalidv1i128(<2 x i128> %v1, <2 x i128> %v2) { ; CHECK-NEXT: .LBB12_3: ; CHECK-NEXT: xxswapd 0, 34 ; CHECK-NEXT: mfvsrd 4, 34 -; CHECK-NEXT: mfvsrd 3, 0 +; CHECK-NEXT: mffprd 3, 0 ; CHECK-NEXT: blr ; ; NOP8VEC-LABEL: invalidv1i128: diff --git a/llvm/test/CodeGen/PowerPC/vec-trunc.ll b/llvm/test/CodeGen/PowerPC/vec-trunc.ll index 6df696a38e91..e8ca7bff7108 100644 --- a/llvm/test/CodeGen/PowerPC/vec-trunc.ll +++ b/llvm/test/CodeGen/PowerPC/vec-trunc.ll @@ -93,7 +93,7 @@ define void @test2i8(<2 x i8>* nocapture %Sink, <2 x i16>* nocapture readonly %S ; CHECK-NEXT: lvx v2, 0, r4 ; CHECK-NEXT: vpkuhum v2, v2, v2 ; CHECK-NEXT: xxswapd vs0, v2 -; CHECK-NEXT: mfvsrd r4, f0 +; CHECK-NEXT: mffprd r4, f0 ; CHECK-NEXT: clrldi r4, r4, 48 ; CHECK-NEXT: sth r4, 0(r3) ; CHECK-NEXT: blr diff --git 
a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll index cba2b5a5b8e4..3f7e0b694ce4 100644 --- a/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll +++ b/llvm/test/CodeGen/PowerPC/vec_add_sub_doubleword.ll @@ -36,7 +36,7 @@ define <2 x i64> @increment_by_one(<2 x i64> %x) nounwind { define <2 x i64> @increment_by_val(<2 x i64> %x, i64 %val) nounwind { ; VSX-LABEL: increment_by_val: ; VSX: # %bb.0: -; VSX-NEXT: mtvsrd 0, 5 +; VSX-NEXT: mtfprd 0, 5 ; VSX-NEXT: xxspltd 35, 0, 0 ; VSX-NEXT: vaddudm 2, 2, 3 ; VSX-NEXT: blr @@ -98,7 +98,7 @@ define <2 x i64> @decrement_by_one(<2 x i64> %x) nounwind { define <2 x i64> @decrement_by_val(<2 x i64> %x, i64 %val) nounwind { ; VSX-LABEL: decrement_by_val: ; VSX: # %bb.0: -; VSX-NEXT: mtvsrd 0, 5 +; VSX-NEXT: mtfprd 0, 5 ; VSX-NEXT: xxspltd 35, 0, 0 ; VSX-NEXT: vsubudm 2, 2, 3 ; VSX-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll b/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll index 8ddf0ad9b50d..b536144e9622 100644 --- a/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll +++ b/llvm/test/CodeGen/PowerPC/vec_add_sub_quadword.ll @@ -45,8 +45,8 @@ define <1 x i128> @increment_by_one(<1 x i128> %x) nounwind { define <1 x i128> @increment_by_val(<1 x i128> %x, i128 %val) nounwind { ; VSX-LABEL: increment_by_val: ; VSX: # %bb.0: -; VSX-NEXT: mtvsrd 0, 6 -; VSX-NEXT: mtvsrd 1, 5 +; VSX-NEXT: mtfprd 0, 6 +; VSX-NEXT: mtfprd 1, 5 ; VSX-NEXT: xxmrghd 35, 1, 0 ; VSX-NEXT: vadduqm 2, 2, 3 ; VSX-NEXT: blr @@ -96,8 +96,8 @@ define <1 x i128> @decrement_by_one(<1 x i128> %x) nounwind { define <1 x i128> @decrement_by_val(<1 x i128> %x, i128 %val) nounwind { ; VSX-LABEL: decrement_by_val: ; VSX: # %bb.0: -; VSX-NEXT: mtvsrd 0, 6 -; VSX-NEXT: mtvsrd 1, 5 +; VSX-NEXT: mtfprd 0, 6 +; VSX-NEXT: mtfprd 1, 5 ; VSX-NEXT: xxmrghd 35, 1, 0 ; VSX-NEXT: vsubuqm 2, 2, 3 ; VSX-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll index 631e3c4fd430..239b38e2ec70 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -12,37 +12,37 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: 
mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs1 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 @@ -52,16 +52,16 @@ define i32 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -89,23 +89,23 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 +; CHECK-P8-NEXT: mtfprd f3, r3 ; CHECK-P8-NEXT: xxswapd v3, vs2 ; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v4, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: @@ -113,26 +113,26 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglh v2, v4, v2 ; CHECK-P9-NEXT: vmrglw v2, v2, v3 @@ -144,25 +144,25 @@ define i64 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi 
r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: vmrghh v2, v4, v2 @@ -198,34 +198,34 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: mtfprd f0, r5 ; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mtfprd f4, r4 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v1, vs4 ; CHECK-P8-NEXT: vmrglh v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 ; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v6, vs3 ; CHECK-P8-NEXT: xxswapd v0, vs0 ; CHECK-P8-NEXT: vmrglh v3, v3, v4 @@ -243,24 +243,24 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, 
f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 @@ -269,24 +269,24 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -302,13 +302,13 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: sldi r3, r3, 48 @@ -317,16 +317,16 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -334,7 +334,7 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r3, r3, 48 @@ -343,10 +343,10 @@ define <8 x i16> @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr # ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghh v4, 
v4, v5 @@ -399,56 +399,56 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P8-NEXT: xscvdpsxws f6, f6 ; CHECK-P8-NEXT: xscvspdpn f12, vs12 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvspdpn f11, vs11 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvspdpn v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mfvsrwz r6, f2 +; CHECK-P8-NEXT: mffprwz r6, f2 ; CHECK-P8-NEXT: xscvspdpn f13, vs13 ; CHECK-P8-NEXT: xscvspdpn v3, v3 ; CHECK-P8-NEXT: xscvdpsxws f10, f10 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f2, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f6 +; CHECK-P8-NEXT: mtfprd f2, r6 +; CHECK-P8-NEXT: mffprwz r6, f6 ; CHECK-P8-NEXT: xscvdpsxws f12, f12 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f3 +; CHECK-P8-NEXT: mtfprd f6, r6 +; CHECK-P8-NEXT: mffprwz r6, f3 ; CHECK-P8-NEXT: xscvdpsxws v2, v2 ; CHECK-P8-NEXT: xxswapd v9, vs6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f8 -; CHECK-P8-NEXT: mtvsrd f3, r6 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f8 +; CHECK-P8-NEXT: mtfprd f3, r6 ; CHECK-P8-NEXT: xxswapd v0, vs5 -; CHECK-P8-NEXT: mfvsrwz r6, f7 +; CHECK-P8-NEXT: mffprwz r6, f7 ; CHECK-P8-NEXT: xscvdpsxws f13, f13 ; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: xscvdpsxws v3, v3 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f10 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f9 -; CHECK-P8-NEXT: mtvsrd f10, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f12 -; CHECK-P8-NEXT: mtvsrd f9, r6 +; CHECK-P8-NEXT: mtfprd f8, r4 +; CHECK-P8-NEXT: mffprwz r4, f10 +; CHECK-P8-NEXT: mtfprd f7, r6 +; CHECK-P8-NEXT: mffprwz r6, f9 +; CHECK-P8-NEXT: mtfprd f10, r4 +; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: mtfprd f9, r6 ; CHECK-P8-NEXT: xxswapd v6, vs10 -; CHECK-P8-NEXT: mfvsrwz r6, f11 -; CHECK-P8-NEXT: mtvsrd f12, r4 +; CHECK-P8-NEXT: mffprwz r6, f11 +; CHECK-P8-NEXT: mtfprd f12, r4 ; CHECK-P8-NEXT: xxswapd v1, vs9 ; CHECK-P8-NEXT: mfvsrwz r4, v2 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f11, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f11, r6 +; CHECK-P8-NEXT: mffprwz r6, f13 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v7, vs11 ; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 @@ -456,8 +456,8 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P8-NEXT: vmrglh v2, v2, v0 ; CHECK-P8-NEXT: xxswapd v5, vs8 ; CHECK-P8-NEXT: xxswapd v0, vs2 -; CHECK-P8-NEXT: mtvsrd f13, r6 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f13, r6 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: vmrglh v4, v5, v4 ; CHECK-P8-NEXT: vmrglh v5, v0, v1 @@ -502,14 +502,14 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: xscvdpsxws 
f6, f6 -; CHECK-P9-NEXT: mfvsrwz r5, f5 +; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f2 +; CHECK-P9-NEXT: mtfprd f5, r5 +; CHECK-P9-NEXT: mffprwz r5, f8 +; CHECK-P9-NEXT: mtfprd f8, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3 ; CHECK-P9-NEXT: xxswapd vs10, vs0 @@ -517,40 +517,40 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: xscvspdpn f10, vs10 ; CHECK-P9-NEXT: xscvdpsxws f9, f9 ; CHECK-P9-NEXT: xscvdpsxws f10, f10 -; CHECK-P9-NEXT: mtvsrd f2, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f1 -; CHECK-P9-NEXT: mtvsrd f1, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 +; CHECK-P9-NEXT: mtfprd f2, r5 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: mtfprd f4, r5 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: mtfprd f1, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 ; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd v3, vs4 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f7 +; CHECK-P9-NEXT: mtfprd f6, r5 +; CHECK-P9-NEXT: mffprwz r5, f7 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs5 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: mtfprd f7, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xxswapd v5, vs7 -; CHECK-P9-NEXT: mtvsrd f3, r5 +; CHECK-P9-NEXT: mtfprd f3, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xxswapd v0, vs3 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: xxswapd v5, vs8 ; CHECK-P9-NEXT: vmrglh v5, v5, v0 -; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f0 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtfprd f2, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: vmrglw v3, v5, v4 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 @@ -558,36 +558,36 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, vs1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: vmrglh v2, v4, v2 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 
-; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f10 -; CHECK-P9-NEXT: mtvsrd f10, r5 +; CHECK-P9-NEXT: mffprwz r5, f9 +; CHECK-P9-NEXT: mtfprd f9, r5 +; CHECK-P9-NEXT: mffprwz r5, f10 +; CHECK-P9-NEXT: mtfprd f10, r5 ; CHECK-P9-NEXT: xxswapd v0, vs9 ; CHECK-P9-NEXT: xxswapd v1, vs10 ; CHECK-P9-NEXT: vmrglh v0, v1, v0 ; CHECK-P9-NEXT: vmrglw v2, v2, v0 ; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: xxswapd v5, vs0 ; CHECK-P9-NEXT: vmrglh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 @@ -604,14 +604,14 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mfvsrwz r5, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f4 ; CHECK-BE-NEXT: lxv vs0, 0(r4) ; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 @@ -619,22 +619,22 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mtvsrd v4, r5 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r5, r5, 48 @@ -643,11 +643,11 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: mtvsrd v5, r5 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: lxv vs1, 48(r4) ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 ; CHECK-BE-NEXT: lxv vs0, 32(r4) ; CHECK-BE-NEXT: xscvspdpn f5, vs1 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 @@ -663,26 +663,26 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mfvsrwz r4, f5 +; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; 
CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -690,7 +690,7 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r4, r4, 48 @@ -699,10 +699,10 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x flo ; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v5, r4 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 @@ -720,37 +720,37 @@ entry: define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs1 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 @@ -760,16 +760,16 @@ define i32 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; 
CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -797,23 +797,23 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 +; CHECK-P8-NEXT: mtfprd f3, r3 ; CHECK-P8-NEXT: xxswapd v3, vs2 ; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v4, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: @@ -821,26 +821,26 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglh v2, v4, v2 ; CHECK-P9-NEXT: vmrglw v2, v2, v3 @@ -852,25 +852,25 @@ define i64 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: vmrghh v2, v4, v2 @@ -906,34 +906,34 @@ define <8 x i16> 
@test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: mtfprd f0, r5 ; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mtfprd f4, r4 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v1, vs4 ; CHECK-P8-NEXT: vmrglh v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 ; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v6, vs3 ; CHECK-P8-NEXT: xxswapd v0, vs0 ; CHECK-P8-NEXT: vmrglh v3, v3, v4 @@ -951,24 +951,24 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 @@ -977,24 +977,24 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; 
CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -1010,13 +1010,13 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: sldi r3, r3, 48 @@ -1025,16 +1025,16 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -1042,7 +1042,7 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r3, r3, 48 @@ -1051,10 +1051,10 @@ define <8 x i16> @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 @@ -1107,56 +1107,56 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P8-NEXT: xscvdpsxws f6, f6 ; CHECK-P8-NEXT: xscvspdpn f12, vs12 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvspdpn f11, vs11 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvspdpn v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mtfprd f0, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mfvsrwz r6, f2 +; CHECK-P8-NEXT: mffprwz r6, f2 ; CHECK-P8-NEXT: xscvspdpn f13, vs13 ; CHECK-P8-NEXT: xscvspdpn v3, v3 ; CHECK-P8-NEXT: xscvdpsxws f10, f10 -; 
CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f2, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f6 +; CHECK-P8-NEXT: mtfprd f2, r6 +; CHECK-P8-NEXT: mffprwz r6, f6 ; CHECK-P8-NEXT: xscvdpsxws f12, f12 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f3 +; CHECK-P8-NEXT: mtfprd f6, r6 +; CHECK-P8-NEXT: mffprwz r6, f3 ; CHECK-P8-NEXT: xscvdpsxws v2, v2 ; CHECK-P8-NEXT: xxswapd v9, vs6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f8 -; CHECK-P8-NEXT: mtvsrd f3, r6 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f8 +; CHECK-P8-NEXT: mtfprd f3, r6 ; CHECK-P8-NEXT: xxswapd v0, vs5 -; CHECK-P8-NEXT: mfvsrwz r6, f7 +; CHECK-P8-NEXT: mffprwz r6, f7 ; CHECK-P8-NEXT: xscvdpsxws f13, f13 ; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: xscvdpsxws v3, v3 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f10 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f9 -; CHECK-P8-NEXT: mtvsrd f10, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f12 -; CHECK-P8-NEXT: mtvsrd f9, r6 +; CHECK-P8-NEXT: mtfprd f8, r4 +; CHECK-P8-NEXT: mffprwz r4, f10 +; CHECK-P8-NEXT: mtfprd f7, r6 +; CHECK-P8-NEXT: mffprwz r6, f9 +; CHECK-P8-NEXT: mtfprd f10, r4 +; CHECK-P8-NEXT: mffprwz r4, f12 +; CHECK-P8-NEXT: mtfprd f9, r6 ; CHECK-P8-NEXT: xxswapd v6, vs10 -; CHECK-P8-NEXT: mfvsrwz r6, f11 -; CHECK-P8-NEXT: mtvsrd f12, r4 +; CHECK-P8-NEXT: mffprwz r6, f11 +; CHECK-P8-NEXT: mtfprd f12, r4 ; CHECK-P8-NEXT: xxswapd v1, vs9 ; CHECK-P8-NEXT: mfvsrwz r4, v2 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f11, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f11, r6 +; CHECK-P8-NEXT: mffprwz r6, f13 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v7, vs11 ; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 @@ -1164,8 +1164,8 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P8-NEXT: vmrglh v2, v2, v0 ; CHECK-P8-NEXT: xxswapd v5, vs8 ; CHECK-P8-NEXT: xxswapd v0, vs2 -; CHECK-P8-NEXT: mtvsrd f13, r6 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f13, r6 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: vmrglh v4, v5, v4 ; CHECK-P8-NEXT: vmrglh v5, v0, v1 @@ -1210,14 +1210,14 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mfvsrwz r5, f5 +; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f2 +; CHECK-P9-NEXT: mtfprd f5, r5 +; CHECK-P9-NEXT: mffprwz r5, f8 +; CHECK-P9-NEXT: mtfprd f8, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3 ; CHECK-P9-NEXT: xxswapd vs10, vs0 @@ -1225,40 +1225,40 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xscvspdpn f10, vs10 ; CHECK-P9-NEXT: xscvdpsxws f9, f9 ; CHECK-P9-NEXT: xscvdpsxws f10, f10 -; 
CHECK-P9-NEXT: mtvsrd f2, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f1 -; CHECK-P9-NEXT: mtvsrd f1, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 +; CHECK-P9-NEXT: mtfprd f2, r5 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: mtfprd f4, r5 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: mtfprd f1, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 ; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd v3, vs4 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f7 +; CHECK-P9-NEXT: mtfprd f6, r5 +; CHECK-P9-NEXT: mffprwz r5, f7 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs5 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: mtfprd f7, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xxswapd v5, vs7 -; CHECK-P9-NEXT: mtvsrd f3, r5 +; CHECK-P9-NEXT: mtfprd f3, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xxswapd v0, vs3 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: xxswapd v5, vs8 ; CHECK-P9-NEXT: vmrglh v5, v5, v0 -; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f0 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtfprd f2, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: vmrglw v3, v5, v4 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 @@ -1266,36 +1266,36 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, vs1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: vmrglh v2, v4, v2 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f10 -; CHECK-P9-NEXT: mtvsrd f10, r5 +; CHECK-P9-NEXT: mffprwz r5, f9 +; CHECK-P9-NEXT: mtfprd f9, r5 +; CHECK-P9-NEXT: mffprwz r5, f10 +; CHECK-P9-NEXT: mtfprd f10, r5 ; CHECK-P9-NEXT: xxswapd v0, vs9 ; CHECK-P9-NEXT: xxswapd v1, vs10 ; CHECK-P9-NEXT: vmrglh v0, v1, v0 ; CHECK-P9-NEXT: vmrglw v2, v2, v0 ; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: xxswapd v5, vs0 ; CHECK-P9-NEXT: vmrglh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 @@ -1312,14 +1312,14 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret 
%agg.result, <1 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mfvsrwz r5, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f4 ; CHECK-BE-NEXT: lxv vs0, 0(r4) ; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 @@ -1327,22 +1327,22 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v3, r5 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mtvsrd v4, r5 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r5, r5, 48 @@ -1351,11 +1351,11 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: mtvsrd v5, r5 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: lxv vs1, 48(r4) ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 ; CHECK-BE-NEXT: lxv vs0, 32(r4) ; CHECK-BE-NEXT: xscvspdpn f5, vs1 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 @@ -1371,26 +1371,26 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mfvsrwz r4, f5 +; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: stxv vs4, 0(r3) ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -1398,7 +1398,7 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: vmrghw v2, v2, v3 ; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; 
CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r4, r4, 48 @@ -1407,10 +1407,10 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v5, r4 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll index d355dcd08b0f..fe87bea1c138 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll @@ -12,7 +12,7 @@ define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 ; CHECK-P8-NEXT: xvcvspdp vs0, vs0 @@ -21,7 +21,7 @@ define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 ; CHECK-P9-NEXT: xvcvspdp vs0, vs0 @@ -30,7 +30,7 @@ define <2 x i64> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-BE-NEXT: xvcvdpuxds v2, vs0 @@ -311,7 +311,7 @@ entry: define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 ; CHECK-P8-NEXT: xvcvspdp vs0, vs0 @@ -320,7 +320,7 @@ define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 ; CHECK-P9-NEXT: xvcvspdp vs0, vs0 @@ -329,7 +329,7 @@ define <2 x i64> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-BE-NEXT: xvcvspdp vs0, vs0 ; CHECK-BE-NEXT: xvcvdpuxds v2, vs0 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll index ae1b70d0be97..54c009ccf00f 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -12,22 +12,22 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: 
mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) @@ -35,17 +35,17 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs1 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: vmrglb v2, v3, v2 @@ -57,16 +57,16 @@ define i16 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 @@ -96,23 +96,23 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 +; CHECK-P8-NEXT: mtfprd f3, r3 ; CHECK-P8-NEXT: xxswapd v3, vs2 ; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: vmrglb v3, v4, v5 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: @@ -120,26 +120,26 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; 
CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglb v2, v4, v2 @@ -152,25 +152,25 @@ define i32 @test4elt(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -207,34 +207,34 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: mtfprd f0, r5 ; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mtfprd f4, r4 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v1, vs4 ; CHECK-P8-NEXT: vmrglb v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 ; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v6, vs3 ; CHECK-P8-NEXT: xxswapd v0, vs0 ; CHECK-P8-NEXT: vmrglb v3, v3, v4 @@ -244,7 
+244,7 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -254,24 +254,24 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 @@ -280,24 +280,24 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -314,13 +314,13 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -329,16 +329,16 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghb v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; 
CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -346,7 +346,7 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -355,10 +355,10 @@ define i64 @test8elt(<8 x float>* nocapture readonly) local_unnamed_addr #2 { ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 @@ -400,47 +400,47 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mtfprd f2, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvdpsxws f0, f5 ; CHECK-P8-NEXT: xxswapd v0, vs4 ; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f1, f6 ; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mtvsrd f6, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: mtfprd f6, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: xscvdpsxws f3, f7 ; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtfprd f7, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvdpsxws f0, f8 ; CHECK-P8-NEXT: xxswapd v5, vs7 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f1, f9 ; CHECK-P8-NEXT: xxswapd v1, vs8 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: mtfprd f9, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: vmrglb v3, v4, v3 ; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v6, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvspdpn f0, v2 ; CHECK-P8-NEXT: xxswapd v7, vs3 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: vmrglb v4, v4, v5 ; CHECK-P8-NEXT: xxswapd v5, vs5 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: 
mtfprd f1, r4 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: lvx v9, r3, r4 ; CHECK-P8-NEXT: vmrglb v1, v6, v1 @@ -460,23 +460,23 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v9, vs4 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xxswapd v6, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: vmrglb v2, v0, v7 ; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v7, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: vmrglb v5, v8, v5 ; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: xxswapd v10, vs3 @@ -501,24 +501,24 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs4, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 @@ -527,26 +527,26 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs4 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: vmrglb v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: 
xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 @@ -554,24 +554,24 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 @@ -580,24 +580,24 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v4, v5, v4 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxswapd v0, vs0 @@ -616,13 +616,13 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -631,16 +631,16 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: vmrghb v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; 
CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 @@ -648,7 +648,7 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -657,16 +657,16 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 @@ -675,7 +675,7 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -684,16 +684,16 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -701,7 +701,7 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -710,10 +710,10 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghb v4, v5, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v0, r3 ; CHECK-BE-NEXT: vmrghb v5, 
v5, v0 @@ -730,22 +730,22 @@ entry: define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) @@ -753,17 +753,17 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xxsldwi vs1, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs1 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: vmrglb v2, v3, v2 @@ -775,16 +775,16 @@ define i16 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 @@ -814,23 +814,23 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 +; CHECK-P8-NEXT: mtfprd f3, r3 ; CHECK-P8-NEXT: xxswapd v3, vs2 ; CHECK-P8-NEXT: xxswapd v5, vs3 ; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: vmrglb v3, v4, v5 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; 
CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: @@ -838,26 +838,26 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglb v2, v4, v2 @@ -870,25 +870,25 @@ define i32 @test4elt_signed(<4 x float> %a) local_unnamed_addr #1 { ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -925,34 +925,34 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: mffprwz r5, f0 +; CHECK-P8-NEXT: mtfprd f1, r6 +; CHECK-P8-NEXT: mtfprd f0, r5 ; CHECK-P8-NEXT: xxswapd v4, vs1 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mtfprd f4, r4 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v1, vs4 ; CHECK-P8-NEXT: vmrglb v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 
-; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprwz r3, f1 ; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v6, vs3 ; CHECK-P8-NEXT: xxswapd v0, vs0 ; CHECK-P8-NEXT: vmrglb v3, v3, v4 @@ -962,7 +962,7 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -972,24 +972,24 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 @@ -998,24 +998,24 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -1032,13 +1032,13 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; 
CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1047,16 +1047,16 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghb v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -1064,7 +1064,7 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1073,10 +1073,10 @@ define i64 @test8elt_signed(<8 x float>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 @@ -1118,47 +1118,47 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mtfprd f2, r4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvdpsxws f0, f5 ; CHECK-P8-NEXT: xxswapd v0, vs4 ; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f1, f6 ; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mtvsrd f6, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: mtfprd f6, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: xscvdpsxws f3, f7 ; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtfprd f7, r4 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvdpsxws f0, f8 ; CHECK-P8-NEXT: xxswapd v5, vs7 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f8, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xscvdpsxws f1, f9 ; CHECK-P8-NEXT: xxswapd v1, vs8 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: 
mtfprd f9, r4 +; CHECK-P8-NEXT: mffprwz r4, f3 ; CHECK-P8-NEXT: vmrglb v3, v4, v3 ; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v6, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvspdpn f0, v2 ; CHECK-P8-NEXT: xxswapd v7, vs3 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: vmrglb v4, v4, v5 ; CHECK-P8-NEXT: xxswapd v5, vs5 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: li r4, 48 ; CHECK-P8-NEXT: lvx v9, r3, r4 ; CHECK-P8-NEXT: vmrglb v1, v6, v1 @@ -1178,23 +1178,23 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mffprwz r4, f4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f2 ; CHECK-P8-NEXT: xxswapd v9, vs4 -; CHECK-P8-NEXT: mtvsrd f1, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: mtfprd f1, r3 +; CHECK-P8-NEXT: mffprwz r3, f3 +; CHECK-P8-NEXT: mtfprd f2, r4 ; CHECK-P8-NEXT: xxswapd v6, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: vmrglb v2, v0, v7 ; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v7, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: vmrglb v5, v8, v5 ; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: xxswapd v10, vs3 @@ -1219,24 +1219,24 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs4, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 @@ -1245,26 +1245,26 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs4 ; 
CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: vmrglb v3, v4, v3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 @@ -1272,24 +1272,24 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 @@ -1298,24 +1298,24 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v4, v5, v4 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxswapd v0, vs0 @@ -1334,13 +1334,13 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 ; CHECK-BE-NEXT: sldi r3, 
r3, 56 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1349,16 +1349,16 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: vmrghb v2, v3, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 @@ -1366,7 +1366,7 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1375,16 +1375,16 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 @@ -1393,7 +1393,7 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1402,16 +1402,16 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: vmrghb v3, v4, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 @@ -1419,7 +1419,7 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; 
CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 ; CHECK-BE-NEXT: sldi r3, r3, 56 @@ -1428,10 +1428,10 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnam ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: vmrghb v4, v5, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v0, r3 ; CHECK-BE-NEXT: vmrghb v5, v5, v0 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll index 9e87876f81f1..c7d66ae784a0 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -15,27 +15,27 @@ define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 @@ -45,12 +45,12 @@ define i32 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -75,23 +75,23 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xxswapd v2, vs2 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xxswapd v5, vs1 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh 
v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: @@ -101,19 +101,19 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd v4, vs0 @@ -129,20 +129,20 @@ define i64 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 @@ -176,30 +176,30 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mffprwz r3, f6 +; CHECK-P8-NEXT: mtfprd f5, r4 ; CHECK-P8-NEXT: xxswapd v2, vs4 -; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: mtfprd f6, r3 ; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f7, r4 ; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v1, vs7 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 ; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: 
mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v6, vs2 ; CHECK-P8-NEXT: vmrglh v2, v5, v2 ; CHECK-P8-NEXT: xxswapd v5, vs0 @@ -220,40 +220,40 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -271,41 +271,41 @@ define <8 x i16> @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: 
mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 @@ -350,63 +350,63 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r6, f6 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f8 +; CHECK-P8-NEXT: mffprwz r6, f6 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r4, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xxswapd v4, vs4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f10 -; CHECK-P8-NEXT: mtvsrd f8, r4 +; CHECK-P8-NEXT: mtfprd f6, r6 +; CHECK-P8-NEXT: mffprwz r6, f10 +; CHECK-P8-NEXT: mtfprd f8, r4 ; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mfvsrwz r4, f12 +; CHECK-P8-NEXT: mffprwz r4, f12 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f12, r4 +; CHECK-P8-NEXT: mtfprd f10, r6 +; CHECK-P8-NEXT: mffprwz r6, f13 +; CHECK-P8-NEXT: mtfprd f12, r4 ; CHECK-P8-NEXT: xxswapd v1, vs10 ; CHECK-P8-NEXT: mfvsrwz r4, v2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xxswapd v6, vs12 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f13, r6 +; CHECK-P8-NEXT: mtfprd f13, r6 ; CHECK-P8-NEXT: mfvsrwz r6, v3 ; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: mtvsrd v3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v3, v3 -; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: mtvsrd f1, r6 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtfprd f1, r6 ; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mtfprd f2, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mfvsrwz r6, f3 +; CHECK-P8-NEXT: mffprwz r6, f3 ; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f9 -; CHECK-P8-NEXT: mtvsrd f3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f7 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f11 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f9 +; CHECK-P8-NEXT: mtfprd f3, r6 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: mtfprd f9, r4 +; CHECK-P8-NEXT: mffprwz r4, f11 ; CHECK-P8-NEXT: vmrglh v4, v8, v4 ; CHECK-P8-NEXT: xxswapd v8, vs3 ; CHECK-P8-NEXT: vmrglh v5, v9, v5 ; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f7, r6 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: vmrglh v0, v10, v0 ; CHECK-P8-NEXT: xxswapd v10, vs7 ; CHECK-P8-NEXT: vmrglh v1, v8, v1 @@ -439,32 +439,32 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-P9-NEXT: xscvdpsxws f8, f1 ; 
CHECK-P9-NEXT: xxswapd vs4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mfvsrwz r5, f5 +; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: xscvdpsxws f9, f0 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 +; CHECK-P9-NEXT: mtfprd f5, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f7 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: mtfprd f6, r5 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: mtfprd f7, r5 +; CHECK-P9-NEXT: mffprwz r5, f8 +; CHECK-P9-NEXT: mtfprd f8, r5 +; CHECK-P9-NEXT: mffprwz r5, f9 +; CHECK-P9-NEXT: mtfprd f9, r5 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: mtfprd f4, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xxswapd v2, vs5 ; CHECK-P9-NEXT: xxswapd v5, vs8 ; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: mtvsrd f3, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f2 -; CHECK-P9-NEXT: mtvsrd f2, r5 +; CHECK-P9-NEXT: mtfprd f3, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: mtfprd f2, r5 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xxswapd v1, vs2 @@ -475,49 +475,49 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mfvsrwz r5, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs7 -; CHECK-P9-NEXT: mtvsrd f1, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f0 +; CHECK-P9-NEXT: mtfprd f1, r5 +; CHECK-P9-NEXT: mffprwz r5, f0 ; CHECK-P9-NEXT: vmrglh v4, v4, v1 ; CHECK-P9-NEXT: xxswapd v1, vs1 -; CHECK-P9-NEXT: mtvsrd f0, r5 +; CHECK-P9-NEXT: mtfprd f0, r5 ; CHECK-P9-NEXT: vmrglh v5, v5, v1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xxswapd v1, vs0 ; CHECK-P9-NEXT: lxv vs0, 112(r4) ; CHECK-P9-NEXT: lxv vs1, 96(r4) -; CHECK-P9-NEXT: mfvsrwz r4, f3 -; CHECK-P9-NEXT: mtvsrd f3, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f2 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: mtfprd f3, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: vmrglw v3, v5, v4 ; CHECK-P9-NEXT: xxmrgld vs4, v3, v2 ; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: vmrglh v0, v0, v1 -; CHECK-P9-NEXT: mtvsrd f2, r4 +; CHECK-P9-NEXT: mtfprd f2, r4 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtfprd f2, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mtfprd f1, r4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f0 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mtfprd f1, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs2 
; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: xxswapd v5, vs0 ; CHECK-P9-NEXT: vmrglh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 @@ -535,7 +535,7 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-NEXT: xscvdpsxws f6, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mfvsrwz r5, f5 +; CHECK-BE-NEXT: mffprwz r5, f5 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: lxv vs2, 16(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f3 @@ -543,40 +543,40 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f4 +; CHECK-BE-NEXT: mffprwz r5, f4 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: xscvdpsxws f4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f6 +; CHECK-BE-NEXT: mffprwz r5, f6 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: lxv vs0, 112(r4) ; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f7 +; CHECK-BE-NEXT: mffprwz r5, f7 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f4 +; CHECK-BE-NEXT: mffprwz r5, f4 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: lxv vs2, 96(r4) ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: lxv vs1, 80(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 @@ -585,34 +585,34 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x dou ; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghh v5, v5, v1 -; CHECK-BE-NEXT: mfvsrwz r5, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 ; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: mfvsrwz r4, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 ; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v3, r4 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r4, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; 
CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: vmrghh v0, v0, v1 @@ -638,27 +638,27 @@ define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 @@ -668,12 +668,12 @@ define i32 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -698,23 +698,23 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xxswapd v2, vs2 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xxswapd v5, vs1 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: @@ -724,19 +724,19 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, 
f1 ; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd v4, vs0 @@ -752,20 +752,20 @@ define i64 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 @@ -799,30 +799,30 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mffprwz r3, f6 +; CHECK-P8-NEXT: mtfprd f5, r4 ; CHECK-P8-NEXT: xxswapd v2, vs4 -; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: mtfprd f6, r3 ; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f7, r4 ; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v1, vs7 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 ; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v6, vs2 ; CHECK-P8-NEXT: vmrglh v2, v5, v2 ; CHECK-P8-NEXT: xxswapd v5, vs0 @@ -843,40 +843,40 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd v2, vs4 -; 
CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -894,41 +894,41 @@ define <8 x i16> @test8elt_signed(<8 x double>* nocapture readonly) local_unname ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghh v4, v4, v5 @@ -973,63 +973,63 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mffprwz r4, f4 ; CHECK-P8-NEXT: xscvdpsxws 
v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r6, f6 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f8 +; CHECK-P8-NEXT: mffprwz r6, f6 +; CHECK-P8-NEXT: mtfprd f4, r4 +; CHECK-P8-NEXT: mffprwz r4, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xxswapd v4, vs4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f10 -; CHECK-P8-NEXT: mtvsrd f8, r4 +; CHECK-P8-NEXT: mtfprd f6, r6 +; CHECK-P8-NEXT: mffprwz r6, f10 +; CHECK-P8-NEXT: mtfprd f8, r4 ; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mfvsrwz r4, f12 +; CHECK-P8-NEXT: mffprwz r4, f12 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f12, r4 +; CHECK-P8-NEXT: mtfprd f10, r6 +; CHECK-P8-NEXT: mffprwz r6, f13 +; CHECK-P8-NEXT: mtfprd f12, r4 ; CHECK-P8-NEXT: xxswapd v1, vs10 ; CHECK-P8-NEXT: mfvsrwz r4, v2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xxswapd v6, vs12 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f13, r6 +; CHECK-P8-NEXT: mtfprd f13, r6 ; CHECK-P8-NEXT: mfvsrwz r6, v3 ; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mffprwz r4, f0 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: mtvsrd v3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mffprwz r6, f1 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v3, v3 -; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: mtvsrd f1, r6 +; CHECK-P8-NEXT: mffprwz r4, f2 +; CHECK-P8-NEXT: mtfprd f1, r6 ; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mtfprd f2, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 ; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mfvsrwz r6, f3 +; CHECK-P8-NEXT: mffprwz r6, f3 ; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f9 -; CHECK-P8-NEXT: mtvsrd f3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f7 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f11 +; CHECK-P8-NEXT: mtfprd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f9 +; CHECK-P8-NEXT: mtfprd f3, r6 +; CHECK-P8-NEXT: mffprwz r6, f7 +; CHECK-P8-NEXT: mtfprd f9, r4 +; CHECK-P8-NEXT: mffprwz r4, f11 ; CHECK-P8-NEXT: vmrglh v4, v8, v4 ; CHECK-P8-NEXT: xxswapd v8, vs3 ; CHECK-P8-NEXT: vmrglh v5, v9, v5 ; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f7, r6 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: vmrglh v0, v10, v0 ; CHECK-P8-NEXT: xxswapd v10, vs7 ; CHECK-P8-NEXT: vmrglh v1, v8, v1 @@ -1062,32 +1062,32 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xscvdpsxws f8, f1 ; CHECK-P9-NEXT: xxswapd vs4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mfvsrwz r5, f5 +; CHECK-P9-NEXT: mffprwz r5, f5 ; CHECK-P9-NEXT: xscvdpsxws f9, f0 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 +; CHECK-P9-NEXT: mtfprd f5, r5 +; CHECK-P9-NEXT: mffprwz r5, f6 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f7 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: 
mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: mtfprd f6, r5 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: mtfprd f7, r5 +; CHECK-P9-NEXT: mffprwz r5, f8 +; CHECK-P9-NEXT: mtfprd f8, r5 +; CHECK-P9-NEXT: mffprwz r5, f9 +; CHECK-P9-NEXT: mtfprd f9, r5 +; CHECK-P9-NEXT: mffprwz r5, f4 +; CHECK-P9-NEXT: mtfprd f4, r5 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xxswapd v2, vs5 ; CHECK-P9-NEXT: xxswapd v5, vs8 ; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: mtvsrd f3, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f2 -; CHECK-P9-NEXT: mtvsrd f2, r5 +; CHECK-P9-NEXT: mtfprd f3, r5 +; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: mtfprd f2, r5 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: xxswapd v1, vs2 @@ -1098,49 +1098,49 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mfvsrwz r5, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs7 -; CHECK-P9-NEXT: mtvsrd f1, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f0 +; CHECK-P9-NEXT: mtfprd f1, r5 +; CHECK-P9-NEXT: mffprwz r5, f0 ; CHECK-P9-NEXT: vmrglh v4, v4, v1 ; CHECK-P9-NEXT: xxswapd v1, vs1 -; CHECK-P9-NEXT: mtvsrd f0, r5 +; CHECK-P9-NEXT: mtfprd f0, r5 ; CHECK-P9-NEXT: vmrglh v5, v5, v1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xxswapd v1, vs0 ; CHECK-P9-NEXT: lxv vs0, 112(r4) ; CHECK-P9-NEXT: lxv vs1, 96(r4) -; CHECK-P9-NEXT: mfvsrwz r4, f3 -; CHECK-P9-NEXT: mtvsrd f3, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f2 +; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: mtfprd f3, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: vmrglw v3, v5, v4 ; CHECK-P9-NEXT: xxmrgld vs4, v3, v2 ; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: vmrglh v0, v0, v1 -; CHECK-P9-NEXT: mtvsrd f2, r4 +; CHECK-P9-NEXT: mtfprd f2, r4 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 +; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mtfprd f2, r4 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mtfprd f1, r4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f0 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mtfprd f1, r4 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: xxswapd v5, vs0 ; CHECK-P9-NEXT: vmrglh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 @@ -1158,7 +1158,7 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: xscvdpsxws f6, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mfvsrwz r5, f5 +; CHECK-BE-NEXT: mffprwz r5, f5 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; 
CHECK-BE-NEXT: lxv vs2, 16(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f3 @@ -1166,40 +1166,40 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f4 +; CHECK-BE-NEXT: mffprwz r5, f4 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: lxv vs1, 0(r4) ; CHECK-BE-NEXT: xscvdpsxws f4, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f6 +; CHECK-BE-NEXT: mffprwz r5, f6 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: lxv vs0, 112(r4) ; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f7 +; CHECK-BE-NEXT: mffprwz r5, f7 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f4 +; CHECK-BE-NEXT: mffprwz r5, f4 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f3 +; CHECK-BE-NEXT: mffprwz r5, f3 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: lxv vs2, 96(r4) ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: mfvsrwz r5, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: lxv vs1, 80(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 @@ -1208,34 +1208,34 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <1 ; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: vmrghh v5, v5, v1 -; CHECK-BE-NEXT: mfvsrwz r5, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 ; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: mfvsrwz r4, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: vmrghw v3, v5, v4 ; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 ; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v3, r4 ; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r4, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r4, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: sldi r4, r4, 48 ; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mfvsrwz r4, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: sldi r5, r5, 48 ; CHECK-BE-NEXT: mtvsrd v1, r5 ; CHECK-BE-NEXT: vmrghh v0, v0, v1 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll index 73ae5174ecbf..369fb3f10100 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -15,15 +15,15 @@ define i64 
@test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpuxws f1, v2 ; CHECK-P8-NEXT: xscvdpuxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: @@ -309,15 +309,15 @@ define i64 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll index 5e4751e554e1..fb13d1bd71f5 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -15,15 +15,15 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglb v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) @@ -32,13 +32,13 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglb v2, v3, v2 @@ -50,12 +50,12 @@ define i16 @test2elt(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: 
addi r3, r1, -2 @@ -82,23 +82,23 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xxswapd v2, vs2 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xxswapd v5, vs1 ; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: vmrglb v3, v5, v4 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt: @@ -108,19 +108,19 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd v4, vs0 @@ -137,20 +137,20 @@ define i32 @test4elt(<4 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghb v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -185,30 +185,30 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: mtfprd f4, r3 +; 
CHECK-P8-NEXT: mffprwz r3, f6 +; CHECK-P8-NEXT: mtfprd f5, r4 ; CHECK-P8-NEXT: xxswapd v2, vs4 -; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: mtfprd f6, r3 ; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f7, r4 ; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v1, vs7 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 ; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v6, vs2 ; CHECK-P8-NEXT: vmrglb v2, v5, v2 ; CHECK-P8-NEXT: xxswapd v5, vs0 @@ -219,7 +219,7 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt: @@ -231,40 +231,40 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -283,41 +283,41 @@ define i64 @test8elt(<8 x double>* nocapture readonly) local_unnamed_addr #1 { ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) -; 
CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghb v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 @@ -364,63 +364,63 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f8 +; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mffprwz r3, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: mtvsrd f6, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f10 +; CHECK-P8-NEXT: mtfprd f6, r4 +; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mtvsrd f8, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f12 +; CHECK-P8-NEXT: mtfprd f8, r3 +; CHECK-P8-NEXT: mffprwz r3, f12 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f13 +; CHECK-P8-NEXT: mtfprd f10, r4 +; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mtvsrd f12, r3 +; CHECK-P8-NEXT: mtfprd f12, r3 ; CHECK-P8-NEXT: mfvsrwz r3, v2 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: mtvsrd f13, r4 +; CHECK-P8-NEXT: mtfprd f13, r4 ; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 ; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v3, v3 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz 
r3, f2 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 ; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mfvsrwz r3, f5 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f5, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f9 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f11 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: mtfprd f5, r3 +; CHECK-P8-NEXT: mffprwz r3, f9 +; CHECK-P8-NEXT: mtfprd f7, r4 +; CHECK-P8-NEXT: mffprwz r4, f11 ; CHECK-P8-NEXT: vmrglb v4, v8, v4 ; CHECK-P8-NEXT: xxswapd v8, vs3 ; CHECK-P8-NEXT: vmrglb v5, v9, v5 ; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: vmrglb v0, v10, v0 ; CHECK-P8-NEXT: xxswapd v10, vs7 ; CHECK-P8-NEXT: vmrglb v1, v8, v1 @@ -452,40 +452,40 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) ; CHECK-P9-NEXT: lxv vs6, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f8 -; CHECK-P9-NEXT: mtvsrd f8, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f7 +; CHECK-P9-NEXT: mffprwz r3, f8 +; CHECK-P9-NEXT: mtfprd f8, r3 +; CHECK-P9-NEXT: mffprwz r3, f7 ; CHECK-P9-NEXT: xxswapd v2, vs8 -; CHECK-P9-NEXT: mtvsrd f7, r3 +; CHECK-P9-NEXT: mtfprd f7, r3 ; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mfvsrwz r3, f7 -; CHECK-P9-NEXT: mtvsrd f7, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 +; CHECK-P9-NEXT: mffprwz r3, f7 +; CHECK-P9-NEXT: mtfprd f7, r3 +; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: mtfprd f6, r3 ; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f5 +; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: mtfprd f6, r3 +; CHECK-P9-NEXT: mffprwz r3, f5 ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: mtvsrd f5, r3 +; CHECK-P9-NEXT: mtfprd f5, r3 ; CHECK-P9-NEXT: xxswapd v4, vs5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mfvsrwz r3, f5 -; CHECK-P9-NEXT: mtvsrd f5, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f5 +; CHECK-P9-NEXT: mtfprd f5, r3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 ; CHECK-P9-NEXT: xxswapd v5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 @@ -494,18 +494,18 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P9-NEXT: xxswapd v4, vs5 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v4, vs3 
; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 @@ -516,19 +516,19 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxswapd v0, vs0 @@ -551,84 +551,84 @@ define <16 x i8> @test16elt(<16 x double>* nocapture readonly) local_unnamed_add ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) ; CHECK-BE-NEXT: lxv vs5, 80(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f8 +; CHECK-BE-NEXT: mffprwz r3, f8 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f7 +; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghb v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f7 +; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f6 +; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f6 +; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz 
r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v0, r3 ; CHECK-BE-NEXT: vmrghb v5, v5, v0 @@ -648,15 +648,15 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f1 +; CHECK-P8-NEXT: mffprwz r4, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxswapd v3, vs1 ; CHECK-P8-NEXT: vmrglb v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 ; CHECK-P8-NEXT: sth r3, -2(r1) ; CHECK-P8-NEXT: lhz r3, -2(r1) @@ -665,13 +665,13 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglb v2, v3, v2 @@ -683,12 +683,12 @@ define i16 @test2elt_signed(<2 x double> %a) local_unnamed_addr #0 { ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: xscvdpsxws f0, v2 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: addi r3, r1, -2 @@ -715,23 +715,23 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xxswapd vs1, vs1 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: 
mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 +; CHECK-P8-NEXT: mtfprd f3, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xxswapd v2, vs2 -; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mffprwz r4, f1 ; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v3, vs0 ; CHECK-P8-NEXT: xxswapd v5, vs1 ; CHECK-P8-NEXT: vmrglb v2, v3, v2 ; CHECK-P8-NEXT: vmrglb v3, v5, v4 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test4elt_signed: @@ -741,19 +741,19 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd v4, vs0 @@ -770,20 +770,20 @@ define i32 @test4elt_signed(<4 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghb v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: li r3, 0 @@ -818,30 +818,30 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 +; CHECK-P8-NEXT: mffprwz r4, f5 +; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mffprwz r3, f6 +; CHECK-P8-NEXT: mtfprd f5, r4 ; CHECK-P8-NEXT: xxswapd v2, vs4 -; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: mtfprd f6, r3 ; CHECK-P8-NEXT: xxswapd v3, vs5 -; 
CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 +; CHECK-P8-NEXT: mffprwz r3, f0 +; CHECK-P8-NEXT: mtfprd f7, r4 ; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v1, vs7 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 ; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: xxswapd v6, vs2 ; CHECK-P8-NEXT: vmrglb v2, v5, v2 ; CHECK-P8-NEXT: xxswapd v5, vs0 @@ -852,7 +852,7 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test8elt_signed: @@ -864,40 +864,40 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd v5, vs0 @@ -916,41 +916,41 @@ define i64 @test8elt_signed(<8 x double>* nocapture readonly) local_unnamed_addr ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd 
vs2, vs2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghb v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 @@ -997,63 +997,63 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mffprwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: mffprwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f8 +; CHECK-P8-NEXT: mtfprd f4, r3 +; CHECK-P8-NEXT: mffprwz r3, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: mtvsrd f6, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f10 +; CHECK-P8-NEXT: mtfprd f6, r4 +; CHECK-P8-NEXT: mffprwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mtvsrd f8, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f12 +; CHECK-P8-NEXT: mtfprd f8, r3 +; CHECK-P8-NEXT: mffprwz r3, f12 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f13 +; CHECK-P8-NEXT: mtfprd f10, r4 +; CHECK-P8-NEXT: mffprwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mtvsrd f12, r3 +; CHECK-P8-NEXT: mtfprd f12, r3 ; CHECK-P8-NEXT: mfvsrwz r3, v2 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 ; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: mtvsrd f13, r4 +; CHECK-P8-NEXT: mtfprd f13, r4 ; CHECK-P8-NEXT: mfvsrwz r4, v3 ; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mffprwz r3, f0 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 ; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 ; CHECK-P8-NEXT: mtvsrd v3, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mffprwz r4, f1 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v3, v3 -; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mffprwz r3, f2 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 +; CHECK-P8-NEXT: mffprwz r4, f3 +; CHECK-P8-NEXT: mtfprd f2, r3 ; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: 
mfvsrwz r3, f5 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mffprwz r3, f5 +; CHECK-P8-NEXT: mtfprd f3, r4 ; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f5, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f9 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f11 +; CHECK-P8-NEXT: mffprwz r4, f7 +; CHECK-P8-NEXT: mtfprd f5, r3 +; CHECK-P8-NEXT: mffprwz r3, f9 +; CHECK-P8-NEXT: mtfprd f7, r4 +; CHECK-P8-NEXT: mffprwz r4, f11 ; CHECK-P8-NEXT: vmrglb v4, v8, v4 ; CHECK-P8-NEXT: xxswapd v8, vs3 ; CHECK-P8-NEXT: vmrglb v5, v9, v5 ; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mtfprd f1, r4 ; CHECK-P8-NEXT: vmrglb v0, v10, v0 ; CHECK-P8-NEXT: xxswapd v10, vs7 ; CHECK-P8-NEXT: vmrglb v1, v8, v1 @@ -1085,40 +1085,40 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) ; CHECK-P9-NEXT: lxv vs6, 16(r3) -; CHECK-P9-NEXT: mfvsrwz r3, f8 -; CHECK-P9-NEXT: mtvsrd f8, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f7 +; CHECK-P9-NEXT: mffprwz r3, f8 +; CHECK-P9-NEXT: mtfprd f8, r3 +; CHECK-P9-NEXT: mffprwz r3, f7 ; CHECK-P9-NEXT: xxswapd v2, vs8 -; CHECK-P9-NEXT: mtvsrd f7, r3 +; CHECK-P9-NEXT: mtfprd f7, r3 ; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mfvsrwz r3, f7 -; CHECK-P9-NEXT: mtvsrd f7, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 +; CHECK-P9-NEXT: mffprwz r3, f7 +; CHECK-P9-NEXT: mtfprd f7, r3 +; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: mtfprd f6, r3 ; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f5 +; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: mtfprd f6, r3 +; CHECK-P9-NEXT: mffprwz r3, f5 ; CHECK-P9-NEXT: vmrglb v2, v2, v3 ; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: mtvsrd f5, r3 +; CHECK-P9-NEXT: mtfprd f5, r3 ; CHECK-P9-NEXT: xxswapd v4, vs5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mfvsrwz r3, f5 -; CHECK-P9-NEXT: mtvsrd f5, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f5 +; CHECK-P9-NEXT: mtfprd f5, r3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 ; CHECK-P9-NEXT: xxswapd v5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 @@ -1127,18 +1127,18 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P9-NEXT: xxswapd v4, vs5 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mtfprd f4, r3 +; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 ; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; 
CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mtfprd f3, r3 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 ; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 @@ -1149,19 +1149,19 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 +; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mtfprd f2, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtfprd f1, r3 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mtfprd f1, r3 +; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxswapd v0, vs0 @@ -1184,84 +1184,84 @@ define <16 x i8> @test16elt_signed(<16 x double>* nocapture readonly) local_unna ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) ; CHECK-BE-NEXT: lxv vs5, 80(r3) -; CHECK-BE-NEXT: mfvsrwz r3, f8 +; CHECK-BE-NEXT: mffprwz r3, f8 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f7 +; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 ; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: vmrghb v2, v2, v3 -; CHECK-BE-NEXT: mfvsrwz r3, f7 +; CHECK-BE-NEXT: mffprwz r3, f7 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f6 +; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f6 +; CHECK-BE-NEXT: mffprwz r3, f6 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: mfvsrwz r3, f5 +; CHECK-BE-NEXT: mffprwz r3, f5 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: mfvsrwz r3, f4 +; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: 
mfvsrwz r3, f3 +; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: mfvsrwz r3, f2 +; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mtvsrd v5, r3 ; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: mfvsrwz r3, f1 +; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mfvsrwz r3, f0 +; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: mtvsrd v0, r3 ; CHECK-BE-NEXT: vmrghb v5, v5, v0 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll index b1ef3d0d994f..4a4f332225af 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp_to_i_4byte_elts.ll @@ -12,16 +12,16 @@ define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xvcvspuxws vs0, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xvcvspuxws vs0, v2 ; CHECK-P9-NEXT: mfvsrld r3, vs0 @@ -29,9 +29,9 @@ define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xvcvspuxws vs0, vs0 -; CHECK-BE-NEXT: mfvsrd r3, f0 +; CHECK-BE-NEXT: mffprd r3, f0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x float> @@ -159,16 +159,16 @@ entry: define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xvcvspsxws vs0, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xvcvspsxws vs0, v2 ; CHECK-P9-NEXT: mfvsrld r3, vs0 @@ -176,9 +176,9 @@ define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xvcvspsxws vs0, vs0 -; CHECK-BE-NEXT: mfvsrd r3, f0 +; CHECK-BE-NEXT: mffprd r3, f0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x float> diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll 
index 2179db537d4e..e51af62cb128 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -12,12 +12,12 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r4, r3, 48 ; CHECK-P8-NEXT: rldicl r3, r3, 48, 48 -; CHECK-P8-NEXT: rlwinm r4, r4, 0, 16, 31 -; CHECK-P8-NEXT: rlwinm r3, r3, 0, 16, 31 +; CHECK-P8-NEXT: clrlwi r4, r4, 16 +; CHECK-P8-NEXT: clrlwi r3, r3, 16 ; CHECK-P8-NEXT: mtfprwz f0, r4 ; CHECK-P8-NEXT: mtfprwz f1, r3 ; CHECK-P8-NEXT: xscvuxdsp f0, f0 @@ -28,7 +28,7 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: @@ -36,13 +36,13 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: mtvsrws v2, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: vextuhrx r3, r3, v2 -; CHECK-P9-NEXT: rlwinm r3, r3, 0, 16, 31 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 2 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextuhrx r3, r3, v2 -; CHECK-P9-NEXT: rlwinm r3, r3, 0, 16, 31 +; CHECK-P9-NEXT: clrlwi r3, r3, 16 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 @@ -57,12 +57,12 @@ define i64 @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-BE-NEXT: mtvsrws v2, r3 ; CHECK-BE-NEXT: li r3, 2 ; CHECK-BE-NEXT: vextuhlx r3, r3, v2 -; CHECK-BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; CHECK-BE-NEXT: clrlwi r3, r3, 16 ; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: li r3, 0 ; CHECK-BE-NEXT: xscvuxdsp f0, f0 ; CHECK-BE-NEXT: vextuhlx r3, r3, v2 -; CHECK-BE-NEXT: rlwinm r3, r3, 0, 16, 31 +; CHECK-BE-NEXT: clrlwi r3, r3, 16 ; CHECK-BE-NEXT: xscvdpspn v3, f0 ; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: xscvuxdsp f0, f0 @@ -81,7 +81,7 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI1_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: xxswapd v2, vs0 @@ -92,7 +92,7 @@ define <4 x float> @test4elt(i64 %a.coerce) local_unnamed_addr #1 { ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha ; CHECK-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r3 @@ -264,8 +264,8 @@ entry: define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r4, r3, 48 ; CHECK-P8-NEXT: rldicl r3, r3, 48, 48 ; CHECK-P8-NEXT: extsh r4, r4 @@ -280,7 +280,7 @@ define i64 @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; 
CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: @@ -332,7 +332,7 @@ entry: define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: vspltisw v3, 8 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: vadduwm v3, v3, v3 @@ -344,7 +344,7 @@ define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 { ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: vmrglh v2, v2, v2 ; CHECK-P9-NEXT: vextsh2w v2, v2 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll index 883cf7e51709..faec95831816 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp64_elts.ll @@ -13,7 +13,7 @@ define <2 x double> @test2elt(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: xxswapd v2, vs0 @@ -53,7 +53,7 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l @@ -74,7 +74,7 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i64 %a.c ; ; CHECK-P9-LABEL: test4elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI1_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI1_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 @@ -370,7 +370,7 @@ define <2 x double> @test2elt_signed(i32 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lvx v3, 0, r3 @@ -415,7 +415,7 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_2@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_2@toc@l @@ -443,7 +443,7 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI5_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI5_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll index 2609cb343004..18957e6b59a2 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll +++ 
b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll @@ -12,7 +12,7 @@ define <2 x double> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw v2, v2, v2 ; CHECK-P8-NEXT: xvcvuxwdp v2, v2 @@ -20,7 +20,7 @@ define <2 x double> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xxmrglw v2, v2, v2 ; CHECK-P9-NEXT: xvcvuxwdp v2, v2 @@ -28,7 +28,7 @@ define <2 x double> @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-BE-NEXT: xvcvuxwdp v2, v2 ; CHECK-BE-NEXT: blr @@ -266,7 +266,7 @@ entry: define <2 x double> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xxmrglw v2, v2, v2 ; CHECK-P8-NEXT: xvcvsxwdp v2, v2 @@ -274,7 +274,7 @@ define <2 x double> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xxmrglw v2, v2, v2 ; CHECK-P9-NEXT: xvcvsxwdp v2, v2 @@ -282,7 +282,7 @@ define <2 x double> @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-BE-NEXT: xvcvsxwdp v2, v2 ; CHECK-BE-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll index 66e85e9f81b1..6f046f69ecca 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i64_to_fp32_elts.ll @@ -22,7 +22,7 @@ define i64 @test2elt(<2 x i64> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: @@ -315,7 +315,7 @@ define i64 @test2elt_signed(<2 x i64> %a) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxsldwi v2, vs0, vs0, 1 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll index a34dfb7b09a7..ce97ed67baa1 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp32_elts.ll @@ -12,12 +12,12 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r4, r3, 56 ; CHECK-P8-NEXT: rldicl r3, r3, 56, 56 -; CHECK-P8-NEXT: rlwinm r4, r4, 0, 24, 31 -; 
CHECK-P8-NEXT: rlwinm r3, r3, 0, 24, 31 +; CHECK-P8-NEXT: clrlwi r4, r4, 24 +; CHECK-P8-NEXT: clrlwi r3, r3, 24 ; CHECK-P8-NEXT: mtfprwz f0, r4 ; CHECK-P8-NEXT: mtfprwz f1, r3 ; CHECK-P8-NEXT: xscvuxdsp f0, f0 @@ -28,7 +28,7 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: @@ -36,13 +36,13 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P9-NEXT: mtvsrws v2, r3 ; CHECK-P9-NEXT: li r3, 0 ; CHECK-P9-NEXT: vextubrx r3, r3, v2 -; CHECK-P9-NEXT: rlwinm r3, r3, 0, 24, 31 +; CHECK-P9-NEXT: clrlwi r3, r3, 24 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: li r3, 1 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 ; CHECK-P9-NEXT: xscvdpspn vs0, f0 ; CHECK-P9-NEXT: vextubrx r3, r3, v2 -; CHECK-P9-NEXT: rlwinm r3, r3, 0, 24, 31 +; CHECK-P9-NEXT: clrlwi r3, r3, 24 ; CHECK-P9-NEXT: xxsldwi v3, vs0, vs0, 1 ; CHECK-P9-NEXT: mtfprwz f0, r3 ; CHECK-P9-NEXT: xscvuxdsp f0, f0 @@ -57,12 +57,12 @@ define i64 @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-BE-NEXT: mtvsrws v2, r3 ; CHECK-BE-NEXT: li r3, 1 ; CHECK-BE-NEXT: vextublx r3, r3, v2 -; CHECK-BE-NEXT: rlwinm r3, r3, 0, 24, 31 +; CHECK-BE-NEXT: clrlwi r3, r3, 24 ; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: li r3, 0 ; CHECK-BE-NEXT: xscvuxdsp f0, f0 ; CHECK-BE-NEXT: vextublx r3, r3, v2 -; CHECK-BE-NEXT: rlwinm r3, r3, 0, 24, 31 +; CHECK-BE-NEXT: clrlwi r3, r3, 24 ; CHECK-BE-NEXT: xscvdpspn v3, f0 ; CHECK-BE-NEXT: mtfprwz f0, r3 ; CHECK-BE-NEXT: xscvuxdsp f0, f0 @@ -81,7 +81,7 @@ define <4 x float> @test4elt(i32 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI1_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: xxswapd v2, vs0 @@ -121,7 +121,7 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.co ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l @@ -140,7 +140,7 @@ define void @test8elt(<8 x float>* noalias nocapture sret %agg.result, i64 %a.co ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 @@ -280,8 +280,8 @@ entry: define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mtfprd f0, r3 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: clrldi r4, r3, 56 ; CHECK-P8-NEXT: rldicl r3, r3, 56, 56 ; CHECK-P8-NEXT: extsb r4, r4 @@ -296,7 +296,7 @@ define i64 @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-NEXT: xxsldwi v3, vs1, vs1, 1 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: @@ -349,7 +349,7 @@ define <4 x float> 
@test4elt_signed(i32 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI5_0@toc@l ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lvx v3, 0, r3 @@ -392,7 +392,7 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6 ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI6_1@toc@ha ; CHECK-P8-NEXT: vspltisw v5, 12 ; CHECK-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l @@ -416,7 +416,7 @@ define void @test8elt_signed(<8 x float>* noalias nocapture sret %agg.result, i6 ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll index 1a9790106db7..b4582e844f30 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i8_to_fp64_elts.ll @@ -13,7 +13,7 @@ define <2 x double> @test2elt(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI0_0@toc@l ; CHECK-P8-NEXT: xxlxor v4, v4, v4 ; CHECK-P8-NEXT: xxswapd v2, vs0 @@ -53,7 +53,7 @@ define void @test4elt(<4 x double>* noalias nocapture sret %agg.result, i32 %a.c ; CHECK-P8-LABEL: test4elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI1_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI1_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l @@ -118,7 +118,7 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c ; CHECK-P8-LABEL: test8elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI2_2@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI2_2@toc@l @@ -155,7 +155,7 @@ define void @test8elt(<8 x double>* noalias nocapture sret %agg.result, i64 %a.c ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI2_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI2_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 @@ -404,7 +404,7 @@ define <2 x double> @test2elt_signed(i16 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r4, r2, .LCPI4_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: addi r3, r4, .LCPI4_0@toc@l ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: lvx v3, 0, r3 @@ -449,7 +449,7 @@ define void @test4elt_signed(<4 x double>* noalias nocapture sret %agg.result, i ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha -; CHECK-P8-NEXT: mtvsrd f0, r4 +; 
CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI5_2@toc@ha ; CHECK-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l ; CHECK-P8-NEXT: addi r4, r4, .LCPI5_2@toc@l @@ -523,7 +523,7 @@ entry: define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test8elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r4 +; CHECK-P8-NEXT: mtfprd f0, r4 ; CHECK-P8-NEXT: addis r4, r2, .LCPI6_2@toc@ha ; CHECK-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha ; CHECK-P8-NEXT: addis r6, r2, .LCPI6_3@toc@ha @@ -572,7 +572,7 @@ define void @test8elt_signed(<8 x double>* noalias nocapture sret %agg.result, i ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r4 +; CHECK-P9-NEXT: mtfprd f0, r4 ; CHECK-P9-NEXT: addis r4, r2, .LCPI6_0@toc@ha ; CHECK-P9-NEXT: addi r4, r4, .LCPI6_0@toc@l ; CHECK-P9-NEXT: lxvx v3, 0, r4 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll index 384bcb699378..8e99e032805f 100644 --- a/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i_to_fp_4byte_elts.ll @@ -12,16 +12,16 @@ define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xvcvuxwsp vs0, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xvcvuxwsp vs0, v2 ; CHECK-P9-NEXT: mfvsrld r3, vs0 @@ -29,9 +29,9 @@ define i64 @test2elt(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xvcvuxwsp vs0, vs0 -; CHECK-BE-NEXT: mfvsrd r3, f0 +; CHECK-BE-NEXT: mffprd r3, f0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x i32> @@ -159,16 +159,16 @@ entry: define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtfprd f0, r3 ; CHECK-P8-NEXT: xxswapd v2, vs0 ; CHECK-P8-NEXT: xvcvsxwsp vs0, v2 ; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: mfvsrd r3, f0 +; CHECK-P8-NEXT: mffprd r3, f0 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtfprd f0, r3 ; CHECK-P9-NEXT: xxswapd v2, vs0 ; CHECK-P9-NEXT: xvcvsxwsp vs0, v2 ; CHECK-P9-NEXT: mfvsrld r3, vs0 @@ -176,9 +176,9 @@ define i64 @test2elt_signed(i64 %a.coerce) local_unnamed_addr #0 { ; ; CHECK-BE-LABEL: test2elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: mtvsrd f0, r3 +; CHECK-BE-NEXT: mtfprd f0, r3 ; CHECK-BE-NEXT: xvcvsxwsp vs0, vs0 -; CHECK-BE-NEXT: mfvsrd r3, f0 +; CHECK-BE-NEXT: mffprd r3, f0 ; CHECK-BE-NEXT: blr entry: %0 = bitcast i64 %a.coerce to <2 x i32> diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll index 7cf43a92a5dc..a80cca516618 100644 --- a/llvm/test/CodeGen/PowerPC/vsx.ll +++ b/llvm/test/CodeGen/PowerPC/vsx.ll @@ -2437,7 +2437,7 @@ define <2 x i32> @test80(i32 %v) { ; ; CHECK-LE-LABEL: test80: ; CHECK-LE: # %bb.0: -; 
CHECK-LE-NEXT: mtvsrd f0, r3 +; CHECK-LE-NEXT: mtfprd f0, r3 ; CHECK-LE-NEXT: addis r4, r2, .LCPI65_0@toc@ha ; CHECK-LE-NEXT: addi r3, r4, .LCPI65_0@toc@l ; CHECK-LE-NEXT: xxswapd vs0, vs0 diff --git a/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll b/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll index 4454d3dc62af..c0e8c1c3c20c 100644 --- a/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll +++ b/llvm/test/CodeGen/PowerPC/xray-conditional-return.ll @@ -2,8 +2,8 @@ define void @Foo(i32 signext %a, i32 signext %b) #0 { ; CHECK-LABEL: @Foo -; CHECK: cmpw [[CR:[0-9]+]] -; CHECK-NEXT: ble [[CR]], [[LABEL:\.[a-zA-Z0-9]+]] +; CHECK: cmpw +; CHECK-NEXT: ble 0, [[LABEL:\.[a-zA-Z0-9]+]] ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: {{\.[a-zA-Z0-9]+}}: ; CHECK-NEXT: blr @@ -39,8 +39,8 @@ return: define void @Foo2(i32 signext %a, i32 signext %b) #0 { ; CHECK-LABEL: @Foo2 -; CHECK: cmpw [[CR:[0-9]+]] -; CHECK-NEXT: bge [[CR]], [[LABEL:\.[a-zA-Z0-9]+]] +; CHECK: cmpw +; CHECK-NEXT: bge 0, [[LABEL:\.[a-zA-Z0-9]+]] ; CHECK-NEXT: .p2align 3 ; CHECK-NEXT: {{\.[a-zA-Z0-9]+}}: ; CHECK-NEXT: blr From 8079f8a7e8b1c84759b19a63ad54ff83e94f4c33 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 16 Apr 2020 12:44:32 -0400 Subject: [PATCH 062/216] [libc++] Simplify conditional in __config for _LIBCPP_NO_RTTI We don't support GCC's older than 5.x anymore. --- libcxx/include/__config | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index ce7a9354c50a..51b2a64901d9 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1106,12 +1106,8 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( // Try to find out if RTTI is disabled. // g++ and cl.exe have RTTI on by default and define a macro when it is. -// g++ only defines the macro in 4.3.2 and onwards. #if !defined(_LIBCPP_NO_RTTI) -# if defined(__GNUC__) && \ - ((__GNUC__ >= 5) || \ - (__GNUC__ == 4 && (__GNUC_MINOR__ >= 3 || __GNUC_PATCHLEVEL__ >= 2))) && \ - !defined(__GXX_RTTI) +# if defined(__GNUC__) && !defined(__GXX_RTTI) # define _LIBCPP_NO_RTTI # elif defined(_LIBCPP_COMPILER_MSVC) && !defined(_CPPRTTI) # define _LIBCPP_NO_RTTI From b1fbf438f647f3e8cf03baf7e0479359373241cd Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Thu, 16 Apr 2020 19:53:17 +0300 Subject: [PATCH 063/216] [OpenMPOpt] deduplicateRuntimeCalls(): avoid traditional map lookup pitfall Summary: This roughly halves time spent in that pass, while unsurprisingly significantly reducing total memory usage. This makes sense because most functions won't use any openmp functions.. old ``` 0.2329 ( 0.5%) 0.0409 ( 0.9%) 0.2738 ( 0.5%) 0.2736 ( 0.5%) OpenMP specific optimizations ``` ``` total runtime: 63.32s. bytes allocated in total (ignoring deallocations): 8.34GB (131.70MB/s) calls to allocation functions: 14526259 (229410/s) temporary memory allocations: 3335760 (52680/s) peak heap memory consumption: 324.36MB peak RSS (including heaptrack overhead): 5.39GB total memory leaked: 289.93MB ``` new ``` 0.1457 ( 0.3%) 0.0276 ( 0.6%) 0.1732 ( 0.3%) 0.1731 ( 0.3%) OpenMP specific optimizations ``` ``` total runtime: 55.01s. bytes allocated in total (ignoring deallocations): 6.70GB (121.89MB/s) calls to allocation functions: 14268205 (259398/s) temporary memory allocations: 3225355 (58637/s) peak heap memory consumption: 324.09MB peak RSS (including heaptrack overhead): 5.39GB total memory leaked: 289.87MB ``` diff ``` total runtime: -8.31s. 
bytes allocated in total (ignoring deallocations): -1.63GB (196.58MB/s) calls to allocation functions: -258054 (31034/s) temporary memory allocations: -110405 (13277/s) peak heap memory consumption: -262.36KB peak RSS (including heaptrack overhead): 0B total memory leaked: -61.45KB ``` Reviewers: jdoerfert, hfinkel Reviewed By: jdoerfert Subscribers: yaxunl, hiraditya, guansong, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78299 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 8b45350ce44c..d0e0dde5a782 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -248,7 +248,11 @@ struct OpenMPOpt { /// \p ReplVal if given. bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI, Value *ReplVal = nullptr) { - auto &Uses = RFI.UsesMap[&F]; + auto UsesIt = RFI.UsesMap.find(&F); + if (UsesIt == RFI.UsesMap.end()) + return false; + + auto &Uses = UsesIt->getSecond(); if (Uses.size() + (ReplVal != nullptr) < 2) return false; From a3237f861cc2b4c3cd29d86f0a0212dfd4d38d56 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 14 Apr 2020 13:28:27 -0700 Subject: [PATCH 064/216] [lldb/Reproducers] Simplify LLDB_RECORD macros Redefine the LLDB_RECORD macros in terms of a common uber-macro to reduce code duplication across them. Differential revision: https://reviews.llvm.org/D78141 --- .../lldb/Utility/ReproducerInstrumentation.h | 126 +++++++----------- 1 file changed, 47 insertions(+), 79 deletions(-) diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h index 8e02f4f8278d..3728e19386d1 100644 --- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h +++ b/lldb/include/lldb/Utility/ReproducerInstrumentation.h @@ -84,21 +84,20 @@ template inline std::string stringify_args(const Ts &... ts) { #Result, #Class, #Method, #Signature) #define LLDB_REGISTER_METHOD_CONST(Result, Class, Method, Signature) \ - R.Register(&invoke::method_const<( \ - &Class::Method)>::doit, \ + R.Register(&invoke::method<(&Class::Method)>::doit, \ #Result, #Class, #Method, #Signature) #define LLDB_REGISTER_STATIC_METHOD(Result, Class, Method, Signature) \ - R.Register( \ - &invoke::method_static<(&Class::Method)>::doit, \ - #Result, #Class, #Method, #Signature) + R.Register(&invoke::method<(&Class::Method)>::doit, \ + #Result, #Class, #Method, #Signature) #define LLDB_REGISTER_CHAR_PTR_REDIRECT_STATIC(Result, Class, Method) \ - R.Register(&invoke::method_static<( \ - &Class::Method)>::doit, \ - &char_ptr_redirect::method_static<( \ - &Class::Method)>::doit, \ - #Result, #Class, #Method, "(char*, size_t"); + R.Register( \ + &invoke::method<(&Class::Method)>::doit, \ + &char_ptr_redirect::method<(&Class::Method)>::doit, \ + #Result, #Class, #Method, "(char*, size_t"); #define LLDB_REGISTER_CHAR_PTR_REDIRECT(Result, Class, Method) \ R.Register(&invoke::method<( \ @@ -109,97 +108,55 @@ template inline std::string stringify_args(const Ts &... ts) { #define LLDB_REGISTER_CHAR_PTR_REDIRECT_CONST(Result, Class, Method) \ R.Register(&invoke::method_const<(&Class::Method)>::doit, \ - &char_ptr_redirect::method_const<(&Class::Method)>::doit, \ + const>::method<(&Class::Method)>::doit, \ + &char_ptr_redirect::method<(&Class::Method)>::doit, \ #Result, #Class, #Method, "(char*, size_t"); -#define LLDB_RECORD_CONSTRUCTOR(Class, Signature, ...) 
\ +#define LLDB_CONSTRUCT_(T, ...) \ lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ stringify_args(__VA_ARGS__)); \ if (lldb_private::repro::InstrumentationData _data = \ LLDB_GET_INSTRUMENTATION_DATA()) { \ _recorder.Record(_data.GetSerializer(), _data.GetRegistry(), \ - &lldb_private::repro::construct::doit, \ - __VA_ARGS__); \ + &lldb_private::repro::construct::doit, __VA_ARGS__); \ _recorder.RecordResult(this, false); \ } +#define LLDB_RECORD_CONSTRUCTOR(Class, Signature, ...) \ + LLDB_CONSTRUCT_(Class Signature, __VA_ARGS__) + #define LLDB_RECORD_CONSTRUCTOR_NO_ARGS(Class) \ - lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION); \ - if (lldb_private::repro::InstrumentationData _data = \ - LLDB_GET_INSTRUMENTATION_DATA()) { \ - _recorder.Record(_data.GetSerializer(), _data.GetRegistry(), \ - &lldb_private::repro::construct::doit); \ - _recorder.RecordResult(this, false); \ - } + LLDB_CONSTRUCT_(Class(), lldb_private::repro::EmptyArg()) -#define LLDB_RECORD_METHOD(Result, Class, Method, Signature, ...) \ +#define LLDB_RECORD_(T1, T2, ...) \ lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ - stringify_args(*this, __VA_ARGS__)); \ + stringify_args(__VA_ARGS__)); \ if (lldb_private::repro::InstrumentationData _data = \ LLDB_GET_INSTRUMENTATION_DATA()) { \ _recorder.Record(_data.GetSerializer(), _data.GetRegistry(), \ - &lldb_private::repro::invoke::method<(&Class::Method)>::doit, \ - this, __VA_ARGS__); \ + &lldb_private::repro::invoke::method::doit, \ + __VA_ARGS__); \ } +#define LLDB_RECORD_METHOD(Result, Class, Method, Signature, ...) \ + LLDB_RECORD_(Result(Class::*) Signature, (&Class::Method), this, __VA_ARGS__) + #define LLDB_RECORD_METHOD_CONST(Result, Class, Method, Signature, ...) \ - lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ - stringify_args(*this, __VA_ARGS__)); \ - if (lldb_private::repro::InstrumentationData _data = \ - LLDB_GET_INSTRUMENTATION_DATA()) { \ - _recorder.Record( \ - _data.GetSerializer(), _data.GetRegistry(), \ - &lldb_private::repro::invoke::method_const<(&Class::Method)>::doit, \ - this, __VA_ARGS__); \ - } + LLDB_RECORD_(Result(Class::*) Signature const, (&Class::Method), this, \ + __VA_ARGS__) #define LLDB_RECORD_METHOD_NO_ARGS(Result, Class, Method) \ - lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ - stringify_args(*this)); \ - if (lldb_private::repro::InstrumentationData _data = \ - LLDB_GET_INSTRUMENTATION_DATA()) { \ - _recorder.Record(_data.GetSerializer(), _data.GetRegistry(), \ - &lldb_private::repro::invoke::method<(&Class::Method)>::doit, \ - this); \ - } + LLDB_RECORD_(Result (Class::*)(), (&Class::Method), this) #define LLDB_RECORD_METHOD_CONST_NO_ARGS(Result, Class, Method) \ - lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ - stringify_args(*this)); \ - if (lldb_private::repro::InstrumentationData _data = \ - LLDB_GET_INSTRUMENTATION_DATA()) { \ - _recorder.Record( \ - _data.GetSerializer(), _data.GetRegistry(), \ - &lldb_private::repro::invoke::method_const<(&Class::Method)>::doit, \ - this); \ - } + LLDB_RECORD_(Result (Class::*)() const, (&Class::Method), this) #define LLDB_RECORD_STATIC_METHOD(Result, Class, Method, Signature, ...) 
\ - lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ - stringify_args(__VA_ARGS__)); \ - if (lldb_private::repro::InstrumentationData _data = \ - LLDB_GET_INSTRUMENTATION_DATA()) { \ - _recorder.Record( \ - _data.GetSerializer(), _data.GetRegistry(), \ - lldb_private::repro::invoke::method_static<( \ - &Class::Method)>::doit, \ - __VA_ARGS__); \ - } + LLDB_RECORD_(Result(*) Signature, (&Class::Method), __VA_ARGS__) #define LLDB_RECORD_STATIC_METHOD_NO_ARGS(Result, Class, Method) \ - lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION); \ - if (lldb_private::repro::InstrumentationData _data = \ - LLDB_GET_INSTRUMENTATION_DATA()) { \ - _recorder.Record(_data.GetSerializer(), _data.GetRegistry(), \ - lldb_private::repro::invoke::method_static< \ - (&Class::Method)>::doit); \ - } + LLDB_RECORD_(Result (*)(), (&Class::Method), lldb_private::repro::EmptyArg()) #define LLDB_RECORD_RESULT(Result) _recorder.RecordResult(Result, true); @@ -561,20 +518,20 @@ struct invoke { template struct invoke { - template struct method_const { + template struct method { static Result doit(Class *c, Args... args) { return (c->*m)(args...); } }; }; template struct invoke { - template struct method_static { + template struct method { static Result doit(Args... args) { return (*m)(args...); } }; }; template struct invoke { - template struct method_static { + template struct method { static void doit(Args... args) { return (*m)(args...); } }; }; @@ -712,6 +669,8 @@ class InstrumentationData { Registry *m_registry; }; +struct EmptyArg {}; + /// RAII object that records function invocations and their return value. /// /// API calls are only captured when the API boundary is crossed. Once we're in @@ -777,6 +736,15 @@ class Recorder { m_result_recorded = true; } + /// Specializations for the no-argument methods. These are passed an empty + /// dummy argument so the same variadic macro can be used. These methods + /// strip the arguments before forwarding them. + template + void Record(Serializer &serializer, Registry ®istry, Result (*f)(), + const EmptyArg &arg) { + Record(serializer, registry, f); + } + /// Record the result of a function call. template Result RecordResult(Result &&r, bool update_boundary) { @@ -830,7 +798,7 @@ class Recorder { template struct char_ptr_redirect; template struct char_ptr_redirect { - template struct method_const { + template struct method { static Result doit(Class *c, char *s, size_t l) { char *buffer = reinterpret_cast(calloc(l, sizeof(char))); return (c->*m)(buffer, l); @@ -849,7 +817,7 @@ struct char_ptr_redirect { template struct char_ptr_redirect { - template struct method_static { + template struct method { static Result doit(char *s, size_t l) { char *buffer = reinterpret_cast(calloc(l, sizeof(char))); return (*m)(buffer, l); From d736571538bd3e291f28914d4b92fb67e0d5bc64 Mon Sep 17 00:00:00 2001 From: Anna Welker Date: Thu, 16 Apr 2020 18:09:24 +0100 Subject: [PATCH 065/216] [ARM][MVE] Fix location of optimized gather addresses Fix for the address optimization for gathers and scatters which would in some complex cases push out instructions not to the vector loop preheader, but to other locations as well which lead to a scrambled order and the compilation failing. This patch ensures that said instructions are always pushed to the end of the vector loop preheader. 
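As a rough illustration of the intended result, here is a reduced IR sketch (all function, block, and value names are hypothetical and not taken from this patch or its tests): the splat and offset setup that gets pushed out of the loop is materialized in the vector loop preheader, directly in front of its terminator, so consecutive pushed-out instructions keep a consistent order.

```
; Illustrative sketch only; names and constants are made up for this example.
define void @sketch(i32* %base, <4 x i32> %init.offsets, i32 %stride, i32 %n) {
vector.ph:
  ; Offset setup pushed out of the loop is created here, immediately before
  ; the preheader's terminator.
  %stride.ins = insertelement <4 x i32> undef, i32 %stride, i32 0
  %stride.splat = shufflevector <4 x i32> %stride.ins, <4 x i32> undef, <4 x i32> zeroinitializer
  br label %vector.body

vector.body:
  %i = phi i32 [ 0, %vector.ph ], [ %i.next, %vector.body ]
  %offs = phi <4 x i32> [ %init.offsets, %vector.ph ], [ %offs.next, %vector.body ]
  %ptrs = getelementptr inbounds i32, i32* %base, <4 x i32> %offs
  %gathered = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %ptrs, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  %offs.next = add <4 x i32> %offs, %stride.splat
  %i.next = add i32 %i, 4
  %done = icmp eq i32 %i.next, %n
  br i1 %done, label %exit, label %vector.body

exit:
  ret void
}

declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)
```
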
Differential Revision: https://reviews.llvm.org/D78293 --- .../Target/ARM/MVEGatherScatterLowering.cpp | 12 +- .../Thumb2/mve-gather-scatter-optimisation.ll | 359 +++++++++++++++--- 2 files changed, 307 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp index 09c6ccad9c56..e4f6d1200a82 100644 --- a/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp +++ b/llvm/lib/Target/ARM/MVEGatherScatterLowering.cpp @@ -463,11 +463,7 @@ void MVEGatherScatterLowering::pushOutAdd(PHINode *&Phi, Value *OffsSecondOperand, unsigned StartIndex) { LLVM_DEBUG(dbgs() << "masked gathers/scatters: optimising add instruction\n"); - Instruction *InsertionPoint; - if (isa(OffsSecondOperand)) - InsertionPoint = &cast(OffsSecondOperand)->getParent()->back(); - else - InsertionPoint = + Instruction *InsertionPoint = &cast(Phi->getIncomingBlock(StartIndex)->back()); // Initialize the phi with a vector that contains a sum of the constants Instruction *NewIndex = BinaryOperator::Create( @@ -492,11 +488,7 @@ void MVEGatherScatterLowering::pushOutMul(PHINode *&Phi, // Create a new scalar add outside of the loop and transform it to a splat // by which loop variable can be incremented - Instruction *InsertionPoint; - if (isa(OffsSecondOperand)) - InsertionPoint = &cast(OffsSecondOperand)->getParent()->back(); - else - InsertionPoint = &cast( + Instruction *InsertionPoint = &cast( Phi->getIncomingBlock(LoopIncrement == 1 ? 0 : 1)->back()); // Create a new index diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll index a26b17a29aaa..e70696761101 100644 --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -445,67 +445,66 @@ end: define dso_local void @arm_mat_mult_q31(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %n, i32 %m, i32 %l) local_unnamed_addr #0 { ; CHECK-LABEL: arm_mat_mult_q31: ; CHECK: @ %bb.0: @ %for.cond8.preheader.us.us.preheader.preheader -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #48 -; CHECK-NEXT: sub sp, #48 -; CHECK-NEXT: adr r6, .LCPI9_0 +; CHECK-NEXT: .pad #40 +; CHECK-NEXT: sub sp, #40 ; CHECK-NEXT: ldrd r9, r12, [sp, #144] -; CHECK-NEXT: vldrw.u32 q0, [r6] -; CHECK-NEXT: sub.w r6, r12, #1 -; CHECK-NEXT: movs r7, #1 -; CHECK-NEXT: vdup.32 q2, r9 -; CHECK-NEXT: add.w r6, r7, r6, lsr #1 -; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill -; CHECK-NEXT: bic r6, r6, #3 -; CHECK-NEXT: vmul.i32 q0, q0, r9 -; CHECK-NEXT: subs r6, #4 -; CHECK-NEXT: vshl.i32 q2, q2, #3 +; CHECK-NEXT: sub.w r7, r12, #1 +; CHECK-NEXT: movs r6, #1 ; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: add.w r7, r6, r7, lsr #1 ; CHECK-NEXT: vmov.i32 q3, #0x8 -; CHECK-NEXT: add.w r4, r7, r6, lsr #2 +; CHECK-NEXT: bic r7, r7, #3 +; CHECK-NEXT: subs r7, #4 +; CHECK-NEXT: add.w r11, r6, r7, lsr #2 +; CHECK-NEXT: adr r7, .LCPI9_0 +; CHECK-NEXT: vldrw.u32 q0, [r7] ; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill +; CHECK-NEXT: vdup.32 q0, r9 +; CHECK-NEXT: vshl.i32 q2, q0, #3 
+; CHECK-NEXT: vstrw.32 q0, [sp] @ 16-byte Spill ; CHECK-NEXT: .LBB9_1: @ %for.cond8.preheader.us.us.preheader ; CHECK-NEXT: @ =>This Loop Header: Depth=1 ; CHECK-NEXT: @ Child Loop BB9_2 Depth 2 ; CHECK-NEXT: @ Child Loop BB9_3 Depth 3 ; CHECK-NEXT: mul r10, r8, r9 -; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload +; CHECK-NEXT: movs r5, #0 ; CHECK-NEXT: mul r7, r8, r12 -; CHECK-NEXT: vadd.i32 q0, q0, r7 -; CHECK-NEXT: movs r7, #0 -; CHECK-NEXT: vstrw.32 q0, [sp, #32] @ 16-byte Spill ; CHECK-NEXT: .LBB9_2: @ %vector.ph ; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 ; CHECK-NEXT: @ => This Loop Header: Depth=2 ; CHECK-NEXT: @ Child Loop BB9_3 Depth 3 ; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vldrw.u32 q7, [sp, #32] @ 16-byte Reload -; CHECK-NEXT: vmov.i32 q5, #0x0 -; CHECK-NEXT: vadd.i32 q6, q0, r7 -; CHECK-NEXT: dls lr, r4 +; CHECK-NEXT: vldrw.u32 q6, [sp] @ 16-byte Reload +; CHECK-NEXT: vmov.i32 q4, #0x0 +; CHECK-NEXT: dls lr, r11 +; CHECK-NEXT: vadd.i32 q5, q0, r7 +; CHECK-NEXT: vmlas.u32 q6, q0, r5 ; CHECK-NEXT: .LBB9_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 -; CHECK-NEXT: vadd.i32 q1, q7, q3 -; CHECK-NEXT: vldrw.u32 q4, [r0, q7, uxtw #2] -; CHECK-NEXT: vldrw.u32 q7, [r1, q6, uxtw #2] -; CHECK-NEXT: vadd.i32 q0, q6, q2 -; CHECK-NEXT: vmov q6, q0 -; CHECK-NEXT: vmul.i32 q4, q7, q4 -; CHECK-NEXT: vmov q7, q1 -; CHECK-NEXT: vadd.i32 q5, q4, q5 +; CHECK-NEXT: vadd.i32 q1, q5, q3 +; CHECK-NEXT: vldrw.u32 q0, [r0, q5, uxtw #2] +; CHECK-NEXT: vldrw.u32 q5, [r1, q6, uxtw #2] +; CHECK-NEXT: vadd.i32 q7, q6, q2 +; CHECK-NEXT: vmov q6, q7 +; CHECK-NEXT: vmul.i32 q0, q5, q0 +; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vadd.i32 q4, q0, q4 ; CHECK-NEXT: le lr, .LBB9_3 ; CHECK-NEXT: @ %bb.4: @ %middle.block ; CHECK-NEXT: @ in Loop: Header=BB9_2 Depth=2 -; CHECK-NEXT: add.w r5, r7, r10 -; CHECK-NEXT: adds r7, #1 -; CHECK-NEXT: vaddv.u32 r6, q5 -; CHECK-NEXT: cmp r7, r9 -; CHECK-NEXT: str.w r6, [r2, r5, lsl #2] +; CHECK-NEXT: add.w r6, r5, r10 +; CHECK-NEXT: adds r5, #1 +; CHECK-NEXT: vaddv.u32 r4, q4 +; CHECK-NEXT: cmp r5, r9 +; CHECK-NEXT: str.w r4, [r2, r6, lsl #2] ; CHECK-NEXT: bne .LBB9_2 ; CHECK-NEXT: @ %bb.5: @ %for.cond4.for.cond.cleanup6_crit_edge.us ; CHECK-NEXT: @ in Loop: Header=BB9_1 Depth=1 @@ -513,9 +512,10 @@ define dso_local void @arm_mat_mult_q31(i32* noalias nocapture readonly %A, i32* ; CHECK-NEXT: cmp r8, r3 ; CHECK-NEXT: bne .LBB9_1 ; CHECK-NEXT: @ %bb.6: @ %for.end25 -; CHECK-NEXT: add sp, #48 +; CHECK-NEXT: add sp, #40 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.7: ; CHECK-NEXT: .LCPI9_0: @@ -594,8 +594,8 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11} -; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #32 ; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: strd r0, r2, [sp, #24] @ 8-byte Folded Spill @@ -603,34 +603,33 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: str r3, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: mov r0, r3 ; 
CHECK-NEXT: itt ne -; CHECK-NEXT: ldrne.w lr, [sp, #104] +; CHECK-NEXT: ldrne.w lr, [sp, #120] ; CHECK-NEXT: cmpne.w lr, #0 ; CHECK-NEXT: bne .LBB10_2 ; CHECK-NEXT: .LBB10_1: @ %for.cond.cleanup ; CHECK-NEXT: add sp, #32 -; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .LBB10_2: @ %for.cond1.preheader.us.preheader -; CHECK-NEXT: ldr.w r11, [sp, #108] +; CHECK-NEXT: ldr.w r11, [sp, #124] ; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: movs r1, #1 -; CHECK-NEXT: lsl.w r4, lr, #1 +; CHECK-NEXT: vdup.32 q4, lr ; CHECK-NEXT: bic r0, r11, #3 ; CHECK-NEXT: str r0, [sp, #16] @ 4-byte Spill ; CHECK-NEXT: subs r0, #4 +; CHECK-NEXT: lsl.w r4, lr, #1 ; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: vshl.i32 q6, q4, #2 ; CHECK-NEXT: add.w r8, r1, r0, lsr #2 ; CHECK-NEXT: lsl.w r0, r11, #1 ; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: adr r0, .LCPI10_0 -; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: vldrw.u32 q5, [r0] ; CHECK-NEXT: ldr r0, [sp, #24] @ 4-byte Reload ; CHECK-NEXT: movs r1, #0 ; CHECK-NEXT: str r0, [sp, #20] @ 4-byte Spill -; CHECK-NEXT: vmul.i32 q4, q0, lr -; CHECK-NEXT: vdup.32 q0, lr -; CHECK-NEXT: vshl.i32 q5, q0, #2 ; CHECK-NEXT: b .LBB10_5 ; CHECK-NEXT: .LBB10_3: @ %for.cond5.preheader.us73.preheader ; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1 @@ -638,7 +637,7 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: mov r1, r4 ; CHECK-NEXT: add.w r0, r0, r12, lsl #1 ; CHECK-NEXT: bl __aeabi_memclr -; CHECK-NEXT: ldr.w lr, [sp, #104] +; CHECK-NEXT: ldr.w lr, [sp, #120] ; CHECK-NEXT: .LBB10_4: @ %for.cond1.for.cond.cleanup3_crit_edge.us ; CHECK-NEXT: @ in Loop: Header=BB10_5 Depth=1 ; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload @@ -685,15 +684,16 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: b .LBB10_13 ; CHECK-NEXT: .LBB10_10: @ %vector.ph ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 -; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vmov q1, q4 ; CHECK-NEXT: ldr r2, [sp, #20] @ 4-byte Reload -; CHECK-NEXT: vadd.i32 q1, q4, r10 +; CHECK-NEXT: vmov.i32 q0, #0x0 +; CHECK-NEXT: vmlas.u32 q1, q5, r10 ; CHECK-NEXT: dls lr, r8 ; CHECK-NEXT: .LBB10_11: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB10_5 Depth=1 ; CHECK-NEXT: @ Parent Loop BB10_8 Depth=2 ; CHECK-NEXT: @ => This Inner Loop Header: Depth=3 -; CHECK-NEXT: vadd.i32 q2, q1, q5 +; CHECK-NEXT: vadd.i32 q2, q1, q6 ; CHECK-NEXT: vldrh.s32 q3, [r6, q1, uxtw #1] ; CHECK-NEXT: vldrh.s32 q1, [r2], #8 ; CHECK-NEXT: vmul.i32 q1, q3, q1 @@ -704,7 +704,7 @@ define dso_local void @arm_mat_mult_q15(i16* noalias nocapture readonly %A, i16* ; CHECK-NEXT: @ in Loop: Header=BB10_8 Depth=2 ; CHECK-NEXT: ldr r7, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vaddv.u32 r2, q0 -; CHECK-NEXT: ldr.w lr, [sp, #104] +; CHECK-NEXT: ldr.w lr, [sp, #120] ; CHECK-NEXT: cmp r7, r11 ; CHECK-NEXT: beq .LBB10_7 ; CHECK-NEXT: .LBB10_13: @ %for.body8.us.us.preheader @@ -839,8 +839,259 @@ for.cond.cleanup: ; preds = %for.cond1.for.cond. 
ret void } +define hidden arm_aapcs_vfpcc i32 @arm_depthwise_conv_s8(i8* nocapture readonly %input, i16 zeroext %input_x, i16 zeroext %input_y, i16 zeroext %input_ch, i8* nocapture readonly %kernel, i16 zeroext %output_ch, i16 zeroext %ch_mult, i16 zeroext %kernel_x, i16 zeroext %kernel_y, i16 zeroext %pad_x, i16 zeroext %pad_y, i16 zeroext %stride_x, i16 zeroext %stride_y, i32* nocapture readonly %bias, i8* nocapture %output, i32* nocapture readonly %output_shift, i32* nocapture readonly %output_mult, i16 zeroext %output_x, i16 zeroext %output_y, i32 %output_offset, i32 %input_offset, i32 %output_activation_min, i32 %output_activation_max, i16 zeroext %dilation_x, i16 zeroext %dilation_y, i16* nocapture readnone %buffer_a) local_unnamed_addr #0 { +; CHECK-LABEL: arm_depthwise_conv_s8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: .pad #4 +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .pad #8 +; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: ldrd r2, r7, [sp, #104] +; CHECK-NEXT: add.w r12, r7, #10 +; CHECK-NEXT: adr r7, .LCPI11_0 +; CHECK-NEXT: ldr r1, [sp, #96] +; CHECK-NEXT: vdup.32 q1, r2 +; CHECK-NEXT: vldrw.u32 q0, [r7] +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: mov.w r10, #11 +; CHECK-NEXT: vshl.i32 q1, q1, #2 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: .LBB11_1: @ %for.body10.i +; CHECK-NEXT: @ =>This Loop Header: Depth=1 +; CHECK-NEXT: @ Child Loop BB11_2 Depth 2 +; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 +; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 +; CHECK-NEXT: mov.w r8, #0 +; CHECK-NEXT: str r6, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: .LBB11_2: @ %for.cond22.preheader.i +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ => This Loop Header: Depth=2 +; CHECK-NEXT: @ Child Loop BB11_3 Depth 3 +; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 +; CHECK-NEXT: movs r6, #0 +; CHECK-NEXT: .LBB11_3: @ %for.body27.i +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 +; CHECK-NEXT: @ => This Loop Header: Depth=3 +; CHECK-NEXT: @ Child Loop BB11_4 Depth 4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 +; CHECK-NEXT: mov.w lr, #6 +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: movs r5, #4 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB11_4: @ %for.body78.us.i +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 +; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3 +; CHECK-NEXT: @ => This Loop Header: Depth=4 +; CHECK-NEXT: @ Child Loop BB11_5 Depth 5 +; CHECK-NEXT: mul r7, r5, r10 +; CHECK-NEXT: vdup.32 q3, r6 +; CHECK-NEXT: vdup.32 q2, r8 +; CHECK-NEXT: mov r11, r12 +; CHECK-NEXT: vadd.i32 q4, q0, r7 +; CHECK-NEXT: vmla.u32 q3, q4, r2 +; CHECK-NEXT: adds r7, #113 +; CHECK-NEXT: vadd.i32 q4, q0, r7 +; CHECK-NEXT: vmla.u32 q2, q4, r2 +; CHECK-NEXT: .LBB11_5: @ %vector.body +; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 +; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 +; CHECK-NEXT: @ Parent Loop BB11_3 Depth=3 +; CHECK-NEXT: @ Parent Loop BB11_4 Depth=4 +; CHECK-NEXT: @ => This Inner Loop Header: Depth=5 +; CHECK-NEXT: vldrb.s32 q6, [r0, q2] +; CHECK-NEXT: vadd.i32 q5, q2, q1 +; CHECK-NEXT: vadd.i32 q4, q3, q1 +; CHECK-NEXT: subs.w r11, r11, #4 +; CHECK-NEXT: vadd.i32 q2, q6, r2 +; CHECK-NEXT: vldrb.s32 q6, [r1, q3] +; CHECK-NEXT: vmov q3, q4 +; CHECK-NEXT: 
vmlava.u32 r4, q2, q6 +; CHECK-NEXT: vmov q2, q5 +; CHECK-NEXT: bne .LBB11_5 +; CHECK-NEXT: @ %bb.6: @ %middle.block +; CHECK-NEXT: @ in Loop: Header=BB11_4 Depth=4 +; CHECK-NEXT: adds r5, #1 +; CHECK-NEXT: le lr, .LBB11_4 +; CHECK-NEXT: @ %bb.7: @ %for.cond.cleanup77.i +; CHECK-NEXT: @ in Loop: Header=BB11_3 Depth=3 +; CHECK-NEXT: adds r6, #1 +; CHECK-NEXT: add.w r9, r9, #1 +; CHECK-NEXT: cmp r6, r2 +; CHECK-NEXT: bne .LBB11_3 +; CHECK-NEXT: @ %bb.8: @ %for.cond.cleanup26.i +; CHECK-NEXT: @ in Loop: Header=BB11_2 Depth=2 +; CHECK-NEXT: add.w r8, r8, #1 +; CHECK-NEXT: cmp r8, r3 +; CHECK-NEXT: bne .LBB11_2 +; CHECK-NEXT: @ %bb.9: @ %for.cond.cleanup20.i +; CHECK-NEXT: @ in Loop: Header=BB11_1 Depth=1 +; CHECK-NEXT: ldr r6, [sp, #4] @ 4-byte Reload +; CHECK-NEXT: ldr r7, [sp, #148] +; CHECK-NEXT: adds r6, #1 +; CHECK-NEXT: cmp r6, r7 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r6, #0 +; CHECK-NEXT: b .LBB11_1 +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.10: +; CHECK-NEXT: .LCPI11_0: +; CHECK-NEXT: .long 0 @ 0x0 +; CHECK-NEXT: .long 1 @ 0x1 +; CHECK-NEXT: .long 2 @ 0x2 +; CHECK-NEXT: .long 3 @ 0x3 +entry: + %conv = zext i16 %ch_mult to i32 + %conv6.i = zext i16 %output_x to i32 + %conv17.i = zext i16 %input_ch to i32 + %conv60.i = zext i16 %kernel_x to i32 + %broadcast.splatinsert63 = insertelement <4 x i32> undef, i32 %conv, i32 0 + %broadcast.splat64 = shufflevector <4 x i32> %broadcast.splatinsert63, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splatinsert69 = insertelement <4 x i32> undef, i32 %conv, i32 0 + %broadcast.splat70 = shufflevector <4 x i32> %broadcast.splatinsert69, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splatinsert73 = insertelement <4 x i32> undef, i32 %conv, i32 0 + %broadcast.splat74 = shufflevector <4 x i32> %broadcast.splatinsert73, <4 x i32> undef, <4 x i32> zeroinitializer + %unroll_iter = and i32 %conv, 65534 + br label %for.body.i38 + +for.body.i38: ; preds = %for.cond.cleanup9.i, %entry + %i_out.024.i = phi i32 [ 0, %entry ], [ %i_out.1.lcssa.i, %for.cond.cleanup9.i ] + %i_out_y.023.i = phi i32 [ 0, %entry ], [ %inc140.i, %for.cond.cleanup9.i ] + br label %for.body10.i + +for.cond.cleanup9.i: ; preds = %for.cond.cleanup20.i, %for.body.i38 + %i_out.1.lcssa.i = phi i32 [ %i_out.2.lcssa.i, %for.cond.cleanup20.i ] + %inc140.i = add nuw nsw i32 %i_out_y.023.i, 1 + br i1 0, label %if.end, label %for.body.i38 + +for.body10.i: ; preds = %for.cond.cleanup20.i, %for.body.i38 + %i_out.120.i = phi i32 [ %i_out.024.i, %for.body.i38 ], [ %i_out.2.lcssa.i, %for.cond.cleanup20.i ] + %i_out_x.019.i = phi i32 [ 0, %for.body.i38 ], [ %inc137.i, %for.cond.cleanup20.i ] + %n.vec = add nsw i32 %conv60.i, 10 + br i1 0, label %for.cond.cleanup20.i, label %for.cond22.preheader.lr.ph.i + +for.cond22.preheader.lr.ph.i: ; preds = %for.body10.i + %ind.end = add nsw i32 0, %n.vec + %.splatinsert = insertelement <4 x i32> undef, i32 0, i32 0 + %.splat = shufflevector <4 x i32> %.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + %induction = add <4 x i32> %.splat, + %cmp.n = icmp eq i32 10, %n.vec + br label %for.cond22.preheader.i + +for.cond22.preheader.i: ; preds = %for.cond.cleanup26.i, %for.cond22.preheader.lr.ph.i + %i_out.216.i = phi i32 [ %i_out.120.i, %for.cond22.preheader.lr.ph.i ], [ %i_out.3.lcssa.i, %for.cond.cleanup26.i ] + %i_input_ch.014.i = phi i32 [ 0, %for.cond22.preheader.lr.ph.i ], [ %inc134.i, %for.cond.cleanup26.i ] + br i1 0, label %for.cond.cleanup26.i, label %for.body27.lr.ph.i + +for.body27.lr.ph.i: ; preds = %for.cond22.preheader.i + 
br i1 0, label %for.body27.i.us.preheader, label %for.body27.i.preheader + +for.body27.i.preheader: ; preds = %for.body27.lr.ph.i + %broadcast.splatinsert65 = insertelement <4 x i32> undef, i32 %i_input_ch.014.i, i32 0 + %broadcast.splat66 = shufflevector <4 x i32> %broadcast.splatinsert65, <4 x i32> undef, <4 x i32> zeroinitializer + br label %for.body27.i + +for.body27.i.us.preheader: ; preds = %for.body27.lr.ph.i + br i1 0, label %for.cond.cleanup26.i.loopexit.unr-lcssa, label %for.body27.i.us + +for.body27.i.us: ; preds = %for.body27.i.us, %for.body27.i.us.preheader + %i_out.311.i.us = phi i32 [ %inc128.i.us.1, %for.body27.i.us ], [ %i_out.216.i, %for.body27.i.us.preheader ] + %i_ch_mult.010.i.us = phi i32 [ %inc131.i.us.1, %for.body27.i.us ], [ 0, %for.body27.i.us.preheader ] + %niter = phi i32 [ 0, %for.body27.i.us ], [ %unroll_iter, %for.body27.i.us.preheader ] + %inc128.i.us.1 = add nsw i32 %i_out.311.i.us, 2 + %inc131.i.us.1 = add nuw nsw i32 %i_ch_mult.010.i.us, 2 + br i1 0, label %for.cond.cleanup26.i.loopexit.unr-lcssa, label %for.body27.i.us + +for.cond.cleanup20.i: ; preds = %for.cond.cleanup26.i, %for.body10.i + %i_out.2.lcssa.i = phi i32 [ %i_out.120.i, %for.body10.i ], [ %i_out.3.lcssa.i, %for.cond.cleanup26.i ] + %inc137.i = add nuw nsw i32 %i_out_x.019.i, 1 + %exitcond27.i = icmp eq i32 %inc137.i, %conv6.i + br i1 %exitcond27.i, label %for.cond.cleanup9.i, label %for.body10.i + +for.cond.cleanup26.i.loopexit.unr-lcssa: ; preds = %for.body27.i.us, %for.body27.i.us.preheader + %inc128.i.us.lcssa.ph = phi i32 [ undef, %for.body27.i.us.preheader ], [ %inc128.i.us.1, %for.body27.i.us ] + br label %for.cond.cleanup26.i + +for.cond.cleanup26.i: ; preds = %for.cond.cleanup77.i, %for.cond.cleanup26.i.loopexit.unr-lcssa, %for.cond22.preheader.i + %i_out.3.lcssa.i = phi i32 [ %i_out.216.i, %for.cond22.preheader.i ], [ %inc128.i.us.lcssa.ph, %for.cond.cleanup26.i.loopexit.unr-lcssa ], [ %inc128.i, %for.cond.cleanup77.i ] + %inc134.i = add nuw nsw i32 %i_input_ch.014.i, 1 + %exitcond26.i = icmp eq i32 %inc134.i, %conv17.i + br i1 %exitcond26.i, label %for.cond.cleanup20.i, label %for.cond22.preheader.i + +for.body27.i: ; preds = %for.cond.cleanup77.i, %for.body27.i.preheader + %i_out.311.i = phi i32 [ %inc128.i, %for.cond.cleanup77.i ], [ %i_out.216.i, %for.body27.i.preheader ] + %i_ch_mult.010.i = phi i32 [ %inc131.i, %for.cond.cleanup77.i ], [ 0, %for.body27.i.preheader ] + %broadcast.splatinsert71 = insertelement <4 x i32> undef, i32 %i_ch_mult.010.i, i32 0 + %broadcast.splat72 = shufflevector <4 x i32> %broadcast.splatinsert71, <4 x i32> undef, <4 x i32> zeroinitializer + br label %for.body78.us.i + +for.body78.us.i: ; preds = %middle.block, %for.body27.i + %i_ker_y.06.us.i = phi i32 [ %inc110.us.i, %middle.block ], [ 4, %for.body27.i ] + %acc_0.05.us.i = phi i32 [ %tmp89, %middle.block ], [ 0, %for.body27.i ] + %add80.us.i43 = add nsw i32 %i_ker_y.06.us.i, 10 + %mul89.us.i = mul nsw i32 %add80.us.i43, 11 + %add87.us.i44 = add i32 %mul89.us.i, 3 + %mul95.us.i = mul nsw i32 %i_ker_y.06.us.i, 11 + br label %vector.ph + +vector.ph: ; preds = %for.body78.us.i + %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %add87.us.i44, i32 0 + %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer + %broadcast.splatinsert67 = insertelement <4 x i32> undef, i32 %mul95.us.i, i32 0 + %broadcast.splat68 = shufflevector <4 x i32> %broadcast.splatinsert67, <4 x i32> undef, <4 x i32> zeroinitializer + br label %vector.body + 
+vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %vec.ind = phi <4 x i32> [ %induction, %vector.ph ], [ %vec.ind.next, %vector.body ] + %vec.phi = phi i32 [ %acc_0.05.us.i, %vector.ph ], [ %tmp89, %vector.body ] + %tmp76 = add <4 x i32> %broadcast.splat, %vec.ind + %tmp77 = mul nsw <4 x i32> %tmp76, %broadcast.splat64 + %tmp78 = add nsw <4 x i32> %tmp77, %broadcast.splat66 + %tmp79 = add nsw <4 x i32> %vec.ind, %broadcast.splat68 + %tmp80 = mul nsw <4 x i32> %broadcast.splat70, %tmp79 + %tmp81 = add nsw <4 x i32> %tmp80, %broadcast.splat72 + %tmp82 = getelementptr inbounds i8, i8* %input, <4 x i32> %tmp78 + %wide.masked.gather = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp82, i32 1, <4 x i1> , <4 x i8> undef) + %tmp83 = sext <4 x i8> %wide.masked.gather to <4 x i32> + %tmp84 = add nsw <4 x i32> %broadcast.splat74, %tmp83 + %tmp85 = getelementptr inbounds i8, i8* %kernel, <4 x i32> %tmp81 + %wide.masked.gather75 = call <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*> %tmp85, i32 1, <4 x i1> , <4 x i8> undef) + %tmp86 = sext <4 x i8> %wide.masked.gather75 to <4 x i32> + %tmp87 = mul nsw <4 x i32> %tmp84, %tmp86 + %tmp88 = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> %tmp87) + %tmp89 = add i32 %tmp88, %vec.phi + %index.next = add i32 %index, 4 + %vec.ind.next = add <4 x i32> %vec.ind, + %tmp90 = icmp eq i32 %index.next, %n.vec + br i1 %tmp90, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %inc110.us.i = add nsw i32 %i_ker_y.06.us.i, 1 + %cmp75.us.i = icmp slt i32 %inc110.us.i, 10 + br i1 %cmp75.us.i, label %for.body78.us.i, label %for.cond.cleanup77.i + +for.cond.cleanup77.i: ; preds = %middle.block + %inc128.i = add nsw i32 %i_out.311.i, 1 + %inc131.i = add nuw nsw i32 %i_ch_mult.010.i, 1 + %exitcond.i50 = icmp eq i32 %inc131.i, %conv + br i1 %exitcond.i50, label %for.cond.cleanup26.i, label %for.body27.i + +if.end: ; preds = %for.cond.cleanup9.i, %entry, %for.cond.cleanup7.i, %if.then + ret i32 0 +} + declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) declare <4 x i16> @llvm.masked.gather.v4i16.v4p0i16(<4 x i16*>, i32, <4 x i1>, <4 x i16>) +declare <4 x i8> @llvm.masked.gather.v4i8.v4p0i8(<4 x i8*>, i32 immarg, <4 x i1>, <4 x i8>) #3 + declare i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32>) declare void @llvm.memset.p0i8.i32(i8* align 2, i8, i32, i1) From 3a6b60fa623da6e311b61c812932917085067cd3 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 16 Apr 2020 10:02:26 -0700 Subject: [PATCH 066/216] [lldb/Docs] Add some more info about the test suite structure Expand on the structure of the LLDB test suite. So far this information has been mostly "tribal knowledge". By writing it down I hope to make it easier to understand our test suite for anyone that's new to the project. --- lldb/docs/resources/test.rst | 207 ++++++++++++++++++++++++++++++++--- 1 file changed, 190 insertions(+), 17 deletions(-) diff --git a/lldb/docs/resources/test.rst b/lldb/docs/resources/test.rst index dd40a1e51549..6f39a45d4b72 100644 --- a/lldb/docs/resources/test.rst +++ b/lldb/docs/resources/test.rst @@ -1,27 +1,200 @@ Testing ======= +.. contents:: + :local: + +Test Suite Structure +-------------------- + The LLDB test suite consists of three different kinds of test: -* Unit test. These are located under ``lldb/unittests`` and are written in C++ - using googletest. 
-* Integration tests that test the debugger through the SB API. These are
-  located under ``lldb/packages/Python/lldbsuite`` and are written in Python
-  using ``dotest`` (LLDB's custom testing framework on top of unittest2).
-* Integration tests that test the debugger through the command line. These are
-  located under `lldb/test/Shell` and are written in a shell-style format
-  using FileCheck to verify its output.
+* **Unit tests**: written in C++ using the googletest unit testing library.
+* **Shell tests**: Integration tests that test the debugger through the command
+  line. These tests interact with the debugger either through the command line
+  driver or through ``lldb-test``, which is a tool that exposes the internal
+  data structures in an easy-to-parse way for testing. Most people will know
+  these as *lit tests* in LLVM, although lit is the test driver and ShellTest
+  is the test format that uses ``RUN:`` lines. `FileCheck
+  `_ is used to verify
+  the output.
+* **API tests**: Integration tests that interact with the debugger through the
+  SB API. These are written in Python and use LLDB's ``dotest.py`` testing
+  framework on top of Python's `unittest2
+  `_.
+
+All three test suites use ``lit`` (`LLVM Integrated Tester
+`_ ) as the test driver. The test
+suites can be run as a whole or separately.
+
+
+Unit Tests
+``````````
+
+Unit tests are located under ``lldb/unittests``. If it's possible to test
+something in isolation or as a single unit, you should make it a unit test.
+
+Often you need instances of the core objects such as a debugger, target or
+process, in order to test something meaningful. We already have a handful of
+tests that have the necessary boilerplate, but this is something we could
+abstract away and make it more user-friendly.
+
+Shell Tests
+```````````
+
+Shell tests are located under ``lldb/test/Shell``. These tests are generally
+built around checking the output of ``lldb`` (the command line driver) or
+``lldb-test`` using ``FileCheck``. Shell tests are generally small and fast to
+write because they require little boilerplate.
+
+``lldb-test`` is a relatively new addition to the test suite. It was the first
+tool that was added that is designed for testing. Since then it has been
+continuously extended with new subcommands, improving our test coverage. Among
+other things you can use it to query lldb for symbol files, object files
+and breakpoints.
+
+Obviously shell tests are great for testing the command line driver itself or
+the subcomponents already exposed by ``lldb-test``. But when it comes to LLDB's
+vast functionality, most things can be tested both through the driver as well
+as the Python API. For example, to test setting a breakpoint, you could do it
+from the command line driver with ``b main`` or you could use the SB API and do
+something like ``target.BreakpointCreateByName`` [#]_.
+
+A good rule of thumb is to prefer shell tests when what is being tested is
+relatively simple. Expressivity is limited compared to the API tests, which
+means that you have to have a well-defined test scenario that you can easily
+match with ``FileCheck``.
+
+Another thing to consider is the binaries being debugged, which we call
+inferiors. For shell tests, they have to be relatively simple. The
+``dotest.py`` test framework has extensive support for complex build scenarios
+and different variants, which is described in more detail below, while shell
+tests are limited to single lines of shell commands with compiler and linker
+invocations.
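+
+For illustration, a minimal shell test might look like the following sketch (a
+hypothetical file, not taken from the repository; the exact substitutions and
+output depend on the configuration):
+
+::
+
+   // RUN: %clang_host -g %s -o %t
+   // RUN: %lldb -b -o 'breakpoint set --name main' %t | FileCheck %s
+   // CHECK: Breakpoint 1: where = {{.*}}main
+   int main() { return 0; }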
+
+On the same topic, another interesting aspect of the shell tests is that there
+you can often get away with a broken or incomplete binary, whereas the API
+tests almost always require a fully functional executable. This enables testing
+of (some) aspects of handling of binaries with non-native architectures or
+operating systems.
+
+Finally, the shell tests always run in batch mode. You start with some input
+and the test verifies the output. The debugger can be sensitive to its
+environment, such as the platform it runs on. It can be hard to express
+that the same test might behave slightly differently on macOS and Linux.
+Additionally, the debugger is an interactive tool, and the shell tests provide
+no good way of testing those interactive aspects, such as tab completion for
+example.
+
+API Tests
+`````````
+
+API tests are located under ``lldb/test/API``. They are run with
+``dotest.py``. Tests are written in Python and test binaries (inferiors) are
+compiled with Make. The majority of API tests are end-to-end tests that compile
+programs from source, run them, and debug the processes.
+
+As mentioned before, ``dotest.py`` is LLDB's testing framework. The
+implementation is located under ``lldb/packages/Python/lldbsuite``. We have
+several extensions and custom test primitives on top of what's offered by
+`unittest2 `_. Those can be
+found in
+`lldbtest.py `_.
+
+Below is the directory layout of the `example API test
+`_.
+The test directory will always contain a Python file, starting with ``Test``.
+Most of the tests are structured as a binary being debugged, so there will be
+one or more source files and a ``Makefile``.
 
-All three test suites use the `LLVM Integrated Tester
-`_ (lit) as their test driver. The
-test suites can be run as a whole or separately.
+::
 
-Many of the tests are accompanied by a C (C++, ObjC, etc.) source file. Each
-test first compiles the source file and then uses LLDB to debug the resulting
-executable.
+   sample_test
+   ├── Makefile
+   ├── TestSampleTest.py
+   └── main.c
+
+Let's start with the Python test file. Every test is its own class and can have
+one or more test methods that start with ``test_``. Many tests define
+multiple test methods and share a bunch of common code. For example, for a
+fictional test that makes sure we can set breakpoints we might have one test
+method that ensures we can set a breakpoint by address, one that sets a
+breakpoint by name and another that sets the same breakpoint by file and line
+number. The setup, teardown and everything else other than setting the
+breakpoint could be shared.
+
+Our testing framework also has a bunch of utilities that abstract common
+operations, such as creating targets, setting breakpoints, etc. When code is
+shared across tests, we extract it into a utility in ``lldbutil``. It's always
+worth taking a look at `lldbutil
+`_
+to see if there's a utility to simplify some of the testing boilerplate.
+Because we can't always audit every existing test, this is doubly true when
+looking at an existing test for inspiration.
+
+It's possible to skip or `XFAIL
+`_
+tests using decorators. You'll see them a lot. The debugger can be sensitive to
+things like the architecture, the host and target platform, the compiler
+version, etc. LLDB comes with a range of predefined decorators for these
+configurations.
 
-.. 
contents::
-   :local:
+::
+
+   @expectedFailureAll(archs=["aarch64"], oslist=["linux"])
+
+Another great thing about these decorators is that they're very easy to extend;
+it's even possible to define a function in a test case that determines whether
+the test should be run or not.
+
+::
+
+   @expectedFailure(checking_function_name)
+
+In addition to providing a lot more flexibility when it comes to writing the
+test, the API tests also allow for much more complex scenarios when it comes to
+building inferiors. Every test has its own ``Makefile``, most of them only a
+few lines long. A shared ``Makefile`` (``Makefile.rules``) with about a
+thousand lines of rules takes care of most if not all of the boilerplate,
+while individual Makefiles can be used to build more advanced tests.
+
+Here's an example of a simple ``Makefile`` used by the example test.
+
+::
+
+   C_SOURCES := main.c
+   CFLAGS_EXTRAS := -std=c99
+
+   include Makefile.rules
+
+Finding the right variables to set can be tricky. You can always take a look at
+`Makefile.rules `_
+but often it's easier to find an existing ``Makefile`` that does something
+similar to what you want to do.
+
+Another thing this enables is having different variants for the same test
+case. By default, we run every test for all 3 debug info formats, so once with
+DWARF from the object files, once with gmodules and finally with a dSYM on
+macOS or split DWARF (DWO) on Linux. But there are many more things we can test
+that are orthogonal to the test itself. On GreenDragon we have a matrix bot
+that runs the test suite under different configurations, with older host
+compilers and different DWARF versions.
+
+As you can imagine, this quickly leads to a combinatorial explosion in the
+number of variants. It's very tempting to add more variants because it's an
+easy way to increase test coverage. It doesn't scale. It's easy to set up, but
+increases the runtime of the tests and has a large ongoing cost.
+
+The key takeaway is that the different variants don't obviate the need for
+focused tests. So relying on them to test, say, DWARF5 is a really bad idea.
+Instead, you should write tests that check the specific DWARF5 feature, and
+have the variant as a nice-to-have.
+
+In conclusion, you'll want to opt for an API test to test the API itself or
+when you need the expressivity, either for the test case itself or for the
+program being debugged. The fact that the API tests work with different
+variants means that more general tests should be API tests, so that they can be
+run against the different variants.
 
 Running The Tests
 -----------------
@@ -244,4 +417,4 @@ A quick guide to getting started with PTVS is as follows:
     --arch=i686 --executable D:/src/llvmbuild/ninja/bin/lldb.exe -s D:/src/llvmbuild/ninja/lldb-test-traces -u CXXFLAGS -u CFLAGS --enable-crash-dialog -C d:\src\llvmbuild\ninja_release\bin\clang.exe -p TestPaths.py D:\src\llvm\tools\lldb\packages\Python\lldbsuite\test --no-multiprocess
 
-
+.. [#] `https://lldb.llvm.org/python_reference/lldb.SBTarget-class.html#BreakpointCreateByName `_

From 2e94a64b57db8cb2225f70ad931d86792db7708f Mon Sep 17 00:00:00 2001
From: Stanislav Mekhanoshin
Date: Fri, 10 Apr 2020 15:54:54 -0700
Subject: [PATCH 067/216] [AMDGPU] Define 16 bit SGPR subregs

These are needed as a counterpart for VGPR subregs even though there are no
scalar instructions which can operate on 16-bit values. When we materialize a
constant, that is done into an SGPR, and that SGPR may/will be copied into a
16-bit VGPR subreg. Such a copy is illegal. There are also similar problems if
a source operand of a 16-bit VALU instruction is an SGPR. In addition, we need
to get a register with a lo16 subregister of an SGPR RC during selection, and
this fails as well. All of that makes me believe we need these subregisters as
syntactic glue.
Differential Revision: https://reviews.llvm.org/D78250 --- llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp | 8 +++-- llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td | 2 +- llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp | 9 ++++-- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 6 ++++ llvm/lib/Target/AMDGPU/SIRegisterInfo.td | 31 +++++++++++++++++-- .../CodeGen/AMDGPU/postra-bundle-memops.mir | 2 +- .../AMDGPU/rename-independent-subregs.mir | 2 +- ...ssert-dead-def-subreg-use-other-subreg.mir | 2 +- ...dleMoveUp-subreg-def-across-subreg-def.mir | 8 ++--- ...ubreg-undef-def-with-other-subreg-defs.mir | 12 +++---- 10 files changed, 61 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 63f7590217df..42ac97aabec9 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -764,12 +764,16 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( break; } - if (AMDGPU::SReg_32RegClass.contains(Reg)) { + if (AMDGPU::SReg_32RegClass.contains(Reg) || + AMDGPU::SGPR_LO16RegClass.contains(Reg) || + AMDGPU::SGPR_HI16RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_32RegClass.contains(Reg) && "trap handler registers should not be used"); IsSGPR = true; Width = 1; - } else if (AMDGPU::VGPR_32RegClass.contains(Reg)) { + } else if (AMDGPU::VGPR_32RegClass.contains(Reg) || + AMDGPU::VGPR_LO16RegClass.contains(Reg) || + AMDGPU::VGPR_HI16RegClass.contains(Reg)) { IsSGPR = false; Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td index cb831f3a53b9..af6e8f7ccbf2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td +++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBanks.td @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// def SGPRRegBank : RegisterBank<"SGPR", - [SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024] + [SGPR_LO16, SReg_32, SReg_64, SReg_128, SReg_256, SReg_512, SReg_1024] >; def VGPRRegBank : RegisterBank<"VGPR", diff --git a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp index f4f22b27a88f..2cf3054fd455 100644 --- a/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp @@ -100,7 +100,8 @@ static void insertCSRSaves(MachineBasicBlock &SaveBlock, unsigned Reg = CS.getReg(); MachineInstrSpan MIS(I, &SaveBlock); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); TII.storeRegToStackSlot(SaveBlock, I, Reg, true, CS.getFrameIdx(), RC, TRI); @@ -133,7 +134,8 @@ static void insertCSRRestores(MachineBasicBlock &RestoreBlock, if (!TFI->restoreCalleeSavedRegisters(RestoreBlock, I, CSI, TRI)) { for (const CalleeSavedInfo &CI : reverse(CSI)) { unsigned Reg = CI.getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = + TRI->getMinimalPhysRegClass(Reg, MVT::i32); TII.loadRegFromStackSlot(RestoreBlock, I, Reg, CI.getFrameIdx(), RC, TRI); assert(I != RestoreBlock.begin() && @@ -206,7 +208,8 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) { for (unsigned I = 0; CSRegs[I]; ++I) { unsigned Reg = CSRegs[I]; if (SavedRegs.test(Reg)) { - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + const TargetRegisterClass *RC = + 
TRI->getMinimalPhysRegClass(Reg, MVT::i32); int JunkFI = MFI.CreateStackObject(TRI->getSpillSize(*RC), TRI->getSpillAlignment(*RC), true); diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 58ea6bc6723a..a2f7fa04c9ec 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1281,6 +1281,7 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const { static const TargetRegisterClass *const BaseClasses[] = { &AMDGPU::VGPR_LO16RegClass, &AMDGPU::VGPR_HI16RegClass, + &AMDGPU::SGPR_LO16RegClass, &AMDGPU::VGPR_32RegClass, &AMDGPU::SReg_32RegClass, &AMDGPU::AGPR_32RegClass, @@ -1375,6 +1376,8 @@ bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( const TargetRegisterClass *SRC) const { switch (getRegSizeInBits(*SRC)) { + case 16: + return &AMDGPU::VGPR_LO16RegClass; case 32: return &AMDGPU::VGPR_32RegClass; case 64: @@ -1419,6 +1422,8 @@ const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass( const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( const TargetRegisterClass *VRC) const { switch (getRegSizeInBits(*VRC)) { + case 16: + return &AMDGPU::SGPR_LO16RegClass; case 32: return &AMDGPU::SGPR_32RegClass; case 64: @@ -1795,6 +1800,7 @@ unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC, case AMDGPU::VGPR_HI16RegClassID: return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF)); case AMDGPU::SGPR_32RegClassID: + case AMDGPU::SGPR_LO16RegClassID: return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF)); } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td index 1431ed74fe65..777e6cc5d3f7 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.td +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.td @@ -253,10 +253,23 @@ def FLAT_SCR : FlatReg; // SGPR registers foreach Index = 0-105 in { - def SGPR#Index : - SIReg <"s"#Index, Index>, + def SGPR#Index#_LO16 : SIReg <"s"#Index#".l", Index>, DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]>; + + // This is a placeholder to fill high lane in mask. + def SGPR#Index#_HI16 : SIReg <"", Index> { + let isArtificial = 1; + } + + def SGPR#Index : + SIRegWithSubRegs <"s"#Index, [!cast("SGPR"#Index#"_LO16"), + !cast("SGPR"#Index#"_HI16")], + Index>, + DwarfRegNum<[!if(!le(Index, 63), !add(Index, 32), !add(Index, 1024)), + !if(!le(Index, 63), !add(Index, 32), !add(Index, 1024))]> { + let SubRegIndices = [lo16, hi16]; + } } // VGPR registers @@ -317,6 +330,20 @@ def M0_CLASS : RegisterClass<"AMDGPU", [i32], 32, (add M0)> { // TODO: Do we need to set DwarfRegAlias on register tuples? 
+def SGPR_LO16 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add (sequence "SGPR%u_LO16", 0, 105))> { + let AllocationPriority = 1; + let Size = 16; + let GeneratePressureSet = 0; +} + +def SGPR_HI16 : RegisterClass<"AMDGPU", [i16, f16], 16, + (add (sequence "SGPR%u_HI16", 0, 105))> { + let isAllocatable = 0; + let Size = 16; + let GeneratePressureSet = 0; +} + // SGPR 32-bit registers def SGPR_32 : RegisterClass<"AMDGPU", [i32, f32, i16, f16, v2i16, v2f16], 32, (add (sequence "SGPR%u", 0, 105))> { diff --git a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir index 2042d8233a5a..00742ebdfe4e 100644 --- a/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir +++ b/llvm/test/CodeGen/AMDGPU/postra-bundle-memops.mir @@ -43,7 +43,7 @@ body: | ; GCN: DS_WRITE_B32_gfx9 $vgpr0, $vgpr3, 4, 0, implicit killed $m0, implicit $exec ; GCN: } ; GCN: S_NOP 0 - ; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr3, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 { + ; GCN: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit undef $sgpr0_sgpr1, implicit undef $sgpr10 { ; GCN: $sgpr2 = S_LOAD_DWORD_IMM undef $sgpr0_sgpr1, 0, 0, 0 ; GCN: $sgpr3 = S_LOAD_DWORD_SGPR undef $sgpr0_sgpr1, undef $sgpr10, 0, 0 ; GCN: } diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir index 134b2a0fb589..12200d68e20c 100644 --- a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -73,7 +73,7 @@ body: | # (1) %0.sub0 + %0.sub0 and (2) %0.sub1 + %0.sub1 # Check that renaming (2) does not inadvertently rename (1). 
# CHECK-LABEL: name: test2 -# CHECK: INLINEASM &"", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_XEXEC_with_sub0 */, def undef %0.sub0, 327690 /* regdef:SReg_1_XEXEC_with_sub0 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) +# CHECK: INLINEASM &"", 32 /* isconvergent attdialect */, 327690 /* regdef:SReg_1_with_sub0 */, def undef %0.sub0, 327690 /* regdef:SReg_1_with_sub0 */, def dead %1.sub1, 2147483657 /* reguse tiedto:$0 */, undef %0.sub0(tied-def 3), 2147549193 /* reguse tiedto:$1 */, %1.sub1(tied-def 5) name: test2 body: | bb.0: diff --git a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir index 53c4544c0bf9..443999bdea5f 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-assert-dead-def-subreg-use-other-subreg.mir @@ -33,7 +33,7 @@ body: | ; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec ; CHECK: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[COPY]].sub0 ; CHECK: undef %11.sub1:vreg_512 = COPY [[COPY]].sub1 - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead [[COPY1]], 851978 /* regdef:VRegOrLds_32 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[COPY1]], 851978 /* regdef:SGPR_LO16 */, def dead [[COPY]].sub1, 2147483657 /* reguse tiedto:$0 */, [[COPY1]], 2147549193 /* reguse tiedto:$1 */, [[COPY]].sub1 ; CHECK: %11.sub0:vreg_512 = COPY [[COPY]].sub0 ; CHECK: %11.sub3:vreg_512 = COPY [[COPY]].sub3 ; CHECK: dead %10:vgpr_32 = V_ADD_I32_e32 4, [[V_MOV_B32_e32_1]], implicit-def dead $vcc, implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir index f43289ffee2a..3398e0f10b36 100644 --- a/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir +++ b/llvm/test/CodeGen/AMDGPU/sched-handleMoveUp-subreg-def-across-subreg-def.mir @@ -36,18 +36,18 @@ body: | ; CHECK: [[DEF2:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead %11 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead %11 ; CHECK: GLOBAL_STORE_DWORD undef %12:vreg_64, [[BUFFER_LOAD_DWORD_OFFEN]], 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1) ; CHECK: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[V_MOV_B32_e32_3:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec ; CHECK: [[DS_READ_B64_gfx9_:%[0-9]+]]:vreg_64 = DS_READ_B64_gfx9 undef %14:vgpr_32, 0, 0, implicit $exec :: (load 8, addrspace 3) - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %15, 851978 /* regdef:VRegOrLds_32 */, def %16 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %15, 851978 /* regdef:SGPR_LO16 */, def %16 ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_1:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 
[[V_MOV_B32_e32_1]], 0, 0, implicit $exec ; CHECK: [[DS_READ_B32_gfx9_2:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 undef %20:vgpr_32, 0, 0, implicit $exec - ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %21, 851978 /* regdef:VRegOrLds_32 */, def %22 + ; CHECK: INLINEASM &"def $0 $1", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %21, 851978 /* regdef:SGPR_LO16 */, def %22 ; CHECK: [[DS_READ_B32_gfx9_3:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_1]], 0, 0, implicit $exec - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:VRegOrLds_32 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:VRegOrLds_32 */, %15, 851977 /* reguse:VRegOrLds_32 */, %16, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_2]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_2]], 851978 /* regdef:SGPR_LO16 */, def dead [[V_MOV_B32_e32_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B64_gfx9_]].sub0, 2147483657 /* reguse tiedto:$0 */, [[V_MOV_B32_e32_2]](tied-def 3), 2147549193 /* reguse tiedto:$1 */, [[V_MOV_B32_e32_3]](tied-def 5), 851977 /* reguse:SGPR_LO16 */, %15, 851977 /* reguse:SGPR_LO16 */, %16, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_1]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_3]], 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_2]] ; CHECK: %5.sub1:vreg_64 = COPY [[V_MOV_B32_e32_]] ; CHECK: DS_WRITE_B32_gfx9 undef %28:vgpr_32, %21, 0, 0, implicit $exec :: (store 4, addrspace 3) ; CHECK: DS_WRITE_B32_gfx9 undef %29:vgpr_32, %22, 0, 0, implicit $exec :: (store 4, addrspace 3) diff --git a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir index 522f9a0385c6..488bbfeefa10 100644 --- a/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir +++ b/llvm/test/CodeGen/AMDGPU/subreg-undef-def-with-other-subreg-defs.mir @@ -25,9 +25,9 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub0, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub1 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1 ; 
CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 @@ -63,9 +63,9 @@ body: | ; CHECK: bb.1: ; CHECK: successors: %bb.1(0x80000000) ; CHECK: [[DS_READ_B32_gfx9_:%[0-9]+]]:vgpr_32 = DS_READ_B32_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec :: (load 4, addrspace 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:VRegOrLds_32 */, [[DS_READ_B32_gfx9_]] - ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub1, 851978 /* regdef:VRegOrLds_32 */, def undef %0.sub0 + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def %0, 2147549193 /* reguse tiedto:$1 */, %0(tied-def 3) + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851977 /* reguse:SGPR_LO16 */, [[DS_READ_B32_gfx9_]] + ; CHECK: INLINEASM &"", 1 /* sideeffect attdialect */, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub1, 851978 /* regdef:SGPR_LO16 */, def undef %0.sub0 ; CHECK: S_NOP 0, implicit %0.sub1 ; CHECK: $sgpr10 = S_MOV_B32 -1 ; CHECK: S_BRANCH %bb.1 From bf60953faf3a0b80876e7345462d959586250daf Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 13 Mar 2020 11:20:13 -0700 Subject: [PATCH 068/216] [MC][X86] Allow SHT_PROGBITS for .eh_frame on x86-64 GNU as emits SHT_PROGBITS .eh_frame by default for .cfi_* directives. We follow x86-64 psABI and use SHT_X86_64_UNWIND for .eh_frame Don't error for SHT_PROGBITS .eh_frame on x86-64. This keeps compatibility with `.section .eh_frame,"a",@progbits` in existing assembly files. See https://groups.google.com/d/msg/x86-64-abi/7sr4E6THl3g/zUU2UPHOAQAJ for more discussions. Reviewed By: joerg Differential Revision: https://reviews.llvm.org/D76151 --- llvm/lib/MC/MCParser/ELFAsmParser.cpp | 6 +++++- llvm/test/MC/ELF/section-type-changed.s | 8 ++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 6d8a8a71468c..a80e8a5832ef 100644 --- a/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -637,7 +637,11 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush, SMLoc loc) { MCSectionELF *Section = getContext().getELFSection( SectionName, Type, Flags, Size, GroupName, UniqueID, LinkedToSym); getStreamer().SwitchSection(Section, Subsection); - if (Section->getType() != Type) + // x86-64 psABI names SHT_X86_64_UNWIND as the canonical type for .eh_frame, + // but GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't error + // for SHT_PROGBITS .eh_frame + if (Section->getType() != Type && + !(SectionName == ".eh_frame" && Type == ELF::SHT_PROGBITS)) Error(loc, "changed section type for " + SectionName + ", expected: 0x" + utohexstr(Section->getType())); if (Section->getFlags() != Flags) diff --git a/llvm/test/MC/ELF/section-type-changed.s b/llvm/test/MC/ELF/section-type-changed.s index cc871462e2db..f8fd092919ed 100644 --- a/llvm/test/MC/ELF/section-type-changed.s +++ b/llvm/test/MC/ELF/section-type-changed.s @@ -9,3 +9,11 @@ .pushsection .foo,"a",@nobits .pushsection .foo,"a",@progbits + +## GNU as emits SHT_PROGBITS .eh_frame for .cfi_* directives. Don't error. 
+.section .eh_frame,"a",@progbits +.section .eh_frame,"a",@unwind +.pushsection .eh_frame,"a",@progbits + +# CHECK: {{.*}}.s:[[# @LINE+1]]:1: error: changed section type for .eh_frame, expected: 0x70000001 +.section .eh_frame,"a",@nobits From 7c6ca18fffdcf935b3b38ffc8697180d60511ffe Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 15 Apr 2020 17:24:17 -0700 Subject: [PATCH 069/216] [globalisel] Allow backends to report an issue without triggering fallback. NFC Summary: This will allow us to fix the issue where the lost locations verifier causes CodeGen changes on lost locations because it falls back on DAGISel Reviewers: qcolombet, bogner, aprantl, vsk, paquette Subscribers: rovka, hiraditya, volkan, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78261 --- llvm/include/llvm/CodeGen/GlobalISel/Utils.h | 6 ++++ llvm/lib/CodeGen/GlobalISel/Utils.cpp | 29 +++++++++++++++----- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h index 956f3b444f60..885a4b792e65 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h @@ -115,6 +115,12 @@ void reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, const char *PassName, StringRef Msg, const MachineInstr &MI); +/// Report an ISel warning as a missed optimization remark to the LLVMContext's +/// diagnostic stream. +void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R); + /// If \p VReg is defined by a G_CONSTANT fits in int64_t /// returns it. Optional getConstantVRegVal(Register VReg, diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp index d248a2f1d9eb..475d5e583040 100644 --- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp @@ -198,22 +198,37 @@ bool llvm::isTriviallyDead(const MachineInstr &MI, return true; } -void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, - MachineOptimizationRemarkEmitter &MORE, - MachineOptimizationRemarkMissed &R) { - MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); - +static void reportGISelDiagnostic(DiagnosticSeverity Severity, + MachineFunction &MF, + const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R) { + bool IsFatal = Severity == DS_Error && + TPC.isGlobalISelAbortEnabled(); // Print the function name explicitly if we don't have a debug location (which // makes the diagnostic less useful) or if we're going to emit a raw error. 
- if (!R.getLocation().isValid() || TPC.isGlobalISelAbortEnabled()) + if (!R.getLocation().isValid() || IsFatal) R << (" (in function: " + MF.getName() + ")").str(); - if (TPC.isGlobalISelAbortEnabled()) + if (IsFatal) report_fatal_error(R.getMsg()); else MORE.emit(R); } +void llvm::reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R) { + reportGISelDiagnostic(DS_Warning, MF, TPC, MORE, R); +} + +void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, + MachineOptimizationRemarkEmitter &MORE, + MachineOptimizationRemarkMissed &R) { + MF.getProperties().set(MachineFunctionProperties::Property::FailedISel); + reportGISelDiagnostic(DS_Error, MF, TPC, MORE, R); +} + void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC, MachineOptimizationRemarkEmitter &MORE, const char *PassName, StringRef Msg, From d9085f65db0b39fa53d31fc0533c77e91f2f4a1c Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 1 Apr 2020 11:19:58 -0700 Subject: [PATCH 070/216] [globalisel] Add lost debug locations verifier Summary: This verifier tries to ensure that DebugLoc's don't just disappear as we transform the MIR. It observes the instructions created, erased, and changed and at checkpoints chosen by the client algorithm verifies the locations affected by those changes. In particular, it verifies that: * Every DebugLoc for an erased/changing instruction is still present on at least one new/changed instruction * Failing that, that there is a line-0 location in the new/changed instructions. It's not possible to confirm which locations were merged so it conservatively assumes all unaccounted for locations are accounted for by any line-0 location to avoid false positives. If that fails, it prints the lost locations in the debug output along with the instructions that should have accounted for them. In theory, this is usable by the legalizer, combiner, selector and any other pass that performs incremental changes to the MIR. However, it has so far only really been tested on the legalizer (not including the artifact combiner) where it has caught lots of lost locations, particularly in Custom legalizations. There's only one example here as my initial testing was on an out-of-tree target and I haven't done a pass over the in-tree targets yet. 
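As a rough sketch (not part of the original commit message; MIRBuilder, MI,
DstReg and SrcReg are assumed to be in scope, as they would be in a typical
custom legalization), the pattern the observer expects replacements to follow
so that no location is lost looks like:

    // Anchor the builder on the instruction being replaced so that newly
    // created instructions inherit its DebugLoc.
    MIRBuilder.setInstrAndDebugLoc(MI);
    // The replacement instruction now carries the original location.
    MIRBuilder.buildCopy(DstReg, SrcReg);
    // Erasing MI is then fine: its location is accounted for at the next
    // checkpoint.
    MI.eraseFromParent();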
Depends on D77575, D77446 Reviewers: bogner, aprantl, vsk Subscribers: jvesely, nhaehnle, mgorny, rovka, hiraditya, volkan, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77576 --- .../llvm/CodeGen/GlobalISel/Legalizer.h | 2 + .../CodeGen/GlobalISel/LostDebugLocObserver.h | 50 ++++++++ llvm/lib/CodeGen/GlobalISel/CMakeLists.txt | 1 + llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 59 ++++++++- .../GlobalISel/LostDebugLocObserver.cpp | 113 ++++++++++++++++++ .../CodeGen/GlobalISel/LegalizerTest.cpp | 20 ++-- 6 files changed, 237 insertions(+), 8 deletions(-) create mode 100644 llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h create mode 100644 llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h index 07173b9719bd..e59bf1b91262 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -26,6 +26,7 @@ namespace llvm { class MachineRegisterInfo; +class LostDebugLocObserver; class Legalizer : public MachineFunctionPass { public: @@ -71,6 +72,7 @@ class Legalizer : public MachineFunctionPass { static MFResult legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef AuxObservers, + LostDebugLocObserver &LocObserver, MachineIRBuilder &MIRBuilder); }; } // End namespace llvm. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h b/llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h new file mode 100644 index 000000000000..cd2a871e9579 --- /dev/null +++ b/llvm/include/llvm/CodeGen/GlobalISel/LostDebugLocObserver.h @@ -0,0 +1,50 @@ +//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.h -------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Tracks DebugLocs between checkpoints and verifies that they are transferred. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H +#define LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H + +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" + +namespace llvm { +class LostDebugLocObserver : public GISelChangeObserver { + StringRef DebugType; + SmallSet LostDebugLocs; + SmallPtrSet PotentialMIsForDebugLocs; + unsigned NumLostDebugLocs = 0; + +public: + LostDebugLocObserver(StringRef DebugType) : DebugType(DebugType) {} + + unsigned getNumLostDebugLocs() const { return NumLostDebugLocs; } + + /// Call this to indicate that it's a good point to assess whether locations + /// have been lost. Typically this will be when a logical change has been + /// completed such as the caller has finished replacing some instructions with + /// alternatives. When CheckDebugLocs is true, the locations will be checked + /// to see if any have been lost since the last checkpoint. When + /// CheckDebugLocs is false, it will just reset ready for the next checkpoint + /// without checking anything. This can be helpful to limit the detection to + /// easy-to-fix portions of an algorithm before allowing more difficult ones. 
+ void checkpoint(bool CheckDebugLocs = true); + + void createdInstr(MachineInstr &MI) override; + void erasingInstr(MachineInstr &MI) override; + void changingInstr(MachineInstr &MI) override; + void changedInstr(MachineInstr &MI) override; + +private: + void analyzeDebugLocations(); +}; + +} // namespace llvm +#endif // ifndef LLVM_CODEGEN_GLOBALISEL_LOSTDEBUGLOCOBSERVER_H diff --git a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt index 5774991a43b9..ed6a45afb7aa 100644 --- a/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt +++ b/llvm/lib/CodeGen/GlobalISel/CMakeLists.txt @@ -16,6 +16,7 @@ add_llvm_component_library(LLVMGlobalISel LegalizerHelper.cpp LegalizerInfo.cpp Localizer.cpp + LostDebugLocObserver.cpp MachineIRBuilder.cpp RegBankSelect.cpp RegisterBank.cpp diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 0c0edc8a7b0c..4b6d24ba73e8 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" #include "llvm/CodeGen/GlobalISel/LegalizationArtifactCombiner.h" #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/Utils.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -42,6 +43,27 @@ static cl::opt cl::desc("Should enable CSE in Legalizer"), cl::Optional, cl::init(false)); +enum class DebugLocVerifyLevel { + None, + Legalizations, + LegalizationsAndArtifactCombiners, +}; +#ifndef NDEBUG +static cl::opt VerifyDebugLocs( + "verify-legalizer-debug-locs", + cl::desc("Verify that debug locations are handled"), + cl::values(clEnumVal(DebugLocVerifyLevel::None, "No verification"), + clEnumVal(DebugLocVerifyLevel::Legalizations, + "Verify legalizations"), + clEnumVal(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners, + "Verify legalizations and artifact combines")), + cl::init(DebugLocVerifyLevel::Legalizations)); +#else +// Always disable it for release builds by preventing the observer from being +// installed. 
+static const DebugLocVerifyLevel VerifyDebugLocs = DebugLocVerifyLevel::None; +#endif + char Legalizer::ID = 0; INITIALIZE_PASS_BEGIN(Legalizer, DEBUG_TYPE, "Legalize the Machine IR a function's Machine IR", false, @@ -144,6 +166,7 @@ class LegalizerWorkListManager : public GISelChangeObserver { Legalizer::MFResult Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, ArrayRef AuxObservers, + LostDebugLocObserver &LocObserver, MachineIRBuilder &MIRBuilder) { MachineRegisterInfo &MRI = MF.getRegInfo(); @@ -200,6 +223,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); + LocObserver.checkpoint(); continue; } @@ -225,6 +249,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, return {Changed, &MI}; } WorkListObserver.printNewInstrs(); + LocObserver.checkpoint(); Changed |= Res == LegalizerHelper::Legalized; } // Try to combine the instructions in RetryList again if there @@ -239,6 +264,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, return {Changed, RetryList.front()}; } } + LocObserver.checkpoint(); while (!ArtifactList.empty()) { MachineInstr &MI = *ArtifactList.pop_back_val(); assert(isPreISelGenericOpcode(MI.getOpcode()) && @@ -247,6 +273,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, LLVM_DEBUG(dbgs() << MI << "Is dead\n"); RemoveDeadInstFromLists(&MI); MI.eraseFromParentAndMarkDBGValuesForRemoval(); + LocObserver.checkpoint(); continue; } SmallVector DeadInstructions; @@ -254,11 +281,15 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions, WrapperObserver)) { WorkListObserver.printNewInstrs(); + LocObserver.checkpoint( + VerifyDebugLocs == + DebugLocVerifyLevel::LegalizationsAndArtifactCombiners); for (auto *DeadMI : DeadInstructions) { LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n"); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } + LocObserver.checkpoint(); Changed = true; continue; } @@ -307,9 +338,13 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { AuxObservers.push_back(CSEInfo); } assert(!CSEInfo || !errorToBool(CSEInfo->verify())); + LostDebugLocObserver LocObserver(DEBUG_TYPE); + if (VerifyDebugLocs > DebugLocVerifyLevel::None) + AuxObservers.push_back(&LocObserver); const LegalizerInfo &LI = *MF.getSubtarget().getLegalizerInfo(); - MFResult Result = legalizeMachineFunction(MF, LI, AuxObservers, *MIRBuilder); + MFResult Result = + legalizeMachineFunction(MF, LI, AuxObservers, LocObserver, *MIRBuilder); if (Result.FailedOn) { reportGISelFailure(MF, TPC, MORE, "gisel-legalize", @@ -326,6 +361,28 @@ bool Legalizer::runOnMachineFunction(MachineFunction &MF) { reportGISelFailure(MF, TPC, MORE, R); return false; } + + if (LocObserver.getNumLostDebugLocs()) { + MachineOptimizationRemarkMissed R("gisel-legalize", "LostDebugLoc", + MF.getFunction().getSubprogram(), + /*MBB=*/&*MF.begin()); + R << "lost " + << ore::NV("NumLostDebugLocs", LocObserver.getNumLostDebugLocs()) + << " debug locations during pass"; + reportGISelWarning(MF, TPC, MORE, R); + // Example remark: + // --- !Missed + // Pass: gisel-legalize + // Name: GISelFailure + // DebugLoc: { File: '.../legalize-urem.mir', Line: 1, Column: 0 } + // Function: test_urem_s32 + // Args: + // - 
String: 'lost ' + // - NumLostDebugLocs: '1' + // - String: ' debug locations during pass' + // ... + } + // If for some reason CSE was not enabled, make sure that we invalidate the // CSEInfo object (as we currently declare that the analysis is preserved). // The next time get on the wrapper is called, it will force it to recompute diff --git a/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp b/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp new file mode 100644 index 000000000000..29f56b167c66 --- /dev/null +++ b/llvm/lib/CodeGen/GlobalISel/LostDebugLocObserver.cpp @@ -0,0 +1,113 @@ +//===----- llvm/CodeGen/GlobalISel/LostDebugLocObserver.cpp -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// Tracks DebugLocs between checkpoints and verifies that they are transferred. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" + +using namespace llvm; + +#define LOC_DEBUG(X) DEBUG_WITH_TYPE(DebugType.str().c_str(), X) + +void LostDebugLocObserver::analyzeDebugLocations() { + if (LostDebugLocs.empty()) { + LOC_DEBUG(dbgs() << ".. No debug info was present\n"); + return; + } + if (PotentialMIsForDebugLocs.empty()) { + LOC_DEBUG( + dbgs() << ".. No instructions to carry debug info (dead code?)\n"); + return; + } + + LOC_DEBUG(dbgs() << ".. Searching " << PotentialMIsForDebugLocs.size() + << " instrs for " << LostDebugLocs.size() << " locations\n"); + SmallPtrSet FoundIn; + for (MachineInstr *MI : PotentialMIsForDebugLocs) { + if (!MI->getDebugLoc()) + continue; + // Check this first in case there's a matching line-0 location on both input + // and output. + if (MI->getDebugLoc().getLine() == 0) { + LOC_DEBUG( + dbgs() << ".. Assuming line-0 location covers remainder (if any)\n"); + return; + } + if (LostDebugLocs.erase(MI->getDebugLoc())) { + LOC_DEBUG(dbgs() << ".. .. found " << MI->getDebugLoc() << " in " << *MI); + FoundIn.insert(MI); + continue; + } + } + if (LostDebugLocs.empty()) + return; + + NumLostDebugLocs += LostDebugLocs.size(); + LOC_DEBUG({ + dbgs() << ".. Lost locations:\n"; + for (const DebugLoc &Loc : LostDebugLocs) { + dbgs() << ".. .. "; + Loc.print(dbgs()); + dbgs() << "\n"; + } + dbgs() << ".. MIs with matched locations:\n"; + for (MachineInstr *MI : FoundIn) + if (PotentialMIsForDebugLocs.erase(MI)) + dbgs() << ".. .. " << *MI; + dbgs() << ".. Remaining MIs with unmatched/no locations:\n"; + for (const MachineInstr *MI : PotentialMIsForDebugLocs) + dbgs() << ".. .. 
" << *MI; + }); +} + +void LostDebugLocObserver::checkpoint(bool CheckDebugLocs) { + if (CheckDebugLocs) + analyzeDebugLocations(); + PotentialMIsForDebugLocs.clear(); + LostDebugLocs.clear(); +} + +void LostDebugLocObserver::createdInstr(MachineInstr &MI) { + PotentialMIsForDebugLocs.insert(&MI); +} + +bool irTranslatorNeverAddsLocations(unsigned Opcode) { + switch (Opcode) { + default: + return false; + case TargetOpcode::G_CONSTANT: + case TargetOpcode::G_FCONSTANT: + case TargetOpcode::G_IMPLICIT_DEF: + case TargetOpcode::G_GLOBAL_VALUE: + return true; + } +} + +void LostDebugLocObserver::erasingInstr(MachineInstr &MI) { + if (irTranslatorNeverAddsLocations(MI.getOpcode())) + return; + + PotentialMIsForDebugLocs.erase(&MI); + if (MI.getDebugLoc()) + LostDebugLocs.insert(MI.getDebugLoc()); +} + +void LostDebugLocObserver::changingInstr(MachineInstr &MI) { + if (irTranslatorNeverAddsLocations(MI.getOpcode())) + return; + + PotentialMIsForDebugLocs.erase(&MI); + if (MI.getDebugLoc()) + LostDebugLocs.insert(MI.getDebugLoc()); +} + +void LostDebugLocObserver::changedInstr(MachineInstr &MI) { + PotentialMIsForDebugLocs.insert(&MI); +} diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp index a4fb4443724a..ce6f386a544d 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerTest.cpp @@ -6,8 +6,11 @@ // //===----------------------------------------------------------------------===// -#include "GISelMITest.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" +#include "GISelMITest.h" +#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" + +#define DEBUG_TYPE "legalizer-test" using namespace LegalizeActions; using namespace LegalizeMutations; @@ -60,9 +63,10 @@ TEST_F(AArch64GISelMITest, BasicLegalizerTest) { return; ALegalizerInfo LI(MF->getSubtarget()); + LostDebugLocObserver LocObserver(DEBUG_TYPE); - Legalizer::MFResult Result = - Legalizer::legalizeMachineFunction(*MF, LI, {}, B); + Legalizer::MFResult Result = Legalizer::legalizeMachineFunction( + *MF, LI, {&LocObserver}, LocObserver, B); EXPECT_TRUE(isNullMIPtr(Result.FailedOn)); EXPECT_TRUE(Result.Changed); @@ -98,6 +102,7 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningTest) { return; ALegalizerInfo LI(MF->getSubtarget()); + LostDebugLocObserver LocObserver(DEBUG_TYPE); // The events here unfold as follows: // 1. First, the function is scanned pre-forming the worklist of artifacts: @@ -153,8 +158,8 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningTest) { // pair(s) of artifacts that could be immediately combined out. After that // the process follows def-use chains, making them shorter at each step, thus // combining everything that can be combined in O(n) time. 
- Legalizer::MFResult Result = - Legalizer::legalizeMachineFunction(*MF, LI, {}, B); + Legalizer::MFResult Result = Legalizer::legalizeMachineFunction( + *MF, LI, {&LocObserver}, LocObserver, B); EXPECT_TRUE(isNullMIPtr(Result.FailedOn)); EXPECT_TRUE(Result.Changed); @@ -191,9 +196,10 @@ TEST_F(AArch64GISelMITest, UnorderedArtifactCombiningManyCopiesTest) { return; ALegalizerInfo LI(MF->getSubtarget()); + LostDebugLocObserver LocObserver(DEBUG_TYPE); - Legalizer::MFResult Result = - Legalizer::legalizeMachineFunction(*MF, LI, {}, B); + Legalizer::MFResult Result = Legalizer::legalizeMachineFunction( + *MF, LI, {&LocObserver}, LocObserver, B); EXPECT_TRUE(isNullMIPtr(Result.FailedOn)); EXPECT_TRUE(Result.Changed); From 18b6050324129f99d5a7d5b4b12d5244d1b2d0af Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Thu, 16 Apr 2020 12:45:22 -0500 Subject: [PATCH 071/216] [PowerPC][Future] Initial support for PC Relative addressing for global values This patch adds PC Relative support for global values that are known at link time. If a global value requires access through the global offset table (GOT) it is not covered in this patch. Differential Revision: https://reviews.llvm.org/D75280 --- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 26 +- llvm/lib/Target/PowerPC/PPCInstrPrefix.td | 123 ++++ .../global-address-non-got-indirect-access.ll | 541 ++++++++++++++++++ .../PowerPC/pcrel-call-linkage-leaf.ll | 13 +- .../PowerPC/pcrel-call-linkage-with-calls.ll | 52 +- 5 files changed, 699 insertions(+), 56 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 85e34d7cfe03..6cf4b85a4f74 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -2588,17 +2588,19 @@ bool PPCTargetLowering::SelectAddressRegRegOnly(SDValue N, SDValue &Base, } /// Returns true if this address is a PC Relative address. -/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG. +/// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG +/// or if the node opcode is PPCISD::MAT_PCREL_ADDR. bool PPCTargetLowering::SelectAddressPCRel(SDValue N, SDValue &Base) const { - ConstantPoolSDNode *ConstPoolNode = - dyn_cast(N.getNode()); - bool HasFlag = ConstPoolNode && - ConstPoolNode->getTargetFlags() == PPCII::MO_PCREL_FLAG; - bool HasNode = N.getOpcode() == PPCISD::MAT_PCREL_ADDR; - if (HasFlag || HasNode) { - Base = N; + // This is a materialize PC Relative node. Always select this as PC Relative. + Base = N; + if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR) return true; - } + if (ConstantPoolSDNode *CPN = dyn_cast(N)) + if (CPN->getTargetFlags() & PPCII::MO_PCREL_FLAG) + return true; + if (GlobalAddressSDNode *GAN = dyn_cast(N)) + if (GAN->getTargetFlags() & PPCII::MO_PCREL_FLAG) + return true; return false; } @@ -3049,6 +3051,12 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. 
if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { + if (!isAccessedAsGotIndirect(Op) && Subtarget.isUsingPCRelativeCalls()) { + EVT Ty = getPointerTy(DAG.getDataLayout()); + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), + PPCII::MO_PCREL_FLAG); + return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); + } setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); return getTOCEntry(DAG, DL, GA); diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td index edb1c73c618f..a4a9688e3de7 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td +++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td @@ -348,30 +348,153 @@ let Predicates = [PrefixInstrs] in { // follow-up patches will address this refactoring and the AddedComplexity will // be removed. let Predicates = [PCRelativeMemops], AddedComplexity = 500 in { + // Load i32 + def : Pat<(i32 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + (PLBZpc $ga, 0)>; + def : Pat<(i32 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + (PLBZpc $ga, 0)>; + def : Pat<(i32 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + (PLHApc $ga, 0)>; + def : Pat<(i32 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + (PLHZpc $ga, 0)>; + def : Pat<(i32 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + (PLHZpc $ga, 0)>; + def : Pat<(i32 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLWZpc $ga, 0)>; + + // Store i32 + def : Pat<(truncstorei8 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTBpc $RS, $ga, 0)>; + def : Pat<(truncstorei16 i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTHpc $RS, $ga, 0)>; + def : Pat<(store i32:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTWpc $RS, $ga, 0)>; + + // Load i64 + def : Pat<(i64 (zextloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + (PLBZ8pc $ga, 0)>; + def : Pat<(i64 (extloadi8 (PPCmatpcreladdr pcreladdr:$ga))), + (PLBZ8pc $ga, 0)>; + def : Pat<(i64 (sextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + (PLHA8pc $ga, 0)>; + def : Pat<(i64 (zextloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + (PLHZ8pc $ga, 0)>; + def : Pat<(i64 (extloadi16 (PPCmatpcreladdr pcreladdr:$ga))), + (PLHZ8pc $ga, 0)>; + def : Pat<(i64 (zextloadi32 (PPCmatpcreladdr pcreladdr:$ga))), + (PLWZ8pc $ga, 0)>; + def : Pat<(i64 (sextloadi32 (PPCmatpcreladdr pcreladdr:$ga))), + (PLWA8pc $ga, 0)>; + def : Pat<(i64 (extloadi32 (PPCmatpcreladdr pcreladdr:$ga))), + (PLWZ8pc $ga, 0)>; + def : Pat<(i64 (load (PPCmatpcreladdr pcreladdr:$ga))), (PLDpc $ga, 0)>; + + // Store i64 + def : Pat<(truncstorei8 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTB8pc $RS, $ga, 0)>; + def : Pat<(truncstorei16 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTH8pc $RS, $ga, 0)>; + def : Pat<(truncstorei32 i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTW8pc $RS, $ga, 0)>; + def : Pat<(store i64:$RS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTDpc $RS, $ga, 0)>; + // Load f32 def : Pat<(f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFSpc $addr, 0)>; + // Store f32 + def : Pat<(store f32:$FRS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTFSpc $FRS, $ga, 0)>; + // Load f64 def : Pat<(f64 (extloadf32 (PPCmatpcreladdr pcreladdr:$addr))), (COPY_TO_REGCLASS (PLFSpc $addr, 0), VSFRC)>; def : Pat<(f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLFDpc $addr, 0)>; + // Store f64 + def : Pat<(store f64:$FRS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTFDpc $FRS, $ga, 0)>; + // Load f128 def : Pat<(f128 (load (PPCmatpcreladdr pcreladdr:$addr))), (COPY_TO_REGCLASS (PLXVpc $addr, 0), VRRC)>; + // Store f128 + def : Pat<(store 
f128:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTXVpc (COPY_TO_REGCLASS $XS, VSRC), $ga, 0)>; + // Load v4i32 def : Pat<(v4i32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + // Store v4i32 + def : Pat<(store v4i32:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTXVpc $XS, $ga, 0)>; + // Load v2i64 def : Pat<(v2i64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + // Store v2i64 + def : Pat<(store v2i64:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTXVpc $XS, $ga, 0)>; + // Load v4f32 def : Pat<(v4f32 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + // Store v4f32 + def : Pat<(store v4f32:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTXVpc $XS, $ga, 0)>; + // Load v2f64 def : Pat<(v2f64 (load (PPCmatpcreladdr pcreladdr:$addr))), (PLXVpc $addr, 0)>; + // Store v2f64 + def : Pat<(store v2f64:$XS, (PPCmatpcreladdr pcreladdr:$ga)), + (PSTXVpc $XS, $ga, 0)>; + + // Atomic Load + def : Pat<(atomic_load_8 (PPCmatpcreladdr pcreladdr:$ga)), + (PLBZpc $ga, 0)>; + def : Pat<(atomic_load_16 (PPCmatpcreladdr pcreladdr:$ga)), + (PLHZpc $ga, 0)>; + def : Pat<(atomic_load_32 (PPCmatpcreladdr pcreladdr:$ga)), + (PLWZpc $ga, 0)>; + def : Pat<(atomic_load_64 (PPCmatpcreladdr pcreladdr:$ga)), + (PLDpc $ga, 0)>; + + // Atomic Store + def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS), + (PSTBpc $RS, $ga, 0)>; + def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS), + (PSTHpc $RS, $ga, 0)>; + def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i32:$RS), + (PSTWpc $RS, $ga, 0)>; + def : Pat<(atomic_store_8 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + (PSTB8pc $RS, $ga, 0)>; + def : Pat<(atomic_store_16 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + (PSTH8pc $RS, $ga, 0)>; + def : Pat<(atomic_store_32 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + (PSTW8pc $RS, $ga, 0)>; + def : Pat<(atomic_store_64 (PPCmatpcreladdr pcreladdr:$ga), i64:$RS), + (PSTDpc $RS, $ga, 0)>; + + // Special Cases For PPCstore_scal_int_from_vsr + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f64:$src)), + (PPCmatpcreladdr pcreladdr:$dst), 8), + (PSTXSDpc (XSCVDPSXDS f64:$src), $dst, 0)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_sint_in_vsr f128:$src)), + (PPCmatpcreladdr pcreladdr:$dst), 8), + (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPSDZ f128:$src), VFRC), $dst, 0)>; + + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f64:$src)), + (PPCmatpcreladdr pcreladdr:$dst), 8), + (PSTXSDpc (XSCVDPUXDS f64:$src), $dst, 0)>; + def : Pat<(PPCstore_scal_int_from_vsr + (f64 (PPCcv_fp_to_uint_in_vsr f128:$src)), + (PPCmatpcreladdr pcreladdr:$dst), 8), + (PSTXSDpc (COPY_TO_REGCLASS (XSCVQPUDZ f128:$src), VFRC), $dst, 0)>; + // If the PPCmatpcreladdr node is not caught by any other pattern it should be // caught here and turned into a paddi instruction to materialize the address. 
def : Pat<(PPCmatpcreladdr pcreladdr:$addr), (PADDI8pc 0, $addr)>; diff --git a/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll b/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll new file mode 100644 index 000000000000..ff409a8cb49b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/global-address-non-got-indirect-access.ll @@ -0,0 +1,541 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -enable-ppc-quad-precision -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s + + @_ZL13StaticBoolVar = internal unnamed_addr global i8 0, align 1 +@_ZL19StaticSignedCharVar = internal unnamed_addr global i8 0, align 1 +@_ZL21StaticUnsignedCharVar = internal unnamed_addr global i8 0, align 1 +@_ZL20StaticSignedShortVar = internal unnamed_addr global i16 0, align 2 +@_ZL22StaticUnsignedShortVar = internal unnamed_addr global i16 0, align 2 +@_ZL18StaticSignedIntVar = internal unnamed_addr global i32 0, align 4 +@_ZL20StaticUnsignedIntVar = internal unnamed_addr global i32 0, align 4 +@_ZL19StaticSignedLongVar = internal unnamed_addr global i64 0, align 8 +@_ZL14StaticFloatVar = internal unnamed_addr global float 0.000000e+00, align 4 +@_ZL15StaticDoubleVar = internal unnamed_addr global double 0.000000e+00, align 8 +@_ZL19StaticLongDoubleVar = internal unnamed_addr global ppc_fp128 0xM00000000000000000000000000000000, align 16 +@_ZL23StaticSigned__Int128Var = internal unnamed_addr global i128 0, align 16 +@_ZL19Static__Float128Var = internal unnamed_addr global fp128 0xL00000000000000000000000000000000, align 16 +@_ZL25StaticVectorSignedCharVar = internal unnamed_addr global <16 x i8> zeroinitializer, align 16 +@_ZL26StaticVectorSignedShortVar = internal unnamed_addr global <8 x i16> zeroinitializer, align 16 +@_ZL24StaticVectorSignedIntVar = internal unnamed_addr global <4 x i32> zeroinitializer, align 16 +@_ZL29StaticVectorSignedLongLongVar = internal unnamed_addr global <2 x i64> zeroinitializer, align 16 +@_ZL29StaticVectorSigned__Int128Var = internal unnamed_addr global <1 x i128> zeroinitializer, align 16 +@_ZL20StaticVectorFloatVar = internal unnamed_addr global <4 x float> zeroinitializer, align 16 +@_ZL21StaticVectorDoubleVar = internal unnamed_addr global <2 x double> zeroinitializer, align 16 + + define zeroext i1 @_Z17ReadStaticBoolVarv() { +; CHECK-LABEL: _Z17ReadStaticBoolVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, _ZL13StaticBoolVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* @_ZL13StaticBoolVar, align 1, !range !0 + %tobool = icmp ne i8 %0, 0 + ret i1 %tobool +} + + define signext i8 @_Z23ReadStaticSignedCharVarv() { +; CHECK-LABEL: _Z23ReadStaticSignedCharVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, _ZL19StaticSignedCharVar@PCREL(0), 1 +; CHECK-NEXT: extsb r3, r3 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* @_ZL19StaticSignedCharVar, align 1 + ret i8 %0 +} + + define zeroext i8 @_Z25ReadStaticUnsignedCharVarv() { +; CHECK-LABEL: _Z25ReadStaticUnsignedCharVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plbz r3, _ZL21StaticUnsignedCharVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* @_ZL21StaticUnsignedCharVar, align 1 + ret i8 %0 +} + + define signext i16 @_Z24ReadStaticSignedShortVarv() { +; CHECK-LABEL: _Z24ReadStaticSignedShortVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plha r3, _ZL20StaticSignedShortVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + 
%0 = load i16, i16* @_ZL20StaticSignedShortVar, align 2 + ret i16 %0 +} + + define zeroext i16 @_Z26ReadStaticUnsignedShortVarv() { +; CHECK-LABEL: _Z26ReadStaticUnsignedShortVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plhz r3, _ZL22StaticUnsignedShortVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* @_ZL22StaticUnsignedShortVar, align 2 + ret i16 %0 +} + + define signext i32 @_Z22ReadStaticSignedIntVarv() { +; CHECK-LABEL: _Z22ReadStaticSignedIntVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwa r3, _ZL18StaticSignedIntVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @_ZL18StaticSignedIntVar, align 4 + ret i32 %0 +} + + define zeroext i32 @_Z24ReadStaticUnsignedIntVarv() { +; CHECK-LABEL: _Z24ReadStaticUnsignedIntVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plwz r3, _ZL20StaticUnsignedIntVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @_ZL20StaticUnsignedIntVar, align 4 + ret i32 %0 +} + + ; It is the same as unsigned long version +define i64 @_Z23ReadStaticSignedLongVarv() { +; CHECK-LABEL: _Z23ReadStaticSignedLongVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, _ZL19StaticSignedLongVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* @_ZL19StaticSignedLongVar, align 8 + ret i64 %0 +} + + define float @_Z18ReadStaticFloatVarv() { +; CHECK-LABEL: _Z18ReadStaticFloatVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plfs f1, _ZL14StaticFloatVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load float, float* @_ZL14StaticFloatVar, align 4 + ret float %0 +} + + define double @_Z19ReadStaticDoubleVarv() { +; CHECK-LABEL: _Z19ReadStaticDoubleVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plfd f1, _ZL15StaticDoubleVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load double, double* @_ZL15StaticDoubleVar, align 8 + ret double %0 +} + + ; FIXME: +define ppc_fp128 @_Z23ReadStaticLongDoubleVarv() { +; CHECK-LABEL: _Z23ReadStaticLongDoubleVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL19StaticLongDoubleVar@PCREL, 1 +; CHECK-NEXT: lfd f2, 8(r3) +; CHECK-NEXT: plfd f1, _ZL19StaticLongDoubleVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load ppc_fp128, ppc_fp128* @_ZL19StaticLongDoubleVar, align 16 + ret ppc_fp128 %0 +} + + ; FIXME: +define i128 @_Z27ReadStaticSigned__Int128Varv() { +; CHECK-LABEL: _Z27ReadStaticSigned__Int128Varv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL23StaticSigned__Int128Var@PCREL, 1 +; CHECK-NEXT: ld r4, 8(r3) +; CHECK-NEXT: pld r3, _ZL23StaticSigned__Int128Var@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load i128, i128* @_ZL23StaticSigned__Int128Var, align 16 + ret i128 %0 +} + + define fp128 @_Z23ReadStatic__Float128Varv() { +; CHECK-LABEL: _Z23ReadStatic__Float128Varv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL19Static__Float128Var@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load fp128, fp128* @_ZL19Static__Float128Var, align 16 + ret fp128 %0 +} + + define <16 x i8> @_Z29ReadStaticVectorSignedCharVarv() { +; CHECK-LABEL: _Z29ReadStaticVectorSignedCharVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL25StaticVectorSignedCharVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* @_ZL25StaticVectorSignedCharVar, align 16 + ret <16 x i8> %0 +} + + define <8 x i16> @_Z30ReadStaticVectorSignedShortVarv() { +; CHECK-LABEL: _Z30ReadStaticVectorSignedShortVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL26StaticVectorSignedShortVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <8 x i16>, <8 x i16>* 
@_ZL26StaticVectorSignedShortVar, align 16 + ret <8 x i16> %0 +} + + define <4 x i32> @_Z28ReadStaticVectorSignedIntVarv() { +; CHECK-LABEL: _Z28ReadStaticVectorSignedIntVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL24StaticVectorSignedIntVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <4 x i32>, <4 x i32>* @_ZL24StaticVectorSignedIntVar, align 16 + ret <4 x i32> %0 +} + + define <2 x i64> @_Z33ReadStaticVectorSignedLongLongVarv() { +; CHECK-LABEL: _Z33ReadStaticVectorSignedLongLongVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL29StaticVectorSignedLongLongVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <2 x i64>, <2 x i64>* @_ZL29StaticVectorSignedLongLongVar, align 16 + ret <2 x i64> %0 +} + + define <1 x i128> @_Z33ReadStaticVectorSigned__Int128Varv() { +; CHECK-LABEL: _Z33ReadStaticVectorSigned__Int128Varv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL29StaticVectorSigned__Int128Var@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <1 x i128>, <1 x i128>* @_ZL29StaticVectorSigned__Int128Var, align 16 + ret <1 x i128> %0 +} + + define <4 x float> @_Z24ReadStaticVectorFloatVarv() { +; CHECK-LABEL: _Z24ReadStaticVectorFloatVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL20StaticVectorFloatVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <4 x float>, <4 x float>* @_ZL20StaticVectorFloatVar, align 16 + ret <4 x float> %0 +} + + define <2 x double> @_Z25ReadStaticVectorDoubleVarv() { +; CHECK-LABEL: _Z25ReadStaticVectorDoubleVarv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: plxv v2, _ZL21StaticVectorDoubleVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %0 = load <2 x double>, <2 x double>* @_ZL21StaticVectorDoubleVar, align 16 + ret <2 x double> %0 +} + + !0 = !{i8 0, i8 2} + + define void @_Z18WriteStaticBoolVarb(i1 zeroext %val) { +; CHECK-LABEL: _Z18WriteStaticBoolVarb: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstb r3, _ZL13StaticBoolVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + %frombool = zext i1 %val to i8 + store i8 %frombool, i8* @_ZL13StaticBoolVar, align 1 + ret void +} + + define void @_Z24WriteStaticSignedCharVara(i8 signext %val) { +; CHECK-LABEL: _Z24WriteStaticSignedCharVara: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstb r3, _ZL19StaticSignedCharVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i8 %val, i8* @_ZL19StaticSignedCharVar, align 1 + ret void +} + + define void @_Z26WriteStaticUnsignedCharVarh(i8 zeroext %val){ +; CHECK-LABEL: _Z26WriteStaticUnsignedCharVarh: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstb r3, _ZL21StaticUnsignedCharVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i8 %val, i8* @_ZL21StaticUnsignedCharVar, align 1 + ret void +} + + define void @_Z25WriteStaticSignedShortVars(i16 signext %val) { +; CHECK-LABEL: _Z25WriteStaticSignedShortVars: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: psth r3, _ZL20StaticSignedShortVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i16 %val, i16* @_ZL20StaticSignedShortVar, align 2 + ret void +} + + define void @_Z27WriteStaticUnsignedShortVart(i16 zeroext %val) { +; CHECK-LABEL: _Z27WriteStaticUnsignedShortVart: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: psth r3, _ZL22StaticUnsignedShortVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i16 %val, i16* @_ZL22StaticUnsignedShortVar, align 2 + ret void +} + + define void @_Z23WriteStaticSignedIntVari(i32 signext %val) { +; CHECK-LABEL: _Z23WriteStaticSignedIntVari: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstw r3, _ZL18StaticSignedIntVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i32 %val, 
i32* @_ZL18StaticSignedIntVar, align 4 + ret void +} + + define void @_Z25WriteStaticUnsignedIntVarj(i32 zeroext %val) { +; CHECK-LABEL: _Z25WriteStaticUnsignedIntVarj: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstw r3, _ZL20StaticUnsignedIntVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i32 %val, i32* @_ZL20StaticUnsignedIntVar, align 4 + ret void +} + + define void @_Z24WriteStaticSignedLongVarl(i64 %val) { +; CHECK-LABEL: _Z24WriteStaticSignedLongVarl: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstd r3, _ZL19StaticSignedLongVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i64 %val, i64* @_ZL19StaticSignedLongVar, align 8 + ret void +} + + define void @_Z19WriteStaticFloatVarf(float %val) { +; CHECK-LABEL: _Z19WriteStaticFloatVarf: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstfs f1, _ZL14StaticFloatVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store float %val, float* @_ZL14StaticFloatVar, align 4 + ret void +} + + define void @_Z20WriteStaticDoubleVard(double %val) { +; CHECK-LABEL: _Z20WriteStaticDoubleVard: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstfd f1, _ZL15StaticDoubleVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store double %val, double* @_ZL15StaticDoubleVar, align 8 + ret void +} + + ; FIXME: +define void @_Z24WriteStaticLongDoubleVarg(ppc_fp128 %val) { +; CHECK-LABEL: _Z24WriteStaticLongDoubleVarg: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL19StaticLongDoubleVar@PCREL, 1 +; CHECK-NEXT: stfd f2, 8(r3) +; CHECK-NEXT: pstfd f1, _ZL19StaticLongDoubleVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store ppc_fp128 %val, ppc_fp128* @_ZL19StaticLongDoubleVar, align 16 + ret void +} + + ; FIXME: +define void @_Z28WriteStaticSigned__Int128Varn(i128 %val) { +; CHECK-LABEL: _Z28WriteStaticSigned__Int128Varn: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r5, 0, _ZL23StaticSigned__Int128Var@PCREL, 1 +; CHECK-NEXT: std r4, 8(r5) +; CHECK-NEXT: pstd r3, _ZL23StaticSigned__Int128Var@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store i128 %val, i128* @_ZL23StaticSigned__Int128Var, align 16 + ret void +} + + define void @_Z24WriteStatic__Float128Varu9__ieee128(fp128 %val) { +; CHECK-LABEL: _Z24WriteStatic__Float128Varu9__ieee128: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL19Static__Float128Var@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store fp128 %val, fp128* @_ZL19Static__Float128Var, align 16 + ret void +} + + define void @_Z30WriteStaticVectorSignedCharVarDv16_a(<16 x i8> %val) { +; CHECK-LABEL: _Z30WriteStaticVectorSignedCharVarDv16_a: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL25StaticVectorSignedCharVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <16 x i8> %val, <16 x i8>* @_ZL25StaticVectorSignedCharVar, align 16 + ret void +} + + define void @_Z31WriteStaticVectorSignedShortVarDv8_s(<8 x i16> %val) { +; CHECK-LABEL: _Z31WriteStaticVectorSignedShortVarDv8_s: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL26StaticVectorSignedShortVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <8 x i16> %val, <8 x i16>* @_ZL26StaticVectorSignedShortVar, align 16 + ret void +} + + define void @_Z29WriteStaticVectorSignedIntVarDv4_i(<4 x i32> %val) { +; CHECK-LABEL: _Z29WriteStaticVectorSignedIntVarDv4_i: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL24StaticVectorSignedIntVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <4 x i32> %val, <4 x i32>* @_ZL24StaticVectorSignedIntVar, align 16 + ret void +} + + define void @_Z34WriteStaticVectorSignedLongLongVarDv2_x(<2 x i64> %val) { +; CHECK-LABEL: 
_Z34WriteStaticVectorSignedLongLongVarDv2_x: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL29StaticVectorSignedLongLongVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <2 x i64> %val, <2 x i64>* @_ZL29StaticVectorSignedLongLongVar, align 16 + ret void +} + + define void @_Z34WriteStaticVectorSigned__Int128VarDv1_n(<1 x i128> %val) { +; CHECK-LABEL: _Z34WriteStaticVectorSigned__Int128VarDv1_n: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL29StaticVectorSigned__Int128Var@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <1 x i128> %val, <1 x i128>* @_ZL29StaticVectorSigned__Int128Var, align 16 + ret void +} + + define void @_Z25WriteStaticVectorFloatVarDv4_f(<4 x float> %val) { +; CHECK-LABEL: _Z25WriteStaticVectorFloatVarDv4_f: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL20StaticVectorFloatVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <4 x float> %val, <4 x float>* @_ZL20StaticVectorFloatVar, align 16 + ret void +} + + define void @_Z26WriteStaticVectorDoubleVarDv2_d(<2 x double> %val) { +; CHECK-LABEL: _Z26WriteStaticVectorDoubleVarDv2_d: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pstxv v2, _ZL21StaticVectorDoubleVar@PCREL(0), 1 +; CHECK-NEXT: blr +entry: + store <2 x double> %val, <2 x double>* @_ZL21StaticVectorDoubleVar, align 16 + ret void +} + + @_ZL3ptr = internal unnamed_addr global i32* null, align 8 +define void @_Z14WriteStaticPtrv() { +; CHECK-LABEL: _Z14WriteStaticPtrv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, _ZL3ptr@PCREL(0), 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32*, i32** @_ZL3ptr, align 8 + store i32 3, i32* %0, align 4 + ret void +} + + @.str = private unnamed_addr constant [13 x i8] c"Hello World\0A\00", align 1 +@str = dso_local local_unnamed_addr global i8* getelementptr inbounds ([13 x i8], [13 x i8]* @.str, i64 0, i64 0), align 8 + + define zeroext i8 @_Z17Char0InStrLiteralv() { +; CHECK-LABEL: _Z17Char0InStrLiteralv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, str@PCREL(0), 1 +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i8*, i8** @str, align 8 + %1 = load i8, i8* %0, align 1 + ret i8 %1 +} + + define zeroext i8 @_Z17Char3InStrLiteralv() { +; CHECK-LABEL: _Z17Char3InStrLiteralv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, str@PCREL(0), 1 +; CHECK-NEXT: lbz r3, 3(r3) +; CHECK-NEXT: blr +entry: + %0 = load i8*, i8** @str, align 8 + %arrayidx = getelementptr inbounds i8, i8* %0, i64 3 + %1 = load i8, i8* %arrayidx, align 1 + ret i8 %1 +} + + @_ZL5array = internal global [10 x i32] zeroinitializer, align 4 + + ; FIXME: +define signext i32 @_Z15ReadStaticArrayv() { +; CHECK-LABEL: _Z15ReadStaticArrayv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL5array@PCREL, 1 +; CHECK-NEXT: lwa r3, 12(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @_ZL5array, i64 0, i64 3), align 4 + ret i32 %0 +} + + ; FIXME: +define void @_Z16WriteStaticArrayv() { +; CHECK-LABEL: _Z16WriteStaticArrayv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL5array@PCREL, 1 +; CHECK-NEXT: li r4, 5 +; CHECK-NEXT: stw r4, 12(r3) +; CHECK-NEXT: blr +entry: + store i32 5, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @_ZL5array, i64 0, i64 3), align 4 + ret void +} + + %struct.Struct = type { i8, i16, i32 } + + ; FIXME: +@_ZL9structure = internal global %struct.Struct zeroinitializer, align 4 +define signext i32 @_Z16ReadStaticStructv() { +; CHECK-LABEL: _Z16ReadStaticStructv: +; CHECK: # 
%bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL9structure@PCREL, 1 +; CHECK-NEXT: lwa r3, 4(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.Struct, %struct.Struct* @_ZL9structure, i64 0, i32 2), align 4 + ret i32 %0 +} + + ; FIXME +define void @_Z17WriteStaticStructv() { +; CHECK-LABEL: _Z17WriteStaticStructv: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: paddi r3, 0, _ZL9structure@PCREL, 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: stw r4, 4(r3) +; CHECK-NEXT: blr +entry: + store i32 3, i32* getelementptr inbounds (%struct.Struct, %struct.Struct* @_ZL9structure, i64 0, i32 2), align 4 + ret void +} + diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll index 66d2941bd2fe..7460c67f10a9 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-leaf.ll @@ -39,16 +39,13 @@ entry: ; goes away. define dso_local signext i32 @AsmClobberX2WithTOC(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: AsmClobberX2WithTOC: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l ; CHECK-LARGE: ld r2, .Lfunc_toc2-.Lfunc_gep2(r12) ; CHECK-LARGE: add r2, r2, r12 -; CHECK-S: .localentry AsmClobberX2WithTOC, .Lfunc_lep2-.Lfunc_gep2 +; CHECK-S: .localentry AsmClobberX2WithTOC ; CHECK-S: #APP ; CHECK-S-NEXT: li r2, 0 ; CHECK-S-NEXT: #NO_APP -; CHECK-S-NEXT: addis r5, r2, global_int@toc@ha -; CHECK-S-NEXT: lwz r5, global_int@toc@l(r5) +; CHECK-S-NEXT: plwz r5, global_int@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: add r3, r3, r5 ; CHECK-S-NEXT: extsw r3, r3 @@ -158,15 +155,9 @@ entry: define dso_local signext i32 @UsesX2AsTOC() local_unnamed_addr { ; CHECK-ALL-LABEL: UsesX2AsTOC: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep6@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep6@l ; CHECK-LARGE: ld r2, .Lfunc_toc6-.Lfunc_gep6(r12) ; CHECK-LARGE: add r2, r2, r12 -; CHECK-S: .localentry UsesX2AsTOC, .Lfunc_lep6-.Lfunc_gep6 ; CHECK-ALL: # %bb.0: # %entry -; CHECK-S-NEXT: addis r3, r2, global_int@toc@ha -; CHECK-S-NEXT: lwa r3, global_int@toc@l(r3) -; CHECK-S-NEXT: blr entry: %0 = load i32, i32* @global_int, align 4 ret i32 %0 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll index 64548c55792f..59001d482665 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -26,9 +26,7 @@ entry: define dso_local signext i32 @DirectCallLocal1(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: DirectCallLocal1: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep1@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep1@l -; CHECK-S: .localentry DirectCallLocal1, .Lfunc_lep1-.Lfunc_gep1 +; CHECK-S: .localentry DirectCallLocal1 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) @@ -37,10 +35,8 @@ define dso_local signext i32 @DirectCallLocal1(i32 signext %a, i32 signext %b) l ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall -; CHECK-S-NEXT: nop -; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha -; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: bl localCall@notoc +; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: addi r1, r1, 32 @@ -115,9 +111,7 
@@ entry: define dso_local signext i32 @DirectCallExtern1(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: DirectCallExtern1: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep4@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep4@l -; CHECK-S: .localentry DirectCallExtern1, .Lfunc_lep4-.Lfunc_gep4 +; CHECK-S: .localentry DirectCallExtern1 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) @@ -126,10 +120,8 @@ define dso_local signext i32 @DirectCallExtern1(i32 signext %a, i32 signext %b) ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall -; CHECK-S-NEXT: nop -; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha -; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: bl externCall@notoc +; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: addi r1, r1, 32 @@ -207,21 +199,17 @@ entry: define dso_local signext i32 @TailCallLocal1(i32 signext %a) local_unnamed_addr { ; CHECK-ALL-LABEL: TailCallLocal1: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep7@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep7@l -; CHECK-S: .localentry TailCallLocal1, .Lfunc_lep7-.Lfunc_gep7 +; CHECK-S: .localentry TailCallLocal1 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha -; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall -; CHECK-S-NEXT: nop +; CHECK-S-NEXT: bl localCall@notoc ; CHECK-S-NEXT: addi r1, r1, 32 ; CHECK-S-NEXT: ld r0, 16(r1) ; CHECK-S-NEXT: mtlr r0 @@ -284,21 +272,17 @@ entry: define dso_local signext i32 @TailCallExtern1(i32 signext %a) local_unnamed_addr { ; CHECK-ALL-LABEL: TailCallExtern1: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep10@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep10@l -; CHECK-S: .localentry TailCallExtern1, .Lfunc_lep10-.Lfunc_gep10 +; CHECK-S: .localentry TailCallExtern1 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha -; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall -; CHECK-S-NEXT: nop +; CHECK-S-NEXT: bl externCall@notoc ; CHECK-S-NEXT: addi r1, r1, 32 ; CHECK-S-NEXT: ld r0, 16(r1) ; CHECK-S-NEXT: mtlr r0 @@ -370,15 +354,13 @@ define dso_local signext i32 @IndirectCall1(i32 signext %a, i32 signext %b) loca ; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha -; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5) +; CHECK-S-NEXT: pld r12, indirectCall@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: ld 2, 24(r1) -; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha -; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: addi r1, r1, 32 @@ -406,8 +388,7 @@ define 
dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca ; CHECK-S-NEXT: std r2, 24(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: addis r5, r2, indirectCall@toc@ha -; CHECK-S-NEXT: ld r12, indirectCall@toc@l(r5) +; CHECK-S-NEXT: pld r12, indirectCall@PCREL(0), 1 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: mtctr r12 @@ -449,8 +430,7 @@ define dso_local signext i32 @IndirectCall3(i32 signext %a, i32 signext %b, i32 ; CHECK-S-NEXT: mr r12, r5 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: ld 2, 24(r1) -; CHECK-S-NEXT: addis r4, r2, globalVar@toc@ha -; CHECK-S-NEXT: lwz r4, globalVar@toc@l(r4) +; CHECK-S-NEXT: plwz r4, globalVar@PCREL(0), 1 ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 ; CHECK-S-NEXT: addi r1, r1, 32 From 6aac98f4dda957177d7af8d8a45ae44fd6e9a7b8 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Thu, 16 Apr 2020 17:47:41 +0000 Subject: [PATCH 072/216] [gn build] Port d9085f65db0 --- llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn index dcfc1d1c3581..8b096ad9b1e5 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn @@ -29,6 +29,7 @@ static_library("GlobalISel") { "LegalizerHelper.cpp", "LegalizerInfo.cpp", "Localizer.cpp", + "LostDebugLocObserver.cpp", "MachineIRBuilder.cpp", "RegBankSelect.cpp", "RegisterBank.cpp", From 9c7d917701f9c68ca0e41a321cb5de4e4c043f08 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 16 Apr 2020 10:54:30 -0700 Subject: [PATCH 073/216] [CallSite removal][CodeGen] Remove CallSite use from BasicTTIImpl.h. NFC While there convert iterator loops to range-based. Differential Revision: https://reviews.llvm.org/D78275 --- llvm/include/llvm/CodeGen/BasicTTIImpl.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/BasicTTIImpl.h b/llvm/include/llvm/CodeGen/BasicTTIImpl.h index 4822abc46300..7646c2196941 100644 --- a/llvm/include/llvm/CodeGen/BasicTTIImpl.h +++ b/llvm/include/llvm/CodeGen/BasicTTIImpl.h @@ -29,7 +29,6 @@ #include "llvm/CodeGen/TargetSubtargetInfo.h" #include "llvm/CodeGen/ValueTypes.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" @@ -479,20 +478,17 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { return; // Scan the loop: don't unroll loops with calls. - for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; - ++I) { - BasicBlock *BB = *I; - - for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) - if (isa(J) || isa(J)) { - ImmutableCallSite CS(&*J); - if (const Function *F = CS.getCalledFunction()) { + for (BasicBlock *BB : L->blocks()) { + for (Instruction &I : *BB) { + if (isa(I) || isa(I)) { + if (const Function *F = cast(I).getCalledFunction()) { if (!static_cast(this)->isLoweredToCall(F)) continue; } return; } + } } // Enable runtime and partial unrolling up to the specified size. From 84f007f7ef4b81a08eb1d775432f59dea061f036 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 16 Apr 2020 11:05:31 -0700 Subject: [PATCH 074/216] [CallSite removal][CodeGen] Drop some unneeded includes of CallSite.h. 
NFC The uses of CallSite were removed in previous patches. --- llvm/include/llvm/CodeGen/Analysis.h | 1 - llvm/include/llvm/CodeGen/FastISel.h | 1 - llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h | 2 +- 3 files changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/include/llvm/CodeGen/Analysis.h b/llvm/include/llvm/CodeGen/Analysis.h index a2130fca7b8f..fe610b5bdc8d 100644 --- a/llvm/include/llvm/CodeGen/Analysis.h +++ b/llvm/include/llvm/CodeGen/Analysis.h @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Triple.h" #include "llvm/CodeGen/ISDOpcodes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" #include "llvm/Support/CodeGen.h" diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index f561adaa987c..02ec1d38dfb4 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -20,7 +20,6 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/TargetLowering.h" #include "llvm/IR/Attributes.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h index cf21ee92e3c1..e32de8888c06 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CallLowering.h @@ -18,7 +18,6 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/TargetCallingConv.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/CallingConv.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MachineValueType.h" @@ -28,6 +27,7 @@ namespace llvm { class CCState; +class CallBase; class DataLayout; class Function; class MachineIRBuilder; From 1223255c2de76c28fb07ebfda365a0c79ee3bbe8 Mon Sep 17 00:00:00 2001 From: Cameron McInally Date: Thu, 16 Apr 2020 13:41:14 -0500 Subject: [PATCH 075/216] [AArch64][SVE] Add DestructiveBinaryImm SQSHLU patterns. Add DestructiveBinaryImm SQSHLU patterns and tests. These patterns allow the SQSHLU instruction to match with a MOVPRFX. 
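For illustration only (this sketch is not part of the patch): the zeroing form
targeted here corresponds to source along the lines of the following, assuming
the SVE2 ACLE header arm_sve.h and the intrinsic spelling svqshlu_n_s8_z. The
_z (zeroing) intrinsic is expected to lower to the "select %pg, %a,
zeroinitializer" feeding @llvm.aarch64.sve.sqshlu that the new test checks,
which the added _Z_ZERO pseudos can then emit as a movprfx (zeroing) + sqshlu
pair.

  // C++ sketch; the intrinsic name and required target flags are assumptions.
  #include <arm_sve.h>

  svuint8_t qshlu_zeroing(svbool_t pg, svint8_t a) {
    // Inactive lanes become zero, matching "movprfx z0.b, p0/z, z0.b"
    // followed by "sqshlu z0.b, p0/m, z0.b, #2".
    return svqshlu_n_s8_z(pg, a, 2);
  }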
Differential Revision: https://reviews.llvm.org/D76728 --- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 2 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 28 +++++++-- .../sve2-intrinsics-uniform-dsp-zeroing.ll | 58 +++++++++++++++++++ 3 files changed, 83 insertions(+), 5 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 87b2ed464303..c046b0e18a37 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1798,7 +1798,7 @@ let Predicates = [HasSVE2] in { defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; - defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", int_aarch64_sve_sqshlu>; + defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index 0524e007a990..97ef41e8179e 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -4772,19 +4772,39 @@ multiclass sve_int_bin_pred_shift_imm_left opc, string asm, string psNam } multiclass sve2_int_bin_pred_shift_imm_left opc, string asm, + string psName, SDPatternOperator op> { - def _B : sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { + + def _B : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; + def _H : SVEPseudo2Instr, + sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{8} = imm{3}; } - def _S : sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { + def _S : SVEPseudo2Instr, + sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{9-8} = imm{4-3}; } - def _D : sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { + def _D : SVEPseudo2Instr, + sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; } + def _B_Z_UNDEF : PredTwoOpImmPseudo; + def _H_Z_UNDEF : PredTwoOpImmPseudo; + def _S_Z_UNDEF : PredTwoOpImmPseudo; + def _D_Z_UNDEF : PredTwoOpImmPseudo; + + def _B_Z_ZERO : PredTwoOpImmPseudo; + def _H_Z_ZERO : PredTwoOpImmPseudo; + def _S_Z_ZERO : PredTwoOpImmPseudo; + def _D_Z_ZERO : PredTwoOpImmPseudo; + + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _B_Z_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _H_Z_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _S_Z_ZERO)>; + def : SVE_3_Op_Pat_Shift_Imm_SelZero(NAME # _D_Z_ZERO)>; + def : SVE_3_Op_Imm_Pat(NAME # _B)>; def : SVE_3_Op_Imm_Pat(NAME # _H)>; def : SVE_3_Op_Imm_Pat(NAME # _S)>; diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll new file mode 100644 index 000000000000..a83f71d54fd1 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-zeroing.ll @@ -0,0 +1,58 @@ +; RUN: llc 
-mtriple=aarch64-linux-gnu -mattr=+sve2 -asm-verbose=0 < %s | FileCheck %s + +; +; SQSHLU +; + +define @sqshlu_i8( %pg, %a) { +; CHECK-LABEL: sqshlu_i8: +; CHECK: movprfx z0.b, p0/z, z0.b +; CHECK-NEXT: sqshlu z0.b, p0/m, z0.b, #2 +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.sqshlu.nxv16i8( %pg, + %a_z, + i32 2) + ret %out +} + +define @sqshlu_i16( %pg, %a) { +; CHECK-LABEL: sqshlu_i16: +; CHECK: movprfx z0.h, p0/z, z0.h +; CHECK-NEXT: sqshlu z0.h, p0/m, z0.h, #3 +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.sqshlu.nxv8i16( %pg, + %a_z, + i32 3) + ret %out +} + +define @sqshlu_i32( %pg, %a) { +; CHECK-LABEL: sqshlu_i32: +; CHECK: movprfx z0.s, p0/z, z0.s +; CHECK-NEXT: sqshlu z0.s, p0/m, z0.s, #29 +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.sqshlu.nxv4i32( %pg, + %a_z, + i32 29) + ret %out +} + +define @sqshlu_i64( %pg, %a) { +; CHECK-LABEL: sqshlu_i64: +; CHECK: movprfx z0.d, p0/z, z0.d +; CHECK-NEXT: sqshlu z0.d, p0/m, z0.d, #62 +; CHECK-NEXT: ret + %a_z = select %pg, %a, zeroinitializer + %out = call @llvm.aarch64.sve.sqshlu.nxv2i64( %pg, + %a_z, + i32 62) + ret %out +} + +declare @llvm.aarch64.sve.sqshlu.nxv16i8(, , i32) +declare @llvm.aarch64.sve.sqshlu.nxv8i16(, , i32) +declare @llvm.aarch64.sve.sqshlu.nxv4i32(, , i32) +declare @llvm.aarch64.sve.sqshlu.nxv2i64(, , i32) From 94908088a831141cfbdd15fc5837dccf30cfeeb6 Mon Sep 17 00:00:00 2001 From: George Burgess IV Date: Thu, 16 Apr 2020 10:56:19 -0700 Subject: [PATCH 076/216] [CodeGen] fix inline builtin-related breakage from D78162 In cases where we have multiple decls of an inline builtin, we may need to go hunting for the one with a definition when setting function attributes. An additional test-case was provided on https://github.com/ClangBuiltLinux/linux/issues/979 --- clang/lib/CodeGen/CodeGenModule.cpp | 12 +++++++++--- .../memcpy-no-nobuiltin-if-not-emitted.cpp | 16 ++++++++++++++++ 2 files changed, 25 insertions(+), 3 deletions(-) create mode 100644 clang/test/CodeGen/memcpy-no-nobuiltin-if-not-emitted.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 1243ce50ec8d..ce28d741225e 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1909,9 +1909,15 @@ void CodeGenModule::SetFunctionAttributes(GlobalDecl GD, llvm::Function *F, F->setSection(SA->getName()); // If we plan on emitting this inline builtin, we can't treat it as a builtin. 
- if (FD->isInlineBuiltinDeclaration() && shouldEmitFunction(FD)) { - F->addAttribute(llvm::AttributeList::FunctionIndex, - llvm::Attribute::NoBuiltin); + if (FD->isInlineBuiltinDeclaration()) { + const FunctionDecl *FDBody; + bool HasBody = FD->hasBody(FDBody); + (void)HasBody; + assert(HasBody && "Inline builtin declarations should always have an " + "available body!"); + if (shouldEmitFunction(FDBody)) + F->addAttribute(llvm::AttributeList::FunctionIndex, + llvm::Attribute::NoBuiltin); } if (FD->isReplaceableGlobalAllocationFunction()) { diff --git a/clang/test/CodeGen/memcpy-no-nobuiltin-if-not-emitted.cpp b/clang/test/CodeGen/memcpy-no-nobuiltin-if-not-emitted.cpp new file mode 100644 index 000000000000..d27aa9c53413 --- /dev/null +++ b/clang/test/CodeGen/memcpy-no-nobuiltin-if-not-emitted.cpp @@ -0,0 +1,16 @@ +// RUN: %clang_cc1 -triple i686-linux-gnu -std=c++11 -S -emit-llvm -o - %s | FileCheck %s +// +// Regression test for the issue reported at +// https://reviews.llvm.org/D78162#1986104 + +typedef unsigned long size_t; + +extern "C" __inline__ __attribute__((__gnu_inline__)) void *memcpy(void *a, const void *b, unsigned c) { + return __builtin_memcpy(a, b, c); +} +void *memcpy(void *, const void *, unsigned); + +// CHECK-LABEL: define void @_Z1av +void a() { (void)memcpy; } + +// CHECK-NOT: nobuiltin From 561cb14e743f98da2d505c74515e2e11b3829366 Mon Sep 17 00:00:00 2001 From: Jaydeep Chauhan Date: Thu, 16 Apr 2020 11:10:47 -0700 Subject: [PATCH 077/216] [LLVM] Remove wrong DBG_VALUE instruction with one operand in AArch64 test case Summary: AArch64 test case llvm/test/CodeGen/AArch64/branch-target-enforcement.mir is checking for invalid DBG_VALUE instruction with one operand(`DBG_VALUE $lr`). And this DBG_VALUE instruction is echoed from test case it self only. 
Correct format of DBG_VALUE is given in below link: https://llvm.org/docs/SourceLevelDebugging.html#variable-locations-in-instruction-selection-and-mir Reviewers: dsanders, eli.friedman, jmorse, vsk Reviewed By: dsanders Subscribers: kristof.beyls, danielkiss, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78309 --- llvm/test/CodeGen/AArch64/branch-target-enforcement.mir | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir b/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir index 99da912207d5..8c00caa8e81a 100644 --- a/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir +++ b/llvm/test/CodeGen/AArch64/branch-target-enforcement.mir @@ -340,11 +340,9 @@ body: | ; CHECK-LABEL: name: debug_ptr_auth ; CHECK-NOT: HINT - ; CHECK: DBG_VALUE ; CHECK: frame-setup PACIASP ; CHECK-NOT: HINT ; CHECK: RETAA - DBG_VALUE $lr frame-setup PACIASP implicit-def $lr, implicit killed $lr, implicit $sp frame-setup CFI_INSTRUCTION negate_ra_sign_state early-clobber $sp = frame-setup STRXpre killed $lr, $sp, -16 :: (store 8 into %stack.0) From 548d501082cc638e4d1cfd8ae6afdbecb067ef5a Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 16 Apr 2020 15:01:09 -0400 Subject: [PATCH 078/216] [libc++] NFC: Reindent impossible to read conditional in __config --- libcxx/include/__config | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libcxx/include/__config b/libcxx/include/__config index 51b2a64901d9..c9a9e5e28a69 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -755,13 +755,13 @@ typedef __char32_t char32_t; #endif #ifndef _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT -# ifdef _LIBCPP_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos. -# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 0 -#else -// TODO: This isn't strictly correct on ELF platforms due to llvm.org/PR37398 -// And we should consider defaulting to OFF. -# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 1 -#endif +# ifdef _LIBCPP_OBJECT_FORMAT_COFF // Windows binaries can't merge typeinfos. +# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 0 +# else + // TODO: This isn't strictly correct on ELF platforms due to llvm.org/PR37398 + // And we should consider defaulting to OFF. +# define _LIBCPP_HAS_MERGED_TYPEINFO_NAMES_DEFAULT 1 +# endif #endif #ifndef _LIBCPP_HIDE_FROM_ABI From 2b8c6acc3990872800d908b0ddf69229b21db1de Mon Sep 17 00:00:00 2001 From: Amy Huang Date: Mon, 13 Apr 2020 11:06:37 -0700 Subject: [PATCH 079/216] Reland "[codeview] Reference types in type parent scopes" Summary: Original description (https://reviews.llvm/org/D69924) Without this change, when a nested tag type of any kind (enum, class, struct, union) is used as a variable type, it is emitted without emitting the parent type. In CodeView, parent types point to their inner types, and inner types do not point back to their parents. We already walk over all of the parent scopes to build the fully qualified name. This change simply requests their type indices as we go along to enusre they are all emitted. Now, while walking over the parent scopes, add the types to DeferredCompleteTypes, since they might already be in the process of being emitted. 
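To make the scenario concrete, here is a minimal C++ sketch (illustrative
only; the type and variable names are invented, not taken from the bug report
or the tests below):

  struct Outer {
    struct Inner { int X; };   // nested tag type
  };
  Outer::Inner GlobalVar;      // uses the nested type as a variable type

Previously only 'Outer::Inner' received a CodeView record here; 'Outer' itself
was never emitted because nothing else referenced it. With this change,
walking the scope chain while building the qualified name also ensures a
record for 'Outer' is emitted, either as a forward declaration or as a
complete type, as the frontend decides.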
Fixes PR43905 Reviewers: rnk, amccarth Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78249 --- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp | 34 +++-- llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h | 9 ++ .../test/DebugInfo/COFF/parent-type-scopes.ll | 133 ++++++++++++++++++ llvm/test/DebugInfo/COFF/unnamed-nested.ll | 51 +++++++ 4 files changed, 214 insertions(+), 13 deletions(-) create mode 100644 llvm/test/DebugInfo/COFF/parent-type-scopes.ll create mode 100644 llvm/test/DebugInfo/COFF/unnamed-nested.ll diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp index ca349cbdd0e0..7c2673a2d99a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.cpp @@ -310,12 +310,19 @@ static StringRef getPrettyScopeName(const DIScope *Scope) { return StringRef(); } -static const DISubprogram *getQualifiedNameComponents( +const DISubprogram *CodeViewDebug::collectParentScopeNames( const DIScope *Scope, SmallVectorImpl &QualifiedNameComponents) { const DISubprogram *ClosestSubprogram = nullptr; while (Scope != nullptr) { if (ClosestSubprogram == nullptr) ClosestSubprogram = dyn_cast(Scope); + + // If a type appears in a scope chain, make sure it gets emitted. The + // frontend will be responsible for deciding if this should be a forward + // declaration or a complete type. + if (const auto *Ty = dyn_cast(Scope)) + DeferredCompleteTypes.push_back(Ty); + StringRef ScopeName = getPrettyScopeName(Scope); if (!ScopeName.empty()) QualifiedNameComponents.push_back(ScopeName); @@ -324,7 +331,7 @@ static const DISubprogram *getQualifiedNameComponents( return ClosestSubprogram; } -static std::string getQualifiedName(ArrayRef QualifiedNameComponents, +static std::string formatNestedName(ArrayRef QualifiedNameComponents, StringRef TypeName) { std::string FullyQualifiedName; for (StringRef QualifiedNameComponent : @@ -336,10 +343,16 @@ static std::string getQualifiedName(ArrayRef QualifiedNameComponents, return FullyQualifiedName; } -static std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name) { +std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Scope, + StringRef Name) { SmallVector QualifiedNameComponents; - getQualifiedNameComponents(Scope, QualifiedNameComponents); - return getQualifiedName(QualifiedNameComponents, Name); + collectParentScopeNames(Scope, QualifiedNameComponents); + return formatNestedName(QualifiedNameComponents, Name); +} + +std::string CodeViewDebug::getFullyQualifiedName(const DIScope *Ty) { + const DIScope *Scope = Ty->getScope(); + return getFullyQualifiedName(Scope, getPrettyScopeName(Ty)); } struct CodeViewDebug::TypeLoweringScope { @@ -354,11 +367,6 @@ struct CodeViewDebug::TypeLoweringScope { CodeViewDebug &CVD; }; -static std::string getFullyQualifiedName(const DIScope *Ty) { - const DIScope *Scope = Ty->getScope(); - return getFullyQualifiedName(Scope, getPrettyScopeName(Ty)); -} - TypeIndex CodeViewDebug::getScopeIndex(const DIScope *Scope) { // No scope means global scope and that uses the zero index. 
if (!Scope || isa(Scope)) @@ -1477,12 +1485,12 @@ void CodeViewDebug::addToUDTs(const DIType *Ty) { if (!shouldEmitUdt(Ty)) return; - SmallVector QualifiedNameComponents; + SmallVector ParentScopeNames; const DISubprogram *ClosestSubprogram = - getQualifiedNameComponents(Ty->getScope(), QualifiedNameComponents); + collectParentScopeNames(Ty->getScope(), ParentScopeNames); std::string FullyQualifiedName = - getQualifiedName(QualifiedNameComponents, getPrettyScopeName(Ty)); + formatNestedName(ParentScopeNames, getPrettyScopeName(Ty)); if (ClosestSubprogram == nullptr) { GlobalUDTs.emplace_back(std::move(FullyQualifiedName), Ty); diff --git a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h index b56b9047e1a9..f690376ada89 100644 --- a/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h +++ b/llvm/lib/CodeGen/AsmPrinter/CodeViewDebug.h @@ -443,6 +443,15 @@ class LLVM_LIBRARY_VISIBILITY CodeViewDebug : public DebugHandlerBase { codeview::TypeIndex TI, const DIType *ClassTy = nullptr); + /// Collect the names of parent scopes, innermost to outermost. Return the + /// innermost subprogram scope if present. Ensure that parent type scopes are + /// inserted into the type table. + const DISubprogram * + collectParentScopeNames(const DIScope *Scope, + SmallVectorImpl &ParentScopeNames); + std::string getFullyQualifiedName(const DIScope *Scope, StringRef Name); + std::string getFullyQualifiedName(const DIScope *Scope); + unsigned getPointerSizeInBytes(); protected: diff --git a/llvm/test/DebugInfo/COFF/parent-type-scopes.ll b/llvm/test/DebugInfo/COFF/parent-type-scopes.ll new file mode 100644 index 000000000000..9d1ba6d51db5 --- /dev/null +++ b/llvm/test/DebugInfo/COFF/parent-type-scopes.ll @@ -0,0 +1,133 @@ +; RUN: llc < %s -filetype=obj -o %t.o +; RUN: llvm-pdbutil dump -types %t.o | FileCheck %s + +; C++ source: +; // Note that MSVC doesn't emit anything about WrapTypedef or WrapTypedef::Inner! +; struct WrapTypedef { +; typedef int Inner; +; }; +; struct WrapStruct { +; struct Inner { int x; }; +; }; +; struct WrapClass { +; class Inner { public: int x; }; +; }; +; struct WrapEnum { +; enum Inner { One, Two }; +; }; +; struct WrapUnion { +; union Inner { int x; float y; }; +; }; +; void useInnerTypes() { +; WrapTypedef::Inner v1; +; WrapStruct::Inner v2; +; WrapClass::Inner v3; +; WrapEnum::Inner v4; +; WrapUnion::Inner v5; +; } + +; There should be two LF_STRUCTURE records for each wrapped type, forward decl +; and complete type. For every inner record type, there should be two. Enums +; don't get forward decls. 
+ +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapStruct` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapStruct` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapStruct::Inner` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapStruct::Inner` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapClass` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapClass` +; CHECK-DAG: | LF_CLASS {{.*}} `WrapClass::Inner` +; CHECK-DAG: | LF_CLASS {{.*}} `WrapClass::Inner` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapEnum` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapEnum` +; CHECK-DAG: | LF_ENUM {{.*}} `WrapEnum::Inner` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapUnion` +; CHECK-DAG: | LF_STRUCTURE {{.*}} `WrapUnion` +; CHECK-DAG: | LF_UNION {{.*}} `WrapUnion::Inner` +; CHECK-DAG: | LF_UNION {{.*}} `WrapUnion::Inner` + +; ModuleID = 't.cpp' +source_filename = "t.cpp" +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.23.28106" + +%"struct.WrapStruct::Inner" = type { i32 } +%"class.WrapClass::Inner" = type { i32 } +%"union.WrapUnion::Inner" = type { i32 } + +; Function Attrs: noinline nounwind optnone uwtable +define dso_local void @"?useInnerTypes@@YAXXZ"() #0 !dbg !15 { +entry: + %v1 = alloca i32, align 4 + %v2 = alloca %"struct.WrapStruct::Inner", align 4 + %v3 = alloca %"class.WrapClass::Inner", align 4 + %v4 = alloca i32, align 4 + %v5 = alloca %"union.WrapUnion::Inner", align 4 + call void @llvm.dbg.declare(metadata i32* %v1, metadata !19, metadata !DIExpression()), !dbg !23 + call void @llvm.dbg.declare(metadata %"struct.WrapStruct::Inner"* %v2, metadata !24, metadata !DIExpression()), !dbg !30 + call void @llvm.dbg.declare(metadata %"class.WrapClass::Inner"* %v3, metadata !31, metadata !DIExpression()), !dbg !37 + call void @llvm.dbg.declare(metadata i32* %v4, metadata !38, metadata !DIExpression()), !dbg !39 + call void @llvm.dbg.declare(metadata %"union.WrapUnion::Inner"* %v5, metadata !40, metadata !DIExpression()), !dbg !48 + ret void, !dbg !49 +} + +; Function Attrs: nounwind readnone speculatable willreturn +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { noinline nounwind optnone uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="none" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone speculatable willreturn } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10, !11, !12, !13} +!llvm.ident = !{!14} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 10.0.0 (git@github.com:llvm/llvm-project.git a8ccb48f697d3fbe85c593248ff1053fdf522a6e)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, nameTableKind: None) +!1 = !DIFile(filename: "t.cpp", directory: "C:\\src\\llvm-project\\build", checksumkind: CSK_MD5, checksum: "4228f12f516cd3d6dd76462be09ec111") +!2 = !{!3, !3} +!3 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Inner", scope: !4, file: !1, line: 11, baseType: !6, size: 32, elements: !7, identifier: ".?AW4Inner@WrapEnum@@") +!4 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "WrapEnum", file: !1, line: 10, size: 8, flags: DIFlagTypePassByValue, 
elements: !5, identifier: ".?AUWrapEnum@@") +!5 = !{!3} +!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!7 = !{!8, !9} +!8 = !DIEnumerator(name: "One", value: 0) +!9 = !DIEnumerator(name: "Two", value: 1) +!10 = !{i32 2, !"CodeView", i32 1} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{i32 1, !"wchar_size", i32 2} +!13 = !{i32 7, !"PIC Level", i32 2} +!14 = !{!"clang version 10.0.0 (git@github.com:llvm/llvm-project.git a8ccb48f697d3fbe85c593248ff1053fdf522a6e)"} +!15 = distinct !DISubprogram(name: "useInnerTypes", linkageName: "?useInnerTypes@@YAXXZ", scope: !1, file: !1, line: 16, type: !16, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !18) +!16 = !DISubroutineType(types: !17) +!17 = !{null} +!18 = !{} +!19 = !DILocalVariable(name: "v1", scope: !15, file: !1, line: 17, type: !20) +!20 = !DIDerivedType(tag: DW_TAG_typedef, name: "Inner", scope: !21, file: !1, line: 2, baseType: !6) +!21 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "WrapTypedef", file: !1, line: 1, size: 8, flags: DIFlagTypePassByValue, elements: !22, identifier: ".?AUWrapTypedef@@") +!22 = !{!20} +!23 = !DILocation(line: 17, scope: !15) +!24 = !DILocalVariable(name: "v2", scope: !15, file: !1, line: 18, type: !25) +!25 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "Inner", scope: !26, file: !1, line: 5, size: 32, flags: DIFlagTypePassByValue, elements: !28, identifier: ".?AUInner@WrapStruct@@") +!26 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "WrapStruct", file: !1, line: 4, size: 8, flags: DIFlagTypePassByValue, elements: !27, identifier: ".?AUWrapStruct@@") +!27 = !{!25} +!28 = !{!29} +!29 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !25, file: !1, line: 5, baseType: !6, size: 32) +!30 = !DILocation(line: 18, scope: !15) +!31 = !DILocalVariable(name: "v3", scope: !15, file: !1, line: 19, type: !32) +!32 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "Inner", scope: !33, file: !1, line: 8, size: 32, flags: DIFlagTypePassByValue, elements: !35, identifier: ".?AVInner@WrapClass@@") +!33 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "WrapClass", file: !1, line: 7, size: 8, flags: DIFlagTypePassByValue, elements: !34, identifier: ".?AUWrapClass@@") +!34 = !{!32} +!35 = !{!36} +!36 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !32, file: !1, line: 8, baseType: !6, size: 32, flags: DIFlagPublic) +!37 = !DILocation(line: 19, scope: !15) +!38 = !DILocalVariable(name: "v4", scope: !15, file: !1, line: 20, type: !3) +!39 = !DILocation(line: 20, scope: !15) +!40 = !DILocalVariable(name: "v5", scope: !15, file: !1, line: 21, type: !41) +!41 = distinct !DICompositeType(tag: DW_TAG_union_type, name: "Inner", scope: !42, file: !1, line: 14, size: 32, flags: DIFlagTypePassByValue, elements: !44, identifier: ".?ATInner@WrapUnion@@") +!42 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "WrapUnion", file: !1, line: 13, size: 8, flags: DIFlagTypePassByValue, elements: !43, identifier: ".?AUWrapUnion@@") +!43 = !{!41} +!44 = !{!45, !46} +!45 = !DIDerivedType(tag: DW_TAG_member, name: "x", scope: !41, file: !1, line: 14, baseType: !6, size: 32) +!46 = !DIDerivedType(tag: DW_TAG_member, name: "y", scope: !41, file: !1, line: 14, baseType: !47, size: 32) +!47 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!48 = !DILocation(line: 21, scope: !15) +!49 = !DILocation(line: 22, scope: !15) diff --git a/llvm/test/DebugInfo/COFF/unnamed-nested.ll 
b/llvm/test/DebugInfo/COFF/unnamed-nested.ll new file mode 100644 index 000000000000..6cb8671e299b --- /dev/null +++ b/llvm/test/DebugInfo/COFF/unnamed-nested.ll @@ -0,0 +1,51 @@ +; RUN: llc < %s -filetype=obj -o %t.o +; RUN: llvm-pdbutil dump -types %t.o | FileCheck %s + +; C source to regenerate: +; $ clang -g -gcodeview -S -emit-llvm t.c +; $ cat t.c +; struct { +; union { +; struct {}; +; }; +; } S; + +; Test that this compiles without errors. + +; CHECK: LF_STRUCTURE{{.*}}:::: +; CHECK: LF_UNION{{.*}}:: +; CHECK: LF_STRUCTURE{{.*}} + +; ModuleID = 't.c' +source_filename = "t.c" +target datalayout = "e-m:w-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-windows-msvc19.24.28316" + +%struct.anon = type { %union.anon } +%union.anon = type { %struct.anon.0 } +%struct.anon.0 = type { [4 x i8] } + +@S = dso_local global %struct.anon zeroinitializer, align 1, !dbg !0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!13, !14, !15, !16} +!llvm.ident = !{!17} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "S", scope: !2, file: !3, line: 5, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 60d09bec7f8699728d38057430422d955d32a904)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: None) +!3 = !DIFile(filename: "t.c", directory: "C:\\src\\llvm-build", checksumkind: CSK_MD5, checksum: "c31fe86676dd2fb56f847f926c0f2c71") +!4 = !{} +!5 = !{!0} +!6 = distinct !DICompositeType(tag: DW_TAG_structure_type, file: !3, line: 1, size: 32, elements: !7) +!7 = !{!8, !12} +!8 = distinct !DICompositeType(tag: DW_TAG_union_type, scope: !6, file: !3, line: 2, size: 32, elements: !9) +!9 = !{!10, !11} +!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !8, file: !3, line: 3, size: 32, elements: !4) +!11 = !DIDerivedType(tag: DW_TAG_member, scope: !8, file: !3, line: 3, baseType: !10, size: 32) +!12 = !DIDerivedType(tag: DW_TAG_member, scope: !6, file: !3, line: 2, baseType: !8, size: 32) +!13 = !{i32 2, !"CodeView", i32 1} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{i32 1, !"wchar_size", i32 2} +!16 = !{i32 7, !"PIC Level", i32 2} +!17 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 60d09bec7f8699728d38057430422d955d32a904)"} From 86478d3de91a81978c2c310fda13f04541cd3b23 Mon Sep 17 00:00:00 2001 From: bd1976llvm Date: Thu, 16 Apr 2020 10:45:31 +0000 Subject: [PATCH 080/216] [MC][ELF] Put explicit section name symbols into entry size compatible sections Ensure that symbols explicitly* assigned a section name are placed into a section with a compatible entry size. This is done by creating multiple sections with the same name** if incompatible symbols are explicitly given the name of an incompatible section, whilst: - Avoiding using uniqued sections where possible (for readability and to maximize compatibly with assemblers). - Creating as few SHF_MERGE sections as possible (for efficiency). Given that each symbol is assigned to a section in a single pass, we must decide which section each symbol is assigned to without seeing the properties of all symbols. A stable and easy to understand assignment is desirable. 
The following rules facilitate this: The "generic" section for a given
section name will be mergeable if the name is a mergeable "default" section
name (such as .debug_str), a mergeable "implicit" section name (such as
.rodata.str2.2), or MC has already created a mergeable "generic" section for
the given section name (e.g. in response to a section directive in inline
assembly). Otherwise, the "generic" section for a given name is
non-mergeable; non-mergeable symbols are assigned to the "generic" section,
while mergeable symbols are assigned to uniqued sections.

Terminology:
"default" sections are those always created by MC initially, e.g. .text or
.debug_str.
"implicit" sections are those created normally by MC in response to the
symbols that it encounters, i.e. in the absence of an explicit section name
assignment on the symbol, e.g. a function foo might be placed into a
.text.foo section.
"generic" sections are those that are referred to when a unique section ID is
not supplied, e.g. if there are multiple unique .bob sections then
".quad .bob" will reference the generic .bob section. Typically, the generic
section is just the first section of a given name to be created. Default
sections are always generic.

* Typically, section names might be explicitly assigned in source code using
a language extension, e.g. a section attribute:
__attribute__((section("section-name"))) -
https://clang.llvm.org/docs/AttributeReference.html

** I refer to such sections as unique/uniqued sections. In assembly the
",unique," syntax is used to express such sections.

Fixes https://bugs.llvm.org/show_bug.cgi?id=43457.

See https://reviews.llvm.org/D68101 for previous discussions leading to this
patch.

Some minor fixes were required to LLVM's tests, as the tests had been relying
on the old behavior, which allowed a global to be explicitly assigned to a
section with an incompatible entry size.

This fix relies on the ",unique," assembly feature. This feature is not
available until binutils version 2.35
(https://sourceware.org/bugzilla/show_bug.cgi?id=25380). If the integrated
assembler is not being used then we avoid using this feature for
compatibility and instead try to place mergeable symbols into non-mergeable
sections or issue an error otherwise.
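For illustration, a minimal sketch of the resulting behavior (the global
names and unique IDs below are invented; the output mirrors the
explicit-section-mergeable.ll test added by this patch). Two mergeable
globals with different entry sizes explicitly assigned to the same section
name:

```
@a = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit"
@b = unnamed_addr constant [2 x i32] [i32 1, i32 1], section ".explicit"
; With the integrated assembler these are now emitted into two sections that
; share the name but have compatible entry sizes, e.g.:
;   .section .explicit,"aM",@progbits,4,unique,1
;   .section .explicit,"aM",@progbits,8,unique,2
```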
Differential Revision: https://reviews.llvm.org/D72194 --- .../CodeGen/cfstring-elf-sections-x86_64.c | 4 +- llvm/include/llvm/IR/DiagnosticInfo.h | 1 + llvm/include/llvm/MC/MCContext.h | 43 +++ .../CodeGen/TargetLoweringObjectFileImpl.cpp | 179 ++++++++--- llvm/lib/MC/MCContext.cpp | 41 +++ .../CodeGen/X86/explicit-section-mergeable.ll | 296 ++++++++++++++++++ .../LegacyRTDyldObjectLinkingLayerTest.cpp | 10 +- .../Orc/RTDyldObjectLinkingLayerTest.cpp | 10 +- 8 files changed, 529 insertions(+), 55 deletions(-) create mode 100644 llvm/test/CodeGen/X86/explicit-section-mergeable.ll diff --git a/clang/test/CodeGen/cfstring-elf-sections-x86_64.c b/clang/test/CodeGen/cfstring-elf-sections-x86_64.c index 439113a4e855..3e300dadc5fa 100644 --- a/clang/test/CodeGen/cfstring-elf-sections-x86_64.c +++ b/clang/test/CodeGen/cfstring-elf-sections-x86_64.c @@ -7,12 +7,12 @@ const CFStringRef one = (CFStringRef)__builtin___CFStringMakeConstantString("one const CFStringRef two = (CFStringRef)__builtin___CFStringMakeConstantString("\xef\xbf\xbd\x74\xef\xbf\xbd\x77\xef\xbf\xbd\x6f"); // CHECK-ELF-DATA-SECTION: .type .L.str,@object -// CHECK-ELF-DATA-SECTION: .section .rodata,"a",@progbits +// CHECK-ELF-DATA-SECTION: .section .rodata,"aMS",@progbits,1,unique,1 // CHECK-ELF-DATA-SECTION: .L.str: // CHECK-ELF-DATA-SECTION: .asciz "one" // CHECK-ELF-DATA-SECTION: .type .L.str.1,@object -// CHECK-ELF-DATA-SECTION: .section .rodata,"a",@progbits +// CHECK-ELF-DATA-SECTION: .section .rodata,"aMS",@progbits,2,unique,2 // CHECK-ELF-DATA-SECTION: .L.str.1: // CHECK-ELF-DATA-SECTION: .short 65533 // CHECK-ELF-DATA-SECTION: .short 116 diff --git a/llvm/include/llvm/IR/DiagnosticInfo.h b/llvm/include/llvm/IR/DiagnosticInfo.h index 44db0eacca1a..a8e8a7915b2a 100644 --- a/llvm/include/llvm/IR/DiagnosticInfo.h +++ b/llvm/include/llvm/IR/DiagnosticInfo.h @@ -55,6 +55,7 @@ enum DiagnosticKind { DK_ResourceLimit, DK_StackSize, DK_Linker, + DK_Lowering, DK_DebugMetadataVersion, DK_DebugMetadataInvalid, DK_ISelFallback, diff --git a/llvm/include/llvm/MC/MCContext.h b/llvm/include/llvm/MC/MCContext.h index 9e02bd1a4be0..b1fa8e7a1d0e 100644 --- a/llvm/include/llvm/MC/MCContext.h +++ b/llvm/include/llvm/MC/MCContext.h @@ -18,6 +18,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/Dwarf.h" +#include "llvm/BinaryFormat/ELF.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCAsmMacro.h" #include "llvm/MC/MCDwarf.h" @@ -307,6 +308,37 @@ namespace llvm { /// Map of currently defined macros. StringMap MacroMap; + struct ELFEntrySizeKey { + std::string SectionName; + unsigned Flags; + unsigned EntrySize; + + ELFEntrySizeKey(StringRef SectionName, unsigned Flags, unsigned EntrySize) + : SectionName(SectionName), Flags(Flags), EntrySize(EntrySize) {} + + bool operator<(const ELFEntrySizeKey &Other) const { + if (SectionName != Other.SectionName) + return SectionName < Other.SectionName; + if ((Flags & ELF::SHF_STRINGS) != (Other.Flags & ELF::SHF_STRINGS)) + return Other.Flags & ELF::SHF_STRINGS; + return EntrySize < Other.EntrySize; + } + }; + + // Symbols must be assigned to a section with a compatible entry + // size. This map is used to assign unique IDs to sections to + // distinguish between sections with identical names but incompatible entry + // sizes. This can occur when a symbol is explicitly assigned to a + // section, e.g. via __attribute__((section("myname"))). + std::map ELFEntrySizeMap; + + // This set is used to record the generic mergeable section names seen. 
+ // These are sections that are created as mergeable e.g. .debug_str. We need + // to avoid assigning non-mergeable symbols to these sections. It is used + // to prevent non-mergeable symbols being explicitly assigned to mergeable + // sections (e.g. via _attribute_((section("myname")))). + DenseSet ELFSeenGenericMergeableSections; + public: explicit MCContext(const MCAsmInfo *MAI, const MCRegisterInfo *MRI, const MCObjectFileInfo *MOFI, @@ -466,6 +498,17 @@ namespace llvm { MCSectionELF *createELFGroupSection(const MCSymbolELF *Group); + void recordELFMergeableSectionInfo(StringRef SectionName, unsigned Flags, + unsigned UniqueID, unsigned EntrySize); + + bool isELFImplicitMergeableSectionNamePrefix(StringRef Name); + + bool isELFGenericMergeableSection(StringRef Name); + + Optional getELFUniqueIDForEntsize(StringRef SectionName, + unsigned Flags, + unsigned EntrySize); + MCSectionCOFF *getCOFFSection(StringRef Section, unsigned Characteristics, SectionKind Kind, StringRef COMDATSymName, int Selection, diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 0acea0871dab..a5f380168c10 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -29,6 +29,8 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/DiagnosticPrinter.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalAlias.h" #include "llvm/IR/GlobalObject.h" @@ -568,6 +570,71 @@ static unsigned getEntrySizeForKind(SectionKind Kind) { } } +/// Return the section prefix name used by options FunctionsSections and +/// DataSections. +static StringRef getSectionPrefixForGlobal(SectionKind Kind) { + if (Kind.isText()) + return ".text"; + if (Kind.isReadOnly()) + return ".rodata"; + if (Kind.isBSS()) + return ".bss"; + if (Kind.isThreadData()) + return ".tdata"; + if (Kind.isThreadBSS()) + return ".tbss"; + if (Kind.isData()) + return ".data"; + if (Kind.isReadOnlyWithRel()) + return ".data.rel.ro"; + llvm_unreachable("Unknown section kind"); +} + +static SmallString<128> +getELFSectionNameForGlobal(const GlobalObject *GO, SectionKind Kind, + Mangler &Mang, const TargetMachine &TM, + unsigned EntrySize, bool UniqueSectionName) { + SmallString<128> Name; + if (Kind.isMergeableCString()) { + // We also need alignment here. + // FIXME: this is getting the alignment of the character, not the + // alignment of the global! 
+ unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( + cast(GO)); + + std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; + Name = SizeSpec + utostr(Align); + } else if (Kind.isMergeableConst()) { + Name = ".rodata.cst"; + Name += utostr(EntrySize); + } else { + Name = getSectionPrefixForGlobal(Kind); + } + + if (const auto *F = dyn_cast(GO)) { + if (Optional Prefix = F->getSectionPrefix()) + Name += *Prefix; + } + + if (UniqueSectionName) { + Name.push_back('.'); + TM.getNameWithPrefix(Name, GO, Mang, /*MayAlwaysUsePrivate*/true); + } + return Name; +} + +namespace { +class LoweringDiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; + +public: + LoweringDiagnosticInfo(const Twine &DiagMsg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_Lowering, Severity), Msg(DiagMsg) {} + void print(DiagnosticPrinter &DP) const override { DP << Msg; } +}; +} + MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { StringRef SectionName = GO->getSection(); @@ -603,6 +670,8 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( Flags |= ELF::SHF_GROUP; } + unsigned EntrySize = getEntrySizeForKind(Kind); + // A section can have at most one associated section. Put each global with // MD_associated in a unique section. unsigned UniqueID = MCContext::GenericSectionID; @@ -610,35 +679,75 @@ MCSection *TargetLoweringObjectFileELF::getExplicitSectionGlobal( if (LinkedToSym) { UniqueID = NextUniqueID++; Flags |= ELF::SHF_LINK_ORDER; + } else { + if (getContext().getAsmInfo()->useIntegratedAssembler()) { + // Symbols must be placed into sections with compatible entry + // sizes. Generate unique sections for symbols that have not + // been assigned to compatible sections. + if (Flags & ELF::SHF_MERGE) { + auto maybeID = getContext().getELFUniqueIDForEntsize(SectionName, Flags, + EntrySize); + if (maybeID) + UniqueID = *maybeID; + else { + // If the user has specified the same section name as would be created + // implicitly for this symbol e.g. .rodata.str1.1, then we don't need + // to unique the section as the entry size for this symbol will be + // compatible with implicitly created sections. + SmallString<128> ImplicitSectionNameStem = getELFSectionNameForGlobal( + GO, Kind, getMangler(), TM, EntrySize, false); + if (!(getContext().isELFImplicitMergeableSectionNamePrefix( + SectionName) && + SectionName.startswith(ImplicitSectionNameStem))) + UniqueID = NextUniqueID++; + } + } else { + // We need to unique the section if the user has explicity + // assigned a non-mergeable symbol to a section name for + // a generic mergeable section. + if (getContext().isELFGenericMergeableSection(SectionName)) { + auto maybeID = getContext().getELFUniqueIDForEntsize( + SectionName, Flags, EntrySize); + UniqueID = maybeID ? *maybeID : NextUniqueID++; + } + } + } else { + // If two symbols with differing sizes end up in the same mergeable + // section that section can be assigned an incorrect entry size. To avoid + // this we usually put symbols of the same size into distinct mergeable + // sections with the same name. Doing so relies on the ",unique ," + // assembly feature. This feature is not avalible until bintuils + // version 2.35 (https://sourceware.org/bugzilla/show_bug.cgi?id=25380). 
+ Flags &= ~ELF::SHF_MERGE; + EntrySize = 0; + } } MCSectionELF *Section = getContext().getELFSection( SectionName, getELFSectionType(SectionName, Kind), Flags, - getEntrySizeForKind(Kind), Group, UniqueID, LinkedToSym); + EntrySize, Group, UniqueID, LinkedToSym); // Make sure that we did not get some other section with incompatible sh_link. // This should not be possible due to UniqueID code above. assert(Section->getLinkedToSymbol() == LinkedToSym && "Associated symbol mismatch between sections"); - return Section; -} -/// Return the section prefix name used by options FunctionsSections and -/// DataSections. -static StringRef getSectionPrefixForGlobal(SectionKind Kind) { - if (Kind.isText()) - return ".text"; - if (Kind.isReadOnly()) - return ".rodata"; - if (Kind.isBSS()) - return ".bss"; - if (Kind.isThreadData()) - return ".tdata"; - if (Kind.isThreadBSS()) - return ".tbss"; - if (Kind.isData()) - return ".data"; - assert(Kind.isReadOnlyWithRel() && "Unknown section kind"); - return ".data.rel.ro"; + if (!getContext().getAsmInfo()->useIntegratedAssembler()) { + // If we are not using the integrated assembler then this symbol might have + // been placed in an incompatible mergeable section. Emit an error if this + // is the case to avoid creating broken output. + if ((Section->getFlags() & ELF::SHF_MERGE) && + (Section->getEntrySize() != getEntrySizeForKind(Kind))) + GO->getContext().diagnose(LoweringDiagnosticInfo( + "Symbol '" + GO->getName() + "' from module '" + + (GO->getParent() ? GO->getParent()->getSourceFileName() : "unknown") + + "' required a section with entry-size=" + + Twine(getEntrySizeForKind(Kind)) + " but was placed in section '" + + SectionName + "' with entry-size=" + Twine(Section->getEntrySize()) + + ": Explicit assignment by pragma or attribute of an incompatible " + "symbol to this section?")); + } + + return Section; } static MCSectionELF *selectELFSectionForGlobal( @@ -655,39 +764,19 @@ static MCSectionELF *selectELFSectionForGlobal( // Get the section entry size based on the kind. unsigned EntrySize = getEntrySizeForKind(Kind); - SmallString<128> Name; - if (Kind.isMergeableCString()) { - // We also need alignment here. - // FIXME: this is getting the alignment of the character, not the - // alignment of the global! - unsigned Align = GO->getParent()->getDataLayout().getPreferredAlignment( - cast(GO)); - - std::string SizeSpec = ".rodata.str" + utostr(EntrySize) + "."; - Name = SizeSpec + utostr(Align); - } else if (Kind.isMergeableConst()) { - Name = ".rodata.cst"; - Name += utostr(EntrySize); - } else { - Name = getSectionPrefixForGlobal(Kind); - } - - if (const auto *F = dyn_cast(GO)) { - const auto &OptionalPrefix = F->getSectionPrefix(); - if (OptionalPrefix) - Name += *OptionalPrefix; - } - + bool UniqueSectionName = false; unsigned UniqueID = MCContext::GenericSectionID; if (EmitUniqueSection) { if (TM.getUniqueSectionNames()) { - Name.push_back('.'); - TM.getNameWithPrefix(Name, GO, Mang, true /*MayAlwaysUsePrivate*/); + UniqueSectionName = true; } else { UniqueID = *NextUniqueID; (*NextUniqueID)++; } } + SmallString<128> Name = getELFSectionNameForGlobal( + GO, Kind, Mang, TM, EntrySize, UniqueSectionName); + // Use 0 as the unique ID for execute-only text. 
if (Kind.isExecuteOnly()) UniqueID = 0; diff --git a/llvm/lib/MC/MCContext.cpp b/llvm/lib/MC/MCContext.cpp index f68320fa9ada..1bc313553aff 100644 --- a/llvm/lib/MC/MCContext.cpp +++ b/llvm/lib/MC/MCContext.cpp @@ -114,6 +114,9 @@ void MCContext::reset() { WasmUniquingMap.clear(); XCOFFUniquingMap.clear(); + ELFEntrySizeMap.clear(); + ELFSeenGenericMergeableSections.clear(); + NextID.clear(); AllowTemporaryLabels = true; DwarfLocSeen = false; @@ -441,6 +444,10 @@ MCSectionELF *MCContext::getELFSection(const Twine &Section, unsigned Type, createELFSectionImpl(CachedName, Type, Flags, Kind, EntrySize, GroupSym, UniqueID, LinkedToSym); Entry.second = Result; + + recordELFMergeableSectionInfo(Result->getName(), Result->getFlags(), + Result->getUniqueID(), Result->getEntrySize()); + return Result; } @@ -450,6 +457,40 @@ MCSectionELF *MCContext::createELFGroupSection(const MCSymbolELF *Group) { MCSection::NonUniqueID, nullptr); } +void MCContext::recordELFMergeableSectionInfo(StringRef SectionName, + unsigned Flags, unsigned UniqueID, + unsigned EntrySize) { + bool IsMergeable = Flags & ELF::SHF_MERGE; + if (IsMergeable && (UniqueID == GenericSectionID)) + ELFSeenGenericMergeableSections.insert(SectionName); + + // For mergeable sections or non-mergeable sections with a generic mergeable + // section name we enter their Unique ID into the ELFEntrySizeMap so that + // compatible globals can be assigned to the same section. + if (IsMergeable || isELFGenericMergeableSection(SectionName)) { + ELFEntrySizeMap.insert(std::make_pair( + ELFEntrySizeKey{SectionName, Flags, EntrySize}, UniqueID)); + } +} + +bool MCContext::isELFImplicitMergeableSectionNamePrefix(StringRef SectionName) { + return SectionName.startswith(".rodata.str") || + SectionName.startswith(".rodata.cst"); +} + +bool MCContext::isELFGenericMergeableSection(StringRef SectionName) { + return isELFImplicitMergeableSectionNamePrefix(SectionName) || + ELFSeenGenericMergeableSections.count(SectionName); +} + +Optional MCContext::getELFUniqueIDForEntsize(StringRef SectionName, + unsigned Flags, + unsigned EntrySize) { + auto I = ELFEntrySizeMap.find( + MCContext::ELFEntrySizeKey{SectionName, Flags, EntrySize}); + return (I != ELFEntrySizeMap.end()) ? Optional(I->second) : None; +} + MCSectionCOFF *MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, SectionKind Kind, diff --git a/llvm/test/CodeGen/X86/explicit-section-mergeable.ll b/llvm/test/CodeGen/X86/explicit-section-mergeable.ll new file mode 100644 index 000000000000..2b60a19247df --- /dev/null +++ b/llvm/test/CodeGen/X86/explicit-section-mergeable.ll @@ -0,0 +1,296 @@ +; RUN: llc < %s -mtriple=x86_64 -unique-section-names=0 -data-sections 2>&1 \ +; RUN: | FileCheck %s + +;; Several sections are created via inline assembly. We add checks +;; for these lines as we want to use --implicit-check-not to reduce the +;; number of checks in this file. +; CHECK: .section .asm_mergeable1,"aMS",@progbits,2 +; CHECK-NEXT: .section .asm_nonmergeable1,"a",@progbits +; CHECK-NEXT: .section .asm_mergeable2,"aMS",@progbits,2 +; CHECK-NEXT: .section .asm_nonmergeable2,"a",@progbits + +;; Test implicit section assignment for symbols +; CHECK: .section .data,"aw",@progbits,unique,1 +; CHECK: uniquified: + +;; Create a uniquified symbol (as -unique-section-names=0) to test the uniqueID +;; interaction with mergeable symbols. +@uniquified = global i32 1 + +;; Test implicit section assignment for symbols to ensure that the symbols +;; have the expected properties. 
+; CHECK: .section .rodata,"a",@progbits,unique,2 +; CHECK: implicit_nonmergeable: +; CHECK: .section .rodata.cst4,"aM",@progbits,4 +; CHECK: implicit_rodata_cst4: +; CHECK: .section .rodata.cst8,"aM",@progbits,8 +; CHECK: implicit_rodata_cst8: +; CHECK: .section .rodata.str4.4,"aMS",@progbits,4 +; CHECK: implicit_rodata_str4_4: + +@implicit_nonmergeable = constant [2 x i16] [i16 1, i16 1] +@implicit_rodata_cst4 = unnamed_addr constant [2 x i16] [i16 1, i16 1] +@implicit_rodata_cst8 = unnamed_addr constant [2 x i32] [i32 1, i32 1] +@implicit_rodata_str4_4 = unnamed_addr constant [2 x i32] [i32 1, i32 0] + +;; Basic checks that mergeable globals are placed into multiple distinct +;; sections with the same name and a compatible entry size. + +; CHECK: .section .explicit_basic,"aM",@progbits,4,unique,3 +; CHECK: explicit_basic_1: +; CHECK: explicit_basic_2: + +;; Assign a mergeable global to a non-existing section. +@explicit_basic_1 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_basic" +;; Assign a compatible mergeable global to the previous section. +@explicit_basic_2 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_basic" + +; CHECK: .section .explicit_basic,"aM",@progbits,8,unique,4 +; CHECK: explicit_basic_3: +; CHECK: explicit_basic_4: + +;; Assign a symbol with an incompatible entsize (different size) to a section with the same name. +@explicit_basic_3 = unnamed_addr constant [2 x i32] [i32 1, i32 1], section ".explicit_basic" +;; Assign a compatible mergeable global to the previous section. +@explicit_basic_4 = unnamed_addr constant [2 x i32] [i32 1, i32 1], section ".explicit_basic" + +; CHECK: .section .explicit_basic,"aMS",@progbits,4,unique,5 +; CHECK: explicit_basic_5: +; CHECK: explicit_basic_6: + +;; Assign a symbol with an incompatible entsize (string vs non-string) to a section with the same name. +@explicit_basic_5 = unnamed_addr constant [2 x i32] [i32 1, i32 0], section ".explicit_basic" +;; Assign a compatible mergeable global to the previous section. +@explicit_basic_6 = unnamed_addr constant [2 x i32] [i32 1, i32 0], section ".explicit_basic" + +; CHECK: .section .explicit_basic,"a",@progbits +; CHECK: explicit_basic_7: + +;; Assign a symbol with an incompatible entsize (non-mergeable) to a mergeable section created explicitly. +@explicit_basic_7 = constant [2 x i16] [i16 1, i16 1], section ".explicit_basic" + +; CHECK: .section .explicit_initially_nonmergeable,"a",@progbits +; CHECK: explicit_basic_8: +; CHECK: .section .explicit_initially_nonmergeable,"aM",@progbits,4,unique,6 +; CHECK: explicit_basic_9: + +;; Assign a mergeble symbol to a section that initially had a non-mergeable symbol explicitly assigned to it. +@explicit_basic_8 = constant [2 x i16] [i16 1, i16 1], section ".explicit_initially_nonmergeable" +@explicit_basic_9 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_initially_nonmergeable" + +; CHECK: .section .explicit_initially_nonmergeable,"a",@progbits +; CHECK: explicit_basic_10: +; CHECK: .section .explicit_initially_nonmergeable,"aM",@progbits,4,unique,6 +; CHECK: explicit_basic_11: + +;; Assign compatible globals to the previously created sections. +@explicit_basic_10 = constant [2 x i16] [i16 1, i16 1], section ".explicit_initially_nonmergeable" +@explicit_basic_11 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_initially_nonmergeable" + +;; Check that mergeable symbols can be explicitly assigned to "default" sections. 
+ +; CHECK: .section .rodata.cst16,"a",@progbits,unique,7 +; CHECK: explicit_default_1: + +;; Assign an incompatible (non-mergeable) symbol to a "default" mergeable section. +@explicit_default_1 = constant [2 x i64] [i64 1, i64 1], section ".rodata.cst16" + +; CHECK: .section .rodata.cst16,"aM",@progbits,16 +; CHECK: explicit_default_2: + +;; Assign a compatible global to a "default" mergeable section. +@explicit_default_2 = unnamed_addr constant [2 x i64] [i64 1, i64 1], section ".rodata.cst16" + +; CHECK: .section .debug_str,"MS",@progbits,1 +; CHECK: explicit_default_3: + +;; Non-allocatable "default" sections can have allocatable mergeable symbols with compatible entry sizes assigned to them. +@explicit_default_3 = unnamed_addr constant [2 x i8] [i8 1, i8 0], section ".debug_str" + +; CHECK: .section .debug_str,"a",@progbits,unique,8 +; CHECK: explicit_default_4: + +;; Non-allocatable "default" sections cannot have allocatable mergeable symbols with incompatible (non-mergeable) entry sizes assigned to them. +@explicit_default_4 = constant [2 x i16] [i16 1, i16 1], section ".debug_str" + +;; Test implicit section assignment for globals with associated globals. +; CHECK: .section .rodata.cst4,"aMo",@progbits,4,implicit_rodata_cst4,unique,9 +; CHECK: implicit_rodata_cst4_assoc: +; CHECK: .section .rodata.cst8,"aMo",@progbits,8,implicit_rodata_cst4,unique,10 +; CHECK: implicit_rodata_cst8_assoc: + +@implicit_rodata_cst4_assoc = unnamed_addr constant [2 x i16] [i16 1, i16 1], !associated !4 +@implicit_rodata_cst8_assoc = unnamed_addr constant [2 x i32] [i32 1, i32 1], !associated !4 + +;; Check that globals with associated globals that are explicitly assigned +;; to a section have been placed into distinct sections with the same name, but +;; different entry sizes. +; CHECK: .section .explicit,"aMo",@progbits,4,implicit_rodata_cst4,unique,11 +; CHECK: explicit_assoc_1: +; CHECK: .section .explicit,"aMo",@progbits,4,implicit_rodata_cst4,unique,12 +; CHECK: explicit_assoc_2: +; CHECK: .section .explicit,"aMo",@progbits,8,implicit_rodata_cst4,unique,13 +; CHECK: explicit_assoc_3: + +@explicit_assoc_1 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit", !associated !4 +@explicit_assoc_2 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit", !associated !4 +@explicit_assoc_3 = unnamed_addr constant [2 x i32] [i32 1, i32 1], section ".explicit", !associated !4 + +!4 = !{[2 x i16]* @implicit_rodata_cst4} + +;; Test implicit section assignment for globals in distinct comdat groups. +; CHECK: .section .rodata.cst4,"aGM",@progbits,4,f,comdat,unique,14 +; CHECK: implicit_rodata_cst4_comdat: +; CHECK: .section .rodata.cst8,"aGM",@progbits,8,g,comdat,unique,15 +; CHECK: implicit_rodata_cst8_comdat: + +;; Check that globals in distinct comdat groups that are explicitly assigned +;; to a section have been placed into distinct sections with the same name, but +;; different entry sizes. Due to the way that MC currently works the unique ID +;; does not have any effect here, although it appears in the assembly. The unique ID's +;; appear incorrect as comdats are not taken into account when looking up the unique ID +;; for a mergeable section. However, as they have no effect it doesn't matter that they +;; are incorrect. 
+; CHECK: .section .explicit_comdat_distinct,"aM",@progbits,4,unique,16 +; CHECK: explicit_comdat_distinct_supply_uid: +; CHECK: .section .explicit_comdat_distinct,"aGM",@progbits,4,f,comdat,unique,16 +; CHECK: explicit_comdat_distinct1: +; CHECK: .section .explicit_comdat_distinct,"aGM",@progbits,4,g,comdat,unique,16 +; CHECK: explicit_comdat_distinct2: +; CHECK: .section .explicit_comdat_distinct,"aGM",@progbits,8,h,comdat,unique,17 +; CHECK: explicit_comdat_distinct3: + +$f = comdat any +$g = comdat any +$h = comdat any + +@implicit_rodata_cst4_comdat = unnamed_addr constant [2 x i16] [i16 1, i16 1], comdat($f) +@implicit_rodata_cst8_comdat = unnamed_addr constant [2 x i32] [i32 1, i32 1], comdat($g) + +@explicit_comdat_distinct_supply_uid = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_comdat_distinct" +@explicit_comdat_distinct1 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_comdat_distinct", comdat($f) +@explicit_comdat_distinct2 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_comdat_distinct", comdat($g) +@explicit_comdat_distinct3 = unnamed_addr constant [2 x i32] [i32 1, i32 1], section ".explicit_comdat_distinct", comdat($h) + +;; Test implicit section assignment for globals in the same comdat group. +; CHECK: .section .rodata.cst4,"aGM",@progbits,4,i,comdat,unique,18 +; CHECK: implicit_rodata_cst4_same_comdat: +; CHECK: .section .rodata.cst8,"aGM",@progbits,8,i,comdat,unique,19 +; CHECK: implicit_rodata_cst8_same_comdat: + +;; Check that globals in the same comdat group that are explicitly assigned +;; to a section have been placed into distinct sections with the same name, but +;; different entry sizes. Due to the way that MC currently works the unique ID +;; does not have any effect here, although it appears in the assembly. The unique ID's +;; appear incorrect as comdats are not taken into account when looking up the unique ID +;; for a mergeable section. However, as they have no effect it doesn't matter that they +;; are incorrect. +; CHECK: .section .explicit_comdat_same,"aM",@progbits,4,unique,20 +; CHECK: explicit_comdat_same_supply_uid: +; CHECK: .section .explicit_comdat_same,"aGM",@progbits,4,i,comdat,unique,20 +; CHECK: explicit_comdat_same1: +; CHECK: explicit_comdat_same2: +; CHECK: .section .explicit_comdat_same,"aGM",@progbits,8,i,comdat,unique,21 +; CHECK: explicit_comdat_same3: + +$i = comdat any + +@implicit_rodata_cst4_same_comdat = unnamed_addr constant [2 x i16] [i16 1, i16 1], comdat($i) +@implicit_rodata_cst8_same_comdat = unnamed_addr constant [2 x i32] [i32 1, i32 1], comdat($i) + +@explicit_comdat_same_supply_uid = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_comdat_same" +@explicit_comdat_same1 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_comdat_same", comdat($i) +@explicit_comdat_same2 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit_comdat_same", comdat($i) +@explicit_comdat_same3 = unnamed_addr constant [2 x i32] [i32 1, i32 1], section ".explicit_comdat_same", comdat($i) + +;; Check interaction between symbols that are explicitly assigned +;; to a section and implicitly assigned symbols. + +; CHECK: .section .rodata.str1.1,"aMS",@progbits,1 +; CHECK: implicit_rodata_str1_1: +; CHECK: explicit_implicit_1: + +;; Assign a compatible global to an existing mergeable section created implicitly. 
+@implicit_rodata_str1_1 = unnamed_addr constant [2 x i8] [i8 1, i8 0] +@explicit_implicit_1 = unnamed_addr constant [2 x i8] [i8 1, i8 0], section ".rodata.str1.1" + +; CHECK: .section .rodata.str1.1,"a",@progbits,unique,22 +; CHECK: explicit_implicit_2: + +;; Assign an incompatible symbol (non-mergeable) to an existing mergeable section created implicitly. +@explicit_implicit_2 = constant [2 x i16] [i16 1, i16 1], section ".rodata.str1.1" + +; CHECK: .section .rodata.str1.1,"aMS",@progbits,1 +; CHECK: explicit_implicit_3: +; CHECK: .section .rodata.str1.1,"a",@progbits,unique,22 +; CHECK: explicit_implicit_4: + +;; Assign compatible globals to the previously created sections. +@explicit_implicit_3 = unnamed_addr constant [2 x i8] [i8 1, i8 0], section ".rodata.str1.1" +@explicit_implicit_4 = constant [2 x i16] [i16 1, i16 1], section ".rodata.str1.1" + +; CHECK: .section .rodata.str2.2,"aMS",@progbits,2 +; CHECK: explicit_implicit_5: +; CHECK: implicit_rodata_str2_2: + +;; Implicitly assign a compatible global to an existing mergeable section created explicitly. +@explicit_implicit_5 = unnamed_addr constant [2 x i16] [i16 1, i16 0], section ".rodata.str2.2" +@implicit_rodata_str2_2 = unnamed_addr constant [2 x i16] [i16 1, i16 0] + +;; Check the interaction with inline asm. + +; CHECK: .section .asm_mergeable1,"aMS",@progbits,2 +; CHECK: explicit_asm_1: +; CHECK: .section .asm_nonmergeable1,"a",@progbits +; CHECK: explicit_asm_2: +; CHECK: .section .asm_mergeable1,"aM",@progbits,4,unique,23 +; CHECK: explicit_asm_3: +; CHECK: .section .asm_nonmergeable1,"aMS",@progbits,2,unique,24 +; CHECK: explicit_asm_4: +; CHECK: .section .asm_mergeable2,"aM",@progbits,4,unique,25 +; CHECK: explicit_asm_5: +; CHECK: .section .asm_nonmergeable2,"aMS",@progbits,2,unique,26 +; CHECK: explicit_asm_6: +; CHECK: .section .asm_mergeable2,"aMS",@progbits,2 +; CHECK: explicit_asm_7: +; CHECK: .section .asm_nonmergeable2,"a",@progbits +; CHECK: explicit_asm_8: + +module asm ".section .asm_mergeable1,\22aMS\22,@progbits,2" +module asm ".section .asm_nonmergeable1,\22a\22,@progbits" +module asm ".section .asm_mergeable2,\22aMS\22,@progbits,2" +module asm ".section .asm_nonmergeable2,\22a\22,@progbits" + +;; Assign compatible symbols to sections created using inline asm. +@explicit_asm_1 = unnamed_addr constant [2 x i16] [i16 1, i16 0], section ".asm_mergeable1" +@explicit_asm_2 = constant [2 x i16] [i16 1, i16 0], section ".asm_nonmergeable1" +;; Assign incompatible globals to the same sections. +@explicit_asm_3 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".asm_mergeable1" +@explicit_asm_4 = unnamed_addr constant [2 x i16] [i16 1, i16 0], section ".asm_nonmergeable1" + +;; Assign incompatible globals to sections created using inline asm. +@explicit_asm_5 = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".asm_mergeable2" +@explicit_asm_6 = unnamed_addr constant [2 x i16] [i16 1, i16 0], section ".asm_nonmergeable2" +;; Assign compatible globals to the same sections. +@explicit_asm_7 = unnamed_addr constant [2 x i16] [i16 1, i16 0], section ".asm_mergeable2" +@explicit_asm_8 = constant [2 x i16] [i16 1, i16 0], section ".asm_nonmergeable2" + +;; A .note.GNU-stack section is created implicitly. We add a check for this as we want to use +;; --implicit-check-not to reduce the number of checks in this file. +; CHECK: .section ".note.GNU-stack","",@progbits + +;; --no-integrated-as avoids the use of ",unique," for compatibility with older binutils. 
+ +;; Error if an incompatible symbol is explicitly placed into a mergeable section. +; RUN: not llc < %s -mtriple=x86_64 --no-integrated-as 2>&1 \ +; RUN: | FileCheck %s --check-prefix=NO-I-AS-ERR +; NO-I-AS-ERR: error: Symbol 'explicit_default_1' from module '' required a section with entry-size=0 but was placed in section '.rodata.cst16' with entry-size=16: Explicit assignment by pragma or attribute of an incompatible symbol to this section? +; NO-I-AS-ERR: error: Symbol 'explicit_default_4' from module '' required a section with entry-size=0 but was placed in section '.debug_str' with entry-size=1: Explicit assignment by pragma or attribute of an incompatible symbol to this section? +; NO-I-AS-ERR: error: Symbol 'explicit_implicit_2' from module '' required a section with entry-size=0 but was placed in section '.rodata.str1.1' with entry-size=1: Explicit assignment by pragma or attribute of an incompatible symbol to this section? +; NO-I-AS-ERR: error: Symbol 'explicit_implicit_4' from module '' required a section with entry-size=0 but was placed in section '.rodata.str1.1' with entry-size=1: Explicit assignment by pragma or attribute of an incompatible symbol to this section? + +;; Don't create mergeable sections for globals with an explicit section name. +; RUN: echo '@explicit = unnamed_addr constant [2 x i16] [i16 1, i16 1], section ".explicit"' > %t.no_i_as.ll +; RUN: llc < %t.no_i_as.ll -mtriple=x86_64 --no-integrated-as 2>&1 \ +; RUN: | FileCheck %s --check-prefix=NO-I-AS +; NO-I-AS: .section .explicit,"a",@progbits diff --git a/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp index c1eb45566cf1..02747b69b1c9 100644 --- a/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/LegacyRTDyldObjectLinkingLayerTest.cpp @@ -77,10 +77,12 @@ TEST(LegacyRTDyldObjectLinkingLayerTest, TestSetProcessAllSections) { LLVMContext Context; auto M = std::make_unique("", Context); M->setTargetTriple("x86_64-unknown-linux-gnu"); - Type *Int32Ty = IntegerType::get(Context, 32); - GlobalVariable *GV = - new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, - ConstantInt::get(Int32Ty, 42), "foo"); + Constant *StrConstant = ConstantDataArray::getString(Context, "forty-two"); + auto *GV = + new GlobalVariable(*M, StrConstant->getType(), true, + GlobalValue::ExternalLinkage, StrConstant, "foo"); + GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + GV->setAlignment(Align(1)); GV->setSection(".debug_str"); diff --git a/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp index 9a50571b1969..4a192c1f28ac 100644 --- a/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp @@ -74,10 +74,12 @@ TEST(RTDyldObjectLinkingLayerTest, TestSetProcessAllSections) { LLVMContext Context; auto M = std::make_unique("", Context); M->setTargetTriple("x86_64-unknown-linux-gnu"); - Type *Int32Ty = IntegerType::get(Context, 32); - GlobalVariable *GV = - new GlobalVariable(*M, Int32Ty, false, GlobalValue::ExternalLinkage, - ConstantInt::get(Int32Ty, 42), "foo"); + Constant *StrConstant = ConstantDataArray::getString(Context, "forty-two"); + auto *GV = + new GlobalVariable(*M, StrConstant->getType(), true, + GlobalValue::ExternalLinkage, StrConstant, "foo"); + 
GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); + GV->setAlignment(Align(1)); GV->setSection(".debug_str"); From b5a24610fad6d68f65bd6ec8db52b6e480c56d6c Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Thu, 16 Apr 2020 14:53:44 -0400 Subject: [PATCH 081/216] [FileCheck] Fix --dump-input implicit pattern location Currently, `--dump-input` implies that all `--implicit-check-not` patterns appear on line 1 by printing annotations like: ``` 1: foo bar baz not:1 !~~ error: no match expected ``` This patch changes that to: ``` 1: foo bar baz not:imp1 !~~ error: no match expected ``` `imp1` indicates the first `--implicit-check-not` pattern. Reviewed By: thopre Differential Revision: https://reviews.llvm.org/D77605 --- llvm/include/llvm/Support/FileCheck.h | 10 ++- llvm/lib/Support/FileCheck.cpp | 25 +++++--- .../test/FileCheck/dump-input-annotations.txt | 61 +++++++++++++++++++ llvm/utils/FileCheck/FileCheck.cpp | 53 ++++++++++------ 4 files changed, 122 insertions(+), 27 deletions(-) diff --git a/llvm/include/llvm/Support/FileCheck.h b/llvm/include/llvm/Support/FileCheck.h index d218ef042257..1150882b0b3e 100644 --- a/llvm/include/llvm/Support/FileCheck.h +++ b/llvm/include/llvm/Support/FileCheck.h @@ -88,7 +88,7 @@ struct FileCheckDiag { /// What is the FileCheck directive for this diagnostic? Check::FileCheckType CheckTy; /// Where is the FileCheck directive for this diagnostic? - unsigned CheckLine, CheckCol; + SMLoc CheckLoc; /// What type of match result does this diagnostic describe? /// /// A directive's supplied pattern is said to be either expected or excluded @@ -160,7 +160,13 @@ class FileCheck { /// /// Only expected strings whose prefix is one of those listed in \p PrefixRE /// are recorded. \returns true in case of an error, false otherwise. - bool readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE); + /// + /// If \p ImpPatBufferIDRange, then the range (inclusive start, exclusive end) + /// of IDs for source buffers added to \p SM for implicit patterns are + /// recorded in it. The range is empty if there are none. 
+ bool + readCheckFile(SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, + std::pair *ImpPatBufferIDRange = nullptr); bool ValidateCheckPrefixes(); diff --git a/llvm/lib/Support/FileCheck.cpp b/llvm/lib/Support/FileCheck.cpp index 71b1e8356137..7b70112e3978 100644 --- a/llvm/lib/Support/FileCheck.cpp +++ b/llvm/lib/Support/FileCheck.cpp @@ -1069,16 +1069,13 @@ FileCheckDiag::FileCheckDiag(const SourceMgr &SM, const Check::FileCheckType &CheckTy, SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange) - : CheckTy(CheckTy), MatchTy(MatchTy) { + : CheckTy(CheckTy), CheckLoc(CheckLoc), MatchTy(MatchTy) { auto Start = SM.getLineAndColumn(InputRange.Start); auto End = SM.getLineAndColumn(InputRange.End); InputStartLine = Start.first; InputStartCol = Start.second; InputEndLine = End.first; InputEndCol = End.second; - Start = SM.getLineAndColumn(CheckLoc); - CheckLine = Start.first; - CheckCol = Start.second; } static bool IsPartOfWord(char c) { @@ -1269,8 +1266,12 @@ FileCheck::FileCheck(FileCheckRequest Req) FileCheck::~FileCheck() = default; -bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, - Regex &PrefixRE) { +bool FileCheck::readCheckFile( + SourceMgr &SM, StringRef Buffer, Regex &PrefixRE, + std::pair *ImpPatBufferIDRange) { + if (ImpPatBufferIDRange) + ImpPatBufferIDRange->first = ImpPatBufferIDRange->second = 0; + Error DefineError = PatternContext->defineCmdlineVariables(Req.GlobalDefines, SM); if (DefineError) { @@ -1291,7 +1292,17 @@ bool FileCheck::readCheckFile(SourceMgr &SM, StringRef Buffer, StringRef PatternInBuffer = CmdLine->getBuffer().substr(Prefix.size(), PatternString.size()); - SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); + unsigned BufferID = SM.AddNewSourceBuffer(std::move(CmdLine), SMLoc()); + if (ImpPatBufferIDRange) { + if (ImpPatBufferIDRange->first == ImpPatBufferIDRange->second) { + ImpPatBufferIDRange->first = BufferID; + ImpPatBufferIDRange->second = BufferID + 1; + } else { + assert(BufferID == ImpPatBufferIDRange->second && + "expected consecutive source buffer IDs"); + ++ImpPatBufferIDRange->second; + } + } ImplicitNegativeChecks.push_back( Pattern(Check::CheckNot, PatternContext.get())); diff --git a/llvm/test/FileCheck/dump-input-annotations.txt b/llvm/test/FileCheck/dump-input-annotations.txt index a9072ddcf423..a0fce27b42b0 100644 --- a/llvm/test/FileCheck/dump-input-annotations.txt +++ b/llvm/test/FileCheck/dump-input-annotations.txt @@ -494,3 +494,64 @@ ; LAB-NEXT: label:3'0 ~~~ ; LAB-NEXT: >>>>>> ; LAB-NOT: {{.}} + +;-------------------------------------------------- +; --implicit-check-not +; +; The first two --implicit-check-not patterns have no match (success). The +; third has an unexpected match (error). To check per-input-line annotation +; sorting, all of those plus the CHECK directives have annotations on the same +; input line. +;-------------------------------------------------- + +; RUN: echo 'hello world again!' > %t.in + +; RUN: echo 'CHECK: hel' > %t.chk +; RUN: echo 'CHECK: wor' >> %t.chk +; RUN: echo 'CHECK: !' 
>> %t.chk + +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -dump-input=always -input-file=%t.in %t.chk 2>&1 \ +; RUN: --implicit-check-not='goodbye' \ +; RUN: --implicit-check-not='world' \ +; RUN: --implicit-check-not='again' \ +; RUN: | FileCheck -match-full-lines %s -check-prefix=IMPNOT \ +; RUN: -implicit-check-not='remark:' +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -dump-input=always -input-file=%t.in %t.chk -v 2>&1 \ +; RUN: --implicit-check-not='goodbye' \ +; RUN: --implicit-check-not='world' \ +; RUN: --implicit-check-not='again' \ +; RUN: | FileCheck -match-full-lines %s -check-prefixes=IMPNOT,IMPNOT-V \ +; RUN: -implicit-check-not='remark:' +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -dump-input=always -input-file=%t.in %t.chk -vv 2>&1 \ +; RUN: --implicit-check-not='goodbye' \ +; RUN: --implicit-check-not='world' \ +; RUN: --implicit-check-not='again' \ +; RUN: | FileCheck -match-full-lines %s \ +; RUN: -check-prefixes=IMPNOT,IMPNOT-V,IMPNOT-VV \ +; RUN: -implicit-check-not='remark:' + +; Verbose diagnostics are suppressed but not errors. +; IMPNOT:{{.*}}error:{{.*}} + +; FIXME: All occurrences of imp1, imp2, and imp3 are sorting after the first +; directive. They should instead be sorted by when they execute. + +; IMPNOT:<<<<<< +; IMPNOT-NEXT: 1: hello world again! +; IMPNOT-V-NEXT:check:1 ^~~ +; IMPNOT-VV-NEXT:not:imp1 X +; IMPNOT-VV-NEXT:not:imp2 X +; IMPNOT-VV-NEXT:not:imp3 X +; IMPNOT-VV-NEXT:not:imp1 X~~ +; IMPNOT-VV-NEXT:not:imp2 X~~ +; IMPNOT-VV-NEXT:not:imp3 X~~ +; IMPNOT-VV-NEXT:not:imp1 X~~~~~~~ +; IMPNOT-VV-NEXT:not:imp2 X~~~~~~~ +; IMPNOT-NEXT:not:imp3 !~~~~ error: no match expected +; IMPNOT-V-NEXT:check:2 ^~~ +; IMPNOT-V-NEXT:check:3 ^ +; IMPNOT-NEXT:>>>>>> +; IMPNOT-NOT:{{.}} diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 539bc13f946a..6cfd0fd75878 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -193,14 +193,15 @@ static void DumpInputAnnotationHelp(raw_ostream &OS) { // Labels for annotation lines. OS << " - "; WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L"; - OS << " labels the only match result for a pattern of type T from " - << "line L of\n" - << " the check file\n"; + OS << " labels the only match result for either (1) a pattern of type T" + << " from\n" + << " line L of the check file if L is an integer or (2) the" + << " I-th implicit\n" + << " pattern if L is \"imp\" followed by an integer " + << "I (index origin one)\n"; OS << " - "; WithColor(OS, raw_ostream::SAVEDCOLOR, true) << "T:L'N"; - OS << " labels the Nth match result for a pattern of type T from line " - << "L of\n" - << " the check file\n"; + OS << " labels the Nth match result for such a pattern\n"; // Markers on annotation lines. OS << " - "; @@ -293,9 +294,12 @@ std::string GetCheckTypeAbbreviation(Check::FileCheckType Ty) { llvm_unreachable("unknown FileCheckType"); } -static void BuildInputAnnotations(const std::vector &Diags, - std::vector &Annotations, - unsigned &LabelWidth) { +static void +BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID, + const std::pair &ImpPatBufferIDRange, + const std::vector &Diags, + std::vector &Annotations, + unsigned &LabelWidth) { // How many diagnostics has the current check seen so far? unsigned CheckDiagCount = 0; // What's the widest label? @@ -305,14 +309,24 @@ static void BuildInputAnnotations(const std::vector &Diags, InputAnnotation A; // Build label, which uniquely identifies this check result. 
- A.CheckLine = DiagItr->CheckLine; + unsigned CheckBufferID = SM.FindBufferContainingLoc(DiagItr->CheckLoc); + auto CheckLineAndCol = + SM.getLineAndColumn(DiagItr->CheckLoc, CheckBufferID); + A.CheckLine = CheckLineAndCol.first; llvm::raw_string_ostream Label(A.Label); - Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":" - << DiagItr->CheckLine; + Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":"; + if (CheckBufferID == CheckFileBufferID) + Label << CheckLineAndCol.first; + else if (ImpPatBufferIDRange.first <= CheckBufferID && + CheckBufferID < ImpPatBufferIDRange.second) + Label << "imp" << (CheckBufferID - ImpPatBufferIDRange.first + 1); + else + llvm_unreachable("expected diagnostic's check location to be either in " + "the check file or for an implicit pattern"); A.CheckDiagIndex = UINT_MAX; auto DiagNext = std::next(DiagItr); if (DiagNext != DiagEnd && DiagItr->CheckTy == DiagNext->CheckTy && - DiagItr->CheckLine == DiagNext->CheckLine) + DiagItr->CheckLoc == DiagNext->CheckLoc) A.CheckDiagIndex = CheckDiagCount++; else if (CheckDiagCount) { A.CheckDiagIndex = CheckDiagCount; @@ -606,11 +620,13 @@ int main(int argc, char **argv) { SmallString<4096> CheckFileBuffer; StringRef CheckFileText = FC.CanonicalizeFile(CheckFile, CheckFileBuffer); - SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( - CheckFileText, CheckFile.getBufferIdentifier()), - SMLoc()); + unsigned CheckFileBufferID = + SM.AddNewSourceBuffer(MemoryBuffer::getMemBuffer( + CheckFileText, CheckFile.getBufferIdentifier()), + SMLoc()); - if (FC.readCheckFile(SM, CheckFileText, PrefixRE)) + std::pair ImpPatBufferIDRange; + if (FC.readCheckFile(SM, CheckFileText, PrefixRE, &ImpPatBufferIDRange)) return 2; // Open the file to check and add it to SourceMgr. @@ -658,7 +674,8 @@ int main(int argc, char **argv) { << "\n"; std::vector Annotations; unsigned LabelWidth; - BuildInputAnnotations(Diags, Annotations, LabelWidth); + BuildInputAnnotations(SM, CheckFileBufferID, ImpPatBufferIDRange, Diags, + Annotations, LabelWidth); DumpAnnotatedInput(errs(), Req, InputFileText, Annotations, LabelWidth); } From ce685455e4500f9f4a6686b1667a132d2c8a3c12 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Thu, 16 Apr 2020 14:53:56 -0400 Subject: [PATCH 082/216] [FileCheck] Fix --dump-input annotation sort per input line Without this patch, `--dump-input` annotations on a single input line are sorted by the associated directive's check-file line. That seemed fine because that's often identical to the order in which FileCheck looks for matches for those directives. The first problem is that an `--implicit-check-not` pattern has no check-file line. The logical equivalent is sorting in command-line order, but that's not implemented. The second problem is that, unlike a directive, an `--implicit-check-not` pattern applies at many points, between many different pairs of directives. However, sorting in command-line order gathers all its associated diagnostics together at one point in an input line's list of annotations. In general, it seems to be easier to understand FileCheck's logic when annotations on a single input line are sorted in the order FileCheck produced the associated diagnostics, so this patch makes that change. As documented in the patch, the annotation sort order is also especially relevant to `CHECK-LABEL`, `CHECK-NOT`, and `CHECK-DAG`, so this patch updates or extends tests to check the sort makes sense for them. (However, the sort for `CHECK-DAG` annotations should not actually be altered by this patch.) 
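For example, given a CHECK-EMPTY directive on check-file line 3 whose failing
search range extends onto the input line matched by a CHECK-LABEL directive
on check-file line 4 (a sketch abbreviated from the dump-input-annotations.txt
changes below; column alignment is approximate), the annotations for that
input line used to be ordered by check-file line:

```
        4: label
  empty:3  ~~~~~
  label:4  ^~~~~
```

and are now ordered by when the diagnostics were produced, so the label match,
which FileCheck finds first, is listed first:

```
        4: label
  label:4  ^~~~~
  empty:3  ~~~~~
```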
Reviewed By: thopre Differential Revision: https://reviews.llvm.org/D77607 --- .../test/FileCheck/dump-input-annotations.txt | 58 +++++++++-- llvm/utils/FileCheck/FileCheck.cpp | 96 +++++++++++-------- 2 files changed, 106 insertions(+), 48 deletions(-) diff --git a/llvm/test/FileCheck/dump-input-annotations.txt b/llvm/test/FileCheck/dump-input-annotations.txt index a0fce27b42b0..f4f0d3ca6022 100644 --- a/llvm/test/FileCheck/dump-input-annotations.txt +++ b/llvm/test/FileCheck/dump-input-annotations.txt @@ -280,8 +280,8 @@ ; EMP-NEXT: 3: world ; EMP-NEXT: empty:3 X~~~~ error: no match found ; EMP-NEXT: 4: label -; EMP-NEXT: empty:3 ~~~~~ ; EMP-V-NEXT: label:4 ^~~~~ +; EMP-NEXT: empty:3 ~~~~~ ; EMP-NEXT: >>>>>> ; EMP-NOT: {{.}} @@ -387,8 +387,8 @@ ; NOT2-VV-NEXT: not:1 ~~~~~ ; NOT2-NEXT: not:2 !~~~~ error: no match expected ; NOT2-NEXT: 3: again -; NOT2-VV-NEXT: not:1 ~~ ; NOT2-V-NEXT: check:3 ^~~ +; NOT2-VV-NEXT: not:1 ~~ ; NOT2-NEXT: >>>>>> ; NOT2-NOT: {{.}} @@ -446,6 +446,53 @@ ; DAG-NEXT: >>>>>> ; DAG-NOT: {{.}} +; Check sorting of annotations when the order of diagnostics across an input +; line is different than the order of the associated directives in the check +; file. Try cases when diagnostics' input ranges overlap but are not +; identical to check how that affects sorting. + +; RUN: echo 'abc def abc def' > %t.in + +; RUN: echo 'CHECK-DAG: def' > %t.chk +; RUN: echo 'CHECK-DAG: bc' >> %t.chk +; RUN: echo 'CHECK-DAG: abc' >> %t.chk +; RUN: echo 'CHECK-DAG: de' >> %t.chk +; RUN: echo 'CHECK-DAG: def' >> %t.chk + +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -dump-input=always -input-file %t.in %t.chk 2>&1 \ +; RUN: | FileCheck -match-full-lines %s -check-prefixes=DAG1L,DAG1L-Q \ +; RUN: -implicit-check-not='remark:' +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -dump-input=always -input-file %t.in %t.chk -v 2>&1 \ +; RUN: | FileCheck -match-full-lines %s -check-prefixes=DAG1L,DAG1L-V,DAG1L-VQ \ +; RUN: -implicit-check-not='remark:' +; RUN: %ProtectFileCheckOutput \ +; RUN: not FileCheck -dump-input=always -input-file %t.in %t.chk -vv 2>&1 \ +; RUN: | FileCheck -match-full-lines %s -check-prefixes=DAG1L,DAG1L-V,DAG1L-VV \ +; RUN: -implicit-check-not='remark:' + +; Verbose diagnostics are suppressed but not errors. +; DAG1L:{{.*}}error:{{.*}} + +; DAG1L:<<<<<< +; DAG1L-NEXT: 1: abc def abc def +; DAG1L-V-NEXT:dag:1 ^~~ +; DAG1L-V-NEXT:dag:2 ^~ +; DAG1L-VV-NEXT:dag:3'0 !~~ discard: overlaps earlier match +; DAG1L-VQ-NEXT:dag:3 ^~~ +; DAG1L-VV-NEXT:dag:3'1 ^~~ +; DAG1L-VV-NEXT:dag:4'0 !~ discard: overlaps earlier match +; DAG1L-VQ-NEXT:dag:4 ^~ +; DAG1L-VV-NEXT:dag:4'1 ^~ +; DAG1L-VV-NEXT:dag:5'0 !~~ discard: overlaps earlier match +; DAG1L-VV-NEXT:dag:5'1 !~~ discard: overlaps earlier match +; DAG1L-Q-NEXT:dag:5 X error: no match found +; DAG1L-VQ-NEXT:dag:5 X error: no match found +; DAG1L-VV-NEXT:dag:5'2 X error: no match found +; DAG1L-NEXT:>>>>>> +; DAG1L-NOT:{{.}} + ;-------------------------------------------------- ; CHECK-LABEL ; @@ -536,22 +583,19 @@ ; Verbose diagnostics are suppressed but not errors. ; IMPNOT:{{.*}}error:{{.*}} -; FIXME: All occurrences of imp1, imp2, and imp3 are sorting after the first -; directive. They should instead be sorted by when they execute. - ; IMPNOT:<<<<<< ; IMPNOT-NEXT: 1: hello world again! 
; IMPNOT-V-NEXT:check:1 ^~~ ; IMPNOT-VV-NEXT:not:imp1 X ; IMPNOT-VV-NEXT:not:imp2 X ; IMPNOT-VV-NEXT:not:imp3 X +; IMPNOT-V-NEXT:check:2 ^~~ ; IMPNOT-VV-NEXT:not:imp1 X~~ ; IMPNOT-VV-NEXT:not:imp2 X~~ ; IMPNOT-VV-NEXT:not:imp3 X~~ +; IMPNOT-V-NEXT:check:3 ^ ; IMPNOT-VV-NEXT:not:imp1 X~~~~~~~ ; IMPNOT-VV-NEXT:not:imp2 X~~~~~~~ ; IMPNOT-NEXT:not:imp3 !~~~~ error: no match expected -; IMPNOT-V-NEXT:check:2 ^~~ -; IMPNOT-V-NEXT:check:3 ^ ; IMPNOT-NEXT:>>>>>> ; IMPNOT-NOT:{{.}} diff --git a/llvm/utils/FileCheck/FileCheck.cpp b/llvm/utils/FileCheck/FileCheck.cpp index 6cfd0fd75878..ef6c62b5ab5b 100644 --- a/llvm/utils/FileCheck/FileCheck.cpp +++ b/llvm/utils/FileCheck/FileCheck.cpp @@ -241,11 +241,8 @@ static void DumpInputAnnotationHelp(raw_ostream &OS) { /// An annotation for a single input line. struct InputAnnotation { - /// The check file line (one-origin indexing) where the directive that - /// produced this annotation is located. - unsigned CheckLine; - /// The index of the match result for this check. - unsigned CheckDiagIndex; + /// The index of the match result across all checks + unsigned DiagIndex; /// The label for this annotation. std::string Label; /// What input line (one-origin indexing) this annotation marks. This might @@ -253,7 +250,7 @@ struct InputAnnotation { /// a non-initial fragment of a diagnostic that has been broken across /// multiple lines. unsigned InputLine; - /// The column range (one-origin indexing, open end) in which to to mark the + /// The column range (one-origin indexing, open end) in which to mark the /// input line. If InputEndCol is UINT_MAX, treat it as the last column /// before the newline. unsigned InputStartCol, InputEndCol; @@ -300,6 +297,8 @@ BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID, const std::vector &Diags, std::vector &Annotations, unsigned &LabelWidth) { + // How many diagnostics have we seen so far? + unsigned DiagCount = 0; // How many diagnostics has the current check seen so far? unsigned CheckDiagCount = 0; // What's the widest label? @@ -307,12 +306,12 @@ BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID, for (auto DiagItr = Diags.begin(), DiagEnd = Diags.end(); DiagItr != DiagEnd; ++DiagItr) { InputAnnotation A; + A.DiagIndex = DiagCount++; // Build label, which uniquely identifies this check result. 
unsigned CheckBufferID = SM.FindBufferContainingLoc(DiagItr->CheckLoc); auto CheckLineAndCol = SM.getLineAndColumn(DiagItr->CheckLoc, CheckBufferID); - A.CheckLine = CheckLineAndCol.first; llvm::raw_string_ostream Label(A.Label); Label << GetCheckTypeAbbreviation(DiagItr->CheckTy) << ":"; if (CheckBufferID == CheckFileBufferID) @@ -323,19 +322,17 @@ BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID, else llvm_unreachable("expected diagnostic's check location to be either in " "the check file or for an implicit pattern"); - A.CheckDiagIndex = UINT_MAX; + unsigned CheckDiagIndex = UINT_MAX; auto DiagNext = std::next(DiagItr); if (DiagNext != DiagEnd && DiagItr->CheckTy == DiagNext->CheckTy && DiagItr->CheckLoc == DiagNext->CheckLoc) - A.CheckDiagIndex = CheckDiagCount++; + CheckDiagIndex = CheckDiagCount++; else if (CheckDiagCount) { - A.CheckDiagIndex = CheckDiagCount; + CheckDiagIndex = CheckDiagCount; CheckDiagCount = 0; } - if (A.CheckDiagIndex != UINT_MAX) - Label << "'" << A.CheckDiagIndex; - else - A.CheckDiagIndex = 0; + if (CheckDiagIndex != UINT_MAX) + Label << "'" << CheckDiagIndex; Label.flush(); LabelWidth = std::max((std::string::size_type)LabelWidth, A.Label.size()); @@ -366,8 +363,7 @@ BuildInputAnnotations(const SourceMgr &SM, unsigned CheckFileBufferID, if (DiagItr->InputEndCol == 1 && L == E) break; InputAnnotation B; - B.CheckLine = A.CheckLine; - B.CheckDiagIndex = A.CheckDiagIndex; + B.DiagIndex = A.DiagIndex; B.Label = A.Label; B.InputLine = L; B.Marker = A.Marker; @@ -392,35 +388,53 @@ static void DumpAnnotatedInput(raw_ostream &OS, const FileCheckRequest &Req, OS << "Full input was:\n<<<<<<\n"; // Sort annotations. - // - // First, sort in the order of input lines to make it easier to find relevant - // annotations while iterating input lines in the implementation below. - // FileCheck diagnostics are not always reported and recorded in the order of - // input lines due to, for example, CHECK-DAG and CHECK-NOT. - // - // Second, for annotations for the same input line, sort in the order of the - // FileCheck directive's line in the check file (where there's at most one - // directive per line) and then by the index of the match result for that - // directive. The rationale of this choice is that, for any input line, this - // sort establishes a total order of annotations that, with respect to match - // results, is consistent across multiple lines, thus making match results - // easier to track from one line to the next when they span multiple lines. std::sort(Annotations.begin(), Annotations.end(), [](const InputAnnotation &A, const InputAnnotation &B) { + // 1. Sort annotations in the order of the input lines. + // + // This makes it easier to find relevant annotations while + // iterating input lines in the implementation below. FileCheck + // does not always produce diagnostics in the order of input + // lines due to, for example, CHECK-DAG and CHECK-NOT. if (A.InputLine != B.InputLine) return A.InputLine < B.InputLine; - if (A.CheckLine != B.CheckLine) - return A.CheckLine < B.CheckLine; - // FIXME: Sometimes CHECK-LABEL reports its match twice with - // other diagnostics in between, and then diag index incrementing - // fails to work properly, and then this assert fails. We should - // suppress one of those diagnostics or do a better job of - // computing this index. For now, we just produce a redundant - // CHECK-LABEL annotation. 
- // assert(A.CheckDiagIndex != B.CheckDiagIndex && - // "expected diagnostic indices to be unique within a " - // " check line"); - return A.CheckDiagIndex < B.CheckDiagIndex; + // 2. Sort annotations in the temporal order FileCheck produced + // their associated diagnostics. + // + // This sort offers several benefits: + // + // A. On a single input line, the order of annotations reflects + // the FileCheck logic for processing directives/patterns. + // This can be helpful in understanding cases in which the + // order of the associated directives/patterns in the check + // file or on the command line either (i) does not match the + // temporal order in which FileCheck looks for matches for the + // directives/patterns (due to, for example, CHECK-LABEL, + // CHECK-NOT, or `--implicit-check-not`) or (ii) does match + // that order but does not match the order of those + // diagnostics along an input line (due to, for example, + // CHECK-DAG). + // + // On the other hand, because our presentation format presents + // input lines in order, there's no clear way to offer the + // same benefit across input lines. For consistency, it might + // then seem worthwhile to have annotations on a single line + // also sorted in input order (that is, by input column). + // However, in practice, this appears to be more confusing + // than helpful. Perhaps it's intuitive to expect annotations + // to be listed in the temporal order in which they were + // produced except in cases the presentation format obviously + // and inherently cannot support it (that is, across input + // lines). + // + // B. When diagnostics' annotations are split among multiple + // input lines, the user must track them from one input line + // to the next. One property of the sort chosen here is that + // it facilitates the user in this regard by ensuring the + // following: when comparing any two input lines, a + // diagnostic's annotations are sorted in the same position + // relative to all other diagnostics' annotations. + return A.DiagIndex < B.DiagIndex; }); // Compute the width of the label column. From 75c4408653753fbb8e273ad41cd41997d498e7d3 Mon Sep 17 00:00:00 2001 From: Dan Albert Date: Thu, 12 Mar 2020 11:16:30 -0700 Subject: [PATCH 083/216] Reland: Don't expose unavailable cstdio functions. Marked unsupported for C++03 and C++11 since this test uses alias declarations, and at least one C++03 bot was failing with -Wc++11-extensions. Change-Id: I8c3a579edd7eb83e0bc74e85d116b68f22400161 --- libcxx/include/__config | 7 +++++++ libcxx/include/cstdio | 4 ++++ .../no_fgetpos_fsetpos.fail.cpp | 21 +++++++++++++++++++ .../std/depr/depr.c.headers/stdio_h.pass.cpp | 4 ++++ .../file.streams/c.files/cstdio.pass.cpp | 4 ++++ 5 files changed, 40 insertions(+) create mode 100644 libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.fail.cpp diff --git a/libcxx/include/__config b/libcxx/include/__config index c9a9e5e28a69..42f59ea99665 100644 --- a/libcxx/include/__config +++ b/libcxx/include/__config @@ -1541,6 +1541,13 @@ _LIBCPP_FUNC_VIS extern "C" void __sanitizer_annotate_contiguous_container( #define _LIBCPP_BUILTIN_CONSTANT_P(x) false #endif +// Support for _FILE_OFFSET_BITS=64 landed gradually in Android, so the full set +// of functions used in cstdio may not be available for low API levels when +// using 64-bit file offsets on LP32. 
+#if defined(__BIONIC__) && defined(__USE_FILE_OFFSET64) && __ANDROID_API__ < 24 +#define _LIBCPP_HAS_NO_FGETPOS_FSETPOS +#endif + #endif // __cplusplus #endif // _LIBCPP_CONFIG diff --git a/libcxx/include/cstdio b/libcxx/include/cstdio index 0f3f42dac2da..d0492a083505 100644 --- a/libcxx/include/cstdio +++ b/libcxx/include/cstdio @@ -131,9 +131,13 @@ using ::putc; using ::ungetc; using ::fread; using ::fwrite; +#ifndef _LIBCPP_HAS_NO_FGETPOS_FSETPOS using ::fgetpos; +#endif using ::fseek; +#ifndef _LIBCPP_HAS_NO_FGETPOS_FSETPOS using ::fsetpos; +#endif using ::ftell; using ::rewind; using ::clearerr; diff --git a/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.fail.cpp b/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.fail.cpp new file mode 100644 index 000000000000..eedcb3e8c95b --- /dev/null +++ b/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.fail.cpp @@ -0,0 +1,21 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: verify-support +// UNSUPPORTED: c++98 || c++03 + +#include + +using U = decltype(::fgetpos); +using V = decltype(::fsetpos); +#ifdef _LIBCPP_HAS_NO_FGETPOS_FSETPOS +// expected-error@-3 {{no member named 'fgetpos' in the global namespace}} +// expected-error@-3 {{no member named 'fsetpos' in the global namespace}} +#else +// expected-no-diagnostics +#endif diff --git a/libcxx/test/std/depr/depr.c.headers/stdio_h.pass.cpp b/libcxx/test/std/depr/depr.c.headers/stdio_h.pass.cpp index 97ea0d41519b..43a0e292e15d 100644 --- a/libcxx/test/std/depr/depr.c.headers/stdio_h.pass.cpp +++ b/libcxx/test/std/depr/depr.c.headers/stdio_h.pass.cpp @@ -156,9 +156,13 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#ifndef _LIBCPP_HAS_NO_FGETPOS_FSETPOS static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); +#ifndef _LIBCPP_HAS_NO_FGETPOS_FSETPOS static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); diff --git a/libcxx/test/std/input.output/file.streams/c.files/cstdio.pass.cpp b/libcxx/test/std/input.output/file.streams/c.files/cstdio.pass.cpp index af8dc97a5e83..bae82fec69b0 100644 --- a/libcxx/test/std/input.output/file.streams/c.files/cstdio.pass.cpp +++ b/libcxx/test/std/input.output/file.streams/c.files/cstdio.pass.cpp @@ -120,9 +120,13 @@ int main(int, char**) static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); +#ifndef _LIBCPP_HAS_NO_FGETPOS_FSETPOS static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); +#ifndef _LIBCPP_HAS_NO_FGETPOS_FSETPOS static_assert((std::is_same::value), ""); +#endif static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); static_assert((std::is_same::value), ""); From 39c9c12b76da27bd52ca1b82c3d39d9c9b59ad0f Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Wed, 15 Apr 2020 22:27:19 -0700 Subject: [PATCH 084/216] [clang-tools-extra] reimplement PreprocessorTracker in terms of StringSet. 
Summary: PreprocessorTracker is the last user of the old StringPool class, which isn't super loved and isn't a great improvement over a plan StringSet. Once this goes in we can remove StringPool entirely. This is as discussed on cfe-dev. Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78273 --- .../modularize/PreprocessorTracker.cpp | 51 ++++++++----------- 1 file changed, 20 insertions(+), 31 deletions(-) diff --git a/clang-tools-extra/modularize/PreprocessorTracker.cpp b/clang-tools-extra/modularize/PreprocessorTracker.cpp index 26c2923c2983..f8ab2c8067c0 100644 --- a/clang-tools-extra/modularize/PreprocessorTracker.cpp +++ b/clang-tools-extra/modularize/PreprocessorTracker.cpp @@ -243,19 +243,19 @@ // //===--------------------------------------------------------------------===// -#include "clang/Lex/LexDiagnostic.h" #include "PreprocessorTracker.h" +#include "ModularizeUtilities.h" +#include "clang/Lex/LexDiagnostic.h" #include "clang/Lex/MacroArgs.h" #include "clang/Lex/PPCallbacks.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/Support/StringPool.h" +#include "llvm/ADT/StringSet.h" #include "llvm/Support/raw_ostream.h" -#include "ModularizeUtilities.h" namespace Modularize { // Some handle types -typedef llvm::PooledStringPtr StringHandle; +typedef llvm::StringRef StringHandle; typedef int HeaderHandle; const HeaderHandle HeaderHandleInvalid = -1; @@ -463,19 +463,6 @@ ConditionValueKindStrings[] = { "(not evaluated)", "false", "true" }; -bool operator<(const StringHandle &H1, const StringHandle &H2) { - const char *S1 = (H1 ? *H1 : ""); - const char *S2 = (H2 ? *H2 : ""); - int Diff = strcmp(S1, S2); - return Diff < 0; -} -bool operator>(const StringHandle &H1, const StringHandle &H2) { - const char *S1 = (H1 ? *H1 : ""); - const char *S2 = (H2 ? *H2 : ""); - int Diff = strcmp(S1, S2); - return Diff > 0; -} - // Preprocessor item key. // // This class represents a location in a source file, for use @@ -922,7 +909,9 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { } // Lookup/add string. - StringHandle addString(llvm::StringRef Str) { return Strings.intern(Str); } + StringHandle addString(llvm::StringRef Str) { + return Strings.insert(Str).first->first(); + } // Convert to a canonical path. std::string getCanonicalPath(llvm::StringRef path) const { @@ -950,7 +939,7 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { HeaderHandle H = 0; for (auto I = HeaderPaths.begin(), E = HeaderPaths.end(); I != E; ++I, ++H) { - if (**I == CanonicalPath) + if (*I == CanonicalPath) return H; } return HeaderHandleInvalid; @@ -1143,10 +1132,10 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { // Tell caller we found one or more errors. ReturnValue = true; // Start the error message. - OS << *MacroExpTracker.InstanceSourceLine; + OS << MacroExpTracker.InstanceSourceLine; if (ItemKey.Column > 0) OS << std::string(ItemKey.Column - 1, ' ') << "^\n"; - OS << "error: Macro instance '" << *MacroExpTracker.MacroUnexpanded + OS << "error: Macro instance '" << MacroExpTracker.MacroUnexpanded << "' has different values in this header, depending on how it was " "included.\n"; // Walk all the instances. 
@@ -1154,8 +1143,8 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { EMT = MacroExpTracker.MacroExpansionInstances.end(); IMT != EMT; ++IMT) { MacroExpansionInstance &MacroInfo = *IMT; - OS << " '" << *MacroExpTracker.MacroUnexpanded << "' expanded to: '" - << *MacroInfo.MacroExpanded + OS << " '" << MacroExpTracker.MacroUnexpanded << "' expanded to: '" + << MacroInfo.MacroExpanded << "' with respect to these inclusion paths:\n"; // Walk all the inclusion path hierarchies. for (auto IIP = MacroInfo.InclusionPathHandles.begin(), @@ -1165,7 +1154,7 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { auto Count = (int)ip.size(); for (int Index = 0; Index < Count; ++Index) { HeaderHandle H = ip[Index]; - OS << std::string((Index * 2) + 4, ' ') << *getHeaderFilePath(H) + OS << std::string((Index * 2) + 4, ' ') << getHeaderFilePath(H) << "\n"; } } @@ -1173,7 +1162,7 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { // instance location. // If there is a definition... if (MacroInfo.DefinitionLocation.Line != ItemKey.Line) { - OS << *MacroInfo.DefinitionSourceLine; + OS << MacroInfo.DefinitionSourceLine; if (MacroInfo.DefinitionLocation.Column > 0) OS << std::string(MacroInfo.DefinitionLocation.Column - 1, ' ') << "^\n"; @@ -1201,13 +1190,13 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { // Tell caller we found one or more errors. ReturnValue = true; // Start the error message. - OS << *HeaderPaths[ItemKey.File] << ":" << ItemKey.Line << ":" + OS << HeaderPaths[ItemKey.File] << ":" << ItemKey.Line << ":" << ItemKey.Column << "\n"; OS << "#" << getDirectiveSpelling(CondTracker.DirectiveKind) << " " - << *CondTracker.ConditionUnexpanded << "\n"; + << CondTracker.ConditionUnexpanded << "\n"; OS << "^\n"; OS << "error: Conditional expression instance '" - << *CondTracker.ConditionUnexpanded + << CondTracker.ConditionUnexpanded << "' has different values in this header, depending on how it was " "included.\n"; // Walk all the instances. @@ -1215,7 +1204,7 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { EMT = CondTracker.ConditionalExpansionInstances.end(); IMT != EMT; ++IMT) { ConditionalExpansionInstance &MacroInfo = *IMT; - OS << " '" << *CondTracker.ConditionUnexpanded << "' expanded to: '" + OS << " '" << CondTracker.ConditionUnexpanded << "' expanded to: '" << ConditionValueKindStrings[MacroInfo.ConditionValue] << "' with respect to these inclusion paths:\n"; // Walk all the inclusion path hierarchies. @@ -1226,7 +1215,7 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { auto Count = (int)ip.size(); for (int Index = 0; Index < Count; ++Index) { HeaderHandle H = ip[Index]; - OS << std::string((Index * 2) + 4, ' ') << *getHeaderFilePath(H) + OS << std::string((Index * 2) + 4, ' ') << getHeaderFilePath(H) << "\n"; } } @@ -1255,7 +1244,7 @@ class PreprocessorTrackerImpl : public PreprocessorTracker { llvm::SmallVector HeaderList; // Only do extern, namespace check for headers in HeaderList. bool BlockCheckHeaderListOnly; - llvm::StringPool Strings; + llvm::StringSet<> Strings; std::vector HeaderPaths; std::vector HeaderStack; std::vector InclusionPaths; From 8e8c3c3408481f5219e9bcf8d06c464ae149c3f7 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 16 Apr 2020 20:53:32 +0100 Subject: [PATCH 085/216] [ARM] Mir test for machine sinking multiple def instructions. 
NFC --- llvm/lib/CodeGen/MachineSink.cpp | 2 +- .../CodeGen/ARM/machine-sink-multidef.mir | 87 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/ARM/machine-sink-multidef.mir diff --git a/llvm/lib/CodeGen/MachineSink.cpp b/llvm/lib/CodeGen/MachineSink.cpp index 01a7be47b62e..74b71eb7ebdf 100644 --- a/llvm/lib/CodeGen/MachineSink.cpp +++ b/llvm/lib/CodeGen/MachineSink.cpp @@ -279,7 +279,7 @@ MachineSinking::AllUsesDominatedByBlock(unsigned Reg, // // %bb.2: // %p = PHI %y, %bb.0, %def, %bb.1 - if (llvm::all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { + if (all_of(MRI->use_nodbg_operands(Reg), [&](MachineOperand &MO) { MachineInstr *UseInst = MO.getParent(); unsigned OpNo = UseInst->getOperandNo(&MO); MachineBasicBlock *UseBlock = UseInst->getParent(); diff --git a/llvm/test/CodeGen/ARM/machine-sink-multidef.mir b/llvm/test/CodeGen/ARM/machine-sink-multidef.mir new file mode 100644 index 000000000000..f0de852d319c --- /dev/null +++ b/llvm/test/CodeGen/ARM/machine-sink-multidef.mir @@ -0,0 +1,87 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc %s -o - -run-pass=machine-sink -mtriple=arm-none-eabi | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "arm-none-unknown-eabi" + + %struct.anon = type { i32, i32 } + + @e = external constant [2 x %struct.anon], align 4 + + define arm_aapcscc void @g(i32 * noalias %a, i32 *%b, i32 %x) { + entry: + %c = getelementptr inbounds [2 x %struct.anon], [2 x %struct.anon]* @e, i32 0, i32 %x, i32 0 + %l1 = load i32, i32* %c, align 4 + %d = getelementptr inbounds [2 x %struct.anon], [2 x %struct.anon]* @e, i32 0, i32 %x, i32 1 + %l2 = load i32, i32* %d, align 4 + br i1 undef, label %land.lhs.true, label %if.end + + land.lhs.true: ; preds = %entry + br label %if.end + + if.end: ; preds = %land.lhs.true, %entry + %h.0 = phi i32 [ %l1, %entry ], [ 0, %land.lhs.true ] + ret void + } + +... 
+--- +name: g +tracksRegLiveness: true +registers: + - { id: 0, class: gpr, preferred-register: '' } + - { id: 1, class: gpr, preferred-register: '' } + - { id: 2, class: gpr, preferred-register: '' } + - { id: 6, class: gpr, preferred-register: '' } + - { id: 7, class: gpr, preferred-register: '' } + - { id: 8, class: gpr, preferred-register: '' } + - { id: 9, class: gprnopc, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%8' } + - { reg: '$r1', virtual-reg: '%9' } +liveins: [] +body: | + ; CHECK-LABEL: name: g + ; CHECK: bb.0: + ; CHECK: successors: %bb.1(0x40000000), %bb.3(0x40000000) + ; CHECK: liveins: $r0, $r1 + ; CHECK: [[COPY:%[0-9]+]]:gpr = COPY $r1 + ; CHECK: [[COPY1:%[0-9]+]]:gprnopc = COPY $r0 + ; CHECK: [[LDR_PRE_REG:%[0-9]+]]:gpr, [[LDR_PRE_REG1:%[0-9]+]]:gpr = LDR_PRE_REG [[COPY]], killed [[COPY1]], 16387, 14 /* CC::al */, $noreg :: (load 4 from %ir.c) + ; CHECK: [[MOVi:%[0-9]+]]:gpr = MOVi 0, 14 /* CC::al */, $noreg, $noreg + ; CHECK: CMPri [[MOVi]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: Bcc %bb.1, 0 /* CC::eq */, $cpsr + ; CHECK: bb.3: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: B %bb.2 + ; CHECK: bb.1: + ; CHECK: successors: %bb.2(0x80000000) + ; CHECK: bb.2: + ; CHECK: [[PHI:%[0-9]+]]:gpr = PHI [[LDR_PRE_REG]], %bb.3, [[MOVi]], %bb.1 + ; CHECK: CMPri [[MOVi]], 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + ; CHECK: [[LDRi12_:%[0-9]+]]:gpr = LDRi12 killed [[LDR_PRE_REG1]], 4, 14 /* CC::al */, $noreg :: (load 4 from %ir.d) + ; CHECK: MOVPCLR 14 /* CC::al */, $noreg + bb.0: + liveins: $r0, $r1 + successors: %bb.1(0x40000000), %bb.2(0x40000000) + + %8:gpr = COPY $r1 + %9:gprnopc = COPY $r0 + %0:gpr, %6:gpr = LDR_PRE_REG %8, killed %9, 16387, 14, $noreg :: (load 4 from %ir.c) + %7:gpr = MOVi 0, 14, $noreg, $noreg + CMPri %7, 0, 14, $noreg, implicit-def $cpsr + Bcc %bb.2, 1, $cpsr + B %bb.1 + + bb.1: + successors: %bb.2(0x80000000) + + bb.2: + + %2:gpr = PHI %0, %bb.0, %7, %bb.1 + CMPri %7, 0, 14, $noreg, implicit-def $cpsr + %1:gpr = LDRi12 killed %6, 4, 14, $noreg :: (load 4 from %ir.d) + MOVPCLR 14, $noreg + +... From 8cac6d1875e094f2b78621f3ff12e61553cd12ec Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Thu, 16 Apr 2020 13:22:06 -0700 Subject: [PATCH 086/216] [Shell] Remove incorrectly cargo-culted UNSUPPORTED. Let's see if this sticks on the bots. --- lldb/test/Shell/SymbolFile/DWARF/static_scope.s | 1 - 1 file changed, 1 deletion(-) diff --git a/lldb/test/Shell/SymbolFile/DWARF/static_scope.s b/lldb/test/Shell/SymbolFile/DWARF/static_scope.s index 17b248579849..84a69e08ecfc 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/static_scope.s +++ b/lldb/test/Shell/SymbolFile/DWARF/static_scope.s @@ -2,7 +2,6 @@ # variable `b`, which is `local` and not `static`. # REQUIRES: x86 -# UNSUPPORTED: lldb-repro # RUN: llvm-mc -triple=x86_64-apple-macosx10.15.0 -filetype=obj %s > %t.o # RUN: lldb-test symbols %t.o | FileCheck %s From 1fae85a8534ec51ca893899314bd244b3e9684c7 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Thu, 16 Apr 2020 13:31:32 -0700 Subject: [PATCH 087/216] [DWARF] Add instructions to regenerate this test, if needed. 
--- lldb/test/Shell/SymbolFile/DWARF/static_scope.s | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/lldb/test/Shell/SymbolFile/DWARF/static_scope.s b/lldb/test/Shell/SymbolFile/DWARF/static_scope.s index 84a69e08ecfc..02d497ac9ccb 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/static_scope.s +++ b/lldb/test/Shell/SymbolFile/DWARF/static_scope.s @@ -3,6 +3,15 @@ # REQUIRES: x86 +# Original test case (for future reference), compiled with: +# $ clang-10 -g -Og test.c -o test +# $ cat test.c +# volatile int a; +# main() { +# int b = 3; +# a; +# } + # RUN: llvm-mc -triple=x86_64-apple-macosx10.15.0 -filetype=obj %s > %t.o # RUN: lldb-test symbols %t.o | FileCheck %s From f0612957324b287d8bf3e00cac3dc3d48ba6d414 Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Thu, 16 Apr 2020 11:11:13 -0700 Subject: [PATCH 088/216] [MLIR] Complete refactoring of Affine dialect into sub-libraries. There were some unused CMakeFiles for Affine/IR and Affine/EDSC. This change builds separate MLIRAffineOps and MLIRAffineEDSC libraries using those CMakeFiles. This combination replaces the old MLIRAffine library. Differential Revision: https://reviews.llvm.org/D78317 --- mlir/lib/Analysis/CMakeLists.txt | 4 ++-- .../AffineToStandard/CMakeLists.txt | 2 +- mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt | 2 +- .../Conversion/VectorToLoops/CMakeLists.txt | 1 + mlir/lib/Dialect/Affine/CMakeLists.txt | 22 ++----------------- mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt | 7 +++--- mlir/lib/Dialect/Affine/IR/CMakeLists.txt | 4 ++-- .../Dialect/Affine/Transforms/CMakeLists.txt | 2 +- mlir/lib/Dialect/Affine/Utils/CMakeLists.txt | 2 +- mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt | 3 ++- .../Dialect/Linalg/Transforms/CMakeLists.txt | 2 +- mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt | 2 +- .../Dialect/LoopOps/Transforms/CMakeLists.txt | 2 +- mlir/lib/Dialect/Vector/CMakeLists.txt | 2 +- mlir/lib/Transforms/CMakeLists.txt | 2 +- mlir/lib/Transforms/Utils/CMakeLists.txt | 2 +- mlir/test/EDSC/CMakeLists.txt | 3 ++- mlir/test/lib/Transforms/CMakeLists.txt | 2 +- 18 files changed, 26 insertions(+), 40 deletions(-) diff --git a/mlir/lib/Analysis/CMakeLists.txt b/mlir/lib/Analysis/CMakeLists.txt index 262bc7e8a588..f9c0236e3d4e 100644 --- a/mlir/lib/Analysis/CMakeLists.txt +++ b/mlir/lib/Analysis/CMakeLists.txt @@ -24,7 +24,7 @@ add_mlir_library(MLIRAnalysis target_link_libraries(MLIRAnalysis PUBLIC - MLIRAffine + MLIRAffineOps MLIRCallInterfaces MLIRControlFlowInterfaces MLIRInferTypeOpInterface @@ -44,7 +44,7 @@ add_mlir_library(MLIRLoopAnalysis target_link_libraries(MLIRLoopAnalysis PUBLIC - MLIRAffine + MLIRAffineOps MLIRCallInterfaces MLIRControlFlowInterfaces MLIRInferTypeOpInterface diff --git a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt index 821a1deb0a15..9324f7b21c80 100644 --- a/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt +++ b/mlir/lib/Conversion/AffineToStandard/CMakeLists.txt @@ -10,7 +10,7 @@ add_mlir_conversion_library(MLIRAffineToStandard target_link_libraries( MLIRAffineToStandard PUBLIC - MLIRAffine + MLIRAffineOps MLIRLoopOps MLIRPass MLIRStandardOps diff --git a/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt b/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt index 2c62755eebc8..5b31429a0d0c 100644 --- a/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt +++ b/mlir/lib/Conversion/LoopsToGPU/CMakeLists.txt @@ -10,7 +10,7 @@ add_mlir_conversion_library(MLIRLoopsToGPU ) target_link_libraries(MLIRLoopsToGPU PUBLIC - MLIRAffine + MLIRAffineOps 
MLIRAffineToStandard MLIRGPU MLIRIR diff --git a/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt b/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt index 71aae34e4c0c..41414fc9bea7 100644 --- a/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt @@ -6,6 +6,7 @@ add_mlir_conversion_library(MLIRVectorToLoops ) set(LIBS MLIREDSC + MLIRAffineEDSC MLIRLLVMIR MLIRTransforms LLVMCore diff --git a/mlir/lib/Dialect/Affine/CMakeLists.txt b/mlir/lib/Dialect/Affine/CMakeLists.txt index 95cf0a44f21b..018cb0ba94a5 100644 --- a/mlir/lib/Dialect/Affine/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/CMakeLists.txt @@ -1,22 +1,4 @@ -add_mlir_dialect_library(MLIRAffine - IR/AffineOps.cpp - IR/AffineValueMap.cpp - EDSC/Builders.cpp - - ADDITIONAL_HEADER_DIRS - ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine - - DEPENDS - MLIRAffineOpsIncGen - ) -target_link_libraries(MLIRAffine - PUBLIC - MLIREDSC - MLIRIR - MLIRLoopLikeInterface - MLIRSideEffects - MLIRStandardOps - ) - +add_subdirectory(IR) +add_subdirectory(EDSC) add_subdirectory(Transforms) add_subdirectory(Utils) diff --git a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt index 751bfd351bc6..a07905b71fbc 100644 --- a/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/EDSC/CMakeLists.txt @@ -1,5 +1,5 @@ -add_mlir_dialect_library(MLIRAffine - EDSC/Builders.cpp +add_mlir_dialect_library(MLIRAffineEDSC + Builders.cpp ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Dialect/Affine @@ -7,8 +7,9 @@ add_mlir_dialect_library(MLIRAffine DEPENDS MLIRAffineOpsIncGen ) -target_link_libraries(MLIRAffine +target_link_libraries(MLIRAffineEDSC PUBLIC + MLIRAffineOps MLIREDSC MLIRIR MLIRLoopLikeInterface diff --git a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt index 91dcceaf3912..8d186ae03b54 100644 --- a/mlir/lib/Dialect/Affine/IR/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/IR/CMakeLists.txt @@ -1,4 +1,4 @@ -add_mlir_dialect_library(MLIRAffine +add_mlir_dialect_library(MLIRAffineOps AffineOps.cpp AffineValueMap.cpp @@ -8,7 +8,7 @@ add_mlir_dialect_library(MLIRAffine DEPENDS MLIRAffineOpsIncGen ) -target_link_libraries(MLIRAffine +target_link_libraries(MLIRAffineOps PUBLIC MLIREDSC MLIRIR diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt index 833736965776..bcad44d72497 100644 --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -17,7 +17,7 @@ add_mlir_dialect_library(MLIRAffineTransforms ) target_link_libraries(MLIRAffineTransforms PUBLIC - MLIRAffine + MLIRAffineOps MLIREDSC MLIRIR MLIRPass diff --git a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt index ac9dae972488..ed3b5b8b1723 100644 --- a/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Utils/CMakeLists.txt @@ -7,6 +7,6 @@ add_mlir_dialect_library(MLIRAffineUtils ) target_link_libraries(MLIRAffineUtils PUBLIC - MLIRAffine + MLIRAffineOps MLIRTransformUtils ) diff --git a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt index bc9e244d4ad5..8ec3c6dd7d22 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/EDSC/CMakeLists.txt @@ -12,7 +12,8 @@ target_link_libraries(MLIRLinalgEDSC PUBLIC MLIREDSC MLIRIR - MLIRAffine + MLIRAffineOps + MLIRAffineEDSC MLIRLinalgOps MLIRLoopOps MLIRStandardOps 
diff --git a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt index 93f7142ce2df..c8e74ea30e8d 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Transforms/CMakeLists.txt @@ -15,7 +15,7 @@ add_mlir_dialect_library(MLIRLinalgTransforms ) target_link_libraries(MLIRLinalgTransforms PUBLIC - MLIRAffine + MLIRAffineOps MLIRAnalysis MLIREDSC MLIRIR diff --git a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt index f9ad613f2a17..681a47d31271 100644 --- a/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt +++ b/mlir/lib/Dialect/Linalg/Utils/CMakeLists.txt @@ -9,7 +9,7 @@ add_mlir_dialect_library(MLIRLinalgUtils target_link_libraries(MLIRLinalgUtils PUBLIC - MLIRAffine + MLIRAffineOps MLIREDSC MLIRIR MLIRLinalgOps diff --git a/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt b/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt index e3ec12b4b21d..13a6aa6f288a 100644 --- a/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/LoopOps/Transforms/CMakeLists.txt @@ -11,7 +11,7 @@ add_mlir_dialect_library(MLIRLoopOpsTransforms ) target_link_libraries(MLIRLoopOpsTransforms PUBLIC - MLIRAffine + MLIRAffineOps MLIRIR MLIRPass MLIRLoopOps diff --git a/mlir/lib/Dialect/Vector/CMakeLists.txt b/mlir/lib/Dialect/Vector/CMakeLists.txt index e5e1251768cd..3e1d8de0d3ba 100644 --- a/mlir/lib/Dialect/Vector/CMakeLists.txt +++ b/mlir/lib/Dialect/Vector/CMakeLists.txt @@ -16,7 +16,7 @@ target_link_libraries(MLIRVector MLIREDSC MLIRIR MLIRStandardOps - MLIRAffine + MLIRAffineOps MLIRLoopOps MLIRLoopAnalysis MLIRSideEffects diff --git a/mlir/lib/Transforms/CMakeLists.txt b/mlir/lib/Transforms/CMakeLists.txt index 49704229ad14..4f562ff90663 100644 --- a/mlir/lib/Transforms/CMakeLists.txt +++ b/mlir/lib/Transforms/CMakeLists.txt @@ -28,7 +28,7 @@ add_mlir_library(MLIRTransforms target_link_libraries(MLIRTransforms PUBLIC - MLIRAffine + MLIRAffineOps MLIRAnalysis MLIRLoopLikeInterface MLIRLoopOps diff --git a/mlir/lib/Transforms/Utils/CMakeLists.txt b/mlir/lib/Transforms/Utils/CMakeLists.txt index e28a97c18231..1e0442179bf4 100644 --- a/mlir/lib/Transforms/Utils/CMakeLists.txt +++ b/mlir/lib/Transforms/Utils/CMakeLists.txt @@ -16,7 +16,7 @@ add_mlir_library(MLIRTransformUtils target_link_libraries(MLIRTransformUtils PUBLIC - MLIRAffine + MLIRAffineOps MLIRAnalysis MLIRLoopAnalysis MLIRLoopOps diff --git a/mlir/test/EDSC/CMakeLists.txt b/mlir/test/EDSC/CMakeLists.txt index 6592f8cccd1d..d8e3be8f2079 100644 --- a/mlir/test/EDSC/CMakeLists.txt +++ b/mlir/test/EDSC/CMakeLists.txt @@ -6,7 +6,8 @@ llvm_update_compile_flags(mlir-edsc-builder-api-test) target_link_libraries(mlir-edsc-builder-api-test PRIVATE - MLIRAffine + MLIRAffineOps + MLIRAffineEDSC MLIREDSC MLIRIR MLIRLinalgEDSC diff --git a/mlir/test/lib/Transforms/CMakeLists.txt b/mlir/test/lib/Transforms/CMakeLists.txt index 23107f223b9c..0417bee750ff 100644 --- a/mlir/test/lib/Transforms/CMakeLists.txt +++ b/mlir/test/lib/Transforms/CMakeLists.txt @@ -37,7 +37,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}/../DeclarativeTransforms) target_link_libraries(MLIRTestTransforms PUBLIC - MLIRAffine + MLIRAffineOps MLIRAnalysis MLIREDSC MLIRGPU From 3b222ef246ef9298e57d00b7c8c1ffd92d0fb44d Mon Sep 17 00:00:00 2001 From: Daniel S Fava Date: Thu, 16 Apr 2020 22:53:26 +0200 Subject: [PATCH 089/216] tsan: fixes to ThreadClock::releaseStoreAcquire and tests Fixes: 1. 
Setting the number of entries in a thread's clock to max between the thread and the SyncClock the thread is acquiring from 2. Setting last_acquire_ Unit- and stress-test for releaseStoreAcquire added to tests/unit/tsan_clock_test.cpp --- compiler-rt/lib/tsan/rtl/tsan_clock.cpp | 17 ++++++- .../lib/tsan/tests/unit/tsan_clock_test.cpp | 45 ++++++++++++++++++- 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/tsan/rtl/tsan_clock.cpp b/compiler-rt/lib/tsan/rtl/tsan_clock.cpp index 96bbfa1d4cc2..acbcf804194a 100644 --- a/compiler-rt/lib/tsan/rtl/tsan_clock.cpp +++ b/compiler-rt/lib/tsan/rtl/tsan_clock.cpp @@ -196,23 +196,36 @@ void ThreadClock::releaseStoreAcquire(ClockCache *c, SyncClock *sc) { return; } - // Check if we need to resize dst. + nclk_ = max(nclk_, (uptr) sc->size_); + + // Check if we need to resize sc. if (sc->size_ < nclk_) sc->Resize(c, nclk_); + bool acquired = false; + sc->Unshare(c); // Update sc->clk_. sc->FlushDirty(); uptr i = 0; for (ClockElem &ce : *sc) { u64 tmp = clk_[i]; - clk_[i] = max(ce.epoch, clk_[i]); + if (clk_[i] < ce.epoch) { + clk_[i] = ce.epoch; + acquired = true; + } ce.epoch = tmp; ce.reused = 0; i++; } sc->release_store_tid_ = kInvalidTid; sc->release_store_reused_ = 0; + + if (acquired) { + CPP_STAT_INC(StatClockAcquiredSomething); + last_acquire_ = clk_[tid_]; + ResetCached(c); + } } void ThreadClock::release(ClockCache *c, SyncClock *dst) { diff --git a/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp b/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp index 6d835ba85c3b..cdaaf30b1b20 100644 --- a/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp +++ b/compiler-rt/lib/tsan/tests/unit/tsan_clock_test.cpp @@ -108,6 +108,31 @@ TEST(Clock, RepeatedAcquire) { sync.Reset(&cache); } +TEST(Clock, releaseStoreAcquire) { + ThreadClock thr0(0); + thr0.tick(); + ThreadClock thr1(1); + thr1.tick(); + SyncClock syncA; + SyncClock syncB; + ASSERT_EQ(syncA.size(), 0U); + ASSERT_EQ(syncB.size(), 0U); + thr1.releaseStoreAcquire(&cache, &syncB); + ASSERT_EQ(syncB.size(), 2U); // T0 and T1 + // releaseStoreAcquire to an empty SyncClock + thr0.releaseStoreAcquire(&cache, &syncA); + ASSERT_EQ(syncA.size(), 1U); + // releaseStoreAcquire from a non-empty SyncClock + // T0 learns about T1 + thr0.releaseStoreAcquire(&cache, &syncB); + // releaseStoreAcquire to the originally empty SyncClock + // T0 deposits info about T1 into syncA + thr0.releaseStoreAcquire(&cache, &syncA); + ASSERT_EQ(syncA.size(), 2U); + syncA.Reset(&cache); + syncB.Reset(&cache); +} + TEST(Clock, ManyThreads) { SyncClock chunked; for (unsigned i = 0; i < 200; i++) { @@ -336,6 +361,18 @@ struct SimpleThreadClock { dst->clock[i] = max(dst->clock[i], clock[i]); } + void releaseStoreAcquire(SimpleSyncClock *sc) { + if (sc->size < size) + sc->size = size; + else + size = sc->size; + for (uptr i = 0; i < kThreads; i++) { + uptr tmp = clock[i]; + clock[i] = max(sc->clock[i], clock[i]); + sc->clock[i] = tmp; + } + } + void acq_rel(SimpleSyncClock *dst) { acquire(dst); release(dst); @@ -390,7 +427,7 @@ static bool ClockFuzzer(bool printing) { thr0[tid]->tick(); thr1[tid]->tick(); - switch (rand() % 6) { + switch (rand() % 7) { case 0: if (printing) printf("acquire thr%d <- clk%d\n", tid, cid); @@ -422,6 +459,12 @@ static bool ClockFuzzer(bool printing) { sync1[cid]->Reset(&cache); break; case 5: + if (printing) + printf("releaseStoreAcquire thr%d -> clk%d\n", tid, cid); + thr0[tid]->releaseStoreAcquire(sync0[cid]); + thr1[tid]->releaseStoreAcquire(&cache, sync1[cid]); + break; + 
case 6: if (printing) printf("reset thr%d\n", tid); u64 epoch = thr0[tid]->clock[tid] + 1; From 9f6a308457d1bfebf1cee94b0306e738f270e512 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 16 Apr 2020 14:02:05 -0700 Subject: [PATCH 090/216] [lldb/Utility] Fix a bug in stringify_append for printing addresses. The recent change in the API macros revealed that we were not printing the pointer address for a bunch of methods, but rather the address of the pointer. It's something I had already noticed while looking at some reproducer traces, but hadn't made it to the top of my list yet. This fixes the issue by providing a more specific overload. --- lldb/include/lldb/Utility/ReproducerInstrumentation.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h index 3728e19386d1..3b5dde3d2e2a 100644 --- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h +++ b/lldb/include/lldb/Utility/ReproducerInstrumentation.h @@ -32,6 +32,11 @@ inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) { ss << &t; } +template +inline void stringify_append(llvm::raw_string_ostream &ss, T *t) { + ss << reinterpret_cast(t); +} + template inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) { ss << reinterpret_cast(t); @@ -115,7 +120,7 @@ template inline std::string stringify_args(const Ts &... ts) { #define LLDB_CONSTRUCT_(T, ...) \ lldb_private::repro::Recorder _recorder(LLVM_PRETTY_FUNCTION, \ - stringify_args(__VA_ARGS__)); \ + stringify_args(this, __VA_ARGS__)); \ if (lldb_private::repro::InstrumentationData _data = \ LLDB_GET_INSTRUMENTATION_DATA()) { \ _recorder.Record(_data.GetSerializer(), _data.GetRegistry(), \ From 94052da92961242771bd95a8997a3485c20d9740 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 14 Apr 2020 18:49:15 +0100 Subject: [PATCH 091/216] [ARM] MVE postinc tests. 
NFC --- .../CodeGen/Thumb2/mve-postinc-distribute.ll | 394 ++++++++++++++++++ 1 file changed, 394 insertions(+) create mode 100644 llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll diff --git a/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll new file mode 100644 index 000000000000..ba588166522b --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-postinc-distribute.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s + +; Check some loop postinc's for properly distributed post-incs + +define i32 @vaddv(i32* nocapture readonly %data, i32 %N) { +; CHECK-LABEL: vaddv: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: mov lr, r1 +; CHECK-NEXT: cmp r1, #1 +; CHECK-NEXT: blt .LBB0_4 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB0_2: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q0, [r1] +; CHECK-NEXT: vaddva.s32 r0, q0 +; CHECK-NEXT: vldrw.u32 q0, [r1, #16] +; CHECK-NEXT: adds r1, #32 +; CHECK-NEXT: vaddva.s32 r0, q0 +; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup +; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: .LBB0_4: +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: pop {r7, pc} +entry: + %cmp11 = icmp sgt i32 %N, 0 + br i1 %cmp11, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.body, %entry + %x.0.lcssa = phi i32 [ 0, %entry ], [ %7, %for.body ] + ret i32 %x.0.lcssa + +for.body: ; preds = %entry, %for.body + %data.addr.014 = phi i32* [ %add.ptr1, %for.body ], [ %data, %entry ] + %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %x.012 = phi i32 [ %7, %for.body ], [ 0, %entry ] + %0 = bitcast i32* %data.addr.014 to <4 x i32>* + %1 = load <4 x i32>, <4 x i32>* %0, align 4 + %2 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %1, i32 0) + %3 = add i32 %2, %x.012 + %add.ptr = getelementptr inbounds i32, i32* %data.addr.014, i32 4 + %4 = bitcast i32* %add.ptr to <4 x i32>* + %5 = load <4 x i32>, <4 x i32>* %4, align 4 + %6 = tail call i32 @llvm.arm.mve.addv.v4i32(<4 x i32> %5, i32 0) + %7 = add i32 %3, %6 + %add.ptr1 = getelementptr inbounds i32, i32* %data.addr.014, i32 8 + %inc = add nuw nsw i32 %i.013, 1 + %exitcond = icmp eq i32 %inc, %N + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +define void @arm_cmplx_dot_prod_q15(i16* nocapture readonly %pSrcA, i16* nocapture readonly %pSrcB, i32 %numSamples, i32* nocapture %realResult, i32* nocapture %imagResult) { +; CHECK-LABEL: arm_cmplx_dot_prod_q15: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} +; CHECK-NEXT: mvn r7, #7 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: add.w r7, r7, r2, lsl #1 +; CHECK-NEXT: vldrh.u16 q0, [r0] +; CHECK-NEXT: vldrh.u16 q1, [r1] +; CHECK-NEXT: movs r4, #0 +; CHECK-NEXT: lsr.w lr, r7, #3 +; CHECK-NEXT: mov r7, r12 +; CHECK-NEXT: mov r9, r12 +; CHECK-NEXT: wls lr, lr, .LBB1_4 +; CHECK-NEXT: @ %bb.1: @ %while.body.preheader +; CHECK-NEXT: mov.w r9, #0 +; CHECK-NEXT: add.w r8, r0, lr, lsl #5 +; CHECK-NEXT: add.w r6, r0, #32 +; CHECK-NEXT: add.w r0, r1, #32 +; CHECK-NEXT: lsl.w r5, lr, #4 +; CHECK-NEXT: mov r4, r9 +; CHECK-NEXT: movs r7, #0 +; CHECK-NEXT: mov r12, r9 +; CHECK-NEXT: 
.LBB1_2: @ %while.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrh.u16 q2, [r0, #-16] +; CHECK-NEXT: vldrh.u16 q3, [r6, #-16] +; CHECK-NEXT: add.w r10, r6, #32 +; CHECK-NEXT: add.w r11, r0, #32 +; CHECK-NEXT: vmlaldavax.s16 r4, r9, q0, q1 +; CHECK-NEXT: vmlsldava.s16 r12, r7, q0, q1 +; CHECK-NEXT: vldrh.u16 q0, [r6] +; CHECK-NEXT: vldrh.u16 q1, [r0] +; CHECK-NEXT: vmlaldavax.s16 r4, r9, q3, q2 +; CHECK-NEXT: vmlsldava.s16 r12, r7, q3, q2 +; CHECK-NEXT: mov r6, r10 +; CHECK-NEXT: mov r0, r11 +; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: @ %bb.3: @ %while.cond.while.end_crit_edge +; CHECK-NEXT: add.w r1, r1, r5, lsl #1 +; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: .LBB1_4: @ %while.end +; CHECK-NEXT: vmlaldavax.s16 r4, r9, q0, q1 +; CHECK-NEXT: vmlsldava.s16 r12, r7, q0, q1 +; CHECK-NEXT: mov r10, r4 +; CHECK-NEXT: mov r5, r9 +; CHECK-NEXT: lsrl r10, r5, #6 +; CHECK-NEXT: ldr.w r8, [sp, #36] +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: mov r5, r7 +; CHECK-NEXT: and lr, r2, #3 +; CHECK-NEXT: lsrl r6, r5, #6 +; CHECK-NEXT: wls lr, lr, .LBB1_7 +; CHECK-NEXT: .LBB1_5: @ %while.body11 +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldrsh.w r5, [r0, #2] +; CHECK-NEXT: ldrsh.w r6, [r1] +; CHECK-NEXT: ldrsh.w r10, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: ldrsh.w r2, [r1, #2] +; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: smlalbb r4, r9, r6, r5 +; CHECK-NEXT: smlalbb r12, r7, r6, r10 +; CHECK-NEXT: muls r5, r2, r5 +; CHECK-NEXT: smlalbb r4, r9, r2, r10 +; CHECK-NEXT: subs.w r12, r12, r5 +; CHECK-NEXT: sbc.w r7, r7, r5, asr #31 +; CHECK-NEXT: le lr, .LBB1_5 +; CHECK-NEXT: @ %bb.6: @ %while.end34.loopexit +; CHECK-NEXT: lsrl r12, r7, #6 +; CHECK-NEXT: lsrl r4, r9, #6 +; CHECK-NEXT: mov r6, r12 +; CHECK-NEXT: mov r10, r4 +; CHECK-NEXT: .LBB1_7: @ %while.end34 +; CHECK-NEXT: str r6, [r3] +; CHECK-NEXT: str.w r10, [r8] +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} +entry: + %mul = shl i32 %numSamples, 1 + %sub = add i32 %mul, -8 + %shr = lshr i32 %sub, 3 + %vecSrcB.0.in102 = bitcast i16* %pSrcB to <8 x i16>* + %vecSrcB.0103 = load <8 x i16>, <8 x i16>* %vecSrcB.0.in102, align 2 + %vecSrcA.0.in104 = bitcast i16* %pSrcA to <8 x i16>* + %vecSrcA.0105 = load <8 x i16>, <8 x i16>* %vecSrcA.0.in104, align 2 + %cmp106 = icmp eq i32 %shr, 0 + br i1 %cmp106, label %while.end, label %while.body.preheader + +while.body.preheader: ; preds = %entry + %0 = shl i32 %shr, 4 + %scevgep = getelementptr i16, i16* %pSrcA, i32 %0 + br label %while.body + +while.body: ; preds = %while.body.preheader, %while.body + %vecSrcA.0115 = phi <8 x i16> [ %vecSrcA.0, %while.body ], [ %vecSrcA.0105, %while.body.preheader ] + %vecSrcB.0114 = phi <8 x i16> [ %vecSrcB.0, %while.body ], [ %vecSrcB.0103, %while.body.preheader ] + %vecSrcB.0.in.in113 = phi i16* [ %add.ptr3, %while.body ], [ %pSrcB, %while.body.preheader ] + %vecSrcA.0.in.in112 = phi i16* [ %add.ptr2, %while.body ], [ %pSrcA, %while.body.preheader ] + %accImag.0.off32111 = phi i32 [ %15, %while.body ], [ 0, %while.body.preheader ] + %accImag.0.off0110 = phi i32 [ %16, %while.body ], [ 0, %while.body.preheader ] + %accReal.0.off32109 = phi i32 [ %12, %while.body ], [ 0, %while.body.preheader ] + %accReal.0.off0108 = phi i32 [ %13, %while.body ], [ 0, %while.body.preheader ] + %blkCnt.0107 = phi i32 [ %dec, %while.body ], [ %shr, %while.body.preheader ] + %pSrcB.addr.0 = getelementptr inbounds i16, i16* %vecSrcB.0.in.in113, i32 8 + %pSrcA.addr.0 = getelementptr inbounds i16, i16* %vecSrcA.0.in.in112, i32 8 + %1 = tail call { 
i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %accReal.0.off0108, i32 %accReal.0.off32109, <8 x i16> %vecSrcA.0115, <8 x i16> %vecSrcB.0114) + %2 = extractvalue { i32, i32 } %1, 1 + %3 = extractvalue { i32, i32 } %1, 0 + %4 = bitcast i16* %pSrcA.addr.0 to <8 x i16>* + %5 = load <8 x i16>, <8 x i16>* %4, align 2 + %add.ptr2 = getelementptr inbounds i16, i16* %vecSrcA.0.in.in112, i32 16 + %6 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 %accImag.0.off0110, i32 %accImag.0.off32111, <8 x i16> %vecSrcA.0115, <8 x i16> %vecSrcB.0114) + %7 = extractvalue { i32, i32 } %6, 1 + %8 = extractvalue { i32, i32 } %6, 0 + %9 = bitcast i16* %pSrcB.addr.0 to <8 x i16>* + %10 = load <8 x i16>, <8 x i16>* %9, align 2 + %add.ptr3 = getelementptr inbounds i16, i16* %vecSrcB.0.in.in113, i32 16 + %11 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %3, i32 %2, <8 x i16> %5, <8 x i16> %10) + %12 = extractvalue { i32, i32 } %11, 1 + %13 = extractvalue { i32, i32 } %11, 0 + %14 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 %8, i32 %7, <8 x i16> %5, <8 x i16> %10) + %15 = extractvalue { i32, i32 } %14, 1 + %16 = extractvalue { i32, i32 } %14, 0 + %dec = add nsw i32 %blkCnt.0107, -1 + %vecSrcB.0.in = bitcast i16* %add.ptr3 to <8 x i16>* + %vecSrcB.0 = load <8 x i16>, <8 x i16>* %vecSrcB.0.in, align 2 + %vecSrcA.0.in = bitcast i16* %add.ptr2 to <8 x i16>* + %vecSrcA.0 = load <8 x i16>, <8 x i16>* %vecSrcA.0.in, align 2 + %cmp = icmp eq i32 %dec, 0 + br i1 %cmp, label %while.cond.while.end_crit_edge, label %while.body + +while.cond.while.end_crit_edge: ; preds = %while.body + %scevgep136 = getelementptr i16, i16* %pSrcB, i32 %0 + br label %while.end + +while.end: ; preds = %while.cond.while.end_crit_edge, %entry + %accReal.0.off0.lcssa = phi i32 [ %13, %while.cond.while.end_crit_edge ], [ 0, %entry ] + %accReal.0.off32.lcssa = phi i32 [ %12, %while.cond.while.end_crit_edge ], [ 0, %entry ] + %accImag.0.off0.lcssa = phi i32 [ %16, %while.cond.while.end_crit_edge ], [ 0, %entry ] + %accImag.0.off32.lcssa = phi i32 [ %15, %while.cond.while.end_crit_edge ], [ 0, %entry ] + %vecSrcA.0.in.in.lcssa = phi i16* [ %scevgep, %while.cond.while.end_crit_edge ], [ %pSrcA, %entry ] + %vecSrcB.0.in.in.lcssa = phi i16* [ %scevgep136, %while.cond.while.end_crit_edge ], [ %pSrcB, %entry ] + %vecSrcB.0.lcssa = phi <8 x i16> [ %vecSrcB.0, %while.cond.while.end_crit_edge ], [ %vecSrcB.0103, %entry ] + %vecSrcA.0.lcssa = phi <8 x i16> [ %vecSrcA.0, %while.cond.while.end_crit_edge ], [ %vecSrcA.0105, %entry ] + %17 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 1, i32 0, i32 %accReal.0.off0.lcssa, i32 %accReal.0.off32.lcssa, <8 x i16> %vecSrcA.0.lcssa, <8 x i16> %vecSrcB.0.lcssa) + %18 = extractvalue { i32, i32 } %17, 1 + %19 = zext i32 %18 to i64 + %20 = shl nuw i64 %19, 32 + %21 = extractvalue { i32, i32 } %17, 0 + %22 = zext i32 %21 to i64 + %23 = or i64 %20, %22 + %24 = tail call { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32 0, i32 0, i32 1, i32 %accImag.0.off0.lcssa, i32 %accImag.0.off32.lcssa, <8 x i16> %vecSrcA.0.lcssa, <8 x i16> %vecSrcB.0.lcssa) + %25 = extractvalue { i32, i32 } %24, 1 + %26 = zext i32 %25 to i64 + %27 = shl nuw i64 %26, 32 + %28 = extractvalue { i32, i32 } %24, 0 + %29 = zext i32 %28 to i64 + %30 = or i64 %27, %29 + %shr8 = and i32 %numSamples, 3 + %cmp1095 = icmp eq i32 %shr8, 0 + %extract = lshr i64 %23, 6 + %extract.t = trunc i64 %extract to i32 + %extract129 = lshr i64 %30, 6 + 
%extract.t130 = trunc i64 %extract129 to i32 + br i1 %cmp1095, label %while.end34, label %while.body11 + +while.body11: ; preds = %while.end, %while.body11 + %pSrcA.addr.1100 = phi i16* [ %incdec.ptr12, %while.body11 ], [ %vecSrcA.0.in.in.lcssa, %while.end ] + %pSrcB.addr.199 = phi i16* [ %incdec.ptr14, %while.body11 ], [ %vecSrcB.0.in.in.lcssa, %while.end ] + %accImag.198 = phi i64 [ %add32, %while.body11 ], [ %30, %while.end ] + %accReal.197 = phi i64 [ %sub27, %while.body11 ], [ %23, %while.end ] + %blkCnt.196 = phi i32 [ %dec33, %while.body11 ], [ %shr8, %while.end ] + %incdec.ptr = getelementptr inbounds i16, i16* %pSrcA.addr.1100, i32 1 + %31 = load i16, i16* %pSrcA.addr.1100, align 2 + %incdec.ptr12 = getelementptr inbounds i16, i16* %pSrcA.addr.1100, i32 2 + %32 = load i16, i16* %incdec.ptr, align 2 + %incdec.ptr13 = getelementptr inbounds i16, i16* %pSrcB.addr.199, i32 1 + %33 = load i16, i16* %pSrcB.addr.199, align 2 + %incdec.ptr14 = getelementptr inbounds i16, i16* %pSrcB.addr.199, i32 2 + %34 = load i16, i16* %incdec.ptr13, align 2 + %conv = sext i16 %31 to i32 + %conv15 = sext i16 %33 to i32 + %mul16 = mul nsw i32 %conv15, %conv + %conv17 = sext i32 %mul16 to i64 + %add = add nsw i64 %accReal.197, %conv17 + %conv19 = sext i16 %34 to i32 + %mul20 = mul nsw i32 %conv19, %conv + %conv21 = sext i32 %mul20 to i64 + %conv23 = sext i16 %32 to i32 + %mul25 = mul nsw i32 %conv19, %conv23 + %conv26 = sext i32 %mul25 to i64 + %sub27 = sub i64 %add, %conv26 + %mul30 = mul nsw i32 %conv15, %conv23 + %conv31 = sext i32 %mul30 to i64 + %add22 = add i64 %accImag.198, %conv31 + %add32 = add i64 %add22, %conv21 + %dec33 = add nsw i32 %blkCnt.196, -1 + %cmp10 = icmp eq i32 %dec33, 0 + br i1 %cmp10, label %while.end34.loopexit, label %while.body11 + +while.end34.loopexit: ; preds = %while.body11 + %extract131 = lshr i64 %add32, 6 + %extract.t132 = trunc i64 %extract131 to i32 + %extract127 = lshr i64 %sub27, 6 + %extract.t128 = trunc i64 %extract127 to i32 + br label %while.end34 + +while.end34: ; preds = %while.end34.loopexit, %while.end + %accReal.1.lcssa.off6 = phi i32 [ %extract.t, %while.end ], [ %extract.t128, %while.end34.loopexit ] + %accImag.1.lcssa.off6 = phi i32 [ %extract.t130, %while.end ], [ %extract.t132, %while.end34.loopexit ] + store i32 %accReal.1.lcssa.off6, i32* %realResult, align 4 + store i32 %accImag.1.lcssa.off6, i32* %imagResult, align 4 + ret void +} + + +define void @fma8(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %n) { +; CHECK-LABEL: fma8: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, lr} +; CHECK-NEXT: push {r4, r5, r6, lr} +; CHECK-NEXT: cmp r3, #1 +; CHECK-NEXT: blt .LBB2_8 +; CHECK-NEXT: @ %bb.1: @ %for.body.preheader +; CHECK-NEXT: cmp r3, #7 +; CHECK-NEXT: bhi .LBB2_3 +; CHECK-NEXT: @ %bb.2: +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: b .LBB2_6 +; CHECK-NEXT: .LBB2_3: @ %vector.ph +; CHECK-NEXT: bic r12, r3, #7 +; CHECK-NEXT: movs r5, #1 +; CHECK-NEXT: sub.w r6, r12, #8 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: add.w lr, r5, r6, lsr #3 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r6, r2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB2_4: @ %vector.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldrw.u32 q0, [r4, #16] +; CHECK-NEXT: vldrw.u32 q1, [r5, #16] +; CHECK-NEXT: vldrw.u32 q2, [r6, #16] +; CHECK-NEXT: vldrw.u32 q3, [r6] +; CHECK-NEXT: vfma.f32 q2, q1, q0 +; CHECK-NEXT: vldrw.u32 q0, [r4] +; CHECK-NEXT: vldrw.u32 q1, [r5] +; CHECK-NEXT: vstrw.32 q2, 
[r6, #16] +; CHECK-NEXT: adds r4, #32 +; CHECK-NEXT: adds r5, #32 +; CHECK-NEXT: vfma.f32 q3, q1, q0 +; CHECK-NEXT: vstrw.32 q3, [r6] +; CHECK-NEXT: adds r6, #32 +; CHECK-NEXT: le lr, .LBB2_4 +; CHECK-NEXT: @ %bb.5: @ %middle.block +; CHECK-NEXT: cmp r12, r3 +; CHECK-NEXT: it eq +; CHECK-NEXT: popeq {r4, r5, r6, pc} +; CHECK-NEXT: .LBB2_6: @ %for.body.preheader12 +; CHECK-NEXT: sub.w lr, r3, r12 +; CHECK-NEXT: add.w r0, r0, r12, lsl #2 +; CHECK-NEXT: add.w r1, r1, r12, lsl #2 +; CHECK-NEXT: add.w r2, r2, r12, lsl #2 +; CHECK-NEXT: dls lr, lr +; CHECK-NEXT: .LBB2_7: @ %for.body +; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: vldr s0, [r0] +; CHECK-NEXT: adds r0, #4 +; CHECK-NEXT: vldr s2, [r1] +; CHECK-NEXT: adds r1, #4 +; CHECK-NEXT: vldr s4, [r2] +; CHECK-NEXT: vfma.f32 s4, s2, s0 +; CHECK-NEXT: vstr s4, [r2] +; CHECK-NEXT: adds r2, #4 +; CHECK-NEXT: le lr, .LBB2_7 +; CHECK-NEXT: .LBB2_8: @ %for.cond.cleanup +; CHECK-NEXT: pop {r4, r5, r6, pc} +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup + +for.body.preheader: ; preds = %entry + %min.iters.check = icmp ult i32 %n, 8 + br i1 %min.iters.check, label %for.body.preheader12, label %vector.ph + +for.body.preheader12: ; preds = %middle.block, %for.body.preheader + %i.09.ph = phi i32 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ] + br label %for.body + +vector.ph: ; preds = %for.body.preheader + %n.vec = and i32 %n, -8 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %0 = getelementptr inbounds float, float* %A, i32 %index + %1 = bitcast float* %0 to <8 x float>* + %wide.load = load <8 x float>, <8 x float>* %1, align 4 + %2 = getelementptr inbounds float, float* %B, i32 %index + %3 = bitcast float* %2 to <8 x float>* + %wide.load10 = load <8 x float>, <8 x float>* %3, align 4 + %4 = fmul fast <8 x float> %wide.load10, %wide.load + %5 = getelementptr inbounds float, float* %C, i32 %index + %6 = bitcast float* %5 to <8 x float>* + %wide.load11 = load <8 x float>, <8 x float>* %6, align 4 + %7 = fadd fast <8 x float> %wide.load11, %4 + store <8 x float> %7, <8 x float>* %6, align 4 + %index.next = add i32 %index, 8 + %8 = icmp eq i32 %index.next, %n.vec + br i1 %8, label %middle.block, label %vector.body + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i32 %n.vec, %n + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader12 + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret void + +for.body: ; preds = %for.body.preheader12, %for.body + %i.09 = phi i32 [ %inc, %for.body ], [ %i.09.ph, %for.body.preheader12 ] + %arrayidx = getelementptr inbounds float, float* %A, i32 %i.09 + %9 = load float, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.09 + %10 = load float, float* %arrayidx1, align 4 + %mul = fmul fast float %10, %9 + %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.09 + %11 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %11, %mul + store float %add, float* %arrayidx2, align 4 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + +declare i32 @llvm.arm.mve.addv.v4i32(<4 x i32>, i32) +declare { i32, i32 } @llvm.arm.mve.vmlldava.v8i16(i32, i32, i32, i32, i32, <8 x i16>, <8 x i16>) From b29fca30fa61354ae7181e8e46872ee1c92c1139 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Thu, 16 
Apr 2020 17:16:22 -0400 Subject: [PATCH 092/216] [x86] auto-generate complete test checks; NFC --- llvm/test/CodeGen/X86/isint.ll | 90 ++++++++++++++++++++++++--------- llvm/test/CodeGen/X86/setoeq.ll | 23 ++++++++- 2 files changed, 86 insertions(+), 27 deletions(-) diff --git a/llvm/test/CodeGen/X86/isint.ll b/llvm/test/CodeGen/X86/isint.ll index 89e5f9481188..9d86b4b81bda 100644 --- a/llvm/test/CodeGen/X86/isint.ll +++ b/llvm/test/CodeGen/X86/isint.ll @@ -1,38 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=x86_64-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK -check-prefix=CHECK64 %s +; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK -check-prefix=CHECK32 %s ; PR19059 -; RUN: llc < %s -mtriple=i686-pc-unknown -mattr=+sse2 | FileCheck -check-prefix=CHECK -check-prefix=CHECK32 %s define i32 @isint_return(double %d) nounwind { -; CHECK-LABEL: isint_return: -; CHECK-NOT: xor -; CHECK: cvt +; CHECK64-LABEL: isint_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttsd2si %xmm0, %eax +; CHECK64-NEXT: cvtsi2sd %eax, %xmm1 +; CHECK64-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK64-NEXT: movq %xmm1, %rax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: # kill: def $eax killed $eax killed $rax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isint_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK32-NEXT: cvttsd2si %xmm0, %eax +; CHECK32-NEXT: cvtsi2sd %eax, %xmm1 +; CHECK32-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK32-NEXT: movd %xmm1, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: retl %i = fptosi double %d to i32 -; CHECK-NEXT: cvt %e = sitofp i32 %i to double -; CHECK: cmpeqsd %c = fcmp oeq double %d, %e -; CHECK32-NOT: movd {{.*}}, %r{{.*}} -; CHECK32-NOT: andq -; CHECK32-NEXT: movd -; CHECK64-NEXT: movq -; CHECK-NEXT: andl %z = zext i1 %c to i32 ret i32 %z } define i32 @isint_float_return(float %f) nounwind { -; CHECK-LABEL: isint_float_return: -; CHECK-NOT: xor -; CHECK: cvt +; CHECK64-LABEL: isint_float_return: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttps2dq %xmm0, %xmm1 +; CHECK64-NEXT: cvtdq2ps %xmm1, %xmm1 +; CHECK64-NEXT: cmpeqss %xmm0, %xmm1 +; CHECK64-NEXT: movd %xmm1, %eax +; CHECK64-NEXT: andl $1, %eax +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isint_float_return: +; CHECK32: # %bb.0: +; CHECK32-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK32-NEXT: cvttps2dq %xmm0, %xmm1 +; CHECK32-NEXT: cvtdq2ps %xmm1, %xmm1 +; CHECK32-NEXT: cmpeqss %xmm0, %xmm1 +; CHECK32-NEXT: movd %xmm1, %eax +; CHECK32-NEXT: andl $1, %eax +; CHECK32-NEXT: retl %i = fptosi float %f to i32 -; CHECK-NEXT: cvt %g = sitofp i32 %i to float -; CHECK: cmpeqss %c = fcmp oeq float %f, %g -; CHECK-NOT: movd {{.*}}, %r{{.*}} -; CHECK-NEXT: movd -; CHECK-NEXT: andl %z = zext i1 %c to i32 ret i32 %z } @@ -40,15 +60,35 @@ define i32 @isint_float_return(float %f) nounwind { declare void @foo() define void @isint_branch(double %d) nounwind { -; CHECK-LABEL: isint_branch: -; CHECK: cvt +; CHECK64-LABEL: isint_branch: +; CHECK64: # %bb.0: +; CHECK64-NEXT: cvttsd2si %xmm0, %eax +; CHECK64-NEXT: cvtsi2sd %eax, %xmm1 +; CHECK64-NEXT: ucomisd %xmm1, %xmm0 +; CHECK64-NEXT: jne .LBB2_2 +; CHECK64-NEXT: jp .LBB2_2 +; CHECK64-NEXT: # %bb.1: # %true +; CHECK64-NEXT: pushq %rax +; CHECK64-NEXT: callq foo +; CHECK64-NEXT: popq %rax +; CHECK64-NEXT: .LBB2_2: # %false +; CHECK64-NEXT: retq +; +; CHECK32-LABEL: isint_branch: +; CHECK32: # %bb.0: +; CHECK32-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; 
CHECK32-NEXT: cvttsd2si %xmm0, %eax +; CHECK32-NEXT: cvtsi2sd %eax, %xmm1 +; CHECK32-NEXT: ucomisd %xmm1, %xmm0 +; CHECK32-NEXT: jne .LBB2_2 +; CHECK32-NEXT: jp .LBB2_2 +; CHECK32-NEXT: # %bb.1: # %true +; CHECK32-NEXT: calll foo +; CHECK32-NEXT: .LBB2_2: # %false +; CHECK32-NEXT: retl %i = fptosi double %d to i32 -; CHECK-NEXT: cvt %e = sitofp i32 %i to double -; CHECK: ucomisd %c = fcmp oeq double %d, %e -; CHECK-NEXT: jne -; CHECK-NEXT: jp br i1 %c, label %true, label %false true: call void @foo() diff --git a/llvm/test/CodeGen/X86/setoeq.ll b/llvm/test/CodeGen/X86/setoeq.ll index 5c2f1d5c5da5..89069498c295 100644 --- a/llvm/test/CodeGen/X86/setoeq.ll +++ b/llvm/test/CodeGen/X86/setoeq.ll @@ -1,21 +1,40 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s define zeroext i8 @t(double %x) nounwind readnone { +; CHECK-LABEL: t: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvttsd2si %xmm0, %eax +; CHECK-NEXT: cvtsi2sd %eax, %xmm1 +; CHECK-NEXT: cmpeqsd %xmm0, %xmm1 +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retl entry: %0 = fptosi double %x to i32 ; [#uses=1] %1 = sitofp i32 %0 to double ; [#uses=1] %2 = fcmp oeq double %1, %x ; [#uses=1] %retval12 = zext i1 %2 to i8 ; [#uses=1] -; CHECK: cmpeqsd ret i8 %retval12 } define zeroext i8 @u(double %x) nounwind readnone { +; CHECK-LABEL: u: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero +; CHECK-NEXT: cvttsd2si %xmm0, %eax +; CHECK-NEXT: cvtsi2sd %eax, %xmm1 +; CHECK-NEXT: cmpneqsd %xmm0, %xmm1 +; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: andl $1, %eax +; CHECK-NEXT: # kill: def $al killed $al killed $eax +; CHECK-NEXT: retl entry: %0 = fptosi double %x to i32 ; [#uses=1] %1 = sitofp i32 %0 to double ; [#uses=1] %2 = fcmp une double %1, %x ; [#uses=1] %retval12 = zext i1 %2 to i8 ; [#uses=1] -; CHECK: cmpneqsd ret i8 %retval12 } From 68587af9ad10dce24c5164db2f627d2552629f27 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Thu, 16 Apr 2020 16:17:14 -0400 Subject: [PATCH 093/216] [libc++] Move handling of convenience substitutions outside of config.py These substitutions are strongly tied to the operation of the test format, so it makes sense to have them defined by the test format instead of the Lit configuration. They should be defined regardless of which configuration is in use. --- .../build_run.sh.cpp | 24 +++++ .../convenience_substitutions/verify.sh.cpp | 18 ++++ libcxx/utils/libcxx/test/config.py | 6 -- libcxx/utils/libcxx/test/format.py | 6 ++ libcxx/utils/libcxx/test/newformat.py | 93 ++++++++++++++----- 5 files changed, 119 insertions(+), 28 deletions(-) create mode 100644 libcxx/test/libcxx/selftest/newformat/convenience_substitutions/build_run.sh.cpp create mode 100644 libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp diff --git a/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/build_run.sh.cpp b/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/build_run.sh.cpp new file mode 100644 index 000000000000..e050f88cf009 --- /dev/null +++ b/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/build_run.sh.cpp @@ -0,0 +1,24 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Make sure that we provide the %{build} and %{run} convenience substitutions. + +// FILE_DEPENDENCIES: %t.exe +// RUN: %{build} +// RUN: %{run} "HELLO" + +#include +#include + +int main(int argc, char** argv) { + assert(argc == 2); + + std::string arg = argv[1]; + assert(arg == "HELLO"); + return 0; +} diff --git a/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp b/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp new file mode 100644 index 000000000000..664d25826a8d --- /dev/null +++ b/libcxx/test/libcxx/selftest/newformat/convenience_substitutions/verify.sh.cpp @@ -0,0 +1,18 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Make sure that we provide the %{verify} substitution. We can only test +// this when the verify-support feature is enabled, and it's difficult to +// check that it's enabled when it should be, so we just trust that it is. + +// REQUIRES: verify-support +// RUN: test -n "%{verify}" + +// RUN: %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify} + +// expected-no-diagnostics diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index b9085a4cb5bb..81cc976565a0 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -332,8 +332,6 @@ def configure_use_clang_verify(self): self.use_clang_verify = self.cxx.isVerifySupported() self.lit_config.note( "inferred use_clang_verify as: %r" % self.use_clang_verify) - if self.use_clang_verify: - self.config.available_features.add('verify-support') def configure_use_thread_safety(self): '''If set, run clang with -verify on failing tests.''' @@ -988,9 +986,6 @@ def configure_substitutions(self): sub.append(('%{compile_flags}', ' '.join(map(pipes.quote, self.cxx.compile_flags)))) sub.append(('%{link_flags}', ' '.join(map(pipes.quote, self.cxx.link_flags)))) sub.append(('%{link_libcxxabi}', pipes.quote(self.cxx.link_libcxxabi_flag))) - if self.cxx.isVerifySupported(): - sub.append(('%{verify}', ' '.join(self.cxx.verify_flags))) - sub.append(('%{build}', '%{cxx} -o %t.exe %s %{flags} %{compile_flags} %{link_flags}')) # Configure exec prefix substitutions. # Configure run env substitution. 
@@ -1010,7 +1005,6 @@ def configure_substitutions(self): sub.append(('%{exec}', '{} {} {} -- '.format(pipes.quote(sys.executable), pipes.quote(executor), ' '.join(exec_args)))) - sub.append(('%{run}', '%{exec} %t.exe')) if self.get_lit_conf('libcxx_gdb'): sub.append(('%{libcxx_gdb}', self.get_lit_conf('libcxx_gdb'))) diff --git a/libcxx/utils/libcxx/test/format.py b/libcxx/utils/libcxx/test/format.py index 7ad7d0a51bd0..99f10d7f2105 100644 --- a/libcxx/utils/libcxx/test/format.py +++ b/libcxx/utils/libcxx/test/format.py @@ -134,6 +134,12 @@ def _execute(self, test, lit_config): data_files = [f if os.path.isabs(f) else os.path.join(local_cwd, f) for f in data_files] substitutions.append(('%{file_dependencies}', ' '.join(data_files))) + # Add other convenience substitutions + if self.cxx.isVerifySupported(): + substitutions.append(('%{verify}', ' '.join(self.cxx.verify_flags))) + substitutions.append(('%{build}', '%{cxx} -o %t.exe %s %{flags} %{compile_flags} %{link_flags}')) + substitutions.append(('%{run}', '%{exec} %t.exe')) + script = lit.TestRunner.applySubstitutions(script, substitutions, recursion_limit=test.config.recursiveExpansionLimit) diff --git a/libcxx/utils/libcxx/test/newformat.py b/libcxx/utils/libcxx/test/newformat.py index 5e8e90706eaf..f704b968b6da 100644 --- a/libcxx/utils/libcxx/test/newformat.py +++ b/libcxx/utils/libcxx/test/newformat.py @@ -10,6 +10,7 @@ import os import pipes import re +import subprocess class CxxStandardLibraryTest(lit.formats.TestFormat): """ @@ -39,6 +40,9 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): test otherwise. This is supported only for backwards compatibility with the test suite. + + Substitution requirements + =============================== The test format operates by assuming that each test's configuration provides the following substitutions, which it will reuse in the shell scripts it constructs: @@ -49,11 +53,13 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): %{exec} - A command to prefix the execution of executables Note that when building an executable (as opposed to only compiling a source - file), all three of ${flags}, %{compile_flags} and %{link_flags} will be used + file), all three of %{flags}, %{compile_flags} and %{link_flags} will be used in the same command line. In other words, the test format doesn't perform separate compilation and linking steps in this case. + Additional supported directives + =============================== In addition to everything that's supported in Lit ShTests, this test format also understands the following directives inside test files: @@ -76,7 +82,39 @@ class CxxStandardLibraryTest(lit.formats.TestFormat): .sh.cpp test, which would be more powerful but perhaps overkill. - Design note: + Additional provided substitutions and features + ============================================== + The test format will define the following substitutions for use inside + tests: + + %{verify} + + This expands to the set of flags that must be passed to the + compiler in order to use Clang-verify, if that is supported. + + verify-support + + This Lit feature will be made available when the compiler supports + Clang-verify. This can be used to disable tests that require that + feature, such as `.verify.cpp` tests. + + %{file_dependencies} + + Expands to the list of files that this test depends on. + See FILE_DEPENDENCIES above. 
+ + %{build} + Expands to a command-line that builds the current source + file with the %{flags}, %{compile_flags} and %{link_flags} + substitutions, and that produces an executable named %t.exe. + + %{run} + Equivalent to `%{exec} %t.exe`. This is intended to be used + in conjunction with the %{build} substitution. + + + Design notes + ============ This test format never implicitly disables a type of test. For example, we could be tempted to automatically mark `.verify.cpp` tests as UNSUPPORTED when clang-verify isn't supported by the compiler. However, @@ -104,7 +142,7 @@ def getTestsInDirectory(self, testSuite, pathInSuite, litConfig, localConfig): if any([re.search(ext, filename) for ext in SUPPORTED_SUFFIXES]): yield lit.Test.Test(testSuite, pathInSuite + (filename,), localConfig) - def _checkSubstitutions(self, substitutions): + def _checkBaseSubstitutions(self, substitutions): substitutions = [s for (s, _) in substitutions] for s in ['%{cxx}', '%{compile_flags}', '%{link_flags}', '%{flags}', '%{exec}']: assert s in substitutions, "Required substitution {} was not provided".format(s) @@ -112,11 +150,11 @@ def _checkSubstitutions(self, substitutions): # Determine whether clang-verify is supported. def _supportsVerify(self, test, litConfig): command = "echo | %{cxx} -xc++ - -Werror -fsyntax-only -Xclang -verify-ignore-unexpected" - result = lit.TestRunner.executeShTest(test, litConfig, - useExternalSh=True, - preamble_commands=[command]) - compilerSupportsVerify = result.code != lit.Test.FAIL - return compilerSupportsVerify + command = lit.TestRunner.applySubstitutions([command], test.config.substitutions, + recursion_limit=test.config.recursiveExpansionLimit)[0] + devNull = open(os.devnull, 'w') + result = subprocess.call(command, shell=True, stdout=devNull, stderr=devNull) + return result == 0 def _disableWithModules(self, test, litConfig): with open(test.getSourcePath(), 'rb') as f: @@ -124,8 +162,7 @@ def _disableWithModules(self, test, litConfig): return b'#define _LIBCPP_ASSERT' in contents def execute(self, test, litConfig): - self._checkSubstitutions(test.config.substitutions) - VERIFY_FLAGS = '-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0' + self._checkBaseSubstitutions(test.config.substitutions) filename = test.path_in_suite[-1] # TODO(ldionne): We currently disable tests that re-define _LIBCPP_ASSERT @@ -168,7 +205,7 @@ def execute(self, test, litConfig): return self._executeShTest(test, litConfig, steps, fileDependencies=['%t.exe']) elif filename.endswith('.verify.cpp'): steps = [ - "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only " + VERIFY_FLAGS + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify}" ] return self._executeShTest(test, litConfig, steps) # Make sure to check these ones last, since they will match other @@ -185,7 +222,7 @@ def execute(self, test, litConfig): elif filename.endswith('.fail.cpp'): if self._supportsVerify(test, litConfig): steps = [ - "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only " + VERIFY_FLAGS + "%dbg(COMPILED WITH) %{cxx} %s %{flags} %{compile_flags} -fsyntax-only %{verify}" ] else: steps = [ @@ -205,6 +242,22 @@ def _executeShTest(self, test, litConfig, steps, fileDependencies=None): if test.config.unsupported: return lit.Test.Result(lit.Test.UNSUPPORTED, 'Test is unsupported') + # Get the default substitutions + tmpDir, tmpBase = lit.TestRunner.getTempPaths(test) + useExternalSh = True + substitutions = 
lit.TestRunner.getDefaultSubstitutions(test, tmpDir, tmpBase, + normalize_slashes=useExternalSh) + + # Add the %{build} and %{run} convenience substitutions + substitutions.append(('%{build}', '%{cxx} %s %{flags} %{compile_flags} %{link_flags} -o %t.exe')) + substitutions.append(('%{run}', '%{exec} %t.exe')) + + # Add the %{verify} substitution and the verify-support feature if Clang-verify is supported + if self._supportsVerify(test, litConfig): + test.config.available_features.add('verify-support') + substitutions.append(('%{verify}', '-Xclang -verify -Xclang -verify-ignore-unexpected=note -ferror-limit=0')) + + # Parse the test file, including custom directives additionalCompileFlags = [] fileDependencies = fileDependencies or [] parsers = [ @@ -223,16 +276,9 @@ def _executeShTest(self, test, litConfig, steps, fileDependencies=None): return parsed script += parsed - if litConfig.noExecute: - return lit.Test.Result(lit.Test.PASS) - # Add compile flags specified with ADDITIONAL_COMPILE_FLAGS. - self.addCompileFlags(test.config, *additionalCompileFlags) - - tmpDir, tmpBase = lit.TestRunner.getTempPaths(test) - useExternalSh = True - substitutions = lit.TestRunner.getDefaultSubstitutions(test, tmpDir, tmpBase, - normalize_slashes=useExternalSh) + substitutions = [(s, x + ' ' + ' '.join(additionalCompileFlags)) if s == '%{compile_flags}' + else (s, x) for (s, x) in substitutions] # Perform substitutions inside FILE_DEPENDENCIES lines (or injected dependencies). # This allows using variables like %t in file dependencies. Also note that we really @@ -251,4 +297,7 @@ def _executeShTest(self, test, litConfig, steps, fileDependencies=None): script = lit.TestRunner.applySubstitutions(script, substitutions, recursion_limit=test.config.recursiveExpansionLimit) - return lit.TestRunner._runShTest(test, litConfig, useExternalSh, script, tmpBase) + if litConfig.noExecute: + return lit.Test.Result(lit.Test.PASS) + else: + return lit.TestRunner._runShTest(test, litConfig, useExternalSh, script, tmpBase) From f072942fe2f94ba19482e62427e89864fb875782 Mon Sep 17 00:00:00 2001 From: Alex Zinenko Date: Thu, 16 Apr 2020 23:24:58 +0200 Subject: [PATCH 094/216] [mlir] ODS: support operations with resizable operand lists MLIR supports operations with resizable operand lists, but this property must be indicated during the construction of such operations. It can be done programmatically by calling a function on OperationState. Introduce an ODS-internal trait `ResizableOperandList` to indicate such operations and use it when generating the bodies of various `build` functions as well as the `parse` function when the declarative assembly format is used. Differential Revision: https://reviews.llvm.org/D78292 --- mlir/include/mlir/IR/OpBase.td | 4 ++++ mlir/include/mlir/TableGen/Operator.h | 3 +++ mlir/lib/TableGen/Operator.cpp | 4 ++++ mlir/test/mlir-tblgen/op-operand.td | 20 ++++++++++++++++++++ mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 16 +++++++++++++--- mlir/tools/mlir-tblgen/OpFormatGen.cpp | 4 ++++ 6 files changed, 48 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 14ef45ed4c7f..282267daf339 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1653,6 +1653,10 @@ class HasParent def FirstAttrDerivedResultType : GenInternalOpTrait<"FirstAttrDerivedResultType">; +// Op has a resizable operand list. Auto-generated build and parse functions +// should construct it as such. 
+def ResizableOperandList : GenInternalOpTrait<"ResizableOperandList">; + // TODO(antiagainst): Turn the following into normal traits and generate // verification for them. diff --git a/mlir/include/mlir/TableGen/Operator.h b/mlir/include/mlir/TableGen/Operator.h index e65bc55a84f5..91f10bd8e29c 100644 --- a/mlir/include/mlir/TableGen/Operator.h +++ b/mlir/include/mlir/TableGen/Operator.h @@ -165,6 +165,9 @@ class Operator { // requiring the raw MLIR trait here. const OpTrait *getTrait(llvm::StringRef trait) const; + // Returns "true" if Op has a ResizableOperandList trait. + bool hasResizableOperandList() const; + // Regions. using const_region_iterator = const NamedRegion *; const_region_iterator region_begin() const; diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp index 808ba7aabc76..f967b76d074f 100644 --- a/mlir/lib/TableGen/Operator.cpp +++ b/mlir/lib/TableGen/Operator.cpp @@ -169,6 +169,10 @@ const tblgen::OpTrait *tblgen::Operator::getTrait(StringRef trait) const { return nullptr; } +bool tblgen::Operator::hasResizableOperandList() const { + return getTrait("OpTrait::ResizableOperandList") != nullptr; +} + auto tblgen::Operator::region_begin() const -> const_region_iterator { return regions.begin(); } diff --git a/mlir/test/mlir-tblgen/op-operand.td b/mlir/test/mlir-tblgen/op-operand.td index 2ffde33c5331..42aadfbdc119 100644 --- a/mlir/test/mlir-tblgen/op-operand.td +++ b/mlir/test/mlir-tblgen/op-operand.td @@ -58,3 +58,23 @@ def OpD : NS_Op<"mix_variadic_and_normal_inputs_op", [SameVariadicOperandSize]> // CHECK-NEXT: odsState.addOperands(input1); // CHECK-NEXT: odsState.addOperands(input2); // CHECK-NEXT: odsState.addOperands(input3); +// CHECK-NOT: odsState.setOperandListToResizable + +// Check that resizable operand list flag is set up correctly in all generated +// builders and in the parser. 
+def OpE : NS_Op<"resizable_operand_list", [ResizableOperandList]> { + let arguments = (ins Variadic:$input); + let assemblyFormat = "$input attr-dict `:` type($input)"; +} + +// CHECK-LABEL: OpE::build(Builder *odsBuilder, OperationState &odsState, ValueRange +// CHECK: odsState.setOperandListToResizable() + +// CHECK-LABEL: OpE::build(Builder *odsBuilder, OperationState &odsState, ArrayRef +// CHECK: odsState.setOperandListToResizable() + +// CHECK-LABEL: OpE::build(Builder *, OperationState +// CHECK: odsState.setOperandListToResizable() + +// CHECK-LABEL: OpE::parse +// CHECK: result.setOperandListToResizable() diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 75edc1b7d256..41f392e67f62 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -685,6 +685,7 @@ void OpEmitter::genSeparateArgParamBuilder() { auto &m = opClass.newMethod("void", "build", paramList, OpMethod::MP_Static); auto &body = m.body(); + genCodeForAddingArgAndRegionForBuilder( body, /*isRawValueAttr=*/attrType == AttrParamKind::UnwrappedValue); @@ -762,7 +763,9 @@ void OpEmitter::genUseOperandAsResultTypeCollectiveParamBuilder() { auto &body = m.body(); // Operands - body << " " << builderOpState << ".addOperands(operands);\n\n"; + body << " " << builderOpState << ".addOperands(operands);\n"; + if (op.hasResizableOperandList()) + body << formatv(" {0}.setOperandListToResizable();\n\n", builderOpState); // Attributes body << " " << builderOpState << ".addAttributes(attributes);\n"; @@ -843,7 +846,10 @@ void OpEmitter::genUseAttrAsResultTypeBuilder() { } // Operands - body << " " << builderOpState << ".addOperands(operands);\n\n"; + body << " " << builderOpState << ".addOperands(operands);\n"; + if (op.hasResizableOperandList()) + body << formatv(" {0}.setOperandListToResizable();\n\n", builderOpState); + // Attributes body << " " << builderOpState << ".addAttributes(attributes);\n"; @@ -929,7 +935,9 @@ void OpEmitter::genCollectiveParamBuilder() { << (numVariadicOperands != 0 ? " >= " : " == ") << numNonVariadicOperands << "u && \"mismatched number of parameters\");\n"; - body << " " << builderOpState << ".addOperands(operands);\n\n"; + body << " " << builderOpState << ".addOperands(operands);\n"; + if (op.hasResizableOperandList()) + body << formatv(" {0}.setOperandListToResizable();\n\n", builderOpState); // Attributes body << " " << builderOpState << ".addAttributes(attributes);\n"; @@ -1099,6 +1107,8 @@ void OpEmitter::genCodeForAddingArgAndRegionForBuilder(OpMethodBody &body, body << " if (" << argName << ")\n "; body << " " << builderOpState << ".addOperands(" << argName << ");\n"; } + if (op.hasResizableOperandList()) + body << formatv(" {0}.setOperandListToResizable();\n", builderOpState); // If the operation has the operand segment size attribute, add it here. if (op.getTrait("OpTrait::AttrSizedOperandSegments")) { diff --git a/mlir/tools/mlir-tblgen/OpFormatGen.cpp b/mlir/tools/mlir-tblgen/OpFormatGen.cpp index a8116e4290b4..f76a2d3af9e8 100644 --- a/mlir/tools/mlir-tblgen/OpFormatGen.cpp +++ b/mlir/tools/mlir-tblgen/OpFormatGen.cpp @@ -706,6 +706,10 @@ void OperationFormat::genParser(Operator &op, OpClass &opClass) { genParserSuccessorResolution(op, body); genParserVariadicSegmentResolution(op, body); + // Mark the operation as having resizable operand list if required. 
+ if (op.hasResizableOperandList()) + body << " result.setOperandListToResizable();\n"; + body << " return success();\n"; } From ccc43e337cfa62b4787c39aefd3559ed39f78556 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 16 Apr 2020 23:35:38 +0200 Subject: [PATCH 095/216] Expose ATOMIC in the clang python bindings Following 38ca7b11db2d22e0fdfbff3f19276f9796f747d3 --- clang/bindings/python/clang/cindex.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/bindings/python/clang/cindex.py b/clang/bindings/python/clang/cindex.py index 8e5a9fe00683..aceaa131f155 100644 --- a/clang/bindings/python/clang/cindex.py +++ b/clang/bindings/python/clang/cindex.py @@ -2122,6 +2122,7 @@ def __repr__(self): TypeKind.OCLRESERVEID = TypeKind(160) TypeKind.EXTVECTOR = TypeKind(176) +TypeKind.ATOMIC = TypeKind(177) class RefQualifierKind(BaseEnumeration): """Describes a specific ref-qualifier of a type.""" From c2171457e28130cbb61fae74ffe0d53547a7a505 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 11 Apr 2020 21:47:48 +0100 Subject: [PATCH 096/216] [SCCP] Add widening test case. --- llvm/test/Transforms/SCCP/widening.ll | 444 ++++++++++++++++++++++++++ 1 file changed, 444 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/widening.ll diff --git a/llvm/test/Transforms/SCCP/widening.ll b/llvm/test/Transforms/SCCP/widening.ll new file mode 100644 index 000000000000..b475e75ed362 --- /dev/null +++ b/llvm/test/Transforms/SCCP/widening.ll @@ -0,0 +1,444 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -sccp -S | FileCheck --check-prefix=SCCP %s +; RUN: opt %s -ipsccp -S | FileCheck --check-prefix=IPSCCP %s + +; Test different widening scenarios. + +declare void @use(i1) +declare i1 @cond() + +define void @test_2_incoming_constants(i32 %x) { +; SCCP-LABEL: @test_2_incoming_constants( +; SCCP-NEXT: entry: +; SCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; SCCP: bb1: +; SCCP-NEXT: br label [[EXIT]] +; SCCP: exit: +; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ] +; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 +; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; SCCP-NEXT: call void @use(i1 [[T_1]]) +; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @test_2_incoming_constants( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; IPSCCP: bb1: +; IPSCCP-NEXT: br label [[EXIT]] +; IPSCCP: exit: +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ] +; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 +; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: ret void +; +entry: + %c.1 = call i1 @cond() + br i1 %c.1, label %bb1, label %exit + +bb1: + br label %exit + +exit: + %p = phi i32 [0, %entry], [1, %bb1] + %a = add i32 %p, 1 + %t.1 = icmp ult i32 %a, 20 + call void @use(i1 %t.1) + %f.1 = icmp ugt i32 %a, 10 + call void @use(i1 %f.1) + ret void +} + +define void @test_3_incoming_constants(i32 %x) { +; SCCP-LABEL: @test_3_incoming_constants( +; SCCP-NEXT: entry: +; SCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; SCCP: bb1: +; SCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; SCCP-NEXT: 
br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; SCCP: bb2: +; SCCP-NEXT: br label [[EXIT]] +; SCCP: exit: +; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ] +; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 +; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; SCCP-NEXT: call void @use(i1 [[T_1]]) +; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @test_3_incoming_constants( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; IPSCCP: bb1: +; IPSCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; IPSCCP: bb2: +; IPSCCP-NEXT: br label [[EXIT]] +; IPSCCP: exit: +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ] +; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 +; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: ret void +; +entry: + %c.1 = call i1 @cond() + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = call i1 @cond() + br i1 %c.2, label %bb2, label %exit + +bb2: + br label %exit + +exit: + %p = phi i32 [0, %entry], [1, %bb1], [2, %bb2] + %a = add i32 %p, 1 + %t.1 = icmp ult i32 %a, 20 + call void @use(i1 %t.1) + %f.1 = icmp ugt i32 %a, 10 + call void @use(i1 %f.1) + ret void +} + +define void @test_5_incoming_constants(i32 %x) { +; SCCP-LABEL: @test_5_incoming_constants( +; SCCP-NEXT: entry: +; SCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; SCCP: bb1: +; SCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; SCCP: bb2: +; SCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; SCCP: bb3: +; SCCP-NEXT: [[C_4:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_4]], label [[BB4:%.*]], label [[EXIT]] +; SCCP: bb4: +; SCCP-NEXT: br label [[EXIT]] +; SCCP: exit: +; SCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ] +; SCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 +; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; SCCP-NEXT: call void @use(i1 [[T_1]]) +; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @test_5_incoming_constants( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_1]], label [[BB1:%.*]], label [[EXIT:%.*]] +; IPSCCP: bb1: +; IPSCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_2]], label [[BB2:%.*]], label [[EXIT]] +; IPSCCP: bb2: +; IPSCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; IPSCCP: bb3: +; IPSCCP-NEXT: [[C_4:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_4]], label [[BB4:%.*]], label [[EXIT]] +; IPSCCP: bb4: +; IPSCCP-NEXT: br label [[EXIT]] +; IPSCCP: exit: +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ 1, [[BB1]] ], [ 2, [[BB2]] ], [ 3, [[BB3]] ], [ 4, [[BB4]] ] +; IPSCCP-NEXT: [[A:%.*]] = add i32 [[P]], 1 +; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; IPSCCP-NEXT: call void @use(i1 
[[F_1]]) +; IPSCCP-NEXT: ret void +; +entry: + %c.1 = call i1 @cond() + br i1 %c.1, label %bb1, label %exit + +bb1: + %c.2 = call i1 @cond() + br i1 %c.2, label %bb2, label %exit + +bb2: + %c.3 = call i1 @cond() + br i1 %c.3, label %bb3, label %exit + +bb3: + %c.4 = call i1 @cond() + br i1 %c.4, label %bb4, label %exit + +bb4: + br label %exit + +exit: + %p = phi i32 [0, %entry], [1, %bb1], [2, %bb2], [3, %bb3], [4, %bb4] + %a = add i32 %p, 1 + %t.1 = icmp ult i32 %a, 20 + call void @use(i1 %t.1) + %f.1 = icmp ugt i32 %a, 10 + call void @use(i1 %f.1) + ret void +} + +; For the rotated_loop_* test cases %p and %a are extended on each iteration. + +define void @rotated_loop_2(i32 %x) { +; SCCP-LABEL: @rotated_loop_2( +; SCCP-NEXT: entry: +; SCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_1]], label [[EXIT:%.*]], label [[BB1:%.*]] +; SCCP: bb1: +; SCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_2]], label [[EXIT]], label [[BB2:%.*]] +; SCCP: bb2: +; SCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; SCCP: bb3: +; SCCP-NEXT: br label [[EXIT]] +; SCCP: exit: +; SCCP-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 5, [[BB3]] ], [ [[A:%.*]], [[EXIT]] ] +; SCCP-NEXT: [[A]] = add i32 [[P]], 1 +; SCCP-NEXT: call void @use(i1 true) +; SCCP-NEXT: call void @use(i1 false) +; SCCP-NEXT: br i1 false, label [[EXIT]], label [[EXIT_1:%.*]] +; SCCP: exit.1: +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @rotated_loop_2( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_1]], label [[EXIT:%.*]], label [[BB1:%.*]] +; IPSCCP: bb1: +; IPSCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_2]], label [[EXIT]], label [[BB2:%.*]] +; IPSCCP: bb2: +; IPSCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; IPSCCP: bb3: +; IPSCCP-NEXT: br label [[EXIT]] +; IPSCCP: exit: +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 5, [[BB3]] ], [ [[A:%.*]], [[EXIT]] ] +; IPSCCP-NEXT: [[A]] = add i32 [[P]], 1 +; IPSCCP-NEXT: call void @use(i1 true) +; IPSCCP-NEXT: call void @use(i1 false) +; IPSCCP-NEXT: br i1 false, label [[EXIT]], label [[EXIT_1:%.*]] +; IPSCCP: exit.1: +; IPSCCP-NEXT: ret void +; +entry: + %c.1 = call i1 @cond() + br i1 %c.1, label %exit, label %bb1 + +bb1: + %c.2 = call i1 @cond() + br i1 %c.2, label %exit, label %bb2 + +bb2: + %c.3 = call i1 @cond() + br i1 %c.3, label %bb3, label %exit + +bb3: + br label %exit + +exit: + %p = phi i32 [1, %entry], [3, %bb1], [2, %bb2], [5, %bb3], [%a, %exit] + %a = add i32 %p, 1 + %t.1 = icmp ult i32 %a, 20 + call void @use(i1 %t.1) + %f.1 = icmp ugt i32 %a, 10 + call void @use(i1 %f.1) + %c.4 = icmp ult i32 %a, 2 + br i1 %c.4, label %exit, label %exit.1 + +exit.1: + ret void +} + +define void @rotated_loop_3(i32 %x) { +; SCCP-LABEL: @rotated_loop_3( +; SCCP-NEXT: entry: +; SCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_1]], label [[EXIT:%.*]], label [[BB1:%.*]] +; SCCP: bb1: +; SCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_2]], label [[EXIT]], label [[BB2:%.*]] +; SCCP: bb2: +; SCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; SCCP-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; SCCP: bb3: +; SCCP-NEXT: br label [[EXIT]] +; SCCP: exit: +; SCCP-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 5, [[BB3]] ], [ [[A:%.*]], [[EXIT]] ] +; 
SCCP-NEXT: [[A]] = add i32 [[P]], 1 +; SCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; SCCP-NEXT: call void @use(i1 [[T_1]]) +; SCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; SCCP-NEXT: call void @use(i1 [[F_1]]) +; SCCP-NEXT: [[C_4:%.*]] = icmp ult i32 [[A]], 3 +; SCCP-NEXT: br i1 [[C_4]], label [[EXIT]], label [[EXIT_1:%.*]] +; SCCP: exit.1: +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @rotated_loop_3( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: [[C_1:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_1]], label [[EXIT:%.*]], label [[BB1:%.*]] +; IPSCCP: bb1: +; IPSCCP-NEXT: [[C_2:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_2]], label [[EXIT]], label [[BB2:%.*]] +; IPSCCP: bb2: +; IPSCCP-NEXT: [[C_3:%.*]] = call i1 @cond() +; IPSCCP-NEXT: br i1 [[C_3]], label [[BB3:%.*]], label [[EXIT]] +; IPSCCP: bb3: +; IPSCCP-NEXT: br label [[EXIT]] +; IPSCCP: exit: +; IPSCCP-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ 3, [[BB1]] ], [ 2, [[BB2]] ], [ 5, [[BB3]] ], [ [[A:%.*]], [[EXIT]] ] +; IPSCCP-NEXT: [[A]] = add i32 [[P]], 1 +; IPSCCP-NEXT: [[T_1:%.*]] = icmp ult i32 [[A]], 20 +; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: [[F_1:%.*]] = icmp ugt i32 [[A]], 10 +; IPSCCP-NEXT: call void @use(i1 [[F_1]]) +; IPSCCP-NEXT: [[C_4:%.*]] = icmp ult i32 [[A]], 3 +; IPSCCP-NEXT: br i1 [[C_4]], label [[EXIT]], label [[EXIT_1:%.*]] +; IPSCCP: exit.1: +; IPSCCP-NEXT: ret void +; +entry: + %c.1 = call i1 @cond() + br i1 %c.1, label %exit, label %bb1 + +bb1: + %c.2 = call i1 @cond() + br i1 %c.2, label %exit, label %bb2 + +bb2: + %c.3 = call i1 @cond() + br i1 %c.3, label %bb3, label %exit + +bb3: + br label %exit + +exit: + %p = phi i32 [1, %entry], [3, %bb1], [2, %bb2], [5, %bb3], [%a, %exit] + %a = add i32 %p, 1 + %t.1 = icmp ult i32 %a, 20 + call void @use(i1 %t.1) + %f.1 = icmp ugt i32 %a, 10 + call void @use(i1 %f.1) + %c.4 = icmp ult i32 %a, 3 + br i1 %c.4, label %exit, label %exit.1 + +exit.1: + ret void +} + +; For the loop_with_header_* tests, %iv and %a change on each iteration, but we +; can use the range imposed by the condition %c.1 when widening. 
+define void @loop_with_header_1(i32 %x) { +; SCCP-LABEL: @loop_with_header_1( +; SCCP-NEXT: entry: +; SCCP-NEXT: br label [[LOOP_HEADER:%.*]] +; SCCP: loop.header: +; SCCP-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY:%.*]] ] +; SCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 2 +; SCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] +; SCCP: loop.body: +; SCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 2 +; SCCP-NEXT: call void @use(i1 [[T_1]]) +; SCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 +; SCCP-NEXT: br label [[LOOP_HEADER]] +; SCCP: exit: +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @loop_with_header_1( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: br label [[LOOP_HEADER:%.*]] +; IPSCCP: loop.header: +; IPSCCP-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY:%.*]] ] +; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 2 +; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] +; IPSCCP: loop.body: +; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 2 +; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 +; IPSCCP-NEXT: br label [[LOOP_HEADER]] +; IPSCCP: exit: +; IPSCCP-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [0, %entry], [%iv.next, %loop.body] + %c.1 = icmp slt i32 %iv, 2 + br i1 %c.1, label %loop.body, label %exit + +loop.body: + %t.1 = icmp slt i32 %iv, 2 + call void @use(i1 %t.1) + %iv.next = add nsw i32 %iv, 1 + br label %loop.header + +exit: + ret void +} + +define void @loop_with_header_2(i32 %x) { +; SCCP-LABEL: @loop_with_header_2( +; SCCP-NEXT: entry: +; SCCP-NEXT: br label [[LOOP_HEADER:%.*]] +; SCCP: loop.header: +; SCCP-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY:%.*]] ] +; SCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 200 +; SCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] +; SCCP: loop.body: +; SCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 200 +; SCCP-NEXT: call void @use(i1 [[T_1]]) +; SCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 +; SCCP-NEXT: br label [[LOOP_HEADER]] +; SCCP: exit: +; SCCP-NEXT: ret void +; +; IPSCCP-LABEL: @loop_with_header_2( +; IPSCCP-NEXT: entry: +; IPSCCP-NEXT: br label [[LOOP_HEADER:%.*]] +; IPSCCP: loop.header: +; IPSCCP-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_BODY:%.*]] ] +; IPSCCP-NEXT: [[C_1:%.*]] = icmp slt i32 [[IV]], 200 +; IPSCCP-NEXT: br i1 [[C_1]], label [[LOOP_BODY]], label [[EXIT:%.*]] +; IPSCCP: loop.body: +; IPSCCP-NEXT: [[T_1:%.*]] = icmp slt i32 [[IV]], 200 +; IPSCCP-NEXT: call void @use(i1 [[T_1]]) +; IPSCCP-NEXT: [[IV_NEXT]] = add nsw i32 [[IV]], 1 +; IPSCCP-NEXT: br label [[LOOP_HEADER]] +; IPSCCP: exit: +; IPSCCP-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [0, %entry], [%iv.next, %loop.body] + %c.1 = icmp slt i32 %iv, 200 + br i1 %c.1, label %loop.body, label %exit + +loop.body: + %t.1 = icmp slt i32 %iv, 200 + call void @use(i1 %t.1) + %iv.next = add nsw i32 %iv, 1 + br label %loop.header + +exit: + ret void +} From 314f00a03489c84b764de2a6f4401996865ff281 Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Thu, 16 Apr 2020 11:52:05 -0700 Subject: [PATCH 097/216] [MLIR][cmake] Remove redundant add_dependencies() Libraries declared as target_link_libraries() do not also need to be declared as dependencies using add_dependencies(). 
Differential Revision: https://reviews.llvm.org/D78320 --- mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt | 6 ++---- mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt | 9 +++------ mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt | 9 +++------ mlir/lib/Conversion/VectorToLoops/CMakeLists.txt | 9 +++------ mlir/lib/EDSC/CMakeLists.txt | 1 - 5 files changed, 11 insertions(+), 23 deletions(-) diff --git a/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt b/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt index 9df0d4fde7f1..11397b9e105e 100644 --- a/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/AVX512ToLLVM/CMakeLists.txt @@ -8,7 +8,8 @@ add_mlir_conversion_library(MLIRAVX512ToLLVM MLIRConversionPassIncGen ) -set(LIBS +target_link_libraries(MLIRAVX512ToLLVM + PUBLIC MLIRAVX512 MLIRLLVMAVX512 MLIRLLVMIR @@ -17,6 +18,3 @@ set(LIBS LLVMCore LLVMSupport ) - -add_dependencies(MLIRAVX512ToLLVM ${LIBS}) -target_link_libraries(MLIRAVX512ToLLVM PUBLIC ${LIBS}) diff --git a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt index 72e0966f447b..8519566e4fb2 100644 --- a/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/LinalgToLLVM/CMakeLists.txt @@ -7,7 +7,9 @@ add_mlir_conversion_library(MLIRLinalgToLLVM DEPENDS MLIRConversionPassIncGen ) -set(LIBS + +target_link_libraries(MLIRLinalgToLLVM + PUBLIC MLIRAffineToStandard MLIREDSC MLIRIR @@ -20,8 +22,3 @@ set(LIBS LLVMCore LLVMSupport ) - -add_dependencies(MLIRLinalgToLLVM ${LIBS}) -target_link_libraries(MLIRLinalgToLLVM - PUBLIC - ${LIBS}) diff --git a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt index ffc6da95da7e..6d6b3b87c073 100644 --- a/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLLVM/CMakeLists.txt @@ -7,7 +7,9 @@ add_mlir_conversion_library(MLIRVectorToLLVM DEPENDS MLIRConversionPassIncGen ) -set(LIBS + +target_link_libraries(MLIRVectorToLLVM + PUBLIC MLIRLLVMIR MLIRStandardToLLVM MLIRVector @@ -15,8 +17,3 @@ set(LIBS LLVMCore LLVMSupport ) - -add_dependencies(MLIRVectorToLLVM ${LIBS}) -target_link_libraries(MLIRVectorToLLVM - PUBLIC - ${LIBS}) diff --git a/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt b/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt index 41414fc9bea7..515c0e2d0344 100644 --- a/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt +++ b/mlir/lib/Conversion/VectorToLoops/CMakeLists.txt @@ -4,7 +4,9 @@ add_mlir_conversion_library(MLIRVectorToLoops ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Conversion/VectorToLoops ) -set(LIBS + +target_link_libraries(MLIRVectorToLoops + PUBLIC MLIREDSC MLIRAffineEDSC MLIRLLVMIR @@ -12,8 +14,3 @@ set(LIBS LLVMCore LLVMSupport ) - -add_dependencies(MLIRVectorToLoops ${LIBS}) -target_link_libraries(MLIRVectorToLoops - PUBLIC - ${LIBS}) diff --git a/mlir/lib/EDSC/CMakeLists.txt b/mlir/lib/EDSC/CMakeLists.txt index 791ef0de6458..1435dbb22926 100644 --- a/mlir/lib/EDSC/CMakeLists.txt +++ b/mlir/lib/EDSC/CMakeLists.txt @@ -22,7 +22,6 @@ add_mlir_library(MLIREDSCInterface ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/EDSC ) -add_dependencies(MLIREDSCInterface MLIRIR) target_link_libraries(MLIREDSCInterface PUBLIC MLIRIR From cc5c58889ec624461d41bdd0366ee88cf2805564 Mon Sep 17 00:00:00 2001 From: Bob Haarman Date: Thu, 2 Apr 2020 23:26:04 -0700 Subject: [PATCH 098/216] [WPD] Avoid noalias assumptions in unique return value optimization Summary: Changes the type of the @__typeid_.*_unique_member imports we 
generate for unique return value optimization from i8 to [0 x i8]. This prevents assuming that these imports do not alias, such as when two unique return values occur in the same vtable. Fixes PR45393. Reviewers: tejohnson, pcc Reviewed By: pcc Subscribers: aganea, hiraditya, rnk, george.burgess.iv, dblaikie, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77421 --- .../lib/Transforms/IPO/WholeProgramDevirt.cpp | 12 +++- .../Inputs/unique-retval-same-vtable.yaml | 17 ++++++ .../Transforms/WholeProgramDevirt/import.ll | 18 +++--- .../unique-retval-same-vtable.ll | 59 +++++++++++++++++++ .../WholeProgramDevirt/unique-retval.ll | 7 +-- 5 files changed, 96 insertions(+), 17 deletions(-) create mode 100644 llvm/test/Transforms/WholeProgramDevirt/Inputs/unique-retval-same-vtable.yaml create mode 100644 llvm/test/Transforms/WholeProgramDevirt/unique-retval-same-vtable.ll diff --git a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp index b1905e2af05a..60f8e935ffdd 100644 --- a/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp +++ b/llvm/lib/Transforms/IPO/WholeProgramDevirt.cpp @@ -500,6 +500,10 @@ struct DevirtModule { IntegerType *Int32Ty; IntegerType *Int64Ty; IntegerType *IntPtrTy; + /// Sizeless array type, used for imported vtables. This provides a signal + /// to analyzers that these imports may alias, as they do for example + /// when multiple unique return values occur in the same vtable. + ArrayType *Int8Arr0Ty; bool RemarksEnabled; function_ref OREGetter; @@ -529,6 +533,7 @@ struct DevirtModule { Int32Ty(Type::getInt32Ty(M.getContext())), Int64Ty(Type::getInt64Ty(M.getContext())), IntPtrTy(M.getDataLayout().getIntPtrType(M.getContext(), 0)), + Int8Arr0Ty(ArrayType::get(Type::getInt8Ty(M.getContext()), 0)), RemarksEnabled(areRemarksEnabled()), OREGetter(OREGetter) { assert(!(ExportSummary && ImportSummary)); FunctionsToSkip.init(SkipFunctionNames); @@ -1415,7 +1420,8 @@ void DevirtModule::exportConstant(VTableSlot Slot, ArrayRef Args, Constant *DevirtModule::importGlobal(VTableSlot Slot, ArrayRef Args, StringRef Name) { - Constant *C = M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Ty); + Constant *C = + M.getOrInsertGlobal(getGlobalName(Slot, Args, Name), Int8Arr0Ty); auto *GV = dyn_cast(C); if (GV) GV->setVisibility(GlobalValue::HiddenVisibility); @@ -1457,8 +1463,8 @@ void DevirtModule::applyUniqueRetValOpt(CallSiteInfo &CSInfo, StringRef FnName, for (auto &&Call : CSInfo.CallSites) { IRBuilder<> B(Call.CS.getInstruction()); Value *Cmp = - B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, - B.CreateBitCast(Call.VTable, Int8PtrTy), UniqueMemberAddr); + B.CreateICmp(IsOne ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE, Call.VTable, + B.CreateBitCast(UniqueMemberAddr, Call.VTable->getType())); Cmp = B.CreateZExt(Cmp, Call.CS->getType()); Call.replaceAndErase("unique-ret-val", FnName, RemarksEnabled, OREGetter, Cmp); diff --git a/llvm/test/Transforms/WholeProgramDevirt/Inputs/unique-retval-same-vtable.yaml b/llvm/test/Transforms/WholeProgramDevirt/Inputs/unique-retval-same-vtable.yaml new file mode 100644 index 000000000000..198a7fc29977 --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/Inputs/unique-retval-same-vtable.yaml @@ -0,0 +1,17 @@ +--- +TypeIdMap: + _ZTS1C: + WPDRes: + 16: + Kind: Indir + ResByArg: + '': + Kind: UniqueRetVal + Info: 1 + 24: + Kind: Indir + ResByArg: + '': + Kind: UniqueRetVal + Info: 1 +... 
diff --git a/llvm/test/Transforms/WholeProgramDevirt/import.ll b/llvm/test/Transforms/WholeProgramDevirt/import.ll index 525d88cb6624..8beb27db26fe 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/import.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/import.ll @@ -11,10 +11,10 @@ target datalayout = "e-p:64:64" target triple = "x86_64-unknown-linux-gnu" -; VCP-X86: @__typeid_typeid1_0_1_byte = external hidden global i8, !absolute_symbol !0 -; VCP-X86: @__typeid_typeid1_0_1_bit = external hidden global i8, !absolute_symbol !1 -; VCP-X86: @__typeid_typeid2_8_3_byte = external hidden global i8, !absolute_symbol !0 -; VCP-X86: @__typeid_typeid2_8_3_bit = external hidden global i8, !absolute_symbol !1 +; VCP-X86: @__typeid_typeid1_0_1_byte = external hidden global [0 x i8], !absolute_symbol !0 +; VCP-X86: @__typeid_typeid1_0_1_bit = external hidden global [0 x i8], !absolute_symbol !1 +; VCP-X86: @__typeid_typeid2_8_3_byte = external hidden global [0 x i8], !absolute_symbol !0 +; VCP-X86: @__typeid_typeid2_8_3_bit = external hidden global [0 x i8], !absolute_symbol !1 ; Test cases where the argument values are known and we can apply virtual ; constant propagation. @@ -34,7 +34,7 @@ define i32 @call1(i8* %obj) #0 { ; SINGLE-IMPL: call i32 bitcast (void ()* @singleimpl1 to i32 (i8*, i32)*) %result = call i32 %fptr_casted(i8* %obj, i32 1) ; UNIFORM-RET-VAL: ret i32 42 - ; VCP-X86: [[GEP1:%.*]] = getelementptr i8, i8* [[VT1]], i32 ptrtoint (i8* @__typeid_typeid1_0_1_byte to i32) + ; VCP-X86: [[GEP1:%.*]] = getelementptr i8, i8* [[VT1]], i32 ptrtoint ([0 x i8]* @__typeid_typeid1_0_1_byte to i32) ; VCP-ARM: [[GEP1:%.*]] = getelementptr i8, i8* [[VT1]], i32 42 ; VCP: [[BC1:%.*]] = bitcast i8* [[GEP1]] to i32* ; VCP: [[LOAD1:%.*]] = load i32, i32* [[BC1]] @@ -85,13 +85,13 @@ define i1 @call3(i8* %obj) #0 { cont: %fptr_casted = bitcast i8* %fptr to i1 (i8*, i32)* %result = call i1 %fptr_casted(i8* %obj, i32 3) - ; UNIQUE-RET-VAL0: icmp ne i8* %vtablei8, @__typeid_typeid2_8_3_unique_member - ; UNIQUE-RET-VAL1: icmp eq i8* %vtablei8, @__typeid_typeid2_8_3_unique_member + ; UNIQUE-RET-VAL0: icmp ne i8* %vtablei8, getelementptr inbounds ([0 x i8], [0 x i8]* @__typeid_typeid2_8_3_unique_member, i32 0, i32 0) + ; UNIQUE-RET-VAL1: icmp eq i8* %vtablei8, getelementptr inbounds ([0 x i8], [0 x i8]* @__typeid_typeid2_8_3_unique_member, i32 0, i32 0) ; VCP: [[VT2:%.*]] = bitcast {{.*}} to i8* - ; VCP-X86: [[GEP2:%.*]] = getelementptr i8, i8* [[VT2]], i32 ptrtoint (i8* @__typeid_typeid2_8_3_byte to i32) + ; VCP-X86: [[GEP2:%.*]] = getelementptr i8, i8* [[VT2]], i32 ptrtoint ([0 x i8]* @__typeid_typeid2_8_3_byte to i32) ; VCP-ARM: [[GEP2:%.*]] = getelementptr i8, i8* [[VT2]], i32 43 ; VCP: [[LOAD2:%.*]] = load i8, i8* [[GEP2]] - ; VCP-X86: [[AND2:%.*]] = and i8 [[LOAD2]], ptrtoint (i8* @__typeid_typeid2_8_3_bit to i8) + ; VCP-X86: [[AND2:%.*]] = and i8 [[LOAD2]], ptrtoint ([0 x i8]* @__typeid_typeid2_8_3_bit to i8) ; VCP-ARM: [[AND2:%.*]] = and i8 [[LOAD2]], -128 ; VCP: [[ICMP2:%.*]] = icmp ne i8 [[AND2]], 0 ; VCP: ret i1 [[ICMP2]] diff --git a/llvm/test/Transforms/WholeProgramDevirt/unique-retval-same-vtable.ll b/llvm/test/Transforms/WholeProgramDevirt/unique-retval-same-vtable.ll new file mode 100644 index 000000000000..c4d6de7f932b --- /dev/null +++ b/llvm/test/Transforms/WholeProgramDevirt/unique-retval-same-vtable.ll @@ -0,0 +1,59 @@ +; Test for PR45393: Two virtual functions that return unique i1 values +; in the same vtable. 
Both calls are optimized to a comparison of +; this's vptr against the address of the vtable. When nesting these +; checks, LLVM would previously assume the nested check always fails, +; but that assumption does not hold if both checks refer to the same vtable. +; This tests checks that this case is handled correctly. +; +; RUN: opt -S -wholeprogramdevirt -wholeprogramdevirt-summary-action=import \ +; RUN: -wholeprogramdevirt-read-summary=%p/Inputs/unique-retval-same-vtable.yaml \ +; RUN: -O2 -o - %s | FileCheck %s +; +; Check that C::f() contains both possible return values. +; CHECK-LABEL: define {{.*}} @_ZNK1C1fEv +; CHECK-NOT: } +; CHECK: 20074028 +; CHECK-NOT: } +; CHECK: 1008434 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%class.C = type { i32 (...)** } + +define hidden i32 @_ZNK1C1fEv(%class.C* %this) { +entry: + %0 = bitcast %class.C* %this to i1 (%class.C*)*** + %vtable = load i1 (%class.C*)**, i1 (%class.C*)*** %0 + %1 = bitcast i1 (%class.C*)** %vtable to i8* + %2 = tail call i1 @llvm.type.test(i8* %1, metadata !"_ZTS1C") + tail call void @llvm.assume(i1 %2) + %vfn = getelementptr inbounds i1 (%class.C*)*, i1 (%class.C*)** %vtable, i64 2 + %3 = load i1 (%class.C*)*, i1 (%class.C*)** %vfn + %call = tail call zeroext i1 %3(%class.C* %this) + br i1 %call, label %if.then, label %return + +if.then: + %vtable2 = load i1 (%class.C*)**, i1 (%class.C*)*** %0 + %4 = bitcast i1 (%class.C*)** %vtable2 to i8* + %5 = tail call i1 @llvm.type.test(i8* %4, metadata !"_ZTS1C") + tail call void @llvm.assume(i1 %5) + %vfn3 = getelementptr inbounds i1 (%class.C*)*, i1 (%class.C*)** %vtable2, i64 3 + %6 = load i1 (%class.C*)*, i1 (%class.C*)** %vfn3 + ; The method being called here and the method being called before + ; the branch above both return true in the same vtable and only that + ; vtable. Therefore, if this call is reached, we must select + ; 20074028. Earlier versions of LLVM mistakenly concluded that + ; this code *never* selects 200744028. + %call4 = tail call zeroext i1 %6(%class.C* nonnull %this) + %. 
= select i1 %call4, i32 20074028, i32 3007762 + br label %return + +return: + %retval.0 = phi i32 [ %., %if.then ], [ 1008434, %entry ] + ret i32 %retval.0 +} + +declare i1 @llvm.type.test(i8*, metadata) + +declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll index f03c07d24de6..6d91cd4be7eb 100644 --- a/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll +++ b/llvm/test/Transforms/WholeProgramDevirt/unique-retval.ll @@ -20,15 +20,13 @@ define i1 @vf1(i8* %this) readnone { define i1 @call1(i8* %obj) { %vtableptr = bitcast i8* %obj to [1 x i8*]** %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr - ; CHECK: {{.*}} = bitcast [1 x i8*]* {{.*}} to i8* - ; CHECK: [[VT1:%[^ ]*]] = bitcast [1 x i8*]* {{.*}} to i8* %vtablei8 = bitcast [1 x i8*]* %vtable to i8* %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid1") call void @llvm.assume(i1 %p) %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0 %fptr = load i8*, i8** %fptrptr %fptr_casted = bitcast i8* %fptr to i1 (i8*)* - ; CHECK: [[RES1:%[^ ]*]] = icmp eq i8* [[VT1]], bitcast ([1 x i8*]* @vt3 to i8*) + ; CHECK: [[RES1:%[^ ]*]] = icmp eq [1 x i8*]* %vtable, @vt3 %result = call i1 %fptr_casted(i8* %obj) ; CHECK: ret i1 [[RES1]] ret i1 %result @@ -38,7 +36,6 @@ define i1 @call1(i8* %obj) { define i32 @call2(i8* %obj) { %vtableptr = bitcast i8* %obj to [1 x i8*]** %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr - ; CHECK: [[VT2:%[^ ]*]] = bitcast [1 x i8*]* {{.*}} to i8* %vtablei8 = bitcast [1 x i8*]* %vtable to i8* %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid2") call void @llvm.assume(i1 %p) @@ -46,7 +43,7 @@ define i32 @call2(i8* %obj) { %fptr = load i8*, i8** %fptrptr ; Intentional type mismatch to test zero extend. %fptr_casted = bitcast i8* %fptr to i32 (i8*)* - ; CHECK: [[RES2:%[^ ]*]] = icmp ne i8* [[VT1]], bitcast ([1 x i8*]* @vt2 to i8*) + ; CHECK: [[RES2:%[^ ]*]] = icmp ne [1 x i8*]* %vtable, @vt2 %result = call i32 %fptr_casted(i8* %obj) ; CHECK: [[ZEXT2:%[^ ]*]] = zext i1 [[RES2]] to i32 ; CHECK: ret i32 [[ZEXT2:%[^ ]*]] From 40d139c620f83509fe18acbff5ec358298e99def Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Thu, 16 Apr 2020 17:24:48 -0400 Subject: [PATCH 099/216] [mlir][Linalg] NFC - Split out EDSCs that require a Folder Summary: This is an NFC cleanup in preparation for end-to-end named Linalg ops. Differential Revision: https://reviews.llvm.org/D78331 --- .../Dialect/Linalg/EDSC/FoldedIntrinsics.h | 31 +++++++++++++++++++ .../mlir/Dialect/Linalg/EDSC/Intrinsics.h | 16 ++-------- mlir/lib/Dialect/Linalg/EDSC/Builders.cpp | 1 + mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp | 2 +- .../Linalg/Transforms/LinalgToLoops.cpp | 2 +- .../Dialect/Linalg/Transforms/Promotion.cpp | 2 +- mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp | 2 +- mlir/test/EDSC/builder-api-test.cpp | 1 + 8 files changed, 40 insertions(+), 17 deletions(-) create mode 100644 mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h b/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h new file mode 100644 index 000000000000..89cde8ff3513 --- /dev/null +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h @@ -0,0 +1,31 @@ +//===- FoldedIntrinsics.h - MLIR EDSC Intrinsics for Linalg -----*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef MLIR_DIALECT_LINALG_EDSC_FOLDEDINTRINSICS_H_ +#define MLIR_DIALECT_LINALG_EDSC_FOLDEDINTRINSICS_H_ + +#include "mlir/Dialect/Linalg/EDSC/Builders.h" +#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" + +#include "mlir/Transforms/FoldUtils.h" + +namespace mlir { +namespace edsc { + +template +ValueHandle ValueHandle::create(OperationFolder *folder, Args... args) { + return folder ? ValueHandle(folder->create(ScopedContext::getBuilder(), + ScopedContext::getLocation(), + args...)) + : ValueHandle(ScopedContext::getBuilder().create( + ScopedContext::getLocation(), args...)); +} + +} // namespace edsc +} // namespace mlir + +#endif // MLIR_DIALECT_LINALG_EDSC_FOLDEDINTRINSICS_H_ diff --git a/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h b/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h index dedc18934b84..399c49d1e572 100644 --- a/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h +++ b/mlir/include/mlir/Dialect/Linalg/EDSC/Intrinsics.h @@ -8,23 +8,13 @@ #ifndef MLIR_DIALECT_LINALG_EDSC_INTRINSICS_H_ #define MLIR_DIALECT_LINALG_EDSC_INTRINSICS_H_ -#include "mlir/Dialect/Linalg/EDSC/Builders.h" -#include "mlir/EDSC/Intrinsics.h" -#include "mlir/Transforms/FoldUtils.h" +#include "mlir/Dialect/Linalg/IR/LinalgOps.h" +#include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" namespace mlir { namespace edsc { - -template -ValueHandle ValueHandle::create(OperationFolder *folder, Args... args) { - return folder ? ValueHandle(folder->create(ScopedContext::getBuilder(), - ScopedContext::getLocation(), - args...)) - : ValueHandle(ScopedContext::getBuilder().create( - ScopedContext::getLocation(), args...)); -} - namespace intrinsics { + using linalg_copy = OperationBuilder; using linalg_dot = OperationBuilder; using linalg_fill = OperationBuilder; diff --git a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp index 2fa09b7422a9..4a775f5cb296 100644 --- a/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp +++ b/mlir/lib/Dialect/Linalg/EDSC/Builders.cpp @@ -8,6 +8,7 @@ #include "mlir/IR/Builders.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" #include "mlir/Dialect/LoopOps/EDSC/Builders.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp index a5f4cd9e4592..ddfa4518a112 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Fusion.cpp @@ -13,7 +13,7 @@ #include "PassDetail.h" #include "mlir/Analysis/Dominance.h" #include "mlir/Dialect/Linalg/Analysis/DependenceAnalysis.h" -#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp index 07a2c370a152..b794f54ed5f9 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/LinalgToLoops.cpp @@ -8,7 +8,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" -#include 
"mlir/Dialect/Linalg/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp index 1202d4c69cb0..09e1bebb354a 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Promotion.cpp @@ -12,7 +12,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" -#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgOps.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" diff --git a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp index f6f69b0fee8f..87a2015338ca 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/Tiling.cpp @@ -12,7 +12,7 @@ #include "PassDetail.h" #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" -#include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/FoldedIntrinsics.h" #include "mlir/Dialect/Linalg/IR/LinalgTypes.h" #include "mlir/Dialect/Linalg/Passes.h" #include "mlir/Dialect/Linalg/Utils/Utils.h" diff --git a/mlir/test/EDSC/builder-api-test.cpp b/mlir/test/EDSC/builder-api-test.cpp index 594040d60ae1..addf86a99154 100644 --- a/mlir/test/EDSC/builder-api-test.cpp +++ b/mlir/test/EDSC/builder-api-test.cpp @@ -9,6 +9,7 @@ // RUN: mlir-edsc-builder-api-test | FileCheck %s -dump-input-on-failure #include "mlir/Dialect/Affine/EDSC/Intrinsics.h" +#include "mlir/Dialect/Linalg/EDSC/Builders.h" #include "mlir/Dialect/Linalg/EDSC/Intrinsics.h" #include "mlir/Dialect/LoopOps/EDSC/Intrinsics.h" #include "mlir/Dialect/StandardOps/EDSC/Intrinsics.h" From 7fa342bd2a6be51998c399f145143d8f45da1f4d Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Thu, 16 Apr 2020 15:22:14 -0700 Subject: [PATCH 100/216] Remove attach-failed-due-to-SIP checks which were not working The SIP debugserver was calling in attach_failed_due_to_sip haven't worked for a while; remove them. To check this properly we'd need debugsever to call out to codesign(1) to inspect the entitlements, or the equivalant API, and I'm not interested in adding that at this point. SIP is has been the default on macOS for a couple of releases and it's expected behavior now. --- lldb/tools/debugserver/source/RNBRemote.cpp | 31 --------------------- 1 file changed, 31 deletions(-) diff --git a/lldb/tools/debugserver/source/RNBRemote.cpp b/lldb/tools/debugserver/source/RNBRemote.cpp index 8eed06381d3a..df358065f877 100644 --- a/lldb/tools/debugserver/source/RNBRemote.cpp +++ b/lldb/tools/debugserver/source/RNBRemote.cpp @@ -3663,30 +3663,6 @@ static bool process_does_not_exist (nub_process_t pid) { return true; // process does not exist } -static bool attach_failed_due_to_sip (nub_process_t pid) { - bool retval = false; -#if defined(__APPLE__) && \ - (__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__ >= 101000) - - // csr_check(CSR_ALLOW_TASK_FOR_PID) will be nonzero if System Integrity - // Protection is in effect. 
-  if (csr_check(CSR_ALLOW_TASK_FOR_PID) == 0)
-    return false;
-
-  if (rootless_allows_task_for_pid(pid) == 0)
-    retval = true;
-
-  int csops_flags = 0;
-  int csops_ret = ::csops(pid, CS_OPS_STATUS, &csops_flags,
-                          sizeof(csops_flags));
-  if (csops_ret != -1 && (csops_flags & CS_RESTRICT)) {
-    retval = true;
-  }
-#endif
-
-  return retval;
-}
-
 // my_uid and process_uid are only initialized if this function
 // returns true -- that there was a uid mismatch -- and those
 // id's may want to be used in the error message.
@@ -4065,13 +4041,6 @@ rnb_err_t RNBRemote::HandlePacket_v(const char *p) {
                 "processes.");
         return SendPacket(return_message.c_str());
       }
-      if (attach_failed_due_to_sip (pid_attaching_to)) {
-        DNBLogError("Attach failed because of SIP protection.");
-        std::string return_message = "E96;";
-        return_message += cstring_to_asciihex_string("cannot attach "
-                            "to process due to System Integrity Protection");
-        return SendPacket(return_message.c_str());
-      }
     }
     std::string error_explainer = "attach failed";

From d10386e1779599d217b5b849a079f29dfbe17024 Mon Sep 17 00:00:00 2001
From: Jonas Devlieghere
Date: Thu, 16 Apr 2020 15:28:42 -0700
Subject: [PATCH 101/216] [lldb/Utility] Provide a stringify_append overload
 for function pointers.

Converting a function pointer to an object pointer is illegal as
nothing requires it to be in the same address space. Add an overload
for function pointers so we don't do this illegal conversion, and
simply print out "function pointer".

---
 lldb/include/lldb/Utility/ReproducerInstrumentation.h | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h
index 3b5dde3d2e2a..75f38929e362 100644
--- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h
+++ b/lldb/include/lldb/Utility/ReproducerInstrumentation.h
@@ -34,12 +34,17 @@ inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) {
 
 template <typename T>
 inline void stringify_append(llvm::raw_string_ostream &ss, T *t) {
-  ss << reinterpret_cast<void *>(t);
+  ss << static_cast<void *>(t);
 }
 
 template <typename T>
 inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) {
-  ss << reinterpret_cast<const void *>(t);
+  ss << static_cast<const void *>(t);
+}
+
+template <typename T, typename... Args>
+inline void stringify_append(llvm::raw_string_ostream &ss, T (*t)(Args...)) {
+  ss << "function pointer";
 }
 
 template <>

From ce7790044faa48a1ec49b6339797180e05520cef Mon Sep 17 00:00:00 2001
From: Davide Italiano
Date: Thu, 16 Apr 2020 15:36:43 -0700
Subject: [PATCH 102/216] [DWARF] Rename a function and comment it for
 clarity.

Pointed out by Adrian.
--- lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp | 2 +- lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h | 5 ++++- lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp | 2 +- lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 3 ++- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp index b089c4e1f04a..1c1acd9dd61a 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.cpp @@ -1016,7 +1016,7 @@ DWARFDebugInfoEntry::GetAbbreviationDeclarationPtr(const DWARFUnit *cu) const { return nullptr; } -bool DWARFDebugInfoEntry::IsGlobalOrStaticVariable() const { +bool DWARFDebugInfoEntry::IsGlobalOrStaticScopeVariable() const { if (Tag() != DW_TAG_variable) return false; const DWARFDebugInfoEntry *parent_die = GetParent(); diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h index c05d79c01817..e12a19c13d1c 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFDebugInfoEntry.h @@ -167,7 +167,10 @@ class DWARFDebugInfoEntry { void SetSiblingIndex(uint32_t idx) { m_sibling_idx = idx; } void SetParentIndex(uint32_t idx) { m_parent_idx = idx; } - bool IsGlobalOrStaticVariable() const; + // This function returns true if the variable scope is either + // global or (file-static). It will return false for static variables + // that are local to a function, as they have local scope. + bool IsGlobalOrStaticScopeVariable() const; protected: static DWARFDeclContext diff --git a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp index 535e79a7ecc7..3fe38e75e612 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/ManualDWARFIndex.cpp @@ -204,7 +204,7 @@ void ManualDWARFIndex::IndexUnitImpl(DWARFUnit &unit, case DW_AT_location: case DW_AT_const_value: has_location_or_const_value = true; - is_global_or_static_variable = die.IsGlobalOrStaticVariable(); + is_global_or_static_variable = die.IsGlobalOrStaticScopeVariable(); break; diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 400ba6e1f443..b13331c4852e 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -3390,7 +3390,8 @@ VariableSP SymbolFileDWARF::ParseVariableDIE(const SymbolContext &sc, } } } else { - if (location_is_const_value_data && die.GetDIE()->IsGlobalOrStaticVariable()) + if (location_is_const_value_data && + die.GetDIE()->IsGlobalOrStaticScopeVariable()) scope = eValueTypeVariableStatic; else { scope = eValueTypeVariableLocal; From af2968e37f4c95846ffe287b64a4fcd72c765bee Mon Sep 17 00:00:00 2001 From: Eric Fiselier Date: Thu, 16 Apr 2020 18:35:31 -0400 Subject: [PATCH 103/216] [clang] Fix invalid comparator in tablegen Summary: The current version of the comparator does not introduce a strict weak ordering. 
Reviewers: fowles, bkramer, sdesmalen Reviewed By: sdesmalen Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78323 --- clang/utils/TableGen/SveEmitter.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/utils/TableGen/SveEmitter.cpp b/clang/utils/TableGen/SveEmitter.cpp index 79258a8fbbf2..8ef65612a243 100644 --- a/clang/utils/TableGen/SveEmitter.cpp +++ b/clang/utils/TableGen/SveEmitter.cpp @@ -33,6 +33,7 @@ #include #include #include +#include using namespace llvm; @@ -909,9 +910,10 @@ void SVEEmitter::createHeader(raw_ostream &OS) { std::stable_sort( Defs.begin(), Defs.end(), [](const std::unique_ptr &A, const std::unique_ptr &B) { - return A->getGuard() < B->getGuard() || - (unsigned)A->getClassKind() < (unsigned)B->getClassKind() || - A->getName() < B->getName(); + auto ToTuple = [](const std::unique_ptr &I) { + return std::make_tuple(I->getGuard(), (unsigned)I->getClassKind(), I->getName()); + }; + return ToTuple(A) < ToTuple(B); }); StringRef InGuard = ""; From 26805f0b6408eeb30b83d37ec266f27288f1113a Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Thu, 16 Apr 2020 12:27:16 -0700 Subject: [PATCH 104/216] [MLIR][cmake] Use DEPENDS instead of add_dependencies() add_llvm_library() sometimes needs access to the dependencies in order to generate new targets. Using DEPENDS allows this. Differential Revision: https://reviews.llvm.org/D78321 --- mlir/lib/Interfaces/CMakeLists.txt | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/mlir/lib/Interfaces/CMakeLists.txt b/mlir/lib/Interfaces/CMakeLists.txt index a6e053d7674e..fdd36bb3e1fa 100644 --- a/mlir/lib/Interfaces/CMakeLists.txt +++ b/mlir/lib/Interfaces/CMakeLists.txt @@ -12,8 +12,8 @@ add_mlir_library(MLIRCallInterfaces ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces - ) -add_dependencies(MLIRCallInterfaces + + DEPENDS MLIRCallInterfacesIncGen ) target_link_libraries(MLIRCallInterfaces @@ -26,8 +26,8 @@ add_mlir_library(MLIRControlFlowInterfaces ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces - ) -add_dependencies(MLIRControlFlowInterfaces + + DEPENDS MLIRControlFlowInterfacesIncGen ) target_link_libraries(MLIRControlFlowInterfaces @@ -40,8 +40,8 @@ add_mlir_library(MLIRDerivedAttributeOpInterface ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces - ) -add_dependencies(MLIRDerivedAttributeOpInterface + + DEPENDS MLIRDerivedAttributeOpInterfaceIncGen ) target_link_libraries(MLIRDerivedAttributeOpInterface @@ -54,8 +54,8 @@ add_mlir_library(MLIRInferTypeOpInterface ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces - ) -add_dependencies(MLIRInferTypeOpInterface + + DEPENDS MLIRInferTypeOpInterfaceIncGen ) target_link_libraries(MLIRInferTypeOpInterface @@ -68,8 +68,8 @@ add_mlir_library(MLIRLoopLikeInterface ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces - ) -add_dependencies(MLIRLoopLikeInterface + + DEPENDS MLIRLoopLikeInterfaceIncGen ) target_link_libraries(MLIRLoopLikeInterface @@ -82,8 +82,8 @@ add_mlir_library(MLIRSideEffects ADDITIONAL_HEADER_DIRS ${MLIR_MAIN_INCLUDE_DIR}/mlir/Interfaces - ) -add_dependencies(MLIRSideEffects + + DEPENDS MLIRSideEffectOpInterfacesIncGen ) target_link_libraries(MLIRSideEffects From b0c4dfb3b160ecde23e668e773d340171880302c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 17 Apr 2020 01:00:42 +0200 Subject: [PATCH 105/216] [clangd] Print PID on windows too --- clang-tools-extra/clangd/tool/ClangdMain.cpp | 
5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/tool/ClangdMain.cpp b/clang-tools-extra/clangd/tool/ClangdMain.cpp index a8d8f95ce805..92e46a662953 100644 --- a/clang-tools-extra/clangd/tool/ClangdMain.cpp +++ b/clang-tools-extra/clangd/tool/ClangdMain.cpp @@ -570,10 +570,7 @@ clangd accepts flags on the commandline, and in the CLANGD_FLAGS environment var LoggingSession LoggingSession(Logger); // Write some initial logs before we start doing any real work. log("{0}", clang::getClangToolFullVersion("clangd")); -// FIXME: abstract this better, and print PID on windows too. -#ifndef _WIN32 - log("PID: {0}", getpid()); -#endif + log("PID: {0}", llvm::sys::Process::getProcessId()); { SmallString<128> CWD; if (auto Err = llvm::sys::fs::current_path(CWD)) From 63725df1d66973f623f41bddcaae7a235465ca81 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Fri, 17 Apr 2020 01:08:32 +0200 Subject: [PATCH 106/216] [clangd] Remove unused and underused helpers. NFC --- clang-tools-extra/clangd/CodeComplete.cpp | 2 +- clang-tools-extra/clangd/SourceCode.cpp | 25 ------------------- clang-tools-extra/clangd/SourceCode.h | 13 ---------- .../clangd/unittests/SourceCodeTests.cpp | 15 ----------- 4 files changed, 1 insertion(+), 54 deletions(-) diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index b544510ecea1..7dbb4f5b78a3 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -1836,7 +1836,7 @@ CompletionItem CodeCompletion::render(const CodeCompleteOptions &Opts) const { // is mainly to help LSP clients again, so that changes do not effect each // other. for (const auto &FixIt : FixIts) { - if (isRangeConsecutive(FixIt.range, LSP.textEdit->range)) { + if (FixIt.range.end == LSP.textEdit->range.start) { LSP.textEdit->newText = FixIt.newText + LSP.textEdit->newText; LSP.textEdit->range.start = FixIt.range.start; } else { diff --git a/clang-tools-extra/clangd/SourceCode.cpp b/clang-tools-extra/clangd/SourceCode.cpp index a722ae9b0663..1943784bfd18 100644 --- a/clang-tools-extra/clangd/SourceCode.cpp +++ b/clang-tools-extra/clangd/SourceCode.cpp @@ -240,26 +240,6 @@ bool isValidFileRange(const SourceManager &Mgr, SourceRange R) { return BeginFID.isValid() && BeginFID == EndFID && BeginOffset <= EndOffset; } -bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R, - SourceLocation L) { - assert(isValidFileRange(Mgr, R)); - - FileID BeginFID; - size_t BeginOffset = 0; - std::tie(BeginFID, BeginOffset) = Mgr.getDecomposedLoc(R.getBegin()); - size_t EndOffset = Mgr.getFileOffset(R.getEnd()); - - FileID LFid; - size_t LOffset; - std::tie(LFid, LOffset) = Mgr.getDecomposedLoc(L); - return BeginFID == LFid && BeginOffset <= LOffset && LOffset < EndOffset; -} - -bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R, - SourceLocation L) { - return L == R.getEnd() || halfOpenRangeContains(Mgr, R, L); -} - SourceLocation includeHashLoc(FileID IncludedFile, const SourceManager &SM) { assert(SM.getLocForEndOfFile(IncludedFile).isFileID()); FileID IncludingFile; @@ -558,11 +538,6 @@ TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, return Result; } -bool isRangeConsecutive(const Range &Left, const Range &Right) { - return Left.end.line == Right.start.line && - Left.end.character == Right.start.character; -} - FileDigest digest(llvm::StringRef Content) { uint64_t Hash{llvm::xxHash64(Content)}; FileDigest Result; diff --git 
a/clang-tools-extra/clangd/SourceCode.h b/clang-tools-extra/clangd/SourceCode.h index 8328685022de..dfa685fdd795 100644 --- a/clang-tools-extra/clangd/SourceCode.h +++ b/clang-tools-extra/clangd/SourceCode.h @@ -120,17 +120,6 @@ llvm::Optional toHalfOpenFileRange(const SourceManager &Mgr, /// FIXME: introduce a type for source range with this invariant. bool isValidFileRange(const SourceManager &Mgr, SourceRange R); -/// Returns true iff \p L is contained in \p R. -/// EXPECTS: isValidFileRange(R) == true, L is a file location. -bool halfOpenRangeContains(const SourceManager &Mgr, SourceRange R, - SourceLocation L); - -/// Returns true iff \p L is contained in \p R or \p L is equal to the end point -/// of \p R. -/// EXPECTS: isValidFileRange(R) == true, L is a file location. -bool halfOpenRangeTouches(const SourceManager &Mgr, SourceRange R, - SourceLocation L); - /// Returns the source code covered by the source range. /// EXPECTS: isValidFileRange(R) == true. llvm::StringRef toSourceCode(const SourceManager &SM, SourceRange R); @@ -171,8 +160,6 @@ TextEdit toTextEdit(const FixItHint &FixIt, const SourceManager &M, llvm::Optional getCanonicalPath(const FileEntry *F, const SourceManager &SourceMgr); -bool isRangeConsecutive(const Range &Left, const Range &Right); - /// Choose the clang-format style we should apply to a certain file. /// This will usually use FS to look for .clang-format directories. /// FIXME: should we be caching the .clang-format file search? diff --git a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp index 3bc953ad2f3a..76a3a3cac267 100644 --- a/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp +++ b/clang-tools-extra/clangd/unittests/SourceCodeTests.cpp @@ -42,13 +42,6 @@ Position position(int Line, int Character) { return Pos; } -Range range(const std::pair &P1, const std::pair &P2) { - Range Range; - Range.start = position(P1.first, P1.second); - Range.end = position(P2.first, P2.second); - return Range; -} - TEST(SourceCodeTests, lspLength) { EXPECT_EQ(lspLength(""), 0UL); EXPECT_EQ(lspLength("ascii"), 5UL); @@ -273,14 +266,6 @@ TEST(SourceCodeTests, OffsetToPosition) { EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 11)) << "out of bounds"; } -TEST(SourceCodeTests, IsRangeConsecutive) { - EXPECT_TRUE(isRangeConsecutive(range({2, 2}, {2, 3}), range({2, 3}, {2, 4}))); - EXPECT_FALSE( - isRangeConsecutive(range({0, 2}, {0, 3}), range({2, 3}, {2, 4}))); - EXPECT_FALSE( - isRangeConsecutive(range({2, 2}, {2, 3}), range({2, 4}, {2, 5}))); -} - TEST(SourceCodeTests, SourceLocationInMainFile) { Annotations Source(R"cpp( ^in^t ^foo From 80022ae2b58e7c2d51e7705d39a8ab546ac7de85 Mon Sep 17 00:00:00 2001 From: Julian Lettner Date: Tue, 14 Apr 2020 12:48:58 -0700 Subject: [PATCH 107/216] [UBSan] Fix vptr checks on arm64e Fix UBSan's vptr checks in the presence of arm64e pointer signing. 
Radar-Id: rdar://61786404 Reviewed By: vsk Differential Revision: https://reviews.llvm.org/D78230 --- .../lib/ubsan/ubsan_type_hash_itanium.cpp | 7 +++++ .../vptr-corrupted-vtable-itanium.cpp | 27 +++++++++++----- .../vptr-ptrauth-unauthenticated.cpp | 31 +++++++++++++++++++ 3 files changed, 57 insertions(+), 8 deletions(-) create mode 100644 compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-ptrauth-unauthenticated.cpp diff --git a/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp b/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp index 97846d4dd434..465aa6476ad0 100644 --- a/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp +++ b/compiler-rt/lib/ubsan/ubsan_type_hash_itanium.cpp @@ -17,6 +17,10 @@ #include "sanitizer_common/sanitizer_common.h" +#if __has_feature(ptrauth_calls) +#include +#endif + // The following are intended to be binary compatible with the definitions // given in the Itanium ABI. We make no attempt to be ODR-compatible with // those definitions, since existing ABI implementations aren't. @@ -194,6 +198,9 @@ struct VtablePrefix { std::type_info *TypeInfo; }; VtablePrefix *getVtablePrefix(void *Vtable) { +#if __has_feature(ptrauth_calls) + Vtable = ptrauth_auth_data(Vtable, ptrauth_key_cxx_vtable_pointer, 0); +#endif VtablePrefix *Vptr = reinterpret_cast(Vtable); VtablePrefix *Prefix = Vptr - 1; if (!IsAccessibleMemoryRange((uptr)Prefix, sizeof(VtablePrefix))) diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp index 0e3d0d8592ab..ef8f22e311e5 100644 --- a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-corrupted-vtable-itanium.cpp @@ -1,12 +1,15 @@ -// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr,null -g %s -O3 -o %t +// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t // RUN: not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-CORRUPTED-VTABLE --strict-whitespace // UNSUPPORTED: windows-msvc // REQUIRES: stable-runtime, cxxabi -#include #include +#if __has_feature(ptrauth_calls) +#include +#endif + struct S { S() {} ~S() {} @@ -24,15 +27,23 @@ int main(int argc, char **argv) { // offset is too large or too small. S Obj; void *Ptr = &Obj; - VtablePrefix* RealPrefix = reinterpret_cast( - *reinterpret_cast(Ptr)) - 1; + void *VtablePtr = *reinterpret_cast(Ptr); +#if __has_feature(ptrauth_calls) + VtablePtr = ptrauth_strip(VtablePtr, 0); +#endif + VtablePrefix* Prefix = reinterpret_cast(VtablePtr) - 1; - VtablePrefix Prefix[2]; - Prefix[0].Offset = 1<<21; // Greater than VptrMaxOffset - Prefix[0].TypeInfo = RealPrefix->TypeInfo; + VtablePrefix FakePrefix[2]; + FakePrefix[0].Offset = 1<<21; // Greater than VptrMaxOffset + FakePrefix[0].TypeInfo = Prefix->TypeInfo; // Hack Vtable ptr for Obj. 
- *reinterpret_cast(Ptr) = static_cast(&Prefix[1]); + void *FakeVtablePtr = static_cast(&FakePrefix[1]); +#if __has_feature(ptrauth_calls) + FakeVtablePtr = ptrauth_sign_unauthenticated( + FakeVtablePtr, ptrauth_key_cxx_vtable_pointer, 0); +#endif + *reinterpret_cast(Ptr) = FakeVtablePtr; // CHECK-CORRUPTED-VTABLE: vptr-corrupted-vtable-itanium.cpp:[[@LINE+3]]:16: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'S' // CHECK-CORRUPTED-VTABLE-NEXT: [[PTR]]: note: object has a possibly invalid vptr: abs(offset to top) too big diff --git a/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-ptrauth-unauthenticated.cpp b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-ptrauth-unauthenticated.cpp new file mode 100644 index 000000000000..6061cfc457fc --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/TypeCheck/vptr-ptrauth-unauthenticated.cpp @@ -0,0 +1,31 @@ +// Test that we don't crash for vtable pointers with an invalid ptrauth +// signature which includes unauthenticated vtable pointers. + +// RUN: %clangxx -frtti -fsanitize=vptr -fno-sanitize-recover=vptr -g %s -O3 -o %t +// RUN: not %run %t 2>&1 | FileCheck %s + +// TODO(yln): introduce 'ptrauth' lit feature +// REQUIRES: stable-runtime, cxxabi, arch=arm64e + +#include +#include + +struct S { + S() {} + ~S() {} + virtual int v() { return 0; } +}; + +int main(int argc, char **argv) { + S Obj; + void *Ptr = &Obj; + void **VtablePtrPtr = reinterpret_cast(&Obj); + // Hack Obj: the unauthenticated Vtable ptr will trigger an auth failure in the runtime. + void *UnauthenticatedVtablePtr = ptrauth_strip(*VtablePtrPtr, 0); + *VtablePtrPtr = UnauthenticatedVtablePtr; + + // CHECK: vptr-ptrauth-unauthenticated.cpp:[[@LINE+3]]:16: runtime error: member call on address [[PTR:0x[0-9a-f]*]] which does not point to an object of type 'S' + // CHECK: [[PTR]]: note: object has invalid vptr + S *Ptr2 = reinterpret_cast(Ptr); + return Ptr2->v(); +} From 9a709dd2bb452883e1f1cf626d60c3f03801a9f3 Mon Sep 17 00:00:00 2001 From: Richard Smith Date: Thu, 23 Jan 2020 16:25:30 -0800 Subject: [PATCH 108/216] llvm-addr2line: assume addresses on the command line are hexadecimal rather than attempting to guess the base based on the form of the number. Summary: This matches the behavior of GNU addr2line. We previously treated hexadecimal addresses as binary if they started with 0b, otherwise as octal if they started with 0, otherwise as decimal. This only affects llvm-addr2line; the behavior of llvm-symbolize is unaffected. Reviewers: ikudrin, rupprecht, jhenderson Subscribers: llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D73306 --- llvm/docs/CommandGuide/llvm-addr2line.rst | 5 +++ llvm/test/lit.cfg.py | 8 ++--- .../tools/llvm-symbolizer/input-base.test | 33 +++++++++++++++++++ .../tools/llvm-symbolizer/llvm-symbolizer.cpp | 20 +++++++---- 4 files changed, 55 insertions(+), 11 deletions(-) create mode 100644 llvm/test/tools/llvm-symbolizer/input-base.test diff --git a/llvm/docs/CommandGuide/llvm-addr2line.rst b/llvm/docs/CommandGuide/llvm-addr2line.rst index 27b0fd9a947b..646a159cd24a 100644 --- a/llvm/docs/CommandGuide/llvm-addr2line.rst +++ b/llvm/docs/CommandGuide/llvm-addr2line.rst @@ -17,6 +17,11 @@ GNU's :program:`addr2line`. 
Here are some of those differences: +- ``llvm-addr2line`` interprets all addresses as hexadecimal and ignores an + optional ``0x`` prefix, whereas ``llvm-symbolizer`` attempts to determine + the base from the literal's prefix and defaults to decimal if there is no + prefix. + - ``llvm-addr2line`` defaults not to print function names. Use `-f`_ to enable that. diff --git a/llvm/test/lit.cfg.py b/llvm/test/lit.cfg.py index d41798248072..49d345566b65 100644 --- a/llvm/test/lit.cfg.py +++ b/llvm/test/lit.cfg.py @@ -146,10 +146,10 @@ def get_asan_rtlib(): # FIXME: Why do we have both `lli` and `%lli` that do slightly different things? tools.extend([ 'dsymutil', 'lli', 'lli-child-target', 'llvm-ar', 'llvm-as', - 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov', 'llvm-cxxdump', 'llvm-cvtres', - 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump', 'llvm-exegesis', 'llvm-extract', - 'llvm-isel-fuzzer', 'llvm-ifs', 'llvm-install-name-tool', - 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib', + 'llvm-addr2line', 'llvm-bcanalyzer', 'llvm-config', 'llvm-cov', + 'llvm-cxxdump', 'llvm-cvtres', 'llvm-diff', 'llvm-dis', 'llvm-dwarfdump', + 'llvm-exegesis', 'llvm-extract', 'llvm-isel-fuzzer', 'llvm-ifs', + 'llvm-install-name-tool', 'llvm-jitlink', 'llvm-opt-fuzzer', 'llvm-lib', 'llvm-link', 'llvm-lto', 'llvm-lto2', 'llvm-mc', 'llvm-mca', 'llvm-modextract', 'llvm-nm', 'llvm-objcopy', 'llvm-objdump', 'llvm-pdbutil', 'llvm-profdata', 'llvm-ranlib', 'llvm-rc', 'llvm-readelf', diff --git a/llvm/test/tools/llvm-symbolizer/input-base.test b/llvm/test/tools/llvm-symbolizer/input-base.test new file mode 100644 index 000000000000..66244a7203c0 --- /dev/null +++ b/llvm/test/tools/llvm-symbolizer/input-base.test @@ -0,0 +1,33 @@ +# llvm-symbolizer infers the number base from the form of the address. +RUN: llvm-symbolizer -e /dev/null -a 0x1234 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0X1234 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 4660 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 011064 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0b1001000110100 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0B1001000110100 | FileCheck %s +RUN: llvm-symbolizer -e /dev/null -a 0o11064 | FileCheck %s + +# llvm-symbolizer / StringRef::getAsInteger only accepts the 0o prefix in lowercase. +RUN: llvm-symbolizer -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER + +# llvm-addr2line always requires hexadecimal, but accepts an optional 0x prefix. +RUN: llvm-addr2line -e /dev/null -a 0x1234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 0X1234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 1234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 01234 | FileCheck %s +RUN: llvm-addr2line -e /dev/null -a 0b1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY +RUN: llvm-addr2line -e /dev/null -a 0B1010 | FileCheck %s --check-prefix=HEXADECIMAL-NOT-BINARY +RUN: llvm-addr2line -e /dev/null -a 0o1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-LOWER +RUN: llvm-addr2line -e /dev/null -a 0O1234 | FileCheck %s --check-prefix=INVALID-NOT-OCTAL-UPPER + +CHECK: 0x1234 +CHECK-NEXT: ?? + +HEXADECIMAL-NOT-BINARY: 0xb1010 +HEXADECIMAL-NOT-BINARY: ?? + +INVALID-NOT-OCTAL-LOWER: 0o1234 +INVALID-NOT-OCTAL-LOWER-NOT: ?? + +INVALID-NOT-OCTAL-UPPER: 0O1234 +INVALID-NOT-OCTAL-UPPER-NOT: ?? 
diff --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp index 8d46bd2cb627..bb282a52b331 100644 --- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp +++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp @@ -181,7 +181,7 @@ enum class Command { Frame, }; -static bool parseCommand(StringRef InputString, Command &Cmd, +static bool parseCommand(bool IsAddr2Line, StringRef InputString, Command &Cmd, std::string &ModuleName, uint64_t &ModuleOffset) { const char kDelimiters[] = " \n\r"; ModuleName = ""; @@ -218,15 +218,21 @@ static bool parseCommand(StringRef InputString, Command &Cmd, // Skip delimiters and parse module offset. Pos += strspn(Pos, kDelimiters); int OffsetLength = strcspn(Pos, kDelimiters); - return !StringRef(Pos, OffsetLength).getAsInteger(0, ModuleOffset); + StringRef Offset(Pos, OffsetLength); + // GNU addr2line assumes the offset is hexadecimal and allows a redundant + // "0x" or "0X" prefix; do the same for compatibility. + if (IsAddr2Line) + Offset.consume_front("0x") || Offset.consume_front("0X"); + return !Offset.getAsInteger(IsAddr2Line ? 16 : 0, ModuleOffset); } -static void symbolizeInput(StringRef InputString, LLVMSymbolizer &Symbolizer, - DIPrinter &Printer) { +static void symbolizeInput(bool IsAddr2Line, StringRef InputString, + LLVMSymbolizer &Symbolizer, DIPrinter &Printer) { Command Cmd; std::string ModuleName; uint64_t Offset = 0; - if (!parseCommand(StringRef(InputString), Cmd, ModuleName, Offset)) { + if (!parseCommand(IsAddr2Line, StringRef(InputString), Cmd, ModuleName, + Offset)) { outs() << InputString << "\n"; return; } @@ -340,12 +346,12 @@ int main(int argc, char **argv) { std::remove_if(StrippedInputString.begin(), StrippedInputString.end(), [](char c) { return c == '\r' || c == '\n'; }), StrippedInputString.end()); - symbolizeInput(StrippedInputString, Symbolizer, Printer); + symbolizeInput(IsAddr2Line, StrippedInputString, Symbolizer, Printer); outs().flush(); } } else { for (StringRef Address : ClInputAddresses) - symbolizeInput(Address, Symbolizer, Printer); + symbolizeInput(IsAddr2Line, Address, Symbolizer, Printer); } return 0; From 386f1c114d5952c13760cb3368d41d09d8ba099c Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 16 Apr 2020 15:53:00 -0700 Subject: [PATCH 109/216] [Object] Remove conditional layout of bitfields in MachO::relocation_info. This removes the conditional layout of relocation_info bitfields that was introduced in 3ccd677bf (svn r358839). The platform relocation_info struct (defined in usr/include/mach-o/reloc.h) does not define the layout of this struct differently on big-endian platforms and we want to keep the LLVM and platform definitions in sync. To fix the bug that 3ccd677bf addressed this patch modifies JITLink to construct its relocation_info structs from the raw relocation words using shift and mask operations. 
--- llvm/include/llvm/BinaryFormat/MachO.h | 5 ----- .../JITLink/MachOLinkGraphBuilder.h | 14 ++++++++++++++ llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 9 --------- llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 9 --------- 4 files changed, 14 insertions(+), 23 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/MachO.h b/llvm/include/llvm/BinaryFormat/MachO.h index 53959182e987..0010f36e8b89 100644 --- a/llvm/include/llvm/BinaryFormat/MachO.h +++ b/llvm/include/llvm/BinaryFormat/MachO.h @@ -952,13 +952,8 @@ struct fat_arch_64 { // Structs from struct relocation_info { int32_t r_address; -#if defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN) - uint32_t r_type : 4, r_extern : 1, r_length : 2, r_pcrel : 1, - r_symbolnum : 24; -#else uint32_t r_symbolnum : 24, r_pcrel : 1, r_length : 2, r_extern : 1, r_type : 4; -#endif }; struct scattered_relocation_info { diff --git a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h index 06d4787fc43c..ffe010054ff5 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h +++ b/llvm/lib/ExecutionEngine/JITLink/MachOLinkGraphBuilder.h @@ -153,6 +153,20 @@ class MachOLinkGraphBuilder { static Scope getScope(StringRef Name, uint8_t Type); static bool isAltEntry(const NormalizedSymbol &NSym); + MachO::relocation_info + getRelocationInfo(const object::relocation_iterator RelItr) { + MachO::any_relocation_info ARI = + getObject().getRelocation(RelItr->getRawDataRefImpl()); + MachO::relocation_info RI; + RI.r_address = ARI.r_word0; + RI.r_symbolnum = ARI.r_word1 & 0xffffff; + RI.r_pcrel = (ARI.r_word1 >> 24) & 1; + RI.r_length = (ARI.r_word1 >> 25) & 3; + RI.r_extern = (ARI.r_word1 >> 27) & 1; + RI.r_type = (ARI.r_word1 >> 28); + return RI; + } + private: static unsigned getPointerSize(const object::MachOObjectFile &Obj); static support::endianness getEndianness(const object::MachOObjectFile &Obj); diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index cc3940393bfc..55c7b36142a0 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -92,15 +92,6 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { ", length=" + formatv("{0:d}", RI.r_length)); } - MachO::relocation_info - getRelocationInfo(const object::relocation_iterator RelItr) { - MachO::any_relocation_info ARI = - getObject().getRelocation(RelItr->getRawDataRefImpl()); - MachO::relocation_info RI; - memcpy(&RI, &ARI, sizeof(MachO::relocation_info)); - return RI; - } - using PairRelocInfo = std::tuple; diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index 05e323b18d80..9abeddfb1920 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -95,15 +95,6 @@ class MachOLinkGraphBuilder_x86_64 : public MachOLinkGraphBuilder { ", length=" + formatv("{0:d}", RI.r_length)); } - MachO::relocation_info - getRelocationInfo(const object::relocation_iterator RelItr) { - MachO::any_relocation_info ARI = - getObject().getRelocation(RelItr->getRawDataRefImpl()); - MachO::relocation_info RI; - memcpy(&RI, &ARI, sizeof(MachO::relocation_info)); - return RI; - } - using PairRelocInfo = std::tuple; // Parses paired SUBTRACTOR/UNSIGNED relocations and, on success, From 798b262c3c9d4a8603dc6c6bbbe0a7ffb82eadbc Mon Sep 17 00:00:00 2001 From: 
Craig Topper Date: Thu, 16 Apr 2020 16:23:03 -0700 Subject: [PATCH 110/216] [CallSite removal][IPO] Change implementation of AbstractCallSite to store a CallBase* instead of CallSite. NFCI. CallSite will likely be removed soon, but AbstractCallSite serves a different purpose and won't be going away. This patch switches it to internally store a CallBase* instead of a CallSite. The only interface changes are the removal of the getCallSite method and getCallBackUses now takes a CallBase&. These methods had only a few callers that were easy enough to update without needing a compatibility shim. In the future once the other CallSites are gone, the CallSite.h header should be renamed to AbstractCallSite.h Differential Revision: https://reviews.llvm.org/D78322 --- llvm/include/llvm/IR/CallSite.h | 33 ++++++++--------- llvm/lib/IR/AbstractCallSite.cpp | 37 ++++++++++--------- llvm/lib/Transforms/IPO/Attributor.cpp | 12 +++--- .../Transforms/IPO/AttributorAttributes.cpp | 11 +++--- 4 files changed, 46 insertions(+), 47 deletions(-) diff --git a/llvm/include/llvm/IR/CallSite.h b/llvm/include/llvm/IR/CallSite.h index 41833d118c95..d3d644e44834 100644 --- a/llvm/include/llvm/IR/CallSite.h +++ b/llvm/include/llvm/IR/CallSite.h @@ -757,7 +757,7 @@ class AbstractCallSite { /// For direct/indirect calls the parameter encoding is empty. If it is not, /// the abstract call site represents a callback. In that case, the first /// element of the encoding vector represents which argument of the call - /// site CS is the callback callee. The remaining elements map parameters + /// site CB is the callback callee. The remaining elements map parameters /// (identified by their position) to the arguments that will be passed /// through (also identified by position but in the call site instruction). /// @@ -774,7 +774,7 @@ class AbstractCallSite { /// The underlying call site: /// caller -> callee, if this is a direct or indirect call site /// caller -> broker function, if this is a callback call site - CallSite CS; + CallBase *CB; /// The encoding of a callback with regards to the underlying instruction. CallbackInfo CI; @@ -802,26 +802,23 @@ class AbstractCallSite { /// /// All uses added to \p CBUses can be used to create abstract call sites for /// which AbstractCallSite::isCallbackCall() will return true. - static void getCallbackUses(ImmutableCallSite ICS, - SmallVectorImpl &CBUses); + static void getCallbackUses(const CallBase &CB, + SmallVectorImpl &CallbackUses); /// Conversion operator to conveniently check for a valid/initialized ACS. - explicit operator bool() const { return (bool)CS; } + explicit operator bool() const { return CB != nullptr; } /// Return the underlying instruction. - Instruction *getInstruction() const { return CS.getInstruction(); } - - /// Return the call site abstraction for the underlying instruction. - CallSite getCallSite() const { return CS; } + CallBase *getInstruction() const { return CB; } /// Return true if this ACS represents a direct call. bool isDirectCall() const { - return !isCallbackCall() && !CS.isIndirectCall(); + return !isCallbackCall() && !CB->isIndirectCall(); } /// Return true if this ACS represents an indirect call. bool isIndirectCall() const { - return !isCallbackCall() && CS.isIndirectCall(); + return !isCallbackCall() && CB->isIndirectCall(); } /// Return true if this ACS represents a callback call. @@ -839,18 +836,18 @@ class AbstractCallSite { /// Return true if @p U is the use that defines the callee of this ACS. 
bool isCallee(const Use *U) const { if (isDirectCall()) - return CS.isCallee(U); + return CB->isCallee(U); assert(!CI.ParameterEncoding.empty() && "Callback without parameter encoding!"); - return (int)CS.getArgumentNo(U) == CI.ParameterEncoding[0]; + return (int)CB->getArgOperandNo(U) == CI.ParameterEncoding[0]; } /// Return the number of parameters of the callee. unsigned getNumArgOperands() const { if (isDirectCall()) - return CS.getNumArgOperands(); + return CB->getNumArgOperands(); // Subtract 1 for the callee encoding. return CI.ParameterEncoding.size() - 1; } @@ -879,10 +876,10 @@ class AbstractCallSite { /// function parameter number @p ArgNo or nullptr if there is none. Value *getCallArgOperand(unsigned ArgNo) const { if (isDirectCall()) - return CS.getArgOperand(ArgNo); + return CB->getArgOperand(ArgNo); // Add 1 for the callee encoding. return CI.ParameterEncoding[ArgNo + 1] >= 0 - ? CS.getArgOperand(CI.ParameterEncoding[ArgNo + 1]) + ? CB->getArgOperand(CI.ParameterEncoding[ArgNo + 1]) : nullptr; } @@ -906,8 +903,8 @@ class AbstractCallSite { /// Return the pointer to function that is being called. Value *getCalledValue() const { if (isDirectCall()) - return CS.getCalledValue(); - return CS.getArgOperand(getCallArgOperandNoForCallee()); + return CB->getCalledValue(); + return CB->getArgOperand(getCallArgOperandNoForCallee()); } /// Return the function being called if this is a direct call, otherwise diff --git a/llvm/lib/IR/AbstractCallSite.cpp b/llvm/lib/IR/AbstractCallSite.cpp index e415c48fefe5..0354e7d5305c 100644 --- a/llvm/lib/IR/AbstractCallSite.cpp +++ b/llvm/lib/IR/AbstractCallSite.cpp @@ -33,9 +33,9 @@ STATISTIC(NumInvalidAbstractCallSitesUnknownCallee, STATISTIC(NumInvalidAbstractCallSitesNoCallback, "Number of invalid abstract call sites created (no callback)"); -void AbstractCallSite::getCallbackUses(ImmutableCallSite ICS, - SmallVectorImpl &CBUses) { - const Function *Callee = ICS.getCalledFunction(); +void AbstractCallSite::getCallbackUses(const CallBase &CB, + SmallVectorImpl &CallbackUses) { + const Function *Callee = CB.getCalledFunction(); if (!Callee) return; @@ -48,57 +48,58 @@ void AbstractCallSite::getCallbackUses(ImmutableCallSite ICS, auto *CBCalleeIdxAsCM = cast(OpMD->getOperand(0)); uint64_t CBCalleeIdx = cast(CBCalleeIdxAsCM->getValue())->getZExtValue(); - if (CBCalleeIdx < ICS.arg_size()) - CBUses.push_back(ICS.arg_begin() + CBCalleeIdx); + if (CBCalleeIdx < CB.arg_size()) + CallbackUses.push_back(CB.arg_begin() + CBCalleeIdx); } } /// Create an abstract call site from a use. -AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) { +AbstractCallSite::AbstractCallSite(const Use *U) + : CB(dyn_cast(U->getUser())) { // First handle unknown users. - if (!CS) { + if (!CB) { // If the use is actually in a constant cast expression which itself // has only one use, we look through the constant cast expression. // This happens by updating the use @p U to the use of the constant - // cast expression and afterwards re-initializing CS accordingly. + // cast expression and afterwards re-initializing CB accordingly. if (ConstantExpr *CE = dyn_cast(U->getUser())) if (CE->getNumUses() == 1 && CE->isCast()) { U = &*CE->use_begin(); - CS = CallSite(U->getUser()); + CB = dyn_cast(U->getUser()); } - if (!CS) { + if (!CB) { NumInvalidAbstractCallSitesUnknownUse++; return; } } // Then handle direct or indirect calls. Thus, if U is the callee of the - // call site CS it is not a callback and we are done. 
- if (CS.isCallee(U)) { + // call site CB it is not a callback and we are done. + if (CB->isCallee(U)) { NumDirectAbstractCallSites++; return; } // If we cannot identify the broker function we cannot create a callback and // invalidate the abstract call site. - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB->getCalledFunction(); if (!Callee) { NumInvalidAbstractCallSitesUnknownCallee++; - CS = CallSite(); + CB = nullptr; return; } MDNode *CallbackMD = Callee->getMetadata(LLVMContext::MD_callback); if (!CallbackMD) { NumInvalidAbstractCallSitesNoCallback++; - CS = CallSite(); + CB = nullptr; return; } - unsigned UseIdx = CS.getArgumentNo(U); + unsigned UseIdx = CB->getArgOperandNo(U); MDNode *CallbackEncMD = nullptr; for (const MDOperand &Op : CallbackMD->operands()) { MDNode *OpMD = cast(Op.get()); @@ -113,7 +114,7 @@ AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) { if (!CallbackEncMD) { NumInvalidAbstractCallSitesNoCallback++; - CS = CallSite(); + CB = nullptr; return; } @@ -121,7 +122,7 @@ AbstractCallSite::AbstractCallSite(const Use *U) : CS(U->getUser()) { assert(CallbackEncMD->getNumOperands() >= 2 && "Incomplete !callback metadata"); - unsigned NumCallOperands = CS.getNumArgOperands(); + unsigned NumCallOperands = CB->getNumArgOperands(); // Skip the var-arg flag at the end when reading the metadata. for (unsigned u = 0, e = CallbackEncMD->getNumOperands() - 1; u < e; u++) { Metadata *OpAsM = CallbackEncMD->getOperand(u).get(); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 365f9bfd6542..da1cbcc90be5 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -151,10 +151,10 @@ Argument *IRPosition::getAssociatedArgument() const { // of the underlying call site operand, we want the corresponding callback // callee argument and not the direct callee argument. Optional CBCandidateArg; - SmallVector CBUses; - ImmutableCallSite ICS(&getAnchorValue()); - AbstractCallSite::getCallbackUses(ICS, CBUses); - for (const Use *U : CBUses) { + SmallVector CallbackUses; + const auto &CB = cast(getAnchorValue()); + AbstractCallSite::getCallbackUses(CB, CallbackUses); + for (const Use *U : CallbackUses) { AbstractCallSite ACS(U); assert(ACS && ACS.isCallbackCall()); if (!ACS.getCalledFunction()) @@ -183,7 +183,7 @@ Argument *IRPosition::getAssociatedArgument() const { // If no callbacks were found, or none used the underlying call site operand // exclusively, use the direct callee argument if available. - const Function *Callee = ICS.getCalledFunction(); + const Function *Callee = CB.getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) return Callee->getArg(ArgNo); @@ -1328,7 +1328,7 @@ bool Attributor::isValidFunctionSignatureRewrite( auto CallSiteCanBeChanged = [](AbstractCallSite ACS) { // Forbid must-tail calls for now. 
-    return !ACS.isCallbackCall() && !ACS.getCallSite().isMustTailCall();
+    return !ACS.isCallbackCall() && !ACS.getInstruction()->isMustTailCall();
   };
 
   Function *Fn = Arg.getParent();
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 21cb9ed9d9aa..9ea314f06888 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -4993,9 +4993,10 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
     // Helper to check if for the given call site the associated argument is
     // passed to a callback where the privatization would be different.
     auto IsCompatiblePrivArgOfCallback = [&](CallSite CS) {
-      SmallVector CBUses;
-      AbstractCallSite::getCallbackUses(CS, CBUses);
-      for (const Use *U : CBUses) {
+      SmallVector CallbackUses;
+      AbstractCallSite::getCallbackUses(cast(*CS.getInstruction()),
+                                        CallbackUses);
+      for (const Use *U : CallbackUses) {
         AbstractCallSite CBACS(U);
         assert(CBACS && CBACS.isCallbackCall());
         for (Argument &CBArg : CBACS.getCalledFunction()->args()) {
@@ -5081,7 +5082,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
                << Arg->getParent()->getName()
                << ")\n[AAPrivatizablePtr] because it is an argument in a "
                   "direct call of ("
-               << ACS.getCallSite().getCalledFunction()->getName()
+               << ACS.getInstruction()->getCalledFunction()->getName()
                << ").\n[AAPrivatizablePtr] for which the argument "
                   "privatization is not compatible.\n";
       });
@@ -5093,7 +5094,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
     // here.
     auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) {
       if (ACS.isDirectCall())
-        return IsCompatiblePrivArgOfCallback(ACS.getCallSite());
+        return IsCompatiblePrivArgOfCallback(CallSite(ACS.getInstruction()));
       if (ACS.isCallbackCall())
         return IsCompatiblePrivArgOfDirectCS(ACS);
       return false;

From 48139ebc3a1adee2efa0e6a72d6058e8e3712059 Mon Sep 17 00:00:00 2001
From: Wouter van Oortmerssen
Date: Thu, 19 Mar 2020 19:53:51 -0700
Subject: [PATCH 111/216] [WebAssembly] Add int32 DW_OP_WASM_location variant

This is to allow us to add relocatable global indices as a symbol.
Also adds R_WASM_GLOBAL_INDEX_I32 relocation type to support it.
See discussion in https://github.com/WebAssembly/debugging/issues/12 --- lld/test/wasm/debuginfo.test | 6 +- lld/wasm/InputChunks.cpp | 5 +- lld/wasm/InputFiles.cpp | 2 + lld/wasm/Relocations.cpp | 1 + llvm/include/llvm/BinaryFormat/Dwarf.def | 1 + llvm/include/llvm/BinaryFormat/WasmRelocs.def | 1 + .../llvm/DebugInfo/DWARF/DWARFExpression.h | 1 + .../CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 39 ++++++++++--- llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 2 +- .../CodeGen/AsmPrinter/DwarfExpression.cpp | 4 +- llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h | 2 +- llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp | 24 +++++++- llvm/lib/MC/WasmObjectWriter.cpp | 6 +- llvm/lib/Object/RelocationResolver.cpp | 2 + llvm/lib/Object/WasmObjectFile.cpp | 8 ++- llvm/lib/ObjectYAML/WasmEmitter.cpp | 7 +-- .../WebAssemblyWasmObjectWriter.cpp | 2 + llvm/lib/Target/WebAssembly/WebAssembly.h | 11 +++- .../WebAssemblyDebugValueManager.cpp | 2 +- .../WebAssembly/WebAssemblyFrameLowering.cpp | 5 +- .../WebAssembly/WebAssemblyInstrInfo.cpp | 7 ++- .../test/CodeGen/WebAssembly/debugtest-opt.ll | 2 +- .../WebAssembly/dbg-value-dwarfdump.ll | 6 +- llvm/test/MC/WebAssembly/debug-info.ll | 57 +++++++++++-------- llvm/test/MC/WebAssembly/debug-localvar.ll | 2 +- llvm/test/MC/WebAssembly/dwarfdump.ll | 6 +- llvm/tools/llvm-readobj/WasmDumper.cpp | 14 +---- 27 files changed, 150 insertions(+), 75 deletions(-) diff --git a/lld/test/wasm/debuginfo.test b/lld/test/wasm/debuginfo.test index 59c36979b4b6..2566b74d93bf 100644 --- a/lld/test/wasm/debuginfo.test +++ b/lld/test/wasm/debuginfo.test @@ -45,7 +45,7 @@ CHECK-NEXT: DW_AT_name ("hi_foo.c") CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("y") -CHECK-NEXT: DW_AT_type (0x000000a1 "int[2]") +CHECK-NEXT: DW_AT_type (0x000000a7 "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (1) @@ -67,14 +67,14 @@ CHECK-NEXT: DW_AT_encoding (DW_ATE_unsigned) CHECK: DW_TAG_variable CHECK-NEXT: DW_AT_name ("z") -CHECK-NEXT: DW_AT_type (0x000000a1 "int[2]") +CHECK-NEXT: DW_AT_type (0x000000a7 "int[2]") CHECK-NEXT: DW_AT_external (true) CHECK-NEXT: DW_AT_decl_file ("{{.*}}hi_foo.c") CHECK-NEXT: DW_AT_decl_line (8) CHECK-NEXT: DW_AT_location (DW_OP_addr 0x0) CHECK: DW_TAG_subprogram -CHECK-NEXT: DW_AT_low_pc +CHECK-NEXT: DW_AT_low_pc CHECK-NEXT: DW_AT_high_pc CHECK-NEXT: DW_AT_frame_base CHECK-NEXT: DW_AT_name ("foo") diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index 99a393e8ef95..077f2deac72e 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -68,6 +68,7 @@ void InputChunk::verifyRelocTargets() const { case R_WASM_MEMORY_ADDR_I32: case R_WASM_FUNCTION_OFFSET_I32: case R_WASM_SECTION_OFFSET_I32: + case R_WASM_GLOBAL_INDEX_I32: existingValue = static_cast(read32le(loc)); break; default: @@ -77,7 +78,8 @@ void InputChunk::verifyRelocTargets() const { if (bytesRead && bytesRead != 5) warn("expected LEB at relocation site be 5-byte padded"); - if (rel.Type != R_WASM_GLOBAL_INDEX_LEB) { + if (rel.Type != R_WASM_GLOBAL_INDEX_LEB || + rel.Type != R_WASM_GLOBAL_INDEX_I32) { uint32_t expectedValue = file->calcExpectedValue(rel); if (expectedValue != existingValue) warn("unexpected existing value for " + relocTypeToString(rel.Type) + @@ -132,6 +134,7 @@ void InputChunk::writeTo(uint8_t *buf) const { case R_WASM_MEMORY_ADDR_I32: case R_WASM_FUNCTION_OFFSET_I32: case R_WASM_SECTION_OFFSET_I32: + case R_WASM_GLOBAL_INDEX_I32: write32le(loc, value); break; default: diff --git a/lld/wasm/InputFiles.cpp 
b/lld/wasm/InputFiles.cpp index 4954e6dc591b..b7d90feb58b3 100644 --- a/lld/wasm/InputFiles.cpp +++ b/lld/wasm/InputFiles.cpp @@ -152,6 +152,7 @@ uint32_t ObjFile::calcExpectedValue(const WasmRelocation &reloc) const { return reloc.Index; case R_WASM_FUNCTION_INDEX_LEB: case R_WASM_GLOBAL_INDEX_LEB: + case R_WASM_GLOBAL_INDEX_I32: case R_WASM_EVENT_INDEX_LEB: { const WasmSymbol &sym = wasmObj->syms()[reloc.Index]; return sym.Info.ElementIndex; @@ -199,6 +200,7 @@ uint32_t ObjFile::calcNewValue(const WasmRelocation &reloc) const { case R_WASM_FUNCTION_INDEX_LEB: return getFunctionSymbol(reloc.Index)->getFunctionIndex(); case R_WASM_GLOBAL_INDEX_LEB: + case R_WASM_GLOBAL_INDEX_I32: if (auto gs = dyn_cast(sym)) return gs->getGlobalIndex(); return sym->getGOTIndex(); diff --git a/lld/wasm/Relocations.cpp b/lld/wasm/Relocations.cpp index 2ab449ffe256..9475ff1c312e 100644 --- a/lld/wasm/Relocations.cpp +++ b/lld/wasm/Relocations.cpp @@ -84,6 +84,7 @@ void scanRelocations(InputChunk *chunk) { out.elemSec->addEntry(cast(sym)); break; case R_WASM_GLOBAL_INDEX_LEB: + case R_WASM_GLOBAL_INDEX_I32: if (!isa(sym)) addGOTEntry(sym); break; diff --git a/llvm/include/llvm/BinaryFormat/Dwarf.def b/llvm/include/llvm/BinaryFormat/Dwarf.def index 956c67c3e137..b9a04b06a814 100644 --- a/llvm/include/llvm/BinaryFormat/Dwarf.def +++ b/llvm/include/llvm/BinaryFormat/Dwarf.def @@ -664,6 +664,7 @@ HANDLE_DW_OP(0xa9, reinterpret, 5, DWARF) HANDLE_DW_OP(0xe0, GNU_push_tls_address, 0, GNU) // Extensions for WebAssembly. HANDLE_DW_OP(0xed, WASM_location, 0, WASM) +HANDLE_DW_OP(0xee, WASM_location_int, 0, WASM) // The GNU entry value extension. HANDLE_DW_OP(0xf3, GNU_entry_value, 0, GNU) // Extensions for Fission proposal. diff --git a/llvm/include/llvm/BinaryFormat/WasmRelocs.def b/llvm/include/llvm/BinaryFormat/WasmRelocs.def index 00dacf72abb0..897d597c4b9e 100644 --- a/llvm/include/llvm/BinaryFormat/WasmRelocs.def +++ b/llvm/include/llvm/BinaryFormat/WasmRelocs.def @@ -15,3 +15,4 @@ WASM_RELOC(R_WASM_SECTION_OFFSET_I32, 9) WASM_RELOC(R_WASM_EVENT_INDEX_LEB, 10) WASM_RELOC(R_WASM_MEMORY_ADDR_REL_SLEB, 11) WASM_RELOC(R_WASM_TABLE_INDEX_REL_SLEB, 12) +WASM_RELOC(R_WASM_GLOBAL_INDEX_I32, 13) diff --git a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h index c4dc53337c07..4bbff49606ab 100644 --- a/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h +++ b/llvm/include/llvm/DebugInfo/DWARF/DWARFExpression.h @@ -42,6 +42,7 @@ class DWARFExpression { SizeRefAddr = 6, SizeBlock = 7, ///< Preceding operand contains block size BaseTypeRef = 8, + WasmLocationArg = 30, SignBit = 0x80, SignedSize1 = SignBit | Size1, SignedSize2 = SignBit | Size2, diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 34d808d8ac79..dbe782980f36 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -37,6 +37,7 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSymbolWasm.h" #include "llvm/MC/MachineLocation.h" #include "llvm/Support/Casting.h" #include "llvm/Target/TargetLoweringObjectFile.h" @@ -420,13 +421,37 @@ DIE &DwarfCompileUnit::updateSubprogramScopeDIE(const DISubprogram *SP) { break; } case TargetFrameLowering::DwarfFrameBase::WasmFrameBase: { - DIELoc *Loc = new (DIEValueAllocator) DIELoc; - DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); - DIExpressionCursor Cursor({}); - 
DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind, - FrameBase.Location.WasmLoc.Index); - DwarfExpr.addExpression(std::move(Cursor)); - addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize()); + // FIXME: duplicated from Target/WebAssembly/WebAssembly.h + // don't want to depend on target specific headers in this code? + const unsigned TI_GLOBAL_RELOC = 3; + if (FrameBase.Location.WasmLoc.Kind == TI_GLOBAL_RELOC) { + // These need to be relocatable. + assert(FrameBase.Location.WasmLoc.Index == 0); // Only SP so far. + auto SPSym = cast( + Asm->GetExternalSymbolSymbol("__stack_pointer")); + // FIXME: this repeats what WebAssemblyMCInstLower:: + // GetExternalSymbolSymbol does, since if there's no code that + // refers to this symbol, we have to set it here. + SPSym->setType(wasm::WASM_SYMBOL_TYPE_GLOBAL); + // FIXME: need to check subtarget to see if its wasm64, but we + // can't cast to WebAssemblySubtarget here. + SPSym->setGlobalType(wasm::WasmGlobalType{wasm::WASM_TYPE_I32, true}); + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_WASM_location); + addSInt(*Loc, dwarf::DW_FORM_sdata, FrameBase.Location.WasmLoc.Kind); + addLabel(*Loc, dwarf::DW_FORM_udata, SPSym); + DD->addArangeLabel(SymbolCU(this, SPSym)); + addUInt(*Loc, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(*SPDie, dwarf::DW_AT_frame_base, Loc); + } else { + DIELoc *Loc = new (DIEValueAllocator) DIELoc; + DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); + DIExpressionCursor Cursor({}); + DwarfExpr.addWasmLocation(FrameBase.Location.WasmLoc.Kind, + FrameBase.Location.WasmLoc.Index); + DwarfExpr.addExpression(std::move(Cursor)); + addBlock(*SPDie, dwarf::DW_AT_frame_base, DwarfExpr.finalize()); + } break; } } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 0624dfe8cf5a..cf1e269e80ad 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -2400,7 +2400,7 @@ void DwarfDebug::emitDebugLocValue(const AsmPrinter &AP, const DIBasicType *BT, TargetIndexLocation Loc = Value.getTargetIndexLocation(); // TODO TargetIndexLocation is a target-independent. Currently only the WebAssembly-specific // encoding is supported. 
- DwarfExpr.addWasmLocation(Loc.Index, Loc.Offset); + DwarfExpr.addWasmLocation(Loc.Index, static_cast(Loc.Offset)); } else if (Value.isConstantFP()) { APInt RawBytes = Value.getConstantFP()->getValueAPF().bitcastToAPInt(); DwarfExpr.addUnsignedConstant(RawBytes); diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp index be225eb03a66..a65d2d74a8c7 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.cpp @@ -582,10 +582,10 @@ void DwarfExpression::emitLegacyZExt(unsigned FromBits) { emitOp(dwarf::DW_OP_and); } -void DwarfExpression::addWasmLocation(unsigned Index, int64_t Offset) { +void DwarfExpression::addWasmLocation(unsigned Index, uint64_t Offset) { assert(LocationKind == Implicit || LocationKind == Unknown); LocationKind = Implicit; emitOp(dwarf::DW_OP_WASM_location); emitUnsigned(Index); - emitSigned(Offset); + emitUnsigned(Offset); } diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h index 331891083375..5d4386282787 100644 --- a/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h +++ b/llvm/lib/CodeGen/AsmPrinter/DwarfExpression.h @@ -342,7 +342,7 @@ class DwarfExpression { /// Emit location information expressed via WebAssembly location + offset /// The Index is an identifier for locals, globals or operand stack. - void addWasmLocation(unsigned Index, int64_t Offset); + void addWasmLocation(unsigned Index, uint64_t Offset); }; /// DwarfExpression implementation for .debug_loc entries. diff --git a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp index 0a658034b67b..379b88c49c73 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFExpression.cpp @@ -94,7 +94,7 @@ static DescVector getDescriptions() { Desc(Op::Dwarf3, Op::SizeLEB, Op::SizeBlock); Descriptions[DW_OP_stack_value] = Desc(Op::Dwarf3); Descriptions[DW_OP_WASM_location] = - Desc(Op::Dwarf4, Op::SizeLEB, Op::SignedSizeLEB); + Desc(Op::Dwarf4, Op::SizeLEB, Op::WasmLocationArg); Descriptions[DW_OP_GNU_push_tls_address] = Desc(Op::Dwarf3); Descriptions[DW_OP_addrx] = Desc(Op::Dwarf4, Op::SizeLEB); Descriptions[DW_OP_GNU_addr_index] = Desc(Op::Dwarf4, Op::SizeLEB); @@ -170,6 +170,19 @@ bool DWARFExpression::Operation::extract(DataExtractor Data, case Operation::BaseTypeRef: Operands[Operand] = Data.getULEB128(&Offset); break; + case Operation::WasmLocationArg: + assert(Operand == 1); + switch (Operands[0]) { + case 0: case 1: case 2: + Operands[Operand] = Data.getULEB128(&Offset); + break; + case 3: // global as uint32 + Operands[Operand] = Data.getU32(&Offset); + break; + default: + return false; // Unknown Wasm location + } + break; case Operation::SizeBlock: // We need a size, so this cannot be the first operand if (Operand == 0) @@ -273,6 +286,15 @@ bool DWARFExpression::Operation::print(raw_ostream &OS, OS << " 0x0"; else prettyPrintBaseTypeRef(U, OS, Operands, Operand); + } else if (Size == Operation::WasmLocationArg) { + assert(Operand == 1); + switch (Operands[0]) { + case 0: case 1: case 2: + case 3: // global as uint32 + OS << format(" 0x%" PRIx64, Operands[Operand]); + break; + default: assert(false); + } } else if (Size == Operation::SizeBlock) { uint64_t Offset = Operands[Operand]; for (unsigned i = 0; i < Operands[Operand - 1]; ++i) diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index ec424644400b..4d0c71649e87 100644 --- 
a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -537,7 +537,9 @@ static const MCSymbolWasm *resolveSymbol(const MCSymbolWasm &Symbol) { // useable. uint32_t WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) { - if (RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB && !RelEntry.Symbol->isGlobal()) { + if ((RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_LEB || + RelEntry.Type == wasm::R_WASM_GLOBAL_INDEX_I32) && + !RelEntry.Symbol->isGlobal()) { assert(GOTIndices.count(RelEntry.Symbol) > 0 && "symbol not found in GOT index space"); return GOTIndices[RelEntry.Symbol]; } @@ -556,6 +558,7 @@ WasmObjectWriter::getProvisionalValue(const WasmRelocationEntry &RelEntry) { return getRelocationIndexValue(RelEntry); case wasm::R_WASM_FUNCTION_INDEX_LEB: case wasm::R_WASM_GLOBAL_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_I32: case wasm::R_WASM_EVENT_INDEX_LEB: // Provisional value is function/global/event Wasm index assert(WasmIndices.count(RelEntry.Symbol) > 0 && "symbol not found in wasm index space"); @@ -660,6 +663,7 @@ void WasmObjectWriter::applyRelocations( case wasm::R_WASM_MEMORY_ADDR_I32: case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_SECTION_OFFSET_I32: + case wasm::R_WASM_GLOBAL_INDEX_I32: writeI32(Stream, Value, Offset); break; case wasm::R_WASM_TABLE_INDEX_SLEB: diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 31478be7899e..132896b1ecc2 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -498,6 +498,7 @@ static bool supportsWasm32(uint64_t Type) { case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_SECTION_OFFSET_I32: case wasm::R_WASM_EVENT_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_I32: return true; default: return false; @@ -517,6 +518,7 @@ static uint64_t resolveWasm32(RelocationRef R, uint64_t S, uint64_t A) { case wasm::R_WASM_FUNCTION_OFFSET_I32: case wasm::R_WASM_SECTION_OFFSET_I32: case wasm::R_WASM_EVENT_INDEX_LEB: + case wasm::R_WASM_GLOBAL_INDEX_I32: // For wasm section, its offset at 0 -- ignoring Value return A; default: diff --git a/llvm/lib/Object/WasmObjectFile.cpp b/llvm/lib/Object/WasmObjectFile.cpp index 362834cee899..2993fd1d5559 100644 --- a/llvm/lib/Object/WasmObjectFile.cpp +++ b/llvm/lib/Object/WasmObjectFile.cpp @@ -798,6 +798,11 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { return make_error("Bad relocation global index", object_error::parse_failed); break; + case wasm::R_WASM_GLOBAL_INDEX_I32: + if (!isValidGlobalSymbol(Reloc.Index)) + return make_error("Bad relocation global index", + object_error::parse_failed); + break; case wasm::R_WASM_EVENT_INDEX_LEB: if (!isValidEventSymbol(Reloc.Index)) return make_error("Bad relocation event index", @@ -837,7 +842,8 @@ Error WasmObjectFile::parseRelocSection(StringRef Name, ReadContext &Ctx) { if (Reloc.Type == wasm::R_WASM_TABLE_INDEX_I32 || Reloc.Type == wasm::R_WASM_MEMORY_ADDR_I32 || Reloc.Type == wasm::R_WASM_SECTION_OFFSET_I32 || - Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32) + Reloc.Type == wasm::R_WASM_FUNCTION_OFFSET_I32 || + Reloc.Type == wasm::R_WASM_GLOBAL_INDEX_I32) Size = 4; if (Reloc.Offset + Size > EndOffset) return make_error("Bad relocation offset", diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index ef54eaedfc0b..84e364c07448 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -530,12 +530,7 @@ void 
WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, writeUint8(OS, Reloc.Type); encodeULEB128(Reloc.Offset, OS); encodeULEB128(Reloc.Index, OS); - switch (Reloc.Type) { - case wasm::R_WASM_MEMORY_ADDR_LEB: - case wasm::R_WASM_MEMORY_ADDR_SLEB: - case wasm::R_WASM_MEMORY_ADDR_I32: - case wasm::R_WASM_FUNCTION_OFFSET_I32: - case wasm::R_WASM_SECTION_OFFSET_I32: + if (wasm::relocTypeHasAddend(Reloc.Type)) { encodeULEB128(Reloc.Addend, OS); } } diff --git a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp index e7a599e3e175..99000cb64e77 100644 --- a/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp +++ b/llvm/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyWasmObjectWriter.cpp @@ -103,6 +103,8 @@ unsigned WebAssemblyWasmObjectWriter::getRelocType(const MCValue &Target, case FK_Data_4: if (SymA.isFunction()) return wasm::R_WASM_TABLE_INDEX_I32; + if (SymA.isGlobal()) + return wasm::R_WASM_GLOBAL_INDEX_I32; if (auto Section = static_cast( getFixupSection(Fixup.getValue()))) { if (Section->getKind().isText()) diff --git a/llvm/lib/Target/WebAssembly/WebAssembly.h b/llvm/lib/Target/WebAssembly/WebAssembly.h index 786e0a1d0305..890d84b82fb1 100644 --- a/llvm/lib/Target/WebAssembly/WebAssembly.h +++ b/llvm/lib/Target/WebAssembly/WebAssembly.h @@ -78,7 +78,16 @@ void initializeWebAssemblyRegNumberingPass(PassRegistry &); void initializeWebAssemblyPeepholePass(PassRegistry &); namespace WebAssembly { -enum TargetIndex { TI_LOCAL_START, TI_GLOBAL_START, TI_OPERAND_STACK_START }; +enum TargetIndex { + // Followed by a local index (ULEB). + TI_LOCAL, + // Followed by an absolute global index (ULEB). DEPRECATED. + TI_GLOBAL_FIXED, + TI_OPERAND_STACK, + // Followed by a compilation unit relative global index (uint32_t) + // that will have an associated relocation. + TI_GLOBAL_RELOC +}; } // end namespace WebAssembly } // end namespace llvm diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp index 114a50a3055d..eb67d438e9a4 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyDebugValueManager.cpp @@ -48,6 +48,6 @@ void WebAssemblyDebugValueManager::clone(MachineInstr *Insert, void WebAssemblyDebugValueManager::replaceWithLocal(unsigned LocalId) { for (auto *DBI : DbgValues) { MachineOperand &Op = DBI->getOperand(0); - Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL_START, LocalId); + Op.ChangeToTargetIndex(llvm::WebAssembly::TI_LOCAL, LocalId); } } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp index 036c2aee0050..30647fd6c5bb 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyFrameLowering.cpp @@ -266,12 +266,11 @@ WebAssemblyFrameLowering::getDwarfFrameBase(const MachineFunction &MF) const { const WebAssemblyFunctionInfo &MFI = *MF.getInfo(); if (needsSP(MF) && MFI.isFrameBaseVirtual()) { unsigned LocalNum = MFI.getFrameBaseLocal(); - Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL_START, LocalNum}; + Loc.Location.WasmLoc = {WebAssembly::TI_LOCAL, LocalNum}; } else { // TODO: This should work on a breakpoint at a function with no frame, // but probably won't work for traversing up the stack. 
- // TODO: This needs a relocation for correct __stack_pointer - Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_START, 0}; + Loc.Location.WasmLoc = {WebAssembly::TI_GLOBAL_RELOC, 0}; } return Loc; } diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 221dacaf821b..6fe1fd2b5c5a 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -235,8 +235,9 @@ bool WebAssemblyInstrInfo::reverseBranchCondition( ArrayRef> WebAssemblyInstrInfo::getSerializableTargetIndices() const { static const std::pair TargetIndices[] = { - {WebAssembly::TI_LOCAL_START, "wasm-local-start"}, - {WebAssembly::TI_GLOBAL_START, "wasm-global-start"}, - {WebAssembly::TI_OPERAND_STACK_START, "wasm-operator-stack-start"}}; + {WebAssembly::TI_LOCAL, "wasm-local"}, + {WebAssembly::TI_GLOBAL_FIXED, "wasm-global-fixed"}, + {WebAssembly::TI_OPERAND_STACK, "wasm-operand-stack"}, + {WebAssembly::TI_GLOBAL_RELOC, "wasm-global-reloc"}}; return makeArrayRef(TargetIndices); } diff --git a/llvm/test/CodeGen/WebAssembly/debugtest-opt.ll b/llvm/test/CodeGen/WebAssembly/debugtest-opt.ll index 43dc2c2dfadf..20e0a9c61478 100644 --- a/llvm/test/CodeGen/WebAssembly/debugtest-opt.ll +++ b/llvm/test/CodeGen/WebAssembly/debugtest-opt.ll @@ -11,7 +11,7 @@ ; CHECK-NEXT: DW_AT_low_pc ; CHECK-NEXT: DW_AT_high_pc ;; Check that we fall back to the default frame base (the global) -; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x1 +0, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location_int 0x3 0x0, DW_OP_stack_value) ; TODO: Find a more-reduced test case for The fix in WebAssemblyRegColoring diff --git a/llvm/test/DebugInfo/WebAssembly/dbg-value-dwarfdump.ll b/llvm/test/DebugInfo/WebAssembly/dbg-value-dwarfdump.ll index f15ebe8e8933..81f651b6aee9 100644 --- a/llvm/test/DebugInfo/WebAssembly/dbg-value-dwarfdump.ll +++ b/llvm/test/DebugInfo/WebAssembly/dbg-value-dwarfdump.ll @@ -21,11 +21,11 @@ for.body: ; preds = %entry, %for.body %a.010 = phi i32 [ %b.011, %for.body ], [ 0, %entry ] %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] -; CHECK: DW_OP_WASM_location 0x0 +[[LOCAL_1:[0-9]+]] +; CHECK: DW_OP_WASM_location 0x0 0x[[LOCAL_1:[0-9]+]] call void @llvm.dbg.value(metadata i32 %b.011, metadata !16, metadata !DIExpression()), !dbg !19 -; CHECK-NOT: DW_OP_WASM_location 0x0 +[[LOCAL_1]] -; CHECK: DW_OP_WASM_location 0x0 +[[LOCAL_2:[0-9]+]] +; CHECK-NOT: DW_OP_WASM_location 0x0 0x[[LOCAL_1]] +; CHECK: DW_OP_WASM_location 0x0 0x[[LOCAL_2:[0-9]+]] %add = add nsw i32 %b.011, %a.010, !dbg !26 %inc = add nuw nsw i32 %i.09, 1, !dbg !28 call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !19 diff --git a/llvm/test/MC/WebAssembly/debug-info.ll b/llvm/test/MC/WebAssembly/debug-info.ll index 852d9ee2a93b..1da20a8a12e4 100644 --- a/llvm/test/MC/WebAssembly/debug-info.ll +++ b/llvm/test/MC/WebAssembly/debug-info.ll @@ -11,33 +11,33 @@ ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: IMPORT (0x2) -; CHECK-NEXT: Size: 58 +; CHECK-NEXT: Size: 81 ; CHECK-NEXT: Offset: 18 ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: FUNCTION (0x3) ; CHECK-NEXT: Size: 2 -; CHECK-NEXT: Offset: 82 +; CHECK-NEXT: Offset: 105 ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: ELEM (0x9) ; CHECK-NEXT: Size: 7 -; CHECK-NEXT: Offset: 90 +; CHECK-NEXT: Offset: 113 ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: DATACOUNT (0xC) ; CHECK-NEXT: Size: 
1 -; CHECK-NEXT: Offset: 103 +; CHECK-NEXT: Offset: 126 ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CODE (0xA) ; CHECK-NEXT: Size: 4 -; CHECK-NEXT: Offset: 110 +; CHECK-NEXT: Offset: 133 ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: DATA (0xB) ; CHECK-NEXT: Size: 19 -; CHECK-NEXT: Offset: 120 +; CHECK-NEXT: Offset: 143 ; CHECK-NEXT: Segments [ ; CHECK-NEXT: Segment { ; CHECK-NEXT: Name: .data.foo @@ -54,79 +54,79 @@ ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 86 -; CHECK-NEXT: Offset: 145 +; CHECK-NEXT: Offset: 168 ; CHECK-NEXT: Name: .debug_abbrev ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) -; CHECK-NEXT: Size: 111 -; CHECK-NEXT: Offset: 251 +; CHECK-NEXT: Size: 114 +; CHECK-NEXT: Offset: 274 ; CHECK-NEXT: Name: .debug_info ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 121 -; CHECK-NEXT: Offset: 380 +; CHECK-NEXT: Offset: 406 ; CHECK-NEXT: Name: .debug_str ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 42 -; CHECK-NEXT: Offset: 518 +; CHECK-NEXT: Offset: 544 ; CHECK-NEXT: Name: .debug_pubnames ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 26 -; CHECK-NEXT: Offset: 582 +; CHECK-NEXT: Offset: 608 ; CHECK-NEXT: Name: .debug_pubtypes ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 57 -; CHECK-NEXT: Offset: 630 +; CHECK-NEXT: Offset: 656 ; CHECK-NEXT: Name: .debug_line ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) -; CHECK-NEXT: Size: 88 -; CHECK-NEXT: Offset: 705 +; CHECK-NEXT: Size: 91 +; CHECK-NEXT: Offset: 731 ; CHECK-NEXT: Name: linking ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 9 -; CHECK-NEXT: Offset: 807 +; CHECK-NEXT: Offset: 836 ; CHECK-NEXT: Name: reloc.DATA ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) -; CHECK-NEXT: Size: 58 -; CHECK-NEXT: Offset: 833 +; CHECK-NEXT: Size: 61 +; CHECK-NEXT: Offset: 862 ; CHECK-NEXT: Name: reloc..debug_info ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 6 -; CHECK-NEXT: Offset: 915 +; CHECK-NEXT: Offset: 947 ; CHECK-NEXT: Name: reloc..debug_pubnames ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 6 -; CHECK-NEXT: Offset: 949 +; CHECK-NEXT: Offset: 981 ; CHECK-NEXT: Name: reloc..debug_pubtypes ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 6 -; CHECK-NEXT: Offset: 983 +; CHECK-NEXT: Offset: 1015 ; CHECK-NEXT: Name: reloc..debug_line ; CHECK-NEXT: } ; CHECK-NEXT: Section { ; CHECK-NEXT: Type: CUSTOM (0x0) ; CHECK-NEXT: Size: 77 -; CHECK-NEXT: Offset: 1013 +; CHECK-NEXT: Offset: 1045 ; CHECK-NEXT: Name: producers ; CHECK-NEXT: } ; CHECK-NEXT:] @@ -148,7 +148,8 @@ ; CHECK-NEXT: 0x44 R_WASM_SECTION_OFFSET_I32 .debug_str 113 ; CHECK-NEXT: 0x50 R_WASM_MEMORY_ADDR_I32 ptr2 0 ; CHECK-NEXT: 0x5B R_WASM_FUNCTION_OFFSET_I32 f2 0 -; CHECK-NEXT: 0x68 R_WASM_SECTION_OFFSET_I32 .debug_str 118 +; CHECK-NEXT: 0x66 R_WASM_GLOBAL_INDEX_I32 __stack_pointer +; CHECK-NEXT: 0x6B R_WASM_SECTION_OFFSET_I32 .debug_str 118 ; CHECK-NEXT: } ; CHECK-NEXT: Section (11) .debug_pubnames { ; CHECK-NEXT: 0x6 R_WASM_SECTION_OFFSET_I32 .debug_info 0 @@ -213,6 +214,16 @@ ; CHECK-NEXT: ElementIndex: 0x8 ; CHECK-NEXT: } ; CHECK-NEXT: Symbol { +; CHECK-NEXT: Name: __stack_pointer +; 
CHECK-NEXT: Type: GLOBAL (0x2) +; CHECK-NEXT: Flags [ (0x10) +; CHECK-NEXT: UNDEFINED (0x10) +; CHECK-NEXT: ] +; CHECK-NEXT: ImportName: __stack_pointer +; CHECK-NEXT: ImportModule: env +; CHECK-NEXT: ElementIndex: 0x0 +; CHECK-NEXT: } +; CHECK-NEXT: Symbol { ; CHECK-NEXT: Name: .debug_str ; CHECK-NEXT: Type: SECTION (0x3) ; CHECK-NEXT: Flags [ (0x2) diff --git a/llvm/test/MC/WebAssembly/debug-localvar.ll b/llvm/test/MC/WebAssembly/debug-localvar.ll index 18acae31f39d..ffb04e4387d0 100644 --- a/llvm/test/MC/WebAssembly/debug-localvar.ll +++ b/llvm/test/MC/WebAssembly/debug-localvar.ll @@ -80,7 +80,7 @@ attributes #2 = { nounwind } ; CHECK-LABEL: DW_TAG_subprogram ; CHECK-NEXT: DW_AT_low_pc (0x0000000000000002) ; CHECK-NEXT: DW_AT_high_pc (0x0000000000000039) -; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x0 +1, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x0 0x1, DW_OP_stack_value) ; CHECK-NEXT: DW_AT_name ("foo") ; CHECK-NEXT: DW_AT_decl_file ("/s/llvm-upstream{{(/|\\)}}debugtest.c") ; CHECK-NEXT: DW_AT_decl_line (1) diff --git a/llvm/test/MC/WebAssembly/dwarfdump.ll b/llvm/test/MC/WebAssembly/dwarfdump.ll index 4da57c32a8da..ea956002bd83 100644 --- a/llvm/test/MC/WebAssembly/dwarfdump.ll +++ b/llvm/test/MC/WebAssembly/dwarfdump.ll @@ -1,7 +1,7 @@ ; RUN: llc -filetype=obj %s -o - | llvm-dwarfdump - | FileCheck %s ; CHECK: .debug_info contents: -; CHECK-NEXT: 0x00000000: Compile Unit: length = 0x0000006b version = 0x0004 abbr_offset = 0x0000 addr_size = 0x04 (next unit at 0x0000006f) +; CHECK-NEXT: 0x00000000: Compile Unit: length = 0x0000006e version = 0x0004 abbr_offset = 0x0000 addr_size = 0x04 (next unit at 0x00000072) ; CHECK: 0x0000000b: DW_TAG_compile_unit ; CHECK-NEXT: DW_AT_producer ("clang version 6.0.0 (trunk 315924) (llvm/trunk 315960)") @@ -46,14 +46,14 @@ ; CHECK: 0x0000005a: DW_TAG_subprogram ; CHECK-NEXT: DW_AT_low_pc (0x0000000000000002) ; CHECK-NEXT: DW_AT_high_pc (0x0000000000000004) -; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x1 +0, DW_OP_stack_value) +; CHECK-NEXT: DW_AT_frame_base (DW_OP_WASM_location 0x3 0x0, DW_OP_stack_value) ; CHECK-NEXT: DW_AT_name ("f2") ; CHECK-NEXT: DW_AT_decl_file ("/usr/local/google/home/sbc/dev/wasm/simple{{[/\\]}}test.c") ; CHECK-NEXT: DW_AT_decl_line (2) ; CHECK-NEXT: DW_AT_prototyped (true) ; CHECK-NEXT: DW_AT_external (true) -; CHECK: 0x0000006e: NULL +; CHECK: 0x00000071: NULL target triple = "wasm32-unknown-unknown" diff --git a/llvm/tools/llvm-readobj/WasmDumper.cpp b/llvm/tools/llvm-readobj/WasmDumper.cpp index bc163a27462b..0964ffb77eae 100644 --- a/llvm/tools/llvm-readobj/WasmDumper.cpp +++ b/llvm/tools/llvm-readobj/WasmDumper.cpp @@ -93,18 +93,8 @@ void WasmDumper::printRelocation(const SectionRef &Section, if (SI != Obj->symbol_end()) SymName = unwrapOrError(Obj->getFileName(), SI->getName()); - bool HasAddend = false; - switch (RelocType) { - case wasm::R_WASM_MEMORY_ADDR_LEB: - case wasm::R_WASM_MEMORY_ADDR_SLEB: - case wasm::R_WASM_MEMORY_ADDR_I32: - case wasm::R_WASM_FUNCTION_OFFSET_I32: - case wasm::R_WASM_SECTION_OFFSET_I32: - HasAddend = true; - break; - default: - break; - } + bool HasAddend = wasm::relocTypeHasAddend(static_cast(RelocType)); + if (opts::ExpandRelocs) { DictScope Group(W, "Relocation"); W.printNumber("Type", RelocTypeName, RelocType); From 7bf4248521f158d9a536d6b56d93bc8da9759799 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Thu, 16 Apr 2020 17:11:14 -0700 Subject: [PATCH 112/216] [WebAssembly] fixed target index strings in DebugInfo test --- 
llvm/test/DebugInfo/WebAssembly/dbg-value-ti.ll | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/test/DebugInfo/WebAssembly/dbg-value-ti.ll b/llvm/test/DebugInfo/WebAssembly/dbg-value-ti.ll index a99ca2d43cb3..8d85d18f9a72 100644 --- a/llvm/test/DebugInfo/WebAssembly/dbg-value-ti.ll +++ b/llvm/test/DebugInfo/WebAssembly/dbg-value-ti.ll @@ -1,7 +1,7 @@ ; RUN: llc < %s -stop-after=wasm-explicit-locals | FileCheck %s ; Checks if DBG_VALUEs that correspond to new `local.{tee,set}` are -; using `target-index(wasm-local-start)` operands. +; using `target-index(wasm-local)` operands. target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128" target triple = "wasm32-unknown-unknown-wasm" @@ -24,12 +24,12 @@ for.body: ; preds = %entry, %for.body ; CHECK: %[[REG2:.*]]:i32 = LOCAL_GET_I32 [[LOOP_LOCAL]], ; CHECK: %[[REG3:.*]]:i32 = LOCAL_TEE_I32 [[TMP_LOCAL:.*]], %[[REG2]], -; CHECK: DBG_VALUE target-index(wasm-local-start) + [[TMP_LOCAL]], $noreg, +; CHECK: DBG_VALUE target-index(wasm-local) + [[TMP_LOCAL]], $noreg, call void @llvm.dbg.value(metadata i32 %b.011, metadata !16, metadata !DIExpression()), !dbg !19 ; CHECK: %[[REG4:.*]]:i32 = nsw ADD_I32 ; CHECK: LOCAL_SET_I32 [[LOOP_LOCAL]], %[[REG4]], -; CHECK: DBG_VALUE target-index(wasm-local-start) + [[LOOP_LOCAL]], $noreg, +; CHECK: DBG_VALUE target-index(wasm-local) + [[LOOP_LOCAL]], $noreg, %add = add nsw i32 %b.011, %a.010, !dbg !26 %inc = add nuw nsw i32 %i.09, 1, !dbg !28 call void @llvm.dbg.value(metadata i32 %add, metadata !16, metadata !DIExpression()), !dbg !19 From a1526cd62b67c76b284b76d6f26f2bb394fd65ca Mon Sep 17 00:00:00 2001 From: Pratyai Mazumder Date: Tue, 14 Apr 2020 16:13:25 -0700 Subject: [PATCH 113/216] [NFC, tsan] Update tsan tests expectation Summary: These tests pass with clang, but fail if gcc was used. gcc build creates similar but not the same stacks. 
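For illustration, the relaxed FileCheck pattern used in the hunks below accepts the interceptor's caller at either frame #1 or #2, since the gcc-built stacks can place that frame one level deeper (this simply restates the new check lines, it is not additional test content):

// CHECK: #0 memset
// CHECK: #{{[12]}} MemSetThread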
Reviewers: vitalybuka Reviewed By: vitalybuka Subscribers: dvyukov, llvm-commits, #sanitizers Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D78114 --- compiler-rt/test/tsan/Linux/double_race.cpp | 2 +- compiler-rt/test/tsan/inlined_memcpy_race.cpp | 5 ++--- compiler-rt/test/tsan/inlined_memcpy_race2.cpp | 4 ++-- compiler-rt/test/tsan/memcmp_race.cpp | 4 ++-- compiler-rt/test/tsan/memcpy_race.cpp | 4 ++-- 5 files changed, 9 insertions(+), 10 deletions(-) diff --git a/compiler-rt/test/tsan/Linux/double_race.cpp b/compiler-rt/test/tsan/Linux/double_race.cpp index 963619cf9a8a..b670589f9374 100644 --- a/compiler-rt/test/tsan/Linux/double_race.cpp +++ b/compiler-rt/test/tsan/Linux/double_race.cpp @@ -45,7 +45,7 @@ int main() { // CHECK: WARNING: ThreadSanitizer: data race // CHECK: Write of size 8 at {{.*}} by thread T1: // CHECK: #0 memset -// CHECK: #1 Thread +// CHECK: #{{[12]}} Thread // CHECK-NOT: bad PC passed to __tsan_symbolize_external // CHECK: WARNING: ThreadSanitizer: data race // CHECK: Write of size 8 at {{.*}} by thread T1: diff --git a/compiler-rt/test/tsan/inlined_memcpy_race.cpp b/compiler-rt/test/tsan/inlined_memcpy_race.cpp index 4d085893aae0..eb252e5c76bd 100644 --- a/compiler-rt/test/tsan/inlined_memcpy_race.cpp +++ b/compiler-rt/test/tsan/inlined_memcpy_race.cpp @@ -30,8 +30,7 @@ int main() { // CHECK: WARNING: ThreadSanitizer: data race // CHECK: #0 memset -// CHECK: #1 MemSetThread +// CHECK: #{{[12]}} MemSetThread // CHECK: Previous write // CHECK: #0 {{(memcpy|memmove)}} -// CHECK: #1 MemCpyThread - +// CHECK: #{{[12]}} MemCpyThread diff --git a/compiler-rt/test/tsan/inlined_memcpy_race2.cpp b/compiler-rt/test/tsan/inlined_memcpy_race2.cpp index 906a52bd32e4..75b92efda904 100644 --- a/compiler-rt/test/tsan/inlined_memcpy_race2.cpp +++ b/compiler-rt/test/tsan/inlined_memcpy_race2.cpp @@ -31,7 +31,7 @@ int main() { // CHECK: WARNING: ThreadSanitizer: data race // CHECK: #0 memset -// CHECK: #1 MemSetThread +// CHECK: #{{[12]}} MemSetThread // CHECK: Previous write // CHECK: #0 {{(memcpy|memmove)}} -// CHECK: #1 MemMoveThread +// CHECK: #{{[12]}} MemMoveThread diff --git a/compiler-rt/test/tsan/memcmp_race.cpp b/compiler-rt/test/tsan/memcmp_race.cpp index b76f427e121c..40b11a77b364 100644 --- a/compiler-rt/test/tsan/memcmp_race.cpp +++ b/compiler-rt/test/tsan/memcmp_race.cpp @@ -36,7 +36,7 @@ int main() { // CHECK: WARNING: ThreadSanitizer: data race // CHECK: Write of size 1 at [[ADDR]] by thread T2: // CHECK: #0 {{(memcpy|memmove)}} -// CHECK: #1 Thread2 +// CHECK: #{{[12]}} Thread2 // CHECK: Previous read of size 1 at [[ADDR]] by thread T1: // CHECK: #0 memcmp -// CHECK: #1 Thread1 +// CHECK: #{{[12]}} Thread1 diff --git a/compiler-rt/test/tsan/memcpy_race.cpp b/compiler-rt/test/tsan/memcpy_race.cpp index 4a098c0405fc..09b2a319e205 100644 --- a/compiler-rt/test/tsan/memcpy_race.cpp +++ b/compiler-rt/test/tsan/memcpy_race.cpp @@ -35,7 +35,7 @@ int main() { // CHECK: WARNING: ThreadSanitizer: data race // CHECK: Write of size 1 at [[ADDR]] by thread T2: // CHECK: #0 {{(memcpy|memmove)}} -// CHECK: #1 Thread2 +// CHECK: #{{[12]}} Thread2 // CHECK: Previous write of size 1 at [[ADDR]] by thread T1: // CHECK: #0 {{(memcpy|memmove)}} -// CHECK: #1 Thread1 +// CHECK: #{{[12]}} Thread1 From 3eaeebec959e64c8b855fecb51077b6009c95d64 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Thu, 16 Apr 2020 17:22:38 -0700 Subject: [PATCH 114/216] [WebAssembly] Fix WasmEmitter link error. 
--- llvm/lib/ObjectYAML/WasmEmitter.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/llvm/lib/ObjectYAML/WasmEmitter.cpp b/llvm/lib/ObjectYAML/WasmEmitter.cpp index 84e364c07448..ef54eaedfc0b 100644 --- a/llvm/lib/ObjectYAML/WasmEmitter.cpp +++ b/llvm/lib/ObjectYAML/WasmEmitter.cpp @@ -530,7 +530,12 @@ void WasmWriter::writeRelocSection(raw_ostream &OS, WasmYAML::Section &Sec, writeUint8(OS, Reloc.Type); encodeULEB128(Reloc.Offset, OS); encodeULEB128(Reloc.Index, OS); - if (wasm::relocTypeHasAddend(Reloc.Type)) { + switch (Reloc.Type) { + case wasm::R_WASM_MEMORY_ADDR_LEB: + case wasm::R_WASM_MEMORY_ADDR_SLEB: + case wasm::R_WASM_MEMORY_ADDR_I32: + case wasm::R_WASM_FUNCTION_OFFSET_I32: + case wasm::R_WASM_SECTION_OFFSET_I32: encodeULEB128(Reloc.Addend, OS); } } From fccea7f372cbd33376d2c776f34a0c6925982981 Mon Sep 17 00:00:00 2001 From: Dan Liew Date: Tue, 14 Apr 2020 21:49:50 -0700 Subject: [PATCH 115/216] [NFC] Introduce a `LateInitialize()` method to `SymbolizerTool` that is called during the LateInitialize stage of the sanitizer runtimes. Summary: This is implemented by adding a `Symbolizer::LateInitializeTools()` method that iterates over the registered tools and calls the `LateInitialize()` method on them. `Symbolizer::LateInitializeTools()` is now called from the various `Symbolizer::LateInitialize()` implementations. The default implementation of `SymbolizerTool::LateInitialize()` does nothing so this change should be NFC. This change allows `SymbolizerTool` implementations to perform any initialization that they need to perform at the LateInitialize stage of a sanitizer runtime init. rdar://problem/58789439 Reviewers: kubamracek, yln, vitalybuka, cryptoad, phosek, rnk Subscribers: #sanitizers, llvm-commits Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D78178 --- compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cpp | 6 ++++++ compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h | 3 +++ .../lib/sanitizer_common/sanitizer_symbolizer_internal.h | 5 +++++ .../lib/sanitizer_common/sanitizer_symbolizer_markup.cpp | 4 +++- .../sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp | 2 +- .../lib/sanitizer_common/sanitizer_symbolizer_win.cpp | 2 +- 6 files changed, 19 insertions(+), 3 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cpp index ce2ece5f4d51..0c4b84c767aa 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.cpp @@ -126,4 +126,10 @@ Symbolizer::SymbolizerScope::~SymbolizerScope() { sym_->end_hook_(); } +void Symbolizer::LateInitializeTools() { + for (auto &tool : tools_) { + tool.LateInitialize(); + } +} + } // namespace __sanitizer diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h index 51648e2d0e8d..2476b0ea7bf7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer.h @@ -209,6 +209,9 @@ class Symbolizer final { private: const Symbolizer *sym_; }; + + // Calls `LateInitialize()` on all items in `tools_`. 
+ void LateInitializeTools(); }; #ifdef SANITIZER_WINDOWS diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h index 063954330842..e4c351e667b4 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_internal.h @@ -69,6 +69,11 @@ class SymbolizerTool { virtual const char *Demangle(const char *name) { return nullptr; } + + // Called during the LateInitialize phase of Sanitizer initialization. + // Usually this is a safe place to call code that might need to use user + // memory allocators. + virtual void LateInitialize() {} }; // SymbolizerProcess encapsulates communication between the tool and diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp index 57b4d0c9d961..2963af953609 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_markup.cpp @@ -94,7 +94,9 @@ Symbolizer *Symbolizer::PlatformInit() { return new (symbolizer_allocator_) Symbolizer({}); } -void Symbolizer::LateInitialize() { Symbolizer::GetOrInit(); } +void Symbolizer::LateInitialize() { + Symbolizer::GetOrInit()->LateInitializeTools(); +} void StartReportDeadlySignal() {} void ReportDeadlySignal(const SignalContext &sig, u32 tid, diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp index f1dff2408e11..d7b931bc2379 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_posix_libcdep.cpp @@ -488,7 +488,7 @@ Symbolizer *Symbolizer::PlatformInit() { } void Symbolizer::LateInitialize() { - Symbolizer::GetOrInit(); + Symbolizer::GetOrInit()->LateInitializeTools(); InitializeSwiftDemangler(); } diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp index 2808779156ed..373437e7ee2a 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_symbolizer_win.cpp @@ -310,7 +310,7 @@ Symbolizer *Symbolizer::PlatformInit() { } void Symbolizer::LateInitialize() { - Symbolizer::GetOrInit(); + Symbolizer::GetOrInit()->LateInitializeTools(); } } // namespace __sanitizer From 77e3a2e0fe02e8bdeb8276c08c95290d9e78feba Mon Sep 17 00:00:00 2001 From: Evgenii Stepanov Date: Thu, 16 Apr 2020 13:12:54 -0700 Subject: [PATCH 116/216] [ubsan] Link shared runtime library with a version script. Summary: Do not reexport libgcc.a symbols and random sanitizer internal symbols by applying a version script to the shared library build. This fixes unwinder conflicts on Android that are created by reexporting the unwinder interface from libgcc_real.a. The same is already done in asan and hwasan. 
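For reference, the version script applied here has roughly the following shape (a schematic sketch, not the exact contents of the generated clang_rt.ubsan_standalone-dynamic-<arch>.vers file): it keeps the sanitizer interface global, makes everything else local, and is handed to the linker via -Wl,--version-script,<file>:

{
  global: __ubsan_*;
  local: *;
};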
Reviewers: vitalybuka, srhines Subscribers: mgorny, #sanitizers, llvm-commits Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D78325 --- compiler-rt/lib/ubsan/CMakeLists.txt | 48 ++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 14 deletions(-) diff --git a/compiler-rt/lib/ubsan/CMakeLists.txt b/compiler-rt/lib/ubsan/CMakeLists.txt index 378cbac6a31a..164f7aadcc0c 100644 --- a/compiler-rt/lib/ubsan/CMakeLists.txt +++ b/compiler-rt/lib/ubsan/CMakeLists.txt @@ -199,21 +199,41 @@ else() PARENT_TARGET ubsan) if (FUCHSIA OR UNIX) - add_compiler_rt_runtime(clang_rt.ubsan_standalone - SHARED + file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp "") + add_compiler_rt_object_libraries(RTUbsan_dynamic_version_script_dummy ARCHS ${UBSAN_SUPPORTED_ARCH} - OBJECT_LIBS RTSanitizerCommon - RTSanitizerCommonLibc - RTSanitizerCommonCoverage - RTSanitizerCommonSymbolizer - RTUbsan - RTUbsan_cxx - RTUbsan_standalone - RTInterception - CFLAGS ${UBSAN_CFLAGS} - LINK_FLAGS ${SANITIZER_COMMON_LINK_FLAGS} - LINK_LIBS ${UBSAN_DYNAMIC_LIBS} - PARENT_TARGET ubsan) + SOURCES ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + CFLAGS ${UBSAN_CFLAGS}) + + foreach(arch ${UBSAN_SUPPORTED_ARCH}) + add_sanitizer_rt_version_list(clang_rt.ubsan_standalone-dynamic-${arch} + LIBS clang_rt.ubsan_standalone-${arch} + clang_rt.ubsan_standalone_cxx-${arch} + EXTRA ubsan.syms.extra) + set(VERSION_SCRIPT_FLAG + -Wl,--version-script,${CMAKE_CURRENT_BINARY_DIR}/clang_rt.ubsan_standalone-dynamic-${arch}.vers) + set_property(SOURCE + ${CMAKE_CURRENT_BINARY_DIR}/dummy.cpp + APPEND PROPERTY + OBJECT_DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/clang_rt.ubsan_standalone-dynamic-${arch}.vers) + + add_compiler_rt_runtime(clang_rt.ubsan_standalone + SHARED + ARCHS ${arch} + OBJECT_LIBS RTSanitizerCommon + RTSanitizerCommonLibc + RTSanitizerCommonCoverage + RTSanitizerCommonSymbolizer + RTUbsan + RTUbsan_cxx + RTUbsan_standalone + RTInterception + RTUbsan_dynamic_version_script_dummy + CFLAGS ${UBSAN_CFLAGS} + LINK_FLAGS ${UBSAN_LINK_FLAGS} ${VERSION_SCRIPT_FLAG} + LINK_LIBS ${UBSAN_DYNAMIC_LIBS} + PARENT_TARGET ubsan) + endforeach() set(ARCHS_FOR_SYMBOLS ${UBSAN_SUPPORTED_ARCH}) list(REMOVE_ITEM ARCHS_FOR_SYMBOLS i386) From 741d3c20161ef5ebb68ede85e9f1a1150124952a Mon Sep 17 00:00:00 2001 From: Paula Toth Date: Thu, 16 Apr 2020 17:40:36 -0700 Subject: [PATCH 117/216] [libc] Add cmake target for linting libc. Summary: This patch implements running linting on llvm-libc using build rule targets. 1) adds a new target per entrypoint for linting with the naming convention `.__lint__` e.g `libc.src.string.strlen.__lint__`. 2) makes the build target for each entrypoint depend on the linting targets so that they run along with compilation of each entrypoint. 3) adds a lint all target named `lint-libc`. `check-libc` now depends on this new target. 4) linting creates a lot of additional targets from clang and clang-tidy that need to be built so an opt out flag can be passed to cmake: `LLVM_LIBC_ENABLE_LINTING`. 
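As a usage sketch (a hypothetical invocation: the Ninja generator and the exact project list are assumptions, while the cache variable and target names come from the points above):

  cmake -G Ninja ../llvm -DLLVM_ENABLE_PROJECTS="clang;clang-tools-extra;libc" -DLLVM_LIBC_ENABLE_LINTING=ON
  ninja libc.src.string.strlen.__lint__   # lint a single entrypoint
  ninja lint-libc                         # lint all of llvm-libc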
Reviewers: sivachandra, abrachet Reviewed By: sivachandra Subscribers: abrachet, mgorny, tschuett, libc-commits Tags: #libc-project Differential Revision: https://reviews.llvm.org/D77861 --- libc/CMakeLists.txt | 22 +++++++++++++++ libc/cmake/modules/LLVMLibCRules.cmake | 37 +++++++++++++++++++++++++- 2 files changed, 58 insertions(+), 1 deletion(-) diff --git a/libc/CMakeLists.txt b/libc/CMakeLists.txt index d64665361513..2e3ba5fafc35 100644 --- a/libc/CMakeLists.txt +++ b/libc/CMakeLists.txt @@ -19,6 +19,28 @@ string(TOLOWER ${LIBC_TARGET_OS} LIBC_TARGET_OS) set(LIBC_TARGET_MACHINE ${CMAKE_SYSTEM_PROCESSOR}) +option(LLVM_LIBC_ENABLE_LINTING "Enables linting of libc source files" ON) +if(LLVM_LIBC_ENABLE_LINTING) + if("clang-tools-extra" IN_LIST LLVM_ENABLE_PROJECTS + AND "clang" IN_LIST LLVM_ENABLE_PROJECTS) + add_custom_target(lint-libc) + else() + message(FATAL_ERROR " + 'clang' and 'clang-tools-extra' are required in LLVM_LIBC_ENABLE_PROJECTS to + lint llvm-libc. The linting step performs important checks to help prevent + the introduction of subtle bugs, but it may increase build times. + + To disable linting set LLVM_LIBC_ENABLE_LINTING to OFF + (pass -DLLVM_LIBC_ENABLE_LINTING=OFF to cmake).") + endif() +else() + message(WARNING " + Linting for libc is currently disabled. + + This is not recommended, to enable set LLVM_LIBC_ENABLE_LINTING to ON + (pass -DLLVM_LIBC_ENABLE_LINTING=ON to cmake).") +endif() + include(CMakeParseArguments) include(LLVMLibCRules) include(LLVMLibCCheckCpuFeatures) diff --git a/libc/cmake/modules/LLVMLibCRules.cmake b/libc/cmake/modules/LLVMLibCRules.cmake index d604da8e4c9c..74e534395fd5 100644 --- a/libc/cmake/modules/LLVMLibCRules.cmake +++ b/libc/cmake/modules/LLVMLibCRules.cmake @@ -347,6 +347,41 @@ function(add_entrypoint_object target_name) "OBJECT_FILES" "${all_objects}" "OBJECT_FILES_RAW" "${all_objects_raw}" ) + + if(LLVM_LIBC_ENABLE_LINTING) + set(lint_timestamp "${CMAKE_CURRENT_BINARY_DIR}/.${target_name}.__lint_timestamp__") + + add_custom_command( + OUTPUT ${lint_timestamp} + # --quiet is used to surpress warning statistics from clang-tidy like: + # Suppressed X warnings (X in non-user code). + # There seems to be a bug in clang-tidy where by even with --quiet some + # messages from clang's own diagnostics engine leak through: + # X warnings generated. + # Until this is fixed upstream, we use -fno-caret-diagnostics to surpress + # these. + COMMAND $ "--extra-arg=-fno-caret-diagnostics" --quiet + # Path to directory containing compile_commands.json + -p ${PROJECT_BINARY_DIR} + ${ADD_ENTRYPOINT_OBJ_SRCS} + # We have two options for running commands, add_custom_command and + # add_custom_target. We don't want to run the linter unless source files + # have changed. add_custom_target explicitly runs everytime therefore we + # use add_custom_command. This function requires an output file and since + # linting doesn't produce a file, we create a dummy file using a + # crossplatform touch. + COMMAND "${CMAKE_COMMAND}" -E touch ${lint_timestamp} + COMMENT "Linting... ${target_name}" + DEPENDS ${clang-tidy} ${objects_target_name} ${ADD_ENTRYPOINT_OBJ_SRCS} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + ) + + add_custom_target(${fq_target_name}.__lint__ + DEPENDS ${lint_timestamp}) + add_dependencies(lint-libc ${fq_target_name}.__lint__) + add_dependencies(${fq_target_name} ${fq_target_name}.__lint__) + endif() + endfunction(add_entrypoint_object) # A rule to build a library from a collection of entrypoint objects. 
@@ -465,7 +500,7 @@ function(add_libc_unittest target_name) if(NOT LLVM_INCLUDE_TESTS) return() endif() - + cmake_parse_arguments( "LIBC_UNITTEST" "" # No optional arguments From 944cc5e0ab5c1323e6cd9c3a72e8de89bd8a67b0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 16 Apr 2020 17:03:16 -0700 Subject: [PATCH 118/216] [SelectionDAGBuilder][CGP][X86] Move some of SDB's gather/scatter uniform base handling to CGP. I've always found the "findValue" a little odd and inconsistent with other things in SDB. This simplfifies the code in SDB to just handle a splat constant address or a 2 operand GEP in the same BB. This removes the need for "findValue" since the operands to the GEP are guaranteed to be available. The splat constant handling is new, but was needed to avoid regressions due to constant folding combining GEPs created in CGP. CGP is now responsible for canonicalizing gather/scatters into this form. The pattern I'm using for scalarizing, a scalar GEP followed by a GEP with an all zeroes index, seems to be subject to constant folding that the insertelement+shufflevector was not. Differential Revision: https://reviews.llvm.org/D76947 --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 119 ++++++++++++++++++ .../SelectionDAG/SelectionDAGBuilder.cpp | 89 +++++-------- .../SelectionDAG/SelectionDAGBuilder.h | 1 - llvm/test/CodeGen/X86/masked_gather.ll | 7 +- .../test/CodeGen/X86/masked_gather_scatter.ll | 20 ++- llvm/test/CodeGen/X86/pr45067.ll | 10 +- .../CodeGenPrepare/X86/gather-scatter-opt.ll | 88 +++++++++++++ 7 files changed, 261 insertions(+), 73 deletions(-) create mode 100644 llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 887e37bd4a19..5eb772d12abf 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -368,6 +368,7 @@ class TypePromotionTransaction; bool optimizeInst(Instruction *I, bool &ModifiedDT); bool optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, Type *AccessTy, unsigned AddrSpace); + bool optimizeGatherScatterInst(Instruction *MemoryInst, Value *Ptr); bool optimizeInlineAsmInst(CallInst *CS); bool optimizeCallInst(CallInst *CI, bool &ModifiedDT); bool optimizeExt(Instruction *&I); @@ -2041,7 +2042,12 @@ bool CodeGenPrepare::optimizeCallInst(CallInst *CI, bool &ModifiedDT) { II->eraseFromParent(); return true; } + break; } + case Intrinsic::masked_gather: + return optimizeGatherScatterInst(II, II->getArgOperand(0)); + case Intrinsic::masked_scatter: + return optimizeGatherScatterInst(II, II->getArgOperand(1)); } SmallVector PtrOps; @@ -5182,6 +5188,119 @@ bool CodeGenPrepare::optimizeMemoryInst(Instruction *MemoryInst, Value *Addr, return true; } +/// Rewrite GEP input to gather/scatter to enable SelectionDAGBuilder to find +/// a uniform base to use for ISD::MGATHER/MSCATTER. SelectionDAGBuilder can +/// only handle a 2 operand GEP in the same basic block or a splat constant +/// vector. The 2 operands to the GEP must have a scalar pointer and a vector +/// index. +/// +/// If the existing GEP has a vector base pointer that is splat, we can look +/// through the splat to find the scalar pointer. If we can't find a scalar +/// pointer there's nothing we can do. +/// +/// If we have a GEP with more than 2 indices where the middle indices are all +/// zeroes, we can replace it with 2 GEPs where the second has 2 operands. 
+/// +/// If the final index isn't a vector or is a splat, we can emit a scalar GEP +/// followed by a GEP with an all zeroes vector index. This will enable +/// SelectionDAGBuilder to use a the scalar GEP as the uniform base and have a +/// zero index. +bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, + Value *Ptr) { + const GetElementPtrInst *GEP = dyn_cast(Ptr); + if (!GEP || !GEP->hasIndices()) + return false; + + // If the GEP and the gather/scatter aren't in the same BB, don't optimize. + // FIXME: We should support this by sinking the GEP. + if (MemoryInst->getParent() != GEP->getParent()) + return false; + + SmallVector Ops(GEP->op_begin(), GEP->op_end()); + + bool RewriteGEP = false; + + if (Ops[0]->getType()->isVectorTy()) { + Ops[0] = const_cast(getSplatValue(Ops[0])); + if (!Ops[0]) + return false; + RewriteGEP = true; + } + + unsigned FinalIndex = Ops.size() - 1; + + // Ensure all but the last index is 0. + // FIXME: This isn't strictly required. All that's required is that they are + // all scalars or splats. + for (unsigned i = 1; i < FinalIndex; ++i) { + auto *C = dyn_cast(Ops[i]); + if (!C) + return false; + if (isa(C->getType())) + C = C->getSplatValue(); + auto *CI = dyn_cast_or_null(C); + if (!CI || !CI->isZero()) + return false; + // Scalarize the index if needed. + Ops[i] = CI; + } + + // Try to scalarize the final index. + if (Ops[FinalIndex]->getType()->isVectorTy()) { + if (Value *V = const_cast(getSplatValue(Ops[FinalIndex]))) { + auto *C = dyn_cast(V); + // Don't scalarize all zeros vector. + if (!C || !C->isZero()) { + Ops[FinalIndex] = V; + RewriteGEP = true; + } + } + } + + // If we made any changes or the we have extra operands, we need to generate + // new instructions. + if (!RewriteGEP && Ops.size() == 2) + return false; + + unsigned NumElts = Ptr->getType()->getVectorNumElements(); + + IRBuilder<> Builder(MemoryInst); + + Type *ScalarIndexTy = DL->getIndexType(Ops[0]->getType()->getScalarType()); + + Value *NewAddr; + + // If the final index isn't a vector, emit a scalar GEP containing all ops + // and a vector GEP with all zeroes final index. + if (!Ops[FinalIndex]->getType()->isVectorTy()) { + NewAddr = Builder.CreateGEP(Ops[0], makeArrayRef(Ops).drop_front()); + Type *IndexTy = VectorType::get(ScalarIndexTy, NumElts); + NewAddr = Builder.CreateGEP(NewAddr, Constant::getNullValue(IndexTy)); + } else { + Value *Base = Ops[0]; + Value *Index = Ops[FinalIndex]; + + // Create a scalar GEP if there are more than 2 operands. + if (Ops.size() != 2) { + // Replace the last index with 0. + Ops[FinalIndex] = Constant::getNullValue(ScalarIndexTy); + Base = Builder.CreateGEP(Base, makeArrayRef(Ops).drop_front()); + } + + // Now create the GEP with scalar pointer and vector index. + NewAddr = Builder.CreateGEP(Base, Index); + } + + MemoryInst->replaceUsesOfWith(Ptr, NewAddr); + + // If we have no uses, recursively delete the value and all dead instructions + // using it. + if (Ptr->use_empty()) + RecursivelyDeleteTriviallyDeadInstructions(Ptr, TLInfo); + + return true; +} + /// If there are any memory operands, use OptimizeMemoryInst to sink their /// address computing into the block when possible / profitable. 
bool CodeGenPrepare::optimizeInlineAsmInst(CallInst *CS) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index a6af33e17c17..f8c7f784bf11 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -1435,12 +1435,6 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { return Val; } -// Return true if SDValue exists for the given Value -bool SelectionDAGBuilder::findValue(const Value *V) const { - return (NodeMap.find(V) != NodeMap.end()) || - (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end()); -} - /// getNonRegisterValue - Return an SDValue for the given Value, but /// don't look in FuncInfo.ValueMap for a virtual register. SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { @@ -4254,70 +4248,49 @@ void SelectionDAGBuilder::visitMaskedStore(const CallInst &I, // In all other cases the function returns 'false'. static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, ISD::MemIndexType &IndexType, SDValue &Scale, - SelectionDAGBuilder *SDB) { + SelectionDAGBuilder *SDB, const BasicBlock *CurBB) { SelectionDAG& DAG = SDB->DAG; - LLVMContext &Context = *DAG.getContext(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const DataLayout &DL = DAG.getDataLayout(); assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type"); - const GetElementPtrInst *GEP = dyn_cast(Ptr); - if (!GEP) - return false; - const Value *BasePtr = GEP->getPointerOperand(); - if (BasePtr->getType()->isVectorTy()) { - BasePtr = getSplatValue(BasePtr); - if (!BasePtr) + // Handle splat constant pointer. + if (auto *C = dyn_cast(Ptr)) { + C = C->getSplatValue(); + if (!C) return false; - } - unsigned FinalIndex = GEP->getNumOperands() - 1; - Value *IndexVal = GEP->getOperand(FinalIndex); - gep_type_iterator GTI = gep_type_begin(*GEP); + Base = SDB->getValue(C); - // Ensure all the other indices are 0. - for (unsigned i = 1; i < FinalIndex; ++i, ++GTI) { - auto *C = dyn_cast(GEP->getOperand(i)); - if (!C) - return false; - if (isa(C->getType())) - C = C->getSplatValue(); - auto *CI = dyn_cast_or_null(C); - if (!CI || !CI->isZero()) - return false; + unsigned NumElts = Ptr->getType()->getVectorNumElements(); + EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); + Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); + IndexType = ISD::SIGNED_SCALED; + Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); + return true; } - // The operands of the GEP may be defined in another basic block. - // In this case we'll not find nodes for the operands. - if (!SDB->findValue(BasePtr)) + const GetElementPtrInst *GEP = dyn_cast(Ptr); + if (!GEP || GEP->getParent() != CurBB) return false; - Constant *C = dyn_cast(IndexVal); - if (!C && !SDB->findValue(IndexVal)) + + if (GEP->getNumOperands() != 2) return false; - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - const DataLayout &DL = DAG.getDataLayout(); - StructType *STy = GTI.getStructTypeOrNull(); + const Value *BasePtr = GEP->getPointerOperand(); + const Value *IndexVal = GEP->getOperand(GEP->getNumOperands() - 1); + + // Make sure the base is scalar and the index is a vector. 
+ if (BasePtr->getType()->isVectorTy() || !IndexVal->getType()->isVectorTy()) + return false; - if (STy) { - const StructLayout *SL = DL.getStructLayout(STy); - unsigned Field = cast(IndexVal)->getUniqueInteger().getZExtValue(); - Scale = DAG.getTargetConstant(1, SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - Index = DAG.getConstant(SL->getElementOffset(Field), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - } else { - Scale = DAG.getTargetConstant( - DL.getTypeAllocSize(GEP->getResultElementType()), - SDB->getCurSDLoc(), TLI.getPointerTy(DL)); - Index = SDB->getValue(IndexVal); - } Base = SDB->getValue(BasePtr); + Index = SDB->getValue(IndexVal); IndexType = ISD::SIGNED_SCALED; - - if (STy || !Index.getValueType().isVector()) { - unsigned GEPWidth = cast(GEP->getType())->getNumElements(); - EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth); - Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index); - } + Scale = DAG.getTargetConstant( + DL.getTypeAllocSize(GEP->getResultElementType()), + SDB->getCurSDLoc(), TLI.getPointerTy(DL)); return true; } @@ -4341,7 +4314,8 @@ void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) { SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; - bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this); + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( @@ -4452,7 +4426,8 @@ void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) { SDValue Index; ISD::MemIndexType IndexType; SDValue Scale; - bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this); + bool UniformBase = getUniformBase(Ptr, Base, Index, IndexType, Scale, this, + I.getParent()); unsigned AS = Ptr->getType()->getScalarType()->getPointerAddressSpace(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(AS), MachineMemOperand::MOLoad, diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 3c5436a3634e..acfa1ff9e408 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -518,7 +518,6 @@ class SelectionDAGBuilder { void resolveOrClearDbgInfo(); SDValue getValue(const Value *V); - bool findValue(const Value *V) const; /// Return the SDNode for the specified IR value if it exists. 
SDNode *getNodeForIRValue(const Value *V) { diff --git a/llvm/test/CodeGen/X86/masked_gather.ll b/llvm/test/CodeGen/X86/masked_gather.ll index 44dcf90c07b5..eb214368056c 100644 --- a/llvm/test/CodeGen/X86/masked_gather.ll +++ b/llvm/test/CodeGen/X86/masked_gather.ll @@ -1721,11 +1721,10 @@ define <8 x i32> @gather_v8i32_v8i32(<8 x i32> %trigger) { ; AVX512-NEXT: vptestnmd %zmm0, %zmm0, %k0 ; AVX512-NEXT: kshiftlw $8, %k0, %k0 ; AVX512-NEXT: kshiftrw $8, %k0, %k1 -; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3] +; AVX512-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; AVX512-NEXT: kmovw %k1, %k2 -; AVX512-NEXT: vpgatherdd c(,%zmm0,4), %zmm1 {%k2} -; AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [28,28,28,28,28,28,28,28,28,28,28,28,28,28,28,28] -; AVX512-NEXT: vpgatherdd c(,%zmm0), %zmm2 {%k1} +; AVX512-NEXT: vpgatherdd c+12(,%zmm0), %zmm1 {%k2} +; AVX512-NEXT: vpgatherdd c+28(,%zmm0), %zmm2 {%k1} ; AVX512-NEXT: vpaddd %ymm2, %ymm2, %ymm0 ; AVX512-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ; AVX512-NEXT: retq diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll index 12309044e395..df3af4c24659 100644 --- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll +++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll @@ -638,30 +638,38 @@ entry: define <16 x float> @test11(float* %base, i32 %ind) { ; KNL_64-LABEL: test11: ; KNL_64: # %bb.0: -; KNL_64-NEXT: vpbroadcastd %esi, %zmm1 +; KNL_64-NEXT: movslq %esi, %rax +; KNL_64-NEXT: leaq (%rdi,%rax,4), %rax +; KNL_64-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; KNL_64-NEXT: kxnorw %k0, %k0, %k1 -; KNL_64-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} +; KNL_64-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1} ; KNL_64-NEXT: retq ; ; KNL_32-LABEL: test11: ; KNL_32: # %bb.0: ; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; KNL_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1 +; KNL_32-NEXT: shll $2, %eax +; KNL_32-NEXT: addl {{[0-9]+}}(%esp), %eax +; KNL_32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; KNL_32-NEXT: kxnorw %k0, %k0, %k1 ; KNL_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} ; KNL_32-NEXT: retl ; ; SKX-LABEL: test11: ; SKX: # %bb.0: -; SKX-NEXT: vpbroadcastd %esi, %zmm1 +; SKX-NEXT: movslq %esi, %rax +; SKX-NEXT: leaq (%rdi,%rax,4), %rax +; SKX-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; SKX-NEXT: kxnorw %k0, %k0, %k1 -; SKX-NEXT: vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1} +; SKX-NEXT: vgatherdps (%rax,%zmm1,4), %zmm0 {%k1} ; SKX-NEXT: retq ; ; SKX_32-LABEL: test11: ; SKX_32: # %bb.0: ; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax -; SKX_32-NEXT: vbroadcastss {{[0-9]+}}(%esp), %zmm1 +; SKX_32-NEXT: shll $2, %eax +; SKX_32-NEXT: addl {{[0-9]+}}(%esp), %eax +; SKX_32-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; SKX_32-NEXT: kxnorw %k0, %k0, %k1 ; SKX_32-NEXT: vgatherdps (%eax,%zmm1,4), %zmm0 {%k1} ; SKX_32-NEXT: retl diff --git a/llvm/test/CodeGen/X86/pr45067.ll b/llvm/test/CodeGen/X86/pr45067.ll index fc1baa3d56dd..1cdd53f8db35 100644 --- a/llvm/test/CodeGen/X86/pr45067.ll +++ b/llvm/test/CodeGen/X86/pr45067.ll @@ -6,13 +6,13 @@ define void @foo(<8 x i32>* %x, <8 x i1> %y) { ; CHECK-LABEL: foo: ; CHECK: ## %bb.0: -; CHECK-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vpbroadcastq _global@{{.*}}(%rip), %ymm2 -; CHECK-NEXT: vpgatherqd %xmm1, (,%ymm2), %xmm3 +; CHECK-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 +; CHECK-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: movq _global@{{.*}}(%rip), %rax +; CHECK-NEXT: vpgatherdd %ymm1, (%rax,%ymm2), %ymm3 ; CHECK-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero ; CHECK-NEXT: vpslld $31, %ymm0, %ymm0 -; CHECK-NEXT: vinserti128 $1, %xmm3, %ymm3, %ymm1 -; CHECK-NEXT: vpmaskmovd %ymm1, %ymm0, (%rdi) +; CHECK-NEXT: vpmaskmovd %ymm3, %ymm0, (%rdi) ; CHECK-NEXT: ud2 %tmp = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> , i32 4, <8 x i1> , <8 x i32> undef) call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %tmp, <8 x i32>* %x, i32 4, <8 x i1> %y) diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll new file mode 100644 index 000000000000..2cb749dc939c --- /dev/null +++ b/llvm/test/Transforms/CodeGenPrepare/X86/gather-scatter-opt.ll @@ -0,0 +1,88 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -codegenprepare < %s | FileCheck %s + +target datalayout = +"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.a = type { i32, i32 } +@c = external dso_local global %struct.a, align 4 +@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16 + +define <4 x i32> @splat_base(i32* %base, <4 x i64> %index) { +; CHECK-LABEL: @splat_base( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], <4 x i64> [[INDEX:%.*]] +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0 + %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer + %gep = getelementptr i32, <4 x i32*> %broadcast.splat, <4 x i64> %index + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) + ret <4 x i32> %res +} + +define <4 x i32> @splat_struct(%struct.a* %base) { +; CHECK-LABEL: @splat_struct( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_A:%.*]], %struct.a* [[BASE:%.*]], i64 0, i32 1 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %gep = getelementptr %struct.a, %struct.a* %base, <4 x i64> zeroinitializer, i32 1 + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) + ret <4 x i32> %res +} + +define <4 x i32> @scalar_index(i32* %base, i64 %index) { +; CHECK-LABEL: @scalar_index( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %broadcast.splatinsert = insertelement <4 x i32*> undef, i32* %base, i32 0 + %broadcast.splat = shufflevector <4 x i32*> %broadcast.splatinsert, <4 x i32*> undef, <4 x i32> zeroinitializer + %gep = getelementptr i32, <4 x i32*> %broadcast.splat, i64 %index + %res = call <4 x i32> 
@llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) + ret <4 x i32> %res +} + +define <4 x i32> @splat_index(i32* %base, i64 %index) { +; CHECK-LABEL: @splat_index( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* [[BASE:%.*]], i64 [[INDEX:%.*]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], <4 x i64> zeroinitializer +; CHECK-NEXT: [[RES:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP2]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: ret <4 x i32> [[RES]] +; + %broadcast.splatinsert = insertelement <4 x i64> undef, i64 %index, i32 0 + %broadcast.splat = shufflevector <4 x i64> %broadcast.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer + %gep = getelementptr i32, i32* %base, <4 x i64> %broadcast.splat + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %gep, i32 4, <4 x i1> , <4 x i32> undef) + ret <4 x i32> %res +} + +define <4 x i32> @test_global_array(<4 x i64> %indxs) { +; CHECK-LABEL: @test_global_array( +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, i32* getelementptr inbounds ([16 x i32], [16 x i32]* @glob_array, i64 0, i64 0), <4 x i64> [[INDXS:%.*]] +; CHECK-NEXT: [[G:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> [[TMP1]], i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: ret <4 x i32> [[G]] +; + %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <4 x i64> %indxs + %g = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %p, i32 4, <4 x i1> , <4 x i32> undef) + ret <4 x i32> %g +} + +define <4 x i32> @global_struct_splat() { +; CHECK-LABEL: @global_struct_splat( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> , i32 4, <4 x i1> , <4 x i32> undef) +; CHECK-NEXT: ret <4 x i32> [[TMP1]] +; + %1 = insertelement <4 x %struct.a*> undef, %struct.a* @c, i32 0 + %2 = shufflevector <4 x %struct.a*> %1, <4 x %struct.a*> undef, <4 x i32> zeroinitializer + %3 = getelementptr %struct.a, <4 x %struct.a*> %2, <4 x i64> zeroinitializer, i32 1 + %4 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %3, i32 4, <4 x i1> , <4 x i32> undef) + ret <4 x i32> %4 +} + +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>) From 9eaf0abebff9c61fa01c6ca69cbc74b1464efe14 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Thu, 16 Apr 2020 17:50:56 -0700 Subject: [PATCH 119/216] Revert "[lldb/Utility] Provide a stringify_append overload for function pointers." Temporarily reverts commit d10386e1779599d217b5b849a079f29dfbe17024 because it breaks the Windows build. MSVC complains about an ambiguous call to an overloaded function. 
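For reference, a minimal standalone sketch (not from the reverted patch;
std::string stands in for raw_string_ostream, and the bodies are simplified)
of the overload pair involved and the call that MSVC reportedly rejects:

  #include <string>

  // Generic pointer overload (kept after this revert).
  template <typename T>
  void stringify_append(std::string &s, T *t) { s += "pointer"; }

  // Dedicated function-pointer overload (removed by this revert).
  template <typename T, typename... Args>
  void stringify_append(std::string &s, T (*t)(Args...)) {
    s += "function pointer";
  }

  void callee(int) {}

  void caller() {
    std::string s;
    // Clang/GCC resolve this to the more specialized function-pointer
    // overload; MSVC reportedly diagnoses the call as ambiguous, which is
    // what breaks the Windows build.
    stringify_append(s, &callee);
  }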
--- lldb/include/lldb/Utility/ReproducerInstrumentation.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lldb/include/lldb/Utility/ReproducerInstrumentation.h b/lldb/include/lldb/Utility/ReproducerInstrumentation.h index 75f38929e362..3b5dde3d2e2a 100644 --- a/lldb/include/lldb/Utility/ReproducerInstrumentation.h +++ b/lldb/include/lldb/Utility/ReproducerInstrumentation.h @@ -34,17 +34,12 @@ inline void stringify_append(llvm::raw_string_ostream &ss, const T &t) { template inline void stringify_append(llvm::raw_string_ostream &ss, T *t) { - ss << static_cast(t); + ss << reinterpret_cast(t); } template inline void stringify_append(llvm::raw_string_ostream &ss, const T *t) { - ss << static_cast(t); -} - -template -inline void stringify_append(llvm::raw_string_ostream &ss, T (*t)(Args...)) { - ss << "function pointer"; + ss << reinterpret_cast(t); } template <> From 953a814aae4997e50e73e8d3e6eb699d6b732bbc Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 16 Apr 2020 16:27:31 -0700 Subject: [PATCH 120/216] Remove the llvm/Support/StringPool.h file and related support now that it has no clients. A plain old StringSet<> is a better replacement. Subscribers: mgorny, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78336 --- llvm/include/llvm/Support/StringPool.h | 142 ------------------------- llvm/lib/Support/CMakeLists.txt | 1 - llvm/lib/Support/StringPool.cpp | 33 ------ llvm/unittests/Support/CMakeLists.txt | 1 - llvm/unittests/Support/StringPool.cpp | 30 ------ 5 files changed, 207 deletions(-) delete mode 100644 llvm/include/llvm/Support/StringPool.h delete mode 100644 llvm/lib/Support/StringPool.cpp delete mode 100644 llvm/unittests/Support/StringPool.cpp diff --git a/llvm/include/llvm/Support/StringPool.h b/llvm/include/llvm/Support/StringPool.h deleted file mode 100644 index aecfbee915c7..000000000000 --- a/llvm/include/llvm/Support/StringPool.h +++ /dev/null @@ -1,142 +0,0 @@ -//===- StringPool.h - Intern'd string pool ----------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file declares an interned string pool with separately malloc and -// reference counted entries. This can reduce the cost of strings by using the -// same storage for identical strings. -// -// To intern a string: -// -// StringPool Pool; -// PooledStringPtr Str = Pool.intern("wakka wakka"); -// -// To use the value of an interned string, use operator bool and operator*: -// -// if (Str) -// cerr << "the string is" << *Str << "\n"; -// -// Pooled strings are immutable, but you can change a PooledStringPtr to point -// to another instance. So that interned strings can eventually be freed, -// strings in the string pool are reference-counted (automatically). -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_SUPPORT_STRINGPOOL_H -#define LLVM_SUPPORT_STRINGPOOL_H - -#include "llvm/ADT/StringMap.h" - -namespace llvm { - -class PooledStringPtr; - -/// StringPool - An interned string pool. Use the intern method to add a -/// string. Strings are removed automatically as PooledStringPtrs are -/// destroyed. -class StringPool { - /// PooledString - This is the value of an entry in the pool's interning - /// table. 
- struct PooledString { - StringPool *pool = nullptr; ///< So the string can remove itself. - unsigned refcount = 0; ///< Number of referencing PooledStringPtrs. - - public: - PooledString() = default; - }; - - friend class PooledStringPtr; - using Entry = StringMapEntry; - StringMap internTable; - -public: - StringPool(); - ~StringPool(); - - /// intern - Adds a string to the pool and returns a reference-counted - /// pointer to it. No additional memory is allocated if the string already - /// exists in the pool. - PooledStringPtr intern(StringRef string); - - /// empty - Checks whether the pool is empty. Returns true if so. - bool empty() const { return internTable.empty(); } -}; - -/// PooledStringPtr - A pointer to an interned string. Use operator bool to -/// test whether the pointer is valid, and operator * to get the string if so. -/// This is a lightweight value class with storage requirements equivalent to -/// a single pointer, but it does have reference-counting overhead when -/// copied. -class PooledStringPtr { - using Entry = StringPool::Entry; - Entry *entry = nullptr; - -public: - PooledStringPtr() = default; - - explicit PooledStringPtr(Entry *e) : entry(e) { - if (entry) - ++entry->getValue().refcount; - } - - PooledStringPtr(const PooledStringPtr &that) : entry(that.entry) { - if (entry) - ++entry->getValue().refcount; - } - - PooledStringPtr &operator=(const PooledStringPtr &that) { - if (entry != that.entry) { - clear(); - entry = that.entry; - if (entry) - ++entry->getValue().refcount; - } - return *this; - } - - void clear() { - if (!entry) - return; - if (--entry->getValue().refcount == 0) { - entry->getValue().pool->internTable.remove(entry); - MallocAllocator allocator; - entry->Destroy(allocator); - } - entry = nullptr; - } - - ~PooledStringPtr() { clear(); } - - const char *begin() const { - assert(*this && "Attempt to dereference empty PooledStringPtr!"); - return entry->getKeyData(); - } - - const char *end() const { - assert(*this && "Attempt to dereference empty PooledStringPtr!"); - return entry->getKeyData() + entry->getKeyLength(); - } - - unsigned size() const { - assert(*this && "Attempt to dereference empty PooledStringPtr!"); - return entry->getKeyLength(); - } - - const char *operator*() const { return begin(); } - explicit operator bool() const { return entry != nullptr; } - - bool operator==(const PooledStringPtr &that) const { - return entry == that.entry; - } - bool operator!=(const PooledStringPtr &that) const { - return entry != that.entry; - } -}; - -} // end namespace llvm - -#endif // LLVM_SUPPORT_STRINGPOOL_H diff --git a/llvm/lib/Support/CMakeLists.txt b/llvm/lib/Support/CMakeLists.txt index b2071bee4d2d..185aff780bd8 100644 --- a/llvm/lib/Support/CMakeLists.txt +++ b/llvm/lib/Support/CMakeLists.txt @@ -138,7 +138,6 @@ add_llvm_component_library(LLVMSupport Statistic.cpp StringExtras.cpp StringMap.cpp - StringPool.cpp StringSaver.cpp StringRef.cpp SymbolRemappingReader.cpp diff --git a/llvm/lib/Support/StringPool.cpp b/llvm/lib/Support/StringPool.cpp deleted file mode 100644 index 7d345df14cad..000000000000 --- a/llvm/lib/Support/StringPool.cpp +++ /dev/null @@ -1,33 +0,0 @@ -//===-- StringPool.cpp - Intern'd string pool -----------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements the StringPool class. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/StringPool.h" -using namespace llvm; - -StringPool::StringPool() {} - -StringPool::~StringPool() { - assert(internTable.empty() && "PooledStringPtr leaked!"); -} - -PooledStringPtr StringPool::intern(StringRef key) { - auto it = internTable.find(key); - if (it != internTable.end()) - return PooledStringPtr(&*it); - - MallocAllocator allocator; - auto *entry = Entry::Create(key, allocator); - entry->getValue().pool = this; - internTable.insert(entry); - - return PooledStringPtr(entry); -} diff --git a/llvm/unittests/Support/CMakeLists.txt b/llvm/unittests/Support/CMakeLists.txt index b9eeba165c96..75ca0e74f193 100644 --- a/llvm/unittests/Support/CMakeLists.txt +++ b/llvm/unittests/Support/CMakeLists.txt @@ -66,7 +66,6 @@ add_llvm_unittest(SupportTests ScaledNumberTest.cpp SourceMgrTest.cpp SpecialCaseListTest.cpp - StringPool.cpp SwapByteOrderTest.cpp SymbolRemappingReaderTest.cpp TarWriterTest.cpp diff --git a/llvm/unittests/Support/StringPool.cpp b/llvm/unittests/Support/StringPool.cpp deleted file mode 100644 index 0a540e95acfd..000000000000 --- a/llvm/unittests/Support/StringPool.cpp +++ /dev/null @@ -1,30 +0,0 @@ -//===- llvm/unittest/Support/StringPoiil.cpp - StringPool tests -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/StringPool.h" -#include "gtest/gtest.h" - -using namespace llvm; - -namespace { - -TEST(PooledStringPtrTest, OperatorEquals) { - StringPool pool; - const PooledStringPtr a = pool.intern("a"); - const PooledStringPtr b = pool.intern("b"); - EXPECT_FALSE(a == b); -} - -TEST(PooledStringPtrTest, OperatorNotEquals) { - StringPool pool; - const PooledStringPtr a = pool.intern("a"); - const PooledStringPtr b = pool.intern("b"); - EXPECT_TRUE(a != b); -} - -} From 634f173830ee326d973ef3bf0837765bd4ea9148 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 17 Apr 2020 00:58:15 +0000 Subject: [PATCH 121/216] [gn build] Port 953a814aae4 --- llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn | 1 - 2 files changed, 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn index 83440e6676d4..cbf18d48e2e1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Support/BUILD.gn @@ -117,7 +117,6 @@ static_library("Support") { "Statistic.cpp", "StringExtras.cpp", "StringMap.cpp", - "StringPool.cpp", "StringRef.cpp", "StringSaver.cpp", "SymbolRemappingReader.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn index 435fb74b501d..2e439ceb1b35 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Support/BUILD.gn @@ -69,7 +69,6 @@ unittest("SupportTests") { "ScaledNumberTest.cpp", "SourceMgrTest.cpp", "SpecialCaseListTest.cpp", - "StringPool.cpp", "SwapByteOrderTest.cpp", 
"SymbolRemappingReaderTest.cpp", "TarWriterTest.cpp", From cbf99e0fbab5248d949970f104ff4c14c3100b22 Mon Sep 17 00:00:00 2001 From: Wouter van Oortmerssen Date: Thu, 16 Apr 2020 18:19:29 -0700 Subject: [PATCH 122/216] [WebAssembly] Fix faulty logic in verifyRelocTargets --- lld/wasm/InputChunks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lld/wasm/InputChunks.cpp b/lld/wasm/InputChunks.cpp index 077f2deac72e..81ca83d40e39 100644 --- a/lld/wasm/InputChunks.cpp +++ b/lld/wasm/InputChunks.cpp @@ -78,7 +78,7 @@ void InputChunk::verifyRelocTargets() const { if (bytesRead && bytesRead != 5) warn("expected LEB at relocation site be 5-byte padded"); - if (rel.Type != R_WASM_GLOBAL_INDEX_LEB || + if (rel.Type != R_WASM_GLOBAL_INDEX_LEB && rel.Type != R_WASM_GLOBAL_INDEX_I32) { uint32_t expectedValue = file->calcExpectedValue(rel); if (expectedValue != existingValue) From cc0ec3fdb9d1dccfc6bf6d511163cb595c8760eb Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 16 Apr 2020 18:21:41 -0700 Subject: [PATCH 123/216] [llvm-objcopy][MachO] Fix MachO::relocation_info use after 386f1c114d5. Use shift/mask operations to access r_symbolnum rather than relying on MachO::relocation_info. This should fix the big-endian bot failures that were caused by 386f1c114d5. --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 7 +++---- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 8 +++----- llvm/tools/llvm-objcopy/MachO/Object.h | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index dcc3a17175a5..e2eeee4ab5f4 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -204,10 +204,9 @@ void MachOReader::setSymbolInRelocationInfo(Object &O) const { for (LoadCommand &LC : O.LoadCommands) for (std::unique_ptr
&Sec : LC.Sections) for (auto &Reloc : Sec->Relocations) - if (!Reloc.Scattered) { - auto *Info = reinterpret_cast(&Reloc.Info); - Reloc.Symbol = O.SymTable.getSymbolByIndex(Info->r_symbolnum); - } + if (!Reloc.Scattered) + Reloc.Symbol = O.SymTable.getSymbolByIndex( + Reloc.getPlainRelocationSymbolNum(O.isLittleEndian())); } void MachOReader::readRebaseInfo(Object &O) const { diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index 29525c61cd09..a08d54d59723 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -240,11 +240,9 @@ void MachOWriter::writeSections() { Sec->Content.size()); for (size_t Index = 0; Index < Sec->Relocations.size(); ++Index) { auto RelocInfo = Sec->Relocations[Index]; - if (!RelocInfo.Scattered) { - auto *Info = - reinterpret_cast(&RelocInfo.Info); - Info->r_symbolnum = RelocInfo.Symbol->Index; - } + if (!RelocInfo.Scattered) + RelocInfo.setPlainRelocationSymbolNum(RelocInfo.Symbol->Index, + O.isLittleEndian()); if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct( diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index a5cb7b1d0a54..c834105a00aa 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -161,6 +161,20 @@ struct RelocationInfo { // True if Info is a scattered_relocation_info. bool Scattered; MachO::any_relocation_info Info; + + unsigned getPlainRelocationSymbolNum(bool IsLittleEndian) { + if (IsLittleEndian) + return Info.r_word1 & 0xffffff; + return Info.r_word1 >> 8; + } + + void setPlainRelocationSymbolNum(unsigned SymbolNum, bool IsLittleEndian) { + assert(SymbolNum < (1 << 24) && "SymbolNum out of range"); + if (IsLittleEndian) + Info.r_word1 = (Info.r_word1 & ~0x00ffffff) | SymbolNum; + else + Info.r_word1 = (Info.r_word1 & ~0xffffff00) | (SymbolNum << 8); + } }; /// The location of the rebase info inside the binary is described by @@ -300,6 +314,11 @@ struct Object { /// is not too long (SegName.size() should be less than or equal to 16). LoadCommand &addSegment(StringRef SegName); + bool isLittleEndian() const { + StringRef Magic(reinterpret_cast(&Header.Magic), 4); + return Magic == "\xCE\xFA\xED\xFE" || Magic == "\xCF\xFA\xED\xFE"; + } + bool is64Bit() const { return Header.Magic == MachO::MH_MAGIC_64 || Header.Magic == MachO::MH_CIGAM_64; From 186709c6e0bd1025fb578e43911325530cb97f09 Mon Sep 17 00:00:00 2001 From: aartbik Date: Thu, 16 Apr 2020 16:01:42 -0700 Subject: [PATCH 124/216] [mlir] [VectorOps] Progressive lowering of vector.broadcast Summary: Rather than having a full, recursive, lowering of vector.broadcast to LLVM IR, it is much more elegant to have a progressive lowering of each vector.broadcast into a lower dimensional vector.broadcast, until only elementary vector operations remain. This results in more elegant, step-wise code, that is easier to understand. Also makes some optimizations in the generated code. 
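As a sketch (shapes chosen for illustration, not one of the new test cases),
a rank-increasing broadcast such as

  %x = vector.broadcast %y : vector<2xf32> to vector<3x2xf32>

is now peeled one dimension at a time into elementary vector ops, roughly

  %zero = constant dense<0.000000e+00> : vector<3x2xf32>
  %0 = vector.insert %y, %zero [0] : vector<2xf32> into vector<3x2xf32>
  %1 = vector.insert %y, %0 [1] : vector<2xf32> into vector<3x2xf32>
  %x = vector.insert %y, %1 [2] : vector<2xf32> into vector<3x2xf32>

(the zero-filled accumulator is written here as a dense constant for brevity;
the exact folded output produced by the patterns may differ).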
Reviewers: nicolasvasilache, mehdi_amini, andydavis1, grosul1 Reviewed By: nicolasvasilache Subscribers: mehdi_amini, rriddle, jpienaar, burmako, shauheen, antiagainst, nicolasvasilache, arpith-jacob, mgester, lucyrfox, liufengdb, Joonsoo, grosul1, frgossen, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78071 --- mlir/include/mlir/Dialect/Vector/VectorOps.h | 1 + .../VectorToLLVM/ConvertVectorToLLVM.cpp | 152 +-------- mlir/lib/Dialect/Vector/VectorTransforms.cpp | 113 ++++++- .../VectorToLLVM/vector-to-llvm.mlir | 304 +++++++++--------- .../Vector/vector-contract-transforms.mlir | 178 +++++++++- 5 files changed, 437 insertions(+), 311 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/VectorOps.h b/mlir/include/mlir/Dialect/Vector/VectorOps.h index 2a8835102d59..c0785b9a2f9f 100644 --- a/mlir/include/mlir/Dialect/Vector/VectorOps.h +++ b/mlir/include/mlir/Dialect/Vector/VectorOps.h @@ -55,6 +55,7 @@ void populateVectorSlicesLoweringPatterns(OwningRewritePatternList &patterns, /// ContractionOpLowering, /// ShapeCastOp2DDownCastRewritePattern, /// ShapeCastOp2DUpCastRewritePattern +/// BroadcastOpLowering, /// TransposeOpLowering /// OuterproductOpLowering /// These transformation express higher level vector ops in terms of more diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index b7c4a57a78ba..003e06a87299 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -126,155 +126,6 @@ static SmallVector getI64SubArray(ArrayAttr arrayAttr, namespace { -class VectorBroadcastOpConversion : public ConvertToLLVMPattern { -public: - explicit VectorBroadcastOpConversion(MLIRContext *context, - LLVMTypeConverter &typeConverter) - : ConvertToLLVMPattern(vector::BroadcastOp::getOperationName(), context, - typeConverter) {} - - LogicalResult - matchAndRewrite(Operation *op, ArrayRef operands, - ConversionPatternRewriter &rewriter) const override { - auto broadcastOp = cast(op); - VectorType dstVectorType = broadcastOp.getVectorType(); - if (typeConverter.convertType(dstVectorType) == nullptr) - return failure(); - // Rewrite when the full vector type can be lowered (which - // implies all 'reduced' types can be lowered too). - auto adaptor = vector::BroadcastOpOperandAdaptor(operands); - VectorType srcVectorType = - broadcastOp.getSourceType().dyn_cast(); - rewriter.replaceOp( - op, expandRanks(adaptor.source(), // source value to be expanded - op->getLoc(), // location of original broadcast - srcVectorType, dstVectorType, rewriter)); - return success(); - } - -private: - // Expands the given source value over all the ranks, as defined - // by the source and destination type (a null source type denotes - // expansion from a scalar value into a vector). - // - // TODO(ajcbik): consider replacing this one-pattern lowering - // with a two-pattern lowering using other vector - // ops once all insert/extract/shuffle operations - // are available with lowering implementation. - // - Value expandRanks(Value value, Location loc, VectorType srcVectorType, - VectorType dstVectorType, - ConversionPatternRewriter &rewriter) const { - assert((dstVectorType != nullptr) && "invalid result type in broadcast"); - // Determine rank of source and destination. - int64_t srcRank = srcVectorType ? 
srcVectorType.getRank() : 0; - int64_t dstRank = dstVectorType.getRank(); - int64_t curDim = dstVectorType.getDimSize(0); - if (srcRank < dstRank) - // Duplicate this rank. - return duplicateOneRank(value, loc, srcVectorType, dstVectorType, dstRank, - curDim, rewriter); - // If all trailing dimensions are the same, the broadcast consists of - // simply passing through the source value and we are done. Otherwise, - // any non-matching dimension forces a stretch along this rank. - assert((srcVectorType != nullptr) && (srcRank > 0) && - (srcRank == dstRank) && "invalid rank in broadcast"); - for (int64_t r = 0; r < dstRank; r++) { - if (srcVectorType.getDimSize(r) != dstVectorType.getDimSize(r)) { - return stretchOneRank(value, loc, srcVectorType, dstVectorType, dstRank, - curDim, rewriter); - } - } - return value; - } - - // Picks the best way to duplicate a single rank. For the 1-D case, a - // single insert-elt/shuffle is the most efficient expansion. For higher - // dimensions, however, we need dim x insert-values on a new broadcast - // with one less leading dimension, which will be lowered "recursively" - // to matching LLVM IR. - // For example: - // v = broadcast s : f32 to vector<4x2xf32> - // becomes: - // x = broadcast s : f32 to vector<2xf32> - // v = [x,x,x,x] - // becomes: - // x = [s,s] - // v = [x,x,x,x] - Value duplicateOneRank(Value value, Location loc, VectorType srcVectorType, - VectorType dstVectorType, int64_t rank, int64_t dim, - ConversionPatternRewriter &rewriter) const { - Type llvmType = typeConverter.convertType(dstVectorType); - assert((llvmType != nullptr) && "unlowerable vector type"); - if (rank == 1) { - Value undef = rewriter.create(loc, llvmType); - Value expand = insertOne(rewriter, typeConverter, loc, undef, value, - llvmType, rank, 0); - SmallVector zeroValues(dim, 0); - return rewriter.create( - loc, expand, undef, rewriter.getI32ArrayAttr(zeroValues)); - } - Value expand = expandRanks(value, loc, srcVectorType, - reducedVectorTypeFront(dstVectorType), rewriter); - Value result = rewriter.create(loc, llvmType); - for (int64_t d = 0; d < dim; ++d) { - result = insertOne(rewriter, typeConverter, loc, result, expand, llvmType, - rank, d); - } - return result; - } - - // Picks the best way to stretch a single rank. For the 1-D case, a - // single insert-elt/shuffle is the most efficient expansion when at - // a stretch. Otherwise, every dimension needs to be expanded - // individually and individually inserted in the resulting vector. - // For example: - // v = broadcast w : vector<4x1x2xf32> to vector<4x2x2xf32> - // becomes: - // a = broadcast w[0] : vector<1x2xf32> to vector<2x2xf32> - // b = broadcast w[1] : vector<1x2xf32> to vector<2x2xf32> - // c = broadcast w[2] : vector<1x2xf32> to vector<2x2xf32> - // d = broadcast w[3] : vector<1x2xf32> to vector<2x2xf32> - // v = [a,b,c,d] - // becomes: - // x = broadcast w[0][0] : vector<2xf32> to vector <2x2xf32> - // y = broadcast w[1][0] : vector<2xf32> to vector <2x2xf32> - // a = [x, y] - // etc. 
- Value stretchOneRank(Value value, Location loc, VectorType srcVectorType, - VectorType dstVectorType, int64_t rank, int64_t dim, - ConversionPatternRewriter &rewriter) const { - Type llvmType = typeConverter.convertType(dstVectorType); - assert((llvmType != nullptr) && "unlowerable vector type"); - Value result = rewriter.create(loc, llvmType); - bool atStretch = dim != srcVectorType.getDimSize(0); - if (rank == 1) { - assert(atStretch); - Type redLlvmType = - typeConverter.convertType(dstVectorType.getElementType()); - Value one = - extractOne(rewriter, typeConverter, loc, value, redLlvmType, rank, 0); - Value expand = insertOne(rewriter, typeConverter, loc, result, one, - llvmType, rank, 0); - SmallVector zeroValues(dim, 0); - return rewriter.create( - loc, expand, result, rewriter.getI32ArrayAttr(zeroValues)); - } - VectorType redSrcType = reducedVectorTypeFront(srcVectorType); - VectorType redDstType = reducedVectorTypeFront(dstVectorType); - Type redLlvmType = typeConverter.convertType(redSrcType); - for (int64_t d = 0; d < dim; ++d) { - int64_t pos = atStretch ? 0 : d; - Value one = extractOne(rewriter, typeConverter, loc, value, redLlvmType, - rank, pos); - Value expand = expandRanks(one, loc, redSrcType, redDstType, rewriter); - result = insertOne(rewriter, typeConverter, loc, result, expand, llvmType, - rank, d); - } - return result; - } -}; - /// Conversion pattern for a vector.matrix_multiply. /// This is lowered directly to the proper llvm.intr.matrix.multiply. class VectorMatmulOpConversion : public ConvertToLLVMPattern { @@ -1209,8 +1060,7 @@ void mlir::populateVectorToLLVMConversionPatterns( VectorInsertStridedSliceOpSameRankRewritePattern, VectorStridedSliceOpConversion>(ctx); patterns - .insert { } }; -/// Progressive lowering of OuterProductOp. +/// Progressive lowering of BroadcastOp. +class BroadcastOpLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(vector::BroadcastOp op, + PatternRewriter &rewriter) const override { + auto loc = op.getLoc(); + VectorType dstType = op.getVectorType(); + VectorType srcType = op.getSourceType().dyn_cast(); + Type eltType = dstType.getElementType(); + + // Determine rank of source and destination. + int64_t srcRank = srcType ? srcType.getRank() : 0; + int64_t dstRank = dstType.getRank(); + + // Duplicate this rank. + // For example: + // %x = broadcast %y : k-D to n-D, k < n + // becomes: + // %b = broadcast %y : k-D to (n-1)-D + // %x = [%b,%b,%b,%b] : n-D + // becomes: + // %b = [%y,%y] : (n-1)-D + // %x = [%b,%b,%b,%b] : n-D + if (srcRank < dstRank) { + // Scalar to any vector can use splat. + if (srcRank == 0) { + rewriter.replaceOpWithNewOp(op, dstType, op.source()); + return success(); + } + // Duplication. + VectorType resType = + VectorType::get(dstType.getShape().drop_front(), eltType); + Value bcst = + rewriter.create(loc, resType, op.source()); + Value zero = rewriter.create(loc, eltType, + rewriter.getZeroAttr(eltType)); + Value result = rewriter.create(loc, dstType, zero); + for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) + result = rewriter.create(loc, bcst, result, d); + rewriter.replaceOp(op, result); + return success(); + } + + // Find non-matching dimension, if any. + assert(srcRank == dstRank); + int64_t m = -1; + for (int64_t r = 0; r < dstRank; r++) + if (srcType.getDimSize(r) != dstType.getDimSize(r)) { + m = r; + break; + } + + // All trailing dimensions are the same. Simply pass through. 
+    if (m == -1) {
+      rewriter.replaceOp(op, op.source());
+      return success();
+    }
+
+    // Stretching scalar inside vector (e.g. vector<1xf32>) can use splat.
+    if (srcRank == 1) {
+      assert(m == 0);
+      Value ext = rewriter.create<vector::ExtractOp>(loc, op.source(), 0);
+      rewriter.replaceOpWithNewOp<SplatOp>(op, dstType, ext);
+      return success();
+    }
+
+    // Any non-matching dimension forces a stretch along this rank.
+    // For example:
+    //   %x = broadcast %y : vector<4x1x2xf32> to vector<4x2x2xf32>
+    // becomes:
+    //   %a = broadcast %y[0] : vector<1x2xf32> to vector<2x2xf32>
+    //   %b = broadcast %y[1] : vector<1x2xf32> to vector<2x2xf32>
+    //   %c = broadcast %y[2] : vector<1x2xf32> to vector<2x2xf32>
+    //   %d = broadcast %y[3] : vector<1x2xf32> to vector<2x2xf32>
+    //   %x = [%a,%b,%c,%d]
+    // becomes:
+    //   %u = broadcast %y[0][0] : vector<2xf32> to vector<2x2xf32>
+    //   %v = broadcast %y[1][0] : vector<2xf32> to vector<2x2xf32>
+    //   %a = [%u, %v]
+    //   ..
+    //   %x = [%a,%b,%c,%d]
+    VectorType resType =
+        VectorType::get(dstType.getShape().drop_front(), eltType);
+    Value zero = rewriter.create<ConstantOp>(loc, eltType,
+                                             rewriter.getZeroAttr(eltType));
+    Value result = rewriter.create<SplatOp>(loc, dstType, zero);
+    if (m == 0) {
+      // Stretch at start.
+      Value ext = rewriter.create<vector::ExtractOp>(loc, op.source(), 0);
+      Value bcst = rewriter.create<vector::BroadcastOp>(loc, resType, ext);
+      for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d)
+        result = rewriter.create<vector::InsertOp>(loc, bcst, result, d);
+    } else {
+      // Stretch not at start.
+      for (int64_t d = 0, dim = dstType.getDimSize(0); d < dim; ++d) {
+        Value ext = rewriter.create<vector::ExtractOp>(loc, op.source(), d);
+        Value bcst = rewriter.create<vector::BroadcastOp>(loc, resType, ext);
+        result = rewriter.create<vector::InsertOp>(loc, bcst, result, d);
+      }
+    }
+    rewriter.replaceOp(op, result);
+    return success();
+  }
+};
+
+/// Progressive lowering of TransposeOp.
/// One: /// %x = vector.transpose %y, [1, 0] /// is replaced by: @@ -1518,7 +1625,7 @@ void mlir::vector::populateVectorContractLoweringPatterns( OwningRewritePatternList &patterns, MLIRContext *context, VectorTransformsOptions parameters) { patterns.insert(context); + ShapeCastOp2DUpCastRewritePattern, BroadcastOpLowering, + TransposeOpLowering, OuterProductOpLowering>(context); patterns.insert(parameters, context); } diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 6a65b219b632..96d4343b1a4b 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -4,201 +4,199 @@ func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec1d_from_scalar -// CHECK: llvm.mlir.undef : !llvm<"<2 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<2 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}}[0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> -// CHECK: llvm.return {{.*}} : !llvm<"<2 x float>"> +// CHECK-LABEL: llvm.func @broadcast_vec1d_from_scalar( +// CHECK-SAME: %[[A:.*]]: !llvm.float) +// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm<"<2 x float>"> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T2:.*]] = llvm.insertelement %[[A]], %[[T0]][%[[T1]] : !llvm.i32] : !llvm<"<2 x float>"> +// CHECK: %[[T3:.*]] = llvm.shufflevector %[[T2]], %[[T0]] [0 : i32, 0 : i32] : !llvm<"<2 x float>">, !llvm<"<2 x float>"> +// CHECK: llvm.return %[[T3]] : !llvm<"<2 x float>"> func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32> return %0 : vector<2x3xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec2d_from_scalar -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}}[0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x <3 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[2 x <3 x float>]"> +// CHECK-LABEL: llvm.func @broadcast_vec2d_from_scalar( +// CHECK-SAME: %[[A:.*]]: !llvm.float) +// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm<"[2 x <3 x float>]"> +// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm<"<3 x float>"> +// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0] : !llvm<"[2 x <3 x float>]"> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm<"[2 x <3 x float>]"> +// CHECK: llvm.return %[[T6]] : !llvm<"[2 x <3 x float>]"> func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { %0 = vector.broadcast %arg0 : f32 to vector<2x3x4xf32> return %0 : vector<2x3x4xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec3d_from_scalar -// CHECK: 
llvm.mlir.undef : !llvm<"<4 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"[2 x [3 x <4 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[2 x [3 x <4 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[2 x [3 x <4 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK-LABEL: llvm.func @broadcast_vec3d_from_scalar( +// CHECK-SAME: %[[A:.*]]: !llvm.float) +// CHECK: %[[T0:.*]] = llvm.mlir.undef : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: %[[T1:.*]] = llvm.mlir.undef : !llvm<"<4 x float>"> +// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T3:.*]] = llvm.insertelement %[[A]], %[[T1]][%[[T2]] : !llvm.i32] : !llvm<"<4 x float>"> +// CHECK: %[[T4:.*]] = llvm.shufflevector %[[T3]], %[[T3]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T0]][0, 0] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][0, 1] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][0, 2] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][1, 0] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: %[[T9:.*]] = llvm.insertvalue %[[T4]], %[[T8]][1, 1] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T4]], %[[T9]][1, 2] : !llvm<"[2 x [3 x <4 x float>]]"> +// CHECK: llvm.return %[[T10]] : !llvm<"[2 x [3 x <4 x float>]]"> func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32> return %0 : vector<2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec1d_from_vec1d -// CHECK: llvm.return {{.*}} : !llvm<"<2 x float>"> +// CHECK-LABEL: llvm.func @broadcast_vec1d_from_vec1d( +// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">) +// CHECK: llvm.return %[[A]] : !llvm<"<2 x float>"> func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32> return %0 : vector<3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec2d_from_vec1d -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[3 x <2 x float>]"> +// CHECK-LABEL: llvm.func @broadcast_vec2d_from_vec1d( +// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm<"[3 x <2 x 
float>]"> +// CHECK: llvm.return %[[T3]] : !llvm<"[3 x <2 x float>]"> func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec1d -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec1d( +// CHECK-SAME: %[[A:.*]]: !llvm<"<2 x float>">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][1] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][2] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T4]], %[[T1]][0] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T4]], %[[T5]][1] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T4]], %[[T6]][2] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T4]], %[[T7]][3] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.return %[[T8]] : !llvm<"[4 x [3 x <2 x float>]]"> func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec2d -// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-LABEL: llvm.func @broadcast_vec3d_from_vec2d( +// CHECK-SAME: %[[A:.*]]: !llvm<"[3 x <2 x float>]">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T1:.*]] = llvm.insertvalue %[[A]], %[[T0]][0] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[A]], %[[T1]][1] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[A]], %[[T2]][2] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[A]], %[[T3]][3] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.return %[[T4]] : !llvm<"[4 x [3 x <2 x float>]]"> func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { %0 = vector.broadcast %arg0 : vector<1xf32> 
to vector<4xf32> return %0 : vector<4xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch -// CHECK: llvm.mlir.undef : !llvm<"<4 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<4 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> -// CHECK: llvm.return {{.*}} : !llvm<"<4 x float>"> +// CHECK-LABEL: llvm.func @broadcast_stretch( +// CHECK-SAME: %[[A:.*]]: !llvm<"<1 x float>">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 +// CHECK: %[[T1:.*]] = llvm.extractelement %[[A]][%[[T0]] : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: %[[T2:.*]] = llvm.mlir.undef : !llvm<"<4 x float>"> +// CHECK: %[[T3:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T4:.*]] = llvm.insertelement %[[T1]], %[[T2]][%3 : !llvm.i32] : !llvm<"<4 x float>"> +// CHECK: %[[T5:.*]] = llvm.shufflevector %[[T4]], %[[T2]] [0 : i32, 0 : i32, 0 : i32, 0 : i32] : !llvm<"<4 x float>">, !llvm<"<4 x float>"> +// CHECK: llvm.return %[[T5]] : !llvm<"<4 x float>"> func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> { %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32> return %0 : vector<3x4xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch_at_start -// CHECK: llvm.mlir.undef : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <4 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <4 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[3 x <4 x float>]"> +// CHECK-LABEL: llvm.func @broadcast_stretch_at_start( +// CHECK-SAME: %[[A:.*]]: !llvm<"[1 x <4 x float>]">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x4xf32>) : !llvm<"[3 x <4 x float>]"> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[1 x <4 x float>]"> +// CHECK: %[[T2:.*]] = llvm.insertvalue %[[T1]], %[[T0]][0] : !llvm<"[3 x <4 x float>]"> +// CHECK: %[[T3:.*]] = llvm.insertvalue %[[T1]], %[[T2]][1] : !llvm<"[3 x <4 x float>]"> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T1]], %[[T3]][2] : !llvm<"[3 x <4 x float>]"> +// CHECK: llvm.return %[[T4]] : !llvm<"[3 x <4 x float>]"> func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> return %0 : vector<4x3xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch_at_end -// CHECK: llvm.mlir.undef : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x <3 x float>]"> -// CHECK: 
llvm.extractvalue {{.*}}[1] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x <1 x float>]"> -// CHECK: llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.extractelement {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<1 x float>"> -// CHECK: llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: llvm.insertelement {{.*}}, {{.*}}[{{.*}} : !llvm.i64] : !llvm<"<3 x float>"> -// CHECK: llvm.shufflevector {{.*}}, {{.*}} [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x <3 x float>]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x <3 x float>]"> +// CHECK-LABEL: llvm.func @broadcast_stretch_at_end( +// CHECK-SAME: %[[A:.*]]: !llvm<"[4 x <1 x float>]">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3xf32>) : !llvm<"[4 x <3 x float>]"> +// CHECK: %[[T1:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[4 x <1 x float>]"> +// CHECK: %[[T2:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 +// CHECK: %[[T3:.*]] = llvm.extractelement %[[T1]][%[[T2]] : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: %[[T4:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T5:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T6:.*]] = llvm.insertelement %[[T3]], %[[T4]][%[[T5]] : !llvm.i32] : !llvm<"<3 x float>"> +// CHECK: %[[T7:.*]] = llvm.shufflevector %[[T6]], %[[T4]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm<"[4 x <3 x float>]"> +// CHECK: %[[T9:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"[4 x <1 x float>]"> +// CHECK: %[[T10:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 +// CHECK: %[[T11:.*]] = llvm.extractelement %[[T9]][%[[T10]] : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm<"<3 x float>"> +// CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm<"[4 x <3 x float>]"> +// CHECK: %[[T17:.*]] = llvm.extractvalue %[[A]][2] : !llvm<"[4 x <1 x float>]"> +// CHECK: %[[T18:.*]] = 
llvm.mlir.constant(0 : i64) : !llvm.i64 +// CHECK: %[[T19:.*]] = llvm.extractelement %[[T17]][%[[T18]] : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: %[[T20:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T21:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T22:.*]] = llvm.insertelement %[[T19]], %[[T20]][%[[T21]] : !llvm.i32] : !llvm<"<3 x float>"> +// CHECK: %[[T23:.*]] = llvm.shufflevector %[[T22]], %[[T20]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T23]], %[[T16]][2] : !llvm<"[4 x <3 x float>]"> +// CHECK: %[[T25:.*]] = llvm.extractvalue %[[A]][3] : !llvm<"[4 x <1 x float>]"> +// CHECK: %[[T26:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 +// CHECK: %[[T27:.*]] = llvm.extractelement %[[T25]][%[[T26]] : !llvm.i64] : !llvm<"<1 x float>"> +// CHECK: %[[T28:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> +// CHECK: %[[T29:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T30:.*]] = llvm.insertelement %[[T27]], %[[T28]][%[[T29]] : !llvm.i32] : !llvm<"<3 x float>"> +// CHECK: %[[T31:.*]] = llvm.shufflevector %[[T30]], %[[T28]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> +// CHECK: %[[T32:.*]] = llvm.insertvalue %[[T31]], %[[T24]][3] : !llvm<"[4 x <3 x float>]"> +// CHECK: llvm.return %[[T32]] : !llvm<"[4 x <3 x float>]"> func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> { %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> return %0 : vector<4x3x2xf32> } -// CHECK-LABEL: llvm.func @broadcast_stretch_in_middle -// CHECK: llvm.mlir.undef : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[1] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[2] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, 
{{.*}}[2] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.extractvalue {{.*}}[3] : !llvm<"[4 x [1 x <2 x float>]]"> -// CHECK: llvm.mlir.undef : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[0] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[1] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.extractvalue {{.*}}[0] : !llvm<"[1 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[2] : !llvm<"[3 x <2 x float>]"> -// CHECK: llvm.insertvalue {{.*}}, {{.*}}[3] : !llvm<"[4 x [3 x <2 x float>]]"> -// CHECK: llvm.return {{.*}} : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK-LABEL: llvm.func @broadcast_stretch_in_middle( +// CHECK-SAME: %[[A:.*]]: !llvm<"[4 x [1 x <2 x float>]]">) +// CHECK: %[[T0:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<4x3x2xf32>) : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T1:.*]] = llvm.mlir.constant(dense<0.000000e+00> : vector<3x2xf32>) : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T2:.*]] = llvm.extractvalue %[[A]][0] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: %[[T3:.*]] = llvm.extractvalue %[[T2]][0] : !llvm<"[1 x <2 x float>]"> +// CHECK: %[[T4:.*]] = llvm.insertvalue %[[T3]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T5:.*]] = llvm.insertvalue %[[T3]], %[[T4]][1] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T6:.*]] = llvm.insertvalue %[[T3]], %[[T5]][2] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T7:.*]] = llvm.insertvalue %[[T6]], %[[T0]][0] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T8:.*]] = llvm.extractvalue %[[A]][1] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: %[[T9:.*]] = llvm.extractvalue %[[T8]][0] : !llvm<"[1 x <2 x float>]"> +// CHECK: %[[T10:.*]] = llvm.insertvalue %[[T9]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T11:.*]] = llvm.insertvalue %[[T9]], %[[T10]][1] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T12:.*]] = llvm.insertvalue %[[T9]], %[[T11]][2] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T13:.*]] = llvm.insertvalue %[[T12]], %[[T7]][1] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T14:.*]] = llvm.extractvalue %[[A]][2] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: %[[T15:.*]] = llvm.extractvalue %[[T14]][0] : !llvm<"[1 x <2 x float>]"> +// CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T17:.*]] = llvm.insertvalue %[[T15]], %[[T16]][1] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T18:.*]] = llvm.insertvalue %[[T15]], %[[T17]][2] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T19:.*]] = llvm.insertvalue %[[T18]], %[[T13]][2] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: %[[T20:.*]] = llvm.extractvalue %[[A]][3] : !llvm<"[4 x [1 x <2 x float>]]"> +// CHECK: %[[T21:.*]] = llvm.extractvalue %[[T20]][0] : !llvm<"[1 x <2 x float>]"> +// CHECK: %[[T22:.*]] = llvm.insertvalue %[[T21]], %[[T1]][0] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T23:.*]] = llvm.insertvalue %[[T21]], %[[T22]][1] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T24:.*]] = llvm.insertvalue %[[T21]], %[[T23]][2] : !llvm<"[3 x <2 x float>]"> +// CHECK: %[[T25:.*]] = llvm.insertvalue %[[T24]], %[[T19]][3] : !llvm<"[4 x [3 x <2 x float>]]"> +// CHECK: llvm.return %[[T25]] : !llvm<"[4 x [3 x <2 x float>]]"> func @outerproduct(%arg0: vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32> { %2 = vector.outerproduct %arg0, %arg1 : vector<2xf32>, vector<3xf32> @@ -211,16 +209,16 @@ func @outerproduct(%arg0: 
vector<2xf32>, %arg1: vector<3xf32>) -> vector<2x3xf32 // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 // CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : !llvm.i64] : !llvm<"<2 x float>"> // CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%4 : !llvm.i32] : !llvm<"<3 x float>"> // CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> // CHECK: %[[T7:.*]] = llvm.fmul %[[T6]], %[[B]] : !llvm<"<3 x float>"> // CHECK: %[[T8:.*]] = llvm.insertvalue %[[T7]], %[[T0]][0] : !llvm<"[2 x <3 x float>]"> // CHECK: %[[T9:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 // CHECK: %[[T10:.*]] = llvm.extractelement %[[A]][%9 : !llvm.i64] : !llvm<"<2 x float>"> // CHECK: %[[T11:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: %[[T12:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[T12:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T13:.*]] = llvm.insertelement %[[T10]], %[[T11]][%12 : !llvm.i32] : !llvm<"<3 x float>"> // CHECK: %[[T14:.*]] = llvm.shufflevector %[[T13]], %[[T11]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> // CHECK: %[[T15:.*]] = llvm.fmul %[[T14]], %[[B]] : !llvm<"<3 x float>"> // CHECK: %[[T16:.*]] = llvm.insertvalue %[[T15]], %[[T8]][1] : !llvm<"[2 x <3 x float>]"> @@ -238,8 +236,8 @@ func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector // CHECK: %[[T1:.*]] = llvm.mlir.constant(0 : i64) : !llvm.i64 // CHECK: %[[T2:.*]] = llvm.extractelement %[[A]][%[[T1]] : !llvm.i64] : !llvm<"<2 x float>"> // CHECK: %[[T3:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[T4:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : !llvm.i32] : !llvm<"<3 x float>"> // CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>"> // CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm<"[2 x <3 x float>]"> // CHECK: %[[T8:.*]] = "llvm.intr.fma"(%[[T6]], %[[B]], %[[T7]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) @@ -247,8 +245,8 @@ func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector // CHECK: %[[T10:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64 // CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : !llvm.i64] : !llvm<"<2 x float>"> // CHECK: %[[T12:.*]] = llvm.mlir.undef : !llvm<"<3 x float>"> -// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : index) : !llvm.i64 -// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i64] : !llvm<"<3 x float>"> +// CHECK: %[[T13:.*]] = llvm.mlir.constant(0 : i32) : !llvm.i32 +// CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm<"<3 x float>"> // CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x 
float>"> // CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm<"[2 x <3 x float>]"> // CHECK: %[[T17:.*]] = "llvm.intr.fma"(%[[T15]], %[[B]], %[[T16]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">) diff --git a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir index 08140b4ae065..8354677b797c 100644 --- a/mlir/test/Dialect/Vector/vector-contract-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-contract-transforms.mlir @@ -257,11 +257,11 @@ func @full_contract2(%arg0: vector<2x3xf32>, // CHECK-SAME: %[[B:.*1]]: vector<3xf32> // CHECK: %[[C0:.*]] = constant dense<0.000000e+00> : vector<2x3xf32> // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2xf32> -// CHECK: %[[T1:.*]] = vector.broadcast %[[T0]] : f32 to vector<3xf32> +// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<3xf32> // CHECK: %[[T2:.*]] = mulf %[[T1]], %[[B]] : vector<3xf32> // CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C0]] [0] : vector<3xf32> into vector<2x3xf32> // CHECK: %[[T4:.*]] = vector.extract %[[A]][1] : vector<2xf32> -// CHECK: %[[T5:.*]] = vector.broadcast %[[T4]] : f32 to vector<3xf32> +// CHECK: %[[T5:.*]] = splat %[[T4]] : vector<3xf32> // CHECK: %[[T6:.*]] = mulf %[[T5]], %[[B]] : vector<3xf32> // CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T3]] [1] : vector<3xf32> into vector<2x3xf32> // CHECK: return %[[T7]] : vector<2x3xf32> @@ -278,12 +278,12 @@ func @outerproduct_noacc(%arg0: vector<2xf32>, // CHECK-SAME: %[[C:.*2]]: vector<2x3xf32> // CHECK: %[[C0:.*]] = constant dense<0.000000e+00> : vector<2x3xf32> // CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<2xf32> -// CHECK: %[[T1:.*]] = vector.broadcast %[[T0]] : f32 to vector<3xf32> +// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<3xf32> // CHECK: %[[T2:.*]] = vector.extract %[[C]][0] : vector<2x3xf32> // CHECK: %[[T3:.*]] = vector.fma %[[T1]], %[[B]], %[[T2]] : vector<3xf32> // CHECK: %[[T4:.*]] = vector.insert %[[T3]], %[[C0]] [0] : vector<3xf32> into vector<2x3xf32> // CHECK: %[[T5:.*]] = vector.extract %[[A]][1] : vector<2xf32> -// CHECK: %[[T6:.*]] = vector.broadcast %[[T5]] : f32 to vector<3xf32> +// CHECK: %[[T6:.*]] = splat %[[T5]] : vector<3xf32> // CHECK: %[[T7:.*]] = vector.extract %[[C]][1] : vector<2x3xf32> // CHECK: %[[T8:.*]] = vector.fma %[[T6]], %[[B]], %[[T7]] : vector<3xf32> // CHECK: %[[T9:.*]] = vector.insert %[[T8]], %[[T4]] [1] : vector<3xf32> into vector<2x3xf32> @@ -389,3 +389,173 @@ func @matmul(%arg0: vector<2x4xf32>, : vector<2x4xf32>, vector<4x3xf32> into vector<2x3xf32> return %0 : vector<2x3xf32> } + +// CHECK-LABEL: func @broadcast_vec1d_from_scalar +// CHECK-SAME: %[[A:.*0]]: f32 +// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2xf32> +// CHECK: return %[[T0]] : vector<2xf32> + +func @broadcast_vec1d_from_scalar(%arg0: f32) -> vector<2xf32> { + %0 = vector.broadcast %arg0 : f32 to vector<2xf32> + return %0 : vector<2xf32> +} + +// CHECK-LABEL: func @broadcast_vec2d_from_scalar +// CHECK-SAME: %[[A:.*0]]: f32 +// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2x3xf32> +// CHECK: return %[[T0]] : vector<2x3xf32> + +func @broadcast_vec2d_from_scalar(%arg0: f32) -> vector<2x3xf32> { + %0 = vector.broadcast %arg0 : f32 to vector<2x3xf32> + return %0 : vector<2x3xf32> +} + +// CHECK-LABEL: func @broadcast_vec3d_from_scalar +// CHECK-SAME: %[[A:.*0]]: f32 +// CHECK: %[[T0:.*]] = splat %[[A]] : vector<2x3x4xf32> +// CHECK: return %[[T0]] : vector<2x3x4xf32> + +func @broadcast_vec3d_from_scalar(%arg0: f32) -> vector<2x3x4xf32> { + %0 = 
vector.broadcast %arg0 : f32 to vector<2x3x4xf32> + return %0 : vector<2x3x4xf32> +} + +// CHECK-LABEL: func @broadcast_vec1d_from_vec1d +// CHECK-SAME: %[[A:.*0]]: vector<2xf32> +// CHECK: return %[[A]] : vector<2xf32> + +func @broadcast_vec1d_from_vec1d(%arg0: vector<2xf32>) -> vector<2xf32> { + %0 = vector.broadcast %arg0 : vector<2xf32> to vector<2xf32> + return %0 : vector<2xf32> +} + +// CHECK-LABEL: func @broadcast_vec2d_from_vec1d +// CHECK-SAME: %[[A:.*0]]: vector<2xf32> +// CHECK: %[[C0:.*]] = constant dense<0.000000e+00> : vector<3x2xf32> +// CHECK: %[[T0:.*]] = vector.insert %[[A]], %[[C0]] [0] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T1:.*]] = vector.insert %[[A]], %[[T0]] [1] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T2:.*]] = vector.insert %[[A]], %[[T1]] [2] : vector<2xf32> into vector<3x2xf32> +// CHECK: return %[[T2]] : vector<3x2xf32> + +func @broadcast_vec2d_from_vec1d(%arg0: vector<2xf32>) -> vector<3x2xf32> { + %0 = vector.broadcast %arg0 : vector<2xf32> to vector<3x2xf32> + return %0 : vector<3x2xf32> +} + +// CHECK-LABEL: func @broadcast_vec3d_from_vec1d +// CHECK-SAME: %[[A:.*0]]: vector<2xf32> +// CHECK: %[[C0:.*]] = constant dense<0.000000e+00> : vector<3x2xf32> +// CHECK: %[[C1:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32> +// CHECK: %[[T0:.*]] = vector.insert %[[A]], %[[C0]] [0] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T1:.*]] = vector.insert %[[A]], %[[T0]] [1] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T2:.*]] = vector.insert %[[A]], %[[T1]] [2] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C1]] [0] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T4:.*]] = vector.insert %[[T2]], %[[T3]] [1] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T5:.*]] = vector.insert %[[T2]], %[[T4]] [2] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T6:.*]] = vector.insert %[[T2]], %[[T5]] [3] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: return %[[T6]] : vector<4x3x2xf32> + +func @broadcast_vec3d_from_vec1d(%arg0: vector<2xf32>) -> vector<4x3x2xf32> { + %0 = vector.broadcast %arg0 : vector<2xf32> to vector<4x3x2xf32> + return %0 : vector<4x3x2xf32> +} + +// CHECK-LABEL: func @broadcast_vec3d_from_vec2d +// CHECK-SAME: %[[A:.*0]]: vector<3x2xf32> +// CHECK: %[[C0:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32> +// CHECK: %[[T0:.*]] = vector.insert %[[A]], %[[C0]] [0] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T1:.*]] = vector.insert %[[A]], %[[T0]] [1] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T2:.*]] = vector.insert %[[A]], %[[T1]] [2] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T3:.*]] = vector.insert %[[A]], %[[T2]] [3] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: return %[[T3]] : vector<4x3x2xf32> + +func @broadcast_vec3d_from_vec2d(%arg0: vector<3x2xf32>) -> vector<4x3x2xf32> { + %0 = vector.broadcast %arg0 : vector<3x2xf32> to vector<4x3x2xf32> + return %0 : vector<4x3x2xf32> +} + +// CHECK-LABEL: func @broadcast_stretch +// CHECK-SAME: %[[A:.*0]]: vector<1xf32> +// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<1xf32> +// CHECK: %[[T1:.*]] = splat %[[T0]] : vector<4xf32> +// CHECK: return %[[T1]] : vector<4xf32> + +func @broadcast_stretch(%arg0: vector<1xf32>) -> vector<4xf32> { + %0 = vector.broadcast %arg0 : vector<1xf32> to vector<4xf32> + return %0 : vector<4xf32> +} + +// CHECK-LABEL: func @broadcast_stretch_at_start +// CHECK-SAME: %[[A:.*0]]: vector<1x4xf32> +// CHECK: %[[C0:.*]] = constant 
dense<0.000000e+00> : vector<3x4xf32> +// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<1x4xf32> +// CHECK: %[[T1:.*]] = vector.insert %[[T0]], %[[C0]] [0] : vector<4xf32> into vector<3x4xf32> +// CHECK: %[[T2:.*]] = vector.insert %[[T0]], %[[T1]] [1] : vector<4xf32> into vector<3x4xf32> +// CHECK: %[[T3:.*]] = vector.insert %[[T0]], %[[T2]] [2] : vector<4xf32> into vector<3x4xf32> +// CHECK: return %[[T3]] : vector<3x4xf32> + +func @broadcast_stretch_at_start(%arg0: vector<1x4xf32>) -> vector<3x4xf32> { + %0 = vector.broadcast %arg0 : vector<1x4xf32> to vector<3x4xf32> + return %0 : vector<3x4xf32> +} + +// CHECK-LABEL: func @broadcast_stretch_at_end +// CHECK-SAME: %[[A:.*0]]: vector<4x1xf32> +// CHECK: %[[C:.*]] = constant dense<0.000000e+00> : vector<4x3xf32> +// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<4x1xf32> +// CHECK: %[[T1:.*]] = vector.extract %[[T0]][0] : vector<1xf32> +// CHECK: %[[T2:.*]] = splat %[[T1]] : vector<3xf32> +// CHECK: %[[T3:.*]] = vector.insert %[[T2]], %[[C0]] [0] : vector<3xf32> into vector<4x3xf32> +// CHECK: %[[T4:.*]] = vector.extract %[[A]][1] : vector<4x1xf32> +// CHECK: %[[T5:.*]] = vector.extract %[[T4]][0] : vector<1xf32> +// CHECK: %[[T6:.*]] = splat %[[T5]] : vector<3xf32> +// CHECK: %[[T7:.*]] = vector.insert %[[T6]], %[[T3]] [1] : vector<3xf32> into vector<4x3xf32> +// CHECK: %[[T8:.*]] = vector.extract %[[A]][2] : vector<4x1xf32> +// CHECK: %[[T9:.*]] = vector.extract %[[T8]][0] : vector<1xf32> +// CHECK: %[[T10:.*]] = splat %[[T9]] : vector<3xf32> +// CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T7]] [2] : vector<3xf32> into vector<4x3xf32> +// CHECK: %[[T12:.*]] = vector.extract %[[A]][3] : vector<4x1xf32> +// CHECK: %[[T13:.*]] = vector.extract %[[T12]][0] : vector<1xf32> +// CHECK: %[[T14:.*]] = splat %[[T13]] : vector<3xf32> +// CHECK: %[[T15:.*]] = vector.insert %[[T14]], %[[T11]] [3] : vector<3xf32> into vector<4x3xf32> +// CHECK: return %[[T15]] : vector<4x3xf32> + +func @broadcast_stretch_at_end(%arg0: vector<4x1xf32>) -> vector<4x3xf32> { + %0 = vector.broadcast %arg0 : vector<4x1xf32> to vector<4x3xf32> + return %0 : vector<4x3xf32> +} + +// CHECK-LABEL: func @broadcast_stretch_in_middle +// CHECK-SAME: %[[A:.*0]]: vector<4x1x2xf32> +// CHECK: %[[C0:.*]] = constant dense<0.000000e+00> : vector<4x3x2xf32> +// CHECK: %[[C1:.*]] = constant dense<0.000000e+00> : vector<3x2xf32> +// CHECK: %[[T0:.*]] = vector.extract %[[A]][0] : vector<4x1x2xf32> +// CHECK: %[[T1:.*]] = vector.extract %[[T0]][0] : vector<1x2xf32> +// CHECK: %[[T2:.*]] = vector.insert %[[T1]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T3:.*]] = vector.insert %[[T1]], %[[T2]] [1] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T4:.*]] = vector.insert %[[T1]], %[[T3]] [2] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T5:.*]] = vector.insert %[[T4]], %[[C0]] [0] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T6:.*]] = vector.extract %[[A]][1] : vector<4x1x2xf32> +// CHECK: %[[T7:.*]] = vector.extract %[[T6]][0] : vector<1x2xf32> +// CHECK: %[[T8:.*]] = vector.insert %[[T7]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T9:.*]] = vector.insert %[[T7]], %[[T8]] [1] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T10:.*]] = vector.insert %[[T7]], %[[T9]] [2] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T11:.*]] = vector.insert %[[T10]], %[[T5]] [1] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T12:.*]] = vector.extract %[[A]][2] : vector<4x1x2xf32> +// CHECK: %[[T13:.*]] = vector.extract 
%[[T12]][0] : vector<1x2xf32> +// CHECK: %[[T14:.*]] = vector.insert %[[T13]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T15:.*]] = vector.insert %[[T13]], %[[T14]] [1] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T16:.*]] = vector.insert %[[T13]], %[[T15]] [2] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T17:.*]] = vector.insert %[[T16]], %[[T11]] [2] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: %[[T18:.*]] = vector.extract %[[A]][3] : vector<4x1x2xf32> +// CHECK: %[[T19:.*]] = vector.extract %[[T18]][0] : vector<1x2xf32> +// CHECK: %[[T20:.*]] = vector.insert %[[T19]], %[[C1]] [0] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T21:.*]] = vector.insert %[[T19]], %[[T20]] [1] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T22:.*]] = vector.insert %[[T19]], %[[T21]] [2] : vector<2xf32> into vector<3x2xf32> +// CHECK: %[[T23:.*]] = vector.insert %[[T22]], %[[T17]] [3] : vector<3x2xf32> into vector<4x3x2xf32> +// CHECK: return %[[T23]] : vector<4x3x2xf32> + +func @broadcast_stretch_in_middle(%arg0: vector<4x1x2xf32>) -> vector<4x3x2xf32> { + %0 = vector.broadcast %arg0 : vector<4x1x2xf32> to vector<4x3x2xf32> + return %0 : vector<4x3x2xf32> +} From a6f19762c6d9d3ff4074b85194c9409635aa8014 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Thu, 16 Apr 2020 21:48:40 -0700 Subject: [PATCH 125/216] [llvm-objcopy][MachO] Correctly identify object file endianness. The header magic value is held in the native endianness, so the method used in cc0ec3fdb9d. Use MachOReader / MachOWriter's existing endianness tests instead. --- llvm/tools/llvm-objcopy/MachO/MachOReader.cpp | 2 +- llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp | 2 +- llvm/tools/llvm-objcopy/MachO/Object.h | 5 ----- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp index e2eeee4ab5f4..6d08e3abccf5 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOReader.cpp @@ -206,7 +206,7 @@ void MachOReader::setSymbolInRelocationInfo(Object &O) const { for (auto &Reloc : Sec->Relocations) if (!Reloc.Scattered) Reloc.Symbol = O.SymTable.getSymbolByIndex( - Reloc.getPlainRelocationSymbolNum(O.isLittleEndian())); + Reloc.getPlainRelocationSymbolNum(MachOObj.isLittleEndian())); } void MachOReader::readRebaseInfo(Object &O) const { diff --git a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp index a08d54d59723..db31ef518aa8 100644 --- a/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp +++ b/llvm/tools/llvm-objcopy/MachO/MachOWriter.cpp @@ -242,7 +242,7 @@ void MachOWriter::writeSections() { auto RelocInfo = Sec->Relocations[Index]; if (!RelocInfo.Scattered) RelocInfo.setPlainRelocationSymbolNum(RelocInfo.Symbol->Index, - O.isLittleEndian()); + IsLittleEndian); if (IsLittleEndian != sys::IsLittleEndianHost) MachO::swapStruct( diff --git a/llvm/tools/llvm-objcopy/MachO/Object.h b/llvm/tools/llvm-objcopy/MachO/Object.h index c834105a00aa..d0d4554d7560 100644 --- a/llvm/tools/llvm-objcopy/MachO/Object.h +++ b/llvm/tools/llvm-objcopy/MachO/Object.h @@ -314,11 +314,6 @@ struct Object { /// is not too long (SegName.size() should be less than or equal to 16). 
LoadCommand &addSegment(StringRef SegName); - bool isLittleEndian() const { - StringRef Magic(reinterpret_cast(&Header.Magic), 4); - return Magic == "\xCE\xFA\xED\xFE" || Magic == "\xCF\xFA\xED\xFE"; - } - bool is64Bit() const { return Header.Magic == MachO::MH_MAGIC_64 || Header.Magic == MachO::MH_CIGAM_64; From 5034df860038822e640234fde994783c3f26d6f6 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 16 Apr 2020 21:03:39 -0700 Subject: [PATCH 126/216] [SampleProfile] Use CallBase in function arguments and data structures to reduce the number of explicit casts. NFCI Removing CallSite left us with a bunch of explicit casts from Instruction to CallBase. This moves the casts earlier so that function arguments and data structure types are CallBase so we don't have to cast when we use them. Differential Revision: https://reviews.llvm.org/D78246 --- llvm/lib/Transforms/IPO/SampleProfile.cpp | 97 +++++++++++------------ 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 968860583735..489976a0cb8e 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -329,18 +329,19 @@ class SampleProfileLoader { bool emitAnnotations(Function &F); ErrorOr getInstWeight(const Instruction &I); ErrorOr getBlockWeight(const BasicBlock *BB); - const FunctionSamples *findCalleeFunctionSamples(const Instruction &I) const; + const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const; std::vector findIndirectCallFunctionSamples(const Instruction &I, uint64_t &Sum) const; mutable DenseMap DILocation2SampleMap; const FunctionSamples *findFunctionSamples(const Instruction &I) const; - bool inlineCallInstruction(Instruction *I); + bool inlineCallInstruction(CallBase &CB); bool inlineHotFunctions(Function &F, DenseSet &InlinedGUIDs); // Inline cold/small functions in addition to hot ones - bool shouldInlineColdCallee(Instruction &CallInst); + bool shouldInlineColdCallee(CallBase &CallInst); void emitOptimizationRemarksForInlineCandidates( - const SmallVector &Candidates, const Function &F, bool Hot); + const SmallVectorImpl &Candidates, const Function &F, + bool Hot); void printEdgeWeight(raw_ostream &OS, Edge E); void printBlockWeight(raw_ostream &OS, const BasicBlock *BB) const; void printBlockEquivalence(raw_ostream &OS, const BasicBlock *BB); @@ -718,9 +719,9 @@ ErrorOr SampleProfileLoader::getInstWeight(const Instruction &Inst) { // (findCalleeFunctionSamples returns non-empty result), but not inlined here, // it means that the inlined callsite has no sample, thus the call // instruction should have 0 count. - if ((isa(Inst) || isa(Inst)) && - !cast(Inst).isIndirectCall() && findCalleeFunctionSamples(Inst)) - return 0; + if (auto *CB = dyn_cast(&Inst)) + if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB)) + return 0; const DILocation *DIL = DLoc; uint32_t LineOffset = FunctionSamples::getOffset(DIL); @@ -808,7 +809,7 @@ bool SampleProfileLoader::computeBlockWeights(Function &F) { /// /// \returns The FunctionSamples pointer to the inlined instance. 
const FunctionSamples * -SampleProfileLoader::findCalleeFunctionSamples(const Instruction &Inst) const { +SampleProfileLoader::findCalleeFunctionSamples(const CallBase &Inst) const { const DILocation *DIL = Inst.getDebugLoc(); if (!DIL) { return nullptr; @@ -892,15 +893,11 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { return it.first->second; } -// FIXME(CallSite): Parameter should be CallBase&, as it's assumed to be that, -// and non-null. -bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { - assert(isa(I) || isa(I)); - CallBase &CS = *cast(I); - Function *CalledFunction = CS.getCalledFunction(); +bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { + Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); - DebugLoc DLoc = I->getDebugLoc(); - BasicBlock *BB = I->getParent(); + DebugLoc DLoc = CB.getDebugLoc(); + BasicBlock *BB = CB.getParent(); InlineParams Params = getInlineParams(); Params.ComputeFullInlineCost = true; // Checks if there is anything in the reachable portion of the callee at @@ -909,16 +906,15 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { // when cost exceeds threshold without checking all IRs in the callee. // The acutal cost does not matter because we only checks isNever() to // see if it is legal to inline the callsite. - InlineCost Cost = - getInlineCost(cast(*I), Params, GetTTI(*CalledFunction), GetAC, - None, GetTLI, nullptr, nullptr); + InlineCost Cost = getInlineCost(CB, Params, GetTTI(*CalledFunction), GetAC, + None, GetTLI, nullptr, nullptr); if (Cost.isNever()) { ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineFail", DLoc, BB) << "incompatible inlining"); return false; } InlineFunctionInfo IFI(nullptr, &GetAC); - if (InlineFunction(CS, IFI).isSuccess()) { + if (InlineFunction(CB, IFI).isSuccess()) { // The call to InlineFunction erases I, so we can't pass it here. 
ORE->emit(OptimizationRemark(CSINLINE_DEBUG, "InlineSuccess", DLoc, BB) << "inlined callee '" << ore::NV("Callee", CalledFunction) @@ -928,26 +924,25 @@ bool SampleProfileLoader::inlineCallInstruction(Instruction *I) { return false; } -bool SampleProfileLoader::shouldInlineColdCallee(Instruction &CallInst) { +bool SampleProfileLoader::shouldInlineColdCallee(CallBase &CallInst) { if (!ProfileSizeInline) return false; - Function *Callee = cast(CallInst).getCalledFunction(); + Function *Callee = CallInst.getCalledFunction(); if (Callee == nullptr) return false; - InlineCost Cost = - getInlineCost(cast(CallInst), getInlineParams(), - GetTTI(*Callee), GetAC, None, GetTLI, nullptr, nullptr); + InlineCost Cost = getInlineCost(CallInst, getInlineParams(), GetTTI(*Callee), + GetAC, None, GetTLI, nullptr, nullptr); return Cost.getCost() <= SampleColdCallSiteThreshold; } void SampleProfileLoader::emitOptimizationRemarksForInlineCandidates( - const SmallVector &Candidates, const Function &F, + const SmallVectorImpl &Candidates, const Function &F, bool Hot) { for (auto I : Candidates) { - Function *CalledFunction = cast(I)->getCalledFunction(); + Function *CalledFunction = I->getCalledFunction(); if (CalledFunction) { ORE->emit(OptimizationRemarkAnalysis(CSINLINE_DEBUG, "InlineAttempt", I->getDebugLoc(), I->getParent()) @@ -984,45 +979,43 @@ bool SampleProfileLoader::inlineHotFunctions( "ProfAccForSymsInList should be false when profile-sample-accurate " "is enabled"); - // FIXME(CallSite): refactor the vectors here, as they operate with CallBase - // values - DenseMap localNotInlinedCallSites; + DenseMap localNotInlinedCallSites; bool Changed = false; while (true) { bool LocalChanged = false; - SmallVector CIS; + SmallVector CIS; for (auto &BB : F) { bool Hot = false; - SmallVector AllCandidates; - SmallVector ColdCandidates; + SmallVector AllCandidates; + SmallVector ColdCandidates; for (auto &I : BB.getInstList()) { const FunctionSamples *FS = nullptr; - if ((isa(I) || isa(I)) && - !isa(I) && (FS = findCalleeFunctionSamples(I))) { - AllCandidates.push_back(&I); - if (FS->getEntrySamples() > 0) - localNotInlinedCallSites.try_emplace(&I, FS); - if (callsiteIsHot(FS, PSI)) - Hot = true; - else if (shouldInlineColdCallee(I)) - ColdCandidates.push_back(&I); + if (auto *CB = dyn_cast(&I)) { + if (!isa(I) && (FS = findCalleeFunctionSamples(*CB))) { + AllCandidates.push_back(CB); + if (FS->getEntrySamples() > 0) + localNotInlinedCallSites.try_emplace(CB, FS); + if (callsiteIsHot(FS, PSI)) + Hot = true; + else if (shouldInlineColdCallee(*CB)) + ColdCandidates.push_back(CB); + } } } if (Hot) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); - } - else { + } else { CIS.insert(CIS.begin(), ColdCandidates.begin(), ColdCandidates.end()); emitOptimizationRemarksForInlineCandidates(ColdCandidates, F, false); } } - for (auto I : CIS) { - Function *CalledFunction = cast(I)->getCalledFunction(); + for (CallBase *I : CIS) { + Function *CalledFunction = I->getCalledFunction(); // Do not inline recursive calls. 
if (CalledFunction == &F) continue; - if (cast(I)->isIndirectCall()) { + if (I->isIndirectCall()) { if (PromotedInsns.count(I)) continue; uint64_t Sum; @@ -1049,7 +1042,7 @@ bool SampleProfileLoader::inlineHotFunctions( if (R != SymbolMap.end() && R->getValue() && !R->getValue()->isDeclaration() && R->getValue()->getSubprogram() && - isLegalToPromote(*cast(I), R->getValue(), &Reason)) { + isLegalToPromote(*I, R->getValue(), &Reason)) { uint64_t C = FS->getEntrySamples(); Instruction *DI = pgo::promoteIndirectCall(I, R->getValue(), C, Sum, false, ORE); @@ -1057,7 +1050,7 @@ bool SampleProfileLoader::inlineHotFunctions( PromotedInsns.insert(I); // If profile mismatches, we should not attempt to inline DI. if ((isa(DI) || isa(DI)) && - inlineCallInstruction(DI)) { + inlineCallInstruction(*cast(DI))) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1070,7 +1063,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } else if (CalledFunction && CalledFunction->getSubprogram() && !CalledFunction->isDeclaration()) { - if (inlineCallInstruction(I)) { + if (inlineCallInstruction(*I)) { localNotInlinedCallSites.erase(I); LocalChanged = true; ++NumCSInlined; @@ -1089,8 +1082,8 @@ bool SampleProfileLoader::inlineHotFunctions( // Accumulate not inlined callsite information into notInlinedSamples for (const auto &Pair : localNotInlinedCallSites) { - Instruction *I = Pair.getFirst(); - Function *Callee = cast(I)->getCalledFunction(); + CallBase *I = Pair.getFirst(); + Function *Callee = I->getCalledFunction(); if (!Callee || Callee->isDeclaration()) continue; From 4bd186c0ff76063de9de5507e1e512f7ab4372f1 Mon Sep 17 00:00:00 2001 From: QingShan Zhang Date: Fri, 17 Apr 2020 05:24:00 +0000 Subject: [PATCH 127/216] [PowerPC] Exploit the rldicl + rldicl when and with mask If we are and the constant like 0xFFFFFFC00000, for now, we are using several instructions to generate this 48bit constant and final an "and". However, we could exploit it with two rotate instructions. MB ME MB+63-ME +----------------------+ +----------------------+ |0000001111111111111000| -> |0000000001111111111111| +----------------------+ +----------------------+ 0 63 0 63 Rotate left ME + 1 bit first, and then, mask it with (MB + 63 - ME, 63), finally, rotate back. Notice that, we need to round it with 64 bit for the wrapping case. 
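To illustrate (this check is not part of the patch; the rldicl helper and the hard-coded rotate/clear amounts are worked out by hand from the description above), the expansion can be verified with a small standalone C model using the 0xFFFFFFC00000 mask as the example:

```
#include <assert.h>
#include <stdint.h>

static uint64_t rotl64(uint64_t V, unsigned N) {
  N &= 63;
  return N == 0 ? V : (V << N) | (V >> (64 - N));
}

// rldicl RA,RS,SH,MB: rotate RS left by SH, then clear the MB high-order
// bits (PowerPC numbers bit 0 as the most significant bit).
static uint64_t rldicl(uint64_t RS, unsigned SH, unsigned MB) {
  return rotl64(RS, SH) & (~0ULL >> MB);
}

int main(void) {
  const uint64_t Mask = 0xFFFFFFC00000ULL;
  // Folding the 16 leading zeros into the mask gives a wrapped run of ones
  // at PPC bits MB=0..ME=41, i.e. OnesOnLeft = 42, ZerosInBetween = 22.
  const unsigned LeadingZeros = 16, OnesOnLeft = 42, ZerosInBetween = 22;
  for (uint64_t I = 0; I < 1000; ++I) {
    uint64_t X = I * 0x9E3779B97F4A7C15ULL; // arbitrary test values
    uint64_t T = rldicl(X, OnesOnLeft, ZerosInBetween);
    assert(rldicl(T, 64 - OnesOnLeft, LeadingZeros) == (X & Mask));
  }
  return 0;
}
```

The asserts confirm that rotating left by ME + 1, clearing, and rotating back reproduces the and-with-mask result for this kind of constant.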
Reviewed by: ChenZheng, Nemanjai Differential Revision: https://reviews.llvm.org/D71831 --- llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 57 ++++++++++++++++++- .../CodeGen/PowerPC/2016-04-17-combine.ll | 4 +- .../test/CodeGen/PowerPC/Frames-dyn-alloca.ll | 18 +++--- llvm/test/CodeGen/PowerPC/and-mask.ll | 29 ++++------ llvm/test/CodeGen/PowerPC/cmpb.ll | 16 ++---- llvm/test/CodeGen/PowerPC/setcc-logic.ll | 4 +- 6 files changed, 83 insertions(+), 45 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index a3e3b128355b..dd6d9249ab4d 100644 --- a/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -351,6 +351,7 @@ namespace { bool tryAsSingleRLWINM(SDNode *N); bool tryAsSingleRLWINM8(SDNode *N); bool tryAsSingleRLWIMI(SDNode *N); + bool tryAsPairOfRLDICL(SDNode *N); void PeepholePPC64(); void PeepholePPC64ZExt(); @@ -4439,6 +4440,60 @@ bool PPCDAGToDAGISel::tryAsSingleRLWINM8(SDNode *N) { return false; } +bool PPCDAGToDAGISel::tryAsPairOfRLDICL(SDNode *N) { + assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); + uint64_t Imm64; + if (!isInt64Immediate(N->getOperand(1).getNode(), Imm64)) + return false; + + // Do nothing if it is 16-bit imm as the pattern in the .td file handle + // it well with "andi.". + if (isUInt<16>(Imm64)) + return false; + + SDLoc Loc(N); + SDValue Val = N->getOperand(0); + + // Optimized with two rldicl's as follows: + // Add missing bits on left to the mask and check that the mask is a + // wrapped run of ones, i.e. + // Change pattern |0001111100000011111111| + // to |1111111100000011111111|. + unsigned NumOfLeadingZeros = countLeadingZeros(Imm64); + if (NumOfLeadingZeros != 0) + Imm64 |= maskLeadingOnes(NumOfLeadingZeros); + + unsigned MB, ME; + if (!isRunOfOnes64(Imm64, MB, ME)) + return false; + + // ME MB MB-ME+63 + // +----------------------+ +----------------------+ + // |1111111100000011111111| -> |0000001111111111111111| + // +----------------------+ +----------------------+ + // 0 63 0 63 + // There are ME + 1 ones on the left and (MB - ME + 63) & 63 zeros in between. + unsigned OnesOnLeft = ME + 1; + unsigned ZerosInBetween = (MB - ME + 63) & 63; + // Rotate left by OnesOnLeft (so leading ones are now trailing ones) and clear + // on the left the bits that are already zeros in the mask. + Val = SDValue(CurDAG->getMachineNode(PPC::RLDICL, Loc, MVT::i64, Val, + getI64Imm(OnesOnLeft, Loc), + getI64Imm(ZerosInBetween, Loc)), + 0); + // MB-ME+63 ME MB + // +----------------------+ +----------------------+ + // |0000001111111111111111| -> |0001111100000011111111| + // +----------------------+ +----------------------+ + // 0 63 0 63 + // Rotate back by 64 - OnesOnLeft to undo previous rotate. Then clear on the + // left the number of ones we previously added. + SDValue Ops[] = {Val, getI64Imm(64 - OnesOnLeft, Loc), + getI64Imm(NumOfLeadingZeros, Loc)}; + CurDAG->SelectNodeTo(N, PPC::RLDICL, MVT::i64, Ops); + return true; +} + bool PPCDAGToDAGISel::tryAsSingleRLWIMI(SDNode *N) { assert(N->getOpcode() == ISD::AND && "ISD::AND SDNode expected"); unsigned Imm; @@ -4766,7 +4821,7 @@ void PPCDAGToDAGISel::Select(SDNode *N) { case ISD::AND: // If this is an 'and' with a mask, try to emit rlwinm/rldicl/rldicr if (tryAsSingleRLWINM(N) || tryAsSingleRLWIMI(N) || tryAsSingleRLDICL(N) || - tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N)) + tryAsSingleRLDICR(N) || tryAsSingleRLWINM8(N) || tryAsPairOfRLDICL(N)) return; // Other cases are autogenerated. 
diff --git a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll index 271dcd1402a0..944bf0f29c77 100644 --- a/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll +++ b/llvm/test/CodeGen/PowerPC/2016-04-17-combine.ll @@ -7,8 +7,8 @@ target triple = "powerpc64le-unknown-linux-gnu" %typ = type { i32, i32 } ; On release builds, it doesn't crash, spewing nonsense instead. -; To make sure it works, check that and is still alive. -; CHECK: and +; To make sure it works, check that rldicl is still alive. +; CHECK: rldicl ; Also, in release, it emits a COPY from a 32-bit register to ; a 64-bit register, which happens to be emitted as cror [!] ; by the confused CodeGen. Just to be sure, check there isn't one. diff --git a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll index a1773157d42b..8bfd305758dc 100644 --- a/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll +++ b/llvm/test/CodeGen/PowerPC/Frames-dyn-alloca.ll @@ -43,15 +43,13 @@ define i32* @f1(i32 %n) nounwind { ; PPC64-LINUX-LABEL: f1 ; PPC64-LINUX: std 31, -8(1) ; PPC64-LINUX-NEXT: stdu 1, -64(1) -; PPC64-LINUX-NEXT: lis 4, 32767 ; PPC64-LINUX-NEXT: rldic 3, 3, 2, 30 -; PPC64-LINUX-NEXT: ori 4, 4, 65535 -; PPC64-LINUX-NEXT: addi 3, 3, 15 -; PPC64-LINUX-NEXT: sldi 4, 4, 4 ; PPC64-LINUX-NEXT: mr 31, 1 -; PPC64-LINUX-NEXT: and 3, 3, 4 -; PPC64-LINUX-NEXT: neg 3, 3 +; PPC64-LINUX-NEXT: addi 3, 3, 15 +; PPC64-LINUX-NEXT: rldicl 3, 3, 60, 4 ; PPC64-LINUX-NEXT: addi 4, 31, 64 +; PPC64-LINUX-NEXT: rldicl 3, 3, 4, 29 +; PPC64-LINUX-NEXT: neg 3, 3 ; PPC64-LINUX-NEXT: stdux 4, 1, 3 ; The linkage area is always put on the top of the stack. @@ -82,14 +80,12 @@ define i32* @f1(i32 %n) nounwind { ; PPC64-AIX-LABEL: f1 ; PPC64-AIX: std 31, -8(1) ; PPC64-AIX-NEXT: stdu 1, -64(1) -; PPC64-AIX-NEXT: lis 4, 32767 ; PPC64-AIX-NEXT: rldic 3, 3, 2, 30 -; PPC64-AIX-NEXT: ori 4, 4, 65535 -; PPC64-AIX-NEXT: addi 3, 3, 15 -; PPC64-AIX-NEXT: sldi 4, 4, 4 ; PPC64-AIX-NEXT: mr 31, 1 -; PPC64-AIX-NEXT: and 3, 3, 4 +; PPC64-AIX-NEXT: addi 3, 3, 15 ; PPC64-AIX-NEXT: addi 4, 31, 64 +; PPC64-AIX-NEXT: rldicl 3, 3, 60, 4 +; PPC64-AIX-NEXT: rldicl 3, 3, 4, 29 ; PPC64-AIX-NEXT: neg 3, 3 ; PPC64-AIX-NEXT: stdux 4, 1, 3 diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll index 89f568196327..489880b29e67 100644 --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -15,8 +15,8 @@ define i32 @test1(i32 %a) { define i64 @test2(i64 %a) { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -7 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 2 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: blr %and = and i64 %a, -7 ret i64 %and @@ -26,10 +26,8 @@ define i64 @test2(i64 %a) { define i64 @test3(i64 %a) { ; CHECK-LABEL: test3: ; CHECK: # %bb.0: -; CHECK-NEXT: lis 4, 1023 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: sldi 4, 4, 22 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 22 +; CHECK-NEXT: rldicl 3, 3, 22, 16 ; CHECK-NEXT: blr %and = and i64 %a, 281474972516352 ret i64 %and @@ -39,10 +37,8 @@ define i64 @test3(i64 %a) { define i64 @test4(i64 %a) { ; CHECK-LABEL: test4: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 12 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: ori 4, 4, 255 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 30, 26 +; CHECK-NEXT: rldicl 3, 3, 34, 28 ; CHECK-NEXT: blr %and = and i64 %a, 51539607807 ret i64 %and @@ -52,10 +48,8 @@ define i64 @test4(i64 %a) { define i64 @test5(i64 %a) { ; 
CHECK-LABEL: test5: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, 0 -; CHECK-NEXT: oris 4, 4, 65472 -; CHECK-NEXT: ori 4, 4, 65535 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 42, 6 +; CHECK-NEXT: rldicl 3, 3, 22, 32 ; CHECK-NEXT: blr %and = and i64 %a, 4290838527 ret i64 %and @@ -77,11 +71,8 @@ define i64 @test6(i64 %a) { define i64 @test7(i64 %a) { ; CHECK-LABEL: test7: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -32767 -; CHECK-NEXT: sldi 4, 4, 32 -; CHECK-NEXT: oris 4, 4, 65024 -; CHECK-NEXT: rldicr 4, 4, 17, 63 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 22, 25 +; CHECK-NEXT: rldicl 3, 3, 42, 14 ; CHECK-NEXT: blr %and = and i64 %a, 1121501860462591 ret i64 %and diff --git a/llvm/test/CodeGen/PowerPC/cmpb.ll b/llvm/test/CodeGen/PowerPC/cmpb.ll index dc70af317a29..e7f5579e0a45 100644 --- a/llvm/test/CodeGen/PowerPC/cmpb.ll +++ b/llvm/test/CodeGen/PowerPC/cmpb.ll @@ -123,11 +123,9 @@ entry: ret i32 %or55 ; CHECK-LABEL: @test32p1 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65287 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 5 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } @@ -147,11 +145,9 @@ entry: ret i32 %or37 ; CHECK-LABEL: @test32p2 -; CHECK: li [[REG1:[0-9]+]], 0 -; CHECK: cmpb [[REG4:[0-9]+]], 4, 3 -; CHECK: oris [[REG2:[0-9]+]], [[REG1]], 65280 -; CHECK: ori [[REG3:[0-9]+]], [[REG2]], 65535 -; CHECK: and 3, [[REG4]], [[REG3]] +; CHECK: cmpb [[REG1:[0-9]+]], 4, 3 +; CHECK: rldicl [[REG2:[0-9]+]], [[REG1]], 40, 8 +; CHECK: rldicl 3, [[REG2]], 24, 32 ; CHECK: blr } diff --git a/llvm/test/CodeGen/PowerPC/setcc-logic.ll b/llvm/test/CodeGen/PowerPC/setcc-logic.ll index 2e23611bea0a..3bed3ba9ce8f 100644 --- a/llvm/test/CodeGen/PowerPC/setcc-logic.ll +++ b/llvm/test/CodeGen/PowerPC/setcc-logic.ll @@ -481,9 +481,9 @@ define <4 x i1> @and_eq_vec(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> define i1 @or_icmps_const_1bit_diff(i64 %x) { ; CHECK-LABEL: or_icmps_const_1bit_diff: ; CHECK: # %bb.0: -; CHECK-NEXT: li 4, -5 ; CHECK-NEXT: addi 3, 3, -13 -; CHECK-NEXT: and 3, 3, 4 +; CHECK-NEXT: rldicl 3, 3, 61, 1 +; CHECK-NEXT: rotldi 3, 3, 3 ; CHECK-NEXT: cntlzd 3, 3 ; CHECK-NEXT: rldicl 3, 3, 58, 63 ; CHECK-NEXT: blr From 3017580c7961397f96e9481abf82bbf874bb2633 Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Fri, 17 Apr 2020 11:17:38 +0800 Subject: [PATCH 128/216] [X86][MC][NFC] Reduce the parameters of functions in X86MCCodeEmitter(Part II) Summary: We determine the REX prefix used by instruction in `determineREXPrefix`, and this value is used in `emitMemModRMByte' and used as the return value of `emitOpcodePrefix`. Before this patch, REX was passed as reference to `emitPrefixImpl`, it is strange and not necessary, e.g, we have to write ``` bool Rex = false; emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); ``` in `emitPrefix` even if `Rex` will not be used. So we let HasREX be the return value of `emitPrefixImpl`. The HasREX is passed from `emitREXPrefix` to `emitOpcodePrefix` and then to `emitPrefixImpl`. This makes sense since REX is a kind of opcode prefix and of course is a prefix. 
Reviewers: craig.topper, pengfei Reviewed By: craig.topper Subscribers: annita.zhang, craig.topper, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78276 --- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 93 ++++++++++--------- 1 file changed, 50 insertions(+), 43 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 0176212fff5d..9655f469ac5a 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -76,13 +76,12 @@ class X86MCCodeEmitter : public MCCodeEmitter { unsigned &CurByte, raw_ostream &OS) const; void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool Rex, unsigned &CurByte, + uint64_t TSFlags, bool HasREX, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - void emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, bool &Rex, - const MCInst &MI, const MCSubtargetInfo &STI, - raw_ostream &OS) const; + bool emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, const MCInst &MI, + const MCSubtargetInfo &STI, raw_ostream &OS) const; void emitVEXOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, raw_ostream &OS) const; @@ -93,7 +92,8 @@ class X86MCCodeEmitter : public MCCodeEmitter { bool emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const; - uint8_t determineREXPrefix(const MCInst &MI, int MemOperand) const; + bool emitREXPrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, + raw_ostream &OS) const; }; } // end anonymous namespace @@ -384,7 +384,7 @@ void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base, void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool Rex, + uint64_t TSFlags, bool HasREX, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -412,7 +412,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, default: return X86::reloc_riprel_4byte; case X86::MOV64rm: - assert(Rex); + assert(HasREX); return X86::reloc_riprel_4byte_movq_load; case X86::CALL64m: case X86::JMP64m: @@ -426,8 +426,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, case X86::SBB64rm: case X86::SUB64rm: case X86::XOR64rm: - return Rex ? X86::reloc_riprel_4byte_relax_rex - : X86::reloc_riprel_4byte_relax; + return HasREX ? X86::reloc_riprel_4byte_relax_rex + : X86::reloc_riprel_4byte_relax; } }(); @@ -649,8 +649,11 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, CurByte, OS, Fixups); } -void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, - bool &Rex, const MCInst &MI, +/// Emit all instruction prefixes. +/// +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, + const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const { uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags; @@ -696,10 +699,11 @@ void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, // Encoding type for this instruction. 
uint64_t Encoding = TSFlags & X86II::EncodingMask; - if (Encoding == 0) - Rex = emitOpcodePrefix(CurByte, MemoryOperand, MI, STI, OS); - else + bool HasREX = false; + if (Encoding) emitVEXOpcodePrefix(CurByte, MemoryOperand, MI, OS); + else + HasREX = emitOpcodePrefix(CurByte, MemoryOperand, MI, STI, OS); uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { @@ -748,6 +752,8 @@ void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, break; } } + + return HasREX; } /// AVX instructions are encoded using a opcode prefix called VEX. @@ -1181,11 +1187,14 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(unsigned &CurByte, int MemOperand, } } -/// Determine if the MCInst has to be encoded with a X86-64 REX prefix which -/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use -/// of X86-64 extended registers. -uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, - int MemOperand) const { +/// Emit REX prefix which specifies +/// 1) 64-bit instructions, +/// 2) non-default operand size, and +/// 3) use of X86-64 extended registers. +/// +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitREXPrefix(unsigned &CurByte, int MemOperand, + const MCInst &MI, raw_ostream &OS) const { uint8_t REX = 0; bool UsesHighByteReg = false; @@ -1271,7 +1280,10 @@ uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, report_fatal_error( "Cannot encode high byte register in REX-prefixed instruction"); - return REX; + if (REX) + emitByte(0x40 | REX, CurByte, OS); + + return REX != 0; } /// Emit segment override opcode prefix as needed. @@ -1289,7 +1301,7 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte, /// \param MemOperand the operand # of the start of a memory operand if present. /// If not present, it is -1. /// -/// \returns true if a REX prefix was used. +/// \returns true if REX prefix is used, otherwise returns false. bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, @@ -1297,7 +1309,6 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; - bool Ret = false; // Emit the operand size opcode prefix as needed. if ((TSFlags & X86II::OpSizeMask) == (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16)) @@ -1324,15 +1335,11 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, } // Handle REX prefix. - // FIXME: Can this come before F2 etc to simplify emission? - if (STI.hasFeature(X86::Mode64Bit)) { - if (uint8_t REX = determineREXPrefix(MI, MemOperand)) { - emitByte(0x40 | REX, CurByte, OS); - Ret = true; - } - } else { - assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode."); - } + assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) && + "REX.W requires 64bit mode."); + bool HasREX = STI.hasFeature(X86::Mode64Bit) + ? emitREXPrefix(CurByte, MemOperand, MI, OS) + : false; // 0x0F escape code must be emitted just before the opcode. switch (TSFlags & X86II::OpMapMask) { @@ -1352,7 +1359,8 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, emitByte(0x3A, CurByte, OS); break; } - return Ret; + + return HasREX; } void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, @@ -1370,8 +1378,7 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted. 
unsigned CurByte = 0; - bool Rex = false; - emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); + emitPrefixImpl(CurOp, CurByte, MI, STI, OS); } void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -1391,8 +1398,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted. unsigned CurByte = 0; - bool Rex = false; - emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); + bool HasREX = emitPrefixImpl(CurOp, CurByte, MI, STI, OS); // It uses the VEX.VVVV field? bool HasVEX_4V = TSFlags & X86II::VEX_4V; @@ -1497,7 +1503,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++SrcRegNum; emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, - Rex, CurByte, OS, Fixups, STI); + HasREX, CurByte, OS, Fixups, STI); CurOp = SrcRegNum + 1; break; } @@ -1570,7 +1576,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; if (HasVEX_I8Reg) I8RegNum = getX86RegEncoding(MI, CurOp++); @@ -1582,7 +1588,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; ++CurOp; // Encoded in VEX.VVVV. break; @@ -1599,7 +1605,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; break; } @@ -1612,7 +1618,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode + CC, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); break; } @@ -1651,7 +1657,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned CC = MI.getOperand(CurOp++).getImm(); emitByte(BaseOpcode + CC, CurByte, OS); - emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI); + emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, CurByte, OS, Fixups, + STI); break; } @@ -1671,7 +1678,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, CurOp, (Form == X86II::MRMXm) ? 0 : Form - X86II::MRM0m, TSFlags, - Rex, CurByte, OS, Fixups, STI); + HasREX, CurByte, OS, Fixups, STI); CurOp += X86::AddrNumOperands; break; From c5fa0a4d4b85310fd2fb0611e5b8884f60b498af Mon Sep 17 00:00:00 2001 From: Shengchen Kan Date: Fri, 17 Apr 2020 14:11:05 +0800 Subject: [PATCH 129/216] Temporaily revert [X86][MC][NFC] Reduce the parameters of functions in X86MCCodeEmitter(Part II) It causes some encoding fails. Plan to recommit it after fixing that. This reverts commit 3017580c7961397f96e9481abf82bbf874bb2633. 
--- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 93 +++++++++---------- 1 file changed, 43 insertions(+), 50 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 9655f469ac5a..0176212fff5d 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -76,12 +76,13 @@ class X86MCCodeEmitter : public MCCodeEmitter { unsigned &CurByte, raw_ostream &OS) const; void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool HasREX, unsigned &CurByte, + uint64_t TSFlags, bool Rex, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - bool emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, const MCInst &MI, - const MCSubtargetInfo &STI, raw_ostream &OS) const; + void emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, bool &Rex, + const MCInst &MI, const MCSubtargetInfo &STI, + raw_ostream &OS) const; void emitVEXOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, raw_ostream &OS) const; @@ -92,8 +93,7 @@ class X86MCCodeEmitter : public MCCodeEmitter { bool emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const; - bool emitREXPrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, - raw_ostream &OS) const; + uint8_t determineREXPrefix(const MCInst &MI, int MemOperand) const; }; } // end anonymous namespace @@ -384,7 +384,7 @@ void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base, void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool HasREX, + uint64_t TSFlags, bool Rex, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -412,7 +412,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, default: return X86::reloc_riprel_4byte; case X86::MOV64rm: - assert(HasREX); + assert(Rex); return X86::reloc_riprel_4byte_movq_load; case X86::CALL64m: case X86::JMP64m: @@ -426,8 +426,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, case X86::SBB64rm: case X86::SUB64rm: case X86::XOR64rm: - return HasREX ? X86::reloc_riprel_4byte_relax_rex - : X86::reloc_riprel_4byte_relax; + return Rex ? X86::reloc_riprel_4byte_relax_rex + : X86::reloc_riprel_4byte_relax; } }(); @@ -649,11 +649,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, CurByte, OS, Fixups); } -/// Emit all instruction prefixes. -/// -/// \returns true if REX prefix is used, otherwise returns false. -bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, - const MCInst &MI, +void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, + bool &Rex, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const { uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags; @@ -699,11 +696,10 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, // Encoding type for this instruction. 
uint64_t Encoding = TSFlags & X86II::EncodingMask; - bool HasREX = false; - if (Encoding) - emitVEXOpcodePrefix(CurByte, MemoryOperand, MI, OS); + if (Encoding == 0) + Rex = emitOpcodePrefix(CurByte, MemoryOperand, MI, STI, OS); else - HasREX = emitOpcodePrefix(CurByte, MemoryOperand, MI, STI, OS); + emitVEXOpcodePrefix(CurByte, MemoryOperand, MI, OS); uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { @@ -752,8 +748,6 @@ bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, break; } } - - return HasREX; } /// AVX instructions are encoded using a opcode prefix called VEX. @@ -1187,14 +1181,11 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(unsigned &CurByte, int MemOperand, } } -/// Emit REX prefix which specifies -/// 1) 64-bit instructions, -/// 2) non-default operand size, and -/// 3) use of X86-64 extended registers. -/// -/// \returns true if REX prefix is used, otherwise returns false. -bool X86MCCodeEmitter::emitREXPrefix(unsigned &CurByte, int MemOperand, - const MCInst &MI, raw_ostream &OS) const { +/// Determine if the MCInst has to be encoded with a X86-64 REX prefix which +/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use +/// of X86-64 extended registers. +uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, + int MemOperand) const { uint8_t REX = 0; bool UsesHighByteReg = false; @@ -1280,10 +1271,7 @@ bool X86MCCodeEmitter::emitREXPrefix(unsigned &CurByte, int MemOperand, report_fatal_error( "Cannot encode high byte register in REX-prefixed instruction"); - if (REX) - emitByte(0x40 | REX, CurByte, OS); - - return REX != 0; + return REX; } /// Emit segment override opcode prefix as needed. @@ -1301,7 +1289,7 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte, /// \param MemOperand the operand # of the start of a memory operand if present. /// If not present, it is -1. /// -/// \returns true if REX prefix is used, otherwise returns false. +/// \returns true if a REX prefix was used. bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, @@ -1309,6 +1297,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; + bool Ret = false; // Emit the operand size opcode prefix as needed. if ((TSFlags & X86II::OpSizeMask) == (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16)) @@ -1335,11 +1324,15 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, } // Handle REX prefix. - assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) && - "REX.W requires 64bit mode."); - bool HasREX = STI.hasFeature(X86::Mode64Bit) - ? emitREXPrefix(CurByte, MemOperand, MI, OS) - : false; + // FIXME: Can this come before F2 etc to simplify emission? + if (STI.hasFeature(X86::Mode64Bit)) { + if (uint8_t REX = determineREXPrefix(MI, MemOperand)) { + emitByte(0x40 | REX, CurByte, OS); + Ret = true; + } + } else { + assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode."); + } // 0x0F escape code must be emitted just before the opcode. 
switch (TSFlags & X86II::OpMapMask) { @@ -1359,8 +1352,7 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, emitByte(0x3A, CurByte, OS); break; } - - return HasREX; + return Ret; } void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, @@ -1378,7 +1370,8 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted. unsigned CurByte = 0; - emitPrefixImpl(CurOp, CurByte, MI, STI, OS); + bool Rex = false; + emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); } void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -1398,7 +1391,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted. unsigned CurByte = 0; - bool HasREX = emitPrefixImpl(CurOp, CurByte, MI, STI, OS); + bool Rex = false; + emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); // It uses the VEX.VVVV field? bool HasVEX_4V = TSFlags & X86II::VEX_4V; @@ -1503,7 +1497,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++SrcRegNum; emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, - HasREX, CurByte, OS, Fixups, STI); + Rex, CurByte, OS, Fixups, STI); CurOp = SrcRegNum + 1; break; } @@ -1576,7 +1570,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, HasREX, CurByte, OS, Fixups, STI); + TSFlags, Rex, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; if (HasVEX_I8Reg) I8RegNum = getX86RegEncoding(MI, CurOp++); @@ -1588,7 +1582,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, HasREX, CurByte, OS, Fixups, STI); + TSFlags, Rex, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; ++CurOp; // Encoded in VEX.VVVV. break; @@ -1605,7 +1599,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, HasREX, CurByte, OS, Fixups, STI); + TSFlags, Rex, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; break; } @@ -1618,7 +1612,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode + CC, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)), - TSFlags, HasREX, CurByte, OS, Fixups, STI); + TSFlags, Rex, CurByte, OS, Fixups, STI); break; } @@ -1657,8 +1651,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned CC = MI.getOperand(CurOp++).getImm(); emitByte(BaseOpcode + CC, CurByte, OS); - emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, CurByte, OS, Fixups, - STI); + emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI); break; } @@ -1678,7 +1671,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, CurOp, (Form == X86II::MRMXm) ? 
0 : Form - X86II::MRM0m, TSFlags, - HasREX, CurByte, OS, Fixups, STI); + Rex, CurByte, OS, Fixups, STI); CurOp += X86::AddrNumOperands; break; From e144474448b1e1b980ae219659824c61b1decdd7 Mon Sep 17 00:00:00 2001 From: James Nagurne Date: Fri, 17 Apr 2020 08:01:01 +0200 Subject: [PATCH 130/216] Fix opt-viewer tests failing after move from cgi.escape to html.escape These two tests utilize pre-generated opt-viewer output to diff against a run of opt-viewer over a known yaml file. In commit 4b428e8f (D76126), the escape function used for rendering was changed from cgi.escape to html.escape. This modification causes a behavioral difference with regards to quote characters. cgi will not escape quotes by default, but html will. Therefore, these tests were failing because they expected the old behavior of "string", but was instead seeing "string". This solution modifies the known test outputs to use the escaped quotes rather than not escaping quotes during rendering for no particular reason. It is notable that when testing the optimization records generated by LLVM, there was never quotes in the remarks I could find, specifically in the Callee field where they exist in the pre-generated yaml for testing. Differential Revision: https://reviews.llvm.org/D78241 --- llvm/test/tools/opt-viewer/Outputs/suppress/s.swift.html | 2 +- .../tools/opt-viewer/Outputs/unicode-function-name/s.swift.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/test/tools/opt-viewer/Outputs/suppress/s.swift.html b/llvm/test/tools/opt-viewer/Outputs/suppress/s.swift.html index f67352079ed6..7bb111db2b46 100644 --- a/llvm/test/tools/opt-viewer/Outputs/suppress/s.swift.html +++ b/llvm/test/tools/opt-viewer/Outputs/suppress/s.swift.html @@ -124,7 +124,7 @@ sil-inliner -
 "s.f()" inlined into "main" (cost = 20, benefit = 20) 
+
 &quot;s.f()&quot; inlined into &quot;main&quot; (cost = 20, benefit = 20) 
 main
 
 
diff --git a/llvm/test/tools/opt-viewer/Outputs/unicode-function-name/s.swift.html b/llvm/test/tools/opt-viewer/Outputs/unicode-function-name/s.swift.html
index 744c5b7579e8..995ea72f4ddb 100644
--- a/llvm/test/tools/opt-viewer/Outputs/unicode-function-name/s.swift.html
+++ b/llvm/test/tools/opt-viewer/Outputs/unicode-function-name/s.swift.html
@@ -124,7 +124,7 @@
 
 
 sil-inliner
-
               
"s.• infix(_:_:)" inlined into "main" (cost = 2, benefit = 40)  +
               
"s.• infix(_:_:)" inlined into "main" (cost = 2, benefit = 40)  main From f88000a4b5226c590482e9c7cae9d861f59a7317 Mon Sep 17 00:00:00 2001 From: Sam Parker Date: Wed, 15 Apr 2020 09:57:13 +0100 Subject: [PATCH 131/216] [ARM][MVE] Add VHADD and VHSUB patterns Add patterns that use a normal, non-wrapping, add and sub nodes along with an arm vshr imm node. Differential Revision: https://reviews.llvm.org/D77065 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 71 +++++-- llvm/test/CodeGen/Thumb2/mve-halving.ll | 232 +++++++++++++++++++++++ llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll | 6 +- 3 files changed, 287 insertions(+), 22 deletions(-) create mode 100644 llvm/test/CodeGen/Thumb2/mve-halving.ll diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 9b63ac3be9ea..8b9917ab7573 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2069,7 +2069,8 @@ class MVE_VHSUB_ size, : MVE_VHADDSUB<"vhsub", suffix, U, 0b1, size, pattern>; multiclass MVE_VHADD_m { + SDNode unpred_op, Intrinsic pred_int, PatFrag add_op, + SDNode shift_op> { def "" : MVE_VHADD_; defvar Inst = !cast(NAME); @@ -2078,6 +2079,9 @@ multiclass MVE_VHADD_m; + def : Pat<(VTI.Vec (shift_op (add_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))), + (Inst MQPR:$Qm, MQPR:$Qn)>; + // Predicated add-and-divide-by-two def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask), (VTI.Vec MQPR:$inactive))), @@ -2087,18 +2091,44 @@ multiclass MVE_VHADD_m - : MVE_VHADD_m; +multiclass MVE_VHADD + : MVE_VHADD_m; + +def addnuw : PatFrag<(ops node:$lhs, node:$rhs), + (add node:$lhs, node:$rhs), [{ + return N->getFlags().hasNoUnsignedWrap(); +}]>; -defm MVE_VHADDs8 : MVE_VHADD; -defm MVE_VHADDs16 : MVE_VHADD; -defm MVE_VHADDs32 : MVE_VHADD; -defm MVE_VHADDu8 : MVE_VHADD; -defm MVE_VHADDu16 : MVE_VHADD; -defm MVE_VHADDu32 : MVE_VHADD; +def addnsw : PatFrag<(ops node:$lhs, node:$rhs), + (add node:$lhs, node:$rhs), [{ + return N->getFlags().hasNoSignedWrap(); +}]>; + +def subnuw : PatFrag<(ops node:$lhs, node:$rhs), + (sub node:$lhs, node:$rhs), [{ + return N->getFlags().hasNoUnsignedWrap(); +}]>; + +def subnsw : PatFrag<(ops node:$lhs, node:$rhs), + (sub node:$lhs, node:$rhs), [{ + return N->getFlags().hasNoSignedWrap(); +}]>; + +// Halving add/sub perform the arithemtic operation with an extra bit of +// precision, before performing the shift, to void clipping errors. We're not +// modelling that here with these patterns, but we're using no wrap forms of +// add/sub to ensure that the extra bit of information is not needed. 
+defm MVE_VHADDs8 : MVE_VHADD; +defm MVE_VHADDs16 : MVE_VHADD; +defm MVE_VHADDs32 : MVE_VHADD; +defm MVE_VHADDu8 : MVE_VHADD; +defm MVE_VHADDu16 : MVE_VHADD; +defm MVE_VHADDu32 : MVE_VHADD; multiclass MVE_VHSUB_m { + SDNode unpred_op, Intrinsic pred_int, PatFrag sub_op, + SDNode shift_op> { def "" : MVE_VHSUB_; defvar Inst = !cast(NAME); @@ -2108,6 +2138,10 @@ multiclass MVE_VHSUB_m; + def : Pat<(VTI.Vec (shift_op (sub_op (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)), (i32 1))), + (Inst MQPR:$Qm, MQPR:$Qn)>; + + // Predicated subtract-and-divide-by-two def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask), @@ -2118,15 +2152,16 @@ multiclass MVE_VHSUB_m - : MVE_VHSUB_m; +multiclass MVE_VHSUB + : MVE_VHSUB_m; -defm MVE_VHSUBs8 : MVE_VHSUB; -defm MVE_VHSUBs16 : MVE_VHSUB; -defm MVE_VHSUBs32 : MVE_VHSUB; -defm MVE_VHSUBu8 : MVE_VHSUB; -defm MVE_VHSUBu16 : MVE_VHSUB; -defm MVE_VHSUBu32 : MVE_VHSUB; +defm MVE_VHSUBs8 : MVE_VHSUB; +defm MVE_VHSUBs16 : MVE_VHSUB; +defm MVE_VHSUBs32 : MVE_VHSUB; +defm MVE_VHSUBu8 : MVE_VHSUB; +defm MVE_VHSUBu16 : MVE_VHSUB; +defm MVE_VHSUBu32 : MVE_VHSUB; class MVE_VDUP pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, diff --git a/llvm/test/CodeGen/Thumb2/mve-halving.ll b/llvm/test/CodeGen/Thumb2/mve-halving.ll new file mode 100644 index 000000000000..84f4f9a6e764 --- /dev/null +++ b/llvm/test/CodeGen/Thumb2/mve-halving.ll @@ -0,0 +1,232 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhadds_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 +; CHECK-NEXT: bx lr + %add = add <16 x i8> %x, %y + %half = ashr <16 x i8> %add, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhaddu_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 +; CHECK-NEXT: bx lr + %add = add <16 x i8> %x, %y + %half = lshr <16 x i8> %add, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhadds_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 +; CHECK-NEXT: bx lr + %add = add <8 x i16> %x, %y + %half = ashr <8 x i16> %add, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhaddu_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 +; CHECK-NEXT: bx lr + %add = add <8 x i16> %x, %y + %half = lshr <8 x i16> %add, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhadds_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 +; CHECK-NEXT: bx lr + %add = add <4 x i32> %x, %y + %half = ashr <4 x i32> %add, + ret <4 x i32> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhaddu_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 +; CHECK-NEXT: bx lr + %add = add <4 x i32> %x, %y + %half = lshr <4 x i32> %add, + ret <4 x i32> %half +} +define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8(<16 x i8> %x, <16 x i8> %y) { +; 
CHECK-LABEL: vhsubs_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.s8 q0, q0, #1 +; CHECK-NEXT: bx lr + %sub = sub <16 x i8> %x, %y + %half = ashr <16 x i8> %sub, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhsubu_v16i8: +; CHECK: @ %bb.0: +; CHECK-NEXT: vsub.i8 q0, q0, q1 +; CHECK-NEXT: vshr.u8 q0, q0, #1 +; CHECK-NEXT: bx lr + %sub = sub <16 x i8> %x, %y + %half = lshr <16 x i8> %sub, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhsubs_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.s16 q0, q0, #1 +; CHECK-NEXT: bx lr + %sub = sub <8 x i16> %x, %y + %half = ashr <8 x i16> %sub, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhsubu_v8i16: +; CHECK: @ %bb.0: +; CHECK-NEXT: vsub.i16 q0, q0, q1 +; CHECK-NEXT: vshr.u16 q0, q0, #1 +; CHECK-NEXT: bx lr + %sub = sub <8 x i16> %x, %y + %half = lshr <8 x i16> %sub, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhsubs_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.s32 q0, q0, #1 +; CHECK-NEXT: bx lr + %sub = sub <4 x i32> %x, %y + %half = ashr <4 x i32> %sub, + ret <4 x i32> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhsubu_v4i32: +; CHECK: @ %bb.0: +; CHECK-NEXT: vsub.i32 q0, q0, q1 +; CHECK-NEXT: vshr.u32 q0, q0, #1 +; CHECK-NEXT: bx lr + %sub = sub <4 x i32> %x, %y + %half = lshr <4 x i32> %sub, + ret <4 x i32> %half +} + +define arm_aapcs_vfpcc <16 x i8> @vhadds_v16i8_nw(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhadds_v16i8_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhadd.s8 q0, q0, q1 +; CHECK-NEXT: bx lr + %add = add nsw <16 x i8> %x, %y + %half = ashr <16 x i8> %add, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <16 x i8> @vhaddu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhaddu_v16i8_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhadd.u8 q0, q0, q1 +; CHECK-NEXT: bx lr + %add = add nuw <16 x i8> %x, %y + %half = lshr <16 x i8> %add, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhadds_v8i16_nw(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhadds_v8i16_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhadd.s16 q0, q0, q1 +; CHECK-NEXT: bx lr + %add = add nsw <8 x i16> %x, %y + %half = ashr <8 x i16> %add, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhaddu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhaddu_v8i16_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhadd.u16 q0, q0, q1 +; CHECK-NEXT: bx lr + %add = add nuw <8 x i16> %x, %y + %half = lshr <8 x i16> %add, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhadds_v4i32_nw(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhadds_v4i32_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhadd.s32 q0, q0, q1 +; CHECK-NEXT: bx lr + %add = add nsw <4 x i32> %x, %y + %half = ashr <4 x i32> %add, + ret <4 x i32> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhaddu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhaddu_v4i32_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhadd.u32 q0, q0, q1 +; CHECK-NEXT: bx lr + %add = add nuw <4 x i32> %x, %y + %half = lshr <4 x i32> %add, + ret <4 x i32> %half +} +define arm_aapcs_vfpcc <16 x i8> @vhsubs_v16i8_nw(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhsubs_v16i8_nw: +; CHECK: @ %bb.0: +; 
CHECK-NEXT: vhsub.s8 q0, q0, q1 +; CHECK-NEXT: bx lr + %sub = sub nsw <16 x i8> %x, %y + %half = ashr <16 x i8> %sub, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <16 x i8> @vhsubu_v16i8_nw(<16 x i8> %x, <16 x i8> %y) { +; CHECK-LABEL: vhsubu_v16i8_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhsub.u8 q0, q0, q1 +; CHECK-NEXT: bx lr + %sub = sub nuw <16 x i8> %x, %y + %half = lshr <16 x i8> %sub, + ret <16 x i8> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhsubs_v8i16_nw(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhsubs_v8i16_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhsub.s16 q0, q0, q1 +; CHECK-NEXT: bx lr + %sub = sub nsw <8 x i16> %x, %y + %half = ashr <8 x i16> %sub, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <8 x i16> @vhsubu_v8i16_nw(<8 x i16> %x, <8 x i16> %y) { +; CHECK-LABEL: vhsubu_v8i16_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhsub.u16 q0, q0, q1 +; CHECK-NEXT: bx lr + %sub = sub nuw <8 x i16> %x, %y + %half = lshr <8 x i16> %sub, + ret <8 x i16> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhsubs_v4i32_nw(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhsubs_v4i32_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhsub.s32 q0, q0, q1 +; CHECK-NEXT: bx lr + %sub = sub nsw <4 x i32> %x, %y + %half = ashr <4 x i32> %sub, + ret <4 x i32> %half +} +define arm_aapcs_vfpcc <4 x i32> @vhsubu_v4i32_nw(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: vhsubu_v4i32_nw: +; CHECK: @ %bb.0: +; CHECK-NEXT: vhsub.u32 q0, q0, q1 +; CHECK-NEXT: bx lr + %sub = sub nuw <4 x i32> %x, %y + %half = lshr <4 x i32> %sub, + ret <4 x i32> %half +} diff --git a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll index 83534e2c3e83..d70dff938ec1 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vhaddsub.ll @@ -28,8 +28,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @add_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: add_ashr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vadd.i32 q0, q0, q1 -; CHECK-NEXT: vshr.s32 q0, q0, #1 +; CHECK-NEXT: vhadd.s32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = add nsw <4 x i32> %src1, %src2 @@ -100,8 +99,7 @@ entry: define arm_aapcs_vfpcc <4 x i32> @sub_ashr_v4i32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: sub_ashr_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vshr.s32 q0, q0, #1 +; CHECK-NEXT: vhsub.s32 q0, q0, q1 ; CHECK-NEXT: bx lr entry: %0 = sub nsw <4 x i32> %src1, %src2 From 485862ecdac4a7c67c515da2e8af209d5c17d911 Mon Sep 17 00:00:00 2001 From: Petr Hosek Date: Fri, 27 Mar 2020 17:48:28 -0700 Subject: [PATCH 132/216] [profile] Avoid duplicating or leaking VMO Now that write data continously into the memory mapping, we don't need to keep the VMO handle around after it has been mapped. This change also ensures that the VMO is always closed on error. Differential Revision: https://reviews.llvm.org/D76963 --- .../profile/InstrProfilingPlatformFuchsia.c | 105 ++++++++---------- 1 file changed, 47 insertions(+), 58 deletions(-) diff --git a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c index 828f74221393..d8b7fa21d257 100644 --- a/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c +++ b/compiler-rt/lib/profile/InstrProfilingPlatformFuchsia.c @@ -44,11 +44,6 @@ COMPILER_RT_VISIBILITY unsigned lprofRuntimeCounterRelocation(void) { } COMPILER_RT_VISIBILITY void lprofSetRuntimeCounterRelocation(unsigned Value) {} -/* VMO that contains the profile data for this module. 
*/ -static zx_handle_t __llvm_profile_vmo; -/* Current offset within the VMO where data should be written next. */ -static uint64_t __llvm_profile_offset; - static const char ProfileSinkName[] = "llvm-profile"; static inline void lprofWrite(const char *fmt, ...) { @@ -62,16 +57,24 @@ static inline void lprofWrite(const char *fmt, ...) { __sanitizer_log_write(s, ret + 1); } +struct lprofVMOWriterCtx { + /* VMO that contains the profile data for this module. */ + zx_handle_t Vmo; + /* Current offset within the VMO where data should be written next. */ + uint64_t Offset; +}; + static uint32_t lprofVMOWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs, uint32_t NumIOVecs) { + struct lprofVMOWriterCtx *Ctx = (struct lprofVMOWriterCtx *)This->WriterCtx; + /* Compute the total length of data to be written. */ size_t Length = 0; for (uint32_t I = 0; I < NumIOVecs; I++) Length += IOVecs[I].ElmSize * IOVecs[I].NumElm; /* Resize the VMO to ensure there's sufficient space for the data. */ - zx_status_t Status = - _zx_vmo_set_size(__llvm_profile_vmo, __llvm_profile_offset + Length); + zx_status_t Status = _zx_vmo_set_size(Ctx->Vmo, Ctx->Offset + Length); if (Status != ZX_OK) return -1; @@ -79,33 +82,40 @@ static uint32_t lprofVMOWriter(ProfDataWriter *This, ProfDataIOVec *IOVecs, for (uint32_t I = 0; I < NumIOVecs; I++) { size_t Length = IOVecs[I].ElmSize * IOVecs[I].NumElm; if (IOVecs[I].Data) { - Status = _zx_vmo_write(__llvm_profile_vmo, IOVecs[I].Data, - __llvm_profile_offset, Length); + Status = _zx_vmo_write(Ctx->Vmo, IOVecs[I].Data, Ctx->Offset, Length); if (Status != ZX_OK) return -1; } else if (IOVecs[I].UseZeroPadding) { /* Resizing the VMO should zero fill. */ } - __llvm_profile_offset += Length; + Ctx->Offset += Length; } /* Record the profile size as a property of the VMO. */ - _zx_object_set_property(__llvm_profile_vmo, ZX_PROP_VMO_CONTENT_SIZE, - &__llvm_profile_offset, - sizeof(__llvm_profile_offset)); + _zx_object_set_property(Ctx->Vmo, ZX_PROP_VMO_CONTENT_SIZE, &Ctx->Offset, + sizeof(Ctx->Offset)); return 0; } -static void initVMOWriter(ProfDataWriter *This) { +static void initVMOWriter(ProfDataWriter *This, struct lprofVMOWriterCtx *Ctx) { This->Write = lprofVMOWriter; - This->WriterCtx = NULL; + This->WriterCtx = Ctx; } /* This method is invoked by the runtime initialization hook * InstrProfilingRuntime.o if it is linked in. */ COMPILER_RT_VISIBILITY void __llvm_profile_initialize(void) { + /* Check if there is llvm/runtime version mismatch. */ + if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) { + lprofWrite("LLVM Profile: runtime and instrumentation version mismatch: " + "expected %d, but got %d\n", + INSTR_PROF_RAW_VERSION, + (int)GET_VERSION(__llvm_profile_get_version())); + return; + } + /* This symbol is defined as weak and initialized to -1 by the runtimer, but * compiler will generate a strong definition initialized to 0 when runtime * counter relocation is used. */ @@ -114,22 +124,17 @@ void __llvm_profile_initialize(void) { return; } - /* Don't create VMO if it has been alread created. 
*/ - if (__llvm_profile_vmo != ZX_HANDLE_INVALID) { - lprofWrite("LLVM Profile: VMO has already been created\n"); - return; - } - const __llvm_profile_data *DataBegin = __llvm_profile_begin_data(); const __llvm_profile_data *DataEnd = __llvm_profile_end_data(); const uint64_t DataSize = __llvm_profile_get_data_size(DataBegin, DataEnd); - const uint64_t CountersOffset = sizeof(__llvm_profile_header) + - (DataSize * sizeof(__llvm_profile_data)); + const uint64_t CountersOffset = + sizeof(__llvm_profile_header) + (DataSize * sizeof(__llvm_profile_data)); zx_status_t Status; /* Create VMO to hold the profile data. */ - Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &__llvm_profile_vmo); + zx_handle_t Vmo = ZX_HANDLE_INVALID; + Status = _zx_vmo_create(0, ZX_VMO_RESIZABLE, &Vmo); if (Status != ZX_OK) { lprofWrite("LLVM Profile: cannot create VMO: %s\n", _zx_status_get_string(Status)); @@ -140,65 +145,49 @@ void __llvm_profile_initialize(void) { char VmoName[ZX_MAX_NAME_LEN]; snprintf(VmoName, sizeof(VmoName), "%" PRIu64 ".profraw", lprofGetLoadModuleSignature()); - _zx_object_set_property(__llvm_profile_vmo, ZX_PROP_NAME, VmoName, - strlen(VmoName)); - - /* Duplicate the handle since __sanitizer_publish_data consumes it. */ - zx_handle_t Handle; - Status = - _zx_handle_duplicate(__llvm_profile_vmo, ZX_RIGHT_SAME_RIGHTS, &Handle); - if (Status != ZX_OK) { - lprofWrite("LLVM Profile: cannot duplicate VMO handle: %s\n", - _zx_status_get_string(Status)); - _zx_handle_close(__llvm_profile_vmo); - __llvm_profile_vmo = ZX_HANDLE_INVALID; - return; - } - - /* Publish the VMO which contains profile data to the system. */ - __sanitizer_publish_data(ProfileSinkName, Handle); - - /* Use the dumpfile symbolizer markup element to write the name of VMO. */ - lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName); - - /* Check if there is llvm/runtime version mismatch. */ - if (GET_VERSION(__llvm_profile_get_version()) != INSTR_PROF_RAW_VERSION) { - lprofWrite("LLVM Profile: runtime and instrumentation version mismatch: " - "expected %d, but got %d\n", - INSTR_PROF_RAW_VERSION, - (int)GET_VERSION(__llvm_profile_get_version())); - return; - } + _zx_object_set_property(Vmo, ZX_PROP_NAME, VmoName, strlen(VmoName)); /* Write the profile data into the mapped region. */ ProfDataWriter VMOWriter; - initVMOWriter(&VMOWriter); + struct lprofVMOWriterCtx Ctx = {.Vmo = Vmo, .Offset = 0}; + initVMOWriter(&VMOWriter, &Ctx); if (lprofWriteData(&VMOWriter, 0, 0) != 0) { lprofWrite("LLVM Profile: failed to write data\n"); + _zx_handle_close(Vmo); return; } uint64_t Len = 0; - Status = _zx_vmo_get_size(__llvm_profile_vmo, &Len); + Status = _zx_vmo_get_size(Vmo, &Len); if (Status != ZX_OK) { lprofWrite("LLVM Profile: failed to get the VMO size: %s\n", _zx_status_get_string(Status)); + _zx_handle_close(Vmo); return; } uintptr_t Mapping; Status = - _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, - 0, __llvm_profile_vmo, 0, Len, &Mapping); + _zx_vmar_map(_zx_vmar_root_self(), ZX_VM_PERM_READ | ZX_VM_PERM_WRITE, 0, + Vmo, 0, Len, &Mapping); if (Status != ZX_OK) { lprofWrite("LLVM Profile: failed to map the VMO: %s\n", _zx_status_get_string(Status)); + _zx_handle_close(Vmo); return; } + /* Publish the VMO which contains profile data to the system. Note that this + * also consumes the VMO handle. */ + __sanitizer_publish_data(ProfileSinkName, Vmo); + + /* Use the dumpfile symbolizer markup element to write the name of VMO. 
*/ + lprofWrite("LLVM Profile: {{{dumpfile:%s:%s}}}\n", ProfileSinkName, VmoName); + /* Update the profile fields based on the current mapping. */ __llvm_profile_counter_bias = (intptr_t)Mapping - - (uintptr_t)__llvm_profile_begin_counters() + CountersOffset; + (uintptr_t)__llvm_profile_begin_counters() + + CountersOffset; } #endif From fd7a34186137168064ffe2ca536823559b92d939 Mon Sep 17 00:00:00 2001 From: Bevin Hansson Date: Thu, 16 Apr 2020 15:02:41 +0200 Subject: [PATCH 133/216] [Fixed Point] Move the compassign LHS type correction a bit further down. NFCI. Summary: We can simplify the LHSTy correction for fixed-point compassign by moving it below the point where we know we have a compound assignment. Also, we shouldn't look at the LHS and RHS separately; look at the computation result type instead. Looking at the LHS and RHS is also wrong for compassigns with fixed and floating point (though this does not work upstream yet). Reviewers: leonardchan Subscribers: cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78294 --- clang/lib/Sema/SemaExpr.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 60d99db7ced9..31d694857e9c 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -13639,14 +13639,6 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, if (ResultTy.isNull() || LHS.isInvalid() || RHS.isInvalid()) return ExprError(); - // The LHS is not converted to the result type for fixed-point compound - // assignment as the common type is computed on demand. Reset the CompLHSTy - // to the LHS type we would have gotten after unary conversions. - if (!CompLHSTy.isNull() && - (LHS.get()->getType()->isFixedPointType() || - RHS.get()->getType()->isFixedPointType())) - CompLHSTy = UsualUnaryConversions(LHS.get()).get()->getType(); - if (ResultTy->isRealFloatingType() && (getLangOpts().getFPRoundingMode() != RoundingMode::NearestTiesToEven || getLangOpts().getFPExceptionMode() != LangOptions::FPE_Ignore)) @@ -13705,6 +13697,12 @@ ExprResult Sema::CreateBuiltinBinOp(SourceLocation OpLoc, OK = LHS.get()->getObjectKind(); } + // The LHS is not converted to the result type for fixed-point compound + // assignment as the common type is computed on demand. Reset the CompLHSTy + // to the LHS type we would have gotten after unary conversions. + if (CompResultTy->isFixedPointType()) + CompLHSTy = UsualUnaryConversions(LHS.get()).get()->getType(); + if (ConvertHalfVec) return convertHalfVecBinOp(*this, LHS, RHS, Opc, ResultTy, VK, OK, true, OpLoc, CurFPFeatures); From 0642e5e7a7e54a11120262cfafea0193e3a75faf Mon Sep 17 00:00:00 2001 From: Matthias Gehre Date: Tue, 14 Apr 2020 20:17:22 +0200 Subject: [PATCH 134/216] [clang-tidy] modernize-use-using: Fix broken fixit with 'template' keyword Summary: Before this PR, `modernize-use-using` would transform the typedef in ``` template class TemplateKeyword { typedef typename a::template f<> e; typedef typename a::template f<>::d e2; }; ``` into ``` template class TemplateKeyword { using d = typename a::b<>; using d2 = typename a::template a::b<>::c; }; ``` The first one is missing the `template` keyword, the second one has an extra `a::` scope. Both result in compilation errors. 
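As background on why both rewrites fail: inside a template, a member template of a dependent type must be named with the `template` keyword, otherwise the `<` that follows is parsed as an ordinary less-than. A minimal, self-contained sketch of the spelling the fixed check now emits (the names here are illustrative, not taken from the check's tests):

```cpp
// clang++ -std=c++14 -fsyntax-only dependent_template.cpp
template <typename T>
struct Holder {
  // OK: dependent member template named with the 'template' keyword,
  // which is the form the fixed check generates.
  using Good = typename T::template Inner<>;

  // Ill-formed: without 'template', 'Inner' is not treated as a template
  // name, so 'Inner<>' does not parse; clang reports something like
  // "use 'template' keyword to treat 'Inner' as a dependent template name".
  // using Bad = typename T::Inner<>;
};

struct HasInner {
  template <typename = void> struct Inner {};
};

Holder<HasInner>::Good G; // instantiates cleanly with the keyword present
```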
Reviewers: aaron.ballman, alexfh, hokein, njames93 Subscribers: xazax.hun, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78139 --- .../test/clang-tidy/checkers/modernize-use-using.cpp | 11 +++++++++++ clang/lib/AST/NestedNameSpecifier.cpp | 8 ++++++++ clang/lib/AST/TypePrinter.cpp | 2 +- .../expr/expr.prim/expr.prim.req/type-requirement.cpp | 6 +++--- 4 files changed, 23 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-using.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-using.cpp index 2eccc9066fa6..8d25dbb95658 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize-use-using.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize-use-using.cpp @@ -249,6 +249,17 @@ typedef TwoArgTemplate >, S<(0 < 0), Q>, S<(0 < 0), Q>>; +template +class TemplateKeyword { + typedef typename a::template b<> d; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use 'using' instead of 'typedef' + // CHECK-FIXES: using d = typename a::template b<>; + + typedef typename a::template b<>::c d2; + // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use 'using' instead of 'typedef' + // CHECK-FIXES: using d2 = typename a::template b<>::c; +}; + template class Variadic {}; diff --git a/clang/lib/AST/NestedNameSpecifier.cpp b/clang/lib/AST/NestedNameSpecifier.cpp index af53c7fd9ba4..e28463516a9f 100644 --- a/clang/lib/AST/NestedNameSpecifier.cpp +++ b/clang/lib/AST/NestedNameSpecifier.cpp @@ -311,6 +311,14 @@ void NestedNameSpecifier::print(raw_ostream &OS, const PrintingPolicy &Policy, // Print the template argument list. printTemplateArgumentList(OS, SpecType->template_arguments(), InnerPolicy); + } else if (const auto *DepSpecType = + dyn_cast(T)) { + // Print the template name without its corresponding + // nested-name-specifier. + OS << DepSpecType->getIdentifier()->getName(); + // Print the template argument list. 
+ printTemplateArgumentList(OS, DepSpecType->template_arguments(), + InnerPolicy); } else { // Print the type normally QualType(T, 0).print(OS, InnerPolicy); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 405a760c58ce..4cc0d735ed6a 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1388,7 +1388,7 @@ void TypePrinter::printDependentTemplateSpecializationBefore( if (T->getQualifier()) T->getQualifier()->print(OS, Policy); - OS << T->getIdentifier()->getName(); + OS << "template " << T->getIdentifier()->getName(); printTemplateArgumentList(OS, T->template_arguments(), Policy); spaceBeforePlaceHolder(OS); } diff --git a/clang/test/CXX/expr/expr.prim/expr.prim.req/type-requirement.cpp b/clang/test/CXX/expr/expr.prim/expr.prim.req/type-requirement.cpp index b8903b884e0a..15cbe6637845 100644 --- a/clang/test/CXX/expr/expr.prim/expr.prim.req/type-requirement.cpp +++ b/clang/test/CXX/expr/expr.prim/expr.prim.req/type-requirement.cpp @@ -109,9 +109,9 @@ static_assert(!requires { typename ::F; }); struct G { template static T temp; }; template requires requires { typename T::template temp; } -// expected-note@-1{{because 'typename T::temp' would be invalid: type 'int' cannot be used prior to '::' because it has no members}} -// expected-note@-2{{because 'typename T::temp' would be invalid: no member named 'temp' in 'D'}} -// expected-note@-3{{because 'typename T::temp' would be invalid: template name refers to non-type template 'G::template temp'}} +// expected-note@-1{{because 'typename T::template temp' would be invalid: type 'int' cannot be used prior to '::' because it has no members}} +// expected-note@-2{{because 'typename T::template temp' would be invalid: no member named 'temp' in 'D'}} +// expected-note@-3{{because 'typename T::template temp' would be invalid: template name refers to non-type template 'G::template temp'}} struct r7 {}; using r7i1 = r7; // expected-error{{constraints not satisfied for class template 'r7' [with T = int]}} From 0ca77adf3fc5a31aa06ecc512ae6743ff42422fa Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 15 Apr 2020 22:30:21 +0100 Subject: [PATCH 135/216] ScoreboardHazardRecognizer.h - remove unnecessary llvm::InstrItineraryData forward declaration. NFC. We have to include MCInstrItineraries.h where its defined. --- llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h b/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h index 87c76e2e0e1d..cefafe87a17d 100644 --- a/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h +++ b/llvm/include/llvm/CodeGen/ScoreboardHazardRecognizer.h @@ -23,7 +23,6 @@ namespace llvm { -class InstrItineraryData; class ScheduleDAG; class SUnit; From 7fba79be095d36f0f5eb6bc706ae4c12f5793aff Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 15 Apr 2020 22:32:17 +0100 Subject: [PATCH 136/216] ScheduleDFS.h - Replace ArrayRef.h include with forward declaration. NFC. We have no need to include the entire header. 
--- llvm/include/llvm/CodeGen/ScheduleDFS.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/CodeGen/ScheduleDFS.h b/llvm/include/llvm/CodeGen/ScheduleDFS.h index d60deab95f5d..2e0a30cc56e3 100644 --- a/llvm/include/llvm/CodeGen/ScheduleDFS.h +++ b/llvm/include/llvm/CodeGen/ScheduleDFS.h @@ -13,7 +13,6 @@ #ifndef LLVM_CODEGEN_SCHEDULEDFS_H #define LLVM_CODEGEN_SCHEDULEDFS_H -#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/ScheduleDAG.h" #include @@ -22,6 +21,7 @@ namespace llvm { +template class ArrayRef; class raw_ostream; /// Represent the ILP of the subDAG rooted at a DAG node. From ec99a24ccdaaf1508e43d29757ff33f3ffcb296f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 20:50:46 +0100 Subject: [PATCH 137/216] ConstantFolding.h - remove unused llvm::ConstantVector forward declaration. NFC. --- llvm/include/llvm/Analysis/ConstantFolding.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Analysis/ConstantFolding.h b/llvm/include/llvm/Analysis/ConstantFolding.h index 68aa1948ab88..0ccc782ad6f5 100644 --- a/llvm/include/llvm/Analysis/ConstantFolding.h +++ b/llvm/include/llvm/Analysis/ConstantFolding.h @@ -25,7 +25,6 @@ template class ArrayRef; class CallBase; class Constant; class ConstantExpr; -class ConstantVector; class DataLayout; class Function; class GlobalValue; From aafca36609e65990e1a5df0b0cfb9352d61172b7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 21:44:50 +0100 Subject: [PATCH 138/216] IPO.h - remove unused llvm::Function forward declaration. NFC. --- llvm/include/llvm/Transforms/IPO.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/IPO.h b/llvm/include/llvm/Transforms/IPO.h index ea9f50dcc616..3a2edc1ff9ce 100644 --- a/llvm/include/llvm/Transforms/IPO.h +++ b/llvm/include/llvm/Transforms/IPO.h @@ -24,7 +24,6 @@ class StringRef; class ModuleSummaryIndex; class ModulePass; class Pass; -class Function; class BasicBlock; class GlobalValue; class raw_ostream; From 91c10f50f38d4897146c3490d9881c7e39d0d2a5 Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 17 Apr 2020 05:10:22 -0400 Subject: [PATCH 139/216] Use proper dependency name for libc.include.stdio --- libc/include/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index ae2f2ffad5ff..6cd192c1a52e 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -80,7 +80,7 @@ add_gen_header( DEF_FILE stdio.h.def GEN_HDR stdio.h DEPENDS - llvm_libc_common_h + .llvm_libc_common_h ) add_gen_header( From 54cfc6944e2669d7a41a164fc4f3d923a71e701d Mon Sep 17 00:00:00 2001 From: Yi-Hong Lyu Date: Thu, 16 Apr 2020 23:45:45 -0700 Subject: [PATCH 140/216] [CommandLine] Fix cl::ConsumeAfter support with more than one positional argument Summary: Currently, cl::ConsumeAfter only works for the case that has exactly one positional argument. Without the fix, it skip fulfilling first positional argument and put that additional positional argument in interpreter arguments. 
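For illustration, a minimal sketch of the layout the new unit test exercises — two required positionals followed by a cl::ConsumeAfter list (the option names are made up for the example):

```cpp
// prog <input1> <input2> [interpreter arguments...]
#include "llvm/Support/CommandLine.h"
#include <string>
using namespace llvm;

static cl::opt<std::string> Input1(cl::Positional, cl::Required,
                                   cl::desc("<input1>"));
static cl::opt<std::string> Input2(cl::Positional, cl::Required,
                                   cl::desc("<input2>"));
static cl::list<std::string> ExtraArgs(cl::ConsumeAfter,
                                       cl::desc("<interpreter arguments>..."));

int main(int argc, char **argv) {
  cl::ParseCommandLineOptions(argc, argv);
  // Invoked as "prog input1 input2 arg1 arg2": before this fix the first
  // positional was skipped, so the values shifted and a positional value
  // leaked into ExtraArgs; with the fix, Input1/Input2 receive the first two
  // words and ExtraArgs holds {"arg1", "arg2"}.
  return 0;
}
```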
Reviewers: bkramer, Mordante, rnk, lattner, beanz, craig.topper Reviewed By: rnk Subscribers: JosephTremoulet, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77242 --- llvm/lib/Support/CommandLine.cpp | 6 +-- llvm/unittests/Support/CommandLineTest.cpp | 45 ++++++++++++++++++++++ 2 files changed, 48 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Support/CommandLine.cpp b/llvm/lib/Support/CommandLine.cpp index f78c4bfbb311..0025806ca235 100644 --- a/llvm/lib/Support/CommandLine.cpp +++ b/llvm/lib/Support/CommandLine.cpp @@ -1581,9 +1581,9 @@ bool CommandLineParser::ParseCommandLineOptions(int argc, } else { assert(ConsumeAfterOpt && NumPositionalRequired <= PositionalVals.size()); unsigned ValNo = 0; - for (size_t j = 1, e = PositionalOpts.size(); j != e; ++j) - if (RequiresValue(PositionalOpts[j])) { - ErrorParsing |= ProvidePositionalOption(PositionalOpts[j], + for (size_t J = 0, E = PositionalOpts.size(); J != E; ++J) + if (RequiresValue(PositionalOpts[J])) { + ErrorParsing |= ProvidePositionalOption(PositionalOpts[J], PositionalVals[ValNo].first, PositionalVals[ValNo].second); ValNo++; diff --git a/llvm/unittests/Support/CommandLineTest.cpp b/llvm/unittests/Support/CommandLineTest.cpp index b6fc699cf37f..a6b26b310b97 100644 --- a/llvm/unittests/Support/CommandLineTest.cpp +++ b/llvm/unittests/Support/CommandLineTest.cpp @@ -1793,4 +1793,49 @@ static cl::bits ExampleBits( clEnumValN(Val1, "bits-val1", "The Val1 value"), clEnumValN(Val1, "bits-val2", "The Val2 value"))); +TEST(CommandLineTest, ConsumeAfterOnePositional) { + cl::ResetCommandLineParser(); + + // input [args] + StackOption> Input(cl::Positional, + cl::Required); + StackOption> ExtraArgs(cl::ConsumeAfter); + + const char *Args[] = {"prog", "input", "arg1", "arg2"}; + + std::string Errs; + raw_string_ostream OS(Errs); + EXPECT_TRUE(cl::ParseCommandLineOptions(4, Args, StringRef(), &OS)); + OS.flush(); + EXPECT_EQ("input", Input); + EXPECT_TRUE(ExtraArgs.size() == 2); + EXPECT_TRUE(ExtraArgs[0] == "arg1"); + EXPECT_TRUE(ExtraArgs[1] == "arg2"); + EXPECT_TRUE(Errs.empty()); +} + +TEST(CommandLineTest, ConsumeAfterTwoPositionals) { + cl::ResetCommandLineParser(); + + // input1 input2 [args] + StackOption> Input1(cl::Positional, + cl::Required); + StackOption> Input2(cl::Positional, + cl::Required); + StackOption> ExtraArgs(cl::ConsumeAfter); + + const char *Args[] = {"prog", "input1", "input2", "arg1", "arg2"}; + + std::string Errs; + raw_string_ostream OS(Errs); + EXPECT_TRUE(cl::ParseCommandLineOptions(5, Args, StringRef(), &OS)); + OS.flush(); + EXPECT_EQ("input1", Input1); + EXPECT_EQ("input2", Input2); + EXPECT_TRUE(ExtraArgs.size() == 2); + EXPECT_TRUE(ExtraArgs[0] == "arg1"); + EXPECT_TRUE(ExtraArgs[1] == "arg2"); + EXPECT_TRUE(Errs.empty()); +} + } // anonymous namespace From ff9379f4b2d7ebcb8dee94df47dc43c3388f22bf Mon Sep 17 00:00:00 2001 From: Tyker Date: Fri, 17 Apr 2020 10:53:01 +0200 Subject: [PATCH 141/216] [NFC] Remove waymarking because it improves performances Summary: This patch remove waymarking and replaces it with storing a pointer to the User in the Use. here are the results on the measurements for the CTMark tests of the test suite. 
``` Metric: instructions_count Program baseline patched diff test-suite :: CTMark/ClamAV/clamscan.test 72557942065 71733653521 -1.1% test-suite :: CTMark/sqlite3/sqlite3.test 76281422939 75484840636 -1.0% test-suite :: CTMark/consumer-typeset/consumer-typeset.test 51364676366 50862185614 -1.0% test-suite :: CTMark/SPASS/SPASS.test 60476106505 59908437767 -0.9% test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test 112578442329 111725050856 -0.8% test-suite :: CTMark/mafft/pairlocalalign.test 50846133013 50473644539 -0.7% test-suite :: CTMark/kimwitu++/kc.test 54692641250 54349070299 -0.6% test-suite :: CTMark/7zip/7zip-benchmark.test 182216614747 181216091230 -0.5% test-suite :: CTMark/Bullet/bullet.test 123459210616 122905866767 -0.4% Geomean difference -0.8% Metric: peak_memory_use Program baseline patched diff test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test 326864 338524 3.6% test-suite :: CTMark/sqlite3/sqlite3.test 216412 221240 2.2% test-suite :: CTMark/7zip/7zip-benchmark.test 11808284 12022604 1.8% test-suite :: CTMark/Bullet/bullet.test 6831752 6945988 1.7% test-suite :: CTMark/SPASS/SPASS.test 2682552 2721820 1.5% test-suite :: CTMark/ClamAV/clamscan.test 5037256 5107936 1.4% test-suite :: CTMark/consumer-typeset/consumer-typeset.test 2752728 2790768 1.4% test-suite :: CTMark/mafft/pairlocalalign.test 1517676 1537244 1.3% test-suite :: CTMark/kimwitu++/kc.test 1090748 1103448 1.2% Geomean difference 1.8% Metric: compile_time Program baseline patched diff test-suite :: CTMark/consumer-typeset/consumer-typeset.test 14.71 14.38 -2.2% test-suite :: CTMark/sqlite3/sqlite3.test 23.18 22.73 -2.0% test-suite :: CTMark/7zip/7zip-benchmark.test 57.96 56.99 -1.7% test-suite :: CTMark/ClamAV/clamscan.test 20.75 20.49 -1.2% test-suite :: CTMark/kimwitu++/kc.test 18.35 18.15 -1.1% test-suite :: CTMark/SPASS/SPASS.test 18.72 18.57 -0.8% test-suite :: CTMark/mafft/pairlocalalign.test 14.09 14.00 -0.6% test-suite :: CTMark/Bullet/bullet.test 37.38 37.19 -0.5% test-suite :: CTMark/tramp3d-v4/tramp3d-v4.test 33.81 33.76 -0.2% Geomean difference -1.1% ``` i believe that it is worth trading +1.8% peak memory use for -1.1% compile time. also this patch removes waymarking which simplifies the Use and User classes. Reviewers: nikic, lattner Reviewed By: lattner Subscribers: russell.gallop, foad, ggreif, rriddle, ekatz, fhahn, lebedev.ri, mgorny, hiraditya, george.burgess.iv, asbirlea, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77144 --- llvm/docs/ProgrammersManual.rst | 134 ------------------------- llvm/include/llvm/Analysis/MemorySSA.h | 7 +- llvm/include/llvm/IR/Instructions.h | 8 +- llvm/include/llvm/IR/Use.h | 60 ++--------- llvm/include/llvm/IR/Value.h | 2 - llvm/lib/IR/Use.cpp | 75 -------------- llvm/lib/IR/User.cpp | 23 ++--- llvm/unittests/IR/CMakeLists.txt | 1 - llvm/unittests/IR/WaymarkTest.cpp | 55 ---------- 9 files changed, 21 insertions(+), 344 deletions(-) delete mode 100644 llvm/unittests/IR/WaymarkTest.cpp diff --git a/llvm/docs/ProgrammersManual.rst b/llvm/docs/ProgrammersManual.rst index 4d0c29cfcd4e..1c0ab10774a6 100644 --- a/llvm/docs/ProgrammersManual.rst +++ b/llvm/docs/ProgrammersManual.rst @@ -3187,140 +3187,6 @@ memory layouts: *(In the above figures* '``P``' *stands for the* ``Use**`` *that is stored in each* ``Use`` *object in the member* ``Use::Prev`` *)* -.. 
_Waymarking: - -The waymarking algorithm -^^^^^^^^^^^^^^^^^^^^^^^^ - -Since the ``Use`` objects are deprived of the direct (back)pointer to their -``User`` objects, there must be a fast and exact method to recover it. This is -accomplished by the following scheme: - -A bit-encoding in the 2 LSBits (least significant bits) of the ``Use::Prev`` -allows to find the start of the ``User`` object: - -* ``00`` --- binary digit 0 - -* ``01`` --- binary digit 1 - -* ``10`` --- stop and calculate (``s``) - -* ``11`` --- full stop (``S``) - -Given a ``Use*``, all we have to do is to walk till we get a stop and we either -have a ``User`` immediately behind or we have to walk to the next stop picking -up digits and calculating the offset: - -.. code-block:: none - - .---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---.---------------- - | 1 | s | 1 | 0 | 1 | 0 | s | 1 | 1 | 0 | s | 1 | 1 | s | 1 | S | User (or User*) - '---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---'---------------- - |+15 |+10 |+6 |+3 |+1 - | | | | | __> - | | | | __________> - | | | ______________________> - | | ______________________________________> - | __________________________________________________________> - -Only the significant number of bits need to be stored between the stops, so that -the *worst case is 20 memory accesses* when there are 1000 ``Use`` objects -associated with a ``User``. - -.. _ReferenceImpl: - -Reference implementation -^^^^^^^^^^^^^^^^^^^^^^^^ - -The following literate Haskell fragment demonstrates the concept: - -.. code-block:: haskell - - > import Test.QuickCheck - > - > digits :: Int -> [Char] -> [Char] - > digits 0 acc = '0' : acc - > digits 1 acc = '1' : acc - > digits n acc = digits (n `div` 2) $ digits (n `mod` 2) acc - > - > dist :: Int -> [Char] -> [Char] - > dist 0 [] = ['S'] - > dist 0 acc = acc - > dist 1 acc = let r = dist 0 acc in 's' : digits (length r) r - > dist n acc = dist (n - 1) $ dist 1 acc - > - > takeLast n ss = reverse $ take n $ reverse ss - > - > test = takeLast 40 $ dist 20 [] - > - -Printing gives: ``"1s100000s11010s10100s1111s1010s110s11s1S"`` - -The reverse algorithm computes the length of the string just by examining a -certain prefix: - -.. code-block:: haskell - - > pref :: [Char] -> Int - > pref "S" = 1 - > pref ('s':'1':rest) = decode 2 1 rest - > pref (_:rest) = 1 + pref rest - > - > decode walk acc ('0':rest) = decode (walk + 1) (acc * 2) rest - > decode walk acc ('1':rest) = decode (walk + 1) (acc * 2 + 1) rest - > decode walk acc _ = walk + acc - > - -Now, as expected, printing gives ``40``. - -We can *quickCheck* this with following property: - -.. code-block:: haskell - - > testcase = dist 2000 [] - > testcaseLength = length testcase - > - > identityProp n = n > 0 && n <= testcaseLength ==> length arr == pref arr - > where arr = takeLast n testcase - > - -As expected gives: - -:: - - *Main> quickCheck identityProp - OK, passed 100 tests. - -Let's be a bit more exhaustive: - -.. code-block:: haskell - - > - > deepCheck p = check (defaultConfig { configMaxTest = 500 }) p - > - -And here is the result of : - -:: - - *Main> deepCheck identityProp - OK, passed 500 tests. - -.. _Tagging: - -Tagging considerations -^^^^^^^^^^^^^^^^^^^^^^ - -To maintain the invariant that the 2 LSBits of each ``Use**`` in ``Use`` never -change after being set up, setters of ``Use::Prev`` must re-tag the new -``Use**`` on every modification. Accordingly getters must strip the tag bits. 
- -For layout b) instead of the ``User`` we find a pointer (``User*`` with LSBit -set). Following this pointer brings us to the ``User``. A portable trick -ensures that the first bytes of ``User`` (if interpreted as a pointer) never has -the LSBit set. (Portability is relying on the fact that all known compilers -place the ``vptr`` in the first word of the instances.) - .. _polymorphism: Designing Type Hierarchies and Polymorphic Interfaces diff --git a/llvm/include/llvm/Analysis/MemorySSA.h b/llvm/include/llvm/Analysis/MemorySSA.h index 9b393c9cdaa3..cc79b7ea776b 100644 --- a/llvm/include/llvm/Analysis/MemorySSA.h +++ b/llvm/include/llvm/Analysis/MemorySSA.h @@ -499,14 +499,11 @@ class MemoryPhi final : public MemoryAccess { using const_block_iterator = BasicBlock *const *; block_iterator block_begin() { - auto *Ref = reinterpret_cast(op_begin() + ReservedSpace); - return reinterpret_cast(Ref + 1); + return reinterpret_cast(op_begin() + ReservedSpace); } const_block_iterator block_begin() const { - const auto *Ref = - reinterpret_cast(op_begin() + ReservedSpace); - return reinterpret_cast(Ref + 1); + return reinterpret_cast(op_begin() + ReservedSpace); } block_iterator block_end() { return block_begin() + getNumOperands(); } diff --git a/llvm/include/llvm/IR/Instructions.h b/llvm/include/llvm/IR/Instructions.h index ec68a5f00d32..7188b82f8c64 100644 --- a/llvm/include/llvm/IR/Instructions.h +++ b/llvm/include/llvm/IR/Instructions.h @@ -2549,15 +2549,11 @@ class PHINode : public Instruction { using const_block_iterator = BasicBlock * const *; block_iterator block_begin() { - Use::UserRef *ref = - reinterpret_cast(op_begin() + ReservedSpace); - return reinterpret_cast(ref + 1); + return reinterpret_cast(op_begin() + ReservedSpace); } const_block_iterator block_begin() const { - const Use::UserRef *ref = - reinterpret_cast(op_begin() + ReservedSpace); - return reinterpret_cast(ref + 1); + return reinterpret_cast(op_begin() + ReservedSpace); } block_iterator block_end() { diff --git a/llvm/include/llvm/IR/Use.h b/llvm/include/llvm/IR/Use.h index 6c5dd60f1173..6bb958c173bf 100644 --- a/llvm/include/llvm/IR/Use.h +++ b/llvm/include/llvm/IR/Use.h @@ -41,17 +41,6 @@ class Value; /// all of the uses for a particular value definition. It also supports jumping /// directly to the used value when we arrive from the User's operands, and /// jumping directly to the User when we arrive from the Value's uses. -/// -/// The pointer to the used Value is explicit, and the pointer to the User is -/// implicit. The implicit pointer is found via a waymarking algorithm -/// described in the programmer's manual: -/// -/// http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm -/// -/// This is essentially the single most memory intensive object in LLVM because -/// of the number of uses in the system. At the same time, the constant time -/// operations it allows are essential to many optimizations having reasonable -/// time complexity. class Use { public: Use(const Use &U) = delete; @@ -60,34 +49,6 @@ class Use { /// that also works with less standard-compliant compilers void swap(Use &RHS); - /// Pointer traits for the UserRef PointerIntPair. This ensures we always - /// use the LSB regardless of pointer alignment on different targets. 
- struct UserRefPointerTraits { - static inline void *getAsVoidPointer(User *P) { return P; } - - static inline User *getFromVoidPointer(void *P) { - return (User *)P; - } - - static constexpr int NumLowBitsAvailable = 1; - }; - - // A type for the word following an array of hung-off Uses in memory, which is - // a pointer back to their User with the bottom bit set. - using UserRef = PointerIntPair; - - /// Pointer traits for the Prev PointerIntPair. This ensures we always use - /// the two LSBs regardless of pointer alignment on different targets. - struct PrevPointerTraits { - static inline void *getAsVoidPointer(Use **P) { return P; } - - static inline Use **getFromVoidPointer(void *P) { - return (Use **)P; - } - - static constexpr int NumLowBitsAvailable = 2; - }; - private: /// Destructor - Only for zap() ~Use() { @@ -95,13 +56,12 @@ class Use { removeFromList(); } - enum PrevPtrTag { zeroDigitTag, oneDigitTag, stopTag, fullStopTag }; - /// Constructor - Use(PrevPtrTag tag) { Prev.setInt(tag); } + Use(User *Parent) : Parent(Parent) {} public: friend class Value; + friend class User; operator Value *() const { return Val; } Value *get() const { return Val; } @@ -110,7 +70,7 @@ class Use { /// /// For an instruction operand, for example, this will return the /// instruction. - User *getUser() const LLVM_READONLY; + User *getUser() const { return Parent; }; inline void set(Value *Val); @@ -125,24 +85,18 @@ class Use { /// Return the operand # of this use in its User. unsigned getOperandNo() const; - /// Initializes the waymarking tags on an array of Uses. - /// - /// This sets up the array of Uses such that getUser() can find the User from - /// any of those Uses. - static Use *initTags(Use *Start, Use *Stop); - /// Destroys Use operands when the number of operands of /// a User changes. static void zap(Use *Start, const Use *Stop, bool del = false); private: - const Use *getImpliedUser() const LLVM_READONLY; Value *Val = nullptr; Use *Next = nullptr; - PointerIntPair Prev; + Use **Prev = nullptr; + User *Parent = nullptr; - void setPrev(Use **NewPrev) { Prev.setPointer(NewPrev); } + void setPrev(Use **NewPrev) { Prev = NewPrev; } void addToList(Use **List) { Next = *List; @@ -153,7 +107,7 @@ class Use { } void removeFromList() { - Use **StrippedPrev = Prev.getPointer(); + Use **StrippedPrev = Prev; *StrippedPrev = Next; if (Next) Next->setPrev(StrippedPrev); diff --git a/llvm/include/llvm/IR/Value.h b/llvm/include/llvm/IR/Value.h index 0f9c335b5ba8..fdaa2aed2256 100644 --- a/llvm/include/llvm/IR/Value.h +++ b/llvm/include/llvm/IR/Value.h @@ -72,8 +72,6 @@ using ValueName = StringMapEntry; /// objects that watch it and listen to RAUW and Destroy events. See /// llvm/IR/ValueHandle.h for details. class Value { - // The least-significant bit of the first word of Value *must* be zero: - // http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm Type *VTy; Use *UseList; diff --git a/llvm/lib/IR/Use.cpp b/llvm/lib/IR/Use.cpp index 18c61757ee84..67ce3d26f2b8 100644 --- a/llvm/lib/IR/Use.cpp +++ b/llvm/lib/IR/Use.cpp @@ -37,52 +37,10 @@ void Use::swap(Use &RHS) { } } -User *Use::getUser() const { - const Use *End = getImpliedUser(); - const UserRef *ref = reinterpret_cast(End); - return ref->getInt() ? ref->getPointer() - : reinterpret_cast(const_cast(End)); -} - unsigned Use::getOperandNo() const { return this - getUser()->op_begin(); } -// Sets up the waymarking algorithm's tags for a series of Uses. 
See the -// algorithm details here: -// -// http://www.llvm.org/docs/ProgrammersManual.html#the-waymarking-algorithm -// -Use *Use::initTags(Use *const Start, Use *Stop) { - ptrdiff_t Done = 0; - while (Done < 20) { - if (Start == Stop--) - return Start; - static const PrevPtrTag tags[20] = { - fullStopTag, oneDigitTag, stopTag, oneDigitTag, oneDigitTag, - stopTag, zeroDigitTag, oneDigitTag, oneDigitTag, stopTag, - zeroDigitTag, oneDigitTag, zeroDigitTag, oneDigitTag, stopTag, - oneDigitTag, oneDigitTag, oneDigitTag, oneDigitTag, stopTag}; - new (Stop) Use(tags[Done++]); - } - - ptrdiff_t Count = Done; - while (Start != Stop) { - --Stop; - if (!Count) { - new (Stop) Use(stopTag); - ++Done; - Count = Done; - } else { - new (Stop) Use(PrevPtrTag(Count & 1)); - Count >>= 1; - ++Done; - } - } - - return Start; -} - void Use::zap(Use *Start, const Use *Stop, bool del) { while (Start != Stop) (--Stop)->~Use(); @@ -90,37 +48,4 @@ void Use::zap(Use *Start, const Use *Stop, bool del) { ::operator delete(Start); } -const Use *Use::getImpliedUser() const { - const Use *Current = this; - - while (true) { - unsigned Tag = (Current++)->Prev.getInt(); - switch (Tag) { - case zeroDigitTag: - case oneDigitTag: - continue; - - case stopTag: { - ++Current; - ptrdiff_t Offset = 1; - while (true) { - unsigned Tag = Current->Prev.getInt(); - switch (Tag) { - case zeroDigitTag: - case oneDigitTag: - ++Current; - Offset = (Offset << 1) + Tag; - continue; - default: - return Current + Offset; - } - } - } - - case fullStopTag: - return Current; - } - } -} - } // End llvm namespace diff --git a/llvm/lib/IR/User.cpp b/llvm/lib/IR/User.cpp index 3097916c5152..ab7208c318aa 100644 --- a/llvm/lib/IR/User.cpp +++ b/llvm/lib/IR/User.cpp @@ -40,20 +40,18 @@ void User::replaceUsesOfWith(Value *From, Value *To) { void User::allocHungoffUses(unsigned N, bool IsPhi) { assert(HasHungOffUses && "alloc must have hung off uses"); - static_assert(alignof(Use) >= alignof(Use::UserRef), - "Alignment is insufficient for 'hung-off-uses' pieces"); - static_assert(alignof(Use::UserRef) >= alignof(BasicBlock *), + static_assert(alignof(Use) >= alignof(BasicBlock *), "Alignment is insufficient for 'hung-off-uses' pieces"); - // Allocate the array of Uses, followed by a pointer (with bottom bit set) to - // the User. - size_t size = N * sizeof(Use) + sizeof(Use::UserRef); + // Allocate the array of Uses + size_t size = N * sizeof(Use); if (IsPhi) size += N * sizeof(BasicBlock *); Use *Begin = static_cast(::operator new(size)); Use *End = Begin + N; - (void) new(End) Use::UserRef(const_cast(this), 1); - setOperandList(Use::initTags(Begin, End)); + setOperandList(Begin); + for (; Begin != End; Begin++) + new (Begin) Use(this); } void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) { @@ -74,10 +72,8 @@ void User::growHungoffUses(unsigned NewNumUses, bool IsPhi) { // If this is a Phi, then we need to copy the BB pointers too. 
if (IsPhi) { - auto *OldPtr = - reinterpret_cast(OldOps + OldNumUses) + sizeof(Use::UserRef); - auto *NewPtr = - reinterpret_cast(NewOps + NewNumUses) + sizeof(Use::UserRef); + auto *OldPtr = reinterpret_cast(OldOps + OldNumUses); + auto *NewPtr = reinterpret_cast(NewOps + NewNumUses); std::copy(OldPtr, OldPtr + (OldNumUses * sizeof(BasicBlock *)), NewPtr); } Use::zap(OldOps, OldOps + OldNumUses, true); @@ -135,7 +131,8 @@ void *User::allocateFixedOperandUser(size_t Size, unsigned Us, Obj->NumUserOperands = Us; Obj->HasHungOffUses = false; Obj->HasDescriptor = DescBytes != 0; - Use::initTags(Start, End); + for (; Start != End; Start++) + new (Start) Use(Obj); if (DescBytes != 0) { auto *DescInfo = reinterpret_cast(Storage + DescBytes); diff --git a/llvm/unittests/IR/CMakeLists.txt b/llvm/unittests/IR/CMakeLists.txt index d0068517f09a..4241851dfad7 100644 --- a/llvm/unittests/IR/CMakeLists.txt +++ b/llvm/unittests/IR/CMakeLists.txt @@ -41,7 +41,6 @@ add_llvm_unittest(IRTests VectorTypesTest.cpp VerifierTest.cpp VPIntrinsicTest.cpp - WaymarkTest.cpp ) target_link_libraries(IRTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/IR/WaymarkTest.cpp b/llvm/unittests/IR/WaymarkTest.cpp deleted file mode 100644 index 2f64fe0ae99e..000000000000 --- a/llvm/unittests/IR/WaymarkTest.cpp +++ /dev/null @@ -1,55 +0,0 @@ -//===- llvm/unittest/IR/WaymarkTest.cpp - getUser() unit tests ------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -// we perform white-box tests -// -#include "llvm/IR/Constants.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "gtest/gtest.h" -#include - -namespace llvm { -namespace { - -TEST(WaymarkTest, NativeArray) { - LLVMContext Context; - static uint8_t tail[22] = "s02s33s30y2y0s1x0syxS"; - Value * values[22]; - std::transform(tail, tail + 22, values, [&](char c) { - return ConstantInt::get(Type::getInt8Ty(Context), c); - }); - FunctionType *FT = FunctionType::get(Type::getVoidTy(Context), true); - std::unique_ptr F( - Function::Create(FT, GlobalValue::ExternalLinkage)); - const CallInst *A = CallInst::Create(F.get(), makeArrayRef(values)); - ASSERT_NE(A, (const CallInst*)nullptr); - ASSERT_EQ(1U + 22, A->getNumOperands()); - const Use *U = &A->getOperandUse(0); - const Use *Ue = &A->getOperandUse(22); - for (; U != Ue; ++U) - { - EXPECT_EQ(A, U->getUser()); - } - delete A; -} - -TEST(WaymarkTest, TwoBit) { - Use* many = (Use*)calloc(sizeof(Use), 8212 + 1); - ASSERT_TRUE(many); - Use::initTags(many, many + 8212); - for (Use *U = many, *Ue = many + 8212 - 1; U != Ue; ++U) - { - EXPECT_EQ(reinterpret_cast(Ue + 1), U->getUser()); - } - free(many); -} - -} // end anonymous namespace -} // end namespace llvm From cceaf6b8ff30b4e5f465404ee5e31e69b069ab3f Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Fri, 17 Apr 2020 09:27:53 +0000 Subject: [PATCH 142/216] [gn build] Port ff9379f4b2d --- llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn index 97dd0da6f92b..1589fc603a72 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/IR/BUILD.gn @@ -44,6 
+44,5 @@ unittest("IRTests") { "ValueTest.cpp", "VectorTypesTest.cpp", "VerifierTest.cpp", - "WaymarkTest.cpp", ] } From b6d77e792c3339425a733756b970dbac0da119fb Mon Sep 17 00:00:00 2001 From: Georgii Rymar Date: Wed, 15 Apr 2020 18:28:08 +0300 Subject: [PATCH 143/216] [tools][tests] - Use --check-prefixes instead of multiple --check-prefix. NFCI. There is no need to use `--check-prefix` multiple times. It helps to improve readability/test maintainability. This patch does it for all tools at once. Differential revision: https://reviews.llvm.org/D78217 --- llvm/test/tools/dsymutil/X86/accelerator.test | 8 +++--- .../tools/dsymutil/X86/basic-linking-x86.test | 8 +++--- .../tools/dsymutil/X86/dsym-companion.test | 4 +-- .../tools/dsymutil/X86/multiple-inputs.test | 8 +++--- llvm/test/tools/dsymutil/X86/odr-uniquing.cpp | 4 +-- llvm/test/tools/dsymutil/X86/verify.test | 4 +-- llvm/test/tools/dsymutil/arch-option.test | 14 +++++----- llvm/test/tools/dsymutil/basic-linking.test | 2 +- llvm/test/tools/dsymutil/dump-symtab.test | 4 +-- .../tools/llvm-dwarfdump/X86/debug_loclists.s | 4 +-- .../tools/llvm-mca/X86/bextr-read-after-ld.s | 14 +++++----- .../tools/llvm-mca/X86/bzhi-read-after-ld.s | 10 +++---- llvm/test/tools/llvm-mca/X86/cpus.s | 26 +++++++++---------- .../tools/llvm-mca/X86/default-iterations.s | 6 ++--- llvm/test/tools/llvm-mca/X86/dispatch_width.s | 6 ++--- .../tools/llvm-mca/X86/fma3-read-after-ld-1.s | 10 +++---- .../tools/llvm-mca/X86/fma3-read-after-ld-2.s | 10 +++---- .../tools/llvm-mca/X86/option-all-stats-1.s | 4 +-- .../tools/llvm-mca/X86/option-all-stats-2.s | 8 +++--- .../tools/llvm-mca/X86/option-all-views-1.s | 4 +-- .../tools/llvm-mca/X86/option-all-views-2.s | 8 +++--- .../test/tools/llvm-mca/X86/read-after-ld-1.s | 18 ++++++------- .../test/tools/llvm-mca/X86/read-after-ld-2.s | 10 +++---- .../test/tools/llvm-mca/X86/read-after-ld-3.s | 8 +++--- .../tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s | 16 ++++++------ .../X86/variable-blend-read-after-ld-1.s | 18 ++++++------- .../X86/variable-blend-read-after-ld-2.s | 18 ++++++------- .../llvm-objcopy/ELF/prefix-symbols.test | 4 +-- llvm/test/tools/llvm-profdata/c-general.test | 2 +- llvm/test/tools/llvm-profdata/cutoff.test | 12 ++++----- .../llvm-profdata/hash-mismatch.proftext | 4 +-- .../tools/llvm-profdata/multiple-inputs.test | 8 +++--- llvm/test/tools/llvm-profdata/overlap.test | 4 +-- .../tools/llvm-profdata/value-prof.proftext | 4 +-- .../tools/llvm-profdata/weight-instr.test | 24 ++++++++--------- .../test/tools/llvm-readobj/COFF/exports.test | 6 ++--- .../tools/llvm-symbolizer/split-dwarf.test | 6 ++--- 37 files changed, 164 insertions(+), 164 deletions(-) diff --git a/llvm/test/tools/dsymutil/X86/accelerator.test b/llvm/test/tools/dsymutil/X86/accelerator.test index 96fc58ee5683..4e5a2be5bb52 100644 --- a/llvm/test/tools/dsymutil/X86/accelerator.test +++ b/llvm/test/tools/dsymutil/X86/accelerator.test @@ -4,8 +4,8 @@ RUN: dsymutil -accelerator=Apple -oso-prepend-path=%p/.. 
%p/../Inputs/basic.mach RUN: llvm-dwarfdump -verify %t.dwarf.dSYM RUN: llvm-dwarfdump -verify %t.apple.dSYM -RUN: llvm-dwarfdump -debug-names %t.dwarf.dSYM | FileCheck %s -check-prefix=NAMES -check-prefix=DWARF -RUN: llvm-dwarfdump -apple-names -apple-namespaces -apple-types %t.apple.dSYM | FileCheck %s -check-prefix=NAMES -check-prefix=APPLE +RUN: llvm-dwarfdump -debug-names %t.dwarf.dSYM | FileCheck %s -check-prefixes=NAMES,DWARF +RUN: llvm-dwarfdump -apple-names -apple-namespaces -apple-types %t.apple.dSYM | FileCheck %s -check-prefixes=NAMES,APPLE RUN: dsymutil -update -accelerator=Dwarf %t.apple.dSYM RUN: dsymutil -update -accelerator=Apple %t.dwarf.dSYM @@ -13,8 +13,8 @@ RUN: dsymutil -update -accelerator=Apple %t.dwarf.dSYM RUN: llvm-dwarfdump -verify %t.dwarf.dSYM RUN: llvm-dwarfdump -verify %t.apple.dSYM -RUN: llvm-dwarfdump -debug-names %t.apple.dSYM | FileCheck %s -check-prefix=NAMES -check-prefix=DWARF -RUN: llvm-dwarfdump -apple-names -apple-namespaces -apple-types %t.dwarf.dSYM | FileCheck %s -check-prefix=NAMES -check-prefix=APPLE +RUN: llvm-dwarfdump -debug-names %t.apple.dSYM | FileCheck %s -check-prefixes=NAMES,DWARF +RUN: llvm-dwarfdump -apple-names -apple-namespaces -apple-types %t.dwarf.dSYM | FileCheck %s -check-prefixes=NAMES,APPLE DWARF: .debug_names contents: DWARF: Compilation Unit offsets [ diff --git a/llvm/test/tools/dsymutil/X86/basic-linking-x86.test b/llvm/test/tools/dsymutil/X86/basic-linking-x86.test index 8535153d6488..3f9ee4d51362 100644 --- a/llvm/test/tools/dsymutil/X86/basic-linking-x86.test +++ b/llvm/test/tools/dsymutil/X86/basic-linking-x86.test @@ -3,10 +3,10 @@ RUN: dsymutil -f -oso-prepend-path=%p/.. %t1 RUN: llvm-dwarfdump -a %t1.dwarf | FileCheck %s RUN: dsymutil -f -o %t2 -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 RUN: llvm-dwarfdump -a %t2 | FileCheck %s -RUN: dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump -a - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC -RUN: dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump -a - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE -RUN: dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | dsymutil -f -y -o - - | llvm-dwarfdump -a - | FileCheck %s --check-prefix=CHECK --check-prefix=BASIC -RUN: dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | dsymutil -f -o - -y - | llvm-dwarfdump -a - | FileCheck %s --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | llvm-dwarfdump -a - | FileCheck %s --check-prefixes=CHECK,BASIC +RUN: dsymutil -f -o - -oso-prepend-path=%p/.. %p/../Inputs/basic-archive.macho.x86_64 | llvm-dwarfdump -a - | FileCheck %s --check-prefixes=CHECK,ARCHIVE +RUN: dsymutil -dump-debug-map -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 | dsymutil -f -y -o - - | llvm-dwarfdump -a - | FileCheck %s --check-prefixes=CHECK,BASIC +RUN: dsymutil -dump-debug-map -oso-prepend-path=%p/.. 
%p/../Inputs/basic-archive.macho.x86_64 | dsymutil -f -o - -y - | llvm-dwarfdump -a - | FileCheck %s --check-prefixes=CHECK,ARCHIVE CHECK: file format Mach-O 64-bit x86-64 diff --git a/llvm/test/tools/dsymutil/X86/dsym-companion.test b/llvm/test/tools/dsymutil/X86/dsym-companion.test index 6b567952a191..c9b0a1aa4f8d 100644 --- a/llvm/test/tools/dsymutil/X86/dsym-companion.test +++ b/llvm/test/tools/dsymutil/X86/dsym-companion.test @@ -1,5 +1,5 @@ -RUN: dsymutil -o - %p/../Inputs/basic.macho.i386 -f | llvm-readobj --file-headers -l -S --symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK32 -RUN: dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -f | llvm-readobj --file-headers -l -S --symbols - | FileCheck %s -check-prefix=CHECK -check-prefix=CHECK64 +RUN: dsymutil -o - %p/../Inputs/basic.macho.i386 -f | llvm-readobj --file-headers -l -S --symbols - | FileCheck %s -check-prefixes=CHECK,CHECK32 +RUN: dsymutil -o - -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 -f | llvm-readobj --file-headers -l -S --symbols - | FileCheck %s -check-prefixes=CHECK,CHECK64 This test checks that the dSYM companion binaries generated in 32 and 64 bits are correct. The check are pretty strict (we check even the offsets and sizes diff --git a/llvm/test/tools/dsymutil/X86/multiple-inputs.test b/llvm/test/tools/dsymutil/X86/multiple-inputs.test index bf4674f91ee1..ea541426eb17 100644 --- a/llvm/test/tools/dsymutil/X86/multiple-inputs.test +++ b/llvm/test/tools/dsymutil/X86/multiple-inputs.test @@ -9,18 +9,18 @@ RUN: cat %p/../Inputs/basic-lto-dw4.macho.x86_64 > %t/basic-lto-dw4.macho.x86_64 # Multiple inputs in flat mode RUN: dsymutil -f -oso-prepend-path=%p/.. %t/basic.macho.x86_64 %t/basic-archive.macho.x86_64 %t/basic-lto.macho.x86_64 %t/basic-lto-dw4.macho.x86_64 RUN: llvm-dwarfdump -a %t/basic.macho.x86_64.dwarf \ -RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC +RUN: | FileCheck %S/basic-linking-x86.test --check-prefixes=CHECK,BASIC RUN: llvm-dwarfdump -a %t/basic-archive.macho.x86_64.dwarf \ -RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: | FileCheck %S/basic-linking-x86.test --check-prefixes=CHECK,ARCHIVE RUN: llvm-dwarfdump -a %t/basic-lto.macho.x86_64.dwarf | FileCheck %S/basic-lto-linking-x86.test RUN: llvm-dwarfdump -a %t/basic-lto-dw4.macho.x86_64.dwarf | FileCheck %S/basic-lto-dw4-linking-x86.test # Multiple inputs that end up in the same named bundle RUN: dsymutil -oso-prepend-path=%p/.. 
%t/basic.macho.x86_64 %t/basic-archive.macho.x86_64 %t/basic-lto.macho.x86_64 %t/basic-lto-dw4.macho.x86_64 -o %t.dSYM RUN: llvm-dwarfdump -a %t.dSYM/Contents/Resources/DWARF/basic.macho.x86_64 \ -RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=BASIC +RUN: | FileCheck %S/basic-linking-x86.test --check-prefixes=CHECK,BASIC RUN: llvm-dwarfdump -a %t.dSYM/Contents/Resources/DWARF/basic-archive.macho.x86_64 \ -RUN: | FileCheck %S/basic-linking-x86.test --check-prefix=CHECK --check-prefix=ARCHIVE +RUN: | FileCheck %S/basic-linking-x86.test --check-prefixes=CHECK,ARCHIVE RUN: llvm-dwarfdump -a %t.dSYM/Contents/Resources/DWARF/basic-lto.macho.x86_64 | FileCheck %S/basic-lto-linking-x86.test RUN: llvm-dwarfdump -a %t.dSYM/Contents/Resources/DWARF/basic-lto-dw4.macho.x86_64 | FileCheck %S/basic-lto-dw4-linking-x86.test diff --git a/llvm/test/tools/dsymutil/X86/odr-uniquing.cpp b/llvm/test/tools/dsymutil/X86/odr-uniquing.cpp index 4827bd8bc35d..0e3b974ba005 100644 --- a/llvm/test/tools/dsymutil/X86/odr-uniquing.cpp +++ b/llvm/test/tools/dsymutil/X86/odr-uniquing.cpp @@ -11,8 +11,8 @@ - without ODR uniquing: all types are re-emited in the second CU */ -// RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -v -debug-info - | FileCheck -check-prefix=ODR -check-prefix=CHECK %s -// RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -no-odr -o - | llvm-dwarfdump -v -debug-info - | FileCheck -check-prefix=NOODR -check-prefix=CHECK %s +// RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -o - | llvm-dwarfdump -v -debug-info - | FileCheck -check-prefixes=ODR,CHECK %s +// RUN: dsymutil -f -oso-prepend-path=%p/../Inputs/odr-uniquing -y %p/dummy-debug-map.map -no-odr -o - | llvm-dwarfdump -v -debug-info - | FileCheck -check-prefixes=NOODR,CHECK %s // The first compile unit contains all the types: // CHECK: TAG_compile_unit diff --git a/llvm/test/tools/dsymutil/X86/verify.test b/llvm/test/tools/dsymutil/X86/verify.test index f551bbebd5e6..1cbfa893818b 100644 --- a/llvm/test/tools/dsymutil/X86/verify.test +++ b/llvm/test/tools/dsymutil/X86/verify.test @@ -1,6 +1,6 @@ # Positive tests in regular and verbose mode. # RUN: dsymutil -verify -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 %p/../Inputs/basic-archive.macho.x86_64 %p/../Inputs/basic-lto.macho.x86_64 %p/../Inputs/basic-lto-dw4.macho.x86_64 -o %t 2>&1 | FileCheck %s --allow-empty --check-prefix=QUIET-SUCCESS -# RUN: dsymutil -verify -verbose -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 %p/../Inputs/basic-archive.macho.x86_64 %p/../Inputs/basic-lto.macho.x86_64 %p/../Inputs/basic-lto-dw4.macho.x86_64 -o %t 2>&1 | FileCheck %s --check-prefix=QUIET-SUCCESS --check-prefix=VERBOSE +# RUN: dsymutil -verify -verbose -oso-prepend-path=%p/.. %p/../Inputs/basic.macho.x86_64 %p/../Inputs/basic-archive.macho.x86_64 %p/../Inputs/basic-lto.macho.x86_64 %p/../Inputs/basic-lto-dw4.macho.x86_64 -o %t 2>&1 | FileCheck %s --check-prefixes=QUIET-SUCCESS,VERBOSE # VERBOSE: Verifying DWARF for architecture: x86_64 # QUIET-SUCCESS-NOT: error: verification failed @@ -8,7 +8,7 @@ # Negative tests in regular and verbose mode. # (Invalid object generated from ../Inputs/invalid.s by modified the low PC.) 
# RUN: not dsymutil -verify -oso-prepend-path=%p/../Inputs -y %s -o %t 2>&1 | FileCheck %s --check-prefix=QUIET-FAIL -# RUN: not dsymutil -verify -verbose -oso-prepend-path=%p/../Inputs -y %s -o %t 2>&1 | FileCheck %s --check-prefix=QUIET-FAIL --check-prefix=VERBOSE +# RUN: not dsymutil -verify -verbose -oso-prepend-path=%p/../Inputs -y %s -o %t 2>&1 | FileCheck %s --check-prefixes=QUIET-FAIL,VERBOSE # QUIET-FAIL: error: verification failed diff --git a/llvm/test/tools/dsymutil/arch-option.test b/llvm/test/tools/dsymutil/arch-option.test index 802c417325c8..723a9c3ac742 100644 --- a/llvm/test/tools/dsymutil/arch-option.test +++ b/llvm/test/tools/dsymutil/arch-option.test @@ -1,13 +1,13 @@ Processing of the -arch option happens at debug map parsing time, thus just looking at the dumped debug maps is enough to validate their effects. -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch all | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch='*' | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm64 | FileCheck %s -check-prefix=ARM64 -check-prefix=CHECK -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm | FileCheck %s -check-prefix=ARMV7S -check-prefix=ARMV7 -check-prefix=CHECK -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch armv7 | FileCheck %s -check-prefix=ARMV7 -check-prefix=CHECK -RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm64 -arch armv7s | FileCheck %s -check-prefix=ARM64 -check-prefix=ARMV7S -check-prefix=CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib | FileCheck %s -check-prefixes=ARM64,ARMV7S,ARMV7,CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch all | FileCheck %s -check-prefixes=ARM64,ARMV7S,ARMV7,CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch='*' | FileCheck %s -check-prefixes=ARM64,ARMV7S,ARMV7,CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm64 | FileCheck %s -check-prefixes=ARM64,CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm | FileCheck %s -check-prefixes=ARMV7S,ARMV7,CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch armv7 | FileCheck %s -check-prefixes=ARMV7,CHECK +RUN: dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm64 -arch armv7s | FileCheck %s -check-prefixes=ARM64,ARMV7S,CHECK RUN: not dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch arm42 2>&1 | FileCheck %s -check-prefix=BADARCH RUN: not dsymutil -oso-prepend-path %p -dump-debug-map %p/Inputs/fat-test.arm.dylib -arch i386 2>&1 | FileCheck %s -check-prefix=EMPTY diff --git a/llvm/test/tools/dsymutil/basic-linking.test b/llvm/test/tools/dsymutil/basic-linking.test index 023538ea5565..cc521430b0c8 100644 --- a/llvm/test/tools/dsymutil/basic-linking.test +++ 
b/llvm/test/tools/dsymutil/basic-linking.test @@ -1,7 +1,7 @@ RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 | FileCheck %s RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-lto.macho.x86_64 | FileCheck %s --check-prefix=CHECK-LTO RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK-ARCHIVE -RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LTO --check-prefix=CHECK-ARCHIVE +RUN: dsymutil -no-output -verbose -oso-prepend-path=%p %p/Inputs/basic.macho.x86_64 %p/Inputs/basic-lto.macho.x86_64 %p/Inputs/basic-archive.macho.x86_64 | FileCheck %s --check-prefixes=CHECK,CHECK-LTO,CHECK-ARCHIVE This test check the basic Dwarf linking process through the debug dumps. diff --git a/llvm/test/tools/dsymutil/dump-symtab.test b/llvm/test/tools/dsymutil/dump-symtab.test index 45832356d89e..c0b51446c93f 100644 --- a/llvm/test/tools/dsymutil/dump-symtab.test +++ b/llvm/test/tools/dsymutil/dump-symtab.test @@ -1,5 +1,5 @@ -RUN: dsymutil -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=ALL -check-prefix=I386 %s -RUN: dsymutil -arch i386 -s %p/Inputs/fat-test.dylib | FileCheck -check-prefix=I386 -check-prefix=ONE %s +RUN: dsymutil -s %p/Inputs/fat-test.dylib | FileCheck -check-prefixes=ALL,I386 %s +RUN: dsymutil -arch i386 -s %p/Inputs/fat-test.dylib | FileCheck -check-prefixes=I386,ONE %s ALL: ---------------------------------------------------------------------- diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists.s b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists.s index ffd8f2ece78f..f84e3116c812 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists.s +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_loclists.s @@ -1,8 +1,8 @@ # RUN: llvm-mc %s -filetype obj -triple x86_64-pc-linux -o %t # RUN: llvm-dwarfdump -debug-info -debug-loclists %t \ -# RUN: | FileCheck %s --check-prefix=REGULAR --check-prefix=BOTH +# RUN: | FileCheck %s --check-prefixes=REGULAR,BOTH # RUN: llvm-dwarfdump -debug-info -debug-loclists --verbose %t \ -# RUN: | FileCheck %s --check-prefix=VERBOSE --check-prefix=BOTH +# RUN: | FileCheck %s --check-prefixes=VERBOSE,BOTH # BOTH: DW_AT_location {{.*}}(0x0000000c diff --git a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s index 294d473e4fd9..c896f2b173bf 100644 --- a/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bextr-read-after-ld.s @@ -1,11 +1,11 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 
-timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BDVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BTVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 add %edi, %esi bextrl %esi, (%rdi), %eax diff --git a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s index 17bf9b295269..97fa6e4b917f 100644 --- a/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s +++ b/llvm/test/tools/llvm-mca/X86/bzhi-read-after-ld.s @@ -1,9 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline 
-resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 add %edi, %esi bzhil %esi, (%rdi), %eax diff --git a/llvm/test/tools/llvm-mca/X86/cpus.s b/llvm/test/tools/llvm-mca/X86/cpus.s index 4480c0ed1b34..d422adc15761 100644 --- a/llvm/test/tools/llvm-mca/X86/cpus.s +++ b/llvm/test/tools/llvm-mca/X86/cpus.s @@ -1,17 +1,17 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BARCELONA %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BDVER2 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BTVER2 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER1 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=ZNVER2 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SANDYBRIDGE %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=IVYBRIDGE %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=HASWELL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=BROADWELL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=KNL %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SKX %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SKX-AVX512 %s -# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=slm -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefix=ALL --check-prefix=SLM %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BARCELONA %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BDVER2 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BTVER2 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver1 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ZNVER1 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=znver2 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,ZNVER2 %s +# RUN: llvm-mca %s 
-mtriple=x86_64-unknown-unknown -mcpu=sandybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SANDYBRIDGE %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,IVYBRIDGE %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=haswell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,HASWELL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=broadwell -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,BROADWELL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=knl -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,KNL %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SKX %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=skylake-avx512 -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SKX-AVX512 %s +# RUN: llvm-mca %s -mtriple=x86_64-unknown-unknown -mcpu=slm -resource-pressure=false -instruction-info=false < %s | FileCheck --check-prefixes=ALL,SLM %s add %edi, %eax diff --git a/llvm/test/tools/llvm-mca/X86/default-iterations.s b/llvm/test/tools/llvm-mca/X86/default-iterations.s index ee1b6169c65b..8e94806cd705 100644 --- a/llvm/test/tools/llvm-mca/X86/default-iterations.s +++ b/llvm/test/tools/llvm-mca/X86/default-iterations.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false < %s | FileCheck --check-prefix=ALL --check-prefix=CUSTOM %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false < %s | FileCheck --check-prefixes=ALL,DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=0 -resource-pressure=false < %s | FileCheck --check-prefixes=ALL,DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false < %s | FileCheck --check-prefixes=ALL,CUSTOM %s add %eax, %eax diff --git a/llvm/test/tools/llvm-mca/X86/dispatch_width.s b/llvm/test/tools/llvm-mca/X86/dispatch_width.s index baaad160f132..5bac69c6fa82 100644 --- a/llvm/test/tools/llvm-mca/X86/dispatch_width.s +++ b/llvm/test/tools/llvm-mca/X86/dispatch_width.s @@ -1,7 +1,7 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -resource-pressure=false -instruction-info=false -mcpu=btver2 < %s 2>&1 | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -resource-pressure=false -instruction-info=false -mcpu=btver2 -dispatch=0 < %s 2>&1 | FileCheck --check-prefix=ALL --check-prefix=DEFAULT %s -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -resource-pressure=false -instruction-info=false -mcpu=btver2 -dispatch=1 < %s 2>&1 | FileCheck --check-prefix=ALL --check-prefix=CUSTOM %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown 
-resource-pressure=false -instruction-info=false -mcpu=btver2 < %s 2>&1 | FileCheck --check-prefixes=ALL,DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -resource-pressure=false -instruction-info=false -mcpu=btver2 -dispatch=0 < %s 2>&1 | FileCheck --check-prefixes=ALL,DEFAULT %s +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -resource-pressure=false -instruction-info=false -mcpu=btver2 -dispatch=1 < %s 2>&1 | FileCheck --check-prefixes=ALL,CUSTOM %s add %eax, %eax diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s index bdfe3319d191..a2d0acb0c762 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-1.s @@ -1,13 +1,13 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 vaddps %xmm0, %xmm0, %xmm1 vfmadd213ps (%rdi), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s index 7e4f69def3cc..1e8580119b56 100644 --- a/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/fma3-read-after-ld-2.s @@ -1,13 +1,13 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s 
-check-prefixes=ALL,HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 vaddps %xmm0, %xmm0, %xmm2 vfmadd213ps (%rdi), %xmm1, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s index 74c09f6c360b..dbd315284f26 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-1.s @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULLREPORT -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats=true < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats < %s | FileCheck %s -check-prefixes=ALL,FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats=true < %s | FileCheck %s -check-prefixes=ALL,FULLREPORT # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats=false < %s | FileCheck %s -check-prefix=ALL # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s -check-prefix=ALL diff --git a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s index c0a953621d2f..49b50a7f5296 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-stats-2.s @@ -1,9 +1,9 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats -dispatch-stats=false < %s | FileCheck %s -check-prefix=ALL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats -dispatch-stats < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -dispatch-stats 
-all-stats < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -dispatch-stats=false -all-stats < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats < %s | FileCheck %s -check-prefixes=ALL,FULL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-stats -dispatch-stats < %s | FileCheck %s -check-prefixes=ALL,FULL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -dispatch-stats -all-stats < %s | FileCheck %s -check-prefixes=ALL,FULL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -dispatch-stats=false -all-stats < %s | FileCheck %s -check-prefixes=ALL,FULL add %eax, %eax diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s index 27e3a67374cb..6b82e5e4f86a 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-views-1.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-views-1.s @@ -1,6 +1,6 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views < %s | FileCheck %s -check-prefix=DEFAULTREPORT -check-prefix=FULLREPORT -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=true < %s | FileCheck %s -check-prefix=DEFAULTREPORT -check-prefix=FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views < %s | FileCheck %s -check-prefixes=DEFAULTREPORT,FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=true < %s | FileCheck %s -check-prefixes=DEFAULTREPORT,FULLREPORT # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views=false < %s | FileCheck %s -check-prefix=NOREPORT -allow-empty # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 < %s | FileCheck %s -check-prefix=DEFAULTREPORT diff --git a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s index a19da0addbad..2d3e746fed7e 100644 --- a/llvm/test/tools/llvm-mca/X86/option-all-views-2.s +++ b/llvm/test/tools/llvm-mca/X86/option-all-views-2.s @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULLREPORT -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views -resource-pressure < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULLREPORT -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure -all-views < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULLREPORT -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -all-views < %s | FileCheck %s -check-prefix=ALL -check-prefix=FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views < %s | FileCheck %s -check-prefixes=ALL,FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views -resource-pressure < %s | FileCheck %s -check-prefixes=ALL,FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure -all-views < %s | FileCheck %s -check-prefixes=ALL,FULLREPORT +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -resource-pressure=false -all-views < %s | FileCheck %s -check-prefixes=ALL,FULLREPORT # RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -all-views 
-resource-pressure=false < %s | FileCheck %s -check-prefix=ALL add %eax, %eax diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s index 56473aff480f..e7d8dc0b015b 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-1.s @@ -1,13 +1,13 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SANDY -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BARCELONA -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SANDY +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BARCELONA +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BTVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 
-resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 vdivps %xmm0, %xmm1, %xmm1 vaddps (%rax), %xmm1, %xmm1 diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s index c958b27ba2b4..2f738088ccb3 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-2.s @@ -1,13 +1,13 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=100 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 # Code Snippet from "Ithemal: Accurate, Portable and Fast Basic Block Throughput Estimation using Deep Neural Networks" # Charith Mendis, Saman Amarasinghe, Michael Carbin diff --git a/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s b/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s index f32b7d2734dd..9b8b3aac5b9c 100644 --- a/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s +++ b/llvm/test/tools/llvm-mca/X86/read-after-ld-3.s @@ -1,8 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SANDY -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL -# RUN: llvm-mca 
-mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SANDY +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -resource-pressure=false -instruction-info=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE # PR36951 addl %edi, %esi diff --git a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s index 7aecf241434a..87a1844100c3 100644 --- a/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s +++ b/llvm/test/tools/llvm-mca/X86/sqrt-rsqrt-rcp-memop.s @@ -1,12 +1,12 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BARCELONA -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=BROADWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BARCELONA +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BDVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BTVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -all-views=false -timeline < %s | FileCheck %s 
-check-prefixes=ALL,ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,BROADWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -all-views=false -timeline < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE #LLVM-MCA-BEGIN test_sqrtss leaq 8(%rsp, %rdi, 2), %rax diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s index 88f55a35b934..6a43957d049a 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-1.s @@ -1,21 +1,21 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SANDY +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,SANDY -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=IVY +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,IVY -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BDVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -instruction-info=false 
-resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BTVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 vaddps %xmm0, %xmm0, %xmm1 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 diff --git a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s index 4403c1700e9a..ea5b62f0ce86 100644 --- a/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s +++ b/llvm/test/tools/llvm-mca/X86/variable-blend-read-after-ld-2.s @@ -1,21 +1,21 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SANDY +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,SANDY -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=IVY +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=ivybridge -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,IVY -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=HASWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=haswell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,HASWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDWELL +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=broadwell -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BDWELL -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=SKYLAKE +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=skylake -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,SKYLAKE -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BDVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=bdver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s 
-check-prefixes=ALL,BDVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=BTVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=btver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,BTVER2 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER1 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver1 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER1 -# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefix=ALL -check-prefix=ZNVER2 +# RUN: llvm-mca -mtriple=x86_64-unknown-unknown -mcpu=znver2 -iterations=1 -timeline -instruction-info=false -resource-pressure=false < %s | FileCheck %s -check-prefixes=ALL,ZNVER2 vaddps %xmm0, %xmm0, %xmm2 vblendvps %xmm1, (%rdi), %xmm2, %xmm3 diff --git a/llvm/test/tools/llvm-objcopy/ELF/prefix-symbols.test b/llvm/test/tools/llvm-objcopy/ELF/prefix-symbols.test index 7538a1c5d227..3445183db77a 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/prefix-symbols.test +++ b/llvm/test/tools/llvm-objcopy/ELF/prefix-symbols.test @@ -1,8 +1,8 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objcopy --prefix-symbols prefix %t %t2 -# RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefix=COMMON --check-prefix=BASIC +# RUN: llvm-readobj --symbols %t2 | FileCheck %s --check-prefixes=COMMON,BASIC # RUN: llvm-objcopy --redefine-sym bar=baz --prefix-symbols prefix %t %t3 -# RUN: llvm-readobj --symbols %t3 | FileCheck %s --check-prefix=COMMON --check-prefix=REDEF +# RUN: llvm-readobj --symbols %t3 | FileCheck %s --check-prefixes=COMMON,REDEF !ELF FileHeader: diff --git a/llvm/test/tools/llvm-profdata/c-general.test b/llvm/test/tools/llvm-profdata/c-general.test index d4ed384ad603..ae4bc0a34d25 100644 --- a/llvm/test/tools/llvm-profdata/c-general.test +++ b/llvm/test/tools/llvm-profdata/c-general.test @@ -10,7 +10,7 @@ $ LLVM_PROFILE_FILE=$TESTDIR/Inputs/c-general.profraw ./a.out RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - | FileCheck %s RUN: llvm-profdata show %p/Inputs/c-general.profraw --topn=3 -o - | FileCheck %s --check-prefix=TOPN -RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - --function=switches | FileCheck %s -check-prefix=SWITCHES -check-prefix=CHECK +RUN: llvm-profdata show %p/Inputs/c-general.profraw -o - --function=switches | FileCheck %s -check-prefixes=SWITCHES,CHECK SWITCHES-LABEL: Counters: SWITCHES-NEXT: switches: diff --git a/llvm/test/tools/llvm-profdata/cutoff.test b/llvm/test/tools/llvm-profdata/cutoff.test index f04ea16f324e..9d59ede4e842 100644 --- a/llvm/test/tools/llvm-profdata/cutoff.test +++ b/llvm/test/tools/llvm-profdata/cutoff.test @@ -1,11 +1,11 @@ Basic tests for cutoff options in show command. 
-RUN: llvm-profdata show -value-cutoff=1 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefix=CUTOFF1 -check-prefix=CHECK -RUN: llvm-profdata show -value-cutoff=1000 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefix=CUTOFF1000 -check-prefix=CHECK -RUN: llvm-profdata show -all-functions -value-cutoff=1 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefix=CUTOFF1FUNC -check-prefix=CUTOFF1 -check-prefix=CHECK -RUN: llvm-profdata show -all-functions -value-cutoff=1000 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefix=CUTOFF1000FUNC -check-prefix=CUTOFF1000 -check-prefix=CHECK -RUN: llvm-profdata show -value-cutoff=1 -list-below-cutoff %p/Inputs/cutoff.proftext | FileCheck %s -check-prefix=BELOW1 -check-prefix=CUTOFF1 -check-prefix=CHECK -RUN: llvm-profdata show -value-cutoff=1000 -list-below-cutoff %p/Inputs/cutoff.proftext | FileCheck %s -check-prefix=BELOW1000 -check-prefix=CUTOFF1000 -check-prefix=CHECK +RUN: llvm-profdata show -value-cutoff=1 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefixes=CUTOFF1,CHECK +RUN: llvm-profdata show -value-cutoff=1000 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefixes=CUTOFF1000,CHECK +RUN: llvm-profdata show -all-functions -value-cutoff=1 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefixes=CUTOFF1FUNC,CUTOFF1,CHECK +RUN: llvm-profdata show -all-functions -value-cutoff=1000 %p/Inputs/cutoff.proftext | FileCheck %s -check-prefixes=CUTOFF1000FUNC,CUTOFF1000,CHECK +RUN: llvm-profdata show -value-cutoff=1 -list-below-cutoff %p/Inputs/cutoff.proftext | FileCheck %s -check-prefixes=BELOW1,CUTOFF1,CHECK +RUN: llvm-profdata show -value-cutoff=1000 -list-below-cutoff %p/Inputs/cutoff.proftext | FileCheck %s -check-prefixes=BELOW1000,CUTOFF1000,CHECK CUTOFF1FUNC-NOT: bar CUTOFF1FUNC: Functions shown: 2 CUTOFF1000FUNC-NOT: bar diff --git a/llvm/test/tools/llvm-profdata/hash-mismatch.proftext b/llvm/test/tools/llvm-profdata/hash-mismatch.proftext index fe0d4fb4f6b5..24908e288d6b 100644 --- a/llvm/test/tools/llvm-profdata/hash-mismatch.proftext +++ b/llvm/test/tools/llvm-profdata/hash-mismatch.proftext @@ -6,8 +6,8 @@ # The function ordering is non-deterministic, so we need to do our # checks in multiple runs. 
-# RUN: FileCheck -check-prefix=FOO3 -check-prefix=BOTH %s -input-file %t.out -# RUN: FileCheck -check-prefix=FOO4 -check-prefix=BOTH %s -input-file %t.out +# RUN: FileCheck -check-prefixes=FOO3,BOTH %s -input-file %t.out +# RUN: FileCheck -check-prefixes=FOO4,BOTH %s -input-file %t.out # FOO3: Hash: 0x{{0+}}3 # FOO3-NEXT: Counters: 3 diff --git a/llvm/test/tools/llvm-profdata/multiple-inputs.test b/llvm/test/tools/llvm-profdata/multiple-inputs.test index 399438a4d2df..3a3caeac0c2e 100644 --- a/llvm/test/tools/llvm-profdata/multiple-inputs.test +++ b/llvm/test/tools/llvm-profdata/multiple-inputs.test @@ -23,8 +23,8 @@ FOO3EMPTY: Maximum function count: 1 FOO3EMPTY: Maximum internal block count: 3 RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3bar3-1.proftext -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3 --check-prefix=FOO3FOO3BAR3-1 -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO3FOO3BAR3 --check-prefix=FOO3FOO3BAR3-2 +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefixes=FOO3FOO3BAR3,FOO3FOO3BAR3-1 +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefixes=FOO3FOO3BAR3,FOO3FOO3BAR3-2 FOO3FOO3BAR3-1: foo: FOO3FOO3BAR3-1: Counters: 3 FOO3FOO3BAR3-1: Function count: 3 @@ -38,8 +38,8 @@ FOO3FOO3BAR3: Maximum function count: 7 FOO3FOO3BAR3: Maximum internal block count: 13 RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/bar3-1.proftext -o %t -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=DISJOINT --check-prefix=DISJOINT-1 -RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=DISJOINT --check-prefix=DISJOINT-2 +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefixes=DISJOINT,DISJOINT-1 +RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefixes=DISJOINT,DISJOINT-2 DISJOINT-1: foo: DISJOINT-1: Counters: 3 DISJOINT-1: Function count: 1 diff --git a/llvm/test/tools/llvm-profdata/overlap.test b/llvm/test/tools/llvm-profdata/overlap.test index a706a43550d0..91379d5722e6 100644 --- a/llvm/test/tools/llvm-profdata/overlap.test +++ b/llvm/test/tools/llvm-profdata/overlap.test @@ -1,6 +1,6 @@ RUN: llvm-profdata overlap %p/Inputs/overlap_1.proftext %p/Inputs/overlap_2.proftext | FileCheck %s -check-prefix=OVERLAP -RUN: llvm-profdata overlap -function=main %p/Inputs/overlap_1.proftext %p/Inputs/overlap_2.proftext | FileCheck %s -check-prefix=MAINFUNC -check-prefix=OVERLAP -RUN: llvm-profdata overlap -value-cutoff=15000 %p/Inputs/overlap_1.proftext %p/Inputs/overlap_2.proftext | FileCheck %s -check-prefix=MAINFUNC -check-prefix=OVERLAP +RUN: llvm-profdata overlap -function=main %p/Inputs/overlap_1.proftext %p/Inputs/overlap_2.proftext | FileCheck %s --check-prefixes=MAINFUNC,OVERLAP +RUN: llvm-profdata overlap -value-cutoff=15000 %p/Inputs/overlap_1.proftext %p/Inputs/overlap_2.proftext | FileCheck %s --check-prefixes=MAINFUNC,OVERLAP RUN: llvm-profdata merge %p/Inputs/overlap_1.proftext -o %t_1.profdata RUN: llvm-profdata merge %p/Inputs/overlap_2.proftext -o %t_2.profdata RUN: llvm-profdata overlap %t_1.profdata %t_2.profdata | FileCheck %s -check-prefix=OVERLAP diff --git a/llvm/test/tools/llvm-profdata/value-prof.proftext b/llvm/test/tools/llvm-profdata/value-prof.proftext index 0d388215792f..ec09e40e496d 100644 --- a/llvm/test/tools/llvm-profdata/value-prof.proftext +++ b/llvm/test/tools/llvm-profdata/value-prof.proftext 
@@ -1,7 +1,7 @@ -# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefix=ICTXT --check-prefix=ICSUM +# RUN: llvm-profdata show -ic-targets -all-functions %s | FileCheck %s --check-prefixes=ICTXT,ICSUM # RUN: llvm-profdata show -ic-targets -counts -text -all-functions %s | FileCheck %s --check-prefix=ICTEXT # RUN: llvm-profdata merge -o %t.profdata %s -# RUN: llvm-profdata show -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefix=IC --check-prefix=ICSUM +# RUN: llvm-profdata show -ic-targets -all-functions %t.profdata | FileCheck %s --check-prefixes=IC,ICSUM foo # Func Hash: diff --git a/llvm/test/tools/llvm-profdata/weight-instr.test b/llvm/test/tools/llvm-profdata/weight-instr.test index 927a89647b45..83ea1ca6f6fa 100644 --- a/llvm/test/tools/llvm-profdata/weight-instr.test +++ b/llvm/test/tools/llvm-profdata/weight-instr.test @@ -3,16 +3,16 @@ Tests for weighted merge of instrumented profiles. 1- Merge the foo and bar profiles with unity weight and verify the combined output RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata -weighted-input=1,%p/Inputs/weight-instr-foo.profdata -o %t RUN: llvm-profdata show -instr -all-functions %t > %t.out1 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-1 < %t.out1 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-2 < %t.out1 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-3 < %t.out1 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-4 < %t.out1 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-1 < %t.out1 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-2 < %t.out1 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-3 < %t.out1 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-4 < %t.out1 RUN: llvm-profdata merge -instr -weighted-input=1,%p/Inputs/weight-instr-bar.profdata %p/Inputs/weight-instr-foo.profdata -o %t RUN: llvm-profdata show -instr -all-functions %t > %t.out2 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-1 < %t.out2 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-2 < %t.out2 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-3 < %t.out2 -RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-4 < %t.out2 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-1 < %t.out2 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-2 < %t.out2 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-3 < %t.out2 +RUN: FileCheck %s -check-prefixes=1X_1X_WEIGHT,1X_1X_WEIGHT-4 < %t.out2 1X_1X_WEIGHT: Counters: 1X_1X_WEIGHT-1: usage: 1X_1X_WEIGHT-1: Hash: 0x0000000000000000 @@ -38,10 +38,10 @@ RUN: FileCheck %s -check-prefix=1X_1X_WEIGHT --check-prefix=1X_1X_WEIGHT-4 < %t. 
2- Merge the foo and bar profiles with weight 3x and 5x respectively and verify the combined output RUN: llvm-profdata merge -instr -weighted-input=3,%p/Inputs/weight-instr-bar.profdata -weighted-input=5,%p/Inputs/weight-instr-foo.profdata -o %t RUN: llvm-profdata show -instr -all-functions %t > %t.out3 -RUN: FileCheck %s -check-prefix=3X_5X_WEIGHT --check-prefix=3X_5X_WEIGHT-1 < %t.out3 -RUN: FileCheck %s -check-prefix=3X_5X_WEIGHT --check-prefix=3X_5X_WEIGHT-2 < %t.out3 -RUN: FileCheck %s -check-prefix=3X_5X_WEIGHT --check-prefix=3X_5X_WEIGHT-3 < %t.out3 -RUN: FileCheck %s -check-prefix=3X_5X_WEIGHT --check-prefix=3X_5X_WEIGHT-4 < %t.out3 +RUN: FileCheck %s -check-prefixes=3X_5X_WEIGHT,3X_5X_WEIGHT-1 < %t.out3 +RUN: FileCheck %s -check-prefixes=3X_5X_WEIGHT,3X_5X_WEIGHT-2 < %t.out3 +RUN: FileCheck %s -check-prefixes=3X_5X_WEIGHT,3X_5X_WEIGHT-3 < %t.out3 +RUN: FileCheck %s -check-prefixes=3X_5X_WEIGHT,3X_5X_WEIGHT-4 < %t.out3 3X_5X_WEIGHT: Counters: 3X_5X_WEIGHT-1: usage: 3X_5X_WEIGHT-1: Hash: 0x0000000000000000 diff --git a/llvm/test/tools/llvm-readobj/COFF/exports.test b/llvm/test/tools/llvm-readobj/COFF/exports.test index c0c977d9d4bb..48ff57289eea 100644 --- a/llvm/test/tools/llvm-readobj/COFF/exports.test +++ b/llvm/test/tools/llvm-readobj/COFF/exports.test @@ -1,6 +1,6 @@ -RUN: llvm-readobj --coff-exports %p/Inputs/export-x86.dll | FileCheck %s -check-prefix CHECK -check-prefix CHECK-X86 -RUN: llvm-readobj --coff-exports %p/Inputs/export-x64.dll | FileCheck %s -check-prefix CHECK -check-prefix CHECK-X64 -RUN: llvm-readobj --coff-exports %p/Inputs/export-arm.dll | FileCheck %s -check-prefix CHECK -check-prefix CHECK-ARM +RUN: llvm-readobj --coff-exports %p/Inputs/export-x86.dll | FileCheck %s -check-prefixes=CHECK,CHECK-X86 +RUN: llvm-readobj --coff-exports %p/Inputs/export-x64.dll | FileCheck %s -check-prefixes=CHECK,CHECK-X64 +RUN: llvm-readobj --coff-exports %p/Inputs/export-arm.dll | FileCheck %s -check-prefixes=CHECK,CHECK-ARM CHECK: Export { CHECK: Ordinal: 1 diff --git a/llvm/test/tools/llvm-symbolizer/split-dwarf.test b/llvm/test/tools/llvm-symbolizer/split-dwarf.test index 614db9662801..af758acd7e09 100644 --- a/llvm/test/tools/llvm-symbolizer/split-dwarf.test +++ b/llvm/test/tools/llvm-symbolizer/split-dwarf.test @@ -5,14 +5,14 @@ RUN: cp %p/Inputs/split-dwarf-test.dwo %t RUN: cd %t RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ -RUN: --obj=%p/Inputs/split-dwarf-test 0x400504 0x4004f4 | FileCheck --check-prefix=SPLIT --check-prefix=DWO %s +RUN: --obj=%p/Inputs/split-dwarf-test 0x400504 0x4004f4 | FileCheck --check-prefixes=SPLIT,DWO %s Ensure we get the same results in the absence of gmlt-like data in the executable but the presence of a .dwo file RUN: echo "%p/Inputs/split-dwarf-test-nogmlt 0x400504" >> %t.input RUN: echo "%p/Inputs/split-dwarf-test-nogmlt 0x4004f4" >> %t.input RUN: llvm-symbolizer --functions=linkage --inlining --demangle=false \ -RUN: --default-arch=i386 --obj=%p/Inputs/split-dwarf-test-nogmlt 0x400504 0x4004f4 | FileCheck --check-prefix=SPLIT --check-prefix=DWO %s +RUN: --default-arch=i386 --obj=%p/Inputs/split-dwarf-test-nogmlt 0x400504 0x4004f4 | FileCheck --check-prefixes=SPLIT,DWO %s Ensure we get gmlt like results in the absence of a .dwo file but the presence of gmlt-like data in the executable @@ -20,7 +20,7 @@ RUN: rm %t/split-dwarf-test.dwo RUN: echo "%p/Inputs/split-dwarf-test 0x400504" >> %t.input RUN: echo "%p/Inputs/split-dwarf-test 0x4004f4" >> %t.input RUN: llvm-symbolizer --functions=linkage --inlining 
--demangle=false \ -RUN: --default-arch=i386 --obj=%p/Inputs/split-dwarf-test 0x400504 0x4004f4 | FileCheck --check-prefix=SPLIT --check-prefix=NODWO %s +RUN: --default-arch=i386 --obj=%p/Inputs/split-dwarf-test 0x400504 0x4004f4 | FileCheck --check-prefixes=SPLIT,NODWO %s DWO: _Z2f2v NODWO: {{^f2$}} From 7f8571e57971bd0b3ba85419766397342b4ffb01 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 18:10:59 +0100 Subject: [PATCH 144/216] MCSectionXCOFF.h - remove unnecessary Twine.h include. NFC --- llvm/include/llvm/MC/MCSectionXCOFF.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/MC/MCSectionXCOFF.h b/llvm/include/llvm/MC/MCSectionXCOFF.h index 1ffb1444b564..a9ef7790bc4e 100644 --- a/llvm/include/llvm/MC/MCSectionXCOFF.h +++ b/llvm/include/llvm/MC/MCSectionXCOFF.h @@ -13,7 +13,6 @@ #ifndef LLVM_MC_MCSECTIONXCOFF_H #define LLVM_MC_MCSECTIONXCOFF_H -#include "llvm/ADT/Twine.h" #include "llvm/BinaryFormat/XCOFF.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCSymbolXCOFF.h" From a0ae3d55ae278a47b2ac7eb4d4aff816d0901bd0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 18:13:23 +0100 Subject: [PATCH 145/216] MCWasmStreamer.h.h - cleanup includes and forward declarations. NFC. Remove unnecessary SmallPtrSet.h/SectionKind.h includes Remove unused MCAssembler/raw_ostream forward declarations --- llvm/include/llvm/MC/MCWasmStreamer.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm/MC/MCWasmStreamer.h b/llvm/include/llvm/MC/MCWasmStreamer.h index 84531498388e..c46f1cd321a5 100644 --- a/llvm/include/llvm/MC/MCWasmStreamer.h +++ b/llvm/include/llvm/MC/MCWasmStreamer.h @@ -11,18 +11,14 @@ #include "MCAsmBackend.h" #include "MCCodeEmitter.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/MC/MCDirectives.h" #include "llvm/MC/MCObjectStreamer.h" #include "llvm/MC/MCObjectWriter.h" -#include "llvm/MC/SectionKind.h" #include "llvm/Support/DataTypes.h" namespace llvm { -class MCAssembler; class MCExpr; class MCInst; -class raw_ostream; class MCWasmStreamer : public MCObjectStreamer { public: From 711cdd474f72c51dee3356ae227dd2884fe41983 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 18:21:35 +0100 Subject: [PATCH 146/216] MCStreamer.h - remove unused llvm::MCCodePaddingContext forward declaration. NFC. --- llvm/include/llvm/MC/MCStreamer.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/MC/MCStreamer.h b/llvm/include/llvm/MC/MCStreamer.h index 4a34e1497ccf..78ad95c1de26 100644 --- a/llvm/include/llvm/MC/MCStreamer.h +++ b/llvm/include/llvm/MC/MCStreamer.h @@ -40,7 +40,6 @@ class AssemblerConstantPools; class formatted_raw_ostream; class MCAsmBackend; class MCCodeEmitter; -struct MCCodePaddingContext; class MCContext; struct MCDwarfFrameInfo; class MCExpr; From 29bfcbe832799db96794b2871ad3ab7576e3764d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 18:25:44 +0100 Subject: [PATCH 147/216] ConstantPools.h - remove unused DenseMap.h include. NFC. 
--- llvm/include/llvm/MC/ConstantPools.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/MC/ConstantPools.h b/llvm/include/llvm/MC/ConstantPools.h index 2fe5ce252c94..9fe0cce8d68c 100644 --- a/llvm/include/llvm/MC/ConstantPools.h +++ b/llvm/include/llvm/MC/ConstantPools.h @@ -13,7 +13,6 @@ #ifndef LLVM_MC_CONSTANTPOOLS_H #define LLVM_MC_CONSTANTPOOLS_H -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/SMLoc.h" From bcd7f77713f3df09965e33ff2b7fdd603915d40a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 16 Apr 2020 19:31:45 +0100 Subject: [PATCH 148/216] MCObjectWriter.h - remove Endian.h/EndianStream.h/raw_ostream.h includes. NFC Push these includes down to the the writers that actually need them, a number of which were implicitly relying on the MCObjectWriter.h. --- llvm/include/llvm/MC/MCMachObjectWriter.h | 1 + llvm/include/llvm/MC/MCObjectWriter.h | 3 --- llvm/lib/MC/ELFObjectWriter.cpp | 2 +- llvm/lib/MC/MCAssembler.cpp | 1 + llvm/lib/MC/WasmObjectWriter.cpp | 1 + llvm/lib/MC/WinCOFFObjectWriter.cpp | 2 +- llvm/lib/MC/XCOFFObjectWriter.cpp | 1 + llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 1 + llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp | 1 + llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp | 1 + llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp | 1 + 11 files changed, 10 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/MC/MCMachObjectWriter.h b/llvm/include/llvm/MC/MCMachObjectWriter.h index 853e5066f039..bff8808cf4ff 100644 --- a/llvm/include/llvm/MC/MCMachObjectWriter.h +++ b/llvm/include/llvm/MC/MCMachObjectWriter.h @@ -16,6 +16,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/EndianStream.h" #include #include #include diff --git a/llvm/include/llvm/MC/MCObjectWriter.h b/llvm/include/llvm/MC/MCObjectWriter.h index 0d742019321c..ddc2301c04c1 100644 --- a/llvm/include/llvm/MC/MCObjectWriter.h +++ b/llvm/include/llvm/MC/MCObjectWriter.h @@ -10,9 +10,6 @@ #define LLVM_MC_MCOBJECTWRITER_H #include "llvm/ADT/Triple.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/EndianStream.h" -#include "llvm/Support/raw_ostream.h" #include namespace llvm { diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 12f66baab2b4..1ca9d0fe1e18 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -40,7 +40,7 @@ #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compression.h" -#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" diff --git a/llvm/lib/MC/MCAssembler.cpp b/llvm/lib/MC/MCAssembler.cpp index 8949a4d9ba87..b36f45c37283 100644 --- a/llvm/lib/MC/MCAssembler.cpp +++ b/llvm/lib/MC/MCAssembler.cpp @@ -33,6 +33,7 @@ #include "llvm/Support/Alignment.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" diff --git a/llvm/lib/MC/WasmObjectWriter.cpp b/llvm/lib/MC/WasmObjectWriter.cpp index 4d0c71649e87..d51d3e1b5f71 100644 --- a/llvm/lib/MC/WasmObjectWriter.cpp +++ b/llvm/lib/MC/WasmObjectWriter.cpp @@ -27,6 +27,7 @@ #include "llvm/MC/MCWasmObjectWriter.h" #include 
"llvm/Support/Casting.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/StringSaver.h" diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 7adb9e1a751d..c6829f5e107a 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -34,7 +34,7 @@ #include "llvm/MC/StringTableBuilder.h" #include "llvm/Support/CRC.h" #include "llvm/Support/Casting.h" -#include "llvm/Support/Endian.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/LEB128.h" #include "llvm/Support/MathExtras.h" diff --git a/llvm/lib/MC/XCOFFObjectWriter.cpp b/llvm/lib/MC/XCOFFObjectWriter.cpp index 5def2357d612..aa3a6424a7a7 100644 --- a/llvm/lib/MC/XCOFFObjectWriter.cpp +++ b/llvm/lib/MC/XCOFFObjectWriter.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCValue.h" #include "llvm/MC/MCXCOFFObjectWriter.h" #include "llvm/MC/StringTableBuilder.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/Error.h" #include "llvm/Support/MathExtras.h" diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index 47d115a867a0..4e9bcb503bb5 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -24,6 +24,7 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 1f94ab799122..07ab99ef94ed 100644 --- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -17,6 +17,7 @@ #include "llvm/MC/MCFixupKindInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/TargetRegistry.h" #include "Utils/AMDGPUBaseInfo.h" diff --git a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp index ef7db3933e36..bc537bcc4c47 100644 --- a/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp +++ b/llvm/lib/Target/Hexagon/MCTargetDesc/HexagonAsmBackend.cpp @@ -22,6 +22,7 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectWriter.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/TargetRegistry.h" #include diff --git a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp index 2e8fa0dbaf4c..76ff446e8928 100644 --- a/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp +++ b/llvm/lib/Target/Sparc/MCTargetDesc/SparcAsmBackend.cpp @@ -15,6 +15,7 @@ #include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCValue.h" +#include "llvm/Support/EndianStream.h" #include "llvm/Support/TargetRegistry.h" using namespace llvm; From fa7f328a15536e9c7fa82608152632254277c787 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Apr 2020 11:06:26 +0100 Subject: [PATCH 149/216] [cmake] LLVMVectorize - add include/llvm/Transforms/Vectorize header path MSVC projects were missing the 
llvm/Transforms/Vectorize/* headers --- llvm/lib/Transforms/Vectorize/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Transforms/Vectorize/CMakeLists.txt b/llvm/lib/Transforms/Vectorize/CMakeLists.txt index 19ad82f2914e..6f3071ea74a1 100644 --- a/llvm/lib/Transforms/Vectorize/CMakeLists.txt +++ b/llvm/lib/Transforms/Vectorize/CMakeLists.txt @@ -14,6 +14,7 @@ add_llvm_component_library(LLVMVectorize ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Transforms/Vectorize DEPENDS intrinsics_gen From c819ef9653635b4a15c1390c28bfa4cdff614396 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Tue, 31 Mar 2020 11:57:51 +0100 Subject: [PATCH 150/216] Provide operand indices to adjustSchedDependency This allows targets to know exactly which operands are contributing to the dependency, which is required for targets with per-operand scheduling models. Differential Revision: https://reviews.llvm.org/D77135 --- .../llvm/CodeGen/TargetSubtargetInfo.h | 8 +++++-- llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 6 ++--- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 3 ++- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp | 24 +++++++++---------- llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h | 3 ++- llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 3 ++- llvm/lib/Target/Hexagon/HexagonSubtarget.h | 3 ++- 8 files changed, 30 insertions(+), 22 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h index 6768cea89406..395c9e16efa8 100644 --- a/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h +++ b/llvm/include/llvm/CodeGen/TargetSubtargetInfo.h @@ -224,9 +224,13 @@ class TargetSubtargetInfo : public MCSubtargetInfo { virtual void overrideSchedPolicy(MachineSchedPolicy &Policy, unsigned NumRegionInstrs) const {} - // Perform target specific adjustments to the latency of a schedule + // Perform target-specific adjustments to the latency of a schedule // dependency. - virtual void adjustSchedDependency(SUnit *def, SUnit *use, SDep &dep) const {} + // If a pair of operands is associated with the schedule dependency, DefOpIdx + // and UseOpIdx are the indices of the operands in Def and Use, respectively. + // Otherwise, either may be -1. + virtual void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, + int UseOpIdx, SDep &Dep) const {} // For use with PostRAScheduling: get the anti-dependence breaking that should // be performed before post-RA scheduling. 
diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index efb44054096c..3465aaada873 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -809,7 +809,7 @@ void SwingSchedulerDAG::updatePhiDependences() { if (!MI->isPHI()) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(0); - ST.adjustSchedDependency(SU, &I, Dep); + ST.adjustSchedDependency(SU, 0, &I, MI->getOperandNo(MOI), Dep); I.addPred(Dep); } else { HasPhiUse = Reg; diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index d11406cc330f..039de203fca7 100644 --- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -269,13 +269,13 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { if (!ImplicitPseudoDef && !ImplicitPseudoUse) { Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp)); - ST.adjustSchedDependency(SU, UseSU, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); } else { Dep.setLatency(0); // FIXME: We could always let target to adjustSchedDependency(), and // remove this condition, but that currently asserts in Hexagon BE. if (SU->getInstr()->isBundle() || (RegUse && RegUse->isBundle())) - ST.adjustSchedDependency(SU, UseSU, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep); } UseSU->addPred(Dep); @@ -444,7 +444,7 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SDep Dep(SU, SDep::Data, Reg); Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, I->OperandIndex)); - ST.adjustSchedDependency(SU, UseSU, Dep); + ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep); UseSU->addPred(Dep); } diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 2a7084edefa2..794f0236fe4c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -474,6 +474,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { SDNode *OpN = N->getOperand(i).getNode(); + unsigned DefIdx = N->getOperand(i).getResNo(); if (isPassiveNode(OpN)) continue; // Not scheduled. 
SUnit *OpSU = &SUnits[OpN->getNodeId()]; assert(OpSU && "Node has no SUnit!"); @@ -508,7 +509,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() { Dep.setLatency(OpLatency); if (!isChain && !UnitLatencies) { computeOperandLatency(OpN, N, i, Dep); - ST.adjustSchedDependency(OpSU, SU, Dep); + ST.adjustSchedDependency(OpSU, DefIdx, SU, i, Dep); } if (!SU->addPred(Dep) && !Dep.isCtrl() && OpSU->NumRegDefsLeft > 1) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp index 91c1bb4fb130..58fee94f5c6a 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp @@ -722,20 +722,20 @@ unsigned GCNSubtarget::getMaxNumVGPRs(const MachineFunction &MF) const { return MaxNumVGPRs; } -void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, - SDep &Dep) const { +void GCNSubtarget::adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, + int UseOpIdx, SDep &Dep) const { if (Dep.getKind() != SDep::Kind::Data || !Dep.getReg() || - !Src->isInstr() || !Dst->isInstr()) + !Def->isInstr() || !Use->isInstr()) return; - MachineInstr *SrcI = Src->getInstr(); - MachineInstr *DstI = Dst->getInstr(); + MachineInstr *DefI = Def->getInstr(); + MachineInstr *UseI = Use->getInstr(); - if (SrcI->isBundle()) { + if (DefI->isBundle()) { const SIRegisterInfo *TRI = getRegisterInfo(); auto Reg = Dep.getReg(); - MachineBasicBlock::const_instr_iterator I(SrcI->getIterator()); - MachineBasicBlock::const_instr_iterator E(SrcI->getParent()->instr_end()); + MachineBasicBlock::const_instr_iterator I(DefI->getIterator()); + MachineBasicBlock::const_instr_iterator E(DefI->getParent()->instr_end()); unsigned Lat = 0; for (++I; I != E && I->isBundledWithPred(); ++I) { if (I->modifiesRegister(Reg, TRI)) @@ -744,12 +744,12 @@ void GCNSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, --Lat; } Dep.setLatency(Lat); - } else if (DstI->isBundle()) { + } else if (UseI->isBundle()) { const SIRegisterInfo *TRI = getRegisterInfo(); auto Reg = Dep.getReg(); - MachineBasicBlock::const_instr_iterator I(DstI->getIterator()); - MachineBasicBlock::const_instr_iterator E(DstI->getParent()->instr_end()); - unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *SrcI); + MachineBasicBlock::const_instr_iterator I(UseI->getIterator()); + MachineBasicBlock::const_instr_iterator E(UseI->getParent()->instr_end()); + unsigned Lat = InstrInfo.getInstrLatency(getInstrItineraryData(), *DefI); for (++I; I != E && I->isBundledWithPred() && Lat; ++I) { if (I->readsRegister(Reg, TRI)) break; diff --git a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h index cadb328cc498..68372822b799 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h @@ -1193,7 +1193,8 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo, return AMDGPU::IsaInfo::getMinWavesPerEU(this); } - void adjustSchedDependency(SUnit *Src, SUnit *Dst, SDep &Dep) const override; + void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, + SDep &Dep) const override; }; class R600Subtarget final : public R600GenSubtargetInfo, diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp index 7bcc9560657f..4dcda3b10f4d 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.cpp @@ -315,7 +315,8 @@ bool HexagonSubtarget::useAA() const { /// Perform target specific adjustments to the latency of a schedule 
/// dependency. -void HexagonSubtarget::adjustSchedDependency(SUnit *Src, SUnit *Dst, +void HexagonSubtarget::adjustSchedDependency(SUnit *Src, int SrcOpIdx, + SUnit *Dst, int DstOpIdx, SDep &Dep) const { MachineInstr *SrcInst = Src->getInstr(); MachineInstr *DstInst = Dst->getInstr(); diff --git a/llvm/lib/Target/Hexagon/HexagonSubtarget.h b/llvm/lib/Target/Hexagon/HexagonSubtarget.h index c9f04651cf70..de4f245519e4 100644 --- a/llvm/lib/Target/Hexagon/HexagonSubtarget.h +++ b/llvm/lib/Target/Hexagon/HexagonSubtarget.h @@ -258,7 +258,8 @@ class HexagonSubtarget : public HexagonGenSubtargetInfo { /// Perform target specific adjustments to the latency of a schedule /// dependency. - void adjustSchedDependency(SUnit *def, SUnit *use, SDep& dep) const override; + void adjustSchedDependency(SUnit *Def, int DefOpIdx, SUnit *Use, int UseOpIdx, + SDep &Dep) const override; unsigned getVectorLength() const { assert(useHVXOps()); From 72c13446ce81050ced80184ec2db9684f081b8c3 Mon Sep 17 00:00:00 2001 From: Max Kazantsev Date: Fri, 17 Apr 2020 17:18:01 +0700 Subject: [PATCH 151/216] [NFC] Add missing 'const' notion to LCSSA-related functions These functions don't really do any changes to loop info or dominator tree. We should state this explicitly using 'const'. --- llvm/include/llvm/Analysis/LoopInfo.h | 5 +++-- llvm/include/llvm/Transforms/Utils/LoopUtils.h | 7 ++++--- llvm/lib/Analysis/LoopInfo.cpp | 7 ++++--- llvm/lib/Transforms/Utils/LCSSA.cpp | 14 +++++++------- 4 files changed, 18 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Analysis/LoopInfo.h b/llvm/include/llvm/Analysis/LoopInfo.h index 6228505db6be..049802024985 100644 --- a/llvm/include/llvm/Analysis/LoopInfo.h +++ b/llvm/include/llvm/Analysis/LoopInfo.h @@ -780,10 +780,11 @@ class Loop : public LoopBase { bool isCanonical(ScalarEvolution &SE) const; /// Return true if the Loop is in LCSSA form. - bool isLCSSAForm(DominatorTree &DT) const; + bool isLCSSAForm(const DominatorTree &DT) const; /// Return true if this Loop and all inner subloops are in LCSSA form. - bool isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const; + bool isRecursivelyLCSSAForm(const DominatorTree &DT, + const LoopInfo &LI) const; /// Return true if the Loop is in the form that the LoopSimplify form /// transforms loops to, which is sometimes called normal form. diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index 018f5e217922..54fc3a7ca1fd 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -77,7 +77,7 @@ bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI, /// /// Returns true if any modifications are made. bool formLCSSAForInstructions(SmallVectorImpl &Worklist, - DominatorTree &DT, LoopInfo &LI, + const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE); /// Put loop into LCSSA form. @@ -92,7 +92,8 @@ bool formLCSSAForInstructions(SmallVectorImpl &Worklist, /// If ScalarEvolution is passed in, it will be preserved. /// /// Returns true if any modifications are made to the loop. -bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); +bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, + ScalarEvolution *SE); /// Put a loop nest into LCSSA form. /// @@ -103,7 +104,7 @@ bool formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, ScalarEvolution *SE); /// If ScalarEvolution is passed in, it will be preserved. 
/// /// Returns true if any modifications are made to the loop. -bool formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, +bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE); struct SinkAndHoistLICMFlags { diff --git a/llvm/lib/Analysis/LoopInfo.cpp b/llvm/lib/Analysis/LoopInfo.cpp index 6c8f65ca7be6..b5af210f1b92 100644 --- a/llvm/lib/Analysis/LoopInfo.cpp +++ b/llvm/lib/Analysis/LoopInfo.cpp @@ -420,7 +420,7 @@ bool Loop::isCanonical(ScalarEvolution &SE) const { // Check that 'BB' doesn't have any uses outside of the 'L' static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, - DominatorTree &DT) { + const DominatorTree &DT) { for (const Instruction &I : BB) { // Tokens can't be used in PHI nodes and live-out tokens prevent loop // optimizations, so for the purposes of considered LCSSA form, we @@ -446,14 +446,15 @@ static bool isBlockInLCSSAForm(const Loop &L, const BasicBlock &BB, return true; } -bool Loop::isLCSSAForm(DominatorTree &DT) const { +bool Loop::isLCSSAForm(const DominatorTree &DT) const { // For each block we check that it doesn't have any uses outside of this loop. return all_of(this->blocks(), [&](const BasicBlock *BB) { return isBlockInLCSSAForm(*this, *BB, DT); }); } -bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT, const LoopInfo &LI) const { +bool Loop::isRecursivelyLCSSAForm(const DominatorTree &DT, + const LoopInfo &LI) const { // For each block we check that it doesn't have any uses outside of its // innermost loop. This process will transitively guarantee that the current // loop and all of the nested loops are in LCSSA form. diff --git a/llvm/lib/Transforms/Utils/LCSSA.cpp b/llvm/lib/Transforms/Utils/LCSSA.cpp index 5746d69260d5..b1a1c564d217 100644 --- a/llvm/lib/Transforms/Utils/LCSSA.cpp +++ b/llvm/lib/Transforms/Utils/LCSSA.cpp @@ -76,7 +76,7 @@ static bool isExitBlock(BasicBlock *BB, /// that are outside the current loop. If so, insert LCSSA PHI nodes and /// rewrite the uses. bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, - DominatorTree &DT, LoopInfo &LI, + const DominatorTree &DT, const LoopInfo &LI, ScalarEvolution *SE) { SmallVector UsesToRewrite; SmallSetVector PHIsToRemove; @@ -128,7 +128,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, if (auto *Inv = dyn_cast(I)) DomBB = Inv->getNormalDest(); - DomTreeNode *DomNode = DT.getNode(DomBB); + const DomTreeNode *DomNode = DT.getNode(DomBB); SmallVector AddedPHIs; SmallVector PostProcessPHIs; @@ -274,7 +274,7 @@ bool llvm::formLCSSAForInstructions(SmallVectorImpl &Worklist, // Compute the set of BasicBlocks in the loop `L` dominating at least one exit. static void computeBlocksDominatingExits( - Loop &L, DominatorTree &DT, SmallVector &ExitBlocks, + Loop &L, const DominatorTree &DT, SmallVector &ExitBlocks, SmallSetVector &BlocksDominatingExits) { SmallVector BBWorklist; @@ -318,7 +318,7 @@ static void computeBlocksDominatingExits( } } -bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, +bool llvm::formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; @@ -383,8 +383,8 @@ bool llvm::formLCSSA(Loop &L, DominatorTree &DT, LoopInfo *LI, } /// Process a loop nest depth first. 
-bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, - ScalarEvolution *SE) { +bool llvm::formLCSSARecursively(Loop &L, const DominatorTree &DT, + const LoopInfo *LI, ScalarEvolution *SE) { bool Changed = false; // Recurse depth-first through inner loops. @@ -396,7 +396,7 @@ bool llvm::formLCSSARecursively(Loop &L, DominatorTree &DT, LoopInfo *LI, } /// Process all loops in the function, inner-most out. -static bool formLCSSAOnAllLoops(LoopInfo *LI, DominatorTree &DT, +static bool formLCSSAOnAllLoops(const LoopInfo *LI, const DominatorTree &DT, ScalarEvolution *SE) { bool Changed = false; for (auto &L : *LI) From 858d8db47083d17eee5c6de5353348eaa886e5cd Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 17 Apr 2020 11:49:38 +0100 Subject: [PATCH 152/216] AMDGPU/GlobalISel: Work around another selector crash This does for G_EXTRACT_VECTOR_ELT what 588bd7be366 did for G_TRUNC. Ideally types without a corresponding register class wouldn't reach here, but we're currently missing some (in particular a 192-bit class is missing). --- llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index f1bb8f3c5033..88d66e0963ad 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -2330,6 +2330,8 @@ bool AMDGPUInstructionSelector::selectG_EXTRACT_VECTOR_ELT( *MRI); const TargetRegisterClass *DstRC = TRI.getRegClassForTypeOnBank(DstTy, *DstRB, *MRI); + if (!SrcRC || !DstRC) + return false; if (!RBI.constrainGenericRegister(SrcReg, *SrcRC, *MRI) || !RBI.constrainGenericRegister(DstReg, *DstRC, *MRI) || !RBI.constrainGenericRegister(IdxReg, AMDGPU::SReg_32RegClass, *MRI)) From 2c16ab746eeae697cf9db4489fa27a7759081b9a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Apr 2020 11:49:13 +0100 Subject: [PATCH 153/216] Scalar.h - remove unused forward declarations. NFC. --- llvm/include/llvm/Transforms/Scalar.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar.h b/llvm/include/llvm/Transforms/Scalar.h index 1f2842836303..a1aacec76979 100644 --- a/llvm/include/llvm/Transforms/Scalar.h +++ b/llvm/include/llvm/Transforms/Scalar.h @@ -22,10 +22,6 @@ class Function; class FunctionPass; class ModulePass; class Pass; -class GetElementPtrInst; -class PassInfo; -class TargetLowering; -class TargetMachine; //===----------------------------------------------------------------------===// // From 30725c2b35d4d5ae54366e1151263e907d625873 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Fri, 17 Apr 2020 12:01:25 +0100 Subject: [PATCH 154/216] SSAUpdaterBulk.h - remove unnecessary SmallPtrSet.h include. 
NFC

---
 llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
index 5d17d6f3d285..3a78e22b7e94 100644
--- a/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
+++ b/llvm/include/llvm/Transforms/Utils/SSAUpdaterBulk.h
@@ -14,7 +14,6 @@
 #define LLVM_TRANSFORMS_UTILS_SSAUPDATERBULK_H
 #include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/PredIteratorCache.h"

From de94715b64ba048728cb1c4ba88a5ac51066d61b Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Fri, 17 Apr 2020 12:14:02 +0100
Subject: [PATCH 155/216] UnifyFunctionExitNodes.h - remove unnecessary PassRegistry.h include. NFC

---
 llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
index f1789ed2a1b1..ff70446e163d 100644
--- a/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
+++ b/llvm/include/llvm/Transforms/Utils/UnifyFunctionExitNodes.h
@@ -18,7 +18,6 @@
 #define LLVM_TRANSFORMS_UTILS_UNIFYFUNCTIONEXITNODES_H
 #include "llvm/Pass.h"
-#include "llvm/PassRegistry.h"
 namespace llvm {

From c82faea9fb58d6b02cec3de8118a265395024792 Mon Sep 17 00:00:00 2001
From: Shengchen Kan
Date: Fri, 17 Apr 2020 11:17:38 +0800
Subject: [PATCH 156/216] Recommit [X86][MC][NFC] Reduce the parameters of functions in X86MCCodeEmitter(Part II)

The previous patch didn't handle the early return in `emitREXPrefix` correctly, which caused the REX prefix not to be emitted for instructions without operands. This patch includes the fix for that.
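As a minimal sketch of the behaviour being restored (illustrative only; the
names emitREXPrefixSketch, RexBits and Out are hypothetical, the real
interface is in the diff below): the REX value is computed first and the
0x40-based prefix byte is written only if that value is non-zero, so an
instruction whose REX bits come from TSFlags alone (no operands) still gets
its prefix, and the caller still learns whether one was emitted:

  #include <cstdint>
  #include <vector>

  // Sketch only: RexBits stands for whatever REX bits were derived from
  // TSFlags and the (possibly empty) operand list.
  static bool emitREXPrefixSketch(uint8_t RexBits, std::vector<uint8_t> &Out) {
    if (!RexBits)
      return false;                  // no prefix byte needed
    Out.push_back(0x40 | RexBits);   // 0x40 is the fixed REX encoding prefix
    return true;                     // caller records this as HasREX
  }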
--- .../X86/MCTargetDesc/X86MCCodeEmitter.cpp | 259 +++++++++--------- 1 file changed, 135 insertions(+), 124 deletions(-) diff --git a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp index 0176212fff5d..0fd88466af44 100644 --- a/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp +++ b/llvm/lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp @@ -76,13 +76,12 @@ class X86MCCodeEmitter : public MCCodeEmitter { unsigned &CurByte, raw_ostream &OS) const; void emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool Rex, unsigned &CurByte, + uint64_t TSFlags, bool HasREX, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const; - void emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, bool &Rex, - const MCInst &MI, const MCSubtargetInfo &STI, - raw_ostream &OS) const; + bool emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, const MCInst &MI, + const MCSubtargetInfo &STI, raw_ostream &OS) const; void emitVEXOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, raw_ostream &OS) const; @@ -93,7 +92,8 @@ class X86MCCodeEmitter : public MCCodeEmitter { bool emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const; - uint8_t determineREXPrefix(const MCInst &MI, int MemOperand) const; + bool emitREXPrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, + raw_ostream &OS) const; }; } // end anonymous namespace @@ -384,7 +384,7 @@ void X86MCCodeEmitter::emitSIBByte(unsigned SS, unsigned Index, unsigned Base, void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, unsigned RegOpcodeField, - uint64_t TSFlags, bool Rex, + uint64_t TSFlags, bool HasREX, unsigned &CurByte, raw_ostream &OS, SmallVectorImpl &Fixups, const MCSubtargetInfo &STI) const { @@ -412,7 +412,7 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, default: return X86::reloc_riprel_4byte; case X86::MOV64rm: - assert(Rex); + assert(HasREX); return X86::reloc_riprel_4byte_movq_load; case X86::CALL64m: case X86::JMP64m: @@ -426,8 +426,8 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, case X86::SBB64rm: case X86::SUB64rm: case X86::XOR64rm: - return Rex ? X86::reloc_riprel_4byte_relax_rex - : X86::reloc_riprel_4byte_relax; + return HasREX ? X86::reloc_riprel_4byte_relax_rex + : X86::reloc_riprel_4byte_relax; } }(); @@ -649,8 +649,11 @@ void X86MCCodeEmitter::emitMemModRMByte(const MCInst &MI, unsigned Op, CurByte, OS, Fixups); } -void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, - bool &Rex, const MCInst &MI, +/// Emit all instruction prefixes. +/// +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, + const MCInst &MI, const MCSubtargetInfo &STI, raw_ostream &OS) const { uint64_t TSFlags = MCII.get(MI.getOpcode()).TSFlags; @@ -696,10 +699,11 @@ void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, // Encoding type for this instruction. 
uint64_t Encoding = TSFlags & X86II::EncodingMask; - if (Encoding == 0) - Rex = emitOpcodePrefix(CurByte, MemoryOperand, MI, STI, OS); - else + bool HasREX = false; + if (Encoding) emitVEXOpcodePrefix(CurByte, MemoryOperand, MI, OS); + else + HasREX = emitOpcodePrefix(CurByte, MemoryOperand, MI, STI, OS); uint64_t Form = TSFlags & X86II::FormMask; switch (Form) { @@ -748,6 +752,8 @@ void X86MCCodeEmitter::emitPrefixImpl(unsigned &CurOp, unsigned &CurByte, break; } } + + return HasREX; } /// AVX instructions are encoded using a opcode prefix called VEX. @@ -1181,97 +1187,107 @@ void X86MCCodeEmitter::emitVEXOpcodePrefix(unsigned &CurByte, int MemOperand, } } -/// Determine if the MCInst has to be encoded with a X86-64 REX prefix which -/// specifies 1) 64-bit instructions, 2) non-default operand size, and 3) use -/// of X86-64 extended registers. -uint8_t X86MCCodeEmitter::determineREXPrefix(const MCInst &MI, - int MemOperand) const { - uint8_t REX = 0; - bool UsesHighByteReg = false; - - const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); - uint64_t TSFlags = Desc.TSFlags; - - if (TSFlags & X86II::REX_W) - REX |= 1 << 3; // set REX.W +/// Emit REX prefix which specifies +/// 1) 64-bit instructions, +/// 2) non-default operand size, and +/// 3) use of X86-64 extended registers. +/// +/// \returns true if REX prefix is used, otherwise returns false. +bool X86MCCodeEmitter::emitREXPrefix(unsigned &CurByte, int MemOperand, + const MCInst &MI, raw_ostream &OS) const { + uint8_t REX = [&, MemOperand]() { + uint8_t REX = 0; + bool UsesHighByteReg = false; + + const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); + uint64_t TSFlags = Desc.TSFlags; + + if (TSFlags & X86II::REX_W) + REX |= 1 << 3; // set REX.W + + if (MI.getNumOperands() == 0) + return REX; + + unsigned NumOps = MI.getNumOperands(); + unsigned CurOp = X86II::getOperandBias(Desc); + + // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. + for (unsigned i = CurOp; i != NumOps; ++i) { + const MCOperand &MO = MI.getOperand(i); + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) + UsesHighByteReg = true; + if (X86II::isX86_64NonExtLowByteReg(Reg)) + // FIXME: The caller of determineREXPrefix slaps this prefix onto + // anything that returns non-zero. 
+ REX |= 0x40; // REX fixed encoding prefix + } - if (MI.getNumOperands() == 0) + switch (TSFlags & X86II::FormMask) { + case X86II::AddRegFrm: + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + break; + case X86II::MRMSrcReg: + case X86II::MRMSrcRegCC: + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + break; + case X86II::MRMSrcMem: + case X86II::MRMSrcMemCC: + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X + CurOp += X86::AddrNumOperands; + break; + case X86II::MRMDestReg: + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + break; + case X86II::MRMDestMem: + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X + CurOp += X86::AddrNumOperands; + REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R + break; + case X86II::MRMXmCC: + case X86II::MRMXm: + case X86II::MRM0m: + case X86II::MRM1m: + case X86II::MRM2m: + case X86II::MRM3m: + case X86II::MRM4m: + case X86II::MRM5m: + case X86II::MRM6m: + case X86II::MRM7m: + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B + REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X + break; + case X86II::MRMXrCC: + case X86II::MRMXr: + case X86II::MRM0r: + case X86II::MRM1r: + case X86II::MRM2r: + case X86II::MRM3r: + case X86II::MRM4r: + case X86II::MRM5r: + case X86II::MRM6r: + case X86II::MRM7r: + REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B + break; + } + if (REX && UsesHighByteReg) + report_fatal_error( + "Cannot encode high byte register in REX-prefixed instruction"); return REX; + }(); - unsigned NumOps = MI.getNumOperands(); - unsigned CurOp = X86II::getOperandBias(Desc); - - // If it accesses SPL, BPL, SIL, or DIL, then it requires a 0x40 REX prefix. - for (unsigned i = CurOp; i != NumOps; ++i) { - const MCOperand &MO = MI.getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH) - UsesHighByteReg = true; - if (X86II::isX86_64NonExtLowByteReg(Reg)) - // FIXME: The caller of determineREXPrefix slaps this prefix onto anything - // that returns non-zero. 
- REX |= 0x40; // REX fixed encoding prefix - } - - switch (TSFlags & X86II::FormMask) { - case X86II::AddRegFrm: - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - break; - case X86II::MRMSrcReg: - case X86II::MRMSrcRegCC: - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - break; - case X86II::MRMSrcMem: - case X86II::MRMSrcMemCC: - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X - CurOp += X86::AddrNumOperands; - break; - case X86II::MRMDestReg: - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - break; - case X86II::MRMDestMem: - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X - CurOp += X86::AddrNumOperands; - REX |= isREXExtendedReg(MI, CurOp++) << 2; // REX.R - break; - case X86II::MRMXmCC: - case X86II::MRMXm: - case X86II::MRM0m: - case X86II::MRM1m: - case X86II::MRM2m: - case X86II::MRM3m: - case X86II::MRM4m: - case X86II::MRM5m: - case X86II::MRM6m: - case X86II::MRM7m: - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrBaseReg) << 0; // REX.B - REX |= isREXExtendedReg(MI, MemOperand + X86::AddrIndexReg) << 1; // REX.X - break; - case X86II::MRMXrCC: - case X86II::MRMXr: - case X86II::MRM0r: - case X86II::MRM1r: - case X86II::MRM2r: - case X86II::MRM3r: - case X86II::MRM4r: - case X86II::MRM5r: - case X86II::MRM6r: - case X86II::MRM7r: - REX |= isREXExtendedReg(MI, CurOp++) << 0; // REX.B - break; - } - if (REX && UsesHighByteReg) - report_fatal_error( - "Cannot encode high byte register in REX-prefixed instruction"); + if (!REX) + return false; - return REX; + emitByte(0x40 | REX, CurByte, OS); + return true; } /// Emit segment override opcode prefix as needed. @@ -1289,7 +1305,7 @@ void X86MCCodeEmitter::emitSegmentOverridePrefix(unsigned &CurByte, /// \param MemOperand the operand # of the start of a memory operand if present. /// If not present, it is -1. /// -/// \returns true if a REX prefix was used. +/// \returns true if REX prefix is used, otherwise returns false. bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInst &MI, const MCSubtargetInfo &STI, @@ -1297,7 +1313,6 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, const MCInstrDesc &Desc = MCII.get(MI.getOpcode()); uint64_t TSFlags = Desc.TSFlags; - bool Ret = false; // Emit the operand size opcode prefix as needed. if ((TSFlags & X86II::OpSizeMask) == (STI.hasFeature(X86::Mode16Bit) ? X86II::OpSize32 : X86II::OpSize16)) @@ -1324,15 +1339,11 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, } // Handle REX prefix. - // FIXME: Can this come before F2 etc to simplify emission? - if (STI.hasFeature(X86::Mode64Bit)) { - if (uint8_t REX = determineREXPrefix(MI, MemOperand)) { - emitByte(0x40 | REX, CurByte, OS); - Ret = true; - } - } else { - assert(!(TSFlags & X86II::REX_W) && "REX.W requires 64bit mode."); - } + assert((STI.hasFeature(X86::Mode64Bit) || !(TSFlags & X86II::REX_W)) && + "REX.W requires 64bit mode."); + bool HasREX = STI.hasFeature(X86::Mode64Bit) + ? emitREXPrefix(CurByte, MemOperand, MI, OS) + : false; // 0x0F escape code must be emitted just before the opcode. 
switch (TSFlags & X86II::OpMapMask) { @@ -1352,7 +1363,8 @@ bool X86MCCodeEmitter::emitOpcodePrefix(unsigned &CurByte, int MemOperand, emitByte(0x3A, CurByte, OS); break; } - return Ret; + + return HasREX; } void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, @@ -1370,8 +1382,7 @@ void X86MCCodeEmitter::emitPrefix(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted. unsigned CurByte = 0; - bool Rex = false; - emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); + emitPrefixImpl(CurOp, CurByte, MI, STI, OS); } void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, @@ -1391,8 +1402,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, // Keep track of the current byte being emitted. unsigned CurByte = 0; - bool Rex = false; - emitPrefixImpl(CurOp, CurByte, Rex, MI, STI, OS); + bool HasREX = emitPrefixImpl(CurOp, CurByte, MI, STI, OS); // It uses the VEX.VVVV field? bool HasVEX_4V = TSFlags & X86II::VEX_4V; @@ -1497,7 +1507,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, ++SrcRegNum; emitMemModRMByte(MI, CurOp, getX86RegNum(MI.getOperand(SrcRegNum)), TSFlags, - Rex, CurByte, OS, Fixups, STI); + HasREX, CurByte, OS, Fixups, STI); CurOp = SrcRegNum + 1; break; } @@ -1570,7 +1580,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; if (HasVEX_I8Reg) I8RegNum = getX86RegEncoding(MI, CurOp++); @@ -1582,7 +1592,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; ++CurOp; // Encoded in VEX.VVVV. break; @@ -1599,7 +1609,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(CurOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); CurOp = FirstMemOp + X86::AddrNumOperands; break; } @@ -1612,7 +1622,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode + CC, CurByte, OS); emitMemModRMByte(MI, FirstMemOp, getX86RegNum(MI.getOperand(RegOp)), - TSFlags, Rex, CurByte, OS, Fixups, STI); + TSFlags, HasREX, CurByte, OS, Fixups, STI); break; } @@ -1651,7 +1661,8 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, unsigned CC = MI.getOperand(CurOp++).getImm(); emitByte(BaseOpcode + CC, CurByte, OS); - emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, Rex, CurByte, OS, Fixups, STI); + emitMemModRMByte(MI, FirstMemOp, 0, TSFlags, HasREX, CurByte, OS, Fixups, + STI); break; } @@ -1671,7 +1682,7 @@ void X86MCCodeEmitter::encodeInstruction(const MCInst &MI, raw_ostream &OS, emitByte(BaseOpcode, CurByte, OS); emitMemModRMByte(MI, CurOp, (Form == X86II::MRMXm) ? 
0 : Form - X86II::MRM0m, TSFlags, - Rex, CurByte, OS, Fixups, STI); + HasREX, CurByte, OS, Fixups, STI); CurOp += X86::AddrNumOperands; break; From 4503cf5f231368d6e11af724b78f1371463d86a6 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Fri, 17 Apr 2020 13:19:52 +0200 Subject: [PATCH 157/216] [clangd] Drop dangling relations while sharding Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78359 --- clang-tools-extra/clangd/index/FileIndex.cpp | 6 +++--- clang-tools-extra/clangd/unittests/FileIndexTests.cpp | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clangd/index/FileIndex.cpp b/clang-tools-extra/clangd/index/FileIndex.cpp index 91914be00148..590bf46ec01c 100644 --- a/clang-tools-extra/clangd/index/FileIndex.cpp +++ b/clang-tools-extra/clangd/index/FileIndex.cpp @@ -174,9 +174,9 @@ FileShardedIndex::FileShardedIndex(IndexFileIn Input, PathRef HintPath) // not have been indexed, see SymbolCollector::processRelations for details. if (Index.Relations) { for (const auto &R : *Index.Relations) { - auto *File = SymbolIDToFile.lookup(R.Subject); - assert(File && "unknown subject in relation"); - File->Relations.insert(&R); + // FIXME: RelationSlab shouldn't contain dangling relations. + if (auto *File = SymbolIDToFile.lookup(R.Subject)) + File->Relations.insert(&R); } } // Store only the direct includes of a file in a shard. diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp index dc39ad2acf25..328892f750de 100644 --- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp @@ -540,6 +540,8 @@ TEST(FileShardedIndexTest, Sharding) { B.insert(Relation{Sym1.ID, RelationKind::BaseOf, Sym2.ID}); // Should be stored in b.h B.insert(Relation{Sym2.ID, RelationKind::BaseOf, Sym1.ID}); + // Dangling relation should be dropped. 
+ B.insert(Relation{symbol("3").ID, RelationKind::BaseOf, Sym1.ID}); IF.Relations = std::move(B).build(); } From 66b54d586fa73499714e2bfef3cedffeabb08f34 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Fri, 17 Apr 2020 13:56:22 +0200 Subject: [PATCH 158/216] [clangd] Fix memory leak in FileIndexTest --- clang-tools-extra/clangd/unittests/FileIndexTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp index 328892f750de..9631c920fb27 100644 --- a/clang-tools-extra/clangd/unittests/FileIndexTests.cpp +++ b/clang-tools-extra/clangd/unittests/FileIndexTests.cpp @@ -532,7 +532,7 @@ TEST(FileShardedIndexTest, Sharding) { } { // Should be stored in b.cc - IF.Refs = std::move(*refSlab(Sym1.ID, BSourceUri.c_str()).release()); + IF.Refs = std::move(*refSlab(Sym1.ID, BSourceUri.c_str())); } { RelationSlab::Builder B; From 5f236864124d3b6f29689b8779a1e880df970a1c Mon Sep 17 00:00:00 2001 From: Roger Ferrer Ibanez Date: Fri, 17 Apr 2020 07:39:49 +0000 Subject: [PATCH 159/216] [RISCV][AsmParser] Implement .option (no)pic Differential Revision: https://reviews.llvm.org/D77867 --- .../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 45 ++++++++++++++++++- .../RISCV/MCTargetDesc/RISCVELFStreamer.cpp | 2 + .../RISCV/MCTargetDesc/RISCVELFStreamer.h | 2 + .../MCTargetDesc/RISCVTargetStreamer.cpp | 8 ++++ .../RISCV/MCTargetDesc/RISCVTargetStreamer.h | 4 ++ llvm/test/CodeGen/RISCV/option-nopic.ll | 18 ++++++++ llvm/test/CodeGen/RISCV/option-pic.ll | 17 +++++++ llvm/test/MC/RISCV/option-nopic.s | 29 ++++++++++++ llvm/test/MC/RISCV/option-pic.s | 28 ++++++++++++ llvm/test/MC/RISCV/option-pushpop.s | 42 +++++++++++++++++ 10 files changed, 193 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/option-nopic.ll create mode 100644 llvm/test/CodeGen/RISCV/option-pic.ll create mode 100644 llvm/test/MC/RISCV/option-nopic.s create mode 100644 llvm/test/MC/RISCV/option-pic.s diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index 388b55a83195..0f284710f79c 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -51,9 +51,16 @@ STATISTIC(RISCVNumInstrsCompressed, namespace { struct RISCVOperand; +struct ParserOptionsSet { + bool IsPicEnabled; +}; + class RISCVAsmParser : public MCTargetAsmParser { SmallVector FeatureBitStack; + SmallVector ParserOptionsStack; + ParserOptionsSet ParserOptions; + SMLoc getLoc() const { return getParser().getTok().getLoc(); } bool isRV64() const { return getSTI().hasFeature(RISCV::Feature64Bit); } bool isRV32E() const { return getSTI().hasFeature(RISCV::FeatureRV32E); } @@ -170,10 +177,15 @@ class RISCVAsmParser : public MCTargetAsmParser { } void pushFeatureBits() { + assert(FeatureBitStack.size() == ParserOptionsStack.size() && + "These two stacks must be kept synchronized"); FeatureBitStack.push_back(getSTI().getFeatureBits()); + ParserOptionsStack.push_back(ParserOptions); } bool popFeatureBits() { + assert(FeatureBitStack.size() == ParserOptionsStack.size() && + "These two stacks must be kept synchronized"); if (FeatureBitStack.empty()) return true; @@ -181,8 +193,11 @@ class RISCVAsmParser : public MCTargetAsmParser { copySTI().setFeatureBits(FeatureBits); setAvailableFeatures(ComputeAvailableFeatures(FeatureBits)); + ParserOptions = ParserOptionsStack.pop_back_val(); + return false; } + public: enum 
RISCVMatchResultTy { Match_Dummy = FIRST_TARGET_MATCH_RESULT_TY, @@ -216,6 +231,9 @@ class RISCVAsmParser : public MCTargetAsmParser { "doesn't support the D instruction set extension (ignoring " "target-abi)\n"; } + + const MCObjectFileInfo *MOFI = Parser.getContext().getObjectFileInfo(); + ParserOptions.IsPicEnabled = MOFI->isPositionIndependent(); } }; @@ -1668,6 +1686,30 @@ bool RISCVAsmParser::parseDirectiveOption() { return false; } + if (Option == "pic") { + getTargetStreamer().emitDirectiveOptionPIC(); + + Parser.Lex(); + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) + return Error(Parser.getTok().getLoc(), + "unexpected token, expected end of statement"); + + ParserOptions.IsPicEnabled = true; + return false; + } + + if (Option == "nopic") { + getTargetStreamer().emitDirectiveOptionNoPIC(); + + Parser.Lex(); + if (Parser.getTok().isNot(AsmToken::EndOfStatement)) + return Error(Parser.getTok().getLoc(), + "unexpected token, expected end of statement"); + + ParserOptions.IsPicEnabled = false; + return false; + } + if (Option == "relax") { getTargetStreamer().emitDirectiveOptionRelax(); @@ -1931,8 +1973,7 @@ void RISCVAsmParser::emitLoadAddress(MCInst &Inst, SMLoc IDLoc, const MCExpr *Symbol = Inst.getOperand(1).getExpr(); unsigned SecondOpcode; RISCVMCExpr::VariantKind VKHi; - // FIXME: Should check .option (no)pic when implemented - if (getContext().getObjectFileInfo()->isPositionIndependent()) { + if (ParserOptions.IsPicEnabled) { SecondOpcode = isRV64() ? RISCV::LD : RISCV::LW; VKHi = RISCVMCExpr::VK_RISCV_GOT_HI; } else { diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp index 8382edf09231..079dc919928a 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.cpp @@ -66,6 +66,8 @@ MCELFStreamer &RISCVTargetELFStreamer::getStreamer() { void RISCVTargetELFStreamer::emitDirectiveOptionPush() {} void RISCVTargetELFStreamer::emitDirectiveOptionPop() {} +void RISCVTargetELFStreamer::emitDirectiveOptionPIC() {} +void RISCVTargetELFStreamer::emitDirectiveOptionNoPIC() {} void RISCVTargetELFStreamer::emitDirectiveOptionRVC() {} void RISCVTargetELFStreamer::emitDirectiveOptionNoRVC() {} void RISCVTargetELFStreamer::emitDirectiveOptionRelax() {} diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h index 0221392054ce..392c87054d43 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVELFStreamer.h @@ -97,6 +97,8 @@ class RISCVTargetELFStreamer : public RISCVTargetStreamer { void emitDirectiveOptionPush() override; void emitDirectiveOptionPop() override; + void emitDirectiveOptionPIC() override; + void emitDirectiveOptionNoPIC() override; void emitDirectiveOptionRVC() override; void emitDirectiveOptionNoRVC() override; void emitDirectiveOptionRelax() override; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp index b5b59c1227f6..4865d9d212fe 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.cpp @@ -61,6 +61,14 @@ void RISCVTargetAsmStreamer::emitDirectiveOptionPop() { OS << "\t.option\tpop\n"; } +void RISCVTargetAsmStreamer::emitDirectiveOptionPIC() { + OS << "\t.option\tpic\n"; +} + +void RISCVTargetAsmStreamer::emitDirectiveOptionNoPIC() { + OS << 
"\t.option\tnopic\n"; +} + void RISCVTargetAsmStreamer::emitDirectiveOptionRVC() { OS << "\t.option\trvc\n"; } diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h index f7c8db2edb01..94b324030154 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVTargetStreamer.h @@ -21,6 +21,8 @@ class RISCVTargetStreamer : public MCTargetStreamer { virtual void emitDirectiveOptionPush() = 0; virtual void emitDirectiveOptionPop() = 0; + virtual void emitDirectiveOptionPIC() = 0; + virtual void emitDirectiveOptionNoPIC() = 0; virtual void emitDirectiveOptionRVC() = 0; virtual void emitDirectiveOptionNoRVC() = 0; virtual void emitDirectiveOptionRelax() = 0; @@ -49,6 +51,8 @@ class RISCVTargetAsmStreamer : public RISCVTargetStreamer { void emitDirectiveOptionPush() override; void emitDirectiveOptionPop() override; + void emitDirectiveOptionPIC() override; + void emitDirectiveOptionNoPIC() override; void emitDirectiveOptionRVC() override; void emitDirectiveOptionNoRVC() override; void emitDirectiveOptionRelax() override; diff --git a/llvm/test/CodeGen/RISCV/option-nopic.ll b/llvm/test/CodeGen/RISCV/option-nopic.ll new file mode 100644 index 000000000000..681ace8ce64f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/option-nopic.ll @@ -0,0 +1,18 @@ +; RUN: llc -mtriple=riscv32 -filetype=obj --relocation-model=pic < %s\ +; RUN: | llvm-objdump --triple=riscv32 --mattr=+c -d -M no-aliases -\ +; RUN: | FileCheck -check-prefix=CHECK %s + +; This test demonstrates that .option nopic has no effect on codegen when +; emitting an ELF directly. + +@symbol = global i32 zeroinitializer + +define i32 @get_symbol() nounwind { +; CHECK-LABEL: : +; CHECK: auipc a0, 0 +; CHECK: lw a0, 0(a0) +; CHECK: lw a0, 0(a0) + tail call void asm sideeffect ".option nopic", ""() + %v = load i32, i32* @symbol + ret i32 %v +} diff --git a/llvm/test/CodeGen/RISCV/option-pic.ll b/llvm/test/CodeGen/RISCV/option-pic.ll new file mode 100644 index 000000000000..0810936d7bb5 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/option-pic.ll @@ -0,0 +1,17 @@ +; RUN: llc -mtriple=riscv32 -filetype=obj < %s\ +; RUN: | llvm-objdump --triple=riscv32 --mattr=+c -d -M no-aliases -\ +; RUN: | FileCheck -check-prefix=CHECK %s + +; This test demonstrates that .option pic has no effect on codegen when +; emitting an ELF directly. 
+ +@symbol = global i32 zeroinitializer + +define i32 @get_symbol() nounwind { +; CHECK-LABEL: : +; CHECK: lui a0, 0 +; CHECK: lw a0, 0(a0) + tail call void asm sideeffect ".option pic", ""() + %v = load i32, i32* @symbol + ret i32 %v +} diff --git a/llvm/test/MC/RISCV/option-nopic.s b/llvm/test/MC/RISCV/option-nopic.s new file mode 100644 index 000000000000..7673b0455619 --- /dev/null +++ b/llvm/test/MC/RISCV/option-nopic.s @@ -0,0 +1,29 @@ +# RUN: llvm-mc -triple riscv32 -mattr=-relax -riscv-no-aliases < %s \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +# RUN: llvm-mc -triple riscv32 -mattr=-relax -riscv-no-aliases \ +# RUN: -position-independent < %s | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -position-independent < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +# RUN: llvm-mc -triple riscv64 -mattr=-relax -riscv-no-aliases < %s \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv64 < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +# RUN: llvm-mc -triple riscv64 -mattr=-relax -riscv-no-aliases \ +# RUN: -position-independent < %s | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -position-independent < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +.option nopic +# CHECK-INST: .option nopic + +la s0, symbol +# CHECK-INST: auipc s0, %pcrel_hi(symbol) +# CHECK-INST: addi s0, s0, %pcrel_lo(.Lpcrel_hi0) +# CHECK-RELOC: R_RISCV_PCREL_HI20 symbol 0x0 +# CHECK-RELOC: R_RISCV_PCREL_LO12_I .Lpcrel_hi0 0x0 + diff --git a/llvm/test/MC/RISCV/option-pic.s b/llvm/test/MC/RISCV/option-pic.s new file mode 100644 index 000000000000..37d643c32483 --- /dev/null +++ b/llvm/test/MC/RISCV/option-pic.s @@ -0,0 +1,28 @@ +# RUN: llvm-mc -triple riscv32 -mattr=-relax -riscv-no-aliases < %s \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +# RUN: llvm-mc -triple riscv32 -mattr=-relax -riscv-no-aliases \ +# RUN: -position-independent < %s | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv32 -position-independent < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +# RUN: llvm-mc -triple riscv64 -mattr=-relax -riscv-no-aliases < %s \ +# RUN: | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv64 < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +# RUN: llvm-mc -triple riscv64 -mattr=-relax -riscv-no-aliases \ +# RUN: -position-independent < %s | FileCheck -check-prefix=CHECK-INST %s +# RUN: llvm-mc -filetype=obj -triple riscv64 -position-independent < %s \ +# RUN: | llvm-readobj -r | FileCheck -check-prefix=CHECK-RELOC %s + +.option pic +# CHECK-INST: .option pic + +la s0, symbol +# CHECK-INST: auipc s0, %got_pcrel_hi(symbol) +# CHECK-INST: l{{[wd]}} s0, %pcrel_lo(.Lpcrel_hi0)(s0) +# CHECK-RELOC: R_RISCV_GOT_HI20 symbol 0x0 +# CHECK-RELOC: R_RISCV_PCREL_LO12_I .Lpcrel_hi0 0x0 diff --git a/llvm/test/MC/RISCV/option-pushpop.s b/llvm/test/MC/RISCV/option-pushpop.s index 0754a74c58a3..96101dbc6376 100644 --- a/llvm/test/MC/RISCV/option-pushpop.s +++ b/llvm/test/MC/RISCV/option-pushpop.s @@ -72,3 +72,45 @@ call baz # CHECK-BYTES: 13 04 c1 3f # CHECK-ALIAS: addi s0, 
sp, 1020 addi s0, sp, 1020 + +.option push # Push pic=false +# CHECK-INST: .option push + +.option pic +# CHECK-INST: .option pic + +la s0, symbol +# CHECK-INST: auipc s0, %got_pcrel_hi(symbol) +# CHECK-INST: l{{[wd]}} s0, %pcrel_lo(.Lpcrel_hi0)(s0) +# CHECK-RELOC: R_RISCV_GOT_HI20 symbol 0x0 +# CHECK-RELOC: R_RISCV_PCREL_LO12_I .Lpcrel_hi0 0x0 + +.option push # Push pic=true +# CHECK-INST: .option push + +.option nopic +# CHECK-INST: .option nopic + +la s0, symbol +# CHECK-INST: auipc s0, %pcrel_hi(symbol) +# CHECK-INST: addi s0, s0, %pcrel_lo(.Lpcrel_hi1) +# CHECK-RELOC: R_RISCV_PCREL_HI20 symbol 0x0 +# CHECK-RELOC: R_RISCV_PCREL_LO12_I .Lpcrel_hi1 0x0 + +.option pop # Push pic=true +# CHECK-INST: .option pop + +la s0, symbol +# CHECK-INST: auipc s0, %got_pcrel_hi(symbol) +# CHECK-INST: l{{[wd]}} s0, %pcrel_lo(.Lpcrel_hi2)(s0) +# CHECK-RELOC: R_RISCV_GOT_HI20 symbol 0x0 +# CHECK-RELOC: R_RISCV_PCREL_LO12_I .Lpcrel_hi2 0x0 + +.option pop # Push pic=false +# CHECK-INST: .option pop + +la s0, symbol +# CHECK-INST: auipc s0, %pcrel_hi(symbol) +# CHECK-INST: addi s0, s0, %pcrel_lo(.Lpcrel_hi3) +# CHECK-RELOC: R_RISCV_PCREL_HI20 symbol 0x0 +# CHECK-RELOC: R_RISCV_PCREL_LO12_I .Lpcrel_hi3 0x0 From a6fc687e34a1807505acd3d42424acc2bdfaefc9 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 17 Apr 2020 08:22:17 -0400 Subject: [PATCH 160/216] [x86] add/adjust tests for FP<->int casts; NFC --- llvm/test/CodeGen/X86/ftrunc.ll | 100 +++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 20 deletions(-) diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll index 10e9b9c4ea80..abadcaa5cf6c 100644 --- a/llvm/test/CodeGen/X86/ftrunc.ll +++ b/llvm/test/CodeGen/X86/ftrunc.ll @@ -237,19 +237,21 @@ define float @trunc_signed_f32_no_fast_math(float %x) { ret float %r } -define float @trunc_signed_f32(float %x) #0 { -; SSE2-LABEL: trunc_signed_f32: +; Without -0.0, it is ok to use roundss if it is available. 
+ +define float @trunc_signed_f32_nsz(float %x) #0 { +; SSE2-LABEL: trunc_signed_f32_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE2-NEXT: retq ; -; SSE41-LABEL: trunc_signed_f32: +; SSE41-LABEL: trunc_signed_f32_nsz: ; SSE41: # %bb.0: ; SSE41-NEXT: roundss $11, %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_signed_f32: +; AVX1-LABEL: trunc_signed_f32_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vroundss $11, %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -258,20 +260,78 @@ define float @trunc_signed_f32(float %x) #0 { ret float %r } -define double @trunc_signed_f64(double %x) #0 { -; SSE2-LABEL: trunc_signed_f64: +define double @trunc_signed32_f64_no_fast_math(double %x) { +; SSE-LABEL: trunc_signed32_f64_no_fast_math: +; SSE: # %bb.0: +; SSE-NEXT: cvttsd2si %xmm0, %eax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2sd %eax, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc_signed32_f64_no_fast_math: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttsd2si %xmm0, %eax +; AVX1-NEXT: vcvtsi2sd %eax, %xmm1, %xmm0 +; AVX1-NEXT: retq + %i = fptosi double %x to i32 + %r = sitofp i32 %i to double + ret double %r +} + +define double @trunc_signed32_f64_nsz(double %x) #0 { +; SSE2-LABEL: trunc_signed32_f64_nsz: +; SSE2: # %bb.0: +; SSE2-NEXT: cvttsd2si %xmm0, %eax +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2sd %eax, %xmm0 +; SSE2-NEXT: retq +; +; SSE41-LABEL: trunc_signed32_f64_nsz: +; SSE41: # %bb.0: +; SSE41-NEXT: roundsd $11, %xmm0, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc_signed32_f64_nsz: +; AVX1: # %bb.0: +; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: retq + %i = fptosi double %x to i32 + %r = sitofp i32 %i to double + ret double %r +} + +define double @trunc_signed_f64_no_fast_math(double %x) { +; SSE-LABEL: trunc_signed_f64_no_fast_math: +; SSE: # %bb.0: +; SSE-NEXT: cvttsd2si %xmm0, %rax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2sd %rax, %xmm0 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc_signed_f64_no_fast_math: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttsd2si %xmm0, %rax +; AVX1-NEXT: vcvtsi2sd %rax, %xmm1, %xmm0 +; AVX1-NEXT: retq + %i = fptosi double %x to i64 + %r = sitofp i64 %i to double + ret double %r +} + +define double @trunc_signed_f64_nsz(double %x) #0 { +; SSE2-LABEL: trunc_signed_f64_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2sd %rax, %xmm0 ; SSE2-NEXT: retq ; -; SSE41-LABEL: trunc_signed_f64: +; SSE41-LABEL: trunc_signed_f64_nsz: ; SSE41: # %bb.0: ; SSE41-NEXT: roundsd $11, %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_signed_f64: +; AVX1-LABEL: trunc_signed_f64_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vroundsd $11, %xmm0, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -280,19 +340,19 @@ define double @trunc_signed_f64(double %x) #0 { ret double %r } -define <4 x float> @trunc_signed_v4f32(<4 x float> %x) #0 { -; SSE2-LABEL: trunc_signed_v4f32: +define <4 x float> @trunc_signed_v4f32_nsz(<4 x float> %x) #0 { +; SSE2-LABEL: trunc_signed_v4f32_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttps2dq %xmm0, %xmm0 ; SSE2-NEXT: cvtdq2ps %xmm0, %xmm0 ; SSE2-NEXT: retq ; -; SSE41-LABEL: trunc_signed_v4f32: +; SSE41-LABEL: trunc_signed_v4f32_nsz: ; SSE41: # %bb.0: ; SSE41-NEXT: roundps $11, %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_signed_v4f32: +; AVX1-LABEL: trunc_signed_v4f32_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vroundps $11, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -301,8 +361,8 @@ define <4 x float> @trunc_signed_v4f32(<4 x float> %x) #0 { ret <4 x float> %r } 
-define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 { -; SSE2-LABEL: trunc_signed_v2f64: +define <2 x double> @trunc_signed_v2f64_nsz(<2 x double> %x) #0 { +; SSE2-LABEL: trunc_signed_v2f64_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm0, %rax ; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1,1] @@ -313,12 +373,12 @@ define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 { ; SSE2-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] ; SSE2-NEXT: retq ; -; SSE41-LABEL: trunc_signed_v2f64: +; SSE41-LABEL: trunc_signed_v2f64_nsz: ; SSE41: # %bb.0: ; SSE41-NEXT: roundpd $11, %xmm0, %xmm0 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_signed_v2f64: +; AVX1-LABEL: trunc_signed_v2f64_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vroundpd $11, %xmm0, %xmm0 ; AVX1-NEXT: retq @@ -327,8 +387,8 @@ define <2 x double> @trunc_signed_v2f64(<2 x double> %x) #0 { ret <2 x double> %r } -define <4 x double> @trunc_signed_v4f64(<4 x double> %x) #0 { -; SSE2-LABEL: trunc_signed_v4f64: +define <4 x double> @trunc_signed_v4f64_nsz(<4 x double> %x) #0 { +; SSE2-LABEL: trunc_signed_v4f64_nsz: ; SSE2: # %bb.0: ; SSE2-NEXT: cvttsd2si %xmm1, %rax ; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1] @@ -347,13 +407,13 @@ define <4 x double> @trunc_signed_v4f64(<4 x double> %x) #0 { ; SSE2-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] ; SSE2-NEXT: retq ; -; SSE41-LABEL: trunc_signed_v4f64: +; SSE41-LABEL: trunc_signed_v4f64_nsz: ; SSE41: # %bb.0: ; SSE41-NEXT: roundpd $11, %xmm0, %xmm0 ; SSE41-NEXT: roundpd $11, %xmm1, %xmm1 ; SSE41-NEXT: retq ; -; AVX1-LABEL: trunc_signed_v4f64: +; AVX1-LABEL: trunc_signed_v4f64_nsz: ; AVX1: # %bb.0: ; AVX1-NEXT: vroundpd $11, %ymm0, %ymm0 ; AVX1-NEXT: retq From 818126ae973e3e790185c808ee2f5b025847f3d4 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 17 Apr 2020 08:41:18 -0400 Subject: [PATCH 161/216] [x86] rename variables for types for readability; NFC This gets harder to follow if we allow changing types/sizes between source, dest, and intermediate value. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 918cb154e219..fcdfd85d42ec 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -19183,9 +19183,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG, // See if we have a 128-bit vector cast op for this type of cast. unsigned NumEltsInXMM = 128 / VT.getScalarSizeInBits(); - MVT Vec128VT = MVT::getVectorVT(VT, NumEltsInXMM); - MVT Int128VT = MVT::getVectorVT(IntVT, NumEltsInXMM); - if (!useVectorCast(CastToFP.getOpcode(), Int128VT, Vec128VT, Subtarget)) + MVT VecFPVT = MVT::getVectorVT(VT, NumEltsInXMM); + MVT VecIntVT = MVT::getVectorVT(IntVT, NumEltsInXMM); + if (!useVectorCast(CastToFP.getOpcode(), VecIntVT, VecFPVT, Subtarget)) return SDValue(); // sint_to_fp (fp_to_sint X) --> extelt (sint_to_fp (fp_to_sint (s2v X))), 0 @@ -19196,9 +19196,9 @@ static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG, // penalties) with cast ops. 
SDLoc DL(CastToFP); SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL); - SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, Vec128VT, X); - SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, Int128VT, VecX); - SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, Vec128VT, VCastToInt); + SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecFPVT, X); + SDValue VCastToInt = DAG.getNode(ISD::FP_TO_SINT, DL, VecIntVT, VecX); + SDValue VCastToFP = DAG.getNode(ISD::SINT_TO_FP, DL, VecFPVT, VCastToInt); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx); } From 9a39d5a2ecf7c9290d334f2697c3a4e533aa4bea Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Apr 2020 08:47:40 -0400 Subject: [PATCH 162/216] [libc++] Move .fail.cpp tests with verify-support to .verify.cpp --- .../alg.random.shuffle/random_shuffle.cxx1z.pass.cpp | 2 -- ...n_cxx14.fail.cpp => random_shuffle.depr_in_cxx14.verify.cpp} | 0 ..._order.fail.cpp => diagnose_invalid_memory_order.verify.cpp} | 0 ...ator.pass.cpp => non_const_comparator.incomplete.verify.cpp} | 0 ...onst_comparator.fail.cpp => non_const_comparator.verify.cpp} | 0 ...ator.pass.cpp => non_const_comparator.incomplete.verify.cpp} | 2 +- ...onst_comparator.fail.cpp => non_const_comparator.verify.cpp} | 0 ...depr_in_cxx11.fail.cpp => auto_ptr.depr_in_cxx11.verify.cpp} | 0 ...o_fgetpos_fsetpos.fail.cpp => no_fgetpos_fsetpos.verify.cpp} | 0 ....depr_in_cxx17.fail.cpp => address.depr_in_cxx17.verify.cpp} | 0 .../{allocate.cxx2a.fail.cpp => allocate.cxx2a.verify.cpp} | 0 ...depr_in_cxx17.fail.cpp => allocate.depr_in_cxx17.verify.cpp} | 0 ..._cxx17.fail.cpp => allocator_types.depr_in_cxx17.verify.cpp} | 0 ...n_cxx17.fail.cpp => allocator_void.depr_in_cxx17.verify.cpp} | 0 ...depr_in_cxx11.fail.cpp => adaptors.depr_in_cxx11.verify.cpp} | 0 .../{enable_nodiscard.fail.cpp => enable_nodiscard.verify.cpp} | 0 ...fail.cpp => enable_nodiscard_disable_after_cxx17.verify.cpp} | 0 ...il.cpp => enable_nodiscard_disable_nodiscard_ext.verify.cpp} | 0 ...card_aftercxx17.fail.cpp => nodiscard_aftercxx17.verify.cpp} | 0 ...card_extensions.fail.cpp => nodiscard_extensions.verify.cpp} | 0 .../filesystem/{deprecated.fail.cpp => deprecated.verify.cpp} | 0 ...mic.availability.fail.cpp => atomic.availability.verify.cpp} | 0 ...er.availability.fail.cpp => barrier.availability.verify.cpp} | 0 ...atch.availability.fail.cpp => latch.availability.verify.cpp} | 0 ....availability.fail.cpp => semaphore.availability.verify.cpp} | 0 .../{nodiscard.fail.cpp => nodiscard.verify.cpp} | 0 .../{depr_in_cxx03.fail.cpp => depr_in_cxx03.verify.cpp} | 0 .../map/map.access/{empty.fail.cpp => empty.verify.cpp} | 0 .../associative/multimap/{empty.fail.cpp => empty.verify.cpp} | 0 .../associative/multiset/{empty.fail.cpp => empty.verify.cpp} | 0 .../associative/set/{empty.fail.cpp => empty.verify.cpp} | 0 .../priqueue.members/{empty.fail.cpp => empty.verify.cpp} | 0 .../queue/queue.defn/{empty.fail.cpp => empty.verify.cpp} | 0 .../stack/stack.defn/{empty.fail.cpp => empty.verify.cpp} | 0 .../sequences/array/{empty.fail.cpp => empty.verify.cpp} | 0 .../deque/deque.capacity/{empty.fail.cpp => empty.verify.cpp} | 0 .../sequences/forwardlist/{empty.fail.cpp => empty.verify.cpp} | 0 .../list/list.capacity/{empty.fail.cpp => empty.verify.cpp} | 0 .../sequences/vector.bool/{empty.fail.cpp => empty.verify.cpp} | 0 .../vector/vector.capacity/{empty.fail.cpp => empty.verify.cpp} | 0 .../{copy.move_only.fail.cpp => copy.move_only.verify.cpp} | 0 .../unord/unord.map/{empty.fail.cpp => 
empty.verify.cpp} | 0 .../unord/unord.multimap/{empty.fail.cpp => empty.verify.cpp} | 0 .../unord/unord.multiset/{empty.fail.cpp => empty.verify.cpp} | 0 .../unord/unord.set/{empty.fail.cpp => empty.verify.cpp} | 0 ....depr_in_cxx11.fail.cpp => bind1st.depr_in_cxx11.verify.cpp} | 0 ....depr_in_cxx11.fail.cpp => bind2nd.depr_in_cxx11.verify.cpp} | 0 ...epr_in_cxx11.fail.cpp => binder1st.depr_in_cxx11.verify.cpp} | 0 ...epr_in_cxx11.fail.cpp => binder2nd.depr_in_cxx11.verify.cpp} | 0 .../path.decompose/{empty.fail.cpp => empty.verify.cpp} | 0 .../{empty.array.fail.cpp => empty.array.verify.cpp} | 0 .../{empty.container.fail.cpp => empty.container.verify.cpp} | 0 ...tializer_list.fail.cpp => empty.initializer_list.verify.cpp} | 0 .../{new_array_ptr.fail.cpp => new_array_ptr.verify.cpp} | 0 .../{new_ptr.fail.cpp => new_ptr.verify.cpp} | 0 .../{new_size.fail.cpp => new_size.verify.cpp} | 0 .../{new_size_nothrow.fail.cpp => new_size_nothrow.verify.cpp} | 0 ...{launder.nodiscard.fail.cpp => launder.nodiscard.verify.cpp} | 0 .../re.results.size/{empty.fail.cpp => empty.verify.cpp} | 0 .../string.capacity/{empty.fail.cpp => empty.verify.cpp} | 0 .../string.view.capacity/{empty.fail.cpp => empty.verify.cpp} | 0 .../futures/futures.async/{async.fail.cpp => async.verify.cpp} | 0 .../{allocate_size.fail.cpp => allocate_size.verify.cpp} | 0 ...llocate_size_hint.fail.cpp => allocate_size_hint.verify.cpp} | 0 ...in_cxx17.fail.cpp => binary_negate.depr_in_cxx17.verify.cpp} | 0 ...ot1.depr_in_cxx17.fail.cpp => not1.depr_in_cxx17.verify.cpp} | 0 ...ot2.depr_in_cxx17.fail.cpp => not2.depr_in_cxx17.verify.cpp} | 0 ..._in_cxx17.fail.cpp => unary_negate.depr_in_cxx17.verify.cpp} | 0 .../{allocate.fail.cpp => allocate.verify.cpp} | 0 .../{allocate.fail.cpp => allocate.verify.cpp} | 0 70 files changed, 1 insertion(+), 3 deletions(-) rename libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/{random_shuffle.depr_in_cxx14.fail.cpp => random_shuffle.depr_in_cxx14.verify.cpp} (100%) rename libcxx/test/libcxx/atomics/{diagnose_invalid_memory_order.fail.cpp => diagnose_invalid_memory_order.verify.cpp} (100%) rename libcxx/test/libcxx/containers/associative/{non_const_comparator.pass.cpp => non_const_comparator.incomplete.verify.cpp} (100%) rename libcxx/test/libcxx/containers/associative/{non_const_comparator.fail.cpp => non_const_comparator.verify.cpp} (100%) rename libcxx/test/libcxx/containers/unord/{non_const_comparator.pass.cpp => non_const_comparator.incomplete.verify.cpp} (98%) rename libcxx/test/libcxx/containers/unord/{non_const_comparator.fail.cpp => non_const_comparator.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/{auto_ptr.depr_in_cxx11.fail.cpp => auto_ptr.depr_in_cxx11.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.c.headers/{no_fgetpos_fsetpos.fail.cpp => no_fgetpos_fsetpos.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/{address.depr_in_cxx17.fail.cpp => address.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/{allocate.cxx2a.fail.cpp => allocate.cxx2a.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/{allocate.depr_in_cxx17.fail.cpp => allocate.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.default.allocator/{allocator_types.depr_in_cxx17.fail.cpp => allocator_types.depr_in_cxx17.verify.cpp} (100%) rename 
libcxx/test/libcxx/depr/depr.default.allocator/{allocator_void.depr_in_cxx17.fail.cpp => allocator_void.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/libcxx/depr/depr.function.objects/{adaptors.depr_in_cxx11.fail.cpp => adaptors.depr_in_cxx11.verify.cpp} (100%) rename libcxx/test/libcxx/diagnostics/{enable_nodiscard.fail.cpp => enable_nodiscard.verify.cpp} (100%) rename libcxx/test/libcxx/diagnostics/{enable_nodiscard_disable_after_cxx17.fail.cpp => enable_nodiscard_disable_after_cxx17.verify.cpp} (100%) rename libcxx/test/libcxx/diagnostics/{enable_nodiscard_disable_nodiscard_ext.fail.cpp => enable_nodiscard_disable_nodiscard_ext.verify.cpp} (100%) rename libcxx/test/libcxx/diagnostics/{nodiscard_aftercxx17.fail.cpp => nodiscard_aftercxx17.verify.cpp} (100%) rename libcxx/test/libcxx/diagnostics/{nodiscard_extensions.fail.cpp => nodiscard_extensions.verify.cpp} (100%) rename libcxx/test/libcxx/experimental/filesystem/{deprecated.fail.cpp => deprecated.verify.cpp} (100%) rename libcxx/test/libcxx/thread/{atomic.availability.fail.cpp => atomic.availability.verify.cpp} (100%) rename libcxx/test/libcxx/thread/{barrier.availability.fail.cpp => barrier.availability.verify.cpp} (100%) rename libcxx/test/libcxx/thread/{latch.availability.fail.cpp => latch.availability.verify.cpp} (100%) rename libcxx/test/libcxx/thread/{semaphore.availability.fail.cpp => semaphore.availability.verify.cpp} (100%) rename libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/{nodiscard.fail.cpp => nodiscard.verify.cpp} (100%) rename libcxx/test/libcxx/utilities/function.objects/func.wrap/{depr_in_cxx03.fail.cpp => depr_in_cxx03.verify.cpp} (100%) rename libcxx/test/std/containers/associative/map/map.access/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/associative/multimap/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/associative/multiset/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/associative/set/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/container.adaptors/queue/queue.defn/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/container.adaptors/stack/stack.defn/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/array/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/deque/deque.capacity/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/forwardlist/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/list/list.capacity/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/vector.bool/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/vector/vector.capacity/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/sequences/vector/vector.cons/{copy.move_only.fail.cpp => copy.move_only.verify.cpp} (100%) rename libcxx/test/std/containers/unord/unord.map/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/unord/unord.multimap/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/unord/unord.multiset/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/containers/unord/unord.set/{empty.fail.cpp => empty.verify.cpp} (100%) rename 
libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/{bind1st.depr_in_cxx11.fail.cpp => bind1st.depr_in_cxx11.verify.cpp} (100%) rename libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/{bind2nd.depr_in_cxx11.fail.cpp => bind2nd.depr_in_cxx11.verify.cpp} (100%) rename libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/{binder1st.depr_in_cxx11.fail.cpp => binder1st.depr_in_cxx11.verify.cpp} (100%) rename libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/{binder2nd.depr_in_cxx11.fail.cpp => binder2nd.depr_in_cxx11.verify.cpp} (100%) rename libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/iterators/iterator.container/{empty.array.fail.cpp => empty.array.verify.cpp} (100%) rename libcxx/test/std/iterators/iterator.container/{empty.container.fail.cpp => empty.container.verify.cpp} (100%) rename libcxx/test/std/iterators/iterator.container/{empty.initializer_list.fail.cpp => empty.initializer_list.verify.cpp} (100%) rename libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/{new_array_ptr.fail.cpp => new_array_ptr.verify.cpp} (100%) rename libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/{new_ptr.fail.cpp => new_ptr.verify.cpp} (100%) rename libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/{new_size.fail.cpp => new_size.verify.cpp} (100%) rename libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/{new_size_nothrow.fail.cpp => new_size_nothrow.verify.cpp} (100%) rename libcxx/test/std/language.support/support.dynamic/ptr.launder/{launder.nodiscard.fail.cpp => launder.nodiscard.verify.cpp} (100%) rename libcxx/test/std/re/re.results/re.results.size/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/strings/basic.string/string.capacity/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/strings/string.view/string.view.capacity/{empty.fail.cpp => empty.verify.cpp} (100%) rename libcxx/test/std/thread/futures/futures.async/{async.fail.cpp => async.verify.cpp} (100%) rename libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/{allocate_size.fail.cpp => allocate_size.verify.cpp} (100%) rename libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/{allocate_size_hint.fail.cpp => allocate_size_hint.verify.cpp} (100%) rename libcxx/test/std/utilities/function.objects/negators/{binary_negate.depr_in_cxx17.fail.cpp => binary_negate.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/std/utilities/function.objects/negators/{not1.depr_in_cxx17.fail.cpp => not1.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/std/utilities/function.objects/negators/{not2.depr_in_cxx17.fail.cpp => not2.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/std/utilities/function.objects/negators/{unary_negate.depr_in_cxx17.fail.cpp => unary_negate.depr_in_cxx17.verify.cpp} (100%) rename libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/{allocate.fail.cpp => allocate.verify.cpp} (100%) rename libcxx/test/std/utilities/memory/default.allocator/allocator.members/{allocate.fail.cpp => allocate.verify.cpp} (100%) diff --git a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp index 362493c756ff..c1acc100a660 100644 --- 
a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp +++ b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.cxx1z.pass.cpp @@ -22,8 +22,6 @@ // However, for backwards compatibility, if _LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE // is defined before including , then random_shuffle will be restored. -// REQUIRES: verify-support - // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX17_REMOVED_RANDOM_SHUFFLE // ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS diff --git a/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.fail.cpp b/libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.verify.cpp similarity index 100% rename from libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.fail.cpp rename to libcxx/test/libcxx/algorithms/alg.modifying.operations/alg.random.shuffle/random_shuffle.depr_in_cxx14.verify.cpp diff --git a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.fail.cpp b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp similarity index 100% rename from libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.fail.cpp rename to libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp diff --git a/libcxx/test/libcxx/containers/associative/non_const_comparator.pass.cpp b/libcxx/test/libcxx/containers/associative/non_const_comparator.incomplete.verify.cpp similarity index 100% rename from libcxx/test/libcxx/containers/associative/non_const_comparator.pass.cpp rename to libcxx/test/libcxx/containers/associative/non_const_comparator.incomplete.verify.cpp diff --git a/libcxx/test/libcxx/containers/associative/non_const_comparator.fail.cpp b/libcxx/test/libcxx/containers/associative/non_const_comparator.verify.cpp similarity index 100% rename from libcxx/test/libcxx/containers/associative/non_const_comparator.fail.cpp rename to libcxx/test/libcxx/containers/associative/non_const_comparator.verify.cpp diff --git a/libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp b/libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp similarity index 98% rename from libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp rename to libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp index e03b47e56014..b2525fbf0f03 100644 --- a/libcxx/test/libcxx/containers/unord/non_const_comparator.pass.cpp +++ b/libcxx/test/libcxx/containers/unord/non_const_comparator.incomplete.verify.cpp @@ -50,7 +50,7 @@ void test_map() { } int main(int, char**) { - // expected-no-disagnostics + // expected-no-diagnostics test_set(); test_set(); test_map(); diff --git a/libcxx/test/libcxx/containers/unord/non_const_comparator.fail.cpp b/libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp similarity index 100% rename from libcxx/test/libcxx/containers/unord/non_const_comparator.fail.cpp rename to libcxx/test/libcxx/containers/unord/non_const_comparator.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.fail.cpp b/libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.fail.cpp rename to libcxx/test/libcxx/depr/depr.auto.ptr/auto.ptr/auto_ptr.depr_in_cxx11.verify.cpp diff --git 
a/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.fail.cpp b/libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.fail.cpp rename to libcxx/test/libcxx/depr/depr.c.headers/no_fgetpos_fsetpos.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.fail.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.fail.cpp rename to libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/address.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.fail.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.fail.cpp rename to libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.cxx2a.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.fail.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.fail.cpp rename to libcxx/test/libcxx/depr/depr.default.allocator/allocator.members/allocate.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.fail.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.fail.cpp rename to libcxx/test/libcxx/depr/depr.default.allocator/allocator_types.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.fail.cpp b/libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.fail.cpp rename to libcxx/test/libcxx/depr/depr.default.allocator/allocator_void.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.fail.cpp b/libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.verify.cpp similarity index 100% rename from libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.fail.cpp rename to libcxx/test/libcxx/depr/depr.function.objects/adaptors.depr_in_cxx11.verify.cpp diff --git a/libcxx/test/libcxx/diagnostics/enable_nodiscard.fail.cpp b/libcxx/test/libcxx/diagnostics/enable_nodiscard.verify.cpp similarity index 100% rename from libcxx/test/libcxx/diagnostics/enable_nodiscard.fail.cpp rename to libcxx/test/libcxx/diagnostics/enable_nodiscard.verify.cpp diff --git a/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.fail.cpp b/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.verify.cpp similarity index 100% rename from libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.fail.cpp rename to libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_after_cxx17.verify.cpp diff --git 
a/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.fail.cpp b/libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.verify.cpp similarity index 100% rename from libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.fail.cpp rename to libcxx/test/libcxx/diagnostics/enable_nodiscard_disable_nodiscard_ext.verify.cpp diff --git a/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.fail.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.verify.cpp similarity index 100% rename from libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.fail.cpp rename to libcxx/test/libcxx/diagnostics/nodiscard_aftercxx17.verify.cpp diff --git a/libcxx/test/libcxx/diagnostics/nodiscard_extensions.fail.cpp b/libcxx/test/libcxx/diagnostics/nodiscard_extensions.verify.cpp similarity index 100% rename from libcxx/test/libcxx/diagnostics/nodiscard_extensions.fail.cpp rename to libcxx/test/libcxx/diagnostics/nodiscard_extensions.verify.cpp diff --git a/libcxx/test/libcxx/experimental/filesystem/deprecated.fail.cpp b/libcxx/test/libcxx/experimental/filesystem/deprecated.verify.cpp similarity index 100% rename from libcxx/test/libcxx/experimental/filesystem/deprecated.fail.cpp rename to libcxx/test/libcxx/experimental/filesystem/deprecated.verify.cpp diff --git a/libcxx/test/libcxx/thread/atomic.availability.fail.cpp b/libcxx/test/libcxx/thread/atomic.availability.verify.cpp similarity index 100% rename from libcxx/test/libcxx/thread/atomic.availability.fail.cpp rename to libcxx/test/libcxx/thread/atomic.availability.verify.cpp diff --git a/libcxx/test/libcxx/thread/barrier.availability.fail.cpp b/libcxx/test/libcxx/thread/barrier.availability.verify.cpp similarity index 100% rename from libcxx/test/libcxx/thread/barrier.availability.fail.cpp rename to libcxx/test/libcxx/thread/barrier.availability.verify.cpp diff --git a/libcxx/test/libcxx/thread/latch.availability.fail.cpp b/libcxx/test/libcxx/thread/latch.availability.verify.cpp similarity index 100% rename from libcxx/test/libcxx/thread/latch.availability.fail.cpp rename to libcxx/test/libcxx/thread/latch.availability.verify.cpp diff --git a/libcxx/test/libcxx/thread/semaphore.availability.fail.cpp b/libcxx/test/libcxx/thread/semaphore.availability.verify.cpp similarity index 100% rename from libcxx/test/libcxx/thread/semaphore.availability.fail.cpp rename to libcxx/test/libcxx/thread/semaphore.availability.verify.cpp diff --git a/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.fail.cpp b/libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp similarity index 100% rename from libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.fail.cpp rename to libcxx/test/libcxx/thread/thread.lock/thread.lock.guard/nodiscard.verify.cpp diff --git a/libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.fail.cpp b/libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.verify.cpp similarity index 100% rename from libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.fail.cpp rename to libcxx/test/libcxx/utilities/function.objects/func.wrap/depr_in_cxx03.verify.cpp diff --git a/libcxx/test/std/containers/associative/map/map.access/empty.fail.cpp b/libcxx/test/std/containers/associative/map/map.access/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/associative/map/map.access/empty.fail.cpp rename to libcxx/test/std/containers/associative/map/map.access/empty.verify.cpp diff --git 
a/libcxx/test/std/containers/associative/multimap/empty.fail.cpp b/libcxx/test/std/containers/associative/multimap/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/associative/multimap/empty.fail.cpp rename to libcxx/test/std/containers/associative/multimap/empty.verify.cpp diff --git a/libcxx/test/std/containers/associative/multiset/empty.fail.cpp b/libcxx/test/std/containers/associative/multiset/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/associative/multiset/empty.fail.cpp rename to libcxx/test/std/containers/associative/multiset/empty.verify.cpp diff --git a/libcxx/test/std/containers/associative/set/empty.fail.cpp b/libcxx/test/std/containers/associative/set/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/associative/set/empty.fail.cpp rename to libcxx/test/std/containers/associative/set/empty.verify.cpp diff --git a/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.fail.cpp b/libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.fail.cpp rename to libcxx/test/std/containers/container.adaptors/priority.queue/priqueue.members/empty.verify.cpp diff --git a/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.fail.cpp b/libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.fail.cpp rename to libcxx/test/std/containers/container.adaptors/queue/queue.defn/empty.verify.cpp diff --git a/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.fail.cpp b/libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.fail.cpp rename to libcxx/test/std/containers/container.adaptors/stack/stack.defn/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/array/empty.fail.cpp b/libcxx/test/std/containers/sequences/array/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/array/empty.fail.cpp rename to libcxx/test/std/containers/sequences/array/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/deque/deque.capacity/empty.fail.cpp b/libcxx/test/std/containers/sequences/deque/deque.capacity/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/deque/deque.capacity/empty.fail.cpp rename to libcxx/test/std/containers/sequences/deque/deque.capacity/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/forwardlist/empty.fail.cpp b/libcxx/test/std/containers/sequences/forwardlist/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/forwardlist/empty.fail.cpp rename to libcxx/test/std/containers/sequences/forwardlist/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/list/list.capacity/empty.fail.cpp b/libcxx/test/std/containers/sequences/list/list.capacity/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/list/list.capacity/empty.fail.cpp rename to libcxx/test/std/containers/sequences/list/list.capacity/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/vector.bool/empty.fail.cpp 
b/libcxx/test/std/containers/sequences/vector.bool/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/vector.bool/empty.fail.cpp rename to libcxx/test/std/containers/sequences/vector.bool/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/vector/vector.capacity/empty.fail.cpp b/libcxx/test/std/containers/sequences/vector/vector.capacity/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/vector/vector.capacity/empty.fail.cpp rename to libcxx/test/std/containers/sequences/vector/vector.capacity/empty.verify.cpp diff --git a/libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.fail.cpp b/libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.verify.cpp similarity index 100% rename from libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.fail.cpp rename to libcxx/test/std/containers/sequences/vector/vector.cons/copy.move_only.verify.cpp diff --git a/libcxx/test/std/containers/unord/unord.map/empty.fail.cpp b/libcxx/test/std/containers/unord/unord.map/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/unord/unord.map/empty.fail.cpp rename to libcxx/test/std/containers/unord/unord.map/empty.verify.cpp diff --git a/libcxx/test/std/containers/unord/unord.multimap/empty.fail.cpp b/libcxx/test/std/containers/unord/unord.multimap/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/unord/unord.multimap/empty.fail.cpp rename to libcxx/test/std/containers/unord/unord.multimap/empty.verify.cpp diff --git a/libcxx/test/std/containers/unord/unord.multiset/empty.fail.cpp b/libcxx/test/std/containers/unord/unord.multiset/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/unord/unord.multiset/empty.fail.cpp rename to libcxx/test/std/containers/unord/unord.multiset/empty.verify.cpp diff --git a/libcxx/test/std/containers/unord/unord.set/empty.fail.cpp b/libcxx/test/std/containers/unord/unord.set/empty.verify.cpp similarity index 100% rename from libcxx/test/std/containers/unord/unord.set/empty.fail.cpp rename to libcxx/test/std/containers/unord/unord.set/empty.verify.cpp diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.fail.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.verify.cpp similarity index 100% rename from libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.fail.cpp rename to libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.1st/bind1st.depr_in_cxx11.verify.cpp diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.fail.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.verify.cpp similarity index 100% rename from libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.fail.cpp rename to libcxx/test/std/depr/depr.lib.binders/depr.lib.bind.2nd/bind2nd.depr_in_cxx11.verify.cpp diff --git a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.fail.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.verify.cpp similarity index 100% rename from libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.fail.cpp rename to libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.1st/binder1st.depr_in_cxx11.verify.cpp diff --git 
a/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.fail.cpp b/libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.verify.cpp similarity index 100% rename from libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.fail.cpp rename to libcxx/test/std/depr/depr.lib.binders/depr.lib.binder.2nd/binder2nd.depr_in_cxx11.verify.cpp diff --git a/libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.fail.cpp b/libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.verify.cpp similarity index 100% rename from libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.fail.cpp rename to libcxx/test/std/input.output/filesystems/class.path/path.member/path.decompose/empty.verify.cpp diff --git a/libcxx/test/std/iterators/iterator.container/empty.array.fail.cpp b/libcxx/test/std/iterators/iterator.container/empty.array.verify.cpp similarity index 100% rename from libcxx/test/std/iterators/iterator.container/empty.array.fail.cpp rename to libcxx/test/std/iterators/iterator.container/empty.array.verify.cpp diff --git a/libcxx/test/std/iterators/iterator.container/empty.container.fail.cpp b/libcxx/test/std/iterators/iterator.container/empty.container.verify.cpp similarity index 100% rename from libcxx/test/std/iterators/iterator.container/empty.container.fail.cpp rename to libcxx/test/std/iterators/iterator.container/empty.container.verify.cpp diff --git a/libcxx/test/std/iterators/iterator.container/empty.initializer_list.fail.cpp b/libcxx/test/std/iterators/iterator.container/empty.initializer_list.verify.cpp similarity index 100% rename from libcxx/test/std/iterators/iterator.container/empty.initializer_list.fail.cpp rename to libcxx/test/std/iterators/iterator.container/empty.initializer_list.verify.cpp diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.fail.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.verify.cpp similarity index 100% rename from libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.fail.cpp rename to libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_array_ptr.verify.cpp diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.fail.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.verify.cpp similarity index 100% rename from libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.fail.cpp rename to libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.placement/new_ptr.verify.cpp diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.fail.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.verify.cpp similarity index 100% rename from libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.fail.cpp rename to libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size.verify.cpp diff --git a/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.fail.cpp b/libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.verify.cpp similarity index 100% rename from 
libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.fail.cpp rename to libcxx/test/std/language.support/support.dynamic/new.delete/new.delete.single/new_size_nothrow.verify.cpp diff --git a/libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.fail.cpp b/libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.verify.cpp similarity index 100% rename from libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.fail.cpp rename to libcxx/test/std/language.support/support.dynamic/ptr.launder/launder.nodiscard.verify.cpp diff --git a/libcxx/test/std/re/re.results/re.results.size/empty.fail.cpp b/libcxx/test/std/re/re.results/re.results.size/empty.verify.cpp similarity index 100% rename from libcxx/test/std/re/re.results/re.results.size/empty.fail.cpp rename to libcxx/test/std/re/re.results/re.results.size/empty.verify.cpp diff --git a/libcxx/test/std/strings/basic.string/string.capacity/empty.fail.cpp b/libcxx/test/std/strings/basic.string/string.capacity/empty.verify.cpp similarity index 100% rename from libcxx/test/std/strings/basic.string/string.capacity/empty.fail.cpp rename to libcxx/test/std/strings/basic.string/string.capacity/empty.verify.cpp diff --git a/libcxx/test/std/strings/string.view/string.view.capacity/empty.fail.cpp b/libcxx/test/std/strings/string.view/string.view.capacity/empty.verify.cpp similarity index 100% rename from libcxx/test/std/strings/string.view/string.view.capacity/empty.fail.cpp rename to libcxx/test/std/strings/string.view/string.view.capacity/empty.verify.cpp diff --git a/libcxx/test/std/thread/futures/futures.async/async.fail.cpp b/libcxx/test/std/thread/futures/futures.async/async.verify.cpp similarity index 100% rename from libcxx/test/std/thread/futures/futures.async/async.fail.cpp rename to libcxx/test/std/thread/futures/futures.async/async.verify.cpp diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.fail.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.fail.cpp rename to libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size.verify.cpp diff --git a/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.fail.cpp b/libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.fail.cpp rename to libcxx/test/std/utilities/allocator.adaptor/allocator.adaptor.members/allocate_size_hint.verify.cpp diff --git a/libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.fail.cpp b/libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.fail.cpp rename to libcxx/test/std/utilities/function.objects/negators/binary_negate.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.fail.cpp b/libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.fail.cpp rename to 
libcxx/test/std/utilities/function.objects/negators/not1.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.fail.cpp b/libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.fail.cpp rename to libcxx/test/std/utilities/function.objects/negators/not2.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.fail.cpp b/libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.fail.cpp rename to libcxx/test/std/utilities/function.objects/negators/unary_negate.depr_in_cxx17.verify.cpp diff --git a/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.fail.cpp b/libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.fail.cpp rename to libcxx/test/std/utilities/memory/allocator.traits/allocator.traits.members/allocate.verify.cpp diff --git a/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.fail.cpp b/libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp similarity index 100% rename from libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.fail.cpp rename to libcxx/test/std/utilities/memory/default.allocator/allocator.members/allocate.verify.cpp From a7afb211dc460bd4cfb2542ad1f9b05876b57ba1 Mon Sep 17 00:00:00 2001 From: Dmitry Polukhin Date: Thu, 16 Apr 2020 09:24:46 -0700 Subject: [PATCH 163/216] [clang][AST] Support AST files larger than 512M Summary: Clang uses 32-bit integers for storing bit offsets from the beginning of the file, which results in a 512M limit on AST files. This diff replaces absolute offsets with relative offsets from the beginning of the corresponding data structure where possible, and uses 64-bit offsets for DeclOffsets and TypeOffsets because those AST sections alone may easily exceed 512M. This diff breaks AST file format compatibility, so VERSION_MAJOR is bumped.
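For illustration only (not part of the patch): a minimal, self-contained sketch of the offset scheme described above. UnderalignedInt64 mirrors the definition added to ASTBitCodes.h below; the section base and the concrete offset values used in main() are hypothetical.

  // Sketch of the split 64-bit bit offset (mirrors UnderalignedInt64 below).
  #include <cassert>
  #include <cstdint>

  struct UnderalignedInt64 {
    uint32_t BitOffsetLow = 0;
    uint32_t BitOffsetHigh = 0;

    void setBitOffset(uint64_t Offset) {
      BitOffsetLow = static_cast<uint32_t>(Offset);
      BitOffsetHigh = static_cast<uint32_t>(Offset >> 32);
    }
    uint64_t getBitOffset() const {
      return BitOffsetLow | (uint64_t(BitOffsetHigh) << 32);
    }
  };

  int main() {
    // A 32-bit field still suffices for an offset stored relative to its own
    // section base (e.g. MacroOffsetsBase), even when the absolute position in
    // the AST file is beyond the old 2^32-bit (512M) limit.
    uint64_t SectionBase = (1ULL << 33) + 128;      // hypothetical section base
    uint64_t AbsolutePos = SectionBase + 42;        // hypothetical entry position
    uint32_t RelativeOffset = static_cast<uint32_t>(AbsolutePos - SectionBase);
    assert(SectionBase + RelativeOffset == AbsolutePos);

    // Decl and type offsets can exceed 32 bits on their own, so they use the
    // split 64-bit representation instead of a relative 32-bit one.
    UnderalignedInt64 DeclBitOffset;
    DeclBitOffset.setBitOffset((1ULL << 33) + 7);
    assert(DeclBitOffset.getBitOffset() == (1ULL << 33) + 7);
    return 0;
  }

The 64-bit value is kept as two 32-bit halves rather than a single uint64_t so the enclosing record stays 32-bit aligned, which matters because bitstream blobs are 32-bit aligned.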
Test Plan: Existing clang AST serialization tests Tested on clangd with ~700M and ~900M preamble files check-clang with ubsan Reviewers: rsmith, dexonsmith Subscribers: ilya-biryukov, kadircet, usaxena95, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D76594 --- .../include/clang/Serialization/ASTBitCodes.h | 46 ++++++++++++++++--- clang/include/clang/Serialization/ASTReader.h | 7 +-- clang/include/clang/Serialization/ASTWriter.h | 10 ++-- .../include/clang/Serialization/ModuleFile.h | 10 +++- clang/lib/Serialization/ASTReader.cpp | 23 ++++++---- clang/lib/Serialization/ASTReaderDecl.cpp | 2 +- clang/lib/Serialization/ASTWriter.cpp | 45 +++++++++++------- clang/lib/Serialization/ASTWriterDecl.cpp | 4 +- 8 files changed, 103 insertions(+), 44 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index 323edfbf8126..d5a27f487fa9 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. - const unsigned VERSION_MAJOR = 9; + const unsigned VERSION_MAJOR = 10; /// AST file minor version number supported by this version of /// Clang. @@ -181,7 +181,7 @@ namespace serialization { /// Raw source location of end of range. unsigned End; - /// Offset in the AST file. + /// Offset in the AST file relative to ModuleFile::MacroOffsetsBase. uint32_t BitOffset; PPEntityOffset(SourceRange R, uint32_t BitOffset) @@ -216,17 +216,41 @@ namespace serialization { } }; - /// Source range/offset of a preprocessed entity. + /// Offset in the AST file. Use splitted 64-bit integer into low/high + /// parts to keep structure alignment 32-bit (it is important because + /// blobs in bitstream are 32-bit aligned). This structure is serialized + /// "as is" to the AST file. + struct UnderalignedInt64 { + uint32_t BitOffsetLow = 0; + uint32_t BitOffsetHigh = 0; + + UnderalignedInt64() = default; + UnderalignedInt64(uint64_t BitOffset) { setBitOffset(BitOffset); } + + void setBitOffset(uint64_t Offset) { + BitOffsetLow = Offset; + BitOffsetHigh = Offset >> 32; + } + + uint64_t getBitOffset() const { + return BitOffsetLow | (uint64_t(BitOffsetHigh) << 32); + } + }; + + /// Source location and bit offset of a declaration. struct DeclOffset { /// Raw source location. unsigned Loc = 0; - /// Offset in the AST file. - uint32_t BitOffset = 0; + /// Offset in the AST file. Keep structure alignment 32-bit and avoid + /// padding gap because undefined value in the padding affects AST hash. + UnderalignedInt64 BitOffset; DeclOffset() = default; - DeclOffset(SourceLocation Loc, uint32_t BitOffset) - : Loc(Loc.getRawEncoding()), BitOffset(BitOffset) {} + DeclOffset(SourceLocation Loc, uint64_t BitOffset) { + setLocation(Loc); + setBitOffset(BitOffset); + } void setLocation(SourceLocation L) { Loc = L.getRawEncoding(); @@ -235,6 +259,14 @@ namespace serialization { SourceLocation getLocation() const { return SourceLocation::getFromRawEncoding(Loc); } + + void setBitOffset(uint64_t Offset) { + BitOffset.setBitOffset(Offset); + } + + uint64_t getBitOffset() const { + return BitOffset.getBitOffset(); + } }; /// The number of predefined preprocessed entity IDs. 
diff --git a/clang/include/clang/Serialization/ASTReader.h b/clang/include/clang/Serialization/ASTReader.h index 94645fff9f93..11a537fad5d5 100644 --- a/clang/include/clang/Serialization/ASTReader.h +++ b/clang/include/clang/Serialization/ASTReader.h @@ -723,9 +723,10 @@ class ASTReader struct PendingMacroInfo { ModuleFile *M; - uint64_t MacroDirectivesOffset; + /// Offset relative to ModuleFile::MacroOffsetsBase. + uint32_t MacroDirectivesOffset; - PendingMacroInfo(ModuleFile *M, uint64_t MacroDirectivesOffset) + PendingMacroInfo(ModuleFile *M, uint32_t MacroDirectivesOffset) : M(M), MacroDirectivesOffset(MacroDirectivesOffset) {} }; @@ -2205,7 +2206,7 @@ class ASTReader /// \param MacroDirectivesOffset Offset of the serialized macro directive /// history. void addPendingMacro(IdentifierInfo *II, ModuleFile *M, - uint64_t MacroDirectivesOffset); + uint32_t MacroDirectivesOffset); /// Read the set of macros defined by this external macro source. void ReadDefinedMacros() override; diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index c0a943adf2c7..8dc4889e3ae8 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -243,7 +243,7 @@ class ASTWriter : public ASTDeserializationListener, /// Offset of each type in the bitstream, indexed by /// the type's ID. - std::vector TypeOffsets; + std::vector TypeOffsets; /// The first ID number we can use for our own identifiers. serialization::IdentID FirstIdentID = serialization::NUM_PREDEF_IDENT_IDS; @@ -277,7 +277,8 @@ class ASTWriter : public ASTDeserializationListener, /// The macro infos to emit. std::vector MacroInfosToEmit; - llvm::DenseMap IdentMacroDirectivesOffsetMap; + llvm::DenseMap + IdentMacroDirectivesOffsetMap; /// @name FlushStmt Caches /// @{ @@ -464,7 +465,8 @@ class ASTWriter : public ASTDeserializationListener, const Preprocessor &PP); void WritePreprocessor(const Preprocessor &PP, bool IsModule); void WriteHeaderSearch(const HeaderSearch &HS); - void WritePreprocessorDetail(PreprocessingRecord &PPRec); + void WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase); void WriteSubmodules(Module *WritingModule); void WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, @@ -588,7 +590,7 @@ class ASTWriter : public ASTDeserializationListener, /// Determine the ID of an already-emitted macro. serialization::MacroID getMacroID(MacroInfo *MI); - uint64_t getMacroDirectivesOffset(const IdentifierInfo *Name); + uint32_t getMacroDirectivesOffset(const IdentifierInfo *Name); /// Emit a reference to a type. void AddTypeRef(QualType T, RecordDataImpl &Record); diff --git a/clang/include/clang/Serialization/ModuleFile.h b/clang/include/clang/Serialization/ModuleFile.h index 90d2745e080c..0cbfb2a14cd6 100644 --- a/clang/include/clang/Serialization/ModuleFile.h +++ b/clang/include/clang/Serialization/ModuleFile.h @@ -251,6 +251,10 @@ class ModuleFile { /// The base offset in the source manager's view of this module. unsigned SLocEntryBaseOffset = 0; + /// Base file offset for the offsets in SLocEntryOffsets. Real file offset + /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. + uint64_t SLocEntryOffsetsBase = 0; + /// Offsets for all of the source location entries in the /// AST file. const uint32_t *SLocEntryOffsets = nullptr; @@ -302,6 +306,10 @@ class ModuleFile { /// The number of macros in this AST file. 
unsigned LocalNumMacros = 0; + /// Base file offset for the offsets in MacroOffsets. Real file offset for + /// the entry is MacroOffsetsBase + MacroOffsets[i]. + uint64_t MacroOffsetsBase = 0; + /// Offsets of macros in the preprocessor block. /// /// This array is indexed by the macro ID (-1), and provides @@ -450,7 +458,7 @@ class ModuleFile { /// Offset of each type within the bitstream, indexed by the /// type ID, or the representation of a Type*. - const uint32_t *TypeOffsets = nullptr; + const UnderalignedInt64 *TypeOffsets = nullptr; /// Base type ID for types local to this module as represented in /// the global type ID space. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 7f114c069586..f0e9bbd4dcea 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -1470,6 +1470,7 @@ bool ASTReader::ReadSLocEntry(int ID) { ModuleFile *F = GlobalSLocEntryMap.find(-ID)->second; if (llvm::Error Err = F->SLocEntryCursor.JumpToBit( + F->SLocEntryOffsetsBase + F->SLocEntryOffsets[ID - F->SLocEntryBaseID])) { Error(std::move(Err)); return true; @@ -1932,9 +1933,8 @@ HeaderFileInfoTrait::ReadData(internal_key_ref key, const unsigned char *d, return HFI; } -void ASTReader::addPendingMacro(IdentifierInfo *II, - ModuleFile *M, - uint64_t MacroDirectivesOffset) { +void ASTReader::addPendingMacro(IdentifierInfo *II, ModuleFile *M, + uint32_t MacroDirectivesOffset) { assert(NumCurrentElementsDeserializing > 0 &&"Missing deserialization guard"); PendingMacroIDs[II].push_back(PendingMacroInfo(M, MacroDirectivesOffset)); } @@ -2099,7 +2099,8 @@ void ASTReader::resolvePendingMacro(IdentifierInfo *II, BitstreamCursor &Cursor = M.MacroCursor; SavedStreamPosition SavedPosition(Cursor); - if (llvm::Error Err = Cursor.JumpToBit(PMInfo.MacroDirectivesOffset)) { + if (llvm::Error Err = + Cursor.JumpToBit(M.MacroOffsetsBase + PMInfo.MacroDirectivesOffset)) { Error(std::move(Err)); return; } @@ -3098,7 +3099,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { Error("duplicate TYPE_OFFSET record in AST file"); return Failure; } - F.TypeOffsets = (const uint32_t *)Blob.data(); + F.TypeOffsets = reinterpret_cast(Blob.data()); F.LocalNumTypes = Record[0]; unsigned LocalBaseTypeIndex = Record[1]; F.BaseTypeIndex = getTotalNumTypes(); @@ -3376,6 +3377,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.SLocEntryOffsets = (const uint32_t *)Blob.data(); F.LocalNumSLocEntries = Record[0]; unsigned SLocSpaceSize = Record[1]; + F.SLocEntryOffsetsBase = Record[2]; std::tie(F.SLocEntryBaseID, F.SLocEntryBaseOffset) = SourceMgr.AllocateLoadedSLocEntries(F.LocalNumSLocEntries, SLocSpaceSize); @@ -3694,6 +3696,7 @@ ASTReader::ReadASTBlock(ModuleFile &F, unsigned ClientLoadCapabilities) { F.MacroOffsets = (const uint32_t *)Blob.data(); F.LocalNumMacros = Record[0]; unsigned LocalBaseMacroID = Record[1]; + F.MacroOffsetsBase = Record[2]; F.BaseMacroID = getTotalNumMacros(); if (F.LocalNumMacros > 0) { @@ -5907,8 +5910,8 @@ PreprocessedEntity *ASTReader::ReadPreprocessedEntity(unsigned Index) { } SavedStreamPosition SavedPosition(M.PreprocessorDetailCursor); - if (llvm::Error Err = - M.PreprocessorDetailCursor.JumpToBit(PPOffs.BitOffset)) { + if (llvm::Error Err = M.PreprocessorDetailCursor.JumpToBit( + M.MacroOffsetsBase + PPOffs.BitOffset)) { Error(std::move(Err)); return nullptr; } @@ -6321,7 +6324,8 @@ ASTReader::RecordLocation ASTReader::TypeCursorForIndex(unsigned Index) { 
GlobalTypeMapType::iterator I = GlobalTypeMap.find(Index); assert(I != GlobalTypeMap.end() && "Corrupted global type map"); ModuleFile *M = I->second; - return RecordLocation(M, M->TypeOffsets[Index - M->BaseTypeIndex]); + return RecordLocation( + M, M->TypeOffsets[Index - M->BaseTypeIndex].getBitOffset()); } static llvm::Optional getTypeClassForCode(TypeCode code) { @@ -8427,7 +8431,8 @@ MacroInfo *ASTReader::getMacro(MacroID ID) { assert(I != GlobalMacroMap.end() && "Corrupted global macro map"); ModuleFile *M = I->second; unsigned Index = ID - M->BaseMacroID; - MacrosLoaded[ID] = ReadMacroRecord(*M, M->MacroOffsets[Index]); + MacrosLoaded[ID] = + ReadMacroRecord(*M, M->MacroOffsetsBase + M->MacroOffsets[Index]); if (DeserializationListener) DeserializationListener->MacroRead(ID + NUM_PREDEF_MACRO_IDS, diff --git a/clang/lib/Serialization/ASTReaderDecl.cpp b/clang/lib/Serialization/ASTReaderDecl.cpp index fce4be133220..0a278c7506e1 100644 --- a/clang/lib/Serialization/ASTReaderDecl.cpp +++ b/clang/lib/Serialization/ASTReaderDecl.cpp @@ -2870,7 +2870,7 @@ ASTReader::DeclCursorForID(DeclID ID, SourceLocation &Loc) { const DeclOffset &DOffs = M->DeclOffsets[ID - M->BaseDeclID - NUM_PREDEF_DECL_IDS]; Loc = TranslateSourceLocation(*M, DOffs.getLocation()); - return RecordLocation(M, DOffs.BitOffset); + return RecordLocation(M, DOffs.getBitOffset()); } ASTReader::RecordLocation ASTReader::getLocalBitOffset(uint64_t GlobalOffset) { diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index de59dd280ba8..c8ce3edda60b 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -1893,6 +1893,7 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, // Write out the source location entry table. We skip the first // entry, which is always the same dummy entry. std::vector SLocEntryOffsets; + uint64_t SLocEntryOffsetsBase = Stream.GetCurrentBitNo(); RecordData PreloadSLocs; SLocEntryOffsets.reserve(SourceMgr.local_sloc_entry_size() - 1); for (unsigned I = 1, N = SourceMgr.local_sloc_entry_size(); @@ -1903,7 +1904,9 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, assert(&SourceMgr.getSLocEntry(FID) == SLoc); // Record the offset of this source-location entry. - SLocEntryOffsets.push_back(Stream.GetCurrentBitNo()); + uint64_t Offset = Stream.GetCurrentBitNo() - SLocEntryOffsetsBase; + assert((Offset >> 32) == 0 && "SLocEntry offset too large"); + SLocEntryOffsets.push_back(Offset); // Figure out which record code to use. unsigned Code; @@ -2011,12 +2014,14 @@ void ASTWriter::WriteSourceManagerBlock(SourceManager &SourceMgr, Abbrev->Add(BitCodeAbbrevOp(SOURCE_LOCATION_OFFSETS)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // # of slocs Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 16)); // total size + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // offsets unsigned SLocOffsetsAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = { SOURCE_LOCATION_OFFSETS, SLocEntryOffsets.size(), - SourceMgr.getNextLocalOffset() - 1 /* skip dummy */}; + SourceMgr.getNextLocalOffset() - 1 /* skip dummy */, + SLocEntryOffsetsBase}; Stream.EmitRecordWithBlob(SLocOffsetsAbbrev, Record, bytes(SLocEntryOffsets)); } @@ -2093,9 +2098,11 @@ static bool shouldIgnoreMacro(MacroDirective *MD, bool IsModule, /// Writes the block containing the serialized form of the /// preprocessor. 
void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { + uint64_t MacroOffsetsBase = Stream.GetCurrentBitNo(); + PreprocessingRecord *PPRec = PP.getPreprocessingRecord(); if (PPRec) - WritePreprocessorDetail(*PPRec); + WritePreprocessorDetail(*PPRec, MacroOffsetsBase); RecordData Record; RecordData ModuleMacroRecord; @@ -2156,7 +2163,8 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // identifier they belong to. for (const IdentifierInfo *Name : MacroIdentifiers) { MacroDirective *MD = PP.getLocalMacroDirectiveHistory(Name); - auto StartOffset = Stream.GetCurrentBitNo(); + uint64_t StartOffset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((StartOffset >> 32) == 0 && "Macro identifiers offset too large"); // Emit the macro directives in reverse source order. for (; MD; MD = MD->getPrevious()) { @@ -2229,14 +2237,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { // Record the local offset of this macro. unsigned Index = ID - FirstMacroID; - if (Index == MacroOffsets.size()) - MacroOffsets.push_back(Stream.GetCurrentBitNo()); - else { - if (Index > MacroOffsets.size()) - MacroOffsets.resize(Index + 1); + if (Index >= MacroOffsets.size()) + MacroOffsets.resize(Index + 1); - MacroOffsets[Index] = Stream.GetCurrentBitNo(); - } + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Macro offset too large"); + MacroOffsets[Index] = Offset; AddIdentifierRef(Name, Record); AddSourceLocation(MI->getDefinitionLoc(), Record); @@ -2287,17 +2293,20 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) { Abbrev->Add(BitCodeAbbrevOp(MACRO_OFFSET)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // # of macros Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); // first ID + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 32)); // base offset Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned MacroOffsetAbbrev = Stream.EmitAbbrev(std::move(Abbrev)); { RecordData::value_type Record[] = {MACRO_OFFSET, MacroOffsets.size(), - FirstMacroID - NUM_PREDEF_MACRO_IDS}; + FirstMacroID - NUM_PREDEF_MACRO_IDS, + MacroOffsetsBase}; Stream.EmitRecordWithBlob(MacroOffsetAbbrev, Record, bytes(MacroOffsets)); } } -void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { +void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec, + uint64_t MacroOffsetsBase) { if (PPRec.local_begin() == PPRec.local_end()) return; @@ -2334,8 +2343,10 @@ void ASTWriter::WritePreprocessorDetail(PreprocessingRecord &PPRec) { (void)++E, ++NumPreprocessingRecords, ++NextPreprocessorEntityID) { Record.clear(); + uint64_t Offset = Stream.GetCurrentBitNo() - MacroOffsetsBase; + assert((Offset >> 32) == 0 && "Preprocessed entity offset too large"); PreprocessedEntityOffsets.push_back( - PPEntityOffset((*E)->getSourceRange(), Stream.GetCurrentBitNo())); + PPEntityOffset((*E)->getSourceRange(), Offset)); if (auto *MD = dyn_cast(*E)) { // Record this macro definition's ID. @@ -2808,10 +2819,10 @@ void ASTWriter::WriteType(QualType T) { // Record the offset for this type. 
unsigned Index = Idx.getIndex() - FirstTypeID; if (TypeOffsets.size() == Index) - TypeOffsets.push_back(Offset); + TypeOffsets.emplace_back(Offset); else if (TypeOffsets.size() < Index) { TypeOffsets.resize(Index + 1); - TypeOffsets[Index] = Offset; + TypeOffsets[Index].setBitOffset(Offset); } else { llvm_unreachable("Types emitted in wrong order"); } @@ -5144,7 +5155,7 @@ MacroID ASTWriter::getMacroID(MacroInfo *MI) { return MacroIDs[MI]; } -uint64_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { +uint32_t ASTWriter::getMacroDirectivesOffset(const IdentifierInfo *Name) { return IdentMacroDirectivesOffsetMap.lookup(Name); } diff --git a/clang/lib/Serialization/ASTWriterDecl.cpp b/clang/lib/Serialization/ASTWriterDecl.cpp index e847180435ec..8c5be6cacac0 100644 --- a/clang/lib/Serialization/ASTWriterDecl.cpp +++ b/clang/lib/Serialization/ASTWriterDecl.cpp @@ -2434,12 +2434,12 @@ void ASTWriter::WriteDecl(ASTContext &Context, Decl *D) { SourceLocation Loc = D->getLocation(); unsigned Index = ID - FirstDeclID; if (DeclOffsets.size() == Index) - DeclOffsets.push_back(DeclOffset(Loc, Offset)); + DeclOffsets.emplace_back(Loc, Offset); else if (DeclOffsets.size() < Index) { // FIXME: Can/should this happen? DeclOffsets.resize(Index+1); DeclOffsets[Index].setLocation(Loc); - DeclOffsets[Index].BitOffset = Offset; + DeclOffsets[Index].setBitOffset(Offset); } else { llvm_unreachable("declarations should be emitted in ID order"); } From 166467e8221d202ad3e7b8c156e99f7a6def35e0 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 17 Apr 2020 15:28:00 +0200 Subject: [PATCH 164/216] [VectorUtils] Create shufflevector masks as int vectors instead of Constants No functionality change intended. --- llvm/include/llvm/Analysis/VectorUtils.h | 15 +++--- llvm/lib/Analysis/VectorUtils.cpp | 54 +++++++++---------- .../Target/AArch64/AArch64ISelLowering.cpp | 4 +- llvm/lib/Target/ARM/ARMISelLowering.cpp | 4 +- llvm/lib/Target/X86/X86InterleavedAccess.cpp | 4 +- .../Scalar/LowerMatrixIntrinsics.cpp | 18 +++---- .../Transforms/Vectorize/LoopVectorize.cpp | 16 +++--- 7 files changed, 57 insertions(+), 58 deletions(-) diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h index 6571c874d824..2b680e8131c4 100644 --- a/llvm/include/llvm/Analysis/VectorUtils.h +++ b/llvm/include/llvm/Analysis/VectorUtils.h @@ -450,8 +450,8 @@ Constant *createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, /// For example, the mask for \p ReplicationFactor=3 and \p VF=4 is: /// /// <0,0,0,1,1,1,2,2,2,3,3,3> -Constant *createReplicatedMask(IRBuilderBase &Builder, - unsigned ReplicationFactor, unsigned VF); +llvm::SmallVector createReplicatedMask(unsigned ReplicationFactor, + unsigned VF); /// Create an interleave shuffle mask. /// @@ -464,8 +464,7 @@ Constant *createReplicatedMask(IRBuilderBase &Builder, /// For example, the mask for VF = 4 and NumVecs = 2 is: /// /// <0, 4, 1, 5, 2, 6, 3, 7>. -Constant *createInterleaveMask(IRBuilderBase &Builder, unsigned VF, - unsigned NumVecs); +llvm::SmallVector createInterleaveMask(unsigned VF, unsigned NumVecs); /// Create a stride shuffle mask. 
/// @@ -479,8 +478,8 @@ Constant *createInterleaveMask(IRBuilderBase &Builder, unsigned VF, /// For example, the mask for Start = 0, Stride = 2, and VF = 4 is: /// /// <0, 2, 4, 6> -Constant *createStrideMask(IRBuilderBase &Builder, unsigned Start, - unsigned Stride, unsigned VF); +llvm::SmallVector createStrideMask(unsigned Start, unsigned Stride, + unsigned VF); /// Create a sequential shuffle mask. /// @@ -493,8 +492,8 @@ Constant *createStrideMask(IRBuilderBase &Builder, unsigned Start, /// For example, the mask for Start = 0, NumInsts = 4, and NumUndefs = 4 is: /// /// <0, 1, 2, 3, undef, undef, undef, undef> -Constant *createSequentialMask(IRBuilderBase &Builder, unsigned Start, - unsigned NumInts, unsigned NumUndefs); +llvm::SmallVector +createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs); /// Concatenate a list of vectors. /// diff --git a/llvm/lib/Analysis/VectorUtils.cpp b/llvm/lib/Analysis/VectorUtils.cpp index 31b600bac745..0d411485ddd9 100644 --- a/llvm/lib/Analysis/VectorUtils.cpp +++ b/llvm/lib/Analysis/VectorUtils.cpp @@ -761,46 +761,46 @@ llvm::createBitMaskForGaps(IRBuilderBase &Builder, unsigned VF, return ConstantVector::get(Mask); } -Constant *llvm::createReplicatedMask(IRBuilderBase &Builder, - unsigned ReplicationFactor, unsigned VF) { - SmallVector MaskVec; +llvm::SmallVector +llvm::createReplicatedMask(unsigned ReplicationFactor, unsigned VF) { + SmallVector MaskVec; for (unsigned i = 0; i < VF; i++) for (unsigned j = 0; j < ReplicationFactor; j++) - MaskVec.push_back(Builder.getInt32(i)); + MaskVec.push_back(i); - return ConstantVector::get(MaskVec); + return MaskVec; } -Constant *llvm::createInterleaveMask(IRBuilderBase &Builder, unsigned VF, - unsigned NumVecs) { - SmallVector Mask; +llvm::SmallVector llvm::createInterleaveMask(unsigned VF, + unsigned NumVecs) { + SmallVector Mask; for (unsigned i = 0; i < VF; i++) for (unsigned j = 0; j < NumVecs; j++) - Mask.push_back(Builder.getInt32(j * VF + i)); + Mask.push_back(j * VF + i); - return ConstantVector::get(Mask); + return Mask; } -Constant *llvm::createStrideMask(IRBuilderBase &Builder, unsigned Start, - unsigned Stride, unsigned VF) { - SmallVector Mask; +llvm::SmallVector +llvm::createStrideMask(unsigned Start, unsigned Stride, unsigned VF) { + SmallVector Mask; for (unsigned i = 0; i < VF; i++) - Mask.push_back(Builder.getInt32(Start + i * Stride)); + Mask.push_back(Start + i * Stride); - return ConstantVector::get(Mask); + return Mask; } -Constant *llvm::createSequentialMask(IRBuilderBase &Builder, unsigned Start, - unsigned NumInts, unsigned NumUndefs) { - SmallVector Mask; +llvm::SmallVector llvm::createSequentialMask(unsigned Start, + unsigned NumInts, + unsigned NumUndefs) { + SmallVector Mask; for (unsigned i = 0; i < NumInts; i++) - Mask.push_back(Builder.getInt32(Start + i)); + Mask.push_back(Start + i); - Constant *Undef = UndefValue::get(Builder.getInt32Ty()); for (unsigned i = 0; i < NumUndefs; i++) - Mask.push_back(Undef); + Mask.push_back(-1); - return ConstantVector::get(Mask); + return Mask; } /// A helper function for concatenating vectors. This function concatenates two @@ -820,13 +820,13 @@ static Value *concatenateTwoVectors(IRBuilderBase &Builder, Value *V1, if (NumElts1 > NumElts2) { // Extend with UNDEFs. 
- Constant *ExtMask = - createSequentialMask(Builder, 0, NumElts2, NumElts1 - NumElts2); - V2 = Builder.CreateShuffleVector(V2, UndefValue::get(VecTy2), ExtMask); + V2 = Builder.CreateShuffleVector( + V2, UndefValue::get(VecTy2), + createSequentialMask(0, NumElts2, NumElts1 - NumElts2)); } - Constant *Mask = createSequentialMask(Builder, 0, NumElts1 + NumElts2, 0); - return Builder.CreateShuffleVector(V1, V2, Mask); + return Builder.CreateShuffleVector( + V1, V2, createSequentialMask(0, NumElts1 + NumElts2, 0)); } Value *llvm::concatenateVectors(IRBuilderBase &Builder, diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 6311137d3ef2..f285b4b6ecf6 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -9557,7 +9557,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, unsigned IdxI = StoreCount * LaneLen * Factor + i; if (Mask[IdxI] >= 0) { Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0))); + Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { @@ -9573,7 +9573,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, // Note: StartMask cannot be negative, it's checked in // isReInterleaveMask Ops.push_back(Builder.CreateShuffleVector( - Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); + Op0, Op1, createSequentialMask(StartMask, LaneLen, 0))); } } diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp index 48071485ff95..e9ecc0589350 100644 --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -18016,7 +18016,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, unsigned IdxI = StoreCount * LaneLen * Factor + i; if (Mask[IdxI] >= 0) { Shuffles.push_back(Builder.CreateShuffleVector( - Op0, Op1, createSequentialMask(Builder, Mask[IdxI], LaneLen, 0))); + Op0, Op1, createSequentialMask(Mask[IdxI], LaneLen, 0))); } else { unsigned StartMask = 0; for (unsigned j = 1; j < LaneLen; j++) { @@ -18033,7 +18033,7 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, // Note: StartMask cannot be negative, it's checked in // isReInterleaveMask Shuffles.push_back(Builder.CreateShuffleVector( - Op0, Op1, createSequentialMask(Builder, StartMask, LaneLen, 0))); + Op0, Op1, createSequentialMask(StartMask, LaneLen, 0))); } } diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp index 81879b590b16..aa25cb9d2d08 100644 --- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp +++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp @@ -186,8 +186,8 @@ void X86InterleavedAccessGroup::decompose( DecomposedVectors.push_back( cast(Builder.CreateShuffleVector( Op0, Op1, - createSequentialMask(Builder, Indices[i], - SubVecTy->getVectorNumElements(), 0)))); + createSequentialMask(Indices[i], SubVecTy->getVectorNumElements(), + 0)))); return; } diff --git a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp index a2ddf858cf08..86c560022deb 100644 --- a/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp +++ b/llvm/lib/Transforms/Scalar/LowerMatrixIntrinsics.cpp @@ -328,9 +328,9 @@ class LowerMatrixIntrinsics { IRBuilder<> &Builder) const { Value *Vec = isColumnMajor() ? 
getColumn(J) : getRow(I); Value *Undef = UndefValue::get(Vec->getType()); - Constant *Mask = - createSequentialMask(Builder, isColumnMajor() ? I : J, NumElts, 0); - return Builder.CreateShuffleVector(Vec, Undef, Mask, "block"); + return Builder.CreateShuffleVector( + Vec, Undef, createSequentialMask(isColumnMajor() ? I : J, NumElts, 0), + "block"); } }; @@ -442,9 +442,9 @@ class LowerMatrixIntrinsics { Value *Undef = UndefValue::get(VType); for (unsigned MaskStart = 0; MaskStart < VType->getNumElements(); MaskStart += SI.getStride()) { - Constant *Mask = - createSequentialMask(Builder, MaskStart, SI.getStride(), 0); - Value *V = Builder.CreateShuffleVector(MatrixVal, Undef, Mask, "split"); + Value *V = Builder.CreateShuffleVector( + MatrixVal, Undef, createSequentialMask(MaskStart, SI.getStride(), 0), + "split"); SplitVecs.push_back(V); } @@ -909,10 +909,10 @@ class LowerMatrixIntrinsics { unsigned NumElts = cast(Col->getType())->getNumElements(); assert(NumElts >= BlockNumElts && "Too few elements for current block"); - Value *ExtendMask = - createSequentialMask(Builder, 0, BlockNumElts, NumElts - BlockNumElts); Value *Undef = UndefValue::get(Block->getType()); - Block = Builder.CreateShuffleVector(Block, Undef, ExtendMask); + Block = Builder.CreateShuffleVector( + Block, Undef, + createSequentialMask(0, BlockNumElts, NumElts - BlockNumElts)); // If Col is 7 long and I is 2 and BlockNumElts is 2 the mask is: 0, 1, 7, // 8, 4, 5, 6 diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 35ed4c04455e..7eaab8b0b739 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2253,9 +2253,9 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( if (BlockInMask) { Value *BlockInMaskPart = State.get(BlockInMask, Part); auto *Undefs = UndefValue::get(BlockInMaskPart->getType()); - auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF); Value *ShuffledMask = Builder.CreateShuffleVector( - BlockInMaskPart, Undefs, RepMask, "interleaved.mask"); + BlockInMaskPart, Undefs, + createReplicatedMask(InterleaveFactor, VF), "interleaved.mask"); GroupMask = MaskForGaps ? Builder.CreateBinOp(Instruction::And, ShuffledMask, MaskForGaps) @@ -2281,7 +2281,7 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( if (!Member) continue; - Constant *StrideMask = createStrideMask(Builder, I, InterleaveFactor, VF); + auto StrideMask = createStrideMask(I, InterleaveFactor, VF); for (unsigned Part = 0; Part < UF; Part++) { Value *StridedVec = Builder.CreateShuffleVector( NewLoads[Part], UndefVec, StrideMask, "strided.vec"); @@ -2330,17 +2330,17 @@ void InnerLoopVectorizer::vectorizeInterleaveGroup( Value *WideVec = concatenateVectors(Builder, StoredVecs); // Interleave the elements in the wide vector. 
- Constant *IMask = createInterleaveMask(Builder, VF, InterleaveFactor); - Value *IVec = Builder.CreateShuffleVector(WideVec, UndefVec, IMask, - "interleaved.vec"); + Value *IVec = Builder.CreateShuffleVector( + WideVec, UndefVec, createInterleaveMask(VF, InterleaveFactor), + "interleaved.vec"); Instruction *NewStoreInstr; if (BlockInMask) { Value *BlockInMaskPart = State.get(BlockInMask, Part); auto *Undefs = UndefValue::get(BlockInMaskPart->getType()); - auto *RepMask = createReplicatedMask(Builder, InterleaveFactor, VF); Value *ShuffledMask = Builder.CreateShuffleVector( - BlockInMaskPart, Undefs, RepMask, "interleaved.mask"); + BlockInMaskPart, Undefs, createReplicatedMask(InterleaveFactor, VF), + "interleaved.mask"); NewStoreInstr = Builder.CreateMaskedStore( IVec, AddrParts[Part], Group->getAlign(), ShuffledMask); } From 96712d6ef2c970ca3f5562be23d78d7df6360b4d Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Fri, 17 Apr 2020 10:28:47 +0100 Subject: [PATCH 165/216] [AMDGPU] Simplify SIRegisterInfo::getRegSplitParts Summary: Use more logic and fewer tables. This reduces the line count and reduces the effort required to introduce more register classes of different sizes in future. Reviewers: arsenm, rampitec, nhaehnle Subscribers: kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78351 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 231 +++++----------------- 1 file changed, 54 insertions(+), 177 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index a2f7fa04c9ec..4ff5960acf03 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1558,191 +1558,68 @@ SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI, ArrayRef SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const { - if (EltSize == 4) { - static const int16_t Sub0_31[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, - AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, - AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, - AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31, - }; - - static const int16_t Sub0_15[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, AMDGPU::sub11, - AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, - }; - - static const int16_t Sub0_7[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, - }; - - static const int16_t Sub0_4[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, AMDGPU::sub4, - }; - - static const int16_t Sub0_3[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, - }; - - static const int16_t Sub0_2[] = { - AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, - }; - - static const int16_t Sub0_1[] = { - AMDGPU::sub0, AMDGPU::sub1, - }; - - switch (AMDGPU::getRegBitWidth(*RC->MC)) { - case 32: - return {}; - case 64: - return makeArrayRef(Sub0_1); - case 96: - return makeArrayRef(Sub0_2); - case 128: - return makeArrayRef(Sub0_3); - case 160: - return 
makeArrayRef(Sub0_4); - case 256: - return makeArrayRef(Sub0_7); - case 512: - return makeArrayRef(Sub0_15); - case 1024: - return makeArrayRef(Sub0_31); - default: - llvm_unreachable("unhandled register size"); - } - } + const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC); + assert(RegBitWidth >= 32 && RegBitWidth <= 1024); - if (EltSize == 8) { - static const int16_t Sub0_31_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, - AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, - AMDGPU::sub16_sub17, AMDGPU::sub18_sub19, - AMDGPU::sub20_sub21, AMDGPU::sub22_sub23, - AMDGPU::sub24_sub25, AMDGPU::sub26_sub27, - AMDGPU::sub28_sub29, AMDGPU::sub30_sub31 - }; - - static const int16_t Sub0_15_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, - AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, - AMDGPU::sub12_sub13, AMDGPU::sub14_sub15 - }; - - static const int16_t Sub0_7_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, - AMDGPU::sub4_sub5, AMDGPU::sub6_sub7 - }; - - - static const int16_t Sub0_3_64[] = { - AMDGPU::sub0_sub1, AMDGPU::sub2_sub3 - }; - - switch (AMDGPU::getRegBitWidth(*RC->MC)) { - case 64: - return {}; - case 128: - return makeArrayRef(Sub0_3_64); - case 256: - return makeArrayRef(Sub0_7_64); - case 512: - return makeArrayRef(Sub0_15_64); - case 1024: - return makeArrayRef(Sub0_31_64); - default: - llvm_unreachable("unhandled register size"); - } - } + const unsigned EltBitWidth = EltSize * 8; + assert(EltBitWidth >= 32 && EltBitWidth < 1024 && isPowerOf2_32(EltBitWidth)); + const unsigned LogEltBitWidth = Log2_32(EltBitWidth); - if (EltSize == 16) { - - static const int16_t Sub0_31_128[] = { - AMDGPU::sub0_sub1_sub2_sub3, - AMDGPU::sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11, - AMDGPU::sub12_sub13_sub14_sub15, - AMDGPU::sub16_sub17_sub18_sub19, - AMDGPU::sub20_sub21_sub22_sub23, - AMDGPU::sub24_sub25_sub26_sub27, - AMDGPU::sub28_sub29_sub30_sub31 - }; - - static const int16_t Sub0_15_128[] = { - AMDGPU::sub0_sub1_sub2_sub3, - AMDGPU::sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11, - AMDGPU::sub12_sub13_sub14_sub15 - }; - - static const int16_t Sub0_7_128[] = { - AMDGPU::sub0_sub1_sub2_sub3, - AMDGPU::sub4_sub5_sub6_sub7 - }; - - switch (AMDGPU::getRegBitWidth(*RC->MC)) { - case 128: - return {}; - case 256: - return makeArrayRef(Sub0_7_128); - case 512: - return makeArrayRef(Sub0_15_128); - case 1024: - return makeArrayRef(Sub0_31_128); - default: - llvm_unreachable("unhandled register size"); - } - } + assert(RegBitWidth % EltBitWidth == 0); - if (EltSize == 32) { - static const int16_t Sub0_31_256[] = { - AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, - AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, - AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 - }; - - static const int16_t Sub0_15_256[] = { - AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, - AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15 - }; - - switch (AMDGPU::getRegBitWidth(*RC->MC)) { - case 256: - return {}; - case 512: - return makeArrayRef(Sub0_15_256); - case 1024: - return makeArrayRef(Sub0_31_256); - default: - llvm_unreachable("unhandled register size"); - } - } + if (RegBitWidth == EltBitWidth) + return {}; + + static const int16_t Sub_32[] = { + AMDGPU::sub0, AMDGPU::sub1, AMDGPU::sub2, AMDGPU::sub3, + AMDGPU::sub4, AMDGPU::sub5, AMDGPU::sub6, AMDGPU::sub7, + AMDGPU::sub8, AMDGPU::sub9, AMDGPU::sub10, 
AMDGPU::sub11, + AMDGPU::sub12, AMDGPU::sub13, AMDGPU::sub14, AMDGPU::sub15, + AMDGPU::sub16, AMDGPU::sub17, AMDGPU::sub18, AMDGPU::sub19, + AMDGPU::sub20, AMDGPU::sub21, AMDGPU::sub22, AMDGPU::sub23, + AMDGPU::sub24, AMDGPU::sub25, AMDGPU::sub26, AMDGPU::sub27, + AMDGPU::sub28, AMDGPU::sub29, AMDGPU::sub30, AMDGPU::sub31 + }; + + static const int16_t Sub_64[] = { + AMDGPU::sub0_sub1, AMDGPU::sub2_sub3, + AMDGPU::sub4_sub5, AMDGPU::sub6_sub7, + AMDGPU::sub8_sub9, AMDGPU::sub10_sub11, + AMDGPU::sub12_sub13, AMDGPU::sub14_sub15, + AMDGPU::sub16_sub17, AMDGPU::sub18_sub19, + AMDGPU::sub20_sub21, AMDGPU::sub22_sub23, + AMDGPU::sub24_sub25, AMDGPU::sub26_sub27, + AMDGPU::sub28_sub29, AMDGPU::sub30_sub31 + }; + + static const int16_t Sub_128[] = { + AMDGPU::sub0_sub1_sub2_sub3, + AMDGPU::sub4_sub5_sub6_sub7, + AMDGPU::sub8_sub9_sub10_sub11, + AMDGPU::sub12_sub13_sub14_sub15, + AMDGPU::sub16_sub17_sub18_sub19, + AMDGPU::sub20_sub21_sub22_sub23, + AMDGPU::sub24_sub25_sub26_sub27, + AMDGPU::sub28_sub29_sub30_sub31 + }; + + static const int16_t Sub_256[] = { + AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7, + AMDGPU::sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, + AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23, + AMDGPU::sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 + }; - assert(EltSize == 64 && "unhandled elt size"); - static const int16_t Sub0_31_512[] = { + static const int16_t Sub_512[] = { AMDGPU::sub0_sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15, AMDGPU::sub16_sub17_sub18_sub19_sub20_sub21_sub22_sub23_sub24_sub25_sub26_sub27_sub28_sub29_sub30_sub31 }; - switch (AMDGPU::getRegBitWidth(*RC->MC)) { - case 512: - return {}; - case 1024: - return makeArrayRef(Sub0_31_512); - default: - llvm_unreachable("unhandled register size"); - } + static const int16_t *const Subs[] = { + Sub_32, Sub_64, Sub_128, Sub_256, Sub_512 + }; + + return makeArrayRef(Subs[LogEltBitWidth - 5], RegBitWidth >> LogEltBitWidth); } const TargetRegisterClass* From 77618cc237a9b2923b032a7f669f35148cb95c0a Mon Sep 17 00:00:00 2001 From: jasonliu Date: Thu, 16 Apr 2020 19:52:34 +0000 Subject: [PATCH 166/216] [XCOFF][AIX] Fix getSymbol to return the correct qualname when necessary Summary: AIX symbols have both a qualname and an unqualified name. The stock getSymbol could only return the unqualified name, which forced us to patch many caller sides (lowerConstant, getMCSymbolForTOCPseudoMO). So we address the problem on the callee side (getSymbol) and clean up the callers instead. Note: this is a "mostly" NFC patch, with a fix for the original lowerConstant behavior.
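For illustration only (not part of the patch): a toy sketch of the qualname vs. unqualified-name distinction, using hypothetical helper and enum names. The [RW] suffix for a common symbol matches the aix-xcoff-lower-comm.ll test below; [DS] is the conventional storage mapping class for a function descriptor csect; plain labels inside a csect keep the bare name.

  // Toy model of the name selection; the real logic lives in
  // TargetLoweringObjectFileXCOFF::getTargetSymbol in the patch below.
  #include <iostream>
  #include <string>

  enum class SymKind { FunctionDefinition, CommonData, LabelInCsect };

  // Hypothetical helper: pick the name an assembler reference should use.
  std::string referenceName(const std::string &Name, SymKind Kind) {
    switch (Kind) {
    case SymKind::FunctionDefinition:
      return Name + "[DS]"; // qualname of the function descriptor csect
    case SymKind::CommonData:
      return Name + "[RW]"; // qualname of the common/BSS csect
    case SymKind::LabelInCsect:
      return Name;          // label inside a csect keeps the unqualified name
    }
    return Name;
  }

  int main() {
    std::cout << referenceName("common", SymKind::CommonData) << '\n';      // common[RW]
    std::cout << referenceName("foo", SymKind::FunctionDefinition) << '\n'; // foo[DS]
    std::cout << referenceName("pointer", SymKind::LabelInCsect) << '\n';   // pointer
    return 0;
  }

The patch centralizes this choice in getTargetSymbol so callers such as lowerConstant and getMCSymbolForTOCPseudoMO no longer need to special-case it.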
Differential Revision: https://reviews.llvm.org/D78045 --- .../CodeGen/TargetLoweringObjectFileImpl.h | 4 + .../llvm/Target/TargetLoweringObjectFile.h | 7 ++ .../CodeGen/TargetLoweringObjectFileImpl.cpp | 34 ++++++ llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 112 ++++-------------- llvm/lib/Target/TargetMachine.cpp | 4 + .../CodeGen/PowerPC/aix-xcoff-lower-comm.ll | 95 +++++++++++++++ 6 files changed, 164 insertions(+), 92 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll diff --git a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h index cf913a177672..3abae7bc721e 100644 --- a/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h +++ b/llvm/include/llvm/CodeGen/TargetLoweringObjectFileImpl.h @@ -258,6 +258,10 @@ class TargetLoweringObjectFileXCOFF : public TargetLoweringObjectFile { MCSection * getSectionForExternalReference(const GlobalObject *GO, const TargetMachine &TM) const override; + + /// For functions, this will always return a function descriptor symbol. + MCSymbol *getTargetSymbol(const GlobalValue *GV, + const TargetMachine &TM) const override; }; } // end namespace llvm diff --git a/llvm/include/llvm/Target/TargetLoweringObjectFile.h b/llvm/include/llvm/Target/TargetLoweringObjectFile.h index ffe0bfe74df8..b98332ecfaf9 100644 --- a/llvm/include/llvm/Target/TargetLoweringObjectFile.h +++ b/llvm/include/llvm/Target/TargetLoweringObjectFile.h @@ -237,6 +237,13 @@ class TargetLoweringObjectFile : public MCObjectFileInfo { return nullptr; } + /// Targets that have a special convention for their symbols could use + /// this hook to return a specialized symbol. + virtual MCSymbol *getTargetSymbol(const GlobalValue *GV, + const TargetMachine &TM) const { + return nullptr; + } + protected: virtual MCSection *SelectSectionForGlobal(const GlobalObject *GO, SectionKind Kind, diff --git a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index a5f380168c10..8ed3e8673a84 100644 --- a/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -1966,6 +1966,40 @@ MCSection *TargetLoweringObjectFileWasm::getStaticDtorSection( //===----------------------------------------------------------------------===// // XCOFF //===----------------------------------------------------------------------===// +MCSymbol * +TargetLoweringObjectFileXCOFF::getTargetSymbol(const GlobalValue *GV, + const TargetMachine &TM) const { + if (TM.getDataSections()) + report_fatal_error("XCOFF unique data sections not yet implemented"); + + // We always use a qualname symbol for a GV that represents + // a declaration, a function descriptor, or a common symbol. + // It is inherently ambiguous when the GO represents the address of a + // function, as the GO could either represent a function descriptor or a + // function entry point. We choose to always return a function descriptor + // here. + if (const GlobalObject *GO = dyn_cast(GV)) { + if (GO->isDeclaration()) + return cast(getSectionForExternalReference(GO, TM)) + ->getQualNameSymbol(); + + SectionKind GOKind = getKindForGlobal(GO, TM); + if (GOKind.isText()) + return cast( + getSectionForFunctionDescriptor(cast(GO), TM)) + ->getQualNameSymbol(); + if (GOKind.isCommon() || GOKind.isBSSLocal()) + return cast(SectionForGlobal(GO, GOKind, TM)) + ->getQualNameSymbol(); + } + + // For all other cases, fall back to getSymbol to return the unqualified name. 
+ // This could change for a GV that is a GlobalVariable when we decide to + // support -fdata-sections since we could avoid having label symbols if the + // linkage name is applied to the csect symbol. + return nullptr; +} + MCSection *TargetLoweringObjectFileXCOFF::getExplicitSectionGlobal( const GlobalObject *GO, SectionKind Kind, const TargetMachine &TM) const { report_fatal_error("XCOFF explicit sections not yet implemented."); diff --git a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp index 1d33a478e1a5..bc2d12fa0f1a 100644 --- a/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp +++ b/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp @@ -83,8 +83,6 @@ class PPCAsmPrinter : public AsmPrinter { const PPCSubtarget *Subtarget = nullptr; StackMaps SM; - virtual MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO); - public: explicit PPCAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -150,8 +148,6 @@ class PPCLinuxAsmPrinter : public PPCAsmPrinter { class PPCAIXAsmPrinter : public PPCAsmPrinter { private: static void ValidateGV(const GlobalVariable *GV); -protected: - MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO) override; public: PPCAIXAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) @@ -161,8 +157,6 @@ class PPCAIXAsmPrinter : public PPCAsmPrinter { void SetupMachineFunction(MachineFunction &MF) override; - const MCExpr *lowerConstant(const Constant *CV) override; - void emitGlobalVariable(const GlobalVariable *GV) override; void emitFunctionDescriptor() override; @@ -494,16 +488,17 @@ void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI, /// Map a machine operand for a TOC pseudo-machine instruction to its /// corresponding MCSymbol. -MCSymbol *PPCAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { +static MCSymbol *getMCSymbolForTOCPseudoMO(const MachineOperand &MO, + AsmPrinter &AP) { switch (MO.getType()) { case MachineOperand::MO_GlobalAddress: - return getSymbol(MO.getGlobal()); + return AP.getSymbol(MO.getGlobal()); case MachineOperand::MO_ConstantPoolIndex: - return GetCPISymbol(MO.getIndex()); + return AP.GetCPISymbol(MO.getIndex()); case MachineOperand::MO_JumpTableIndex: - return GetJTISymbol(MO.getIndex()); + return AP.GetJTISymbol(MO.getIndex()); case MachineOperand::MO_BlockAddress: - return GetBlockAddressSymbol(MO.getBlockAddress()); + return AP.GetBlockAddressSymbol(MO.getBlockAddress()); default: llvm_unreachable("Unexpected operand type to get symbol."); } @@ -664,7 +659,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtoc."); // Map the operand to its corresponding MCSymbol. - const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO); + const MCSymbol *const MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); // Create a reference to the GOT entry for the symbol. The GOT entry will be // synthesized later. @@ -723,7 +718,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { // global address operand to be a reference to the TOC entry we will // synthesize later. MCSymbol *TOCEntry = - lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO)); + lookUpOrCreateTOCEntry(getMCSymbolForTOCPseudoMO(MO, *this)); const MCSymbolRefExpr::VariantKind VK = IsAIX ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_PPC_TOC; @@ -749,7 +744,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { "Invalid operand for ADDIStocHA."); // Map the machine operand to its corresponding MCSymbol. 
- MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -779,7 +774,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { "Invalid operand for LWZtocL."); // Map the machine operand to its corresponding MCSymbol. - MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); + MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); // Always use TOC on AIX. Map the global address operand to be a reference // to the TOC entry we will synthesize later. 'TOCEntry' is a label used to @@ -807,7 +802,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { assert((MO.isGlobal() || MO.isCPI() || MO.isJTI() || MO.isBlockAddress()) && "Invalid operand for ADDIStocHA8!"); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); const bool GlobalToc = MO.isGlobal() && Subtarget->isGVIndirectSymbol(MO.getGlobal()); @@ -851,7 +846,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { "LDtocL used on symbol that could be accessed directly is " "invalid. Must match ADDIStocHA8.")); - const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO); + const MCSymbol *MOSymbol = getMCSymbolForTOCPseudoMO(MO, *this); if (!MO.isCPI() || TM.getCodeModel() == CodeModel::Large) MOSymbol = lookUpOrCreateTOCEntry(MOSymbol); @@ -881,7 +876,7 @@ void PPCAsmPrinter::emitInstruction(const MachineInstr *MI) { "Interposable definitions must use indirect access.")); const MCExpr *Exp = - MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO), + MCSymbolRefExpr::create(getMCSymbolForTOCPseudoMO(MO, *this), MCSymbolRefExpr::VK_PPC_TOC_LO, OutContext); TmpInst.getOperand(2) = MCOperand::createExpr(Exp); EmitToStreamer(*OutStreamer, TmpInst); @@ -1599,18 +1594,6 @@ void PPCAIXAsmPrinter::ValidateGV(const GlobalVariable *GV) { report_fatal_error("COMDAT not yet supported by AIX."); } -const MCExpr *PPCAIXAsmPrinter::lowerConstant(const Constant *CV) { - if (const Function *F = dyn_cast(CV)) { - MCSectionXCOFF *Csect = cast( - F->isDeclaration() - ? getObjFileLowering().getSectionForExternalReference(F, TM) - : getObjFileLowering().getSectionForFunctionDescriptor(F, TM)); - - return MCSymbolRefExpr::create(Csect->getQualNameSymbol(), OutContext); - } - return PPCAsmPrinter::lowerConstant(CV); -} - static bool isSpecialLLVMGlobalArrayForStaticInit(const GlobalVariable *GV) { return StringSwitch(GV->getName()) .Cases("llvm.global_ctors", "llvm.global_dtors", true) @@ -1632,25 +1615,18 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { GVSym->setStorageClass( TargetLoweringObjectFileXCOFF::getStorageClassForGlobal(GV)); - SectionKind GVKind; - - // Create the containing csect and set it. We set it for externals as well, - // since this may not have been set elsewhere depending on how they are used. - MCSectionXCOFF *Csect = cast( - GV->isDeclaration() - ? getObjFileLowering().getSectionForExternalReference(GV, TM) - : getObjFileLowering().SectionForGlobal( - GV, GVKind = getObjFileLowering().getKindForGlobal(GV, TM), - TM)); - // External global variables are already handled. 
if (GV->isDeclaration()) return; + SectionKind GVKind = getObjFileLowering().getKindForGlobal(GV, TM); if (!GVKind.isGlobalWriteableData() && !GVKind.isReadOnly()) report_fatal_error("Encountered a global variable kind that is " "not supported yet."); + MCSectionXCOFF *Csect = cast( + getObjFileLowering().SectionForGlobal(GV, GVKind, TM)); + // Switch to the containing csect. OutStreamer->SwitchSection(Csect); @@ -1664,9 +1640,10 @@ void PPCAIXAsmPrinter::emitGlobalVariable(const GlobalVariable *GV) { if (GVKind.isBSSLocal()) OutStreamer->emitXCOFFLocalCommonSymbol( - GVSym, Size, Csect->getQualNameSymbol(), Align); + OutContext.getOrCreateSymbol(GVSym->getUnqualifiedName()), Size, + GVSym, Align); else - OutStreamer->emitCommonSymbol(Csect->getQualNameSymbol(), Size, Align); + OutStreamer->emitCommonSymbol(GVSym, Size, Align); return; } @@ -1733,55 +1710,6 @@ void PPCAIXAsmPrinter::emitEndOfAsmFile(Module &M) { } } -MCSymbol * -PPCAIXAsmPrinter::getMCSymbolForTOCPseudoMO(const MachineOperand &MO) { - const GlobalObject *GO = nullptr; - - // If the MO is a function or certain kind of globals, we want to make sure to - // refer to the csect symbol, otherwise we can just do the default handling. - if (MO.getType() != MachineOperand::MO_GlobalAddress || - !(GO = dyn_cast(MO.getGlobal()))) - return PPCAsmPrinter::getMCSymbolForTOCPseudoMO(MO); - - // Do an early error check for globals we don't support. This will go away - // eventually. - const auto *GV = dyn_cast(GO); - if (GV) { - ValidateGV(GV); - } - - // If the global object is a global variable without initializer or is a - // declaration of a function, then XSym is an external referenced symbol. - // Hence we may need to explictly create a MCSectionXCOFF for it so that we - // can return its symbol later. - if (GO->isDeclaration()) { - return cast( - getObjFileLowering().getSectionForExternalReference(GO, TM)) - ->getQualNameSymbol(); - } - - // Handle initialized global variables and defined functions. - SectionKind GOKind = getObjFileLowering().getKindForGlobal(GO, TM); - - if (GOKind.isText()) { - // If the MO is a function, we want to make sure to refer to the function - // descriptor csect. - return cast( - getObjFileLowering().getSectionForFunctionDescriptor( - cast(GO), TM)) - ->getQualNameSymbol(); - } else if (GOKind.isCommon() || GOKind.isBSSLocal()) { - // If the operand is a common then we should refer to the csect symbol. - return cast( - getObjFileLowering().SectionForGlobal(GO, GOKind, TM)) - ->getQualNameSymbol(); - } - - // Other global variables are refered to by labels inside of a single csect, - // so refer to the label directly. - return getSymbol(GV); -} - /// createPPCAsmPrinterPass - Returns a pass that prints the PPC assembly code /// for a MachineFunction to the given output stream, in a format that the /// Darwin assembler can deal with. diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp index 4844a959bb46..1de6e871569c 100644 --- a/llvm/lib/Target/TargetMachine.cpp +++ b/llvm/lib/Target/TargetMachine.cpp @@ -258,6 +258,10 @@ void TargetMachine::getNameWithPrefix(SmallVectorImpl &Name, MCSymbol *TargetMachine::getSymbol(const GlobalValue *GV) const { const TargetLoweringObjectFile *TLOF = getObjFileLowering(); + // XCOFF symbols could have special naming convention. 
+ if (MCSymbol *TargetSymbol = TLOF->getTargetSymbol(GV, *this)) + return TargetSymbol; + SmallString<128> NameStr; getNameWithPrefix(NameStr, GV, TLOF->getMangler()); return TLOF->getContext().getOrCreateSymbol(NameStr); diff --git a/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll b/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll new file mode 100644 index 000000000000..a1dd7cf5b1a7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/aix-xcoff-lower-comm.ll @@ -0,0 +1,95 @@ +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff < %s | FileCheck --check-prefixes=CHECK,ASM32 %s +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff < %s | FileCheck --check-prefixes=CHECK,ASM64 %s + +; RUN: llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc-ibm-aix-xcoff -filetype=obj -o %t.o < %s +; RUN: llvm-readobj -r --expand-relocs -t %t.o | FileCheck --check-prefixes=RELOC,SYM %s + +; RUN: not --crash llc -verify-machineinstrs -mcpu=pwr4 -mtriple powerpc64-ibm-aix-xcoff -filetype=obj < %s 2>&1 | \ +; RUN: FileCheck --check-prefix=XCOFF64 %s +; XCOFF64: LLVM ERROR: 64-bit XCOFF object files are not supported yet. + +@common = common global i32 0, align 4 +@pointer = global i32* @common, align 4 + + +; CHECK: .comm common[RW],4,2 +; CHECK-NEXT: .csect .data[RW] +; CHECK-NEXT: .globl pointer +; ASM32-NEXT: .align 2 +; ASM64-NEXT: .align 3 +; CHECK-NEXT:pointer: +; ASM32-NEXT: .long common[RW] +; ASM64-NEXT: .llong common[RW] + + +; RELOC: Relocations [ +; RELOC-NEXT: Section (index: {{[0-9]+}}) .data { +; RELOC-NEXT: Relocation { +; RELOC-NEXT: Virtual Address: 0x0 +; RELOC-NEXT: Symbol: common ([[#COM_INDX:]]) +; RELOC-NEXT: IsSigned: No +; RELOC-NEXT: FixupBitValue: 0 +; RELOC-NEXT: Length: 32 +; RELOC-NEXT: Type: R_POS (0x0) +; RELOC-NEXT: } +; RELOC-NEXT: } +; RELOC-NEXT: ] + +; SYM: Symbol {{[{][[:space:]] *}}Index: [[#INDX:]]{{[[:space:]] *}}Name: .data +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_HIDEXT (0x6B) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+1]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_SD (0x1) +; SYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#INDX+2]] +; SYM-NEXT: Name: pointer +; SYM-NEXT: Value (RelocatableAddress): 0x0 +; SYM-NEXT: Section: .data +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#INDX+3]] +; SYM-NEXT: ContainingCsectSymbolIndex: [[#INDX]] +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; SYM-NEXT: SymbolAlignmentLog2: 0 +; SYM-NEXT: SymbolType: XTY_LD (0x2) +; SYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } +; SYM-NEXT: Symbol { +; SYM-NEXT: Index: [[#COM_INDX]] +; SYM-NEXT: Name: common +; SYM-NEXT: Value (RelocatableAddress): 0x4 +; SYM-NEXT: Section: .bss +; SYM-NEXT: Type: 0x0 +; SYM-NEXT: StorageClass: C_EXT (0x2) +; SYM-NEXT: NumberOfAuxEntries: 1 +; SYM-NEXT: CSECT Auxiliary Entry { +; SYM-NEXT: Index: [[#COM_INDX+1]] +; SYM-NEXT: SectionLen: 4 +; SYM-NEXT: ParameterHashIndex: 0x0 +; SYM-NEXT: TypeChkSectNum: 0x0 +; 
SYM-NEXT: SymbolAlignmentLog2: 2 +; SYM-NEXT: SymbolType: XTY_CM (0x3) +; SYM-NEXT: StorageMappingClass: XMC_RW (0x5) +; SYM-NEXT: StabInfoIndex: 0x0 +; SYM-NEXT: StabSectNum: 0x0 +; SYM-NEXT: } +; SYM-NEXT: } From e1c67273d53eaf0fe29b6c6fc69f31ff05dbde34 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Apr 2020 10:06:55 -0400 Subject: [PATCH 167/216] [libc++abi] NFC: Remove trailing whitespace --- libcxxabi/test/test_aux_runtime.pass.cpp | 36 ++++++++++++------------ libcxxabi/test/test_vector2.pass.cpp | 14 ++++----- libcxxabi/test/unwind_06.pass.cpp | 30 ++++++++++---------- 3 files changed, 40 insertions(+), 40 deletions(-) diff --git a/libcxxabi/test/test_aux_runtime.pass.cpp b/libcxxabi/test/test_aux_runtime.pass.cpp index ddc2630956bc..4f0c1748f96a 100644 --- a/libcxxabi/test/test_aux_runtime.pass.cpp +++ b/libcxxabi/test/test_aux_runtime.pass.cpp @@ -12,34 +12,34 @@ #include // Test taken from 5.2.8.2 -// When typeid is applied to a glvalue expression whose type is a polymorphic -// class type, (10.3), the result refers to a std::type_info object -// representing the type of the most derived object (1.8) (that is, the -// dynamic type) to which the glvalue refers. If the glvalue expression is -// obtained by applying the unary * operator to a pointer(68) and the pointer -// is a null pointer value (4.10), the typeid expression throws the +// When typeid is applied to a glvalue expression whose type is a polymorphic +// class type, (10.3), the result refers to a std::type_info object +// representing the type of the most derived object (1.8) (that is, the +// dynamic type) to which the glvalue refers. If the glvalue expression is +// obtained by applying the unary * operator to a pointer(68) and the pointer +// is a null pointer value (4.10), the typeid expression throws the // std::bad_typeid exception (18.7.3). // -// 68) If p is an expression of pointer type, then *p, (*p), *(p), +// 68) If p is an expression of pointer type, then *p, (*p), *(p), // ((*p)), *((p)), and so on all meet this requirement. bool bad_typeid_test () { - class A { virtual void f() {}}; - class B { virtual void g() {}}; - + class A { virtual void f() {}}; + class B { virtual void g() {}}; + B *bp = NULL; try {bool b = typeid(*bp) == typeid (A); ((void)b); } catch ( const std::bad_typeid &) { return true; } return false; } - -// The value of a failed cast to pointer type is the null pointer value of -// the required result type. A failed cast to reference type throws + +// The value of a failed cast to pointer type is the null pointer value of +// the required result type. A failed cast to reference type throws // std::bad_cast (18.7.2). bool bad_cast_test () { class A { virtual void f() {}}; class B { virtual void g() {}}; - class D : public virtual A, private B {}; + class D : public virtual A, private B {}; D d; B *bp = (B*)&d; // cast needed to break protection @@ -47,19 +47,19 @@ bool bad_cast_test () { catch ( const std::bad_cast & ) { return true; } return false; } - + int main ( ) { int ret_val = 0; - + if ( !bad_typeid_test ()) { std::cerr << "TypeID test failed!" << std::endl; ret_val = 1; } - + if ( !bad_cast_test ()) { std::cerr << "Bad cast test failed!" 
<< std::endl; ret_val = 1; } - + return ret_val; } diff --git a/libcxxabi/test/test_vector2.pass.cpp b/libcxxabi/test/test_vector2.pass.cpp index e08abbb5bd21..f923aa4ff215 100644 --- a/libcxxabi/test/test_vector2.pass.cpp +++ b/libcxxabi/test/test_vector2.pass.cpp @@ -18,18 +18,18 @@ void my_terminate () { exit ( 0 ); } // Wrapper routines void *my_alloc2 ( size_t sz ) { void *p = std::malloc ( sz ); -// std::printf ( "Allocated %ld bytes at %lx\n", sz, (unsigned long) p ); +// std::printf ( "Allocated %ld bytes at %lx\n", sz, (unsigned long) p ); return p; } - + void my_dealloc2 ( void *p ) { -// std::printf ( "Freeing %lx\n", (unsigned long) p ); - std::free ( p ); +// std::printf ( "Freeing %lx\n", (unsigned long) p ); + std::free ( p ); } void my_dealloc3 ( void *p, size_t ) { -// std::printf ( "Freeing %lx (size %ld)\n", (unsigned long) p, sz ); - std::free ( p ); +// std::printf ( "Freeing %lx (size %ld)\n", (unsigned long) p, sz ); + std::free ( p ); } void my_construct ( void *) { @@ -72,7 +72,7 @@ void test_exception_in_destructor ( ) { } catch ( int i ) {} - std::cerr << "should never get here" << std::endl; + std::cerr << "should never get here" << std::endl; } diff --git a/libcxxabi/test/unwind_06.pass.cpp b/libcxxabi/test/unwind_06.pass.cpp index 7d67f52f8e02..a305d3d03fa2 100644 --- a/libcxxabi/test/unwind_06.pass.cpp +++ b/libcxxabi/test/unwind_06.pass.cpp @@ -136,7 +136,7 @@ double foo() double g = get(7); double h = get(8); try { - try1(true); + try1(true); } catch (int e) { } @@ -148,9 +148,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try2(true); + try2(true); } catch (int e) { } @@ -162,9 +162,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try3(true); + try3(true); } catch (int e) { } @@ -176,9 +176,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try4(true); + try4(true); } catch (int e) { } @@ -190,9 +190,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try5(true); + try5(true); } catch (int e) { } @@ -204,9 +204,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try6(true); + try6(true); } catch (int e) { } @@ -218,9 +218,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try7(true); + try7(true); } catch (int e) { } @@ -232,9 +232,9 @@ double foo() assert(f == get(6)); assert(g == get(7)); assert(h == get(8)); - + try { - try8(true); + try8(true); } catch (int e) { } From 61ba1481e200b5b35baa81ffcff81acb678e8508 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Tue, 24 Dec 2019 07:28:40 -0800 Subject: [PATCH 168/216] Implement _ExtInt as an extended int type specifier. Introduction/Motivation: LLVM-IR supports integers of non-power-of-2 bitwidth, in the iN syntax. Integers of non-power-of-two aren't particularly interesting or useful on most hardware, so much so that no language in Clang has been motivated to expose it before. However, in the case of FPGA hardware normal integer types where the full bitwidth isn't used, is extremely wasteful and has severe performance/space concerns. Because of this, Intel has introduced this functionality in the High Level Synthesis compiler[0] under the name "Arbitrary Precision Integer" (ap_int for short). 
This has been extremely useful and effective for our users, permitting them to optimize their storage and operation space on an architecture where both can be extremely expensive. We are proposing upstreaming a more palatable version of this to the community, in the form of this proposal and accompanying patch. We are proposing the syntax _ExtInt(N). We intend to propose this to the WG14 committee[1], and the underscore-capital seems like the active direction for a WG14 paper's acceptance. An alternative that Richard Smith suggested on the initial review was __int(N), however we believe that is much less acceptable by WG14. We considered _Int, however _Int is used as an identifier in libstdc++ and there is no good way to fall back to an identifier (since _Int(5) is indistinguishable from an unnamed initializer of a template type named _Int). [0]https://www.intel.com/content/www/us/en/software/programmable/quartus-prime/hls-compiler.html) [1]http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2472.pdf Differential Revision: https://reviews.llvm.org/D73967 --- clang/docs/LanguageExtensions.rst | 53 +++ clang/docs/ReleaseNotes.rst | 8 + clang/include/clang/AST/ASTContext.h | 10 + clang/include/clang/AST/RecursiveASTVisitor.h | 9 + clang/include/clang/AST/Type.h | 70 ++- clang/include/clang/AST/TypeLoc.h | 6 + clang/include/clang/AST/TypeProperties.td | 25 + .../clang/Basic/DiagnosticSemaKinds.td | 14 +- clang/include/clang/Basic/Specifiers.h | 1 + clang/include/clang/Basic/TokenKinds.def | 1 + clang/include/clang/Basic/TypeNodes.td | 2 + clang/include/clang/Parse/Parser.h | 1 + clang/include/clang/Sema/DeclSpec.h | 6 +- clang/include/clang/Sema/Sema.h | 1 + .../clang/Serialization/TypeBitCodes.def | 2 + clang/lib/AST/ASTContext.cpp | 67 +++ clang/lib/AST/ASTStructuralEquivalence.cpp | 18 + clang/lib/AST/ExprConstant.cpp | 1 + clang/lib/AST/ItaniumMangle.cpp | 24 + clang/lib/AST/MicrosoftMangle.cpp | 24 + clang/lib/AST/Type.cpp | 63 ++- clang/lib/AST/TypePrinter.cpp | 24 + clang/lib/CodeGen/CGDebugInfo.cpp | 13 + clang/lib/CodeGen/CGDebugInfo.h | 1 + clang/lib/CodeGen/CGExprScalar.cpp | 26 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 3 +- clang/lib/CodeGen/CodeGenFunction.cpp | 2 + clang/lib/CodeGen/CodeGenTBAA.cpp | 9 + clang/lib/CodeGen/CodeGenTypes.cpp | 21 +- clang/lib/CodeGen/CodeGenTypes.h | 2 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 6 +- clang/lib/Parse/ParseDecl.cpp | 30 ++ clang/lib/Parse/ParseExpr.cpp | 1 + clang/lib/Parse/ParseExprCXX.cpp | 13 + clang/lib/Parse/ParseTentative.cpp | 20 + clang/lib/Sema/DeclSpec.cpp | 28 +- clang/lib/Sema/SemaChecking.cpp | 6 + clang/lib/Sema/SemaDecl.cpp | 8 +- clang/lib/Sema/SemaDeclAttr.cpp | 5 +- clang/lib/Sema/SemaExpr.cpp | 15 +- clang/lib/Sema/SemaLookup.cpp | 1 + clang/lib/Sema/SemaTemplate.cpp | 17 +- clang/lib/Sema/SemaTemplateDeduction.cpp | 34 ++ clang/lib/Sema/SemaTemplateVariadic.cpp | 1 + clang/lib/Sema/SemaType.cpp | 66 +++ clang/lib/Sema/TreeTransform.h | 76 +++ clang/lib/Serialization/ASTReader.cpp | 9 + clang/lib/Serialization/ASTWriter.cpp | 8 + clang/test/CodeGen/ext-int-sanitizer.cpp | 265 +++++++++++ clang/test/CodeGen/ext-int.c | 44 ++ clang/test/CodeGenCXX/ext-int.cpp | 432 ++++++++++++++++++ clang/test/CodeGenOpenCL/ext-int-shift.cl | 21 + clang/test/Parser/ext-int.cpp | 15 + clang/test/SemaCXX/ext-int.cpp | 278 +++++++++++ clang/tools/libclang/CIndex.cpp | 2 + 55 files changed, 1872 insertions(+), 36 deletions(-) create mode 100644 clang/test/CodeGen/ext-int-sanitizer.cpp create mode 100644 clang/test/CodeGen/ext-int.c 
create mode 100644 clang/test/CodeGenCXX/ext-int.cpp create mode 100644 clang/test/CodeGenOpenCL/ext-int-shift.cl create mode 100644 clang/test/Parser/ext-int.cpp create mode 100644 clang/test/SemaCXX/ext-int.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 929cd1c67e73..07062a191ce8 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3461,3 +3461,56 @@ Since the size of ``buffer`` can't be known at compile time, Clang will fold ``__builtin_object_size(buffer, 0)`` into ``-1``. However, if this was written as ``__builtin_dynamic_object_size(buffer, 0)``, Clang will fold it into ``size``, providing some extra runtime safety. + +Extended Integer Types +====================== + +Clang supports a set of extended integer types under the syntax ``_ExtInt(N)`` +where ``N`` is an integer that specifies the number of bits that are used to represent +the type, including the sign bit. The keyword ``_ExtInt`` is a type specifier, thus +it can be used in any place a type can, including as a non-type-template-parameter, +as the type of a bitfield, and as the underlying type of an enumeration. + +An extended integer can be declared either signed, or unsigned by using the +``signed``/``unsigned`` keywords. If no sign specifier is used or if the ``signed`` +keyword is used, the extended integer type is a signed integer and can represent +negative values. + +The ``N`` expression is an integer constant expression, which specifies the number +of bits used to represent the type, following normal integer representations for +both signed and unsigned types. Both a signed and unsigned extended integer of the +same ``N`` value will have the same number of bits in its representation. Many +architectures don't have a way of representing non power-of-2 integers, so these +architectures emulate these types using larger integers. In these cases, they are +expected to follow the 'as-if' rule and do math 'as-if' they were done at the +specified number of bits. + +In order to be consistent with the C language specification, and make the extended +integer types useful for their intended purpose, extended integers follow the C +standard integer conversion ranks. An extended integer type has a greater rank than +any integer type with less precision. However, they have lower rank than any +of the built in or other integer types (such as __int128). Usual arithmetic conversions +also work the same, where the smaller ranked integer is converted to the larger. + +The one exception to the C rules for integers for these types is Integer Promotion. +Unary +, -, and ~ operators typically will promote operands to ``int``. Doing these +promotions would inflate the size of required hardware on some platforms, so extended +integer types aren't subject to the integer promotion rules in these cases. + +In languages (such as OpenCL) that define shift by-out-of-range behavior as a mask, +non-power-of-two versions of these types use an unsigned remainder operation to constrain +the value to the proper range, preventing undefined behavior. + +Extended integer types are aligned to the next greatest power-of-2 up to 64 bits. +The size of these types for the purposes of layout and ``sizeof`` are the number of +bits aligned to this calculated alignment. This permits the use of these types in +allocated arrays using common ``sizeof(Array)/sizeof(ElementType)`` pattern. 
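A short C sketch of the sizing and promotion rules described in the documentation above; the bit widths and the function name are arbitrary, and the size checks assume a typical target with 8-bit chars rather than anything guaranteed by the patch:

    /* Layout rounds the bit count up to the next power of 2 (capped at 64 bits). */
    _Static_assert(sizeof(_ExtInt(7)) == 1, "7 bits round up to one 8-bit unit");
    _Static_assert(sizeof(unsigned _ExtInt(12)) == 2, "12 bits round up to 16");

    unsigned _ExtInt(12) add(unsigned _ExtInt(12) a, unsigned _ExtInt(12) b) {
      /* Operands of the same _ExtInt type are not promoted to 'int';
         the addition here is carried out at 12 bits. */
      return a + b;
    }
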
+ +Extended integer types work with the C _Atomic type modifier, however only precisions +that are powers-of-2 greater than 8 bit are accepted. + +Extended integer types align with existing calling conventions. They have the same size +and alignment as the smallest basic type that can contain them. Types that are larger +than 64 bits are handled in the same way as _int128 is handled; they are conceptually +treated as struct of register size chunks. They number of chunks are the smallest +number that can contain the types which does not necessarily mean a power-of-2 size. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index a8163cad9fde..54deba7bbd0e 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -62,6 +62,14 @@ Non-comprehensive list of changes in this release in the Arm C Language Extensions. +* clang adds support for a set of extended integer types (``_ExtInt(N)``) that + permit non-power of 2 integers, exposing the LLVM integer types. Since a major + motivating use case for these types is to limit 'bit' usage, these types don't + automatically promote to 'int' when operations are done between two ``ExtInt(N)`` + types, instead math occurs at the size of the largest ``ExtInt(N)`` type. + + + New Compiler Flags ------------------ diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index ac742fefc109..dedbd857819d 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -224,6 +224,8 @@ class ASTContext : public RefCountedBase { mutable llvm::FoldingSet AtomicTypes; llvm::FoldingSet AttributedTypes; mutable llvm::FoldingSet PipeTypes; + mutable llvm::FoldingSet ExtIntTypes; + mutable llvm::FoldingSet DependentExtIntTypes; mutable llvm::FoldingSet QualifiedTemplateNames; mutable llvm::FoldingSet DependentTemplateNames; @@ -1203,6 +1205,14 @@ class ASTContext : public RefCountedBase { /// Return a write_only pipe type for the specified type. QualType getWritePipeType(QualType T) const; + /// Return an extended integer type with the specified signedness and bit + /// count. + QualType getExtIntType(bool Unsigned, unsigned NumBits) const; + + /// Return a dependent extended integer type with the specified signedness and + /// bit count. + QualType getDependentExtIntType(bool Unsigned, Expr *BitsExpr) const; + /// Gets the struct used to keep track of the extended descriptor for /// pointer to blocks. 
QualType getBlockDescriptorExtendedType() const; diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 85eb6259a419..b71f7994e2fa 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1115,6 +1115,10 @@ DEF_TRAVERSE_TYPE(AtomicType, { TRY_TO(TraverseType(T->getValueType())); }) DEF_TRAVERSE_TYPE(PipeType, { TRY_TO(TraverseType(T->getElementType())); }) +DEF_TRAVERSE_TYPE(ExtIntType, {}) +DEF_TRAVERSE_TYPE(DependentExtIntType, + { TRY_TO(TraverseStmt(T->getNumBitsExpr())); }) + #undef DEF_TRAVERSE_TYPE // ----------------- TypeLoc traversal ----------------- @@ -1385,6 +1389,11 @@ DEF_TRAVERSE_TYPELOC(AtomicType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) DEF_TRAVERSE_TYPELOC(PipeType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) +DEF_TRAVERSE_TYPELOC(ExtIntType, {}) +DEF_TRAVERSE_TYPELOC(DependentExtIntType, { + TRY_TO(TraverseStmt(TL.getTypePtr()->getNumBitsExpr())); +}) + #undef DEF_TRAVERSE_TYPELOC // ----------------- Decl traversal ----------------- diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index f78d9d7670a7..322b14ce641a 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2101,6 +2101,7 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { bool isOCLExtOpaqueType() const; // Any OpenCL extension type bool isPipeType() const; // OpenCL pipe type + bool isExtIntType() const; // Extended Int Type bool isOpenCLSpecificType() const; // Any OpenCL specific type /// Determines if this type, which must satisfy @@ -6127,6 +6128,64 @@ class PipeType : public Type, public llvm::FoldingSetNode { bool isReadOnly() const { return isRead; } }; +/// A fixed int type of a specified bitwidth. +class ExtIntType final : public Type, public llvm::FoldingSetNode { + friend class ASTContext; + unsigned IsUnsigned : 1; + unsigned NumBits : 24; + +protected: + ExtIntType(bool isUnsigned, unsigned NumBits); + +public: + bool isUnsigned() const { return IsUnsigned; } + bool isSigned() const { return !IsUnsigned; } + unsigned getNumBits() const { return NumBits; } + + bool isSugared() const { return false; } + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, isUnsigned(), getNumBits()); + } + + static void Profile(llvm::FoldingSetNodeID &ID, bool IsUnsigned, + unsigned NumBits) { + ID.AddBoolean(IsUnsigned); + ID.AddInteger(NumBits); + } + + static bool classof(const Type *T) { return T->getTypeClass() == ExtInt; } +}; + +class DependentExtIntType final : public Type, public llvm::FoldingSetNode { + friend class ASTContext; + const ASTContext &Context; + llvm::PointerIntPair ExprAndUnsigned; + +protected: + DependentExtIntType(const ASTContext &Context, bool IsUnsigned, + Expr *NumBits); + +public: + bool isUnsigned() const; + bool isSigned() const { return !isUnsigned(); } + Expr *getNumBitsExpr() const; + + bool isSugared() const { return false; } + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, Context, isUnsigned(), getNumBitsExpr()); + } + static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, + bool IsUnsigned, Expr *NumBitsExpr); + + static bool classof(const Type *T) { + return T->getTypeClass() == DependentExtInt; + } +}; + /// A qualifier set is used to build a set of qualifiers. 
class QualifierCollector : public Qualifiers { public: @@ -6646,6 +6705,10 @@ inline bool Type::isPipeType() const { return isa(CanonicalType); } +inline bool Type::isExtIntType() const { + return isa(CanonicalType); +} + #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ inline bool Type::is##Id##Type() const { \ return isSpecificBuiltinType(BuiltinType::Id); \ @@ -6741,7 +6804,7 @@ inline bool Type::isIntegerType() const { return IsEnumDeclComplete(ET->getDecl()) && !IsEnumDeclScoped(ET->getDecl()); } - return false; + return isExtIntType(); } inline bool Type::isFixedPointType() const { @@ -6798,7 +6861,8 @@ inline bool Type::isScalarType() const { isa(CanonicalType) || isa(CanonicalType) || isa(CanonicalType) || - isa(CanonicalType); + isa(CanonicalType) || + isExtIntType(); } inline bool Type::isIntegralOrEnumerationType() const { @@ -6811,7 +6875,7 @@ inline bool Type::isIntegralOrEnumerationType() const { if (const auto *ET = dyn_cast(CanonicalType)) return IsEnumDeclComplete(ET->getDecl()); - return false; + return isExtIntType(); } inline bool Type::isBooleanType() const { diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index 3fc53d823c37..2221485983b2 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -2450,6 +2450,12 @@ inline T TypeLoc::getAsAdjusted() const { } return Cur.getAs(); } +class ExtIntTypeLoc final + : public InheritingConcreteTypeLoc {}; +class DependentExtIntTypeLoc final + : public InheritingConcreteTypeLoc {}; } // namespace clang diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 994f932170ae..12bc5a4ee8a3 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -833,3 +833,28 @@ let Class = PipeType in { return ctx.getPipeType(elementType, isReadOnly); }]>; } + +let Class = ExtIntType in { + def : Property<"isUnsigned", Bool> { + let Read = [{ node->isUnsigned() }]; + } + def : Property <"numBits", UInt32> { + let Read = [{ node->getNumBits() }]; + } + + def : Creator<[{ + return ctx.getExtIntType(isUnsigned, numBits); + }]>; +} + +let Class = DependentExtIntType in { + def : Property<"isUnsigned", Bool> { + let Read = [{ node->isUnsigned() }]; + } + def : Property <"numBitsExpr", ExprRef> { + let Read = [{ node->getNumBitsExpr() }]; + } + def : Creator<[{ + return ctx.getDependentExtIntType(isUnsigned, numBitsExpr); + }]>; +} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 35a7a05667fc..97ad1a6c7920 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5947,10 +5947,12 @@ def err_block_return_missing_expr : Error< "non-void block should return a value">; def err_func_def_incomplete_result : Error< "incomplete result type %0 in function definition">; -def err_atomic_specifier_bad_type : Error< - "_Atomic cannot be applied to " - "%select{incomplete |array |function |reference |atomic |qualified |sizeless |}0type " - "%1 %select{|||||||which is not trivially copyable}0">; +def err_atomic_specifier_bad_type + : Error<"_Atomic cannot be applied to " + "%select{incomplete |array |function |reference |atomic |qualified " + "|sizeless ||integer |integer }0type " + "%1 %select{|||||||which is not trivially copyable|with less than " + "1 byte of precision|with a non power of 2 precision}0">; // Expressions. 
def select_unary_expr_or_type_trait_kind : TextSubstitution< @@ -10711,4 +10713,8 @@ def warn_sycl_kernel_return_type : Warning< "function template with 'sycl_kernel' attribute must have a 'void' return type">, InGroup; +def err_ext_int_bad_size : Error<"%select{signed|unsigned}0 _ExtInt must " + "have a bit size of at least %select{2|1}0">; +def err_ext_int_max_size : Error<"%select{signed|unsigned}0 _ExtInt of bit " + "sizes greater than %1 not supported">; } // end of sema component. diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 73823dc01ec7..e6c2cb39566c 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -67,6 +67,7 @@ namespace clang { TST_char32, // C++11 char32_t TST_int, TST_int128, + TST_extint, // Extended Int types. TST_half, // OpenCL half, ARM NEON __fp16 TST_Float16, // C11 extension ISO/IEC TS 18661-3 TST_Accum, // ISO/IEC JTC1 SC22 WG14 N1169 Extension diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 3b1062e48767..1da24a8fd38b 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -285,6 +285,7 @@ KEYWORD(goto , KEYALL) KEYWORD(if , KEYALL) KEYWORD(inline , KEYC99|KEYCXX|KEYGNU) KEYWORD(int , KEYALL) +KEYWORD(_ExtInt , KEYALL) KEYWORD(long , KEYALL) KEYWORD(register , KEYALL) KEYWORD(restrict , KEYC99) diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td index 96d9472a488a..cd15a498642f 100644 --- a/clang/include/clang/Basic/TypeNodes.td +++ b/clang/include/clang/Basic/TypeNodes.td @@ -104,3 +104,5 @@ def ObjCInterfaceType : TypeNode, LeafType; def ObjCObjectPointerType : TypeNode; def PipeType : TypeNode; def AtomicType : TypeNode; +def ExtIntType : TypeNode; +def DependentExtIntType : TypeNode, AlwaysDependent; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 3f73a1b90268..b4e96a5b85de 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2721,6 +2721,7 @@ class Parser : public CodeCompletionHandler { SourceLocation &EllipsisLoc); void ParseAlignmentSpecifier(ParsedAttributes &Attrs, SourceLocation *endLoc = nullptr); + ExprResult ParseExtIntegerArgument(); VirtSpecifiers::Specifier isCXX11VirtSpecifier(const Token &Tok) const; VirtSpecifiers::Specifier isCXX11VirtSpecifier() const { diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 0e95e237e974..5bc13fe343f4 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -278,6 +278,7 @@ class DeclSpec { static const TST TST_char32 = clang::TST_char32; static const TST TST_int = clang::TST_int; static const TST TST_int128 = clang::TST_int128; + static const TST TST_extint = clang::TST_extint; static const TST TST_half = clang::TST_half; static const TST TST_float = clang::TST_float; static const TST TST_double = clang::TST_double; @@ -413,7 +414,7 @@ class DeclSpec { T == TST_underlyingType || T == TST_atomic); } static bool isExprRep(TST T) { - return (T == TST_typeofExpr || T == TST_decltype); + return (T == TST_typeofExpr || T == TST_decltype || T == TST_extint); } static bool isTemplateIdRep(TST T) { return (T == TST_auto || T == TST_decltype_auto); @@ -704,6 +705,9 @@ class DeclSpec { bool SetTypePipe(bool isPipe, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy); + bool 
SetExtIntType(SourceLocation KWLoc, Expr *BitWidth, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy); bool SetTypeSpecSat(SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID); bool SetTypeSpecError(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a1a0b854a85b..af58b0ec4e82 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1678,6 +1678,7 @@ class Sema final { SourceLocation Loc); QualType BuildWritePipeType(QualType T, SourceLocation Loc); + QualType BuildExtIntType(bool IsUnsigned, Expr *BitWidth, SourceLocation Loc); TypeSourceInfo *GetTypeForDeclarator(Declarator &D, Scope *S); TypeSourceInfo *GetTypeForDeclaratorCast(Declarator &D, QualType FromTy); diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def index 38c73ccb7daf..561c8869ead6 100644 --- a/clang/include/clang/Serialization/TypeBitCodes.def +++ b/clang/include/clang/Serialization/TypeBitCodes.def @@ -58,5 +58,7 @@ TYPE_BIT_CODE(DependentSizedExtVector, DEPENDENT_SIZED_EXT_VECTOR, 46) TYPE_BIT_CODE(DependentAddressSpace, DEPENDENT_ADDRESS_SPACE, 47) TYPE_BIT_CODE(DependentVector, DEPENDENT_SIZED_VECTOR, 48) TYPE_BIT_CODE(MacroQualified, MACRO_QUALIFIED, 49) +TYPE_BIT_CODE(ExtInt, EXT_INT, 50) +TYPE_BIT_CODE(DependentExtInt, DEPENDENT_EXT_INT, 51) #undef TYPE_BIT_CODE diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 34bb07cd3f78..8734dd390247 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2180,6 +2180,15 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = toBits(Layout.getAlignment()); break; } + case Type::ExtInt: { + const auto *EIT = cast(T); + Align = + std::min(static_cast(std::max( + getCharWidth(), llvm::PowerOf2Ceil(EIT->getNumBits()))), + Target->getLongLongAlign()); + Width = llvm::alignTo(EIT->getNumBits(), Align); + break; + } case Type::Record: case Type::Enum: { const auto *TT = cast(T); @@ -3376,6 +3385,8 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::Auto: case Type::DeducedTemplateSpecialization: case Type::PackExpansion: + case Type::ExtInt: + case Type::DependentExtInt: llvm_unreachable("type should never be variably-modified"); // These types can be variably-modified but should never need to @@ -4070,6 +4081,39 @@ QualType ASTContext::getWritePipeType(QualType T) const { return getPipeType(T, false); } +QualType ASTContext::getExtIntType(bool IsUnsigned, unsigned NumBits) const { + llvm::FoldingSetNodeID ID; + ExtIntType::Profile(ID, IsUnsigned, NumBits); + + void *InsertPos = nullptr; + if (ExtIntType *EIT = ExtIntTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(EIT, 0); + + auto *New = new (*this, TypeAlignment) ExtIntType(IsUnsigned, NumBits); + ExtIntTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +QualType ASTContext::getDependentExtIntType(bool IsUnsigned, + Expr *NumBitsExpr) const { + assert(NumBitsExpr->isInstantiationDependent() && "Only good for dependent"); + llvm::FoldingSetNodeID ID; + DependentExtIntType::Profile(ID, *this, IsUnsigned, NumBitsExpr); + + void *InsertPos = nullptr; + if (DependentExtIntType *Existing = + DependentExtIntTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(Existing, 0); + + auto *New = new (*this, TypeAlignment) + DependentExtIntType(*this, IsUnsigned, NumBitsExpr); + DependentExtIntTypes.InsertNode(New, InsertPos); + + 
Types.push_back(New); + return QualType(New, 0); +} + #ifndef NDEBUG static bool NeedsInjectedClassNameType(const RecordDecl *D) { if (!isa(D)) return false; @@ -5905,6 +5949,11 @@ int ASTContext::getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const { unsigned ASTContext::getIntegerRank(const Type *T) const { assert(T->isCanonicalUnqualified() && "T should be canonicalized"); + // Results in this 'losing' to any type of the same size, but winning if + // larger. + if (const auto *EIT = dyn_cast(T)) + return 0 + (EIT->getNumBits() << 3); + switch (cast(T)->getKind()) { default: llvm_unreachable("getIntegerRank(): not a built-in integer"); case BuiltinType::Bool: @@ -7288,6 +7337,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, return; case Type::Pipe: + case Type::ExtInt: #define ABSTRACT_TYPE(KIND, BASE) #define TYPE(KIND, BASE) #define DEPENDENT_TYPE(KIND, BASE) \ @@ -9381,6 +9431,21 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, assert(LHS != RHS && "Equivalent pipe types should have already been handled!"); return {}; + case Type::ExtInt: { + // Merge two ext-int types, while trying to preserve typedef info. + bool LHSUnsigned = LHS->castAs()->isUnsigned(); + bool RHSUnsigned = RHS->castAs()->isUnsigned(); + unsigned LHSBits = LHS->castAs()->getNumBits(); + unsigned RHSBits = RHS->castAs()->getNumBits(); + + // Like unsigned/int, shouldn't have a type if they dont match. + if (LHSUnsigned != RHSUnsigned) + return {}; + + if (LHSBits != RHSBits) + return {}; + return LHS; + } } llvm_unreachable("Invalid Type::Class!"); @@ -9521,6 +9586,8 @@ unsigned ASTContext::getIntWidth(QualType T) const { T = ET->getDecl()->getIntegerType(); if (T->isBooleanType()) return 1; + if(const auto *EIT = T->getAs()) + return EIT->getNumBits(); // For builtin types, just use the standard type sizing method return (unsigned)getTypeSize(T); } diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index c29b7b2f5907..c562830c41e1 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -949,6 +949,24 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, cast(T2)->getElementType())) return false; break; + case Type::ExtInt: { + const auto *Int1 = cast(T1); + const auto *Int2 = cast(T2); + + if (Int1->isUnsigned() != Int2->isUnsigned() || + Int1->getNumBits() != Int2->getNumBits()) + return false; + break; + } + case Type::DependentExtInt: { + const auto *Int1 = cast(T1); + const auto *Int2 = cast(T2); + + if (Int1->isUnsigned() != Int2->isUnsigned() || + !IsStructurallyEquivalent(Context, Int1->getNumBitsExpr(), + Int2->getNumBitsExpr())) + return false; + } } // end switch return true; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5b3866d0a471..8bc7a1128e7a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10354,6 +10354,7 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: + case Type::ExtInt: // GCC classifies vectors as None. We follow its lead and classify all // other types that don't fit into the regular classification the same way. 
return GCCTypeClass::None; diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 539b655fdf6a..4a45847c9425 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2093,6 +2093,8 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::Atomic: case Type::Pipe: case Type::MacroQualified: + case Type::ExtInt: + case Type::DependentExtInt: llvm_unreachable("type is illegal as a nested name specifier"); case Type::SubstTemplateTypeParmPack: @@ -3551,6 +3553,28 @@ void CXXNameMangler::mangleType(const PipeType *T) { Out << "8ocl_pipe"; } +void CXXNameMangler::mangleType(const ExtIntType *T) { + Out << "U7_ExtInt"; + llvm::APSInt BW(32, true); + BW = T->getNumBits(); + TemplateArgument TA(Context.getASTContext(), BW, getASTContext().IntTy); + mangleTemplateArgs(&TA, 1); + if (T->isUnsigned()) + Out << "j"; + else + Out << "i"; +} + +void CXXNameMangler::mangleType(const DependentExtIntType *T) { + Out << "U7_ExtInt"; + TemplateArgument TA(T->getNumBitsExpr()); + mangleTemplateArgs(&TA, 1); + if (T->isUnsigned()) + Out << "j"; + else + Out << "i"; +} + void CXXNameMangler::mangleIntegerLiteral(QualType T, const llvm::APSInt &Value) { // ::= L E # integer literal diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index af51ae07bc57..dc5c15fbef68 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2953,6 +2953,30 @@ void MicrosoftMangleContextImpl::mangleCXXName(GlobalDecl GD, return Mangler.mangle(D); } +void MicrosoftCXXNameMangler::mangleType(const ExtIntType *T, Qualifiers, + SourceRange Range) { + llvm::SmallString<64> TemplateMangling; + llvm::raw_svector_ostream Stream(TemplateMangling); + MicrosoftCXXNameMangler Extra(Context, Stream); + Stream << "?$"; + if (T->isUnsigned()) + Extra.mangleSourceName("_UExtInt"); + else + Extra.mangleSourceName("_ExtInt"); + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumBits()), + /*IsBoolean=*/false); + + mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"}); +} + +void MicrosoftCXXNameMangler::mangleType(const DependentExtIntType *T, + Qualifiers, SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "cannot mangle this DependentExtInt type yet"); + Diags.Report(Range.getBegin(), DiagID) << Range; +} + // ::= | | // // ::= A # private near diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 3428437c3146..982aa8962f03 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -293,6 +293,39 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, VectorTypeBits.NumElements = nElements; } +ExtIntType::ExtIntType(bool IsUnsigned, unsigned NumBits) + : Type(ExtInt, QualType{}, TypeDependence::None), IsUnsigned(IsUnsigned), + NumBits(NumBits) {} + +DependentExtIntType::DependentExtIntType(const ASTContext &Context, + bool IsUnsigned, Expr *NumBitsExpr) + : Type(DependentExtInt, QualType{}, + ((NumBitsExpr->isValueDependent() || NumBitsExpr->isTypeDependent()) + ? TypeDependence::Dependent + : TypeDependence::None) | + (NumBitsExpr->isInstantiationDependent() + ? TypeDependence::Instantiation + : TypeDependence::None) | + (NumBitsExpr->containsUnexpandedParameterPack() + ? 
TypeDependence::VariablyModified + : TypeDependence::None)), + Context(Context), ExprAndUnsigned(NumBitsExpr, IsUnsigned) {} + +bool DependentExtIntType::isUnsigned() const { + return ExprAndUnsigned.getInt(); +} + +clang::Expr *DependentExtIntType::getNumBitsExpr() const { + return ExprAndUnsigned.getPointer(); +} + +void DependentExtIntType::Profile(llvm::FoldingSetNodeID &ID, + const ASTContext &Context, bool IsUnsigned, + Expr *NumBitsExpr) { + ID.AddBoolean(IsUnsigned); + NumBitsExpr->Profile(ID, Context, true); +} + /// getArrayElementTypeNoTypeQual - If this is an array type, return the /// element type of the array, potentially with type qualifiers missing. /// This method should never be used when type qualifiers are meaningful. @@ -1836,13 +1869,17 @@ bool Type::isIntegralType(const ASTContext &Ctx) const { if (const auto *ET = dyn_cast(CanonicalType)) return ET->getDecl()->isComplete(); - return false; + return isExtIntType(); } bool Type::isIntegralOrUnscopedEnumerationType() const { if (const auto *BT = dyn_cast(CanonicalType)) return BT->getKind() >= BuiltinType::Bool && BT->getKind() <= BuiltinType::Int128; + + if (isExtIntType()) + return true; + return isUnscopedEnumerationType(); } @@ -1923,6 +1960,9 @@ bool Type::isSignedIntegerType() const { return ET->getDecl()->getIntegerType()->isSignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isSigned(); + return false; } @@ -1937,6 +1977,10 @@ bool Type::isSignedIntegerOrEnumerationType() const { return ET->getDecl()->getIntegerType()->isSignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isSigned(); + + return false; } @@ -1963,6 +2007,9 @@ bool Type::isUnsignedIntegerType() const { return ET->getDecl()->getIntegerType()->isUnsignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isUnsigned(); + return false; } @@ -1977,6 +2024,9 @@ bool Type::isUnsignedIntegerOrEnumerationType() const { return ET->getDecl()->getIntegerType()->isUnsignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isUnsigned(); + return false; } @@ -2015,7 +2065,7 @@ bool Type::isRealType() const { BT->getKind() <= BuiltinType::Float128; if (const auto *ET = dyn_cast(CanonicalType)) return ET->getDecl()->isComplete() && !ET->getDecl()->isScoped(); - return false; + return isExtIntType(); } bool Type::isArithmeticType() const { @@ -2030,7 +2080,7 @@ bool Type::isArithmeticType() const { // false for scoped enumerations since that will disable any // unwanted implicit conversions. return !ET->getDecl()->isScoped() && ET->getDecl()->isComplete(); - return isa(CanonicalType); + return isa(CanonicalType) || isExtIntType(); } Type::ScalarTypeKind Type::getScalarTypeKind() const { @@ -2059,6 +2109,8 @@ Type::ScalarTypeKind Type::getScalarTypeKind() const { if (CT->getElementType()->isRealFloatingType()) return STK_FloatingComplex; return STK_IntegralComplex; + } else if (isExtIntType()) { + return STK_Integral; } llvm_unreachable("unknown scalar type"); @@ -2224,6 +2276,7 @@ bool QualType::isCXX98PODType(const ASTContext &Context) const { case Type::MemberPointer: case Type::Vector: case Type::ExtVector: + case Type::ExtInt: return true; case Type::Enum: @@ -3643,6 +3696,7 @@ static CachedProperties computeCachedProperties(const Type *T) { // here in error recovery. 
return CachedProperties(ExternalLinkage, false); + case Type::ExtInt: case Type::Builtin: // C++ [basic.link]p8: // A type is said to have linkage if and only if: @@ -3740,6 +3794,7 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) { assert(T->isInstantiationDependentType()); return LinkageInfo::external(); + case Type::ExtInt: case Type::Builtin: return LinkageInfo::external(); @@ -3948,6 +4003,8 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::ObjCInterface: case Type::Atomic: case Type::Pipe: + case Type::ExtInt: + case Type::DependentExtInt: return false; } llvm_unreachable("bad type kind!"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 4cc0d735ed6a..f000e1f6c932 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -227,6 +227,8 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::ObjCInterface: case Type::Atomic: case Type::Pipe: + case Type::ExtInt: + case Type::DependentExtInt: CanPrefixQualifiers = true; break; @@ -1114,6 +1116,28 @@ void TypePrinter::printPipeBefore(const PipeType *T, raw_ostream &OS) { void TypePrinter::printPipeAfter(const PipeType *T, raw_ostream &OS) {} +void TypePrinter::printExtIntBefore(const ExtIntType *T, raw_ostream &OS) { + if (T->isUnsigned()) + OS << "unsigned "; + OS << "_ExtInt(" << T->getNumBits() << ")"; + spaceBeforePlaceHolder(OS); +} + +void TypePrinter::printExtIntAfter(const ExtIntType *T, raw_ostream &OS) {} + +void TypePrinter::printDependentExtIntBefore(const DependentExtIntType *T, + raw_ostream &OS) { + if (T->isUnsigned()) + OS << "unsigned "; + OS << "_ExtInt("; + T->getNumBitsExpr()->printPretty(OS, nullptr, Policy); + OS << ")"; + spaceBeforePlaceHolder(OS); +} + +void TypePrinter::printDependentExtIntAfter(const DependentExtIntType *T, + raw_ostream &OS) {} + /// Appends the given scope to the end of a string. void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) { if (DC->isTranslationUnit()) return; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 4ea3fbca2144..e6422a7ff1c3 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -826,6 +826,17 @@ llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) { return DBuilder.createUnspecifiedType("auto"); } +llvm::DIType *CGDebugInfo::CreateType(const ExtIntType *Ty) { + + StringRef Name = Ty->isUnsigned() ? "unsigned _ExtInt" : "_ExtInt"; + llvm::dwarf::TypeKind Encoding = Ty->isUnsigned() + ? llvm::dwarf::DW_ATE_unsigned + : llvm::dwarf::DW_ATE_signed; + + return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty), + Encoding); +} + llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { // Bit size and offset of the type. 
llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float; @@ -3159,6 +3170,8 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Atomic: return CreateType(cast(Ty), Unit); + case Type::ExtInt: + return CreateType(cast(Ty)); case Type::Pipe: return CreateType(cast(Ty), Unit); diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 4915e19753c6..34164fbec90e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -168,6 +168,7 @@ class CGDebugInfo { llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); llvm::DIType *CreateType(const AutoType *Ty); + llvm::DIType *CreateType(const ExtIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TemplateSpecializationType *Ty, diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 97e96941ec2f..343c62273ec4 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -760,6 +760,11 @@ class ScalarExprEmitter llvm::Value *Zero,bool isDiv); // Common helper for getting how wide LHS of shift is. static Value *GetWidthMinusOneValue(Value* LHS,Value* RHS); + + // Used for shifting constraints for OpenCL, do mask for powers of 2, URem for + // non powers of two. + Value *ConstrainShiftValue(Value *LHS, Value *RHS, const Twine &Name); + Value *EmitDiv(const BinOpInfo &Ops); Value *EmitRem(const BinOpInfo &Ops); Value *EmitAdd(const BinOpInfo &Ops); @@ -3770,6 +3775,21 @@ Value *ScalarExprEmitter::GetWidthMinusOneValue(Value* LHS,Value* RHS) { return llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth() - 1); } +Value *ScalarExprEmitter::ConstrainShiftValue(Value *LHS, Value *RHS, + const Twine &Name) { + llvm::IntegerType *Ty; + if (auto *VT = dyn_cast(LHS->getType())) + Ty = cast(VT->getElementType()); + else + Ty = cast(LHS->getType()); + + if (llvm::isPowerOf2_64(Ty->getBitWidth())) + return Builder.CreateAnd(RHS, GetWidthMinusOneValue(LHS, RHS), Name); + + return Builder.CreateURem( + RHS, llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth()), Name); +} + Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { // LLVM requires the LHS and RHS to be the same type: promote or truncate the // RHS to the same size as the LHS. @@ -3784,8 +3804,7 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) - RHS = - Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask"); + RHS = ConstrainShiftValue(Ops.LHS, RHS, "shl.mask"); else if ((SanitizeBase || SanitizeExponent) && isa(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); @@ -3847,8 +3866,7 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { // OpenCL 6.3j: shift values are effectively % word size of LHS. 
if (CGF.getLangOpts().OpenCL) - RHS = - Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask"); + RHS = ConstrainShiftValue(Ops.LHS, RHS, "shr.mask"); else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) && isa(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 4de64a32f2ac..75af05623b03 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -385,7 +385,8 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Run = FieldEnd; continue; } - llvm::Type *Type = Types.ConvertTypeForMem(Field->getType()); + llvm::Type *Type = + Types.ConvertTypeForMem(Field->getType(), /*ForBitFields=*/true); // If we don't have a run yet, or don't live within the previous run's // allocated storage then we allocate some storage and start a new run. if (Run == FieldEnd || BitOffset >= Tail) { diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 05bf70e5cb22..9929c154e37b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -257,6 +257,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::Enum: case Type::ObjCObjectPointer: case Type::Pipe: + case Type::ExtInt: return TEK_Scalar; // Complexes. @@ -2010,6 +2011,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: + case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 8cc8c162dfbe..f4ebe6885675 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -209,6 +209,15 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { return createScalarTypeNode(OutName, getChar(), Size); } + if (const auto *EIT = dyn_cast(Ty)) { + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + // Don't specify signed/unsigned since integer types can alias despite sign + // differences. + Out << "_ExtInt(" << EIT->getNumBits() << ')'; + return createScalarTypeNode(OutName, getChar(), Size); + } + // For now, handle any other kind of type conservatively. return getChar(); } diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 29adc2c7adb3..d6d84a3ff051 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -83,19 +83,19 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. -llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { +llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { llvm::Type *R = ConvertType(T); - // If this is a non-bool type, don't map it. - if (!R->isIntegerTy(1)) - return R; + // If this is a bool type, or an ExtIntType in a bitfield representation, + // map this integer to the target-specified size. + if ((ForBitField && T->isExtIntType()) || R->isIntegerTy(1)) + return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); - // Otherwise, return an integer of the target-specified size. 
- return llvm::IntegerType::get(getLLVMContext(), - (unsigned)Context.getTypeSize(T)); + // Else, don't map it. + return R; } - /// isRecordLayoutComplete - Return true if the specified type is already /// completely laid out. bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const { @@ -731,6 +731,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = CGM.getOpenCLRuntime().getPipeType(cast(Ty)); break; } + case Type::ExtInt: { + const auto &EIT = cast(Ty); + ResultType = llvm::Type::getIntNTy(getLLVMContext(), EIT->getNumBits()); + break; + } } assert(ResultType && "Didn't convert a type?"); diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 03102329507e..394e2fdf8d65 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -134,7 +134,7 @@ class CodeGenTypes { /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. - llvm::Type *ConvertTypeForMem(QualType T); + llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false); /// GetFunctionType - Get the LLVM function type for \arg Info. llvm::FunctionType *GetFunctionType(const CGFunctionInfo &Info); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index c8a73c2757ab..4a591cf7aac5 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3219,6 +3219,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("Pipe types shouldn't get here"); case Type::Builtin: + case Type::ExtInt: // GCC treats vector and complex types as fundamental types. case Type::Vector: case Type::ExtVector: @@ -3472,7 +3473,10 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: - llvm_unreachable("Pipe type shouldn't get here"); + break; + + case Type::ExtInt: + break; case Type::ConstantArray: case Type::IncompleteArray: diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 8bd7571f1242..fe00199c1f8f 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2880,6 +2880,25 @@ void Parser::ParseAlignmentSpecifier(ParsedAttributes &Attrs, ParsedAttr::AS_Keyword, EllipsisLoc); } +ExprResult Parser::ParseExtIntegerArgument() { + assert(Tok.is(tok::kw__ExtInt) && "Not an extended int type"); + ConsumeToken(); + + BalancedDelimiterTracker T(*this, tok::l_paren); + if (T.expectAndConsume()) + return ExprError(); + + ExprResult ER = ParseConstantExpression(); + if (ER.isInvalid()) { + T.skipToEnd(); + return ExprError(); + } + + if(T.consumeClose()) + return ExprError(); + return ER; +} + /// Determine whether we're looking at something that might be a declarator /// in a simple-declaration. 
If it can't possibly be a declarator, maybe /// diagnose a missing semicolon after a prior tag definition in the decl @@ -3807,6 +3826,14 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw__ExtInt: { + ExprResult ER = ParseExtIntegerArgument(); + if (ER.isInvalid()) + continue; + isInvalid = DS.SetExtIntType(Loc, ER.get(), PrevSpec, DiagID, Policy); + ConsumedEnd = PrevTokLocation; + break; + } case tok::kw___int128: isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int128, Loc, PrevSpec, DiagID, Policy); @@ -4890,6 +4917,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: @@ -4969,6 +4997,7 @@ bool Parser::isTypeSpecifierQualifier() { case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: @@ -5135,6 +5164,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_char32_t: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0c6939b04319..29e583fcb84e 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1492,6 +1492,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, case tok::kw_long: case tok::kw___int64: case tok::kw___int128: + case tok::kw__ExtInt: case tok::kw_signed: case tok::kw_unsigned: case tok::kw_half: diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index c5e895d090a5..32e9370b0e00 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -2156,6 +2156,19 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) { return; } + case tok::kw__ExtInt: { + ExprResult ER = ParseExtIntegerArgument(); + if (ER.isInvalid()) + DS.SetTypeSpecError(); + else + DS.SetExtIntType(Loc, ER.get(), PrevSpec, DiagID, Policy); + + // Do this here because we have already consumed the close paren. 
+ DS.SetRangeEnd(PrevTokLocation); + DS.Finish(Actions, Policy); + return; + } + // builtin types case tok::kw_short: DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec, DiagID, Policy); diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index 529e3f321054..61a82664bf71 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1141,6 +1141,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw_half: case tok::kw_float: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: @@ -1778,6 +1779,24 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw__Atomic: return TPResult::True; + case tok::kw__ExtInt: { + if (NextToken().isNot(tok::l_paren)) + return TPResult::Error; + RevertingTentativeParsingAction PA(*this); + ConsumeToken(); + ConsumeParen(); + + if (!SkipUntil(tok::r_paren, StopAtSemi)) + return TPResult::Error; + + if (Tok.is(tok::l_paren)) + return TPResult::Ambiguous; + + if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) + return BracedCastResult; + + return TPResult::True; + } default: return TPResult::False; } @@ -1810,6 +1829,7 @@ bool Parser::isCXXDeclarationSpecifierAType() { case tok::kw_bool: case tok::kw_short: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index ae4a78a4556d..276e35a3497e 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -360,6 +360,7 @@ bool Declarator::isDeclarationOfFunction() const { case TST_half: case TST_int: case TST_int128: + case TST_extint: case TST_struct: case TST_interface: case TST_union: @@ -538,6 +539,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T, case DeclSpec::TST_char32: return "char32_t"; case DeclSpec::TST_int: return "int"; case DeclSpec::TST_int128: return "__int128"; + case DeclSpec::TST_extint: return "_ExtInt"; case DeclSpec::TST_half: return "half"; case DeclSpec::TST_float: return "float"; case DeclSpec::TST_double: return "double"; @@ -913,6 +915,27 @@ bool DeclSpec::SetTypeSpecError() { return false; } +bool DeclSpec::SetExtIntType(SourceLocation KWLoc, Expr *BitsExpr, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy) { + assert(BitsExpr && "no expression provided!"); + if (TypeSpecType == TST_error) + return false; + + if (TypeSpecType != TST_unspecified) { + PrevSpec = DeclSpec::getSpecifierName((TST) TypeSpecType, Policy); + DiagID = diag::err_invalid_decl_spec_combination; + return true; + } + + TypeSpecType = TST_extint; + ExprRep = BitsExpr; + TSTLoc = KWLoc; + TSTNameLoc = KWLoc; + TypeSpecOwned = false; + return false; +} + bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const LangOptions &Lang) { // Duplicates are permitted in C99 onwards, but are not permitted in C89 or @@ -1194,7 +1217,7 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { TypeSpecType = TST_int; // unsigned -> unsigned int, signed -> signed int. else if (TypeSpecType != TST_int && TypeSpecType != TST_int128 && TypeSpecType != TST_char && TypeSpecType != TST_wchar && - !IsFixedPointType) { + !IsFixedPointType && TypeSpecType != TST_extint) { S.Diag(TSSLoc, diag::err_invalid_sign_spec) << getSpecifierName((TST)TypeSpecType, Policy); // signed double -> double. 
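(A quick sketch of the declaration-specifier combinations the DeclSpec changes above are meant to accept or reject; it mirrors the SemaCXX/ext-int.cpp test added later in this patch, and the trailing comments are illustrative rather than quoted diagnostics.)

    signed _ExtInt(5)    a;  // OK: sign specifiers combine with _ExtInt just as with int.
    _ExtInt(5) unsigned  b;  // OK: specifier order does not matter.
    _Complex _ExtInt(12) c;  // OK: handled like _Complex int by the Finish() change above.
    short _ExtInt(43)    d;  // error: width specifiers do not combine with _ExtInt.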
@@ -1241,7 +1264,8 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { S.getLocForEndOfToken(getTypeSpecComplexLoc()), " double"); TypeSpecType = TST_double; // _Complex -> _Complex double. - } else if (TypeSpecType == TST_int || TypeSpecType == TST_char) { + } else if (TypeSpecType == TST_int || TypeSpecType == TST_char || + TypeSpecType == TST_extint) { // Note that this intentionally doesn't include _Complex _Bool. if (!S.getLangOpts().CPlusPlus) S.Diag(TSTLoc, diag::ext_integer_complex); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e7bc4994e540..037e9c332412 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -9793,6 +9793,9 @@ struct IntRange { false/*NonNegative*/); } + if (const auto *EIT = dyn_cast(T)) + return IntRange(EIT->getNumBits(), EIT->isUnsigned()); + const BuiltinType *BT = cast(T); assert(BT->isInteger()); @@ -9816,6 +9819,9 @@ struct IntRange { if (const EnumType *ET = dyn_cast(T)) T = C.getCanonicalType(ET->getDecl()->getIntegerType()).getTypePtr(); + if (const auto *EIT = dyn_cast(T)) + return IntRange(EIT->getNumBits(), EIT->isUnsigned()); + const BuiltinType *BT = cast(T); assert(BT->isInteger()); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ed082dbaf986..27c8365ab8be 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14811,12 +14811,16 @@ bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) { if (T->isDependentType()) return false; + // This doesn't use 'isIntegralType' despite the error message mentioning + // integral type because isIntegralType would also allow enum types in C. if (const BuiltinType *BT = T->getAs()) if (BT->isInteger()) return false; - Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T; - return true; + if (T->isExtIntType()) + return false; + + return Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T; } /// Check whether this is a valid redeclaration of a previous enumeration. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 3205b4472db2..869ae5cbc40b 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4087,8 +4087,9 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI, Diag(AttrLoc, diag::err_enum_mode_vector_type) << Name << CI.getRange(); return; } - bool IntegralOrAnyEnumType = - OldElemTy->isIntegralOrEnumerationType() || OldElemTy->getAs(); + bool IntegralOrAnyEnumType = (OldElemTy->isIntegralOrEnumerationType() && + !OldElemTy->isExtIntType()) || + OldElemTy->getAs(); if (!OldElemTy->getAs() && !OldElemTy->isComplexType() && !IntegralOrAnyEnumType) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 31d694857e9c..fbb5d4b05bbf 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1482,6 +1482,11 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, if (LHSType == RHSType) return LHSType; + // ExtInt types aren't subject to conversions between them or normal integers, + // so this fails. + if(LHSType->isExtIntType() || RHSType->isExtIntType()) + return QualType(); + // At this point, we have two different arithmetic types. 
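In practice the early return above makes mixed _ExtInt arithmetic ill-formed rather than silently converting; a small sketch of the cases involved (the same ones the Ops() function in the SemaCXX test below exercises):

    _ExtInt(43) a;  _ExtInt(4) b;  int i;
    a + a;   // OK: identical _ExtInt types need no conversion.
    a + b;   // error: invalid operands; different _ExtInt widths are not converted.
    a + i;   // error: invalid operands; _ExtInt does not mix with standard integer types.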
// Diagnose attempts to convert between __float128 and long double where @@ -4261,6 +4266,7 @@ static void captureVariablyModifiedType(ASTContext &Context, QualType T, case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: + case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); @@ -10431,14 +10437,19 @@ static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, << RHS.get()->getSourceRange()); return; } - llvm::APInt LeftBits(Right.getBitWidth(), - S.Context.getTypeSize(LHS.get()->getType())); + + QualType LHSExprType = LHS.get()->getType(); + uint64_t LeftSize = LHSExprType->isExtIntType() + ? S.Context.getIntWidth(LHSExprType) + : S.Context.getTypeSize(LHSExprType); + llvm::APInt LeftBits(Right.getBitWidth(), LeftSize); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } + if (Opc != BO_Shl) return; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 82a197196576..08d29fa51e6e 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -2967,6 +2967,7 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Vector: case Type::ExtVector: case Type::Complex: + case Type::ExtInt: break; // Non-deduced auto types only get here for error cases. diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index f425ec742b7b..7bd12913aec4 100755 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5998,6 +5998,15 @@ bool UnnamedLocalNoLinkageFinder::VisitPipeType(const PipeType* T) { return false; } +bool UnnamedLocalNoLinkageFinder::VisitExtIntType(const ExtIntType *T) { + return false; +} + +bool UnnamedLocalNoLinkageFinder::VisitDependentExtIntType( + const DependentExtIntType *T) { + return false; +} + bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { if (Tag->getDeclContext()->isFunctionOrMethod()) { S.Diag(SR.getBegin(), @@ -6891,7 +6900,9 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType IntegerType = ParamType; if (const EnumType *Enum = IntegerType->getAs()) IntegerType = Enum->getDecl()->getIntegerType(); - Value = Value.extOrTrunc(Context.getTypeSize(IntegerType)); + Value = Value.extOrTrunc(IntegerType->isExtIntType() + ? Context.getIntWidth(IntegerType) + : Context.getTypeSize(IntegerType)); Converted = TemplateArgument(Context, Value, Context.getCanonicalType(ParamType)); @@ -6985,7 +6996,9 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // Coerce the template argument's value to the value it will have // based on the template parameter's type. - unsigned AllowedBits = Context.getTypeSize(IntegerType); + unsigned AllowedBits = IntegerType->isExtIntType() + ? 
Context.getIntWidth(IntegerType)
+                               : Context.getTypeSize(IntegerType);
     if (Value.getBitWidth() != AllowedBits)
       Value = Value.extOrTrunc(AllowedBits);
     Value.setIsSigned(IntegerType->isSignedIntegerOrEnumerationType());
diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp
index 8e3c61819571..e1d438fcb724 100644
--- a/clang/lib/Sema/SemaTemplateDeduction.cpp
+++ b/clang/lib/Sema/SemaTemplateDeduction.cpp
@@ -1515,6 +1515,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
     case Type::ObjCObject:
     case Type::ObjCInterface:
     case Type::ObjCObjectPointer:
+    case Type::ExtInt:
       if (TDF & TDF_SkipNonDependent)
         return Sema::TDK_Success;

@@ -2106,6 +2107,33 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S,
       return Sema::TDK_NonDeducedMismatch;
     }

+    case Type::DependentExtInt: {
+      const auto *IntParam = cast<DependentExtIntType>(Param);
+
+      if (const auto *IntArg = dyn_cast<ExtIntType>(Arg)) {
+        if (IntParam->isUnsigned() != IntArg->isUnsigned())
+          return Sema::TDK_NonDeducedMismatch;
+
+        NonTypeTemplateParmDecl *NTTP =
+            getDeducedParameterFromExpr(Info, IntParam->getNumBitsExpr());
+        if (!NTTP)
+          return Sema::TDK_Success;
+
+        llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false);
+        ArgSize = IntArg->getNumBits();
+
+        return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize,
+                                             S.Context.IntTy, true, Info,
+                                             Deduced);
+      }
+
+      if (const auto *IntArg = dyn_cast<DependentExtIntType>(Arg)) {
+        if (IntParam->isUnsigned() != IntArg->isUnsigned())
+          return Sema::TDK_NonDeducedMismatch;
+        return Sema::TDK_Success;
+      }
+      return Sema::TDK_NonDeducedMismatch;
+    }

     case Type::TypeOfExpr:
     case Type::TypeOf:
@@ -5850,6 +5878,11 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T,
                                cast<DeducedType>(T)->getDeducedType(),
                                OnlyDeduced, Depth, Used);
     break;

+  case Type::DependentExtInt:
+    MarkUsedTemplateParameters(Ctx,
+                               cast<DependentExtIntType>(T)->getNumBitsExpr(),
+                               OnlyDeduced, Depth, Used);
+    break;

   // None of these types have any template parameters in them.
case Type::Builtin: @@ -5862,6 +5895,7 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::ObjCObjectPointer: case Type::UnresolvedUsing: case Type::Pipe: + case Type::ExtInt: #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 825b062c0054..466cb084e7c3 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -847,6 +847,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case TST_typeofExpr: case TST_decltype: + case TST_extint: if (DS.getRepAsExpr() && DS.getRepAsExpr()->containsUnexpandedParameterPack()) return true; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 53e4366f673b..4ecd36209e5b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -1441,6 +1442,15 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { } break; } + case DeclSpec::TST_extint: { + Result = S.BuildExtIntType(DS.getTypeSpecSign() == TSS_unsigned, + DS.getRepAsExpr(), DS.getBeginLoc()); + if (Result.isNull()) { + Result = Context.IntTy; + declarator.setInvalidType(true); + } + break; + } case DeclSpec::TST_accum: { switch (DS.getTypeSpecWidth()) { case DeclSpec::TSW_short: @@ -2160,6 +2170,45 @@ QualType Sema::BuildWritePipeType(QualType T, SourceLocation Loc) { return Context.getWritePipeType(T); } +/// Build a extended int type. +/// +/// \param IsUnsigned Boolean representing the signedness of the type. +/// +/// \param BitWidth Size of this int type in bits, or an expression representing +/// that. +/// +/// \param Loc Location of the keyword. +QualType Sema::BuildExtIntType(bool IsUnsigned, Expr *BitWidth, + SourceLocation Loc) { + if (BitWidth->isInstantiationDependent()) + return Context.getDependentExtIntType(IsUnsigned, BitWidth); + + llvm::APSInt Bits(32); + ExprResult ICE = VerifyIntegerConstantExpression(BitWidth, &Bits); + + if (ICE.isInvalid()) + return QualType(); + + int64_t NumBits = Bits.getSExtValue(); + if (!IsUnsigned && NumBits < 2) { + Diag(Loc, diag::err_ext_int_bad_size) << 0; + return QualType(); + } + + if (IsUnsigned && NumBits < 1) { + Diag(Loc, diag::err_ext_int_bad_size) << 1; + return QualType(); + } + + if (NumBits > llvm::IntegerType::MAX_INT_BITS) { + Diag(Loc, diag::err_ext_int_max_size) << IsUnsigned + << llvm::IntegerType::MAX_INT_BITS; + return QualType(); + } + + return Context.getExtIntType(IsUnsigned, NumBits); +} + /// Check whether the specified array size makes the array type a VLA. If so, /// return true, if not, return the size of the array in SizeVal. static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) { @@ -5774,6 +5823,14 @@ namespace { TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); } + void VisitExtIntTypeLoc(ExtIntTypeLoc TL) { + TL.setNameLoc(DS.getTypeSpecTypeLoc()); + } + + void VisitDependentExtIntTypeLoc(DependentExtIntTypeLoc TL) { + TL.setNameLoc(DS.getTypeSpecTypeLoc()); + } + void VisitTypeLoc(TypeLoc TL) { // FIXME: add other typespec types and change this to an assert. 
TL.initialize(Context, DS.getTypeSpecTypeLoc());
@@ -5900,6 +5957,9 @@ namespace {
       assert(Chunk.Kind == DeclaratorChunk::Pipe);
       TL.setKWLoc(Chunk.Loc);
     }
+    void VisitExtIntTypeLoc(ExtIntTypeLoc TL) {
+      TL.setNameLoc(Chunk.Loc);
+    }
     void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) {
       TL.setExpansionLoc(Chunk.Loc);
     }
@@ -8631,6 +8691,12 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) {
   else if (!T.isTriviallyCopyableType(Context))
     // Some other non-trivially-copyable type (probably a C++ class)
     DisallowedKind = 7;
+  else if (auto *ExtTy = T->getAs<ExtIntType>()) {
+    if (ExtTy->getNumBits() < 8)
+      DisallowedKind = 8;
+    else if (!llvm::isPowerOf2_32(ExtTy->getNumBits()))
+      DisallowedKind = 9;
+  }

   if (DisallowedKind != -1) {
     Diag(Loc, diag::err_atomic_specifier_bad_type) << DisallowedKind << T;
diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h
index 87b07897ec28..abde968bed8c 100644
--- a/clang/lib/Sema/TreeTransform.h
+++ b/clang/lib/Sema/TreeTransform.h
@@ -1183,6 +1183,14 @@ class TreeTransform {
   QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc,
                            bool isReadPipe);

+  /// Build an extended int given its value type.
+  QualType RebuildExtIntType(bool IsUnsigned, unsigned NumBits,
+                             SourceLocation Loc);
+
+  /// Build a dependent extended int given its value type.
+  QualType RebuildDependentExtIntType(bool IsUnsigned, Expr *NumBitsExpr,
+                                      SourceLocation Loc);
+
   /// Build a new template name given a nested name specifier, a flag
   /// indicating whether the "template" keyword was provided, and the template
   /// that the template name refers to.
@@ -6120,6 +6128,57 @@ QualType TreeTransform<Derived>::TransformPipeType(TypeLocBuilder &TLB,
   return Result;
 }

+template <typename Derived>
+QualType TreeTransform<Derived>::TransformExtIntType(TypeLocBuilder &TLB,
+                                                     ExtIntTypeLoc TL) {
+  const ExtIntType *EIT = TL.getTypePtr();
+  QualType Result = TL.getType();
+
+  if (getDerived().AlwaysRebuild()) {
+    Result = getDerived().RebuildExtIntType(EIT->isUnsigned(),
+                                            EIT->getNumBits(), TL.getNameLoc());
+    if (Result.isNull())
+      return QualType();
+  }
+
+  ExtIntTypeLoc NewTL = TLB.push<ExtIntTypeLoc>(Result);
+  NewTL.setNameLoc(TL.getNameLoc());
+  return Result;
+}
+
+template <typename Derived>
+QualType TreeTransform<Derived>::TransformDependentExtIntType(
+    TypeLocBuilder &TLB, DependentExtIntTypeLoc TL) {
+  const DependentExtIntType *EIT = TL.getTypePtr();
+
+  EnterExpressionEvaluationContext Unevaluated(
+      SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated);
+  ExprResult BitsExpr = getDerived().TransformExpr(EIT->getNumBitsExpr());
+  BitsExpr = SemaRef.ActOnConstantExpression(BitsExpr);
+
+  if (BitsExpr.isInvalid())
+    return QualType();
+
+  QualType Result = TL.getType();
+
+  if (getDerived().AlwaysRebuild() || BitsExpr.get() != EIT->getNumBitsExpr()) {
+    Result = getDerived().RebuildDependentExtIntType(
+        EIT->isUnsigned(), BitsExpr.get(), TL.getNameLoc());
+
+    if (Result.isNull())
+      return QualType();
+  }
+
+  if (isa<DependentExtIntType>(Result)) {
+    DependentExtIntTypeLoc NewTL = TLB.push<DependentExtIntTypeLoc>(Result);
+    NewTL.setNameLoc(TL.getNameLoc());
+  } else {
+    ExtIntTypeLoc NewTL = TLB.push<ExtIntTypeLoc>(Result);
+    NewTL.setNameLoc(TL.getNameLoc());
+  }
+  return Result;
+}
+
 /// Simple iterator that traverses the template arguments in a
 /// container that provides a \c getArgLoc() member function.
/// @@ -13782,6 +13841,23 @@ QualType TreeTransform::RebuildPipeType(QualType ValueType, : SemaRef.BuildWritePipeType(ValueType, KWLoc); } +template +QualType TreeTransform::RebuildExtIntType(bool IsUnsigned, + unsigned NumBits, + SourceLocation Loc) { + llvm::APInt NumBitsAP(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy), + NumBits, true); + IntegerLiteral *Bits = IntegerLiteral::Create(SemaRef.Context, NumBitsAP, + SemaRef.Context.IntTy, Loc); + return SemaRef.BuildExtIntType(IsUnsigned, Bits, Loc); +} + +template +QualType TreeTransform::RebuildDependentExtIntType( + bool IsUnsigned, Expr *NumBitsExpr, SourceLocation Loc) { + return SemaRef.BuildExtIntType(IsUnsigned, NumBitsExpr, Loc); +} + template TemplateName TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f0e9bbd4dcea..62dd233aab3f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6719,6 +6719,15 @@ void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) { TL.setKWLoc(readSourceLocation()); } +void TypeLocReader::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) { + TL.setNameLoc(readSourceLocation()); +} +void TypeLocReader::VisitDependentExtIntTypeLoc( + clang::DependentExtIntTypeLoc TL) { + TL.setNameLoc(readSourceLocation()); +} + + void ASTRecordReader::readTypeLoc(TypeLoc TL) { TypeLocReader TLR(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c8ce3edda60b..18a92aaadd52 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -476,6 +476,14 @@ void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { Record.AddSourceLocation(TL.getKWLoc()); } +void TypeLocWriter::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) { + Record.AddSourceLocation(TL.getNameLoc()); +} +void TypeLocWriter::VisitDependentExtIntTypeLoc( + clang::DependentExtIntTypeLoc TL) { + Record.AddSourceLocation(TL.getNameLoc()); +} + void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; diff --git a/clang/test/CodeGen/ext-int-sanitizer.cpp b/clang/test/CodeGen/ext-int-sanitizer.cpp new file mode 100644 index 000000000000..ddf3180e1a1b --- /dev/null +++ b/clang/test/CodeGen/ext-int-sanitizer.cpp @@ -0,0 +1,265 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + + +// CHECK: define void @_Z6BoundsRA10_KiU7_ExtIntILi15EEi +void Bounds(const int (&Array)[10], _ExtInt(15) Index) { + int I1 = Array[Index]; + // CHECK: %[[SEXT:.+]] = sext i15 %{{.+}} to i64 + // CHECK: %[[CMP:.+]] = icmp ult i64 %[[SEXT]], 10 + // CHECK: br i1 %[[CMP]] + // CHECK: call void @__ubsan_handle_out_of_bounds +} + +// CHECK: define void @_Z4Enumv +void Enum() { + enum E1 { e1a = 0, e1b = 127 } + e1; + enum E2 { e2a = -1, e2b = 64 } + e2; + enum E3 { e3a = (1u << 31) - 1 } + e3; + + _ExtInt(34) a = e1; + // CHECK: %[[E1:.+]] = icmp ule i32 %{{.*}}, 127 + // CHECK: br i1 %[[E1]] + // CHECK: call void @__ubsan_handle_load_invalid_value_abort + _ExtInt(34) b = e2; + // CHECK: %[[E2HI:.*]] = icmp sle i32 {{.*}}, 127 + // CHECK: %[[E2LO:.*]] = icmp sge i32 {{.*}}, -128 + // CHECK: %[[E2:.*]] = and i1 %[[E2HI]], 
%[[E2LO]] + // CHECK: br i1 %[[E2]] + // CHECK: call void @__ubsan_handle_load_invalid_value_abort + _ExtInt(34) c = e3; + // CHECK: %[[E3:.*]] = icmp ule i32 {{.*}}, 2147483647 + // CHECK: br i1 %[[E3]] + // CHECK: call void @__ubsan_handle_load_invalid_value_abort +} + +// CHECK: define void @_Z13FloatOverflowfd +void FloatOverflow(float f, double d) { + _ExtInt(10) E = f; + // CHECK: fcmp ogt float %{{.+}}, -5.130000e+02 + // CHECK: fcmp olt float %{{.+}}, 5.120000e+02 + _ExtInt(10) E2 = d; + // CHECK: fcmp ogt double %{{.+}}, -5.130000e+02 + // CHECK: fcmp olt double %{{.+}}, 5.120000e+02 + _ExtInt(7) E3 = f; + // CHECK: fcmp ogt float %{{.+}}, -6.500000e+01 + // CHECK: fcmp olt float %{{.+}}, 6.400000e+01 + _ExtInt(7) E4 = d; + // CHECK: fcmp ogt double %{{.+}}, -6.500000e+01 + // CHECK: fcmp olt double %{{.+}}, 6.400000e+01 +} + +// CHECK: define void @_Z14UIntTruncationU7_ExtIntILi35EEjjy +void UIntTruncation(unsigned _ExtInt(35) E, unsigned int i, unsigned long long ll) { + + i = E; + // CHECK: %[[LOADE:.+]] = load i35 + // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 + // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 + // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + E = ll; + // CHECK: %[[LOADLL:.+]] = load i64 + // CHECK: %[[CONV:.+]] = trunc i64 %[[LOADLL]] to i35 + // CHECK: %[[EXT:.+]] = zext i35 %[[CONV]] to i64 + // CHECK: %[[CHECK:.+]] = icmp eq i64 %[[EXT]], %[[LOADLL]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort +} + +// CHECK: define void @_Z13IntTruncationU7_ExtIntILi35EEiU7_ExtIntILi42EEjij +void IntTruncation(_ExtInt(35) E, unsigned _ExtInt(42) UE, int i, unsigned j) { + + j = E; + // CHECK: %[[LOADE:.+]] = load i35 + // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 + // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 + // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + j = UE; + // CHECK: %[[LOADUE:.+]] = load i42 + // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32 + // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i42 + // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + // Note: also triggers sign change check. + i = UE; + // CHECK: %[[LOADUE:.+]] = load i42 + // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32 + // CHECK: %[[NEG:.+]] = icmp slt i32 %[[CONV]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] + // CHECK: %[[EXT:.+]] = sext i32 %[[CONV]] to i42 + // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] + // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]] + // CHECK: br i1 %[[CHECKBOTH]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + // Note: also triggers sign change check. 
+ E = UE; + // CHECK: %[[LOADUE:.+]] = load i42 + // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i35 + // CHECK: %[[NEG:.+]] = icmp slt i35 %[[CONV]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] + // CHECK: %[[EXT:.+]] = sext i35 %[[CONV]] to i42 + // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] + // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]] + // CHECK: br i1 %[[CHECKBOTH]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort +} + +// CHECK: define void @_Z15SignChangeCheckU7_ExtIntILi39EEjU7_ExtIntILi39EEi +void SignChangeCheck(unsigned _ExtInt(39) UE, _ExtInt(39) E) { + UE = E; + // CHECK: %[[LOADE:.+]] = load i39 + // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADE]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 %[[NEG]], false + // CHECK: br i1 %[[SIGNCHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + + E = UE; + // CHECK: %[[LOADUE:.+]] = load i39 + // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADUE]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] + // CHECK: br i1 %[[SIGNCHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort +} + +// CHECK: define void @_Z9DivByZeroU7_ExtIntILi11EEii +void DivByZero(_ExtInt(11) E, int i) { + + // Also triggers signed integer overflow. + E / E; + // CHECK: %[[E:.+]] = load i11, i11* + // CHECK: %[[E2:.+]] = load i11, i11* + // CHECK: %[[NEZERO:.+]] = icmp ne i11 %[[E2]], 0 + // CHECK: %[[NEMIN:.+]] = icmp ne i11 %[[E]], -1024 + // CHECK: %[[NENEG1:.+]] = icmp ne i11 %[[E2]], -1 + // CHECK: %[[OR:.+]] = or i1 %[[NEMIN]], %[[NENEG1]] + // CHECK: %[[AND:.+]] = and i1 %[[NEZERO]], %[[OR]] + // CHECK: br i1 %[[AND]] + // CHECK: call void @__ubsan_handle_divrem_overflow_abort +} + +// TODO: +//-fsanitize=shift: (shift-base, shift-exponent) Shift operators where the amount shifted is greater or equal to the promoted bit-width of the left hand side or less than zero, or where the left hand side is negative. For a signed left shift, also checks for signed overflow in C, and for unsigned overflow in C++. You can use -fsanitize=shift-base or -fsanitize=shift-exponent to check only left-hand side or right-hand side of shift operation, respectively. 
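Concretely, because _ExtInt operands are not integer-promoted, the exponent check for the _ExtInt(9) values below reduces to requiring a shift amount of at most 8; a sketch of which shifts the checks would flag (illustrative cases, not additional CHECK lines):

    _ExtInt(9) E;
    E << 4;    // in range, no trap.
    E << 9;    // flagged by shift-exponent: amount >= bit-width of the LHS type.
    E << -1;   // flagged: negative shift amount.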
+// CHECK: define void @_Z6ShiftsU7_ExtIntILi9EEi +void Shifts(_ExtInt(9) E) { + E >> E; + // CHECK: %[[LHSE:.+]] = load i9, i9* + // CHECK: %[[RHSE:.+]] = load i9, i9* + // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 + // CHECK: br i1 %[[CMP]] + // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort + + E << E; + // CHECK: %[[LHSE:.+]] = load i9, i9* + // CHECK: %[[RHSE:.+]] = load i9, i9* + // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 + // CHECK: br i1 %[[CMP]] + // CHECK: %[[ZEROS:.+]] = sub nuw nsw i9 8, %[[RHSE]] + // CHECK: %[[CHECK:.+]] = lshr i9 %[[LHSE]], %[[ZEROS]] + // CHECK: %[[SKIPSIGN:.+]] = lshr i9 %[[CHECK]], 1 + // CHECK: %[[CHECK:.+]] = icmp eq i9 %[[SKIPSIGN]] + // CHECK: %[[PHI:.+]] = phi i1 [ true, %{{.+}} ], [ %[[CHECK]], %{{.+}} ] + // CHECK: and i1 %[[CMP]], %[[PHI]] + // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort +} + +// CHECK: define void @_Z21SignedIntegerOverflowU7_ExtIntILi93EEiU7_ExtIntILi4EEiU7_ExtIntILi31EEi +void SignedIntegerOverflow(_ExtInt(93) BiggestE, + _ExtInt(4) SmallestE, + _ExtInt(31) JustRightE) { + BiggestE + BiggestE; + // CHECK: %[[LOAD1:.+]] = load i93, i93* + // CHECK: %[[LOAD2:.+]] = load i93, i93* + // CHECK: %[[OFCALL:.+]] = call { i93, i1 } @llvm.sadd.with.overflow.i93(i93 %[[LOAD1]], i93 %[[LOAD2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallestE - SmallestE; + // CHECK: %[[LOAD1:.+]] = load i4, i4* + // CHECK: %[[LOAD2:.+]] = load i4, i4* + // CHECK: %[[OFCALL:.+]] = call { i4, i1 } @llvm.ssub.with.overflow.i4(i4 %[[LOAD1]], i4 %[[LOAD2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_sub_overflow_abort + + JustRightE * JustRightE; + // CHECK: %[[LOAD1:.+]] = load i31, i31* + // CHECK: %[[LOAD2:.+]] = load i31, i31* + // CHECK: %[[OFCALL:.+]] = call { i31, i1 } @llvm.smul.with.overflow.i31(i31 %[[LOAD1]], i31 %[[LOAD2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_mul_overflow_abort +} + +// CHECK: define void @_Z23UnsignedIntegerOverflowjU7_ExtIntILi23EEjU7_ExtIntILi35EEj +void UnsignedIntegerOverflow(unsigned u, + unsigned _ExtInt(23) SmallE, + unsigned _ExtInt(35) BigE) { + u = SmallE + SmallE; + // CHECK: %[[LOADE1:.+]] = load i23, i23* + // CHECK: %[[LOADE2:.+]] = load i23, i23* + // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallE = u + u; + // CHECK: %[[LOADU1:.+]] = load i32, i32* + // CHECK: %[[LOADU2:.+]] = load i32, i32* + // CHECK: %[[OFCALL:.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %[[LOADU1]], i32 %[[LOADU2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i32, i1 } 
%[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i32, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallE = SmallE + SmallE; + // CHECK: %[[LOADE1:.+]] = load i23, i23* + // CHECK: %[[LOADE2:.+]] = load i23, i23* + // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallE = BigE + BigE; + // CHECK: %[[LOADE1:.+]] = load i35, i35* + // CHECK: %[[LOADE2:.+]] = load i35, i35* + // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + BigE = BigE + BigE; + // CHECK: %[[LOADE1:.+]] = load i35, i35* + // CHECK: %[[LOADE2:.+]] = load i35, i35* + // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort +} diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c new file mode 100644 index 000000000000..ef48dd331652 --- /dev/null +++ b/clang/test/CodeGen/ext-int.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK + + +void GenericTest(_ExtInt(3) a, unsigned _ExtInt(3) b, _ExtInt(4) c) { + // CHECK: define {{.*}}void @GenericTest + int which = _Generic(a, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); + // CHECK: store i32 1 + int which2 = _Generic(b, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); + // CHECK: store i32 2 + int which3 = _Generic(c, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); + // CHECK: store i32 3 +} + +void VLATest(_ExtInt(3) A, _ExtInt(99) B, _ExtInt(123456) C) { + // CHECK: define {{.*}}void @VLATest + int AR1[A]; + // CHECK: %[[A:.+]] = zext i3 %{{.+}} to i64 + // CHECK: %[[VLA1:.+]] = alloca i32, i64 %[[A]] + int AR2[B]; + // CHECK: %[[B:.+]] = trunc i99 %{{.+}} to i64 + // CHECK: %[[VLA2:.+]] = alloca i32, i64 %[[B]] + int AR3[C]; + // CHECK: %[[C:.+]] = trunc i123456 %{{.+}} to i64 + // CHECK: %[[VLA3:.+]] = alloca i32, i64 %[[C]] +} + +struct S { + _ExtInt(17) A; + _ExtInt(16777200) B; + _ExtInt(17) C; +}; + +void OffsetOfTest() { + // CHECK: define {{.*}}void @OffsetOfTest + int A = __builtin_offsetof(struct S,A); + // CHECK: store i32 0, i32* %{{.+}} + int B = __builtin_offsetof(struct S,B); + // CHECK: store i32 8, i32* %{{.+}} + int C = __builtin_offsetof(struct S,C); + // CHECK: store i32 2097160, i32* %{{.+}} +} + + diff --git a/clang/test/CodeGenCXX/ext-int.cpp 
b/clang/test/CodeGenCXX/ext-int.cpp new file mode 100644 index 000000000000..4e0c58fe1e40 --- /dev/null +++ b/clang/test/CodeGenCXX/ext-int.cpp @@ -0,0 +1,432 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -I%S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN,NoNewStructPathTBAA +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -I%S -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN,NewStructPathTBAA + +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -I%S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN,NoNewStructPathTBAA +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -I%S -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN,NewStructPathTBAA + +#include + +// Ensure that the layout for these structs is the same as the normal bitfield +// layouts. +struct BitFieldsByte { + _ExtInt(7) A : 3; + _ExtInt(7) B : 3; + _ExtInt(7) C : 2; +}; +// CHECK: %struct.BitFieldsByte = type { i8 } + +struct BitFieldsShort { + _ExtInt(15) A : 3; + _ExtInt(15) B : 3; + _ExtInt(15) C : 2; +}; +// LIN: %struct.BitFieldsShort = type { i8, i8 } +// WIN: %struct.BitFieldsShort = type { i16 } + +struct BitFieldsInt { + _ExtInt(31) A : 3; + _ExtInt(31) B : 3; + _ExtInt(31) C : 2; +}; +// LIN: %struct.BitFieldsInt = type { i8, [3 x i8] } +// WIN: %struct.BitFieldsInt = type { i32 } + +struct BitFieldsLong { + _ExtInt(63) A : 3; + _ExtInt(63) B : 3; + _ExtInt(63) C : 2; +}; +// LIN: %struct.BitFieldsLong = type { i8, [7 x i8] } +// WIN: %struct.BitFieldsLong = type { i64 } + +struct HasExtIntFirst { + _ExtInt(35) A; + int B; +}; +// CHECK: %struct.HasExtIntFirst = type { i35, i32 } + +struct HasExtIntLast { + int A; + _ExtInt(35) B; +}; +// CHECK: %struct.HasExtIntLast = type { i32, i35 } + +struct HasExtIntMiddle { + int A; + _ExtInt(35) B; + int C; +}; +// CHECK: %struct.HasExtIntMiddle = type { i32, i35, i32 } + +// Force emitting of the above structs. +void StructEmit() { + BitFieldsByte A; + BitFieldsShort B; + BitFieldsInt C; + BitFieldsLong D; + + HasExtIntFirst E; + HasExtIntLast F; + HasExtIntMiddle G; +} + +void BitfieldAssignment() { + // LIN: define void @_Z18BitfieldAssignmentv + // WIN: define dso_local void @"?BitfieldAssignment@@YAXXZ" + BitFieldsByte B; + B.A = 3; + B.B = 2; + B.C = 1; + // First one is used for the lifetime start, skip that. 
+ // CHECK: bitcast %struct.BitFieldsByte* + // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* + // CHECK: %[[LOADA:.+]] = load i8, i8* %[[BFType]] + // CHECK: %[[CLEARA:.+]] = and i8 %[[LOADA]], -8 + // CHECK: %[[SETA:.+]] = or i8 %[[CLEARA]], 3 + // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* + // CHECK: %[[LOADB:.+]] = load i8, i8* %[[BFType]] + // CHECK: %[[CLEARB:.+]] = and i8 %[[LOADB]], -57 + // CHECK: %[[SETB:.+]] = or i8 %[[CLEARB]], 16 + // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* + // CHECK: %[[LOADC:.+]] = load i8, i8* %[[BFType]] + // CHECK: %[[CLEARC:.+]] = and i8 %[[LOADC]], 63 + // CHECK: %[[SETC:.+]] = or i8 %[[CLEARC]], 64 +} + +enum AsEnumUnderlyingType : _ExtInt(9) { + A,B,C +}; + +void UnderlyingTypeUsage(AsEnumUnderlyingType Param) { + // LIN: define void @_Z19UnderlyingTypeUsage20AsEnumUnderlyingType(i9 % + // WIN: define dso_local void @"?UnderlyingTypeUsage@@YAXW4AsEnumUnderlyingType@@@Z"(i9 % + AsEnumUnderlyingType Var; + // CHECK: alloca i9, align 2 + // CHECK: store i9 %{{.*}}, align 2 +} + +unsigned _ExtInt(33) ManglingTestRetParam(unsigned _ExtInt(33) Param) { +// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEj(i33 % +// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_UExtInt@$0CB@@__clang@@U12@@Z"(i33 + return 0; +} + +_ExtInt(33) ManglingTestRetParam(_ExtInt(33) Param) { +// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEi(i33 % +// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_ExtInt@$0CB@@__clang@@U12@@Z"(i33 + return 0; +} + +template +void ManglingTestTemplateParam(T&); +template<_ExtInt(99) T> +void ManglingTestNTTP(); + +void ManglingInstantiator() { + // LIN: define void @_Z20ManglingInstantiatorv() + // WIN: define dso_local void @"?ManglingInstantiator@@YAXXZ"() + _ExtInt(93) A; + ManglingTestTemplateParam(A); +// LIN: call void @_Z25ManglingTestTemplateParamIU7_ExtIntILi93EEiEvRT_(i93* +// WIN: call void @"??$ManglingTestTemplateParam@U?$_ExtInt@$0FN@@__clang@@@@YAXAEAU?$_ExtInt@$0FN@@__clang@@@Z"(i93* + constexpr _ExtInt(93) B = 993; + ManglingTestNTTP<38>(); +// LIN: call void @_Z16ManglingTestNTTPILU7_ExtIntILi99EEi38EEvv() +// WIN: call void @"??$ManglingTestNTTP@$0CG@@@YAXXZ"() + ManglingTestNTTP(); +// LIN: call void @_Z16ManglingTestNTTPILU7_ExtIntILi99EEi993EEvv() +// WIN: call void @"??$ManglingTestNTTP@$0DOB@@@YAXXZ"() +} + +void TakesVarargs(int i, ...) { + // LIN: define void @_Z12TakesVarargsiz(i32 %i, ...) + // WIN: define dso_local void @"?TakesVarargs@@YAXHZZ"(i32 %i, ...) 
+ + __builtin_va_list args; + // LIN: %[[ARGS:.+]] = alloca [1 x %struct.__va_list_tag] + // WIN: %[[ARGS:.+]] = alloca i8* + __builtin_va_start(args, i); + // LIN: %[[STARTAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[STARTAD1:.+]] = bitcast %struct.__va_list_tag* %[[STARTAD]] to i8* + // LIN: call void @llvm.va_start(i8* %[[STARTAD1]]) + // WIN: %[[ARGSLLIFETIMESTART:.+]] = bitcast i8** %[[ARGS]] to i8* + // WIN: %[[ARGSSTART:.+]] = bitcast i8** %[[ARGS]] to i8* + // WIN: call void @llvm.va_start(i8* %[[ARGSSTART]]) + + _ExtInt(92) A = __builtin_va_arg(args, _ExtInt(92)); + // LIN: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P1:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD1]], i32 0, i32 2 + // LIN: %[[OFA1:.+]] = load i8*, i8** %[[OFA_P1]] + // LIN: %[[BC1:.+]] = bitcast i8* %[[OFA1]] to i92* + // LIN: %[[OFANEXT1:.+]] = getelementptr i8, i8* %[[OFA1]], i32 16 + // LIN: store i8* %[[OFANEXT1]], i8** %[[OFA_P1]] + // LIN: %[[LOAD1:.+]] = load i92, i92* %[[BC1]] + // LIN: store i92 %[[LOAD1]], i92* + // WIN: %[[CUR1:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT1:.+]] = getelementptr inbounds i8, i8* %[[CUR1]], i64 16 + // WIN: store i8* %[[NEXT1]], i8** %[[ARGS]] + // WIN: %[[BC1:.+]] = bitcast i8* %[[CUR1]] to i92* + // WIN: %[[LOADV1:.+]] = load i92, i92* %[[BC1]] + // WIN: store i92 %[[LOADV1]], i92* + + _ExtInt(31) B = __builtin_va_arg(args, _ExtInt(31)); + // LIN: %[[AD2:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P2:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD2]], i32 0, i32 2 + // LIN: %[[OFA2:.+]] = load i8*, i8** %[[OFA_P2]] + // LIN: %[[BC2:.+]] = bitcast i8* %[[OFA2]] to i31* + // LIN: %[[OFANEXT2:.+]] = getelementptr i8, i8* %[[OFA2]], i32 8 + // LIN: store i8* %[[OFANEXT2]], i8** %[[OFA_P2]] + // LIN: %[[LOAD2:.+]] = load i31, i31* %[[BC2]] + // LIN: store i31 %[[LOAD2]], i31* + // WIN: %[[CUR2:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT2:.+]] = getelementptr inbounds i8, i8* %[[CUR2]], i64 8 + // WIN: store i8* %[[NEXT2]], i8** %[[ARGS]] + // WIN: %[[BC2:.+]] = bitcast i8* %[[CUR2]] to i31* + // WIN: %[[LOADV2:.+]] = load i31, i31* %[[BC2]] + // WIN: store i31 %[[LOADV2]], i31* + + _ExtInt(16) C = __builtin_va_arg(args, _ExtInt(16)); + // LIN: %[[AD3:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P3:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD3]], i32 0, i32 2 + // LIN: %[[OFA3:.+]] = load i8*, i8** %[[OFA_P3]] + // LIN: %[[BC3:.+]] = bitcast i8* %[[OFA3]] to i16* + // LIN: %[[OFANEXT3:.+]] = getelementptr i8, i8* %[[OFA3]], i32 8 + // LIN: store i8* %[[OFANEXT3]], i8** %[[OFA_P3]] + // LIN: %[[LOAD3:.+]] = load i16, i16* %[[BC3]] + // LIN: store i16 %[[LOAD3]], i16* + // WIN: %[[CUR3:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT3:.+]] = getelementptr inbounds i8, i8* %[[CUR3]], i64 8 + // WIN: store i8* %[[NEXT3]], i8** %[[ARGS]] + // WIN: %[[BC3:.+]] = bitcast i8* %[[CUR3]] to i16* + // WIN: %[[LOADV3:.+]] = load i16, i16* %[[BC3]] + // WIN: store i16 %[[LOADV3]], i16* + + _ExtInt(129) D = __builtin_va_arg(args, _ExtInt(129)); + // LIN: %[[AD4:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P4:.+]] = 
getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD4]], i32 0, i32 2 + // LIN: %[[OFA4:.+]] = load i8*, i8** %[[OFA_P4]] + // LIN: %[[BC4:.+]] = bitcast i8* %[[OFA4]] to i129* + // LIN: %[[OFANEXT4:.+]] = getelementptr i8, i8* %[[OFA4]], i32 24 + // LIN: store i8* %[[OFANEXT4]], i8** %[[OFA_P4]] + // LIN: %[[LOAD4:.+]] = load i129, i129* %[[BC4]] + // LIN: store i129 %[[LOAD4]], i129* + // WIN: %[[CUR4:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT4:.+]] = getelementptr inbounds i8, i8* %[[CUR4]], i64 24 + // WIN: store i8* %[[NEXT4]], i8** %[[ARGS]] + // WIN: %[[BC4:.+]] = bitcast i8* %[[CUR4]] to i129* + // WIN: %[[LOADV4:.+]] = load i129, i129* %[[BC4]] + // WIN: store i129 %[[LOADV4]], i129* + + _ExtInt(16777200) E = __builtin_va_arg(args, _ExtInt(16777200)); + // LIN: %[[AD5:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P5:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD5]], i32 0, i32 2 + // LIN: %[[OFA5:.+]] = load i8*, i8** %[[OFA_P5]] + // LIN: %[[BC5:.+]] = bitcast i8* %[[OFA5]] to i16777200* + // LIN: %[[OFANEXT5:.+]] = getelementptr i8, i8* %[[OFA5]], i32 2097152 + // LIN: store i8* %[[OFANEXT5]], i8** %[[OFA_P5]] + // LIN: %[[LOAD5:.+]] = load i16777200, i16777200* %[[BC5]] + // LIN: store i16777200 %[[LOAD5]], i16777200* + // WIN: %[[CUR5:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT5:.+]] = getelementptr inbounds i8, i8* %[[CUR5]], i64 2097152 + // WIN: store i8* %[[NEXT5]], i8** %[[ARGS]] + // WIN: %[[BC5:.+]] = bitcast i8* %[[CUR5]] to i16777200* + // WIN: %[[LOADV5:.+]] = load i16777200, i16777200* %[[BC5]] + // WIN: store i16777200 %[[LOADV5]], i16777200* + + __builtin_va_end(args); + // LIN: %[[ENDAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[ENDAD1:.+]] = bitcast %struct.__va_list_tag* %[[ENDAD]] to i8* + // LIN: call void @llvm.va_end(i8* %[[ENDAD1]]) + // WIN: %[[ARGSEND:.+]] = bitcast i8** %[[ARGS]] to i8* + // WIN: call void @llvm.va_end(i8* %[[ARGSEND]]) +} +void typeid_tests() { + // LIN: define void @_Z12typeid_testsv() + // WIN: define dso_local void @"?typeid_tests@@YAXXZ"() + unsigned _ExtInt(33) U33_1, U33_2; + _ExtInt(33) S33_1, S33_2; + _ExtInt(32) S32_1, S32_2; + + auto A = typeid(U33_1); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEj to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor28* @"??_R0U?$_UExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto B = typeid(U33_2); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEj to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor28* @"??_R0U?$_UExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto C = typeid(S33_1); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEi to %"class.std::type_info"*)) + // WIN: call 
%"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto D = typeid(S33_2); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEi to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto E = typeid(S32_1); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi32EEi to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CA@@__clang@@@8" to %"class.std::type_info"*)) + auto F = typeid(S32_2); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi32EEi to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CA@@__clang@@@8" to %"class.std::type_info"*)) +} + +void ExplicitCasts() { + // LIN: define void @_Z13ExplicitCastsv() + // WIN: define dso_local void @"?ExplicitCasts@@YAXXZ"() + + _ExtInt(33) a; + _ExtInt(31) b; + int i; + + a = i; + // CHECK: %[[CONV:.+]] = sext i32 %{{.+}} to i33 + b = i; + // CHECK: %[[CONV:.+]] = trunc i32 %{{.+}} to i31 + i = a; + // CHECK: %[[CONV:.+]] = trunc i33 %{{.+}} to i32 + i = b; + // CHECK: %[[CONV:.+]] = sext i31 %{{.+}} to i32 +} + +struct S { + _ExtInt(17) A; + _ExtInt(16777200) B; + _ExtInt(17) C; +}; + +void OffsetOfTest() { + // LIN: define void @_Z12OffsetOfTestv() + // WIN: define dso_local void @"?OffsetOfTest@@YAXXZ"() + + auto A = __builtin_offsetof(S,A); + // CHECK: store i64 0, i64* %{{.+}} + auto B = __builtin_offsetof(S,B); + // CHECK: store i64 8, i64* %{{.+}} + auto C = __builtin_offsetof(S,C); + // CHECK: store i64 2097160, i64* %{{.+}} +} + + +void ShiftExtIntByConstant(_ExtInt(28) Ext) { +// LIN: define void @_Z21ShiftExtIntByConstantU7_ExtIntILi28EEi +// WIN: define dso_local void @"?ShiftExtIntByConstant@@YAXU?$_ExtInt@$0BM@@__clang@@@Z" + Ext << 7; + // CHECK: shl i28 %{{.+}}, 7 + Ext >> 7; + // CHECK: ashr i28 %{{.+}}, 7 + Ext << -7; + // CHECK: shl i28 %{{.+}}, -7 + Ext >> -7; + // CHECK: ashr i28 %{{.+}}, -7 + + // UB in C/C++, Defined in OpenCL. 
+ Ext << 29; + // CHECK: shl i28 %{{.+}}, 29 + Ext >> 29; + // CHECK: ashr i28 %{{.+}}, 29 +} + +void ConstantShiftByExtInt(_ExtInt(28) Ext, _ExtInt(65) LargeExt) { + // LIN: define void @_Z21ConstantShiftByExtIntU7_ExtIntILi28EEiU7_ExtIntILi65EEi + // WIN: define dso_local void @"?ConstantShiftByExtInt@@YAXU?$_ExtInt@$0BM@@__clang@@U?$_ExtInt@$0EB@@2@@Z" + 10 << Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: shl i32 10, %[[PROMO]] + 10 >> Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: ashr i32 10, %[[PROMO]] + 10 << LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: shl i32 10, %[[PROMO]] + 10 >> LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: ashr i32 10, %[[PROMO]] +} + +void Shift(_ExtInt(28) Ext, _ExtInt(65) LargeExt, int i) { + // LIN: define void @_Z5ShiftU7_ExtIntILi28EEiU7_ExtIntILi65EEii + // WIN: define dso_local void @"?Shift@@YAXU?$_ExtInt@$0BM@@__clang@@U?$_ExtInt@$0EB@@2@H@Z" + i << Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: shl i32 {{.+}}, %[[PROMO]] + i >> Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: ashr i32 {{.+}}, %[[PROMO]] + + i << LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: shl i32 {{.+}}, %[[PROMO]] + i >> LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: ashr i32 {{.+}}, %[[PROMO]] + + Ext << i; + // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28 + // CHECK: shl i28 {{.+}}, %[[PROMO]] + Ext >> i; + // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28 + // CHECK: ashr i28 {{.+}}, %[[PROMO]] + + LargeExt << i; + // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65 + // CHECK: shl i65 {{.+}}, %[[PROMO]] + LargeExt >> i; + // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65 + // CHECK: ashr i65 {{.+}}, %[[PROMO]] + + Ext << LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i28 + // CHECK: shl i28 {{.+}}, %[[PROMO]] + Ext >> LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i28 + // CHECK: ashr i28 {{.+}}, %[[PROMO]] + + LargeExt << Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i65 + // CHECK: shl i65 {{.+}}, %[[PROMO]] + LargeExt >> Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i65 + // CHECK: ashr i65 {{.+}}, %[[PROMO]] +} + +void ComplexTest(_Complex _ExtInt(12) first, + _Complex _ExtInt(33) second) { + // LIN: define void @_Z11ComplexTestCU7_ExtIntILi12EEiCU7_ExtIntILi33EEi + // WIN: define dso_local void @"?ComplexTest@@YAXU?$_Complex@U?$_ExtInt@$0M@@__clang@@@__clang@@U?$_Complex@U?$_ExtInt@$0CB@@__clang@@@2@@Z" + first + second; + // CHECK: %[[FIRST_REALP:.+]] = getelementptr inbounds { i12, i12 }, { i12, i12 }* %{{.+}}, i32 0, i32 0 + // CHECK: %[[FIRST_REAL:.+]] = load i12, i12* %[[FIRST_REALP]] + // CHECK: %[[FIRST_IMAGP:.+]] = getelementptr inbounds { i12, i12 }, { i12, i12 }* %{{.+}}, i32 0, i32 1 + // CHECK: %[[FIRST_IMAG:.+]] = load i12, i12* %[[FIRST_IMAGP]] + // CHECK: %[[FIRST_REAL_CONV:.+]] = sext i12 %[[FIRST_REAL]] + // CHECK: %[[FIRST_IMAG_CONV:.+]] = sext i12 %[[FIRST_IMAG]] + // CHECK: %[[SECOND_REALP:.+]] = getelementptr inbounds { i33, i33 }, { i33, i33 }* %{{.+}}, i32 0, i32 0 + // CHECK: %[[SECOND_REAL:.+]] = load i33, i33* %[[SECOND_REALP]] + // CHECK: %[[SECOND_IMAGP:.+]] = getelementptr inbounds { i33, i33 }, { i33, i33 }* %{{.+}}, i32 0, i32 1 + // CHECK: %[[SECOND_IMAG:.+]] = load i33, i33* %[[SECOND_IMAGP]] + // CHECK: %[[REAL:.+]] = add i33 %[[FIRST_REAL_CONV]], %[[SECOND_REAL]] + // CHECK: %[[IMAG:.+]] = add i33 
%[[FIRST_IMAG_CONV]], %[[SECOND_IMAG]] +} + +// Ensure that these types don't alias the normal int types. +void TBAATest(_ExtInt(sizeof(int) * 8) ExtInt, + unsigned _ExtInt(sizeof(int) * 8) ExtUInt, + _ExtInt(6) Other) { + // CHECK-DAG: store i32 %{{.+}}, i32* %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA:.+]] + // CHECK-DAG: store i32 %{{.+}}, i32* %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA]] + // CHECK-DAG: store i6 %{{.+}}, i6* %{{.+}}, align 1, !tbaa ![[EXTINT6_TBAA:.+]] + ExtInt = 5; + ExtUInt = 5; + Other = 5; +} + +// NoNewStructPathTBAA-DAG: ![[CHAR_TBAA_ROOT:.+]] = !{!"omnipotent char", ![[TBAA_ROOT:.+]], i64 0} +// NoNewStructPathTBAA-DAG: ![[TBAA_ROOT]] = !{!"Simple C++ TBAA"} +// NoNewStructPathTBAA-DAG: ![[EXTINT_TBAA]] = !{![[EXTINT_TBAA_ROOT:.+]], ![[EXTINT_TBAA_ROOT]], i64 0} +// NoNewStructPathTBAA-DAG: ![[EXTINT_TBAA_ROOT]] = !{!"_ExtInt(32)", ![[CHAR_TBAA_ROOT]], i64 0} +// NoNewStructPathTBAA-DAG: ![[EXTINT6_TBAA]] = !{![[EXTINT6_TBAA_ROOT:.+]], ![[EXTINT6_TBAA_ROOT]], i64 0} +// NoNewStructPathTBAA-DAG: ![[EXTINT6_TBAA_ROOT]] = !{!"_ExtInt(6)", ![[CHAR_TBAA_ROOT]], i64 0} + +// NewStructPathTBAA-DAG: ![[CHAR_TBAA_ROOT:.+]] = !{![[TBAA_ROOT:.+]], i64 1, !"omnipotent char"} +// NewStructPathTBAA-DAG: ![[TBAA_ROOT]] = !{!"Simple C++ TBAA"} +// NewStructPathTBAA-DAG: ![[EXTINT_TBAA]] = !{![[EXTINT_TBAA_ROOT:.+]], ![[EXTINT_TBAA_ROOT]], i64 0, i64 4} +// NewStructPathTBAA-DAG: ![[EXTINT_TBAA_ROOT]] = !{![[CHAR_TBAA_ROOT]], i64 4, !"_ExtInt(32)"} +// NewStructPathTBAA-DAG: ![[EXTINT6_TBAA]] = !{![[EXTINT6_TBAA_ROOT:.+]], ![[EXTINT6_TBAA_ROOT]], i64 0, i64 1} +// NewStructPathTBAA-DAG: ![[EXTINT6_TBAA_ROOT]] = !{![[CHAR_TBAA_ROOT]], i64 1, !"_ExtInt(6)"} diff --git a/clang/test/CodeGenOpenCL/ext-int-shift.cl b/clang/test/CodeGenOpenCL/ext-int-shift.cl new file mode 100644 index 000000000000..4d2292daac77 --- /dev/null +++ b/clang/test/CodeGenOpenCL/ext-int-shift.cl @@ -0,0 +1,21 @@ +// RUN: %clang -cc1 -triple x86_64-linux-pc -O3 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s + +void Shifts(_ExtInt(12) E, int i) { + E << 99; + // CHECK: shl i12 %{{.+}}, 3 + + 77 << E; + // CHECK: %[[PROM:.+]] = zext i12 %{{.+}} to i32 + // CHECK: %[[MASK:.+]] = and i32 %[[PROM]], 31 + // CHECK: shl i32 77, %[[MASK]] + + E << i; + // CHECK: %[[PROM:.+]] = trunc i32 %{{.+}} to i12 + // CHECK: %[[MASK:.+]] = urem i12 %[[PROM]], 12 + // CHECK: shl i12 %{{.+}}, %[[MASK]] + + i << E; + // CHECK: %[[PROM:.+]] = zext i12 %{{.+}} to i32 + // CHECK: %[[MASK:.+]] = and i32 %[[PROM]], 31 + // CHECK: shl i32 %{{.+}}, %[[MASK]] +} diff --git a/clang/test/Parser/ext-int.cpp b/clang/test/Parser/ext-int.cpp new file mode 100644 index 000000000000..4926d5190587 --- /dev/null +++ b/clang/test/Parser/ext-int.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// expected-error@+5{{expected ')'}} +// expected-note@+4{{to match this '('}} +// expected-error@+3{{expected unqualified-id}} +// expected-error@+2{{extraneous closing brace}} +// expected-error@+1{{C++ requires a type specifier for all declarations}} +_ExtInt(32} a; +// expected-error@+2{{expected expression}} +// expected-error@+1{{C++ requires a type specifier for all declarations}} +_ExtInt(32* ) b; +// expected-error@+3{{expected '('}} +// expected-error@+2{{expected unqualified-id}} +// expected-error@+1{{C++ requires a type specifier for all declarations}} +_ExtInt{32} c; diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp new file mode 100644 index 000000000000..6a06280dceec --- /dev/null +++ 
b/clang/test/SemaCXX/ext-int.cpp @@ -0,0 +1,278 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -Wimplicit-int-conversion + +template +struct HasExtInt { + _ExtInt(Bounds) b; + unsigned _ExtInt(Bounds) b2; +}; + +// Delcaring variables: +_ExtInt(33) Declarations(_ExtInt(48) &Param) { // Useable in params and returns. + short _ExtInt(43) a; // expected-error {{'short _ExtInt' is invalid}} + _ExtInt(43) long b; // expected-error {{'long _ExtInt' is invalid}} + + // These should all be fine: + const _ExtInt(5) c = 3; + const unsigned _ExtInt(5) d; // expected-error {{default initialization of an object of const type 'const unsigned _ExtInt(5)'}} + unsigned _ExtInt(5) e = 5; + _ExtInt(5) unsigned f; + + _ExtInt(-3) g; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + _ExtInt(0) h; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + _ExtInt(1) i; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + _ExtInt(2) j;; + unsigned _ExtInt(0) k;// expected-error{{unsigned _ExtInt must have a bit size of at least 1}} + unsigned _ExtInt(1) l; + signed _ExtInt(1) m; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + + constexpr _ExtInt(6) n = 33; // expected-warning{{implicit conversion from 'int' to 'const _ExtInt(6)' changes value from 33 to -31}} + constexpr _ExtInt(7) o = 33; + + // Check LLVM imposed max size. + _ExtInt(0xFFFFFFFFFF) p; // expected-error {{signed _ExtInt of bit sizes greater than 16777215 not supported}} + unsigned _ExtInt(0xFFFFFFFFFF) q; // expected-error {{unsigned _ExtInt of bit sizes greater than 16777215 not supported}} + +// Ensure template params are instantiated correctly. + // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} + // expected-error@6{{unsigned _ExtInt must have a bit size of at least 1}} + // expected-note@+1{{in instantiation of template class }} + HasExtInt<-1> r; + // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} + // expected-error@6{{unsigned _ExtInt must have a bit size of at least 1}} + // expected-note@+1{{in instantiation of template class }} + HasExtInt<0> s; + // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} + // expected-note@+1{{in instantiation of template class }} + HasExtInt<1> t; + HasExtInt<2> u; + + _ExtInt(-3.0) v; // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'double'}} + _ExtInt(3.0) x; // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'double'}} + + return 0; +} + +template <_ExtInt(5) I> +struct ExtIntTemplParam { + static constexpr _ExtInt(5) Var = I; +}; + +template +void deduced_whole_type(T){} +template +void deduced_bound(_ExtInt(I)){} + +// Ensure ext-int can be used in template places. 
+void Templates() { + ExtIntTemplParam<13> a; + constexpr _ExtInt(3) b = 1; + ExtIntTemplParam c; + constexpr _ExtInt(9) d = 1; + ExtIntTemplParam e; + + deduced_whole_type(b); + deduced_bound(b); +} + +template +struct is_same { + static constexpr bool value = false; +}; +template +struct is_same { + static constexpr bool value = true; +}; + +// Reject vector types: +// expected-error@+1{{invalid vector element type '_ExtInt(32)'}} +typedef _ExtInt(32) __attribute__((vector_size(16))) VecTy; + +// Allow _Complex: +_Complex _ExtInt(3) Cmplx; + +// Reject cases of _Atomic: +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)' with less than 1 byte of precision}} +_Atomic _ExtInt(4) TooSmallAtomic; +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)' with a non power of 2 precision}} +_Atomic _ExtInt(9) NotPow2Atomic; +_Atomic _ExtInt(128) JustRightAtomic; + +// Test result types of Unary/Bitwise/Binary Operations: +void Ops() { + _ExtInt(43) x43_s = 1, y43_s = 1; + _ExtInt(sizeof(int) * 8) x32_s = 1, y32_s = 1; + unsigned _ExtInt(sizeof(unsigned) * 8) x32_u = 1, y32_u = 1; + _ExtInt(4) x4_s = 1, y4_s = 1; + unsigned _ExtInt(43) x43_u = 1, y43_u = 1; + unsigned _ExtInt(4) x4_u = 1, y4_u = 1; + int x_int = 1, y_int = 1; + unsigned x_uint = 1, y_uint = 1; + bool b; + + // Disabling mixed conversions: + // Signed/unsigned mixed. + // expected-error@+1{{invalid operands to binary expression}} + x43_u + y43_s; + // expected-error@+1{{invalid operands to binary expression}} + x4_s - y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x43_s * y43_u; + // expected-error@+1{{invalid operands to binary expression}} + x4_u / y4_s; + + // Different Sizes. + // expected-error@+1{{invalid operands to binary expression}} + x43_s + y4_s; + // expected-error@+1{{invalid operands to binary expression}} + x43_s - y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x43_u * y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x4_u / y43_u; + + // Mixed with standard types. + // expected-error@+1{{invalid operands to binary expression}} + x43_s + x_int; + // expected-error@+1{{invalid operands to binary expression}} + x43_u - x_int; + // expected-error@+1{{invalid operands to binary expression}} + x32_s * x_int; + // expected-error@+1{{invalid operands to binary expression}} + x32_u / x_int; + // expected-error@+1{{invalid operands to binary expression}} + x32_s * x_uint; + // expected-error@+1{{invalid operands to binary expression}} + x32_u / x_uint; + // expected-error@+1{{invalid operands to binary expression}} + x4_s + x_int; + // expected-error@+1{{invalid operands to binary expression}} + x4_u - x_int; + // expected-error@+1{{invalid operands to binary expression}} + x4_s + b; + // expected-error@+1{{invalid operands to binary expression}} + x4_u - b; + // expected-error@+1{{invalid operands to binary expression}} + x43_s + b; + // expected-error@+1{{invalid operands to binary expression}} + x43_u - b; + + // Bitwise checks. + // expected-error@+1{{invalid operands to binary expression}} + x43_s % y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x43_u % y4_s; + // expected-error@+1{{invalid operands to binary expression}} + x4_s | y43_u; + // expected-error@+1{{invalid operands to binary expression}} + x4_u | y43_s; + + // compassign. + // expected-error@+1{{invalid operands to binary expression}} + x43_s += 33; + + // Comparisons. 
+ // expected-error@+1{{invalid operands to binary expression}} + x43_s > 33; + // expected-error@+1{{invalid operands to binary expression}} + x4_s > 33; + + // Same size/sign ops don't change type. + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + + // Unary ops shouldn't go through integer promotions. + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + static_assert(is_same> 1), _ExtInt(4)>::value,""); + static_assert(is_same::value,""); + + static_assert(sizeof(x43_s) == 8, ""); + static_assert(sizeof(x4_s) == 1, ""); + + static_assert(sizeof(_ExtInt(3340)) == 424, ""); // 424 * 8 == 3392. + static_assert(sizeof(_ExtInt(1049)) == 136, ""); // 136 * 8 == 1088. + + static_assert(alignof(decltype(x43_s)) == 8, ""); + static_assert(alignof(decltype(x4_s)) == 1, ""); + + static_assert(alignof(_ExtInt(3340)) == 8, ""); + static_assert(alignof(_ExtInt(1049)) == 8, ""); +} + +constexpr int func() { return 42;} + +void ConstexprBitsize() { + _ExtInt(func()) F; + static_assert(is_same::value, ""); +} + +// Useable as an underlying type. +enum AsEnumUnderlyingType : _ExtInt(33) { +}; + +void overloaded(int); +void overloaded(_ExtInt(32)); +void overloaded(_ExtInt(33)); +void overloaded(short); +//expected-note@+1{{candidate function}} +void overloaded2(_ExtInt(32)); +//expected-note@+1{{candidate function}} +void overloaded2(_ExtInt(33)); +//expected-note@+1{{candidate function}} +void overloaded2(short); + +void overload_use() { + int i; + _ExtInt(32) i32; + _ExtInt(33) i33; + short s; + + // All of these get their corresponding exact matches. + overloaded(i); + overloaded(i32); + overloaded(i33); + overloaded(s); + + overloaded2(i); // expected-error{{call to 'overloaded2' is ambiguous}} + + overloaded2(i32); + + overloaded2(s); +} + +// no errors expected, this should 'just work'. 
+struct UsedAsBitField { + _ExtInt(3) F : 3; + _ExtInt(3) G : 3; + _ExtInt(3) H : 3; +}; + +// expected-error@+1{{mode attribute only supported for integer and floating-point types}} +typedef _ExtInt(33) IllegalMode __attribute__((mode(DI))); + +void ImplicitCasts(_ExtInt(31) s31, _ExtInt(33) s33, int i) { + // expected-warning@+1{{implicit conversion loses integer precision}} + s31 = i; + // expected-warning@+1{{implicit conversion loses integer precision}} + s31 = s33; + s33 = i; + s33 = s31; + i = s31; + // expected-warning@+1{{implicit conversion loses integer precision}} + i = s33; +} + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index fc9d8db62b2d..dafe4ccda05f 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1793,6 +1793,8 @@ DEFAULT_TYPELOC_IMPL(Enum, TagType) DEFAULT_TYPELOC_IMPL(SubstTemplateTypeParm, Type) DEFAULT_TYPELOC_IMPL(SubstTemplateTypeParmPack, Type) DEFAULT_TYPELOC_IMPL(Auto, Type) +DEFAULT_TYPELOC_IMPL(ExtInt, Type) +DEFAULT_TYPELOC_IMPL(DependentExtInt, Type) bool CursorVisitor::VisitCXXRecordDecl(CXXRecordDecl *D) { // Visit the nested-name-specifier, if present. From 96b61571d03aa20183a7f3e7f6fcc9692e7a2a7d Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 9 Apr 2020 12:01:05 +0100 Subject: [PATCH 169/216] [AMDGPU] New helper functions to get a register class of a given width Summary: Introduce new helper functions getVGPRClassForBitWidth, getAGPRClassForBitWidth, getSGPRClassForBitWidth and use them to refactor various other functions that all contained their own lists of valid register class widths. NFC. Reviewers: arsenm, rampitec Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, hiraditya, kerbowa, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78311 --- llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp | 317 +++++++++------------- llvm/lib/Target/AMDGPU/SIRegisterInfo.h | 12 +- 2 files changed, 127 insertions(+), 202 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp index 4ff5960acf03..a0e4c1c84407 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -1274,6 +1274,75 @@ StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const { return AMDGPUInstPrinter::getRegisterName(Reg); } +static const TargetRegisterClass *getVGPRClassForBitWidth(unsigned BitWidth) { + switch (BitWidth) { + case 1: + return &AMDGPU::VReg_1RegClass; + case 16: + return &AMDGPU::VGPR_LO16RegClass; + case 32: + return &AMDGPU::VGPR_32RegClass; + case 64: + return &AMDGPU::VReg_64RegClass; + case 96: + return &AMDGPU::VReg_96RegClass; + case 128: + return &AMDGPU::VReg_128RegClass; + case 160: + return &AMDGPU::VReg_160RegClass; + case 256: + return &AMDGPU::VReg_256RegClass; + case 512: + return &AMDGPU::VReg_512RegClass; + case 1024: + return &AMDGPU::VReg_1024RegClass; + default: + return nullptr; + } +} + +static const TargetRegisterClass *getAGPRClassForBitWidth(unsigned BitWidth) { + switch (BitWidth) { + case 32: + return &AMDGPU::AGPR_32RegClass; + case 64: + return &AMDGPU::AReg_64RegClass; + case 128: + return &AMDGPU::AReg_128RegClass; + case 512: + return &AMDGPU::AReg_512RegClass; + case 1024: + return &AMDGPU::AReg_1024RegClass; + default: + return nullptr; + } +} + +static const TargetRegisterClass *getSGPRClassForBitWidth(unsigned BitWidth) { + switch (BitWidth) { + case 16: + return &AMDGPU::SGPR_LO16RegClass; + case 32: + return 
&AMDGPU::SReg_32RegClass; + case 64: + return &AMDGPU::SReg_64RegClass; + case 96: + return &AMDGPU::SReg_96RegClass; + case 128: + return &AMDGPU::SReg_128RegClass; + case 160: + return &AMDGPU::SReg_160RegClass; + case 256: + return &AMDGPU::SReg_256RegClass; + case 512: + return &AMDGPU::SReg_512RegClass; + case 1024: + return &AMDGPU::SReg_1024RegClass; + default: + return nullptr; + } +} + // FIXME: This is very slow. It might be worth creating a map from physreg to // register class. const TargetRegisterClass * @@ -1320,129 +1389,56 @@ SIRegisterInfo::getPhysRegClass(MCRegister Reg) const { // TargetRegisterClass to mark which classes are VGPRs to make this trivial. bool SIRegisterInfo::hasVGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); - switch (Size) { - case 16: + if (Size == 16) { return getCommonSubClass(&AMDGPU::VGPR_LO16RegClass, RC) != nullptr || getCommonSubClass(&AMDGPU::VGPR_HI16RegClass, RC) != nullptr; - case 32: - return getCommonSubClass(&AMDGPU::VGPR_32RegClass, RC) != nullptr; - case 64: - return getCommonSubClass(&AMDGPU::VReg_64RegClass, RC) != nullptr; - case 96: - return getCommonSubClass(&AMDGPU::VReg_96RegClass, RC) != nullptr; - case 128: - return getCommonSubClass(&AMDGPU::VReg_128RegClass, RC) != nullptr; - case 160: - return getCommonSubClass(&AMDGPU::VReg_160RegClass, RC) != nullptr; - case 256: - return getCommonSubClass(&AMDGPU::VReg_256RegClass, RC) != nullptr; - case 512: - return getCommonSubClass(&AMDGPU::VReg_512RegClass, RC) != nullptr; - case 1024: - return getCommonSubClass(&AMDGPU::VReg_1024RegClass, RC) != nullptr; - case 1: - return getCommonSubClass(&AMDGPU::VReg_1RegClass, RC) != nullptr; - default: + } + const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); + if (!VRC) { assert(Size < 32 && "Invalid register class size"); return false; } + return getCommonSubClass(VRC, RC) != nullptr; } bool SIRegisterInfo::hasAGPRs(const TargetRegisterClass *RC) const { unsigned Size = getRegSizeInBits(*RC); if (Size < 32) return false; - switch (Size) { - case 32: - return getCommonSubClass(&AMDGPU::AGPR_32RegClass, RC) != nullptr; - case 64: - return getCommonSubClass(&AMDGPU::AReg_64RegClass, RC) != nullptr; - case 96: + const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); + if (!ARC) { + assert(getVGPRClassForBitWidth(Size) && "Invalid register class size"); return false; - case 128: - return getCommonSubClass(&AMDGPU::AReg_128RegClass, RC) != nullptr; - case 160: - case 256: - return false; - case 512: - return getCommonSubClass(&AMDGPU::AReg_512RegClass, RC) != nullptr; - case 1024: - return getCommonSubClass(&AMDGPU::AReg_1024RegClass, RC) != nullptr; - default: - llvm_unreachable("Invalid register class size"); } + return getCommonSubClass(ARC, RC) != nullptr; } -const TargetRegisterClass *SIRegisterInfo::getEquivalentVGPRClass( - const TargetRegisterClass *SRC) const { - switch (getRegSizeInBits(*SRC)) { - case 16: - return &AMDGPU::VGPR_LO16RegClass; - case 32: - return &AMDGPU::VGPR_32RegClass; - case 64: - return &AMDGPU::VReg_64RegClass; - case 96: - return &AMDGPU::VReg_96RegClass; - case 128: - return &AMDGPU::VReg_128RegClass; - case 160: - return &AMDGPU::VReg_160RegClass; - case 256: - return &AMDGPU::VReg_256RegClass; - case 512: - return &AMDGPU::VReg_512RegClass; - case 1024: - return &AMDGPU::VReg_1024RegClass; - case 1: - return &AMDGPU::VReg_1RegClass; - default: - llvm_unreachable("Invalid register class size"); - } +const TargetRegisterClass * 
+SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const { + unsigned Size = getRegSizeInBits(*SRC); + const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size); + assert(VRC && "Invalid register class size"); + return VRC; } -const TargetRegisterClass *SIRegisterInfo::getEquivalentAGPRClass( - const TargetRegisterClass *SRC) const { - switch (getRegSizeInBits(*SRC)) { - case 32: - return &AMDGPU::AGPR_32RegClass; - case 64: - return &AMDGPU::AReg_64RegClass; - case 128: - return &AMDGPU::AReg_128RegClass; - case 512: - return &AMDGPU::AReg_512RegClass; - case 1024: - return &AMDGPU::AReg_1024RegClass; - default: - llvm_unreachable("Invalid register class size"); - } +const TargetRegisterClass * +SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const { + unsigned Size = getRegSizeInBits(*SRC); + const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size); + assert(ARC && "Invalid register class size"); + return ARC; } -const TargetRegisterClass *SIRegisterInfo::getEquivalentSGPRClass( - const TargetRegisterClass *VRC) const { - switch (getRegSizeInBits(*VRC)) { - case 16: - return &AMDGPU::SGPR_LO16RegClass; - case 32: +const TargetRegisterClass * +SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const { + unsigned Size = getRegSizeInBits(*VRC); + if (Size == 32) return &AMDGPU::SGPR_32RegClass; - case 64: - return &AMDGPU::SReg_64RegClass; - case 96: - return &AMDGPU::SReg_96RegClass; - case 128: + if (Size == 128) return &AMDGPU::SGPR_128RegClass; - case 160: - return &AMDGPU::SReg_160RegClass; - case 256: - return &AMDGPU::SReg_256RegClass; - case 512: - return &AMDGPU::SReg_512RegClass; - case 1024: - return &AMDGPU::SReg_1024RegClass; - default: - llvm_unreachable("Invalid register class size"); - } + const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size); + assert(SRC && "Invalid register class size"); + return SRC; } const TargetRegisterClass *SIRegisterInfo::getSubRegClass( @@ -1451,62 +1447,21 @@ const TargetRegisterClass *SIRegisterInfo::getSubRegClass( return RC; // We can assume that each lane corresponds to one 32-bit register. 
- unsigned Count = getNumChannelsFromSubReg(SubIdx); + unsigned Size = getNumChannelsFromSubReg(SubIdx) * 32; if (isSGPRClass(RC)) { - switch (Count) { - case 1: - return &AMDGPU::SGPR_32RegClass; - case 2: - return &AMDGPU::SReg_64RegClass; - case 3: - return &AMDGPU::SReg_96RegClass; - case 4: - return &AMDGPU::SGPR_128RegClass; - case 5: - return &AMDGPU::SReg_160RegClass; - case 8: - return &AMDGPU::SReg_256RegClass; - case 16: - return &AMDGPU::SReg_512RegClass; - case 32: /* fall-through */ - default: - llvm_unreachable("Invalid sub-register class size"); - } + if (Size == 32) + RC = &AMDGPU::SGPR_32RegClass; + else if (Size == 128) + RC = &AMDGPU::SGPR_128RegClass; + else + RC = getSGPRClassForBitWidth(Size); } else if (hasAGPRs(RC)) { - switch (Count) { - case 1: - return &AMDGPU::AGPR_32RegClass; - case 2: - return &AMDGPU::AReg_64RegClass; - case 4: - return &AMDGPU::AReg_128RegClass; - case 16: - return &AMDGPU::AReg_512RegClass; - case 32: /* fall-through */ - default: - llvm_unreachable("Invalid sub-register class size"); - } + RC = getAGPRClassForBitWidth(Size); } else { - switch (Count) { - case 1: - return &AMDGPU::VGPR_32RegClass; - case 2: - return &AMDGPU::VReg_64RegClass; - case 3: - return &AMDGPU::VReg_96RegClass; - case 4: - return &AMDGPU::VReg_128RegClass; - case 5: - return &AMDGPU::VReg_160RegClass; - case 8: - return &AMDGPU::VReg_256RegClass; - case 16: - return &AMDGPU::VReg_512RegClass; - case 32: /* fall-through */ - default: - llvm_unreachable("Invalid sub-register class size"); - } + RC = getVGPRClassForBitWidth(Size); } + assert(RC && "Invalid sub-register class size"); + return RC; } bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const { @@ -1714,49 +1669,19 @@ const TargetRegisterClass * SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size, const RegisterBank &RB, const MachineRegisterInfo &MRI) const { - switch (Size) { - case 1: { - switch (RB.getID()) { - case AMDGPU::VGPRRegBankID: - return &AMDGPU::VGPR_32RegClass; - case AMDGPU::VCCRegBankID: - return isWave32 ? - &AMDGPU::SReg_32_XM0_XEXECRegClass : &AMDGPU::SReg_64_XEXECRegClass; - case AMDGPU::SGPRRegBankID: - return &AMDGPU::SReg_32RegClass; - default: - llvm_unreachable("unknown register bank"); - } - } - case 32: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass : - &AMDGPU::SReg_32RegClass; - case 64: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_64RegClass : - &AMDGPU::SReg_64RegClass; - case 96: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_96RegClass : - &AMDGPU::SReg_96RegClass; - case 128: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_128RegClass : - &AMDGPU::SGPR_128RegClass; - case 160: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_160RegClass : - &AMDGPU::SReg_160RegClass; - case 256: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_256RegClass : - &AMDGPU::SReg_256RegClass; - case 512: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_512RegClass : - &AMDGPU::SReg_512RegClass; - case 1024: - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VReg_1024RegClass : - &AMDGPU::SReg_1024RegClass; + switch (RB.getID()) { + case AMDGPU::VGPRRegBankID: + return getVGPRClassForBitWidth(std::max(32u, Size)); + case AMDGPU::VCCRegBankID: + assert(Size == 1); + return isWave32 ? 
&AMDGPU::SReg_32_XM0_XEXECRegClass + : &AMDGPU::SReg_64_XEXECRegClass; + case AMDGPU::SGPRRegBankID: + if (Size == 128) + return &AMDGPU::SGPR_128RegClass; + return getSGPRClassForBitWidth(std::max(32u, Size)); default: - if (Size < 32) - return RB.getID() == AMDGPU::VGPRRegBankID ? &AMDGPU::VGPR_32RegClass : - &AMDGPU::SReg_32RegClass; - return nullptr; + llvm_unreachable("unknown register bank"); } } diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h index 308db1b6a47c..21b021ce046b 100644 --- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.h +++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.h @@ -156,16 +156,16 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo { } /// \returns A VGPR reg class with the same width as \p SRC - const TargetRegisterClass *getEquivalentVGPRClass( - const TargetRegisterClass *SRC) const; + const TargetRegisterClass * + getEquivalentVGPRClass(const TargetRegisterClass *SRC) const; /// \returns An AGPR reg class with the same width as \p SRC - const TargetRegisterClass *getEquivalentAGPRClass( - const TargetRegisterClass *SRC) const; + const TargetRegisterClass * + getEquivalentAGPRClass(const TargetRegisterClass *SRC) const; /// \returns A SGPR reg class with the same width as \p SRC - const TargetRegisterClass *getEquivalentSGPRClass( - const TargetRegisterClass *VRC) const; + const TargetRegisterClass * + getEquivalentSGPRClass(const TargetRegisterClass *VRC) const; /// \returns The register class that is used for a sub-register of \p RC for /// the given \p SubIdx. If \p SubIdx equals NoSubRegister, \p RC will From 9d9a088e51701577273db6ed64257e8505b3ef10 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Fri, 17 Apr 2020 09:34:34 -0400 Subject: [PATCH 170/216] [PhaseOrdering] remove blank lines in tests; NFC --- llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll index 8510eede50a3..a199c0d51cde 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/shuffle.ll @@ -82,7 +82,6 @@ define <8 x i16> @shuffle_32_add_16_masks_are_eq(<4 x i32> %v1, <4 x i32> %v2) { ; CHECK-NEXT: [[ADD:%.*]] = shufflevector <8 x i16> [[TMP3]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[ADD]] ; - %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %shuffle2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32> %bc1 = bitcast <4 x i32> %shuffle1 to <8 x i16> @@ -101,7 +100,6 @@ define <16 x i8> @shuffle_32_add_8_masks_are_eq(<4 x i32> %v1, <4 x i32> %v2) { ; CHECK-NEXT: [[ADD:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[ADD]] ; - %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %shuffle2 = shufflevector <4 x i32> %v2, <4 x i32> undef, <4 x i32> %bc1 = bitcast <4 x i32> %shuffle1 to <16 x i8> @@ -120,7 +118,6 @@ define <16 x i8> @shuffle_16_add_8_masks_are_eq(<8 x i16> %v1, <8 x i16> %v2) { ; CHECK-NEXT: [[ADD:%.*]] = shufflevector <16 x i8> [[TMP3]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[ADD]] ; - %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> %shuffle2 = shufflevector <8 x i16> %v2, <8 x i16> undef, <8 x i32> %bc1 = bitcast <8 x i16> %shuffle1 to <16 x i8> @@ -140,7 +137,6 @@ define <4 x i32> @shuffle_16_add_32_masks_are_eq_and_can_be_converted_up(<8 x i1 ; CHECK-NEXT: [[ADD:%.*]] = 
add <4 x i32> [[BC2]], [[BC1]] ; CHECK-NEXT: ret <4 x i32> [[ADD]] ; - %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> %shuffle2 = shufflevector <8 x i16> %v2, <8 x i16> undef, <8 x i32> %bc1 = bitcast <8 x i16> %shuffle1 to <4 x i32> @@ -160,7 +156,6 @@ define <4 x i32> @shuffle_8_add_32_masks_are_eq_and_can_be_converted_up(<16 x i8 ; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[BC2]], [[BC1]] ; CHECK-NEXT: ret <4 x i32> [[ADD]] ; - %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> %shuffle2 = shufflevector <16 x i8> %v2, <16 x i8> undef, <16 x i32> %bc1 = bitcast <16 x i8> %shuffle1 to <4 x i32> @@ -179,7 +174,6 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_be_converted_up(<4 x i32> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %bc1 = bitcast <4 x i32> %shuffle1 to <8 x i16> %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32> @@ -196,7 +190,6 @@ define <8 x i16> @shuffle_32_bitcast_16_shuffle_16_can_not_be_converted_up(<4 x ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %bc1 = bitcast <4 x i32> %shuffle1 to <8 x i16> %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32> @@ -213,7 +206,6 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_be_converted_up(<4 x i32> % ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %bc1 = bitcast <4 x i32> %shuffle1 to <16 x i8> %shuffle2 = shufflevector <16 x i8> %bc1, <16 x i8> undef, <16 x i32> @@ -230,7 +222,6 @@ define <16 x i8> @shuffle_32_bitcast_8_shuffle_8_can_not_be_converted_up(<4 x i3 ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <16 x i8> [[TMP2]], <16 x i8> undef, <16 x i32> ; CHECK-NEXT: ret <16 x i8> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> %bc1 = bitcast <4 x i32> %shuffle1 to <16 x i8> %shuffle2 = shufflevector <16 x i8> %bc1, <16 x i8> undef, <16 x i32> @@ -247,7 +238,6 @@ define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_be_converted_up(<16 x i8> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> %bc1 = bitcast <16 x i8> %shuffle1 to <4 x i32> %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32> @@ -264,7 +254,6 @@ define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_be_converted_up(<8 x i16> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> %bc1 = bitcast <8 x i16> %shuffle1 to <4 x i32> %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32> @@ -281,7 +270,6 @@ define <4 x i32> @shuffle_8_bitcast_32_shuffle_32_can_not_be_converted_up(<16 x ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> %bc1 = bitcast <16 x i8> %shuffle1 to <4 x i32> 
%shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32> @@ -298,7 +286,6 @@ define <4 x i32> @shuffle_16_bitcast_32_shuffle_32_can_not_be_converted_up(<8 x ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <4 x i32> [[BC1]], <4 x i32> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i32> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> %bc1 = bitcast <8 x i16> %shuffle1 to <4 x i32> %shuffle2 = shufflevector <4 x i32> %bc1, <4 x i32> undef, <4 x i32> @@ -315,7 +302,6 @@ define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can__be_converted_up(<16 x i8> ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> %bc1 = bitcast <16 x i8> %shuffle1 to <8 x i16> %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32> @@ -332,7 +318,6 @@ define <8 x i16> @shuffle_8_bitcast_16_shuffle_16_can_not_be_converted_up(<16 x ; CHECK-NEXT: [[SHUFFLE2:%.*]] = shufflevector <8 x i16> [[BC1]], <8 x i16> undef, <8 x i32> ; CHECK-NEXT: ret <8 x i16> [[SHUFFLE2]] ; - %shuffle1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> %bc1 = bitcast <16 x i8> %shuffle1 to <8 x i16> %shuffle2 = shufflevector <8 x i16> %bc1, <8 x i16> undef, <8 x i32> From b639091c02df31954899c8107709597947e33364 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 17 Apr 2020 16:33:39 +0200 Subject: [PATCH 171/216] Change users of CreateShuffleVector to pass the masks as int instead of Constants No functionality change intended. --- clang/lib/CodeGen/CGExprScalar.cpp | 60 ++++++++--------- .../InstCombine/InstCombineCalls.cpp | 65 ++++++++----------- .../Instrumentation/MemorySanitizer.cpp | 14 ++-- .../Transforms/Vectorize/SLPVectorizer.cpp | 31 ++++----- 4 files changed, 72 insertions(+), 98 deletions(-) diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 343c62273ec4..62a0f6c0efe6 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -1780,22 +1780,18 @@ Value *ScalarExprEmitter::VisitArraySubscriptExpr(ArraySubscriptExpr *E) { return Builder.CreateExtractElement(Base, Idx, "vecext"); } -static llvm::Constant *getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx, - unsigned Off, llvm::Type *I32Ty) { +static int getMaskElt(llvm::ShuffleVectorInst *SVI, unsigned Idx, + unsigned Off) { int MV = SVI->getMaskValue(Idx); if (MV == -1) - return llvm::UndefValue::get(I32Ty); - return llvm::ConstantInt::get(I32Ty, Off+MV); + return -1; + return Off + MV; } -static llvm::Constant *getAsInt32(llvm::ConstantInt *C, llvm::Type *I32Ty) { - if (C->getBitWidth() != 32) { - assert(llvm::ConstantInt::isValueValidForType(I32Ty, - C->getZExtValue()) && - "Index operand too large for shufflevector mask!"); - return llvm::ConstantInt::get(I32Ty, C->getZExtValue()); - } - return C; +static int getAsInt32(llvm::ConstantInt *C, llvm::Type *I32Ty) { + assert(llvm::ConstantInt::isValueValidForType(I32Ty, C->getZExtValue()) && + "Index operand too large for shufflevector mask!"); + return C->getZExtValue(); } Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { @@ -1832,7 +1828,7 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { for (unsigned i = 0; i != NumInitElements; ++i) { Expr *IE = E->getInit(i); Value *Init = Visit(IE); - SmallVector Args; + SmallVector Args; llvm::VectorType *VVT = dyn_cast(Init->getType()); @@ -1850,7 +1846,7 @@ Value 
*ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // insert into undef -> shuffle (src, undef) // shufflemask must use an i32 Args.push_back(getAsInt32(C, CGF.Int32Ty)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.resize(ResElts, -1); LHS = EI->getVectorOperand(); RHS = V; @@ -1859,17 +1855,16 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // insert into undefshuffle && size match -> shuffle (v, src) llvm::ShuffleVectorInst *SVV = cast(V); for (unsigned j = 0; j != CurIdx; ++j) - Args.push_back(getMaskElt(SVV, j, 0, CGF.Int32Ty)); - Args.push_back(Builder.getInt32(ResElts + C->getZExtValue())); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.push_back(getMaskElt(SVV, j, 0)); + Args.push_back(ResElts + C->getZExtValue()); + Args.resize(ResElts, -1); LHS = cast(V)->getOperand(0); RHS = EI->getVectorOperand(); VIsUndefShuffle = false; } if (!Args.empty()) { - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - V = Builder.CreateShuffleVector(LHS, RHS, Mask); + V = Builder.CreateShuffleVector(LHS, RHS, Args); ++CurIdx; continue; } @@ -1898,15 +1893,14 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // If the current vector initializer is a shuffle with undef, merge // this shuffle directly into it. if (VIsUndefShuffle) { - Args.push_back(getMaskElt(cast(V), j, 0, - CGF.Int32Ty)); + Args.push_back(getMaskElt(cast(V), j, 0)); } else { - Args.push_back(Builder.getInt32(j)); + Args.push_back(j); } } for (unsigned j = 0, je = InitElts; j != je; ++j) - Args.push_back(getMaskElt(SVI, j, Offset, CGF.Int32Ty)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.push_back(getMaskElt(SVI, j, Offset)); + Args.resize(ResElts, -1); if (VIsUndefShuffle) V = cast(V)->getOperand(0); @@ -1919,26 +1913,24 @@ Value *ScalarExprEmitter::VisitInitListExpr(InitListExpr *E) { // to the vector initializer into V. if (Args.empty()) { for (unsigned j = 0; j != InitElts; ++j) - Args.push_back(Builder.getInt32(j)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), - Mask, "vext"); + Args.push_back(j); + Args.resize(ResElts, -1); + Init = Builder.CreateShuffleVector(Init, llvm::UndefValue::get(VVT), Args, + "vext"); Args.clear(); for (unsigned j = 0; j != CurIdx; ++j) - Args.push_back(Builder.getInt32(j)); + Args.push_back(j); for (unsigned j = 0; j != InitElts; ++j) - Args.push_back(Builder.getInt32(j+Offset)); - Args.resize(ResElts, llvm::UndefValue::get(CGF.Int32Ty)); + Args.push_back(j + Offset); + Args.resize(ResElts, -1); } // If V is undef, make sure it ends up on the RHS of the shuffle to aid // merging subsequent shuffles into this one. 
if (CurIdx == 0) std::swap(V, Init); - llvm::Constant *Mask = llvm::ConstantVector::get(Args); - V = Builder.CreateShuffleVector(V, Init, Mask, "vecinit"); + V = Builder.CreateShuffleVector(V, Init, Args, "vecinit"); VIsUndefShuffle = isa(Init); CurIdx += InitElts; } diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 26fe9e659c7e..03c6cf7c4705 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -840,22 +840,19 @@ static Value *simplifyX86extrq(IntrinsicInst &II, Value *Op0, Index /= 8; Type *IntTy8 = Type::getInt8Ty(II.getContext()); - Type *IntTy32 = Type::getInt32Ty(II.getContext()); VectorType *ShufTy = VectorType::get(IntTy8, 16); - SmallVector ShuffleMask; + SmallVector ShuffleMask; for (int i = 0; i != (int)Length; ++i) - ShuffleMask.push_back( - Constant::getIntegerValue(IntTy32, APInt(32, i + Index))); + ShuffleMask.push_back(i + Index); for (int i = Length; i != 8; ++i) - ShuffleMask.push_back( - Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); + ShuffleMask.push_back(i + 16); for (int i = 8; i != 16; ++i) - ShuffleMask.push_back(UndefValue::get(IntTy32)); + ShuffleMask.push_back(-1); Value *SV = Builder.CreateShuffleVector( Builder.CreateBitCast(Op0, ShufTy), - ConstantAggregateZero::get(ShufTy), ConstantVector::get(ShuffleMask)); + ConstantAggregateZero::get(ShufTy), ShuffleMask); return Builder.CreateBitCast(SV, II.getType()); } @@ -920,23 +917,21 @@ static Value *simplifyX86insertq(IntrinsicInst &II, Value *Op0, Value *Op1, Index /= 8; Type *IntTy8 = Type::getInt8Ty(II.getContext()); - Type *IntTy32 = Type::getInt32Ty(II.getContext()); VectorType *ShufTy = VectorType::get(IntTy8, 16); - SmallVector ShuffleMask; + SmallVector ShuffleMask; for (int i = 0; i != (int)Index; ++i) - ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); + ShuffleMask.push_back(i); for (int i = 0; i != (int)Length; ++i) - ShuffleMask.push_back( - Constant::getIntegerValue(IntTy32, APInt(32, i + 16))); + ShuffleMask.push_back(i + 16); for (int i = Index + Length; i != 8; ++i) - ShuffleMask.push_back(Constant::getIntegerValue(IntTy32, APInt(32, i))); + ShuffleMask.push_back(i); for (int i = 8; i != 16; ++i) - ShuffleMask.push_back(UndefValue::get(IntTy32)); + ShuffleMask.push_back(-1); Value *SV = Builder.CreateShuffleVector(Builder.CreateBitCast(Op0, ShufTy), Builder.CreateBitCast(Op1, ShufTy), - ConstantVector::get(ShuffleMask)); + ShuffleMask); return Builder.CreateBitCast(SV, II.getType()); } @@ -988,13 +983,12 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II, return nullptr; auto *VecTy = cast(II.getType()); - auto *MaskEltTy = Type::getInt32Ty(II.getContext()); unsigned NumElts = VecTy->getNumElements(); assert((NumElts == 16 || NumElts == 32 || NumElts == 64) && "Unexpected number of elements in shuffle mask!"); // Construct a shuffle mask from constant integers or UNDEFs. - Constant *Indexes[64] = {nullptr}; + int Indexes[64]; // Each byte in the shuffle control mask forms an index to permute the // corresponding byte in the destination operand. 
@@ -1004,7 +998,7 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II, return nullptr; if (isa(COp)) { - Indexes[I] = UndefValue::get(MaskEltTy); + Indexes[I] = -1; continue; } @@ -1018,13 +1012,12 @@ static Value *simplifyX86pshufb(const IntrinsicInst &II, // The value of each index for the high 128-bit lane is the least // significant 4 bits of the respective shuffle control byte. Index = ((Index < 0) ? NumElts : Index & 0x0F) + (I & 0xF0); - Indexes[I] = ConstantInt::get(MaskEltTy, Index); + Indexes[I] = Index; } - auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts)); auto V1 = II.getArgOperand(0); auto V2 = Constant::getNullValue(VecTy); - return Builder.CreateShuffleVector(V1, V2, ShuffleMask); + return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts)); } /// Attempt to convert vpermilvar* to shufflevector if the mask is constant. @@ -1035,14 +1028,13 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, return nullptr; auto *VecTy = cast(II.getType()); - auto *MaskEltTy = Type::getInt32Ty(II.getContext()); unsigned NumElts = VecTy->getNumElements(); bool IsPD = VecTy->getScalarType()->isDoubleTy(); unsigned NumLaneElts = IsPD ? 2 : 4; assert(NumElts == 16 || NumElts == 8 || NumElts == 4 || NumElts == 2); // Construct a shuffle mask from constant integers or UNDEFs. - Constant *Indexes[16] = {nullptr}; + int Indexes[16]; // The intrinsics only read one or two bits, clear the rest. for (unsigned I = 0; I < NumElts; ++I) { @@ -1051,7 +1043,7 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, return nullptr; if (isa(COp)) { - Indexes[I] = UndefValue::get(MaskEltTy); + Indexes[I] = -1; continue; } @@ -1068,13 +1060,12 @@ static Value *simplifyX86vpermilvar(const IntrinsicInst &II, // shuffle, we have to make that explicit. Index += APInt(32, (I / NumLaneElts) * NumLaneElts); - Indexes[I] = ConstantInt::get(MaskEltTy, Index); + Indexes[I] = Index.getZExtValue(); } - auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, NumElts)); auto V1 = II.getArgOperand(0); auto V2 = UndefValue::get(V1->getType()); - return Builder.CreateShuffleVector(V1, V2, ShuffleMask); + return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, NumElts)); } /// Attempt to convert vpermd/vpermps to shufflevector if the mask is constant. @@ -1085,13 +1076,12 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II, return nullptr; auto *VecTy = cast(II.getType()); - auto *MaskEltTy = Type::getInt32Ty(II.getContext()); unsigned Size = VecTy->getNumElements(); assert((Size == 4 || Size == 8 || Size == 16 || Size == 32 || Size == 64) && "Unexpected shuffle mask size"); // Construct a shuffle mask from constant integers or UNDEFs. 
- Constant *Indexes[64] = {nullptr}; + int Indexes[64]; for (unsigned I = 0; I < Size; ++I) { Constant *COp = V->getAggregateElement(I); @@ -1099,19 +1089,18 @@ static Value *simplifyX86vpermv(const IntrinsicInst &II, return nullptr; if (isa(COp)) { - Indexes[I] = UndefValue::get(MaskEltTy); + Indexes[I] = -1; continue; } uint32_t Index = cast(COp)->getZExtValue(); Index &= Size - 1; - Indexes[I] = ConstantInt::get(MaskEltTy, Index); + Indexes[I] = Index; } - auto ShuffleMask = ConstantVector::get(makeArrayRef(Indexes, Size)); auto V1 = II.getArgOperand(0); auto V2 = UndefValue::get(VecTy); - return Builder.CreateShuffleVector(V1, V2, ShuffleMask); + return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes, Size)); } // TODO, Obvious Missing Transforms: @@ -1487,7 +1476,7 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II, if (!VecTy->getElementType()->isIntegerTy(8) || NumElts != 8) return nullptr; - uint32_t Indexes[8]; + int Indexes[8]; for (unsigned I = 0; I < NumElts; ++I) { Constant *COp = C->getAggregateElement(I); @@ -1498,15 +1487,13 @@ static Value *simplifyNeonTbl1(const IntrinsicInst &II, Indexes[I] = cast(COp)->getLimitedValue(); // Make sure the mask indices are in range. - if (Indexes[I] >= NumElts) + if ((unsigned)Indexes[I] >= NumElts) return nullptr; } - auto *ShuffleMask = ConstantDataVector::get(II.getContext(), - makeArrayRef(Indexes)); auto *V1 = II.getArgOperand(0); auto *V2 = Constant::getNullValue(V1->getType()); - return Builder.CreateShuffleVector(V1, V2, ShuffleMask); + return Builder.CreateShuffleVector(V1, V2, makeArrayRef(Indexes)); } /// Convert a vector load intrinsic into a simple llvm load instruction. diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 3de7fbf04e58..0eaceb964a9a 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -3030,14 +3030,12 @@ struct MemorySanitizerVisitor : public InstVisitor { setOriginForNaryOp(I); } - Constant *getPclmulMask(IRBuilder<> &IRB, unsigned Width, bool OddElements) { - SmallVector Mask; + SmallVector getPclmulMask(unsigned Width, bool OddElements) { + SmallVector Mask; for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) { - Constant *C = ConstantInt::get(IRB.getInt32Ty(), X); - Mask.push_back(C); - Mask.push_back(C); + Mask.append(2, X); } - return ConstantVector::get(Mask); + return Mask; } // Instrument pclmul intrinsics. 
@@ -3058,10 +3056,10 @@ struct MemorySanitizerVisitor : public InstVisitor { unsigned Imm = cast(I.getArgOperand(2))->getZExtValue(); Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0), UndefValue::get(ShadowTy), - getPclmulMask(IRB, Width, Imm & 0x01)); + getPclmulMask(Width, Imm & 0x01)); Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1), UndefValue::get(ShadowTy), - getPclmulMask(IRB, Width, Imm & 0x10)); + getPclmulMask(Width, Imm & 0x10)); ShadowAndOriginCombiner SOC(this, IRB); SOC.Add(Shuf0, getOrigin(&I, 0)); SOC.Add(Shuf1, getOrigin(&I, 1)); diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 794ca1001c6a..f9e0e7dc1601 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -89,6 +89,7 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include +#include #include #include #include @@ -4573,24 +4574,23 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) { // each vector operation. ValueList OpScalars, AltScalars; unsigned e = E->Scalars.size(); - SmallVector Mask(e); + SmallVector Mask(e); for (unsigned i = 0; i < e; ++i) { auto *OpInst = cast(E->Scalars[i]); assert(E->isOpcodeOrAlt(OpInst) && "Unexpected main/alternate opcode"); if (OpInst->getOpcode() == E->getAltOpcode()) { - Mask[i] = Builder.getInt32(e + i); + Mask[i] = e + i; AltScalars.push_back(E->Scalars[i]); } else { - Mask[i] = Builder.getInt32(i); + Mask[i] = i; OpScalars.push_back(E->Scalars[i]); } } - Value *ShuffleMask = ConstantVector::get(Mask); propagateIRFlags(V0, OpScalars); propagateIRFlags(V1, AltScalars); - Value *V = Builder.CreateShuffleVector(V0, V1, ShuffleMask); + Value *V = Builder.CreateShuffleVector(V0, V1, Mask); if (Instruction *I = dyn_cast(V)) V = propagateMetadata(I, E->Scalars); if (NeedToShuffleReuses) { @@ -6083,24 +6083,23 @@ bool SLPVectorizerPass::tryToVectorize(Instruction *I, BoUpSLP &R) { /// <0,2,...> or <1,3,..> while a splitting reduction will generate /// <2,3, undef,undef> for a vector of 4 and NumElts = 2. /// \param IsLeft True will generate a mask of even elements, odd otherwise. -static Value *createRdxShuffleMask(unsigned VecLen, unsigned NumEltsToRdx, - bool IsPairwise, bool IsLeft, - IRBuilder<> &Builder) { +static SmallVector createRdxShuffleMask(unsigned VecLen, + unsigned NumEltsToRdx, + bool IsPairwise, bool IsLeft) { assert((IsPairwise || !IsLeft) && "Don't support a <0,1,undef,...> mask"); - SmallVector ShuffleMask( - VecLen, UndefValue::get(Builder.getInt32Ty())); + SmallVector ShuffleMask(VecLen, -1); if (IsPairwise) // Build a mask of 0, 2, ... (left) or 1, 3, ... (right). for (unsigned i = 0; i != NumEltsToRdx; ++i) - ShuffleMask[i] = Builder.getInt32(2 * i + !IsLeft); + ShuffleMask[i] = 2 * i + !IsLeft; else // Move the upper half of the vector to the lower half. 
for (unsigned i = 0; i != NumEltsToRdx; ++i) - ShuffleMask[i] = Builder.getInt32(NumEltsToRdx + i); + ShuffleMask[i] = NumEltsToRdx + i; - return ConstantVector::get(ShuffleMask); + return ShuffleMask; } namespace { @@ -6974,10 +6973,8 @@ class HorizontalReduction { Value *TmpVec = VectorizedValue; for (unsigned i = ReduxWidth / 2; i != 0; i >>= 1) { - Value *LeftMask = - createRdxShuffleMask(ReduxWidth, i, true, true, Builder); - Value *RightMask = - createRdxShuffleMask(ReduxWidth, i, true, false, Builder); + auto LeftMask = createRdxShuffleMask(ReduxWidth, i, true, true); + auto RightMask = createRdxShuffleMask(ReduxWidth, i, true, false); Value *LeftShuf = Builder.CreateShuffleVector( TmpVec, UndefValue::get(TmpVec->getType()), LeftMask, "rdx.shuf.l"); From c5e7c2691df6231dd06623f33347c394c59fe55a Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 17 Apr 2020 16:36:30 +0200 Subject: [PATCH 172/216] Remove accidental include. Thank you clangd. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index f9e0e7dc1601..2c54386351e2 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -89,7 +89,6 @@ #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Vectorize.h" #include -#include #include #include #include From a8e4b7a5504196fc920126fb1e71d221e3879545 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Apr 2020 10:36:32 -0400 Subject: [PATCH 173/216] [libc++] NFC: Rename Lit feature for no RTTI to -fno-rtti --- .../std/utilities/any/any.class/any.observers/type.pass.cpp | 2 +- .../test/support/test.support/test_macros_header_rtti.pass.cpp | 2 +- libcxx/utils/libcxx/test/config.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp b/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp index bf7ea92eb903..a59701a27e39 100644 --- a/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp +++ b/libcxx/test/std/utilities/any/any.class/any.observers/type.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: c++98, c++03, c++11, c++14 -// XFAIL: libcpp-no-rtti +// XFAIL: -fno-rtti // diff --git a/libcxx/test/support/test.support/test_macros_header_rtti.pass.cpp b/libcxx/test/support/test.support/test_macros_header_rtti.pass.cpp index e38545f9b9cb..0369c90f2f13 100644 --- a/libcxx/test/support/test.support/test_macros_header_rtti.pass.cpp +++ b/libcxx/test/support/test.support/test_macros_header_rtti.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// UNSUPPORTED: libcpp-no-rtti +// UNSUPPORTED: -fno-rtti // "support/test_macros.hpp" diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 81cc976565a0..1dfbfcb357f9 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -658,7 +658,7 @@ def configure_compile_flags_exceptions(self): def configure_compile_flags_rtti(self): enable_rtti = self.get_lit_bool('enable_rtti', True) if not enable_rtti: - self.config.available_features.add('libcpp-no-rtti') + self.config.available_features.add('-fno-rtti') self.cxx.compile_flags += ['-fno-rtti', '-D_LIBCPP_NO_RTTI'] def configure_compile_flags_abi_version(self): From c245d3e033a58582475c3d749085f47a9ab0dda1 Mon Sep 17 00:00:00 2001 From: 
Florian Hahn Date: Fri, 17 Apr 2020 15:30:00 +0100 Subject: [PATCH 174/216] [ValueLattice] Steal bits from Tag to track range extensions (NFC). Users of ValueLatticeElement currently have to ensure constant ranges are not extended indefinitely. For example, in SCCP, mergeIn goes to overdefined if a constantrange value is repeatedly merged with larger constantranges. This is a simple form of widening. In some cases, this leads to an unnecessary loss of information and things can be improved by allowing a small number of extensions in the hope that a fixed point is reached after a small number of steps. To make better decisions about widening, it is helpful to keep track of the number of range extensions. That state is tied directly to a concrete ValueLatticeElement and some unused bits in the class can be used. The current patch preserves the existing behavior by default: CheckWiden defaults to false and if CheckWiden is true, a single change to the range is allowed. Follow-up patches will slightly increase the threshold for widening. Reviewers: efriedma, davide, mssimpso Reviewed By: efriedma Differential Revision: https://reviews.llvm.org/D78145 --- llvm/include/llvm/Analysis/ValueLattice.h | 24 +++++++++++++++++------ llvm/lib/Transforms/Scalar/SCCP.cpp | 10 +--------- 2 files changed, 19 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueLattice.h b/llvm/include/llvm/Analysis/ValueLattice.h index 1c4683857f98..6f054fd1ffc8 100644 --- a/llvm/include/llvm/Analysis/ValueLattice.h +++ b/llvm/include/llvm/Analysis/ValueLattice.h @@ -75,7 +75,9 @@ class ValueLatticeElement { overdefined, }; - ValueLatticeElementTy Tag; + ValueLatticeElementTy Tag : 6; + /// Number of times a constant range has been extended with widening enabled. + unsigned NumRangeExtensions : 8; /// The union either stores a pointer to a constant or a constant range, /// associated to the lattice element. We have to ensure that Range is @@ -133,6 +135,7 @@ class ValueLatticeElement { new (&Range) ConstantRange(Other.Range); else Range = Other.Range; + NumRangeExtensions = Other.NumRangeExtensions; break; case constant: case notconstant: @@ -287,7 +290,8 @@ class ValueLatticeElement { /// range or the object must be undef. The tag is set to /// constant_range_including_undef if either the existing value or the new /// range may include undef. - bool markConstantRange(ConstantRange NewR, bool MayIncludeUndef = false) { + bool markConstantRange(ConstantRange NewR, bool MayIncludeUndef = false, + bool CheckWiden = false) { if (NewR.isFullSet()) return markOverdefined(); @@ -304,6 +308,11 @@ class ValueLatticeElement { if (getConstantRange() == NewR) return Tag != OldTag; + // Simple form of widening. If a range is extended multiple times, go to + // overdefined. + if (CheckWiden && ++NumRangeExtensions == 1) + return markOverdefined(); + assert(NewR.contains(getConstantRange()) && "Existing range must be a subset of NewR"); Range = std::move(NewR); @@ -314,6 +323,7 @@ class ValueLatticeElement { if (NewR.isEmptySet()) return markOverdefined(); + NumRangeExtensions = 0; Tag = NewTag; new (&Range) ConstantRange(std::move(NewR)); return true; @@ -321,7 +331,7 @@ class ValueLatticeElement { /// Updates this object to approximate both this object and RHS. Returns /// true if this object has been changed. 
- bool mergeIn(const ValueLatticeElement &RHS) { + bool mergeIn(const ValueLatticeElement &RHS, bool CheckWiden = false) { if (RHS.isUnknown() || isOverdefined()) return false; if (RHS.isOverdefined()) { @@ -337,7 +347,7 @@ class ValueLatticeElement { return markConstant(RHS.getConstant(), /*MayIncludeUndef=*/true); if (RHS.isConstantRange()) return markConstantRange(RHS.getConstantRange(true), - /*MayIncludeUndef=*/true); + /*MayIncludeUndef=*/true, CheckWiden); return markOverdefined(); } @@ -380,7 +390,7 @@ class ValueLatticeElement { ConstantRange NewR = getConstantRange().unionWith(RHS.getConstantRange()); return markConstantRange( std::move(NewR), - /*MayIncludeUndef=*/RHS.isConstantRangeIncludingUndef()); + /*MayIncludeUndef=*/RHS.isConstantRangeIncludingUndef(), CheckWiden); } // Compares this symbolic value with Other using Pred and returns either @@ -412,7 +422,9 @@ class ValueLatticeElement { } }; -raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val); +static_assert(sizeof(ValueLatticeElement) <= 40, + "size of ValueLatticeElement changed unexpectedly"); +raw_ostream &operator<<(raw_ostream &OS, const ValueLatticeElement &Val); } // end namespace llvm #endif diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 9cb7f0695484..f5b3ebd1a002 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -401,15 +401,7 @@ class SCCPSolver : public InstVisitor { bool mergeInValue(ValueLatticeElement &IV, Value *V, ValueLatticeElement MergeWithV, bool Widen = true) { - // Do a simple form of widening, to avoid extending a range repeatedly in a - // loop. If IV is a constant range, it means we already set it once. If - // MergeWithV would extend IV, mark V as overdefined. - if (Widen && IV.isConstantRange() && MergeWithV.isConstantRange() && - !IV.getConstantRange().contains(MergeWithV.getConstantRange())) { - markOverdefined(IV, V); - return true; - } - if (IV.mergeIn(MergeWithV)) { + if (IV.mergeIn(MergeWithV, Widen)) { pushToWorkList(IV, V); LLVM_DEBUG(dbgs() << "Merged " << MergeWithV << " into " << *V << " : " << IV << "\n"); From 55e3a7c6b21fb21d88896a9548b95384d2bd97dd Mon Sep 17 00:00:00 2001 From: Dominik Montada Date: Tue, 14 Apr 2020 11:25:05 +0200 Subject: [PATCH 175/216] [GlobalISel][AMDGPU] add legalization for G_FREEZE Summary: Copy the legalization rules from SelectionDAG: -widenScalar using anyext -narrowScalar using intermediate merges -scalarize/fewerElements using unmerge -moreElements using G_IMPLICIT_DEF and insert Add G_FREEZE legalization actions to AMDGPULegalizerInfo. Use the same legalization actions as G_IMPLICIT_DEF. Depends on D77795. 
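For illustration only (not part of this patch): a target that wants the new
handling can add G_FREEZE to the same rule set it already uses for
G_IMPLICIT_DEF, which is exactly what the AMDGPU change below does. The LLT
shorthands and the concrete type lists in this sketch are placeholders, not
the real AMDGPU rules, and it assumes the usual GlobalISel headers and that
the code runs inside a target's LegalizerInfo constructor:

  // Sketch: register G_FREEZE next to G_IMPLICIT_DEF in a LegalizerInfo
  // constructor, using the existing LegalizeRuleSet builder API.
  const LLT S32 = LLT::scalar(32);
  const LLT S64 = LLT::scalar(64);
  const LLT V2S32 = LLT::vector(2, 32);

  getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE})
      .legalFor({S32, S64, V2S32})
      .widenScalarToNextPow2(0, 32)     // widenScalar: source extended with G_ANYEXT
      .clampScalar(0, S32, S64)         // narrowScalar: split via intermediate merges
      .clampMaxNumElements(0, S32, 2);  // fewerElements: vectors split with unmerge
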
Reviewers: dsanders, arsenm, aqjune, aditya_nandakumar, t.p.northover, lebedev.ri, paquette, aemerson Reviewed By: arsenm Subscribers: kzhuravl, yaxunl, dstuttard, tpr, t-tye, jvesely, nhaehnle, kerbowa, wdng, rovka, hiraditya, volkan, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78092 --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 15 +- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 199 ++-- .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 16 +- .../AMDGPU/GlobalISel/legalize-freeze.mir | 925 ++++++++++++++++++ .../GlobalISel/LegalizerHelperTest.cpp | 211 ++++ 5 files changed, 1278 insertions(+), 88 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 823d1eb32942..78d8e9800d96 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -215,11 +215,6 @@ class LegalizerHelper { LegalizeResult fewerElementsVectorImplicitDef(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); - /// Legalize a simple vector instruction where all operands are the same type - /// by splitting into multiple components. - LegalizeResult fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy); - /// Legalize a instruction with a vector type where each operand may have a /// different element type. All type indexes must have the same number of /// elements. @@ -251,6 +246,16 @@ class LegalizerHelper { LegalizeResult reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); + /// Legalize an instruction by reducing the operation width, either by + /// narrowing the type of the operation or by reducing the number of elements + /// of a vector. + /// The used strategy (narrow vs. fewerElements) is decided by \p NarrowTy. + /// Narrow is used if the scalar type of \p NarrowTy and \p DstTy differ, + /// fewerElements is used when the scalar type is the same but the number of + /// elements between \p NarrowTy and \p DstTy differ. + LegalizeResult reduceOperationWidth(MachineInstr &MI, unsigned TypeIdx, + LLT NarrowTy); + LegalizeResult fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy); diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 168b78457c25..2a9cef2e1536 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -825,6 +825,9 @@ LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI, return Legalized; } + case TargetOpcode::G_FREEZE: + return reduceOperationWidth(MI, TypeIdx, NarrowTy); + case TargetOpcode::G_ADD: { // FIXME: add support for when SizeOp0 isn't an exact multiple of // NarrowSize. @@ -1728,6 +1731,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_FREEZE: + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + widenScalarDst(MI, WideTy); + Observer.changedInstr(MI); + return Legalized; + case TargetOpcode::G_ADD: case TargetOpcode::G_AND: case TargetOpcode::G_MUL: @@ -2594,80 +2604,6 @@ LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef( return Legalized; } -// Handles operands with different types, but all must have the same number of -// elements. 
There will be multiple type indexes. NarrowTy is expected to have -// the result element type. -LegalizerHelper::LegalizeResult -LegalizerHelper::fewerElementsVectorBasic(MachineInstr &MI, unsigned TypeIdx, - LLT NarrowTy) { - assert(TypeIdx == 0 && "only one type index expected"); - - const unsigned Opc = MI.getOpcode(); - const int NumOps = MI.getNumOperands() - 1; - const Register DstReg = MI.getOperand(0).getReg(); - const unsigned Flags = MI.getFlags(); - - assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources"); - - SmallVector ExtractedRegs[3]; - SmallVector Parts; - - unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; - - // Break down all the sources into NarrowTy pieces we can operate on. This may - // involve creating merges to a wider type, padded with undef. - for (int I = 0; I != NumOps; ++I) { - Register SrcReg = MI.getOperand(I + 1).getReg(); - LLT SrcTy = MRI.getType(SrcReg); - - // Each operand may have its own type, but only the number of elements - // matters. - LLT OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType()); - LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); - - // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. - buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, - ExtractedRegs[I], TargetOpcode::G_ANYEXT); - } - - SmallVector ResultRegs; - - // Input operands for each sub-instruction. - SmallVector InputRegs(NumOps, Register()); - - int NumParts = ExtractedRegs[0].size(); - const LLT DstTy = MRI.getType(DstReg); - const unsigned DstSize = DstTy.getSizeInBits(); - LLT DstLCMTy = getLCMType(DstTy, NarrowTy); - - const unsigned NarrowSize = NarrowTy.getSizeInBits(); - - // We widened the source registers to satisfy merge/unmerge size - // constraints. We'll have some extra fully undef parts. - const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; - - for (int I = 0; I != NumRealParts; ++I) { - // Emit this instruction on each of the split pieces. - for (int J = 0; J != NumOps; ++J) - InputRegs[J] = ExtractedRegs[J][I]; - - auto Inst = MIRBuilder.buildInstr(Opc, {NarrowTy}, InputRegs, Flags); - ResultRegs.push_back(Inst.getReg(0)); - } - - // Fill out the widened result with undef instead of creating instructions - // with undef inputs. - int NumUndefParts = NumParts - NumRealParts; - if (NumUndefParts != 0) - ResultRegs.append(NumUndefParts, MIRBuilder.buildUndef(NarrowTy).getReg(0)); - - // Extract the possibly padded result to the original result register. - buildWidenedRemergeToDst(DstReg, DstLCMTy, ResultRegs); - - MI.eraseFromParent(); - return Legalized; -} - // Handle splitting vector operations which need to have the same number of // elements in each type index, but each type index may have a different element // type. 
@@ -3210,6 +3146,117 @@ LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx, + LLT NarrowTy) { + assert(TypeIdx == 0 && "only one type index expected"); + + const unsigned Opc = MI.getOpcode(); + const int NumOps = MI.getNumOperands() - 1; + const Register DstReg = MI.getOperand(0).getReg(); + const unsigned Flags = MI.getFlags(); + const unsigned NarrowSize = NarrowTy.getSizeInBits(); + const LLT NarrowScalarTy = LLT::scalar(NarrowSize); + + assert(NumOps <= 3 && "expected instruction with 1 result and 1-3 sources"); + + // First of all check whether we are narrowing (changing the element type) + // or reducing the vector elements + const LLT DstTy = MRI.getType(DstReg); + const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType(); + + SmallVector ExtractedRegs[3]; + SmallVector Parts; + + unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1; + + // Break down all the sources into NarrowTy pieces we can operate on. This may + // involve creating merges to a wider type, padded with undef. + for (int I = 0; I != NumOps; ++I) { + Register SrcReg = MI.getOperand(I + 1).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + // The type to narrow SrcReg to. For narrowing, this is a smaller scalar. + // For fewerElements, this is a smaller vector with the same element type. + LLT OpNarrowTy; + if (IsNarrow) { + OpNarrowTy = NarrowScalarTy; + + // In case of narrowing, we need to cast vectors to scalars for this to + // work properly + // FIXME: Can we do without the bitcast here if we're narrowing? + if (SrcTy.isVector()) { + SrcTy = LLT::scalar(SrcTy.getSizeInBits()); + SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0); + } + } else { + OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType()); + } + + LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg); + + // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand. + buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I], + TargetOpcode::G_ANYEXT); + } + + SmallVector ResultRegs; + + // Input operands for each sub-instruction. + SmallVector InputRegs(NumOps, Register()); + + int NumParts = ExtractedRegs[0].size(); + const unsigned DstSize = DstTy.getSizeInBits(); + const LLT DstScalarTy = LLT::scalar(DstSize); + + // Narrowing needs to use scalar types + LLT DstLCMTy, NarrowDstTy; + if (IsNarrow) { + DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy); + NarrowDstTy = NarrowScalarTy; + } else { + DstLCMTy = getLCMType(DstTy, NarrowTy); + NarrowDstTy = NarrowTy; + } + + // We widened the source registers to satisfy merge/unmerge size + // constraints. We'll have some extra fully undef parts. + const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize; + + for (int I = 0; I != NumRealParts; ++I) { + // Emit this instruction on each of the split pieces. + for (int J = 0; J != NumOps; ++J) + InputRegs[J] = ExtractedRegs[J][I]; + + auto Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags); + ResultRegs.push_back(Inst.getReg(0)); + } + + // Fill out the widened result with undef instead of creating instructions + // with undef inputs. + int NumUndefParts = NumParts - NumRealParts; + if (NumUndefParts != 0) + ResultRegs.append(NumUndefParts, + MIRBuilder.buildUndef(NarrowDstTy).getReg(0)); + + // Extract the possibly padded result. 
Use a scratch register if we need to do + // a final bitcast, otherwise use the original result register. + Register MergeDstReg; + if (IsNarrow && DstTy.isVector()) + MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy); + else + MergeDstReg = DstReg; + + buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs); + + // Recast to vector if we narrowed a vector + if (IsNarrow && DstTy.isVector()) + MIRBuilder.buildBitcast(DstReg, MergeDstReg); + + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) { @@ -3293,7 +3340,8 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FMAXIMUM: case G_FSHL: case G_FSHR: - return fewerElementsVectorBasic(MI, TypeIdx, NarrowTy); + case G_FREEZE: + return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: case G_ASHR: @@ -3606,6 +3654,7 @@ LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx, Observer.changedInstr(MI); return Legalized; case TargetOpcode::G_INSERT: + case TargetOpcode::G_FREEZE: if (TypeIdx != 0) return UnableToLegalize; Observer.changingInstr(MI); diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 4d23de8f1afa..10c8c25a2767 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -371,14 +371,14 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, .legalFor({S32, S64, S16}) .clampScalar(0, S16, S64); - getActionDefinitionsBuilder(G_IMPLICIT_DEF) - .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr, - ConstantPtr, LocalPtr, FlatPtr, PrivatePtr}) - .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) - .clampScalarOrElt(0, S32, S1024) - .legalIf(isMultiple32(0)) - .widenScalarToNextPow2(0, 32) - .clampMaxNumElements(0, S32, 16); + getActionDefinitionsBuilder({G_IMPLICIT_DEF, G_FREEZE}) + .legalFor({S1, S32, S64, S16, V2S32, V4S32, V2S16, V4S16, GlobalPtr, + ConstantPtr, LocalPtr, FlatPtr, PrivatePtr}) + .moreElementsIf(isSmallOddVector(0), oneMoreElement(0)) + .clampScalarOrElt(0, S32, S1024) + .legalIf(isMultiple32(0)) + .widenScalarToNextPow2(0, 32) + .clampMaxNumElements(0, S32, 16); setAction({G_FRAME_INDEX, PrivatePtr}, Legal); getActionDefinitionsBuilder(G_GLOBAL_VALUE) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir new file mode 100644 index 000000000000..6fd39b708e5a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-freeze.mir @@ -0,0 +1,925 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s +# RUN: llc -O0 -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -global-isel-abort=0 %s -o - | FileCheck %s + +--- +name: test_freeze_s1 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s1 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s1) = G_FREEZE [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s1) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s1) = G_TRUNC %0 + %2:_(s1) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... 
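+
+# Editorial summary of the cases below (hedged, not generated output): under
+# the rules added above, G_FREEZE is kept as is for the explicitly legal types
+# (s1, s16, s32, s64, <2 x s16>, <4 x s16>, <2-16 x s32>, <2 x s64>, pointers)
+# and for 32-bit-multiple scalars up to s1024 (s128, s448, s512); small or odd
+# scalars are widened (s7/s8 to s32, s48 to s64, s65 to s128); scalars past
+# the s1024 clamp (s1056, s2048) are split into s1024 pieces that are frozen
+# separately; oversized vectors (<33 x s32>, <64 x s32>) are split into
+# <16 x s32> pieces; and sub-32-bit element vectors other than <2 x s16> and
+# <4 x s16> are first padded or element-widened before the freeze.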
+ +--- +name: test_freeze_s7 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s7 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FREEZE]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s7) = G_TRUNC %0 + %2:_(s7) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_freeze_s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s8 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[FREEZE]](s32) + ; CHECK: $vgpr0 = COPY [[COPY2]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s8) = G_TRUNC %0 + %2:_(s8) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_freeze_s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s16 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s16) = G_FREEZE [[TRUNC]] + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FREEZE]](s16) + ; CHECK: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s16) = G_TRUNC %0 + %2:_(s16) = G_FREEZE %1 + %3:_(s32) = G_ANYEXT %2 + $vgpr0 = COPY %3 +... + +--- +name: test_freeze_s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = G_FREEZE %0 + $vgpr0 = COPY %1 +... + +--- +name: test_freeze_s48 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s48 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY [[COPY]](s64) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY [[FREEZE]](s64) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY2]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s48) = G_TRUNC %0 + %2:_(s48) = G_FREEZE %1 + %3:_(s64) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... + +--- +name: test_freeze_s64 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(s64) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_freeze_s65 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s65 + ; CHECK: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[DEF]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s128) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[MV2]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(s96) = G_TRUNC [[FREEZE]](s128) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[TRUNC]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s65) = G_TRUNC %0 + %2:_(s65) = G_FREEZE %1 + %3:_(s96) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... 
+ +--- +name: test_freeze_s128 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s128 + ; CHECK: [[COPY:%[0-9]+]]:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](s128) + %0:_(s128) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(s128) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_freeze_256 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_256 + ; CHECK: [[COPY:%[0-9]+]]:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[FREEZE:%[0-9]+]]:_(s256) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](s256) + %0:_(s256) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(s256) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 +... + +--- +name: test_freeze_s448 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s448 + ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[TRUNC:%[0-9]+]]:_(s448) = G_TRUNC [[COPY]](s512) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s448) = G_FREEZE [[TRUNC]] + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[FREEZE]](s448) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s512) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[DEF]](s64) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[MV]](s512) + %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s448) = G_TRUNC %0 + %2:_(s448) = G_FREEZE %1 + %3:_(s512) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %3 +... + +--- +name: test_freeze_s512 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s512 + ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[FREEZE:%[0-9]+]]:_(s512) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](s512) + %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s512) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 +... 
+ +--- +name: test_freeze_s1024 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s1024 + ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](s1024) + %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s1024) = G_ANYEXT %0 + %2:_(s1024) = G_FREEZE %1 + S_NOP 0, implicit %2 +... + +--- +name: test_freeze_s1056 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s1056 + ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s512) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV]](s32), [[UV1]](s32) + ; CHECK: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV2]](s32), [[UV3]](s32) + ; CHECK: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV4]](s32), [[UV5]](s32) + ; CHECK: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV6]](s32), [[UV7]](s32) + ; CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV8]](s32), [[UV9]](s32) + ; CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV10]](s32), [[UV11]](s32) + ; CHECK: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV12]](s32), [[UV13]](s32) + ; CHECK: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[UV14]](s32), [[UV15]](s32) + ; CHECK: [[DEF1:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV8:%[0-9]+]]:_(s2112) = G_MERGE_VALUES [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64), [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64), [[DEF1]](s64) + ; CHECK: [[TRUNC:%[0-9]+]]:_(s1056) = G_TRUNC [[MV8]](s2112) + ; CHECK: [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), 
[[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[TRUNC]](s1056) + ; CHECK: [[MV9:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32), [[UV32]](s32), [[UV33]](s32), [[UV34]](s32), [[UV35]](s32), [[UV36]](s32), [[UV37]](s32), [[UV38]](s32), [[UV39]](s32), [[UV40]](s32), [[UV41]](s32), [[UV42]](s32), [[UV43]](s32), [[UV44]](s32), [[UV45]](s32), [[UV46]](s32), [[UV47]](s32) + ; CHECK: [[MV10:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV48]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32) + ; CHECK: [[DEF2:%[0-9]+]]:_(s1024) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV9]] + ; CHECK: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV10]] + ; CHECK: [[MV11:%[0-9]+]]:_(s33792) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024), [[DEF2]](s1024) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(s1056) = G_TRUNC [[MV11]](s33792) + ; CHECK: S_NOP 0, implicit [[TRUNC1]](s1056) + %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s1056) = G_ANYEXT %0 + %2:_(s1056) = G_FREEZE %1 + S_NOP 0, implicit %2 +... 
+ +--- +name: test_freeze_s2048 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_s2048 + ; CHECK: [[COPY:%[0-9]+]]:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64), [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64), [[UV4:%[0-9]+]]:_(s64), [[UV5:%[0-9]+]]:_(s64), [[UV6:%[0-9]+]]:_(s64), [[UV7:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](s512) + ; CHECK: [[DEF:%[0-9]+]]:_(s64) = G_IMPLICIT_DEF + ; CHECK: [[MV:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[UV]](s64), [[UV1]](s64), [[UV2]](s64), [[UV3]](s64), [[UV4]](s64), [[UV5]](s64), [[UV6]](s64), [[UV7]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[MV1:%[0-9]+]]:_(s1024) = G_MERGE_VALUES [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64), [[DEF]](s64) + ; CHECK: [[FREEZE:%[0-9]+]]:_(s1024) = G_FREEZE [[MV]] + ; CHECK: [[FREEZE1:%[0-9]+]]:_(s1024) = G_FREEZE [[MV1]] + ; CHECK: [[MV2:%[0-9]+]]:_(s2048) = G_MERGE_VALUES [[FREEZE]](s1024), [[FREEZE1]](s1024) + ; CHECK: S_NOP 0, implicit [[MV2]](s2048) + %0:_(s512) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(s2048) = G_ANYEXT %0 + %2:_(s2048) = G_FREEZE %1 + S_NOP 0, implicit %2 +... + +--- +name: test_freeze_v2s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v2s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_freeze_v3s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v3s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<3 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[FREEZE]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... + +--- +name: test_freeze_v4s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v4s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<4 x s32>) + %0:_(<4 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<4 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_freeze_v5s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v5s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<5 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[FREEZE]](<5 x s32>) + %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(<5 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %1 +... + +--- +name: test_freeze_v6s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v6s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s32>) = G_FREEZE [[DEF]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](<6 x s32>) + %0:_(<6 x s32>) = G_IMPLICIT_DEF + %1:_(<6 x s32>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... 
+ +--- +name: test_freeze_v7s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v7s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<7 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(<7 x s32>) = G_FREEZE [[DEF]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](<7 x s32>) + %0:_(<7 x s32>) = G_IMPLICIT_DEF + %1:_(<7 x s32>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... + +--- +name: test_freeze_v8s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v8s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<8 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY [[FREEZE]](<8 x s32>) + %0:_(<8 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 + %1:_(<8 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = COPY %1 +... + +--- +name: test_freeze_v16s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v16s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY [[FREEZE]](<16 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s32>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = COPY %1 +... + +--- +name: test_freeze_v17s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v17s32 + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](<16 x s32>) + %0:_(<16 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 + %1:_(<16 x s32>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... + +--- +name: test_freeze_v32s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v32s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<32 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(<32 x s32>) = G_FREEZE [[DEF]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](<32 x s32>) + %0:_(<32 x s32>) = G_IMPLICIT_DEF + %1:_(<32 x s32>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... 
+ +--- +name: test_freeze_v33s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v33s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<33 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<33 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32), [[UV2]](s32), [[UV3]](s32), [[UV4]](s32), [[UV5]](s32), [[UV6]](s32), [[UV7]](s32), [[UV8]](s32), [[UV9]](s32), [[UV10]](s32), [[UV11]](s32), [[UV12]](s32), [[UV13]](s32), [[UV14]](s32), [[UV15]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV16]](s32), [[UV17]](s32), [[UV18]](s32), [[UV19]](s32), [[UV20]](s32), [[UV21]](s32), [[UV22]](s32), [[UV23]](s32), [[UV24]](s32), [[UV25]](s32), [[UV26]](s32), [[UV27]](s32), [[UV28]](s32), [[UV29]](s32), [[UV30]](s32), [[UV31]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<16 x s32>) = G_BUILD_VECTOR [[UV32]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32), [[DEF1]](s32) + ; CHECK: [[DEF2:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR1]] + ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[BUILD_VECTOR2]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<528 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>), [[DEF2]](<16 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<33 x s32>) = G_EXTRACT [[CONCAT_VECTORS]](<528 x s32>), 0 + ; CHECK: S_NOP 0, implicit [[EXTRACT]](<33 x s32>) + %0:_(<33 x s32>) = G_IMPLICIT_DEF + %1:_(<33 x s32>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... 
+ +--- +name: test_freeze_v64s32 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v64s32 + ; CHECK: [[DEF:%[0-9]+]]:_(<16 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[COPY:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) + ; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) + ; CHECK: [[COPY2:%[0-9]+]]:_(<16 x s32>) = COPY [[DEF]](<16 x s32>) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[DEF]] + ; CHECK: [[FREEZE1:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY]] + ; CHECK: [[FREEZE2:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY1]] + ; CHECK: [[FREEZE3:%[0-9]+]]:_(<16 x s32>) = G_FREEZE [[COPY2]] + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<64 x s32>) = G_CONCAT_VECTORS [[FREEZE]](<16 x s32>), [[FREEZE1]](<16 x s32>), [[FREEZE2]](<16 x s32>), [[FREEZE3]](<16 x s32>) + ; CHECK: S_NOP 0, implicit [[CONCAT_VECTORS]](<64 x s32>) + %0:_(<64 x s32>) = G_IMPLICIT_DEF + %1:_(<64 x s32>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... + +--- +name: test_freeze_v2s1 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v2s1 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY [[DEF]](<2 x s32>) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY]] + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[FREEZE]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY1]](<2 x s32>) + %0:_(<2 x s1>) = G_IMPLICIT_DEF + %1:_(<2 x s1>) = G_FREEZE %0 + %2:_(<2 x s32>) = G_ANYEXT %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: test_freeze_v3s1 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v3s1 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1>) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(<4 x s1>) = G_IMPLICIT_DEF + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF1]](<4 x s1>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[DEF]](<3 x s1>), 0 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[INSERT]](<4 x s16>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[TRUNC]](<4 x s1>) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT1]] + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s1>) = G_TRUNC [[FREEZE]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s1>) = G_EXTRACT [[TRUNC1]](<4 x s1>), 0 + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s1>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT2]](<3 x s32>) + %0:_(<3 x s1>) = G_IMPLICIT_DEF + %1:_(<3 x s1>) = G_FREEZE %0 + %2:_(<3 x s32>) = G_ANYEXT %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... + +--- +name: test_freeze_v2s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v2s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY [[COPY]](<2 x s32>) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s32>) = G_FREEZE [[COPY1]] + ; CHECK: [[COPY2:%[0-9]+]]:_(<2 x s32>) = COPY [[FREEZE]](<2 x s32>) + ; CHECK: $vgpr0_vgpr1 = COPY [[COPY2]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s8>) = G_TRUNC %0 + %2:_(<2 x s8>) = G_FREEZE %1 + %3:_(<2 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1 = COPY %3 +... 
+ +--- +name: test_freeze_v3s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v3s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[TRUNC:%[0-9]+]]:_(<3 x s8>) = G_TRUNC [[COPY]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(<4 x s8>) = G_IMPLICIT_DEF + ; CHECK: [[ANYEXT:%[0-9]+]]:_(<4 x s16>) = G_ANYEXT [[DEF]](<4 x s8>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[ANYEXT]], [[TRUNC]](<3 x s8>), 0 + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[INSERT]](<4 x s16>) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(<4 x s32>) = G_ANYEXT [[TRUNC1]](<4 x s8>) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[ANYEXT1]] + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s8>) = G_EXTRACT [[TRUNC2]](<4 x s8>), 0 + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(<3 x s32>) = G_ANYEXT [[EXTRACT]](<3 x s8>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[ANYEXT2]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s8>) = G_TRUNC %0 + %2:_(<3 x s8>) = G_FREEZE %1 + %3:_(<3 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... + +--- +name: test_freeze_v2s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v2s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = G_FREEZE %0 + $vgpr0 = COPY %1 +... + +--- +name: test_freeze_v3s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v3s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<3 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[DEF]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<6 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[INSERT:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT]](<3 x s16>), 0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[INSERT]] + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[FREEZE]](<4 x s16>), 0 + ; CHECK: [[INSERT1:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF2]], [[EXTRACT1]](<3 x s16>), 0 + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s16>), [[UV4:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV4]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; 
CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[BUILD_VECTOR2]](<3 x s32>) + %0:_(<3 x s32>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>) = G_TRUNC %0 + %2:_(<3 x s16>) = G_FREEZE %1 + %3:_(<3 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2 = COPY %3 +... + +--- +name: test_freeze_v4s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v4s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s16>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_freeze_v5s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v5s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<5 x s32>) + ; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV]](s32), [[UV1]](s32) + ; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV2]](s32), [[UV3]](s32) + ; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UV4]](s32), [[DEF]](s32) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR1]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[BUILD_VECTOR2]](<2 x s32>) + ; CHECK: [[DEF1:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<10 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[DEF1]](<2 x s16>), [[DEF1]](<2 x s16>) + ; CHECK: [[EXTRACT:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<10 x s16>), 0 + ; CHECK: [[DEF2:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF2]](<6 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[TRUNC4:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[TRUNC5:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC3]](<2 x s16>), [[TRUNC4]](<2 x s16>), [[TRUNC5]](<2 x s16>) + ; CHECK: [[INSERT:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS1]], [[EXTRACT]](<5 x s16>), 0 + ; CHECK: [[UV8:%[0-9]+]]:_(<2 x s16>), [[UV9:%[0-9]+]]:_(<2 x s16>), [[UV10:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT]](<6 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV8]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV9]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV10]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), 
[[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s32>) = G_FREEZE [[BUILD_VECTOR3]] + ; CHECK: [[UV11:%[0-9]+]]:_(<2 x s32>), [[UV12:%[0-9]+]]:_(<2 x s32>), [[UV13:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s32>) + ; CHECK: [[TRUNC6:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV11]](<2 x s32>) + ; CHECK: [[TRUNC7:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV12]](<2 x s32>) + ; CHECK: [[TRUNC8:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV13]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC6]](<2 x s16>), [[TRUNC7]](<2 x s16>), [[TRUNC8]](<2 x s16>) + ; CHECK: [[EXTRACT1:%[0-9]+]]:_(<5 x s16>) = G_EXTRACT [[CONCAT_VECTORS2]](<6 x s16>), 0 + ; CHECK: [[DEF3:%[0-9]+]]:_(<6 x s32>) = G_IMPLICIT_DEF + ; CHECK: [[UV14:%[0-9]+]]:_(<2 x s32>), [[UV15:%[0-9]+]]:_(<2 x s32>), [[UV16:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[DEF3]](<6 x s32>) + ; CHECK: [[TRUNC9:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV14]](<2 x s32>) + ; CHECK: [[TRUNC10:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV15]](<2 x s32>) + ; CHECK: [[TRUNC11:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV16]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC9]](<2 x s16>), [[TRUNC10]](<2 x s16>), [[TRUNC11]](<2 x s16>) + ; CHECK: [[INSERT1:%[0-9]+]]:_(<6 x s16>) = G_INSERT [[CONCAT_VECTORS3]], [[EXTRACT1]](<5 x s16>), 0 + ; CHECK: [[UV17:%[0-9]+]]:_(<2 x s16>), [[UV18:%[0-9]+]]:_(<2 x s16>), [[UV19:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<6 x s16>) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV17]](<2 x s16>) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[UV18]](<2 x s16>) + ; CHECK: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) + ; CHECK: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[UV19]](<2 x s16>) + ; CHECK: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY [[BITCAST4]](s32) + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY [[LSHR4]](s32) + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY [[BITCAST5]](s32) + ; CHECK: [[BUILD_VECTOR4:%[0-9]+]]:_(<5 x s32>) = G_BUILD_VECTOR [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY [[BUILD_VECTOR4]](<5 x s32>) + %0:_(<5 x s32>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 + %1:_(<5 x s16>) = G_TRUNC %0 + %2:_(<5 x s16>) = G_FREEZE %1 + %3:_(<5 x s32>) = G_ANYEXT %2 + $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = COPY %3 +... 
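+
+# Note (editorial): the long CHECK sequences in the v3s16/v5s16 cases above
+# come from the generic padding and repacking machinery for odd 16-bit
+# vectors (G_INSERT/G_EXTRACT plus bitcast-and-shift repacking of <2 x s16>
+# halves); the freeze-specific part is the single G_FREEZE on the padded or
+# widened value.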
+ +--- +name: test_freeze_v6s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v6s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<6 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<6 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK: [[UV3:%[0-9]+]]:_(<2 x s32>), [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]](<6 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV3]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<6 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2 = COPY %1 +... 
+ +--- +name: test_freeze_v8s16 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v8s16 + ; CHECK: [[COPY:%[0-9]+]]:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>), [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<8 x s16>) + ; CHECK: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[UV]](<2 x s16>) + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) + ; CHECK: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[UV1]](<2 x s16>) + ; CHECK: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) + ; CHECK: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[UV2]](<2 x s16>) + ; CHECK: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) + ; CHECK: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[UV3]](<2 x s16>) + ; CHECK: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY [[BITCAST]](s32) + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY [[LSHR]](s32) + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY [[BITCAST1]](s32) + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY [[LSHR1]](s32) + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY [[BITCAST2]](s32) + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY [[LSHR2]](s32) + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY [[BITCAST3]](s32) + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY [[LSHR3]](s32) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<8 x s32>) = G_BUILD_VECTOR [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<8 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK: [[UV4:%[0-9]+]]:_(<2 x s32>), [[UV5:%[0-9]+]]:_(<2 x s32>), [[UV6:%[0-9]+]]:_(<2 x s32>), [[UV7:%[0-9]+]]:_(<2 x s32>) = G_UNMERGE_VALUES [[FREEZE]](<8 x s32>) + ; CHECK: [[TRUNC:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV4]](<2 x s32>) + ; CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV5]](<2 x s32>) + ; CHECK: [[TRUNC2:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV6]](<2 x s32>) + ; CHECK: [[TRUNC3:%[0-9]+]]:_(<2 x s16>) = G_TRUNC [[UV7]](<2 x s32>) + ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[TRUNC]](<2 x s16>), [[TRUNC1]](<2 x s16>), [[TRUNC2]](<2 x s16>), [[TRUNC3]](<2 x s16>) + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[CONCAT_VECTORS]](<8 x s16>) + %0:_(<8 x s16>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<8 x s16>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... + +--- +name: test_freeze_v2s64 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v2s64 + ; CHECK: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[FREEZE]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = G_FREEZE %0 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %1 +... 
+ +--- +name: test_freeze_v4s8 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v4s8 + ; CHECK: [[COPY:%[0-9]+]]:_(<4 x s8>) = COPY $vgpr0 + ; CHECK: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8), [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[COPY]](<4 x s8>) + ; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UV]](s8) + ; CHECK: [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[UV1]](s8) + ; CHECK: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[UV2]](s8) + ; CHECK: [[ANYEXT3:%[0-9]+]]:_(s32) = G_ANYEXT [[UV3]](s8) + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[ANYEXT]](s32), [[ANYEXT1]](s32), [[ANYEXT2]](s32), [[ANYEXT3]](s32) + ; CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[BUILD_VECTOR]] + ; CHECK: [[TRUNC:%[0-9]+]]:_(<4 x s8>) = G_TRUNC [[FREEZE]](<4 x s32>) + ; CHECK: $vgpr0 = COPY [[TRUNC]](<4 x s8>) + %0:_(<4 x s8>) = COPY $vgpr0 + %1:_(<4 x s8>) = G_FREEZE %0 + $vgpr0 = COPY %1 +... + +--- +name: test_freeze_p0 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p0 + ; CHECK: [[COPY:%[0-9]+]]:_(p0) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p0) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p0) + %0:_(p0) = COPY $vgpr0_vgpr1 + %1:_(p0) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_freeze_p1 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p1 + ; CHECK: [[COPY:%[0-9]+]]:_(p1) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p1) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p1) + %0:_(p1) = COPY $vgpr0_vgpr1 + %1:_(p1) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_freeze_p2 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p2 + ; CHECK: [[COPY:%[0-9]+]]:_(p2) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p2) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](p2) + %0:_(p2) = COPY $vgpr0 + %1:_(p2) = G_FREEZE %0 + $vgpr0 = COPY %1 +... + +--- +name: test_freeze_p3 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p3 + ; CHECK: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p3) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](p3) + %0:_(p3) = COPY $vgpr0 + %1:_(p3) = G_FREEZE %0 + $vgpr0 = COPY %1 +... + +--- +name: test_freeze_p4 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p4 + ; CHECK: [[COPY:%[0-9]+]]:_(p4) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p4) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p4) + %0:_(p4) = COPY $vgpr0_vgpr1 + %1:_(p4) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 +... + +--- +name: test_freeze_p5 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p5 + ; CHECK: [[COPY:%[0-9]+]]:_(p5) = COPY $vgpr0 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p5) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0 = COPY [[FREEZE]](p5) + %0:_(p5) = COPY $vgpr0 + %1:_(p5) = G_FREEZE %0 + $vgpr0 = COPY %1 +... + +--- +name: test_freeze_p999 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_p999 + ; CHECK: [[COPY:%[0-9]+]]:_(p999) = COPY $vgpr0_vgpr1 + ; CHECK: [[FREEZE:%[0-9]+]]:_(p999) = G_FREEZE [[COPY]] + ; CHECK: $vgpr0_vgpr1 = COPY [[FREEZE]](p999) + %0:_(p999) = COPY $vgpr0_vgpr1 + %1:_(p999) = G_FREEZE %0 + $vgpr0_vgpr1 = COPY %1 + +... + +--- +name: test_freeze_v2s1024 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v2s1024 + ; CHECK: [[DEF:%[0-9]+]]:_(<2 x s1024>) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(<2 x s1024>) = G_FREEZE [[DEF]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](<2 x s1024>) + %0:_(<2 x s1024>) = G_IMPLICIT_DEF + %1:_(<2 x s1024>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... 
+ +--- + +name: test_freeze_v3s1024 +body: | + bb.0: + + ; CHECK-LABEL: name: test_freeze_v3s1024 + ; CHECK: [[DEF:%[0-9]+]]:_(<3 x s1024>) = G_IMPLICIT_DEF + ; CHECK: [[FREEZE:%[0-9]+]]:_(<3 x s1024>) = G_FREEZE [[DEF]] + ; CHECK: S_NOP 0, implicit [[FREEZE]](<3 x s1024>) + %0:_(<3 x s1024>) = G_IMPLICIT_DEF + %1:_(<3 x s1024>) = G_FREEZE %0 + S_NOP 0, implicit %1 +... diff --git a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp index 4c0834e8bfaf..817d707776cb 100644 --- a/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/LegalizerHelperTest.cpp @@ -2804,4 +2804,215 @@ TEST_F(AArch64GISelMITest, NarrowImplicitDef) { EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; } +// Test widening of G_FREEZE +TEST_F(AArch64GISelMITest, WidenFreeze) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + // Make sure that G_FREEZE is widened with anyext + LLT S64{LLT::scalar(64)}; + LLT S128{LLT::scalar(128)}; + LLT V2S32{LLT::vector(2, 32)}; + LLT V2S64{LLT::vector(2, 64)}; + + auto Vector = B.buildBitcast(V2S32, Copies[0]); + + auto FreezeScalar = B.buildInstr(TargetOpcode::G_FREEZE, {S64}, {Copies[0]}); + auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.widenScalar(*FreezeScalar, 0, S128)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.widenScalar(*FreezeVector, 0, V2S64)); + + const auto *CheckStr = R"( + CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY + CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + + CHECK: [[ANYEXT:%[0-9]+]]:_(s128) = G_ANYEXT [[COPY]] + CHECK: [[FREEZE:%[0-9]+]]:_(s128) = G_FREEZE [[ANYEXT]] + CHECK: [[TRUNC:%[0-9]+]]:_(s64) = G_TRUNC [[FREEZE]] + + CHECK: [[ANYEXT1:%[0-9]+]]:_(<2 x s64>) = G_ANYEXT [[BITCAST]] + CHECK: [[FREEZE1:%[0-9]+]]:_(<2 x s64>) = G_FREEZE [[ANYEXT1]] + CHECK: [[TRUNC1:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[FREEZE1]] + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + +// Test narrowing of G_FREEZE +TEST_F(AArch64GISelMITest, NarrowFreeze) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + // Make sure that G_FREEZE is narrowed using unmerge/extract + LLT S16{LLT::scalar(16)}; + LLT S32{LLT::scalar(32)}; + LLT S33{LLT::scalar(33)}; + LLT S64{LLT::scalar(64)}; + LLT V2S16{LLT::vector(2, 16)}; + LLT V2S32{LLT::vector(2, 32)}; + + auto Trunc = B.buildTrunc(S33, {Copies[0]}); + auto Vector = B.buildBitcast(V2S32, Copies[0]); + + auto FreezeScalar = B.buildInstr(TargetOpcode::G_FREEZE, {S64}, {Copies[0]}); + auto FreezeOdd = B.buildInstr(TargetOpcode::G_FREEZE, {S33}, {Trunc}); + auto FreezeVector = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); + auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector}); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*FreezeScalar, 0, S32)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*FreezeOdd, 0, S32)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*FreezeVector, 0, V2S16)); + 
EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.narrowScalar(*FreezeVector1, 0, S16)); + + const auto *CheckStr = R"( + CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY + CHECK: [[TRUNC:%[0-9]+]]:_(s33) = G_TRUNC [[COPY]] + CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + + CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]] + CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[UV]] + CHECK: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[UV1]] + CHECK: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE]]:_(s32), [[FREEZE1]] + + CHECK: (s1) = G_UNMERGE_VALUES [[TRUNC]]:_(s33) + CHECK: [[UNDEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + CHECK: [[MV1:%[0-9]+]]:_(s32) = G_MERGE_VALUES + CHECK: [[MV2:%[0-9]+]]:_(s32) = G_MERGE_VALUES + CHECK: [[UNDEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[FREEZE2:%[0-9]+]]:_(s32) = G_FREEZE [[MV1]] + CHECK: [[FREEZE3:%[0-9]+]]:_(s32) = G_FREEZE [[MV2]] + CHECK: [[UNDEF2:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF + CHECK: [[MV3:%[0-9]+]]:_(s1056) = G_MERGE_VALUES [[FREEZE2]]:_(s32), [[FREEZE3]]:_(s32), [[UNDEF2]] + CHECK: [[TRUNC1:%[0-9]+]]:_(s33) = G_TRUNC [[MV3]] + + CHECK: [[BITCAST1:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] + CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST1]] + CHECK: [[FREEZE4:%[0-9]+]]:_(s32) = G_FREEZE [[UV2]] + CHECK: [[FREEZE5:%[0-9]+]]:_(s32) = G_FREEZE [[UV3]] + CHECK: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE4]]:_(s32), [[FREEZE5]]:_(s32) + CHECK: [[BITCAST2:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV4]] + + CHECK: [[BITCAST3:%[0-9]+]]:_(s64) = G_BITCAST [[BITCAST]] + CHECK: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[BITCAST3]] + CHECK: [[FREEZE6:%[0-9]+]]:_(s16) = G_FREEZE [[UV4]] + CHECK: [[FREEZE7:%[0-9]+]]:_(s16) = G_FREEZE [[UV5]] + CHECK: [[FREEZE8:%[0-9]+]]:_(s16) = G_FREEZE [[UV6]] + CHECK: [[FREEZE9:%[0-9]+]]:_(s16) = G_FREEZE [[UV7]] + CHECK: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[FREEZE6]]:_(s16), [[FREEZE7]]:_(s16), [[FREEZE8]]:_(s16), [[FREEZE9]] + CHECK: [[BITCAST3:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[MV5]] + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + +// Test fewer elements of G_FREEZE +TEST_F(AArch64GISelMITest, FewerElementsFreeze) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + LLT S32{LLT::scalar(32)}; + LLT V2S16{LLT::vector(2, 16)}; + LLT V2S32{LLT::vector(2, 32)}; + LLT V4S16{LLT::vector(4, 16)}; + + auto Vector1 = B.buildBitcast(V2S32, Copies[0]); + auto Vector2 = B.buildBitcast(V4S16, Copies[0]); + + auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector1}); + auto FreezeVector2 = B.buildInstr(TargetOpcode::G_FREEZE, {V4S16}, {Vector2}); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.fewerElementsVector(*FreezeVector1, 0, S32)); + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.fewerElementsVector(*FreezeVector2, 0, V2S16)); + + const auto *CheckStr = R"( + CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY + CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + CHECK: [[BITCAST1:%[0-9]+]]:_(<4 x s16>) = G_BITCAST [[COPY]] + + CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BITCAST]] + CHECK: [[FREEZE:%[0-9]+]]:_(s32) = G_FREEZE [[UV]] + CHECK: [[FREEZE1:%[0-9]+]]:_(s32) = G_FREEZE [[UV1]] + CHECK: 
[[MV:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FREEZE]]:_(s32), [[FREEZE1]] + + CHECK: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[BITCAST1]] + CHECK: [[FREEZE2:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[UV]] + CHECK: [[FREEZE3:%[0-9]+]]:_(<2 x s16>) = G_FREEZE [[UV1]] + CHECK: [[MV:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FREEZE2]]:_(<2 x s16>), [[FREEZE3]] + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + +// Test more elements of G_FREEZE +TEST_F(AArch64GISelMITest, MoreElementsFreeze) { + setUp(); + if (!TM) + return; + + DefineLegalizerInfo(A, {}); + + LLT V2S32{LLT::vector(2, 32)}; + LLT V4S32{LLT::vector(4, 32)}; + + auto Vector1 = B.buildBitcast(V2S32, Copies[0]); + auto FreezeVector1 = B.buildInstr(TargetOpcode::G_FREEZE, {V2S32}, {Vector1}); + + AInfo Info(MF->getSubtarget()); + DummyGISelObserver Observer; + LegalizerHelper Helper(*MF, Info, Observer, B); + + // Perform Legalization + EXPECT_EQ(LegalizerHelper::LegalizeResult::Legalized, + Helper.moreElementsVector(*FreezeVector1, 0, V4S32)); + + const auto *CheckStr = R"( + CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY + CHECK: [[BITCAST:%[0-9]+]]:_(<2 x s32>) = G_BITCAST [[COPY]] + + CHECK: [[UNDEF:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF + CHECK: [[CV:%[0-9]+]]:_(<4 x s32>) = G_CONCAT_VECTORS [[BITCAST]]:_(<2 x s32>), [[UNDEF]] + CHECK: [[FREEZE:%[0-9]+]]:_(<4 x s32>) = G_FREEZE [[CV]] + CHECK: [[EXTR:%[0-9]+]]:_(<2 x s32>) = G_EXTRACT [[FREEZE]]:_(<4 x s32>), 0 + )"; + + // Check + EXPECT_TRUE(CheckMachineFunction(*MF, CheckStr)) << *MF; +} + } // namespace From d441188c15183c9496e44169966112304a1fc915 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 16 Apr 2020 12:05:53 -0700 Subject: [PATCH 176/216] [Support][X86] Change getHostNumPhsicalCores() to return number of physical cores enabled by affinity Fixes https://bugs.llvm.org/show_bug.cgi?id=45556 While here, make the x86-64 code available for x86-32. The output has been available and stable since https://git.kernel.org/linus/3dd9d514846cdca1dcef2e4fce666d85e199e844 (2005) ``` processor: ... physical id: siblings: core id: ``` Don't check HAVE_SCHED_GETAFFINITY/HAVE_CPU_COUNT. The interface is simply available in every libc which can build LLVM. Reviewed By: aganea Differential Revision: https://reviews.llvm.org/D78324 --- llvm/lib/Support/Host.cpp | 41 +++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index f23645680648..43df31d0efea 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -1277,11 +1277,18 @@ StringRef sys::getHostCPUName() { StringRef sys::getHostCPUName() { return "generic"; } #endif -#if defined(__linux__) && defined(__x86_64__) +#if defined(__linux__) && (defined(__i386__) || defined(__x86_64__)) // On Linux, the number of physical cores can be computed from /proc/cpuinfo, // using the number of unique physical/core id pairs. The following // implementation reads the /proc/cpuinfo format on an x86_64 system. int computeHostNumPhysicalCores() { + // Enabled represents the number of physical id/core id pairs with at least + // one processor id enabled by the CPU affinity mask. + cpu_set_t Affinity, Enabled; + if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0) + return -1; + CPU_ZERO(&Enabled); + // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be // mmapped because it appears to have 0 size. 
llvm::ErrorOr> Text = @@ -1294,33 +1301,29 @@ int computeHostNumPhysicalCores() { SmallVector strs; (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + int CurProcessor = -1; int CurPhysicalId = -1; + int CurSiblings = -1; int CurCoreId = -1; - SmallSet, 32> UniqueItems; - for (auto &Line : strs) { - Line = Line.trim(); - if (!Line.startswith("physical id") && !Line.startswith("core id")) - continue; + for (StringRef Line : strs) { std::pair Data = Line.split(':'); auto Name = Data.first.trim(); auto Val = Data.second.trim(); - if (Name == "physical id") { - assert(CurPhysicalId == -1 && - "Expected a core id before seeing another physical id"); + // These fields are available if the kernel is configured with CONFIG_SMP. + if (Name == "processor") + Val.getAsInteger(10, CurProcessor); + else if (Name == "physical id") Val.getAsInteger(10, CurPhysicalId); - } - if (Name == "core id") { - assert(CurCoreId == -1 && - "Expected a physical id before seeing another core id"); + else if (Name == "siblings") + Val.getAsInteger(10, CurSiblings); + else if (Name == "core id") { Val.getAsInteger(10, CurCoreId); - } - if (CurPhysicalId != -1 && CurCoreId != -1) { - UniqueItems.insert(std::make_pair(CurPhysicalId, CurCoreId)); - CurPhysicalId = -1; - CurCoreId = -1; + // The processor id corresponds to an index into cpu_set_t. + if (CPU_ISSET(CurProcessor, &Affinity)) + CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled); } } - return UniqueItems.size(); + return CPU_COUNT(&Enabled); } #elif defined(__APPLE__) && defined(__x86_64__) #include From f89abd6923bcc0839288af4b96113a0cb4fa4edd Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Fri, 17 Apr 2020 07:52:50 -0700 Subject: [PATCH 177/216] [SYCL] Update __builtin_unique_stable_name for other backends. The CUDA backend (and other downstreams) have trouble with the tilde and arrow delimiter, so replace these with 'm' (for macro) and '_'. Since these are in the normal lambda ID location, the format of these should not conflict with anything else. 
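For illustration (taken directly from the updated CodeGenSYCL test below), the MACRO_X check string changes from "_ZTSZZ4mainENKUlvE42->5clEvEUlvE52->7~28->18" to "_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_18": the line/column numbers and macro-expansion structure are unchanged, only the '~' and '->' delimiters are rewritten as 'm' and '_'.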
--- clang/lib/AST/ItaniumMangle.cpp | 6 +++--- clang/test/CodeGenSYCL/unique-stable-name.cpp | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 4a45847c9425..d60cacf07534 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1795,7 +1795,7 @@ static void mangleUniqueNameLambda(CXXNameMangler &Mangler, SourceManager &SM, PresumedLoc PLoc = SM.getPresumedLoc(Loc); Mangler.mangleNumber(PLoc.getLine()); - Out << "->"; + Out << "_"; Mangler.mangleNumber(PLoc.getColumn()); while(Loc.isMacroID()) { @@ -1804,9 +1804,9 @@ static void mangleUniqueNameLambda(CXXNameMangler &Mangler, SourceManager &SM, SLToPrint = SM.getImmediateExpansionRange(Loc).getBegin(); PLoc = SM.getPresumedLoc(SM.getSpellingLoc(SLToPrint)); - Out << "~"; + Out << "m"; Mangler.mangleNumber(PLoc.getLine()); - Out << "->"; + Out << "_"; Mangler.mangleNumber(PLoc.getColumn()); Loc = SM.getImmediateMacroCallerLoc(Loc); diff --git a/clang/test/CodeGenSYCL/unique-stable-name.cpp b/clang/test/CodeGenSYCL/unique-stable-name.cpp index b54c17baec35..ca66327b0021 100644 --- a/clang/test/CodeGenSYCL/unique-stable-name.cpp +++ b/clang/test/CodeGenSYCL/unique-stable-name.cpp @@ -1,12 +1,12 @@ // RUN: %clang_cc1 -triple spir64-unknown-unknown-sycldevice -fsycl -fsycl-is-device -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s // CHECK: @[[INT:[^\w]+]] = private unnamed_addr constant [[INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSi\00" -// CHECK: @[[LAMBDA_X:[^\w]+]] = private unnamed_addr constant [[LAMBDA_X_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42->5clEvEUlvE46->16\00" -// CHECK: @[[MACRO_X:[^\w]+]] = private unnamed_addr constant [[MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42->5clEvEUlvE52->7~28->18\00" -// CHECK: @[[MACRO_Y:[^\w]+]] = private unnamed_addr constant [[MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42->5clEvEUlvE52->7~28->41\00" -// CHECK: @[[MACRO_MACRO_X:[^\w]+]] = private unnamed_addr constant [[MACRO_MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42->5clEvEUlvE55->7~28->18~33->4\00" -// CHECK: @[[MACRO_MACRO_Y:[^\w]+]] = private unnamed_addr constant [[MACRO_MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42->5clEvEUlvE55->7~28->41~33->4\00" -// CHECK: @[[LAMBDA_IN_DEP_INT:[^\w]+]] = private unnamed_addr constant [[DEP_INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIiEvvEUlvE23->12\00", -// CHECK: @[[LAMBDA_IN_DEP_X:[^\w]+]] = private unnamed_addr constant [[DEP_LAMBDA_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIZZ4mainENKUlvE42->5clEvEUlvE46->16EvvEUlvE23->12\00", +// CHECK: @[[LAMBDA_X:[^\w]+]] = private unnamed_addr constant [[LAMBDA_X_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE46_16\00" +// CHECK: @[[MACRO_X:[^\w]+]] = private unnamed_addr constant [[MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_18\00" +// CHECK: @[[MACRO_Y:[^\w]+]] = private unnamed_addr constant [[MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE52_7m28_41\00" +// CHECK: @[[MACRO_MACRO_X:[^\w]+]] = private unnamed_addr constant [[MACRO_MACRO_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE55_7m28_18m33_4\00" +// CHECK: @[[MACRO_MACRO_Y:[^\w]+]] = private unnamed_addr constant [[MACRO_MACRO_SIZE]] c"_ZTSZZ4mainENKUlvE42_5clEvEUlvE55_7m28_41m33_4\00" +// CHECK: @[[LAMBDA_IN_DEP_INT:[^\w]+]] = private unnamed_addr constant [[DEP_INT_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIiEvvEUlvE23_12\00", +// CHECK: @[[LAMBDA_IN_DEP_X:[^\w]+]] = private 
unnamed_addr constant [[DEP_LAMBDA_SIZE:\[[0-9]+ x i8\]]] c"_ZTSZ28lambda_in_dependent_functionIZZ4mainENKUlvE42_5clEvEUlvE46_16EvvEUlvE23_12\00", extern "C" void printf(const char *) {} From aff950e95d4a0b09fa8629e7358e5e06ea2ceb87 Mon Sep 17 00:00:00 2001 From: LemonBoy Date: Fri, 17 Apr 2020 07:58:15 -0700 Subject: [PATCH 178/216] [ELF] Support a few more SPARCv9 relocations Implemented a bunch of relocations found in binaries with medium/large code model and the Local-Exec TLS model. The binaries link and run fine in Qemu. In addition, the emulation `elf64_sparc` is now recognized. Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D77672 --- lld/ELF/Arch/SPARCV9.cpp | 51 ++++++++++++++++++++++++++++++++++ lld/ELF/Driver.cpp | 1 + lld/ELF/InputSection.cpp | 1 + lld/ELF/ScriptParser.cpp | 1 + lld/test/ELF/emulation-sparc.s | 26 +++++++++++++++++ lld/test/ELF/sparcv9-reloc.s | 39 ++++++++++++++++++++++++++ lld/test/ELF/sparcv9-tls-le.s | 17 ++++++++++++ 7 files changed, 136 insertions(+) create mode 100644 lld/test/ELF/emulation-sparc.s create mode 100644 lld/test/ELF/sparcv9-reloc.s create mode 100644 lld/test/ELF/sparcv9-tls-le.s diff --git a/lld/ELF/Arch/SPARCV9.cpp b/lld/ELF/Arch/SPARCV9.cpp index 709db1c07a31..a8dc48bbd4f1 100644 --- a/lld/ELF/Arch/SPARCV9.cpp +++ b/lld/ELF/Arch/SPARCV9.cpp @@ -55,6 +55,14 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, case R_SPARC_UA32: case R_SPARC_64: case R_SPARC_UA64: + case R_SPARC_H44: + case R_SPARC_M44: + case R_SPARC_L44: + case R_SPARC_HH22: + case R_SPARC_HM10: + case R_SPARC_LM22: + case R_SPARC_HI22: + case R_SPARC_LO10: return R_ABS; case R_SPARC_PC10: case R_SPARC_PC22: @@ -69,6 +77,9 @@ RelExpr SPARCV9::getRelExpr(RelType type, const Symbol &s, return R_PLT_PC; case R_SPARC_NONE: return R_NONE; + case R_SPARC_TLS_LE_HIX22: + case R_SPARC_TLS_LE_LOX10: + return R_TLS; default: error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) + ") against symbol " + toString(s)); @@ -103,9 +114,15 @@ void SPARCV9::relocate(uint8_t *loc, const Relocation &rel, break; case R_SPARC_GOT22: case R_SPARC_PC22: + case R_SPARC_LM22: // T-imm22 write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); break; + case R_SPARC_HI22: + // V-imm22 + checkUInt(loc, val >> 10, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 10) & 0x003fffff)); + break; case R_SPARC_WDISP19: // V-disp19 checkInt(loc, val, 21, rel); @@ -116,11 +133,45 @@ void SPARCV9::relocate(uint8_t *loc, const Relocation &rel, // T-simm10 write32be(loc, (read32be(loc) & ~0x000003ff) | (val & 0x000003ff)); break; + case R_SPARC_LO10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff)); + break; case R_SPARC_64: case R_SPARC_UA64: // V-xword64 write64be(loc, val); break; + case R_SPARC_HH22: + // V-imm22 + checkUInt(loc, val >> 42, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 42) & 0x003fffff)); + break; + case R_SPARC_HM10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | ((val >> 32) & 0x000003ff)); + break; + case R_SPARC_H44: + // V-imm22 + checkUInt(loc, val >> 22, 22, rel); + write32be(loc, (read32be(loc) & ~0x003fffff) | ((val >> 22) & 0x003fffff)); + break; + case R_SPARC_M44: + // T-imm10 + write32be(loc, (read32be(loc) & ~0x000003ff) | ((val >> 12) & 0x000003ff)); + break; + case R_SPARC_L44: + // T-imm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x00000fff)); + break; + case R_SPARC_TLS_LE_HIX22: + // T-imm22 + 
write32be(loc, (read32be(loc) & ~0x003fffff) | ((~val >> 10) & 0x003fffff)); + break; + case R_SPARC_TLS_LE_LOX10: + // T-simm13 + write32be(loc, (read32be(loc) & ~0x00001fff) | (val & 0x000003ff) | 0x1C00); + break; default: llvm_unreachable("unknown relocation"); } diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index d223ed47b461..35153a1bff8b 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -149,6 +149,7 @@ static std::tuple parseEmulation(StringRef emul) { .Cases("elf_amd64", "elf_x86_64", {ELF64LEKind, EM_X86_64}) .Case("elf_i386", {ELF32LEKind, EM_386}) .Case("elf_iamcu", {ELF32LEKind, EM_IAMCU}) + .Case("elf64_sparc", {ELF64BEKind, EM_SPARCV9}) .Default({ELFNoneKind, EM_NONE}); if (ret.first == ELFNoneKind) diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 13c3dd486c33..e150cb8f118d 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -650,6 +650,7 @@ static int64_t getTlsTpOffset(const Symbol &s) { // Variant 2. case EM_HEXAGON: + case EM_SPARCV9: case EM_386: case EM_X86_64: return s.getVA(0) - tls->p_memsz - diff --git a/lld/ELF/ScriptParser.cpp b/lld/ELF/ScriptParser.cpp index 037cb9f31eff..b487f31f9cae 100644 --- a/lld/ELF/ScriptParser.cpp +++ b/lld/ELF/ScriptParser.cpp @@ -404,6 +404,7 @@ static std::pair parseBfdName(StringRef s) { .Case("elf64-tradlittlemips", {ELF64LEKind, EM_MIPS}) .Case("elf32-littleriscv", {ELF32LEKind, EM_RISCV}) .Case("elf64-littleriscv", {ELF64LEKind, EM_RISCV}) + .Case("elf64-sparc", {ELF64BEKind, EM_SPARCV9}) .Default({ELFNoneKind, EM_NONE}); } diff --git a/lld/test/ELF/emulation-sparc.s b/lld/test/ELF/emulation-sparc.s new file mode 100644 index 000000000000..8b89a462ed8a --- /dev/null +++ b/lld/test/ELF/emulation-sparc.s @@ -0,0 +1,26 @@ +# REQUIRES: sparc +# RUN: llvm-mc -filetype=obj -triple=sparcv9 %s -o %t.o +# RUN: ld.lld %t.o -o %t1 +# RUN: llvm-readobj --file-headers %t1 | FileCheck --check-prefix=V9 %s +# RUN: ld.lld -m elf64_sparc %t.o -o %t2 +# RUN: cmp %t1 %t2 +# RUN: echo 'OUTPUT_FORMAT(elf64-sparc)' > %t.lds +# RUN: ld.lld -T %t.lds %t.o -o %t3 +# RUN: llvm-readobj --file-headers %t3 | FileCheck --check-prefix=V9 %s + +# V9: ElfHeader { +# V9-NEXT: Ident { +# V9-NEXT: Magic: (7F 45 4C 46) +# V9-NEXT: Class: 64-bit (0x2) +# V9-NEXT: DataEncoding: BigEndian (0x2) +# V9-NEXT: FileVersion: 1 +# V9-NEXT: OS/ABI: SystemV (0x0) +# V9-NEXT: ABIVersion: 0 +# V9-NEXT: Unused: (00 00 00 00 00 00 00) +# V9-NEXT: } +# V9-NEXT: Type: Executable (0x2) +# V9-NEXT: Machine: EM_SPARCV9 (0x2B) +# V9-NEXT: Version: 1 + +.globl _start +_start: diff --git a/lld/test/ELF/sparcv9-reloc.s b/lld/test/ELF/sparcv9-reloc.s new file mode 100644 index 000000000000..ec9abd6dc3bb --- /dev/null +++ b/lld/test/ELF/sparcv9-reloc.s @@ -0,0 +1,39 @@ +# REQUIRES: sparc +# RUN: llvm-mc -filetype=obj -triple=sparcv9 %s -o %t.o +# RUN: ld.lld %t.o --defsym=a=0x0123456789ABCDEF --defsym=b=0x0123456789A --defsym=c=0x01234567 -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s +# RUN: llvm-objdump -s %t | FileCheck --check-prefix=HEX %s + +## R_SPARC_HH22, R_SPARC_HM10 +# CHECK-LABEL: section .ABS_64: +# CHECK: sethi 18641, %o0 +# CHECK-NEXT: or %o0, 359, %o0 +.section .ABS_64,"ax",@progbits + sethi %hh(a), %o0 + or %o0, %hm(a), %o0 + +## R_SPARC_H44, R_SPARC_M44, R_SPARC_L44 +# CHECK-LABEL: section .ABS_44: +# CHECK: sethi 18641, %o0 +# CHECK: or %o0, 359, %o0 +# CHECK: or %o0, 2202, %o0 +.section .ABS_44,"ax",@progbits + sethi %h44(b), %o0 + or %o0, %m44(b), %o0 + sllx %o0, 12, %o0 + or %o0, %l44(b), %o0 + 
+## R_SPARC_HI22, R_SPARC_LO10 +# CHECK-LABEL: section .ABS_32: +# CHECK: sethi 18641, %o0 +# CHECK-NEXT: or %o0, 359, %o0 +.section .ABS_32,"ax",@progbits + sethi %hi(c), %o0 + or %o0, %lo(c), %o0 + +## R_SPARC_64, R_SPARC_32 +# HEX-LABEL: section .ABS_DATA: +# HEX-NEXT: 01234567 89abcdef 01234567 +.section .ABS_DATA,"ax",@progbits + .quad a + .long c diff --git a/lld/test/ELF/sparcv9-tls-le.s b/lld/test/ELF/sparcv9-tls-le.s new file mode 100644 index 000000000000..2d4fec9ccc8d --- /dev/null +++ b/lld/test/ELF/sparcv9-tls-le.s @@ -0,0 +1,17 @@ +# REQUIRES: sparc +# RUN: llvm-mc -filetype=obj -triple=sparcv9 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck --check-prefix=LE %s + +## %hix(@tpoff(a)) = ~(st_value(a) - 1026) >> 10 = 1 +## %lo(@tpoff(a)) = (st_value(a) - 1026) & 0x3ff | 0x1c00 = -2 (0x1ffe) +# LE: sethi 1, %o0 +# LE-NEXT: xor %o0, -2, %o0 +sethi %tle_hix22(a), %o0 +xor %o0, %tle_lox10(a), %o0 + +.section .tbss + .globl a +a: + .zero 1024+2 +b: From 17772995d48b8c10a3142d602e228f3ebeed85bf Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Apr 2020 08:29:58 -0700 Subject: [PATCH 179/216] [builtins] Add missing header in D77912 and make __builtin_clzll more robust --- compiler-rt/lib/builtins/int_div_impl.inc | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/builtins/int_div_impl.inc b/compiler-rt/lib/builtins/int_div_impl.inc index 972a1085ae2c..d194ae1e3028 100644 --- a/compiler-rt/lib/builtins/int_div_impl.inc +++ b/compiler-rt/lib/builtins/int_div_impl.inc @@ -1,4 +1,16 @@ -#define clz(a) (sizeof(a) == 8 ? __builtin_clzll(a) : __builtin_clz(a)) +//===-- int_div_impl.inc - Integer division ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Helpers used by __udivsi3, __umodsi3, __udivdi3, and __umodsi3. +// +//===----------------------------------------------------------------------===// + +#define clz(a) (sizeof(a) == sizeof(unsigned long long) ? __builtin_clzll(a) : __builtin_clz(a)) // Adapted from Figure 3-40 of The PowerPC Compiler Writer's Guide static __inline fixuint_t __udivXi3(fixuint_t n, fixuint_t d) { From fce115681b71863b20cbfe021345fe2d50377535 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Apr 2020 08:43:02 -0700 Subject: [PATCH 180/216] [Support][X86] Include sched.h after D78324 http://lab.llvm.org:8011/builders/clang-hexagon-elf/builds/28848/steps/build%20stage%201/logs/stdio --- llvm/lib/Support/Host.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 43df31d0efea..ae8fd66bac30 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -29,6 +29,7 @@ // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Host.inc" +#include #endif #ifdef _WIN32 #include "Windows/Host.inc" From a127d618357a9b09d755c0af3d141e49d8902c40 Mon Sep 17 00:00:00 2001 From: Siva Chandra Reddy Date: Fri, 17 Apr 2020 08:59:35 -0700 Subject: [PATCH 181/216] [libc] Disable AOR ulp tests for sinf, cosf and sincosf. They seemed to fallback to the system libc and start depending on its accuracy. 
--- libc/AOR_v20.02/math/test/runulp.sh | 38 ++++++++++++++--------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/libc/AOR_v20.02/math/test/runulp.sh b/libc/AOR_v20.02/math/test/runulp.sh index 431829bfc52f..9e93674727a7 100755 --- a/libc/AOR_v20.02/math/test/runulp.sh +++ b/libc/AOR_v20.02/math/test/runulp.sh @@ -93,25 +93,25 @@ t log2f 0 0xffff0000 10000 t log2f 0x1p-4 0x1p4 50000 t log2f 0 inf 50000 -L=0.06 -t sinf 0 0xffff0000 10000 -t sinf 0x1p-14 0x1p54 50000 -t sinf -0x1p-14 -0x1p54 50000 - -L=0.06 -t cosf 0 0xffff0000 10000 -t cosf 0x1p-14 0x1p54 50000 -t cosf -0x1p-14 -0x1p54 50000 - -L=0.06 -t sincosf_sinf 0 0xffff0000 10000 -t sincosf_sinf 0x1p-14 0x1p54 50000 -t sincosf_sinf -0x1p-14 -0x1p54 50000 - -L=0.06 -t sincosf_cosf 0 0xffff0000 10000 -t sincosf_cosf 0x1p-14 0x1p54 50000 -t sincosf_cosf -0x1p-14 -0x1p54 50000 +#L=0.06 +#t sinf 0 0xffff0000 10000 +#t sinf 0x1p-14 0x1p54 50000 +#t sinf -0x1p-14 -0x1p54 50000 +# +#L=0.06 +#t cosf 0 0xffff0000 10000 +#t cosf 0x1p-14 0x1p54 50000 +#t cosf -0x1p-14 -0x1p54 50000 +# +#L=0.06 +#t sincosf_sinf 0 0xffff0000 10000 +#t sincosf_sinf 0x1p-14 0x1p54 50000 +#t sincosf_sinf -0x1p-14 -0x1p54 50000 +# +#L=0.06 +#t sincosf_cosf 0 0xffff0000 10000 +#t sincosf_cosf 0x1p-14 0x1p54 50000 +#t sincosf_cosf -0x1p-14 -0x1p54 50000 L=0.4 t powf 0x1p-1 0x1p1 x 0x1p-7 0x1p7 50000 From b771c4a842d65123ffcda2534540ee9dbd2ad3ce Mon Sep 17 00:00:00 2001 From: Stefan Pintilie Date: Fri, 17 Apr 2020 11:05:02 -0500 Subject: [PATCH 182/216] [PowerPC][Future] More support for PCRel addressing for global values Add initial support for PC Relative addressing for global values that require GOT indirect addressing. This patch adds PCRelative support for global addresses that may not be known at link time and may require access through the GOT. 
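For example, in the updated tests below a read of the external global valInt now emits "pld r3, valInt@got@pcrel(0), 1" followed by "lwa r3, 0(r3)", where the TOC-based form previously materialized the address with an addis/ld pair against an .LC entry ("addis rN, r2, .LC0@toc@ha"; "ld rN, .LC0@toc@l(rN)"). The @got@pcrel reference on the pld is resolved through the new R_PPC64_GOT_PCREL34 relocation added in this patch.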
Differential Revision: https://reviews.llvm.org/D76064 --- .../llvm/BinaryFormat/ELFRelocs/PowerPC64.def | 2 + llvm/include/llvm/MC/MCExpr.h | 133 ++++----- llvm/lib/MC/MCExpr.cpp | 2 + .../MCTargetDesc/PPCELFObjectWriter.cpp | 11 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 5 +- llvm/lib/Target/PowerPC/PPC.h | 5 + llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 18 +- llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 3 +- llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 2 + .../PowerPC/pcrel-call-linkage-with-calls.ll | 43 +-- .../CodeGen/PowerPC/pcrel-got-indirect.ll | 253 ++++++++++++++++++ 11 files changed, 373 insertions(+), 104 deletions(-) create mode 100644 llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def index eb88c530b42d..e28c9caaefaf 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/PowerPC64.def @@ -98,6 +98,7 @@ #undef R_PPC64_DTPREL16_HIGHA #undef R_PPC64_REL24_NOTOC #undef R_PPC64_PCREL34 +#undef R_PPC64_GOT_PCREL34 #undef R_PPC64_IRELATIVE #undef R_PPC64_REL16 #undef R_PPC64_REL16_LO @@ -194,6 +195,7 @@ ELF_RELOC(R_PPC64_DTPREL16_HIGH, 114) ELF_RELOC(R_PPC64_DTPREL16_HIGHA, 115) ELF_RELOC(R_PPC64_REL24_NOTOC, 116) ELF_RELOC(R_PPC64_PCREL34, 132) +ELF_RELOC(R_PPC64_GOT_PCREL34, 133) ELF_RELOC(R_PPC64_IRELATIVE, 248) ELF_RELOC(R_PPC64_REL16, 249) ELF_RELOC(R_PPC64_REL16_LO, 250) diff --git a/llvm/include/llvm/MC/MCExpr.h b/llvm/include/llvm/MC/MCExpr.h index d1ffef779a52..776e116a6e16 100644 --- a/llvm/include/llvm/MC/MCExpr.h +++ b/llvm/include/llvm/MC/MCExpr.h @@ -210,9 +210,9 @@ class MCSymbolRefExpr : public MCExpr { VK_TLSLDM, VK_TPOFF, VK_DTPOFF, - VK_TLSCALL, // symbol(tlscall) - VK_TLSDESC, // symbol(tlsdesc) - VK_TLVP, // Mach-O thread local variable relocations + VK_TLSCALL, // symbol(tlscall) + VK_TLSDESC, // symbol(tlsdesc) + VK_TLVP, // Mach-O thread local variable relocations VK_TLVPPAGE, VK_TLVPPAGEOFF, VK_PAGE, @@ -220,8 +220,8 @@ class MCSymbolRefExpr : public MCExpr { VK_GOTPAGE, VK_GOTPAGEOFF, VK_SECREL, - VK_SIZE, // symbol@SIZE - VK_WEAKREF, // The link between the symbols in .weakref foo, bar + VK_SIZE, // symbol@SIZE + VK_WEAKREF, // The link between the symbols in .weakref foo, bar VK_X86_ABS8, @@ -230,8 +230,8 @@ class MCSymbolRefExpr : public MCExpr { VK_ARM_TARGET1, VK_ARM_TARGET2, VK_ARM_PREL31, - VK_ARM_SBREL, // symbol(sbrel) - VK_ARM_TLSLDO, // symbol(tlsldo) + VK_ARM_SBREL, // symbol(sbrel) + VK_ARM_TLSLDO, // symbol(tlsldo) VK_ARM_TLSDESCSEQ, VK_AVR_NONE, @@ -242,65 +242,66 @@ class MCSymbolRefExpr : public MCExpr { VK_AVR_DIFF16, VK_AVR_DIFF32, - VK_PPC_LO, // symbol@l - VK_PPC_HI, // symbol@h - VK_PPC_HA, // symbol@ha - VK_PPC_HIGH, // symbol@high - VK_PPC_HIGHA, // symbol@higha - VK_PPC_HIGHER, // symbol@higher - VK_PPC_HIGHERA, // symbol@highera - VK_PPC_HIGHEST, // symbol@highest - VK_PPC_HIGHESTA, // symbol@highesta - VK_PPC_GOT_LO, // symbol@got@l - VK_PPC_GOT_HI, // symbol@got@h - VK_PPC_GOT_HA, // symbol@got@ha - VK_PPC_TOCBASE, // symbol@tocbase - VK_PPC_TOC, // symbol@toc - VK_PPC_TOC_LO, // symbol@toc@l - VK_PPC_TOC_HI, // symbol@toc@h - VK_PPC_TOC_HA, // symbol@toc@ha - VK_PPC_U, // symbol@u - VK_PPC_L, // symbol@l - VK_PPC_DTPMOD, // symbol@dtpmod - VK_PPC_TPREL_LO, // symbol@tprel@l - VK_PPC_TPREL_HI, // symbol@tprel@h - VK_PPC_TPREL_HA, // symbol@tprel@ha - VK_PPC_TPREL_HIGH, // symbol@tprel@high - VK_PPC_TPREL_HIGHA, // symbol@tprel@higha - 
VK_PPC_TPREL_HIGHER, // symbol@tprel@higher - VK_PPC_TPREL_HIGHERA, // symbol@tprel@highera - VK_PPC_TPREL_HIGHEST, // symbol@tprel@highest - VK_PPC_TPREL_HIGHESTA, // symbol@tprel@highesta - VK_PPC_DTPREL_LO, // symbol@dtprel@l - VK_PPC_DTPREL_HI, // symbol@dtprel@h - VK_PPC_DTPREL_HA, // symbol@dtprel@ha - VK_PPC_DTPREL_HIGH, // symbol@dtprel@high - VK_PPC_DTPREL_HIGHA, // symbol@dtprel@higha - VK_PPC_DTPREL_HIGHER, // symbol@dtprel@higher - VK_PPC_DTPREL_HIGHERA, // symbol@dtprel@highera - VK_PPC_DTPREL_HIGHEST, // symbol@dtprel@highest - VK_PPC_DTPREL_HIGHESTA,// symbol@dtprel@highesta - VK_PPC_GOT_TPREL, // symbol@got@tprel - VK_PPC_GOT_TPREL_LO, // symbol@got@tprel@l - VK_PPC_GOT_TPREL_HI, // symbol@got@tprel@h - VK_PPC_GOT_TPREL_HA, // symbol@got@tprel@ha - VK_PPC_GOT_DTPREL, // symbol@got@dtprel - VK_PPC_GOT_DTPREL_LO, // symbol@got@dtprel@l - VK_PPC_GOT_DTPREL_HI, // symbol@got@dtprel@h - VK_PPC_GOT_DTPREL_HA, // symbol@got@dtprel@ha - VK_PPC_TLS, // symbol@tls - VK_PPC_GOT_TLSGD, // symbol@got@tlsgd - VK_PPC_GOT_TLSGD_LO, // symbol@got@tlsgd@l - VK_PPC_GOT_TLSGD_HI, // symbol@got@tlsgd@h - VK_PPC_GOT_TLSGD_HA, // symbol@got@tlsgd@ha - VK_PPC_TLSGD, // symbol@tlsgd - VK_PPC_GOT_TLSLD, // symbol@got@tlsld - VK_PPC_GOT_TLSLD_LO, // symbol@got@tlsld@l - VK_PPC_GOT_TLSLD_HI, // symbol@got@tlsld@h - VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha - VK_PPC_TLSLD, // symbol@tlsld - VK_PPC_LOCAL, // symbol@local - VK_PPC_NOTOC, // symbol@notoc + VK_PPC_LO, // symbol@l + VK_PPC_HI, // symbol@h + VK_PPC_HA, // symbol@ha + VK_PPC_HIGH, // symbol@high + VK_PPC_HIGHA, // symbol@higha + VK_PPC_HIGHER, // symbol@higher + VK_PPC_HIGHERA, // symbol@highera + VK_PPC_HIGHEST, // symbol@highest + VK_PPC_HIGHESTA, // symbol@highesta + VK_PPC_GOT_LO, // symbol@got@l + VK_PPC_GOT_HI, // symbol@got@h + VK_PPC_GOT_HA, // symbol@got@ha + VK_PPC_TOCBASE, // symbol@tocbase + VK_PPC_TOC, // symbol@toc + VK_PPC_TOC_LO, // symbol@toc@l + VK_PPC_TOC_HI, // symbol@toc@h + VK_PPC_TOC_HA, // symbol@toc@ha + VK_PPC_U, // symbol@u + VK_PPC_L, // symbol@l + VK_PPC_DTPMOD, // symbol@dtpmod + VK_PPC_TPREL_LO, // symbol@tprel@l + VK_PPC_TPREL_HI, // symbol@tprel@h + VK_PPC_TPREL_HA, // symbol@tprel@ha + VK_PPC_TPREL_HIGH, // symbol@tprel@high + VK_PPC_TPREL_HIGHA, // symbol@tprel@higha + VK_PPC_TPREL_HIGHER, // symbol@tprel@higher + VK_PPC_TPREL_HIGHERA, // symbol@tprel@highera + VK_PPC_TPREL_HIGHEST, // symbol@tprel@highest + VK_PPC_TPREL_HIGHESTA, // symbol@tprel@highesta + VK_PPC_DTPREL_LO, // symbol@dtprel@l + VK_PPC_DTPREL_HI, // symbol@dtprel@h + VK_PPC_DTPREL_HA, // symbol@dtprel@ha + VK_PPC_DTPREL_HIGH, // symbol@dtprel@high + VK_PPC_DTPREL_HIGHA, // symbol@dtprel@higha + VK_PPC_DTPREL_HIGHER, // symbol@dtprel@higher + VK_PPC_DTPREL_HIGHERA, // symbol@dtprel@highera + VK_PPC_DTPREL_HIGHEST, // symbol@dtprel@highest + VK_PPC_DTPREL_HIGHESTA, // symbol@dtprel@highesta + VK_PPC_GOT_TPREL, // symbol@got@tprel + VK_PPC_GOT_TPREL_LO, // symbol@got@tprel@l + VK_PPC_GOT_TPREL_HI, // symbol@got@tprel@h + VK_PPC_GOT_TPREL_HA, // symbol@got@tprel@ha + VK_PPC_GOT_DTPREL, // symbol@got@dtprel + VK_PPC_GOT_DTPREL_LO, // symbol@got@dtprel@l + VK_PPC_GOT_DTPREL_HI, // symbol@got@dtprel@h + VK_PPC_GOT_DTPREL_HA, // symbol@got@dtprel@ha + VK_PPC_TLS, // symbol@tls + VK_PPC_GOT_TLSGD, // symbol@got@tlsgd + VK_PPC_GOT_TLSGD_LO, // symbol@got@tlsgd@l + VK_PPC_GOT_TLSGD_HI, // symbol@got@tlsgd@h + VK_PPC_GOT_TLSGD_HA, // symbol@got@tlsgd@ha + VK_PPC_TLSGD, // symbol@tlsgd + VK_PPC_GOT_TLSLD, // symbol@got@tlsld + VK_PPC_GOT_TLSLD_LO, // 
symbol@got@tlsld@l + VK_PPC_GOT_TLSLD_HI, // symbol@got@tlsld@h + VK_PPC_GOT_TLSLD_HA, // symbol@got@tlsld@ha + VK_PPC_GOT_PCREL, // symbol@got@pcrel + VK_PPC_TLSLD, // symbol@tlsld + VK_PPC_LOCAL, // symbol@local + VK_PPC_NOTOC, // symbol@notoc VK_COFF_IMGREL32, // symbol@imgrel (image-relative) diff --git a/llvm/lib/MC/MCExpr.cpp b/llvm/lib/MC/MCExpr.cpp index 1448a54a04e3..ead2ef21c617 100644 --- a/llvm/lib/MC/MCExpr.cpp +++ b/llvm/lib/MC/MCExpr.cpp @@ -317,6 +317,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l"; case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h"; case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha"; + case VK_PPC_GOT_PCREL: + return "got@pcrel"; case VK_PPC_TLSLD: return "tlsld"; case VK_PPC_LOCAL: return "local"; case VK_PPC_NOTOC: return "notoc"; diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 1687b2975825..d8b3301e97f1 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -129,7 +129,16 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, errs() << '\n'; report_fatal_error("Invalid PC-relative half16ds relocation"); case PPC::fixup_ppc_pcrel34: - Type = ELF::R_PPC64_PCREL34; + switch (Modifier) { + default: + llvm_unreachable("Unsupported Modifier for fixup_ppc_pcrel34"); + case MCSymbolRefExpr::VK_PCREL: + Type = ELF::R_PPC64_PCREL34; + break; + case MCSymbolRefExpr::VK_PPC_GOT_PCREL: + Type = ELF::R_PPC64_GOT_PCREL34; + break; + } break; case FK_Data_4: case FK_PCRel_4: diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index e782fc194791..f1b57114ec1c 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -193,8 +193,9 @@ PPCMCCodeEmitter::getMemRI34PCRelEncoding(const MCInst &MI, unsigned OpNo, const MCExpr *Expr = MO.getExpr(); const MCSymbolRefExpr *SRE = cast(Expr); (void)SRE; - assert(SRE->getKind() == MCSymbolRefExpr::VK_PCREL && - "VariantKind must be VK_PCREL"); + assert((SRE->getKind() == MCSymbolRefExpr::VK_PCREL || + SRE->getKind() == MCSymbolRefExpr::VK_PPC_GOT_PCREL) && + "VariantKind must be VK_PCREL or VK_PPC_GOT_PCREL"); Fixups.push_back( MCFixup::create(IsLittleEndian ? 0 : 1, Expr, static_cast(PPC::fixup_ppc_pcrel34))); diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h index 8a78a8d0a017..815c67286a2c 100644 --- a/llvm/lib/Target/PowerPC/PPC.h +++ b/llvm/lib/Target/PowerPC/PPC.h @@ -102,6 +102,11 @@ namespace llvm { /// the current instruction address(pc), e.g., var@pcrel. Fixup is VK_PCREL. MO_PCREL_FLAG = 4, + /// MO_GOT_FLAG - If this bit is set the symbol reference is to be computed + /// via the GOT. For example when combined with the MO_PCREL_FLAG it should + /// produce the relocation @got@pcrel. Fixup is VK_PPC_GOT_PCREL. + MO_GOT_FLAG = 32, + /// The next are not flags but distinct values. 
MO_ACCESS_MASK = 0xf00, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 6cf4b85a4f74..c9bd6e845450 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -3051,11 +3051,21 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, // 64-bit SVR4 ABI & AIX ABI code is always position-independent. // The actual address of the GlobalValue is stored in the TOC. if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) { - if (!isAccessedAsGotIndirect(Op) && Subtarget.isUsingPCRelativeCalls()) { + if (Subtarget.isUsingPCRelativeCalls()) { EVT Ty = getPointerTy(DAG.getDataLayout()); - SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), - PPCII::MO_PCREL_FLAG); - return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); + if (isAccessedAsGotIndirect(Op)) { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), + PPCII::MO_PCREL_FLAG | + PPCII::MO_GOT_FLAG); + SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); + SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel, + MachinePointerInfo()); + return Load; + } else { + SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(), + PPCII::MO_PCREL_FLAG); + return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA); + } } setUsesTOCBasePtr(DAG); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset()); diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp index 2dfc5c5e2bd1..709b21384538 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -2048,7 +2048,8 @@ PPCInstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const { static const std::pair TargetFlags[] = { {MO_PLT, "ppc-plt"}, {MO_PIC_FLAG, "ppc-pic"}, - {MO_PCREL_FLAG, "ppc-pcrel"}}; + {MO_PCREL_FLAG, "ppc-pcrel"}, + {MO_GOT_FLAG, "ppc-got"}}; return makeArrayRef(TargetFlags); } diff --git a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp index 0a0e168c0076..add4de24275f 100644 --- a/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp +++ b/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp @@ -82,6 +82,8 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol, RefKind = MCSymbolRefExpr::VK_PLT; else if (MO.getTargetFlags() == PPCII::MO_PCREL_FLAG) RefKind = MCSymbolRefExpr::VK_PCREL; + else if (MO.getTargetFlags() == (PPCII::MO_PCREL_FLAG | PPCII::MO_GOT_FLAG)) + RefKind = MCSymbolRefExpr::VK_PPC_GOT_PCREL; const MachineInstr *MI = MO.getParent(); diff --git a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll index 59001d482665..ed96e732b08b 100644 --- a/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll +++ b/llvm/test/CodeGen/PowerPC/pcrel-call-linkage-with-calls.ll @@ -53,9 +53,7 @@ entry: define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: DirectCallLocal2: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep2@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep2@l -; CHECK-S: .localentry DirectCallLocal2, .Lfunc_lep2-.Lfunc_gep2 +; CHECK-S: .localentry DirectCallLocal2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) @@ -64,10 +62,8 @@ define dso_local signext i32 @DirectCallLocal2(i32 signext %a, i32 signext %b) l ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: add r3, 
r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall -; CHECK-S-NEXT: nop -; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: bl localCall@notoc +; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -140,9 +136,7 @@ declare signext i32 @externCall(i32 signext) local_unnamed_addr define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) local_unnamed_addr { ; CHECK-ALL-LABEL: DirectCallExtern2: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep5@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep5@l -; CHECK-S: .localentry DirectCallExtern2, .Lfunc_lep5-.Lfunc_gep5 +; CHECK-S: .localentry DirectCallExtern2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) @@ -151,10 +145,8 @@ define dso_local signext i32 @DirectCallExtern2(i32 signext %a, i32 signext %b) ; CHECK-S-NEXT: .cfi_offset lr, 16 ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall -; CHECK-S-NEXT: nop -; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: bl externCall@notoc +; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 @@ -223,22 +215,18 @@ entry: define dso_local signext i32 @TailCallLocal2(i32 signext %a) local_unnamed_addr { ; CHECK-ALL-LABEL: TailCallLocal2: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep8@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep8@l -; CHECK-S: .localentry TailCallLocal2, .Lfunc_lep8-.Lfunc_gep8 +; CHECK-S: .localentry TailCallLocal2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl localCall -; CHECK-S-NEXT: nop +; CHECK-S-NEXT: bl localCall@notoc ; CHECK-S-NEXT: addi r1, r1, 32 ; CHECK-S-NEXT: ld r0, 16(r1) ; CHECK-S-NEXT: mtlr r0 @@ -296,22 +284,18 @@ entry: define dso_local signext i32 @TailCallExtern2(i32 signext %a) local_unnamed_addr { ; CHECK-ALL-LABEL: TailCallExtern2: -; CHECK-S: addis r2, r12, .TOC.-.Lfunc_gep11@ha -; CHECK-S-NEXT: addi r2, r2, .TOC.-.Lfunc_gep11@l -; CHECK-S: .localentry TailCallExtern2, .Lfunc_lep11-.Lfunc_gep11 +; CHECK-S: .localentry TailCallExtern2 ; CHECK-S: # %bb.0: # %entry ; CHECK-S-NEXT: mflr r0 ; CHECK-S-NEXT: std r0, 16(r1) ; CHECK-S-NEXT: stdu r1, -32(r1) ; CHECK-S-NEXT: .cfi_def_cfa_offset 32 ; CHECK-S-NEXT: .cfi_offset lr, 16 -; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: pld r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: add r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 -; CHECK-S-NEXT: bl externCall -; CHECK-S-NEXT: nop +; CHECK-S-NEXT: bl externCall@notoc ; CHECK-S-NEXT: addi r1, r1, 32 ; CHECK-S-NEXT: ld r0, 16(r1) ; CHECK-S-NEXT: mtlr r0 @@ -394,8 +378,7 @@ define dso_local signext i32 @IndirectCall2(i32 signext %a, i32 signext %b) loca ; CHECK-S-NEXT: mtctr r12 ; CHECK-S-NEXT: bctrl ; CHECK-S-NEXT: ld 2, 24(r1) -; CHECK-S-NEXT: addis r4, r2, .LC0@toc@ha -; CHECK-S-NEXT: ld r4, .LC0@toc@l(r4) +; CHECK-S-NEXT: pld 
r4, externGlobalVar@got@pcrel(0), 1 ; CHECK-S-NEXT: lwz r4, 0(r4) ; CHECK-S-NEXT: mullw r3, r4, r3 ; CHECK-S-NEXT: extsw r3, r3 diff --git a/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll new file mode 100644 index 000000000000..e9aeccd4ac16 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/pcrel-got-indirect.ll @@ -0,0 +1,253 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=future -enable-ppc-quad-precision -ppc-asm-full-reg-names \ +; RUN: -ppc-vsr-nums-as-vr < %s | FileCheck %s + +%struct.Struct = type { i8, i16, i32 } + +@valChar = external local_unnamed_addr global i8, align 1 +@valShort = external local_unnamed_addr global i16, align 2 +@valInt = external global i32, align 4 +@valUnsigned = external local_unnamed_addr global i32, align 4 +@valLong = external local_unnamed_addr global i64, align 8 +@ptr = external local_unnamed_addr global i32*, align 8 +@array = external local_unnamed_addr global [10 x i32], align 4 +@structure = external local_unnamed_addr global %struct.Struct, align 4 +@ptrfunc = external local_unnamed_addr global void (...)*, align 8 + +define dso_local signext i32 @ReadGlobalVarChar() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalVarChar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valChar@got@pcrel(0), 1 +; CHECK-NEXT: lbz r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i8, i8* @valChar, align 1 + %conv = zext i8 %0 to i32 + ret i32 %conv +} + +define dso_local void @WriteGlobalVarChar() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalVarChar: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valChar@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: stb r4, 0(r3) +; CHECK-NEXT: blr +entry: + store i8 3, i8* @valChar, align 1 + ret void +} + +define dso_local signext i32 @ReadGlobalVarShort() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalVarShort: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valShort@got@pcrel(0), 1 +; CHECK-NEXT: lha r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i16, i16* @valShort, align 2 + %conv = sext i16 %0 to i32 + ret i32 %conv +} + +define dso_local void @WriteGlobalVarShort() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalVarShort: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valShort@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: sth r4, 0(r3) +; CHECK-NEXT: blr +entry: + store i16 3, i16* @valShort, align 2 + ret void +} + +define dso_local signext i32 @ReadGlobalVarInt() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalVarInt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valInt@got@pcrel(0), 1 +; CHECK-NEXT: lwa r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @valInt, align 4 + ret i32 %0 +} + +define dso_local void @WriteGlobalVarInt() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalVarInt: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valInt@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 33 +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr +entry: + store i32 33, i32* @valInt, align 4 + ret void +} + +define dso_local signext i32 @ReadGlobalVarUnsigned() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalVarUnsigned: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; CHECK-NEXT: lwa r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* @valUnsigned, align 4 + ret i32 %0 +} + +define dso_local void @WriteGlobalVarUnsigned() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalVarUnsigned: +; 
CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valUnsigned@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 33 +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr +entry: + store i32 33, i32* @valUnsigned, align 4 + ret void +} + +define dso_local signext i32 @ReadGlobalVarLong() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalVarLong: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valLong@got@pcrel(0), 1 +; CHECK-NEXT: lwa r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i64, i64* @valLong, align 8 + %conv = trunc i64 %0 to i32 + ret i32 %conv +} + +define dso_local void @WriteGlobalVarLong() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalVarLong: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valLong@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 3333 +; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: blr +entry: + store i64 3333, i64* @valLong, align 8 + ret void +} + +define dso_local i32* @ReadGlobalPtr() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ptr@got@pcrel(0), 1 +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32*, i32** @ptr, align 8 + ret i32* %0 +} + +define dso_local void @WriteGlobalPtr() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ptr@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: ld r3, 0(r3) +; CHECK-NEXT: stw r4, 0(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32*, i32** @ptr, align 8 + store i32 3, i32* %0, align 4 + ret void +} + +define dso_local nonnull i32* @GlobalVarAddr() local_unnamed_addr { +; CHECK-LABEL: GlobalVarAddr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, valInt@got@pcrel(0), 1 +; CHECK-NEXT: blr +entry: + ret i32* @valInt +} + +define dso_local signext i32 @ReadGlobalArray() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalArray: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, array@got@pcrel(0), 1 +; CHECK-NEXT: lwa r3, 12(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array, i64 0, i64 3), align 4 + ret i32 %0 +} + +define dso_local void @WriteGlobalArray() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalArray: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, array@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 5 +; CHECK-NEXT: stw r4, 12(r3) +; CHECK-NEXT: blr +entry: + store i32 5, i32* getelementptr inbounds ([10 x i32], [10 x i32]* @array, i64 0, i64 3), align 4 + ret void +} + +define dso_local signext i32 @ReadGlobalStruct() local_unnamed_addr { +; CHECK-LABEL: ReadGlobalStruct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, structure@got@pcrel(0), 1 +; CHECK-NEXT: lwa r3, 4(r3) +; CHECK-NEXT: blr +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.Struct, %struct.Struct* @structure, i64 0, i32 2), align 4 + ret i32 %0 +} + +define dso_local void @WriteGlobalStruct() local_unnamed_addr { +; CHECK-LABEL: WriteGlobalStruct: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, structure@got@pcrel(0), 1 +; CHECK-NEXT: li r4, 3 +; CHECK-NEXT: stw r4, 4(r3) +; CHECK-NEXT: blr +entry: + store i32 3, i32* getelementptr inbounds (%struct.Struct, %struct.Struct* @structure, i64 0, i32 2), align 4 + ret void +} + +define dso_local void @ReadFuncPtr() local_unnamed_addr { +; CHECK-LABEL: ReadFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: mflr r0 +; CHECK-NEXT: std r0, 16(r1) +; CHECK-NEXT: stdu r1, -32(r1) +; CHECK-NEXT: std r2, 24(r1) +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset lr, 16 +; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 +; 
CHECK-NEXT: ld r12, 0(r3) +; CHECK-NEXT: mtctr r12 +; CHECK-NEXT: bctrl +; CHECK-NEXT: ld 2, 24(r1) +; CHECK-NEXT: addi r1, r1, 32 +; CHECK-NEXT: ld r0, 16(r1) +; CHECK-NEXT: mtlr r0 +; CHECK-NEXT: blr +entry: + %0 = load void ()*, void ()** bitcast (void (...)** @ptrfunc to void ()**), align 8 + tail call void %0() + ret void +} + +define dso_local void @WriteFuncPtr() local_unnamed_addr { +; CHECK-LABEL: WriteFuncPtr: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: pld r3, ptrfunc@got@pcrel(0), 1 +; CHECK-NEXT: pld r4, function@got@pcrel(0), 1 +; CHECK-NEXT: std r4, 0(r3) +; CHECK-NEXT: blr +entry: + store void (...)* @function, void (...)** @ptrfunc, align 8 + ret void +} + +declare void @function(...) + From 5be767d489be9fe0f76f321902f492294444f424 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Fri, 17 Apr 2020 17:08:11 +0100 Subject: [PATCH 183/216] NFC: remove outdated TODOs from ARM test file. --- .../LoopVectorize/ARM/tail-folding-counting-down.ll | 4 ---- 1 file changed, 4 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll index 1b3a0a065507..5fe68bf014cb 100644 --- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll @@ -53,9 +53,6 @@ while.end: define dso_local void @sgt_no_loopguard(i8* noalias nocapture readonly %a, i8* noalias nocapture readonly %b, i8* noalias nocapture %c, i32 %N) local_unnamed_addr #0 { ; COMMON-LABEL: @sgt_no_loopguard( ; COMMON: vector.body: -; -; FIXME: I think this is currently miscompiled after D77635 -; ; CHECK-TF: masked.load ; CHECK-TF: masked.load ; CHECK-TF: masked.store @@ -255,7 +252,6 @@ while.end: define dso_local void @icmp_eq(i8* noalias nocapture readonly %A, i8* noalias nocapture readonly %B, i8* noalias nocapture %C, i32 %N) #0 { ; COMMON-LABEL: @icmp_eq ; COMMON: vector.body: -; TODO entry: %cmp6 = icmp eq i32 %N, 0 br i1 %cmp6, label %while.end, label %while.body.preheader From cabfcf840a9d15d92466c6774953d3aa399cde92 Mon Sep 17 00:00:00 2001 From: Petre-Ionut Tudor Date: Tue, 31 Mar 2020 17:27:45 +0100 Subject: [PATCH 184/216] [ARM] Fix conditions for lowering to S[LR]I Summary: Fixed wrong conditions for generating (S[LR]I X, Y, C2) from (or (and X, BvecC1), (lsl Y, C2)) and added ISel nodes to lower to S[LR]I. The optimisation is also enabled by default now. 
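As a concrete check of the new conditions: for <16 x i8> elements (ElemSizeInBits = 8) and a shift amount C2 = 3, the SLI form requires C1 == 0xFF >> (8 - 3) == 0x07, while the SRI form requires C1 == (0xFF << (8 - 3)) & 0xFF == 0xE0, i.e. the AND must keep exactly the element bits that the shifted-in value does not cover. The previous C1 == ~C2 test would instead have demanded C1 == 0xFC in both cases, rejecting these legitimate patterns.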
Subscribers: kristof.beyls, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77387 --- .../Target/AArch64/AArch64ISelLowering.cpp | 57 ++++++++++++------- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 10 +++- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +++-- .../test/CodeGen/AArch64/arm64-sli-sri-opt.ll | 6 +- 4 files changed, 57 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index f285b4b6ecf6..a52dd21d44b1 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -99,11 +99,6 @@ STATISTIC(NumTailCalls, "Number of tail calls"); STATISTIC(NumShiftInserts, "Number of vector shift inserts"); STATISTIC(NumOptimizedImms, "Number of times immediates were optimized"); -static cl::opt -EnableAArch64SlrGeneration("aarch64-shift-insert-generation", cl::Hidden, - cl::desc("Allow AArch64 SLI/SRI formation"), - cl::init(false)); - // FIXME: The necessary dtprel relocations don't seem to be supported // well in the GNU bfd and gold linkers at the moment. Therefore, by // default, for now, fall back to GeneralDynamic code generation. @@ -1323,6 +1318,8 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::VSHL: return "AArch64ISD::VSHL"; case AArch64ISD::VLSHR: return "AArch64ISD::VLSHR"; case AArch64ISD::VASHR: return "AArch64ISD::VASHR"; + case AArch64ISD::VSLI: return "AArch64ISD::VSLI"; + case AArch64ISD::VSRI: return "AArch64ISD::VSRI"; case AArch64ISD::CMEQ: return "AArch64ISD::CMEQ"; case AArch64ISD::CMGE: return "AArch64ISD::CMGE"; case AArch64ISD::CMGT: return "AArch64ISD::CMGT"; @@ -3145,6 +3142,23 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, "llvm.eh.recoverfp must take a function as the first argument"); return IncomingFPOp; } + + case Intrinsic::aarch64_neon_vsri: + case Intrinsic::aarch64_neon_vsli: { + EVT Ty = Op.getValueType(); + + if (!Ty.isVector()) + report_fatal_error("Unexpected type for aarch64_neon_vsli"); + + uint64_t ShiftAmount = Op.getConstantOperandVal(3); + unsigned ElemSizeInBits = Ty.getScalarSizeInBits(); + assert(ShiftAmount <= ElemSizeInBits); + + bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri; + unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI; + return DAG.getNode(Opcode, dl, Ty, Op.getOperand(1), Op.getOperand(2), + Op.getOperand(3)); + } } } @@ -7893,8 +7907,9 @@ static unsigned getIntrinsicID(const SDNode *N) { // Attempt to form a vector S[LR]I from (or (and X, BvecC1), (lsl Y, C2)), // to (SLI X, Y, C2), where X and Y have matching vector types, BvecC1 is a -// BUILD_VECTORs with constant element C1, C2 is a constant, and C1 == ~C2. -// Also, logical shift right -> sri, with the same structure. +// BUILD_VECTORs with constant element C1, C2 is a constant, and: +// - for the SLI case: C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2) +// - for the SRI case: C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); @@ -7927,25 +7942,27 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { if (!isAllConstantBuildVector(And.getOperand(1), C1)) return SDValue(); - // Is C1 == ~C2, taking into account how much one can shift elements of a - // particular size? 
+ // Is C1 == Ones(ElemSizeInBits) << (ElemSizeInBits - C2) or + // C1 == Ones(ElemSizeInBits) >> (ElemSizeInBits - C2), taking into account + // how much one can shift elements of a particular size? uint64_t C2 = C2node->getZExtValue(); unsigned ElemSizeInBits = VT.getScalarSizeInBits(); if (C2 > ElemSizeInBits) return SDValue(); unsigned ElemMask = (1 << ElemSizeInBits) - 1; - if ((C1 & ElemMask) != (~C2 & ElemMask)) - return SDValue(); + if (IsShiftRight) { + if ((C1 & ElemMask) != ((ElemMask << (ElemSizeInBits - C2)) & ElemMask)) + return SDValue(); + } else { + if ((C1 & ElemMask) != ((ElemMask >> (ElemSizeInBits - C2)) & ElemMask)) + return SDValue(); + } SDValue X = And.getOperand(0); SDValue Y = Shift.getOperand(0); - unsigned Intrin = - IsShiftRight ? Intrinsic::aarch64_neon_vsri : Intrinsic::aarch64_neon_vsli; - SDValue ResultSLI = - DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, VT, - DAG.getConstant(Intrin, DL, MVT::i32), X, Y, - Shift.getOperand(1)); + unsigned Inst = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI; + SDValue ResultSLI = DAG.getNode(Inst, DL, VT, X, Y, Shift.getOperand(1)); LLVM_DEBUG(dbgs() << "aarch64-lower: transformed: \n"); LLVM_DEBUG(N->dump(&DAG)); @@ -7959,10 +7976,8 @@ static SDValue tryLowerToSLI(SDNode *N, SelectionDAG &DAG) { SDValue AArch64TargetLowering::LowerVectorOR(SDValue Op, SelectionDAG &DAG) const { // Attempt to form a vector S[LR]I from (or (and X, C1), (lsl Y, C2)) - if (EnableAArch64SlrGeneration) { - if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG)) - return Res; - } + if (SDValue Res = tryLowerToSLI(Op.getNode(), DAG)) + return Res; EVT VT = Op.getValueType(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index a1405335b904..3b3fb40301e4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -121,6 +121,10 @@ enum NodeType : unsigned { SRSHR_I, URSHR_I, + // Vector shift by constant and insert + VSLI, + VSRI, + // Vector comparisons CMEQ, CMGE, @@ -196,8 +200,10 @@ enum NodeType : unsigned { UMULL, // Reciprocal estimates and steps. 
- FRECPE, FRECPS, - FRSQRTE, FRSQRTS, + FRECPE, + FRECPS, + FRSQRTE, + FRSQRTS, SUNPKHI, SUNPKLO, diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index f634642a3b59..a0b53eacbc55 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -231,6 +231,10 @@ def SDT_AArch64ExtVec: SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<3>]>; def SDT_AArch64vshift : SDTypeProfile<1, 2, [SDTCisSameAs<0,1>, SDTCisInt<2>]>; +def SDT_AArch64vshiftinsert : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<3>, + SDTCisSameAs<0,1>, + SDTCisSameAs<0,2>]>; + def SDT_AArch64unvec : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0,1>]>; def SDT_AArch64fcmpz : SDTypeProfile<1, 1, []>; def SDT_AArch64fcmp : SDTypeProfile<1, 2, [SDTCisSameAs<1,2>]>; @@ -469,6 +473,8 @@ def AArch64uqshli : SDNode<"AArch64ISD::UQSHL_I", SDT_AArch64vshift>; def AArch64sqshlui : SDNode<"AArch64ISD::SQSHLU_I", SDT_AArch64vshift>; def AArch64srshri : SDNode<"AArch64ISD::SRSHR_I", SDT_AArch64vshift>; def AArch64urshri : SDNode<"AArch64ISD::URSHR_I", SDT_AArch64vshift>; +def AArch64vsli : SDNode<"AArch64ISD::VSLI", SDT_AArch64vshiftinsert>; +def AArch64vsri : SDNode<"AArch64ISD::VSRI", SDT_AArch64vshiftinsert>; def AArch64not: SDNode<"AArch64ISD::NOT", SDT_AArch64unvec>; def AArch64bit: SDNode<"AArch64ISD::BIT", SDT_AArch64trivec>; @@ -5828,8 +5834,8 @@ defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", defm SHL : SIMDVectorLShiftBHSD<0, 0b01010, "shl", AArch64vshl>; defm SHRN : SIMDVectorRShiftNarrowBHS<0, 0b10000, "shrn", BinOpFrag<(trunc (AArch64vashr node:$LHS, node:$RHS))>>; -defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", int_aarch64_neon_vsli>; -def : Pat<(v1i64 (int_aarch64_neon_vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), +defm SLI : SIMDVectorLShiftBHSDTied<1, 0b01010, "sli", AArch64vsli>; +def : Pat<(v1i64 (AArch64vsli (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftL64:$imm))), (SLId FPR64:$Rd, FPR64:$Rn, vecshiftL64:$imm)>; defm SQRSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10011, "sqrshrn", @@ -5842,8 +5848,8 @@ defm SQSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10010, "sqshrn", int_aarch64_neon_sqshrn>; defm SQSHRUN : SIMDVectorRShiftNarrowBHS<1, 0b10000, "sqshrun", int_aarch64_neon_sqshrun>; -defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", int_aarch64_neon_vsri>; -def : Pat<(v1i64 (int_aarch64_neon_vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), +defm SRI : SIMDVectorRShiftBHSDTied<1, 0b01000, "sri", AArch64vsri>; +def : Pat<(v1i64 (AArch64vsri (v1i64 FPR64:$Rd), (v1i64 FPR64:$Rn), (i32 vecshiftR64:$imm))), (SRId FPR64:$Rd, FPR64:$Rn, vecshiftR64:$imm)>; defm SRSHR : SIMDVectorRShiftBHSD<0, 0b00100, "srshr", AArch64srshri>; diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll index b26542d759e4..204e857e45c8 100644 --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -1,9 +1,9 @@ -; RUN: llc < %s -aarch64-shift-insert-generation=true -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s +; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple | FileCheck %s define void @testLeftGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind { ; CHECK-LABEL: testLeftGood: ; CHECK: sli.16b v0, v1, #3 - %and.i = and <16 x i8> %src1, + %and.i = and <16 x i8> %src1, %vshl_n = shl <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n store <16 x i8> %result, 
<16 x i8>* %dest, align 16 @@ -23,7 +23,7 @@ define void @testLeftBad(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nou define void @testRightGood(<16 x i8> %src1, <16 x i8> %src2, <16 x i8>* %dest) nounwind { ; CHECK-LABEL: testRightGood: ; CHECK: sri.16b v0, v1, #3 - %and.i = and <16 x i8> %src1, + %and.i = and <16 x i8> %src1, %vshl_n = lshr <16 x i8> %src2, %result = or <16 x i8> %and.i, %vshl_n store <16 x i8> %result, <16 x i8>* %dest, align 16 From a7e15b062672c5bd1b88b4be76bf8af430901c53 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 13 Apr 2020 17:15:37 -0400 Subject: [PATCH 185/216] [libc++] List the set of __config_site macros used for features Instead of creating Lit features for all __config_site macros automatically, only do so for macros that generate features actually used in the test suite. This makes it easier to know which ones are supported by the test suite at a glance. Note that the `libcpp-abi-version-vN` is dropped altogether, but it wasn't used anywhere. --- libcxx/utils/libcxx/test/config.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 1dfbfcb357f9..92833ceca567 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -629,22 +629,22 @@ def parse_config_site_and_add_features(self, header): define += '=%s' % (feature_macros[m]) self.cxx.modules_flags += [define] self.cxx.compile_flags += ['-Wno-macro-redefined'] - # Transform each macro name into the feature name used in the tests. + # Transform the following macro names from __config_site into features + # that can be used in the tests. # Ex. _LIBCPP_HAS_NO_THREADS -> libcpp-has-no-threads - for m in feature_macros: - if m == '_LIBCPP_DISABLE_VISIBILITY_ANNOTATIONS' or \ - m == '_LIBCPP_HIDE_FROM_ABI_PER_TU_BY_DEFAULT': - continue - if m == '_LIBCPP_ABI_VERSION': - self.config.available_features.add('libcpp-abi-version-v%s' - % feature_macros[m]) - continue - if m == '_LIBCPP_NO_VCRUNTIME': - self.config.available_features.add('libcpp-no-vcruntime') - continue - assert m.startswith('_LIBCPP_HAS_') or m.startswith('_LIBCPP_ABI_') - m = m.lower()[1:].replace('_', '-') - self.config.available_features.add(m) + translate = { + '_LIBCPP_HAS_NO_GLOBAL_FILESYSTEM_NAMESPACE', + '_LIBCPP_HAS_NO_MONOTONIC_CLOCK', + '_LIBCPP_HAS_NO_STDIN', + '_LIBCPP_HAS_NO_STDOUT', + '_LIBCPP_HAS_NO_THREAD_UNSAFE_C_FUNCTIONS', + '_LIBCPP_HAS_NO_THREADS', + '_LIBCPP_HAS_THREAD_API_EXTERNAL', + '_LIBCPP_HAS_THREAD_API_PTHREAD', + '_LIBCPP_NO_VCRUNTIME' + } + for m in translate.intersection(feature_macros.keys()): + self.config.available_features.add(m.lower()[1:].replace('_', '-')) return feature_macros From 0f1678cd086b8a1c5a8457b25f1dfff39ca9063f Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 16 Apr 2020 21:19:45 +0200 Subject: [PATCH 186/216] [PredicateInfo] Remove unused member (NFC) PredicateInfo takes up a large amount of memory during IPSCCP with many functions. And a large part of that space seems to be going completely to waste here... 
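To make the cost concrete, here is a minimal, hypothetical sketch (these are not the real PredicateInfo types, and it needs LLVM's ADT headers to build) of how an unused inline-storage vector inflates every per-value record; exact numbers are target-dependent, but the dead member pays its full sizeof for each tracked value:

  #include "llvm/ADT/SmallVector.h"
  #include <cstdio>

  struct Info { void *Ptr; };                    // stand-in for the per-copy record

  struct WithUnused {
    llvm::SmallVector<Info, 4> Infos;
    llvm::SmallVector<Info, 4> UninsertedInfos;  // never read in this sketch
  };

  struct Trimmed {
    llvm::SmallVector<Info, 4> Infos;
  };

  int main() {
    // Roughly doubles the per-value footprint before any element is stored.
    std::printf("%zu vs %zu bytes per entry\n",
                sizeof(WithUnused), sizeof(Trimmed));
  }
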
--- llvm/include/llvm/Transforms/Utils/PredicateInfo.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index a6623e231946..37231dce6b42 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -197,11 +197,8 @@ class PredicateInfo { private: // Used to store information about each value we might rename. struct ValueInfo { - // Information about each possible copy. During processing, this is each - // inserted info. After processing, we move the uninserted ones to the - // uninserted vector. + // Information about each possible copy. SmallVector Infos; - SmallVector UninsertedInfos; }; // This owns the all the predicate infos in the function, placed or not. iplist AllInfos; From 24cae17c283e1b4f4672b987d5b978b8fb997f75 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Wed, 15 Apr 2020 22:40:08 +0200 Subject: [PATCH 187/216] [MI] Reduce MachineInstr size (NFC) Move CapOperands next to AsmPrinterFlags, to reduce size of MachineInstr by 8 bytes. --- llvm/include/llvm/CodeGen/MachineInstr.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/CodeGen/MachineInstr.h b/llvm/include/llvm/CodeGen/MachineInstr.h index 6c550dc331dd..48db14e6cd69 100644 --- a/llvm/include/llvm/CodeGen/MachineInstr.h +++ b/llvm/include/llvm/CodeGen/MachineInstr.h @@ -115,8 +115,6 @@ class MachineInstr // Operands are allocated by an ArrayRecycler. MachineOperand *Operands = nullptr; // Pointer to the first operand. unsigned NumOperands = 0; // Number of operands on instruction. - using OperandCapacity = ArrayRecycler::Capacity; - OperandCapacity CapOperands; // Capacity of the Operands array. uint16_t Flags = 0; // Various bits of additional // information about machine @@ -129,6 +127,11 @@ class MachineInstr // anything other than to convey comment // information to AsmPrinter. + // OperandCapacity has uint8_t size, so it should be next to AsmPrinterFlags + // to properly pack. + using OperandCapacity = ArrayRecycler::Capacity; + OperandCapacity CapOperands; // Capacity of the Operands array. + /// Internal implementation detail class that provides out-of-line storage for /// extra info used by the machine instruction when this info cannot be stored /// in-line within the instruction itself. From f715eda60403194bf844a169587726744d0e9cd7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 24 Mar 2020 20:31:13 +0100 Subject: [PATCH 188/216] [LVI] Cleanup/unify cache access This patch combines the "has" and "get" parts of the cache access. getCachedValueInfo() now both sets the BBLV return argument, and returns whether the value was found. Additionally, the management of the work stack is now integrated into getBlockValue(). If the value is not cached yet, we try to push to the stack (and return false, indicating that we need to solve first), or return overdefined in case of a cycle. These changes a) avoid a duplicate cache lookup for has & get and b) ensure that the logic is uniform everywhere. For this reason this change is also not quite NFC, because previously overdefined values from the cache, and overdefined values from a cycle received different treatment when it came to assumption intersection. 
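As a rough illustration of the new calling convention (generic stand-in code with hypothetical names, not the LazyValueInfo types; the real signatures are in the diff below), a single probe now fills the result through an out-parameter and reports whether it was available, so callers no longer pair a "has" query with a separate "get":

  #include <map>
  #include <string>
  #include <iostream>

  struct Cache {
    std::map<std::string, int> Values;

    // One lookup that both sets Out and says whether the value was cached,
    // replacing a hasCached()/getCached() pair that probed the map twice.
    bool getCached(int &Out, const std::string &K) const {
      auto It = Values.find(K);
      if (It == Values.end())
        return false;
      Out = It->second;
      return true;
    }
  };

  int main() {
    Cache C;
    C.Values["x"] = 42;
    int V;
    if (C.getCached(V, "x"))
      std::cout << "hit: " << V << "\n";
    if (!C.getCached(V, "y"))
      std::cout << "miss: the caller would push a work item and solve first\n";
  }
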
Differential Revision: https://reviews.llvm.org/D76788 --- llvm/lib/Analysis/LazyValueInfo.cpp | 135 ++++++++++++---------------- 1 file changed, 58 insertions(+), 77 deletions(-) diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index c2d0de3f15a4..bbbfe7c7eef9 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -211,28 +211,21 @@ namespace { return ODI->second.count(V); } - bool hasCachedValueInfo(Value *V, BasicBlock *BB) const { - if (isOverdefined(V, BB)) + bool getCachedValueInfo(ValueLatticeElement &BBLV, Value *V, + BasicBlock *BB) const { + if (isOverdefined(V, BB)) { + BBLV = ValueLatticeElement::getOverdefined(); return true; + } auto I = ValueCache.find_as(V); if (I == ValueCache.end()) return false; - - return I->second->BlockVals.count(BB); - } - - ValueLatticeElement getCachedValueInfo(Value *V, BasicBlock *BB) const { - if (isOverdefined(V, BB)) - return ValueLatticeElement::getOverdefined(); - - auto I = ValueCache.find_as(V); - if (I == ValueCache.end()) - return ValueLatticeElement(); auto BBI = I->second->BlockVals.find(BB); if (BBI == I->second->BlockVals.end()) - return ValueLatticeElement(); - return BBI->second; + return false; + BBLV = BBI->second; + return true; } /// clear - Empty the cache. @@ -409,10 +402,9 @@ namespace { DominatorTree *DT; ///< An optional DT pointer. DominatorTree *DisabledDT; ///< Stores DT if it's disabled. - ValueLatticeElement getBlockValue(Value *Val, BasicBlock *BB); + bool getBlockValue(ValueLatticeElement &Result, Value *Val, BasicBlock *BB); bool getEdgeValue(Value *V, BasicBlock *F, BasicBlock *T, ValueLatticeElement &Result, Instruction *CxtI = nullptr); - bool hasBlockValue(Value *Val, BasicBlock *BB); // These methods process one work item and may add more. A false value // returned means that the work item was not completely processed and must @@ -547,12 +539,14 @@ void LazyValueInfoImpl::solve() { if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. assert(BlockValueStack.back() == e && "Nothing should have been pushed!"); - assert(TheCache.hasCachedValueInfo(e.second, e.first) && +#ifndef NDEBUG + ValueLatticeElement BBLV; + assert(TheCache.getCachedValueInfo(BBLV, e.second, e.first) && "Result should be in cache!"); - LLVM_DEBUG( dbgs() << "POP " << *e.second << " in " << e.first->getName() << " = " - << TheCache.getCachedValueInfo(e.second, e.first) << "\n"); + << BBLV << "\n"); +#endif BlockValueStack.pop_back(); BlockValueSet.erase(e); @@ -563,21 +557,25 @@ void LazyValueInfoImpl::solve() { } } -bool LazyValueInfoImpl::hasBlockValue(Value *Val, BasicBlock *BB) { +bool LazyValueInfoImpl::getBlockValue(ValueLatticeElement &BBLV, + Value *Val, BasicBlock *BB) { // If already a constant, there is nothing to compute. - if (isa(Val)) + if (Constant *VC = dyn_cast(Val)) { + BBLV = ValueLatticeElement::get(VC); return true; + } - return TheCache.hasCachedValueInfo(Val, BB); -} + if (TheCache.getCachedValueInfo(BBLV, Val, BB)) + return true; -ValueLatticeElement LazyValueInfoImpl::getBlockValue(Value *Val, - BasicBlock *BB) { - // If already a constant, there is nothing to compute. - if (Constant *VC = dyn_cast(Val)) - return ValueLatticeElement::get(VC); + // We have hit a cycle, assume overdefined. + if (!pushBlockValue({ BB, Val })) { + BBLV = ValueLatticeElement::getOverdefined(); + return true; + } - return TheCache.getCachedValueInfo(Val, BB); + // Yet to be resolved. 
+ return false; } static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { @@ -598,9 +596,12 @@ static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { } bool LazyValueInfoImpl::solveBlockValue(Value *Val, BasicBlock *BB) { +#ifndef NDEBUG + ValueLatticeElement BBLV; assert(!isa(Val) && "Value should not be constant"); - assert(!TheCache.hasCachedValueInfo(Val, BB) && + assert(!TheCache.getCachedValueInfo(BBLV, Val, BB) && "Value should not be in cache"); +#endif // Hold off inserting this value into the Cache in case we have to return // false and come back later. @@ -851,13 +852,10 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, SelectInst *SI, BasicBlock *BB) { // Recurse on our inputs if needed - if (!hasBlockValue(SI->getTrueValue(), BB)) { - if (pushBlockValue(std::make_pair(BB, SI->getTrueValue()))) - return false; - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } - ValueLatticeElement TrueVal = getBlockValue(SI->getTrueValue(), BB); + ValueLatticeElement TrueVal; + if (!getBlockValue(TrueVal, SI->getTrueValue(), BB)) + return false; + // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. if (TrueVal.isOverdefined()) { @@ -865,13 +863,10 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, return true; } - if (!hasBlockValue(SI->getFalseValue(), BB)) { - if (pushBlockValue(std::make_pair(BB, SI->getFalseValue()))) - return false; - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } - ValueLatticeElement FalseVal = getBlockValue(SI->getFalseValue(), BB); + ValueLatticeElement FalseVal; + if (!getBlockValue(FalseVal, SI->getFalseValue(), BB)) + return false; + // If we hit overdefined, don't ask more queries. We want to avoid poisoning // extra slots in the table if we can. if (FalseVal.isOverdefined()) { @@ -998,20 +993,17 @@ bool LazyValueInfoImpl::solveBlockValueSelect(ValueLatticeElement &BBLV, Optional LazyValueInfoImpl::getRangeForOperand(unsigned Op, Instruction *I, BasicBlock *BB) { - if (!hasBlockValue(I->getOperand(Op), BB)) - if (pushBlockValue(std::make_pair(BB, I->getOperand(Op)))) - return None; + ValueLatticeElement Val; + if (!getBlockValue(Val, I->getOperand(Op), BB)) + return None; + + intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I); + if (Val.isConstantRange()) + return Val.getConstantRange(); const unsigned OperandBitWidth = DL.getTypeSizeInBits(I->getOperand(Op)->getType()); - ConstantRange Range = ConstantRange::getFull(OperandBitWidth); - if (hasBlockValue(I->getOperand(Op), BB)) { - ValueLatticeElement Val = getBlockValue(I->getOperand(Op), BB); - intersectAssumeOrGuardBlockValueConstantRange(I->getOperand(Op), Val, I); - if (Val.isConstantRange()) - Range = Val.getConstantRange(); - } - return Range; + return ConstantRange::getFull(OperandBitWidth); } bool LazyValueInfoImpl::solveBlockValueCast(ValueLatticeElement &BBLV, @@ -1172,16 +1164,8 @@ bool LazyValueInfoImpl::solveBlockValueExtractValue( // based on replaced with.overflow intrinsics. 
if (Value *V = SimplifyExtractValueInst( EVI->getAggregateOperand(), EVI->getIndices(), - EVI->getModule()->getDataLayout())) { - if (!hasBlockValue(V, BB)) { - if (pushBlockValue({ BB, V })) - return false; - BBLV = ValueLatticeElement::getOverdefined(); - return true; - } - BBLV = getBlockValue(V, BB); - return true; - } + EVI->getModule()->getDataLayout())) + return getBlockValue(BBLV, V, BB); LLVM_DEBUG(dbgs() << " compute BB '" << BB->getName() << "' - overdefined (unknown extractvalue).\n"); @@ -1524,16 +1508,11 @@ bool LazyValueInfoImpl::getEdgeValue(Value *Val, BasicBlock *BBFrom, return true; } - if (!hasBlockValue(Val, BBFrom)) { - if (pushBlockValue(std::make_pair(BBFrom, Val))) - return false; - // No new information. - Result = LocalResult; - return true; - } + ValueLatticeElement InBlock; + if (!getBlockValue(InBlock, Val, BBFrom)) + return false; // Try to intersect ranges of the BB and the constraint on the edge. - ValueLatticeElement InBlock = getBlockValue(Val, BBFrom); intersectAssumeOrGuardBlockValueConstantRange(Val, InBlock, BBFrom->getTerminator()); // We can use the context instruction (generically the ultimate instruction @@ -1556,11 +1535,13 @@ ValueLatticeElement LazyValueInfoImpl::getValueInBlock(Value *V, BasicBlock *BB, << BB->getName() << "'\n"); assert(BlockValueStack.empty() && BlockValueSet.empty()); - if (!hasBlockValue(V, BB)) { - pushBlockValue(std::make_pair(BB, V)); + ValueLatticeElement Result; + if (!getBlockValue(Result, V, BB)) { solve(); + bool ValueAvailable = getBlockValue(Result, V, BB); + (void) ValueAvailable; + assert(ValueAvailable && "Value not available after solving"); } - ValueLatticeElement Result = getBlockValue(V, BB); intersectAssumeOrGuardBlockValueConstantRange(V, Result, CxtI); LLVM_DEBUG(dbgs() << " Result = " << Result << "\n"); From b91f78db370bb8161472acd75a67916d033c3348 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Apr 2020 09:48:11 -0700 Subject: [PATCH 189/216] [CallSite removal][MemCpyOptimizer] Replace CallSite with CallBase. NFC There are also some adjustments to use MaybeAlign in here due to CallBase::getParamAlignment() being deprecated. It would be cleaner if getOrEnforceKnownAlignment was migrated to Align/MaybeAlign. Differential Revision: https://reviews.llvm.org/D78345 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 27 +-- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 3 +- llvm/lib/Transforms/IPO/Attributor.cpp | 91 +++++---- .../Transforms/IPO/AttributorAttributes.cpp | 174 +++++++++--------- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 34 ++-- 5 files changed, 153 insertions(+), 176 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 2363a74d211b..99989775bde6 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -217,23 +217,6 @@ struct IRPosition { return IRPosition(const_cast(CB), Kind(ArgNo)); } - /// Create a position describing the function scope of \p ICS. - static const IRPosition callsite_function(ImmutableCallSite ICS) { - return IRPosition::callsite_function(cast(*ICS.getInstruction())); - } - - /// Create a position describing the returned value of \p ICS. - static const IRPosition callsite_returned(ImmutableCallSite ICS) { - return IRPosition::callsite_returned(cast(*ICS.getInstruction())); - } - - /// Create a position describing the argument of \p ICS at position \p ArgNo. 
- static const IRPosition callsite_argument(ImmutableCallSite ICS, - unsigned ArgNo) { - return IRPosition::callsite_argument(cast(*ICS.getInstruction()), - ArgNo); - } - /// Create a position describing the argument of \p ACS at position \p ArgNo. static const IRPosition callsite_argument(AbstractCallSite ACS, unsigned ArgNo) { @@ -418,9 +401,9 @@ struct IRPosition { return; AttributeList AttrList; - CallSite CS = CallSite(&getAnchorValue()); - if (CS) - AttrList = CS.getAttributes(); + auto *CB = dyn_cast(&getAnchorValue()); + if (CB) + AttrList = CB->getAttributes(); else AttrList = getAssociatedFunction()->getAttributes(); @@ -428,8 +411,8 @@ struct IRPosition { for (Attribute::AttrKind AK : AKs) AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); - if (CS) - CS.setAttributes(AttrList); + if (CB) + CB->setAttributes(AttrList); else getAssociatedFunction()->setAttributes(AttrList); } diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 5386f58b2b82..41180c5c678d 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -16,7 +16,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" #include #include @@ -66,7 +65,7 @@ class MemCpyOptPass : public PassInfoMixin { bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep); bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep); - bool processByValArgument(CallSite CS, unsigned ArgNo); + bool processByValArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index da1cbcc90be5..9304a923546a 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -228,7 +228,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); + Attrs = cast(IRP.getAnchorValue()).getAttributes(); break; } @@ -253,7 +253,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); + cast(IRP.getAnchorValue()).setAttributes(Attrs); break; case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -269,7 +269,7 @@ const IRPosition IRPosition::TombstoneKey(256); SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRP); - ImmutableCallSite ICS(&IRP.getAnchorValue()); + const auto *CB = dyn_cast(&IRP.getAnchorValue()); switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -280,41 +280,40 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRPosition::function(*IRP.getAnchorScope())); return; case IRPosition::IRP_CALL_SITE: - assert(ICS && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. 
- if (!ICS.hasOperandBundles()) - if (const Function *Callee = ICS.getCalledFunction()) + if (!CB->hasOperandBundles()) + if (const Function *Callee = CB->getCalledFunction()) IRPositions.emplace_back(IRPosition::function(*Callee)); return; case IRPosition::IRP_CALL_SITE_RETURNED: - assert(ICS && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!ICS.hasOperandBundles()) { - if (const Function *Callee = ICS.getCalledFunction()) { + if (!CB->hasOperandBundles()) { + if (const Function *Callee = CB->getCalledFunction()) { IRPositions.emplace_back(IRPosition::returned(*Callee)); IRPositions.emplace_back(IRPosition::function(*Callee)); for (const Argument &Arg : Callee->args()) if (Arg.hasReturnedAttr()) { IRPositions.emplace_back( - IRPosition::callsite_argument(ICS, Arg.getArgNo())); + IRPosition::callsite_argument(*CB, Arg.getArgNo())); IRPositions.emplace_back( - IRPosition::value(*ICS.getArgOperand(Arg.getArgNo()))); + IRPosition::value(*CB->getArgOperand(Arg.getArgNo()))); IRPositions.emplace_back(IRPosition::argument(Arg)); } } } - IRPositions.emplace_back( - IRPosition::callsite_function(cast(*ICS.getInstruction()))); + IRPositions.emplace_back(IRPosition::callsite_function(*CB)); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { int ArgNo = IRP.getArgNo(); - assert(ICS && ArgNo >= 0 && "Expected call site!"); + assert(CB && ArgNo >= 0 && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!ICS.hasOperandBundles()) { - const Function *Callee = ICS.getCalledFunction(); + if (!CB->hasOperandBundles()) { + const Function *Callee = CB->getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); if (Callee) @@ -369,8 +368,8 @@ bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK, return false; AttributeList AttrList; - if (ImmutableCallSite ICS = ImmutableCallSite(&getAnchorValue())) - AttrList = ICS.getAttributes(); + if (const auto *CB = dyn_cast(&getAnchorValue())) + AttrList = CB->getAttributes(); else AttrList = getAssociatedFunction()->getAttributes(); @@ -510,12 +509,12 @@ bool Attributor::isAssumedDead(const Use &U, return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA, CheckBBLivenessOnly, DepClass); - if (CallSite CS = CallSite(UserI)) { + if (auto *CB = dyn_cast(UserI)) { // For call site argument uses we can check if the argument is // unused/dead. - if (CS.isArgOperand(&U)) { + if (CB->isArgOperand(&U)) { const IRPosition &CSArgPos = - IRPosition::callsite_argument(CS, CS.getArgumentNo(&U)); + IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA, CheckBBLivenessOnly, DepClass); } @@ -1617,8 +1616,8 @@ void InformationCache::initializeInformationCache(const Function &CF, // Note: There are no concrete attributes now so this is initially empty. 
switch (I.getOpcode()) { default: - assert((!ImmutableCallSite(&I)) && (!isa(&I)) && - "New call site/base instruction type needs to be known in the " + assert(!isa(&I) && + "New call base instruction type needs to be known in the " "Attributor."); break; case Instruction::Call: @@ -1687,8 +1686,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F); if (!isModulePass() && !FI.CalledViaMustTail) { for (const Use &U : F.uses()) - if (ImmutableCallSite ICS = ImmutableCallSite(U.getUser())) - if (ICS.isCallee(&U) && ICS.isMustTailCall()) + if (const auto *CB = dyn_cast(U.getUser())) + if (CB->isCallee(&U) && CB->isMustTailCall()) FI.CalledViaMustTail = true; } @@ -1800,14 +1799,14 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { } auto CallSitePred = [&](Instruction &I) -> bool { - CallSite CS(&I); - IRPosition CSRetPos = IRPosition::callsite_returned(CS); + auto *CB = dyn_cast(&I); + IRPosition CBRetPos = IRPosition::callsite_returned(*CB); // Call sites might be dead if they do not have side effects and no live // users. The return value might be dead if there are no live users. - getOrCreateAAFor(CSRetPos); + getOrCreateAAFor(CBRetPos); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB->getCalledFunction(); // TODO: Even if the callee is not known now we might be able to simplify // the call/callee. if (!Callee) @@ -1819,46 +1818,46 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { !Callee->hasMetadata(LLVMContext::MD_callback)) return true; - if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) { + if (!Callee->getReturnType()->isVoidTy() && !CB->use_empty()) { - IRPosition CSRetPos = IRPosition::callsite_returned(CS); + IRPosition CBRetPos = IRPosition::callsite_returned(*CB); // Call site return integer values might be limited by a constant range. if (Callee->getReturnType()->isIntegerTy()) - getOrCreateAAFor(CSRetPos); + getOrCreateAAFor(CBRetPos); } - for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) { + for (int i = 0, e = CB->getNumArgOperands(); i < e; i++) { - IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + IRPosition CBArgPos = IRPosition::callsite_argument(*CB, i); // Every call site argument might be dead. - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument might be simplified. - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); - if (!CS.getArgument(i)->getType()->isPointerTy()) + if (!CB->getArgOperand(i)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "no-alias". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "dereferenceable". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "align". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute // "readnone/readonly/writeonly/..." - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "nofree". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); } return true; }; @@ -1983,9 +1982,9 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, // do it eagerly. 
if (F->hasLocalLinkage()) { if (llvm::all_of(F->uses(), [&Functions](const Use &U) { - ImmutableCallSite ICS(U.getUser()); - return ICS && ICS.isCallee(&U) && - Functions.count(const_cast(ICS.getCaller())); + const auto *CB = dyn_cast(U.getUser()); + return CB && CB->isCallee(&U) && + Functions.count(const_cast(CB->getCaller())); })) continue; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 9ea314f06888..9c58b3f91fc8 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -280,11 +280,11 @@ static bool genericValueTraversal( if (V->getType()->isPointerTy()) { NewV = V->stripPointerCasts(); } else { - CallSite CS(V); - if (CS && CS.getCalledFunction()) { - for (Argument &Arg : CS.getCalledFunction()->args()) + auto *CB = dyn_cast(V); + if (CB && CB->getCalledFunction()) { + for (Argument &Arg : CB->getCalledFunction()->args()) if (Arg.hasReturnedAttr()) { - NewV = CS.getArgOperand(Arg.getArgNo()); + NewV = CB->getArgOperand(Arg.getArgNo()); break; } } @@ -688,9 +688,9 @@ struct AANoUnwindImpl : AANoUnwind { if (!I.mayThrow()) return true; - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (const auto *CB = dyn_cast(&I)) { const auto &NoUnwindAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(*CB)); return NoUnwindAA.isAssumedNoUnwind(); } return false; @@ -1273,8 +1273,7 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { } bool AANoSyncImpl::isVolatile(Instruction *I) { - assert(!ImmutableCallSite(I) && !isa(I) && - "Calls should not be checked here"); + assert(!isa(I) && "Calls should not be checked here"); switch (I->getOpcode()) { case Instruction::AtomicRMW: @@ -1299,12 +1298,12 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { if (isa(&I) && isNoSyncIntrinsic(&I)) return true; - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { - if (ICS.hasFnAttr(Attribute::NoSync)) + if (const auto *CB = dyn_cast(&I)) { + if (CB->hasFnAttr(Attribute::NoSync)) return true; const auto &NoSyncAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(*CB)); if (NoSyncAA.isAssumedNoSync()) return true; return false; @@ -1323,7 +1322,7 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { return true; // non-convergent and readnone imply nosync. - return !ImmutableCallSite(&I).isConvergent(); + return !cast(I).isConvergent(); }; if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || @@ -1377,12 +1376,12 @@ struct AANoFreeImpl : public AANoFree { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto CheckForNoFree = [&](Instruction &I) { - ImmutableCallSite ICS(&I); - if (ICS.hasFnAttr(Attribute::NoFree)) + const auto &CB = cast(I); + if (CB.hasFnAttr(Attribute::NoFree)) return true; const auto &NoFreeAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(CB)); return NoFreeAA.isAssumedNoFree(); }; @@ -1559,17 +1558,17 @@ static int64_t getKnownNonNullAndDerefBytesForUse( bool NullPointerIsDefined = F ? 
llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; const DataLayout &DL = A.getInfoCache().getDL(); - if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - if (ICS.isBundleOperand(U)) + if (const auto *CB = dyn_cast(I)) { + if (CB->isBundleOperand(U)) return 0; - if (ICS.isCallee(U)) { + if (CB->isCallee(U)) { IsNonNull |= !NullPointerIsDefined; return 0; } - unsigned ArgNo = ICS.getArgumentNo(U); - IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + unsigned ArgNo = CB->getArgOperandNo(U); + IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); // As long as we only use known information there is no need to track // dependences here. auto &DerefAA = A.getAAFor(QueryingAA, IRP, @@ -1803,17 +1802,17 @@ struct AANoRecurseFunction final : AANoRecurseImpl { // If the above check does not hold anymore we look at the calls. auto CheckForNoRecurse = [&](Instruction &I) { - ImmutableCallSite ICS(&I); - if (ICS.hasFnAttr(Attribute::NoRecurse)) + const auto &CB = cast(I); + if (CB.hasFnAttr(Attribute::NoRecurse)) return true; const auto &NoRecurseAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(CB)); if (!NoRecurseAA.isAssumedNoRecurse()) return false; // Recursion to the same function - if (ICS.getCalledFunction() == getAnchorScope()) + if (CB.getCalledFunction() == getAnchorScope()) return false; return true; @@ -2114,7 +2113,7 @@ struct AAWillReturnImpl : public AAWillReturn { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto CheckForWillReturn = [&](Instruction &I) { - IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I)); + IRPosition IPos = IRPosition::callsite_function(cast(I)); const auto &WillReturnAA = A.getAAFor(*this, IPos); if (WillReturnAA.isKnownWillReturn()) return true; @@ -2321,8 +2320,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // See callsite argument attribute and callee argument attribute. - ImmutableCallSite ICS(&getAnchorValue()); - if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) + const auto &CB = cast(getAnchorValue()); + if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias)) indicateOptimisticFixpoint(); Value &Val = getAssociatedValue(); if (isa(Val) && @@ -2335,32 +2334,32 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { /// \p OtherArgNo of \p ICS (= the underlying call site). bool mayAliasWithArgument(Attributor &A, AAResults *&AAR, const AAMemoryBehavior &MemBehaviorAA, - ImmutableCallSite ICS, unsigned OtherArgNo) { + const CallBase &CB, unsigned OtherArgNo) { // We do not need to worry about aliasing with the underlying IRP. if (this->getArgNo() == (int)OtherArgNo) return false; // If it is not a pointer or pointer vector we do not alias. - const Value *ArgOp = ICS.getArgOperand(OtherArgNo); + const Value *ArgOp = CB.getArgOperand(OtherArgNo); if (!ArgOp->getType()->isPtrOrPtrVectorTy()) return false; - auto &ICSArgMemBehaviorAA = A.getAAFor( - *this, IRPosition::callsite_argument(ICS, OtherArgNo), + auto &CBArgMemBehaviorAA = A.getAAFor( + *this, IRPosition::callsite_argument(CB, OtherArgNo), /* TrackDependence */ false); // If the argument is readnone, there is no read-write aliasing. 
- if (ICSArgMemBehaviorAA.isAssumedReadNone()) { - A.recordDependence(ICSArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + if (CBArgMemBehaviorAA.isAssumedReadNone()) { + A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); return false; } // If the argument is readonly and the underlying value is readonly, there // is no read-write aliasing. bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly(); - if (ICSArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { + if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); - A.recordDependence(ICSArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); return false; } @@ -2457,10 +2456,10 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { // Check there is no other pointer argument which could alias with the // value passed at this call site. // TODO: AbstractCallSite - ImmutableCallSite ICS(&getAnchorValue()); - for (unsigned OtherArgNo = 0; OtherArgNo < ICS.getNumArgOperands(); + const auto &CB = cast(getAnchorValue()); + for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands(); OtherArgNo++) - if (mayAliasWithArgument(A, AAR, MemBehaviorAA, ICS, OtherArgNo)) + if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo)) return false; return true; @@ -2511,8 +2510,8 @@ struct AANoAliasReturned final : AANoAliasImpl { /// For now, we can only deduce noalias if we have call sites. /// FIXME: add more support. - ImmutableCallSite ICS(&RV); - if (!ICS) + const auto *CB = dyn_cast(&RV); + if (!CB) return false; const IRPosition &RVPos = IRPosition::value(RV); @@ -2984,8 +2983,8 @@ struct AAIsDeadFunction : public AAIsDead { // is a performance optimization for blocks with calls to a lot of internal // functions. It can however cause dead functions to be treated as live. for (const Instruction &I : BB) - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) - if (const Function *F = ICS.getCalledFunction()) + if (const auto *CB = dyn_cast(&I)) + if (const Function *F = CB->getCalledFunction()) if (F->hasLocalLinkage()) A.markLiveInternalFunction(*F); return true; @@ -3477,12 +3476,12 @@ static unsigned getKnownAlignForUse(Attributor &A, } MaybeAlign MA; - if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - if (ICS.isBundleOperand(U) || ICS.isCallee(U)) + if (const auto *CB = dyn_cast(I)) { + if (CB->isBundleOperand(U) || CB->isCallee(U)) return 0; - unsigned ArgNo = ICS.getArgumentNo(U); - IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + unsigned ArgNo = CB->getArgOperandNo(U); + IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); // As long as we only use known information there is no need to track // dependences here. auto &AlignAA = A.getAAFor(QueryingAA, IRP, @@ -3985,13 +3984,13 @@ struct AACaptureUseTracker final : public CaptureTracker { // For now we only use special logic for call sites. However, the tracker // itself knows about a lot of other non-capturing cases already. 
- CallSite CS(UInst); - if (!CS || !CS.isArgOperand(U)) + auto *CB = dyn_cast(UInst); + if (!CB || !CB->isArgOperand(U)) return isCapturedIn(/* Memory */ true, /* Integer */ true, /* Return */ true); - unsigned ArgNo = CS.getArgumentNo(U); - const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); + unsigned ArgNo = CB->getArgOperandNo(U); + const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo); // If we have a abstract no-capture attribute for the argument we can use // it to justify a non-capture attribute here. This allows recursion! auto &ArgNoCaptureAA = A.getAAFor(NoCaptureAA, CSArgPos); @@ -3999,7 +3998,7 @@ struct AACaptureUseTracker final : public CaptureTracker { return isCapturedIn(/* Memory */ false, /* Integer */ false, /* Return */ false); if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - addPotentialCopy(CS); + addPotentialCopy(*CB); return isCapturedIn(/* Memory */ false, /* Integer */ false, /* Return */ false); } @@ -4010,9 +4009,7 @@ struct AACaptureUseTracker final : public CaptureTracker { } /// Register \p CS as potential copy of the value we are checking. - void addPotentialCopy(CallSite CS) { - PotentialCopies.push_back(CS.getInstruction()); - } + void addPotentialCopy(CallBase &CB) { PotentialCopies.push_back(&CB); } /// See CaptureTracker::shouldExplore(...). bool shouldExplore(const Use *U) override { @@ -4992,10 +4989,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Helper to check if for the given call site the associated argument is // passed to a callback where the privatization would be different. - auto IsCompatiblePrivArgOfCallback = [&](CallSite CS) { + auto IsCompatiblePrivArgOfCallback = [&](CallBase &CB) { SmallVector CallbackUses; - AbstractCallSite::getCallbackUses(cast(*CS.getInstruction()), - CallbackUses); + AbstractCallSite::getCallbackUses(CB, CallbackUses); for (const Use *U : CallbackUses) { AbstractCallSite CBACS(U); assert(CBACS && CBACS.isCallbackCall()); @@ -5012,7 +5008,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { << CBArgNo << "@" << CBACS.getCalledFunction()->getName() << ")\n[AAPrivatizablePtr] " << CBArg << " : " << CBACS.getCallArgOperand(CBArg) << " vs " - << CS.getArgOperand(ArgNo) << "\n" + << CB.getArgOperand(ArgNo) << "\n" << "[AAPrivatizablePtr] " << CBArg << " : " << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n"; }); @@ -5094,7 +5090,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // here. auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) { if (ACS.isDirectCall()) - return IsCompatiblePrivArgOfCallback(CallSite(ACS.getInstruction())); + return IsCompatiblePrivArgOfCallback(*ACS.getInstruction()); if (ACS.isCallbackCall()) return IsCompatiblePrivArgOfDirectCS(ACS); return false; @@ -5727,9 +5723,9 @@ ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { // If the instruction has an own memory behavior state, use it to restrict // the local state. No further analysis is required as the other memory // state is as optimistic as it gets. 
- if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (const auto *CB = dyn_cast(&I)) { const auto &MemBehaviorAA = A.getAAFor( - *this, IRPosition::callsite_function(ICS)); + *this, IRPosition::callsite_function(*CB)); intersectAssumedBits(MemBehaviorAA.getAssumed()); return !isAtFixpoint(); } @@ -5827,8 +5823,8 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, // By default we follow all uses assuming UserI might leak information on U, // we have special handling for call sites operands though. - ImmutableCallSite ICS(UserI); - if (!ICS || !ICS.isArgOperand(U)) + const auto *CB = dyn_cast(UserI); + if (!CB || !CB->isArgOperand(U)) return true; // If the use is a call argument known not to be captured, the users of @@ -5838,9 +5834,9 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, // call might the argument "through return", which we allow and for which we // need to check call users. if (U->get()->getType()->isPointerTy()) { - unsigned ArgNo = ICS.getArgumentNo(U); + unsigned ArgNo = CB->getArgOperandNo(U); const auto &ArgNoCaptureAA = A.getAAFor( - *this, IRPosition::callsite_argument(ICS, ArgNo), + *this, IRPosition::callsite_argument(*CB, ArgNo), /* TrackDependence */ true, DepClassTy::OPTIONAL); return !ArgNoCaptureAA.isAssumedNoCapture(); } @@ -5874,17 +5870,17 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, case Instruction::Invoke: { // For call sites we look at the argument memory behavior attribute (this // could be recursive!) in order to restrict our own state. - ImmutableCallSite ICS(UserI); + const auto *CB = cast(UserI); // Give up on operand bundles. - if (ICS.isBundleOperand(U)) { + if (CB->isBundleOperand(U)) { indicatePessimisticFixpoint(); return; } // Calling a function does read the function pointer, maybe write it if the // function is self-modifying. - if (ICS.isCallee(U)) { + if (CB->isCallee(U)) { removeAssumedBits(NO_READS); break; } @@ -5893,9 +5889,9 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, // argument. IRPosition Pos; if (U->get()->getType()->isPointerTy()) - Pos = IRPosition::callsite_argument(ICS, ICS.getArgumentNo(U)); + Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(U)); else - Pos = IRPosition::callsite_function(ICS); + Pos = IRPosition::callsite_function(*CB); const auto &MemBehaviorAA = A.getAAFor( *this, Pos, /* TrackDependence */ true, DepClassTy::OPTIONAL); @@ -6184,9 +6180,9 @@ void AAMemoryLocationImpl::categorizePtrValue( Changed); return true; } - if (ImmutableCallSite ICS = ImmutableCallSite(&V)) { + if (const auto *CB = dyn_cast(&V)) { const auto &NoAliasAA = - A.getAAFor(*this, IRPosition::callsite_returned(ICS)); + A.getAAFor(*this, IRPosition::callsite_returned(*CB)); if (NoAliasAA.isAssumedNoAlias()) { updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_MALLOCED_MEM, &I, &V, Changed); @@ -6226,32 +6222,32 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, AAMemoryLocation::StateType AccessedLocs; AccessedLocs.intersectAssumedBits(NO_LOCATIONS); - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (auto *CB = dyn_cast(&I)) { // First check if we assume any memory is access is visible. 
- const auto &ICSMemLocationAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + const auto &CBMemLocationAA = + A.getAAFor(*this, IRPosition::callsite_function(*CB)); LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I - << " [" << ICSMemLocationAA << "]\n"); + << " [" << CBMemLocationAA << "]\n"); - if (ICSMemLocationAA.isAssumedReadNone()) + if (CBMemLocationAA.isAssumedReadNone()) return NO_LOCATIONS; - if (ICSMemLocationAA.isAssumedInaccessibleMemOnly()) { + if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) { updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, NO_INACCESSIBLE_MEM, &I, nullptr, Changed); return AccessedLocs.getAssumed(); } - uint32_t ICSAssumedNotAccessedLocs = - ICSMemLocationAA.getAssumedNotAccessedLocation(); + uint32_t CBAssumedNotAccessedLocs = + CBMemLocationAA.getAssumedNotAccessedLocation(); // Set the argmemonly and global bit as we handle them separately below. - uint32_t ICSAssumedNotAccessedLocsNoArgMem = - ICSAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; + uint32_t CBAssumedNotAccessedLocsNoArgMem = + CBAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) { - if (ICSAssumedNotAccessedLocsNoArgMem & CurMLK) + if (CBAssumedNotAccessedLocsNoArgMem & CurMLK) continue; updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, CurMLK, &I, nullptr, Changed); @@ -6259,7 +6255,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, // Now handle global memory if it might be accessed. This is slightly tricky // as NO_GLOBAL_MEM has multiple bits set. - bool HasGlobalAccesses = ((~ICSAssumedNotAccessedLocs) & NO_GLOBAL_MEM); + bool HasGlobalAccesses = ((~CBAssumedNotAccessedLocs) & NO_GLOBAL_MEM); if (HasGlobalAccesses) { auto AccessPred = [&](const Instruction *, const Value *Ptr, AccessKind Kind, MemoryLocationsKind MLK) { @@ -6267,7 +6263,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, Ptr, Changed); return true; }; - if (!ICSMemLocationAA.checkForAllAccessesToMemoryKind( + if (!CBMemLocationAA.checkForAllAccessesToMemoryKind( AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false))) return AccessedLocs.getWorstState(); } @@ -6277,18 +6273,18 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n"); // Now handle argument memory if it might be accessed. - bool HasArgAccesses = ((~ICSAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); + bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); if (HasArgAccesses) { - for (unsigned ArgNo = 0, e = ICS.getNumArgOperands(); ArgNo < e; + for (unsigned ArgNo = 0, e = CB->getNumArgOperands(); ArgNo < e; ++ArgNo) { // Skip non-pointer arguments. - const Value *ArgOp = ICS.getArgOperand(ArgNo); + const Value *ArgOp = CB->getArgOperand(ArgNo); if (!ArgOp->getType()->isPtrOrPtrVectorTy()) continue; // Skip readnone arguments. 
- const IRPosition &ArgOpIRP = IRPosition::callsite_argument(ICS, ArgNo); + const IRPosition &ArgOpIRP = IRPosition::callsite_argument(*CB, ArgNo); const auto &ArgOpMemLocationAA = A.getAAFor( *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0987cd6597a4..7c1610f8073a 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1242,15 +1242,15 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { } /// This is called on every byval argument in call sites. -bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { - const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); +bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { + const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); // Find out what feeds this byval argument. - Value *ByValArg = CS.getArgument(ArgNo); + Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom( MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true, - CS.getInstruction()->getIterator(), CS.getInstruction()->getParent()); + CB.getIterator(), CB.getParent()); if (!DepInfo.isClobber()) return false; @@ -1269,16 +1269,16 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo); - if (ByValAlign == 0) return false; + MaybeAlign ByValAlign = CB.getParamAlign(ArgNo); + if (!ByValAlign) return false; // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); - if (MDep->getSourceAlignment() < ByValAlign && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, - CS.getInstruction(), &AC, &DT) < ByValAlign) + if (MDep->getSourceAlign() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign->value(), DL, + &CB, &AC, &DT) < ByValAlign->value()) return false; // The address space of the memcpy source must match the byval argument @@ -1297,14 +1297,14 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // not just the defining memcpy. MemDepResult SourceDep = MD->getPointerDependencyFrom( MemoryLocation::getForSource(MDep), false, - CS.getInstruction()->getIterator(), MDep->getParent()); + CB.getIterator(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), - "tmpcast", CS.getInstruction()); + "tmpcast", &CB); // Set the tmpcast's DebugLoc to MDep's TmpBitCast->setDebugLoc(MDep->getDebugLoc()); TmpCast = TmpBitCast; @@ -1312,10 +1312,10 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" << " " << *MDep << "\n" - << " " << *CS.getInstruction() << "\n"); + << " " << CB << "\n"); // Otherwise we're good! Update the byval argument. 
- CS.setArgument(ArgNo, TmpCast); + CB.setArgOperand(ArgNo, TmpCast); ++NumMemCpyInstr; return true; } @@ -1349,10 +1349,10 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { RepeatInstruction = processMemCpy(M); else if (MemMoveInst *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); - else if (auto CS = CallSite(I)) { - for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) - if (CS.isByValArgument(i)) - MadeChange |= processByValArgument(CS, i); + else if (auto *CB = dyn_cast(I)) { + for (unsigned I = 0, E = CB->arg_size(); I != E; ++I) + if (CB->isByValArgument(I)) + MadeChange |= processByValArgument(*CB, I); } // Reprocess the instruction if desired. From d9e96b6a026777d299e12d3a50d00c2308d4dcdf Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 17 Apr 2020 13:10:46 -0400 Subject: [PATCH 190/216] [libc] Add spec/*.td as dependencies to add_gen_header Summary: It also re formats long lines in `add_gen_header` Reviewers: sivachandra Reviewed By: sivachandra Subscribers: mgorny, tschuett, libc-commits Differential Revision: https://reviews.llvm.org/D78349 --- libc/cmake/modules/LLVMLibCRules.cmake | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/libc/cmake/modules/LLVMLibCRules.cmake b/libc/cmake/modules/LLVMLibCRules.cmake index 74e534395fd5..f2cabee3210d 100644 --- a/libc/cmake/modules/LLVMLibCRules.cmake +++ b/libc/cmake/modules/LLVMLibCRules.cmake @@ -84,11 +84,17 @@ function(add_gen_header target_name) set(gen_hdr_script "${LIBC_BUILD_SCRIPTS_DIR}/gen_hdr.py") + file(GLOB td_includes ${LIBC_SOURCE_DIR}/spec/*.td) + add_custom_command( OUTPUT ${out_file} - COMMAND $ -o ${out_file} --header ${ADD_GEN_HDR_GEN_HDR} --def ${in_file} ${replacement_params} -I ${LIBC_SOURCE_DIR} ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td + COMMAND $ -o ${out_file} --header ${ADD_GEN_HDR_GEN_HDR} + --def ${in_file} ${replacement_params} -I ${LIBC_SOURCE_DIR} + ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} - DEPENDS ${in_file} ${fq_data_files} ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td libc-hdrgen + DEPENDS ${in_file} ${fq_data_files} ${td_includes} + ${LIBC_SOURCE_DIR}/config/${LIBC_TARGET_OS}/api.td libc-hdrgen ) get_fq_target_name(${target_name} fq_target_name) From 8c94d616e111372658237b82035dc5b024e4901b Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Apr 2020 10:10:53 -0700 Subject: [PATCH 191/216] Revert "[CallSite removal][MemCpyOptimizer] Replace CallSite with CallBase. NFC" There were extra changes that weren't supposed to be in there This reverts commit b91f78db370bb8161472acd75a67916d033c3348. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 27 ++- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 3 +- llvm/lib/Transforms/IPO/Attributor.cpp | 91 ++++----- .../Transforms/IPO/AttributorAttributes.cpp | 174 +++++++++--------- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 34 ++-- 5 files changed, 176 insertions(+), 153 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 99989775bde6..2363a74d211b 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -217,6 +217,23 @@ struct IRPosition { return IRPosition(const_cast(CB), Kind(ArgNo)); } + /// Create a position describing the function scope of \p ICS. 
+ static const IRPosition callsite_function(ImmutableCallSite ICS) { + return IRPosition::callsite_function(cast(*ICS.getInstruction())); + } + + /// Create a position describing the returned value of \p ICS. + static const IRPosition callsite_returned(ImmutableCallSite ICS) { + return IRPosition::callsite_returned(cast(*ICS.getInstruction())); + } + + /// Create a position describing the argument of \p ICS at position \p ArgNo. + static const IRPosition callsite_argument(ImmutableCallSite ICS, + unsigned ArgNo) { + return IRPosition::callsite_argument(cast(*ICS.getInstruction()), + ArgNo); + } + /// Create a position describing the argument of \p ACS at position \p ArgNo. static const IRPosition callsite_argument(AbstractCallSite ACS, unsigned ArgNo) { @@ -401,9 +418,9 @@ struct IRPosition { return; AttributeList AttrList; - auto *CB = dyn_cast(&getAnchorValue()); - if (CB) - AttrList = CB->getAttributes(); + CallSite CS = CallSite(&getAnchorValue()); + if (CS) + AttrList = CS.getAttributes(); else AttrList = getAssociatedFunction()->getAttributes(); @@ -411,8 +428,8 @@ struct IRPosition { for (Attribute::AttrKind AK : AKs) AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); - if (CB) - CB->setAttributes(AttrList); + if (CS) + CS.setAttributes(AttrList); else getAssociatedFunction()->setAttributes(AttrList); } diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 41180c5c678d..5386f58b2b82 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -16,6 +16,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" #include #include @@ -65,7 +66,7 @@ class MemCpyOptPass : public PassInfoMixin { bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep); bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep); - bool processByValArgument(CallBase &CB, unsigned ArgNo); + bool processByValArgument(CallSite CS, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 9304a923546a..da1cbcc90be5 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -228,7 +228,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - Attrs = cast(IRP.getAnchorValue()).getAttributes(); + Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); break; } @@ -253,7 +253,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - cast(IRP.getAnchorValue()).setAttributes(Attrs); + CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); break; case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -269,7 +269,7 @@ const IRPosition IRPosition::TombstoneKey(256); SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRP); - const auto *CB = dyn_cast(&IRP.getAnchorValue()); + ImmutableCallSite ICS(&IRP.getAnchorValue()); switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case 
IRPosition::IRP_FLOAT: @@ -280,40 +280,41 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRPosition::function(*IRP.getAnchorScope())); return; case IRPosition::IRP_CALL_SITE: - assert(CB && "Expected call site!"); + assert(ICS && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) - if (const Function *Callee = CB->getCalledFunction()) + if (!ICS.hasOperandBundles()) + if (const Function *Callee = ICS.getCalledFunction()) IRPositions.emplace_back(IRPosition::function(*Callee)); return; case IRPosition::IRP_CALL_SITE_RETURNED: - assert(CB && "Expected call site!"); + assert(ICS && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) { - if (const Function *Callee = CB->getCalledFunction()) { + if (!ICS.hasOperandBundles()) { + if (const Function *Callee = ICS.getCalledFunction()) { IRPositions.emplace_back(IRPosition::returned(*Callee)); IRPositions.emplace_back(IRPosition::function(*Callee)); for (const Argument &Arg : Callee->args()) if (Arg.hasReturnedAttr()) { IRPositions.emplace_back( - IRPosition::callsite_argument(*CB, Arg.getArgNo())); + IRPosition::callsite_argument(ICS, Arg.getArgNo())); IRPositions.emplace_back( - IRPosition::value(*CB->getArgOperand(Arg.getArgNo()))); + IRPosition::value(*ICS.getArgOperand(Arg.getArgNo()))); IRPositions.emplace_back(IRPosition::argument(Arg)); } } } - IRPositions.emplace_back(IRPosition::callsite_function(*CB)); + IRPositions.emplace_back( + IRPosition::callsite_function(cast(*ICS.getInstruction()))); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { int ArgNo = IRP.getArgNo(); - assert(CB && ArgNo >= 0 && "Expected call site!"); + assert(ICS && ArgNo >= 0 && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!CB->hasOperandBundles()) { - const Function *Callee = CB->getCalledFunction(); + if (!ICS.hasOperandBundles()) { + const Function *Callee = ICS.getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); if (Callee) @@ -368,8 +369,8 @@ bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK, return false; AttributeList AttrList; - if (const auto *CB = dyn_cast(&getAnchorValue())) - AttrList = CB->getAttributes(); + if (ImmutableCallSite ICS = ImmutableCallSite(&getAnchorValue())) + AttrList = ICS.getAttributes(); else AttrList = getAssociatedFunction()->getAttributes(); @@ -509,12 +510,12 @@ bool Attributor::isAssumedDead(const Use &U, return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA, CheckBBLivenessOnly, DepClass); - if (auto *CB = dyn_cast(UserI)) { + if (CallSite CS = CallSite(UserI)) { // For call site argument uses we can check if the argument is // unused/dead. - if (CB->isArgOperand(&U)) { + if (CS.isArgOperand(&U)) { const IRPosition &CSArgPos = - IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); + IRPosition::callsite_argument(CS, CS.getArgumentNo(&U)); return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA, CheckBBLivenessOnly, DepClass); } @@ -1616,8 +1617,8 @@ void InformationCache::initializeInformationCache(const Function &CF, // Note: There are no concrete attributes now so this is initially empty. 
switch (I.getOpcode()) { default: - assert(!isa(&I) && - "New call base instruction type needs to be known in the " + assert((!ImmutableCallSite(&I)) && (!isa(&I)) && + "New call site/base instruction type needs to be known in the " "Attributor."); break; case Instruction::Call: @@ -1686,8 +1687,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F); if (!isModulePass() && !FI.CalledViaMustTail) { for (const Use &U : F.uses()) - if (const auto *CB = dyn_cast(U.getUser())) - if (CB->isCallee(&U) && CB->isMustTailCall()) + if (ImmutableCallSite ICS = ImmutableCallSite(U.getUser())) + if (ICS.isCallee(&U) && ICS.isMustTailCall()) FI.CalledViaMustTail = true; } @@ -1799,14 +1800,14 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { } auto CallSitePred = [&](Instruction &I) -> bool { - auto *CB = dyn_cast(&I); - IRPosition CBRetPos = IRPosition::callsite_returned(*CB); + CallSite CS(&I); + IRPosition CSRetPos = IRPosition::callsite_returned(CS); // Call sites might be dead if they do not have side effects and no live // users. The return value might be dead if there are no live users. - getOrCreateAAFor(CBRetPos); + getOrCreateAAFor(CSRetPos); - Function *Callee = CB->getCalledFunction(); + Function *Callee = CS.getCalledFunction(); // TODO: Even if the callee is not known now we might be able to simplify // the call/callee. if (!Callee) @@ -1818,46 +1819,46 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { !Callee->hasMetadata(LLVMContext::MD_callback)) return true; - if (!Callee->getReturnType()->isVoidTy() && !CB->use_empty()) { + if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) { - IRPosition CBRetPos = IRPosition::callsite_returned(*CB); + IRPosition CSRetPos = IRPosition::callsite_returned(CS); // Call site return integer values might be limited by a constant range. if (Callee->getReturnType()->isIntegerTy()) - getOrCreateAAFor(CBRetPos); + getOrCreateAAFor(CSRetPos); } - for (int i = 0, e = CB->getNumArgOperands(); i < e; i++) { + for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) { - IRPosition CBArgPos = IRPosition::callsite_argument(*CB, i); + IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); // Every call site argument might be dead. - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); // Call site argument might be simplified. - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); - if (!CB->getArgOperand(i)->getType()->isPointerTy()) + if (!CS.getArgument(i)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); // Call site argument attribute "no-alias". - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); // Call site argument attribute "dereferenceable". - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); // Call site argument attribute "align". - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); // Call site argument attribute // "readnone/readonly/writeonly/..." - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); // Call site argument attribute "nofree". - getOrCreateAAFor(CBArgPos); + getOrCreateAAFor(CSArgPos); } return true; }; @@ -1982,9 +1983,9 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, // do it eagerly. 
if (F->hasLocalLinkage()) { if (llvm::all_of(F->uses(), [&Functions](const Use &U) { - const auto *CB = dyn_cast(U.getUser()); - return CB && CB->isCallee(&U) && - Functions.count(const_cast(CB->getCaller())); + ImmutableCallSite ICS(U.getUser()); + return ICS && ICS.isCallee(&U) && + Functions.count(const_cast(ICS.getCaller())); })) continue; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 9c58b3f91fc8..9ea314f06888 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -280,11 +280,11 @@ static bool genericValueTraversal( if (V->getType()->isPointerTy()) { NewV = V->stripPointerCasts(); } else { - auto *CB = dyn_cast(V); - if (CB && CB->getCalledFunction()) { - for (Argument &Arg : CB->getCalledFunction()->args()) + CallSite CS(V); + if (CS && CS.getCalledFunction()) { + for (Argument &Arg : CS.getCalledFunction()->args()) if (Arg.hasReturnedAttr()) { - NewV = CB->getArgOperand(Arg.getArgNo()); + NewV = CS.getArgOperand(Arg.getArgNo()); break; } } @@ -688,9 +688,9 @@ struct AANoUnwindImpl : AANoUnwind { if (!I.mayThrow()) return true; - if (const auto *CB = dyn_cast(&I)) { + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { const auto &NoUnwindAA = - A.getAAFor(*this, IRPosition::callsite_function(*CB)); + A.getAAFor(*this, IRPosition::callsite_function(ICS)); return NoUnwindAA.isAssumedNoUnwind(); } return false; @@ -1273,7 +1273,8 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { } bool AANoSyncImpl::isVolatile(Instruction *I) { - assert(!isa(I) && "Calls should not be checked here"); + assert(!ImmutableCallSite(I) && !isa(I) && + "Calls should not be checked here"); switch (I->getOpcode()) { case Instruction::AtomicRMW: @@ -1298,12 +1299,12 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { if (isa(&I) && isNoSyncIntrinsic(&I)) return true; - if (const auto *CB = dyn_cast(&I)) { - if (CB->hasFnAttr(Attribute::NoSync)) + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (ICS.hasFnAttr(Attribute::NoSync)) return true; const auto &NoSyncAA = - A.getAAFor(*this, IRPosition::callsite_function(*CB)); + A.getAAFor(*this, IRPosition::callsite_function(ICS)); if (NoSyncAA.isAssumedNoSync()) return true; return false; @@ -1322,7 +1323,7 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { return true; // non-convergent and readnone imply nosync. - return !cast(I).isConvergent(); + return !ImmutableCallSite(&I).isConvergent(); }; if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || @@ -1376,12 +1377,12 @@ struct AANoFreeImpl : public AANoFree { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto CheckForNoFree = [&](Instruction &I) { - const auto &CB = cast(I); - if (CB.hasFnAttr(Attribute::NoFree)) + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoFree)) return true; const auto &NoFreeAA = - A.getAAFor(*this, IRPosition::callsite_function(CB)); + A.getAAFor(*this, IRPosition::callsite_function(ICS)); return NoFreeAA.isAssumedNoFree(); }; @@ -1558,17 +1559,17 @@ static int64_t getKnownNonNullAndDerefBytesForUse( bool NullPointerIsDefined = F ? 
llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; const DataLayout &DL = A.getInfoCache().getDL(); - if (const auto *CB = dyn_cast(I)) { - if (CB->isBundleOperand(U)) + if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + if (ICS.isBundleOperand(U)) return 0; - if (CB->isCallee(U)) { + if (ICS.isCallee(U)) { IsNonNull |= !NullPointerIsDefined; return 0; } - unsigned ArgNo = CB->getArgOperandNo(U); - IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); + unsigned ArgNo = ICS.getArgumentNo(U); + IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); // As long as we only use known information there is no need to track // dependences here. auto &DerefAA = A.getAAFor(QueryingAA, IRP, @@ -1802,17 +1803,17 @@ struct AANoRecurseFunction final : AANoRecurseImpl { // If the above check does not hold anymore we look at the calls. auto CheckForNoRecurse = [&](Instruction &I) { - const auto &CB = cast(I); - if (CB.hasFnAttr(Attribute::NoRecurse)) + ImmutableCallSite ICS(&I); + if (ICS.hasFnAttr(Attribute::NoRecurse)) return true; const auto &NoRecurseAA = - A.getAAFor(*this, IRPosition::callsite_function(CB)); + A.getAAFor(*this, IRPosition::callsite_function(ICS)); if (!NoRecurseAA.isAssumedNoRecurse()) return false; // Recursion to the same function - if (CB.getCalledFunction() == getAnchorScope()) + if (ICS.getCalledFunction() == getAnchorScope()) return false; return true; @@ -2113,7 +2114,7 @@ struct AAWillReturnImpl : public AAWillReturn { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto CheckForWillReturn = [&](Instruction &I) { - IRPosition IPos = IRPosition::callsite_function(cast(I)); + IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I)); const auto &WillReturnAA = A.getAAFor(*this, IPos); if (WillReturnAA.isKnownWillReturn()) return true; @@ -2320,8 +2321,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // See callsite argument attribute and callee argument attribute. - const auto &CB = cast(getAnchorValue()); - if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias)) + ImmutableCallSite ICS(&getAnchorValue()); + if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) indicateOptimisticFixpoint(); Value &Val = getAssociatedValue(); if (isa(Val) && @@ -2334,32 +2335,32 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { /// \p OtherArgNo of \p ICS (= the underlying call site). bool mayAliasWithArgument(Attributor &A, AAResults *&AAR, const AAMemoryBehavior &MemBehaviorAA, - const CallBase &CB, unsigned OtherArgNo) { + ImmutableCallSite ICS, unsigned OtherArgNo) { // We do not need to worry about aliasing with the underlying IRP. if (this->getArgNo() == (int)OtherArgNo) return false; // If it is not a pointer or pointer vector we do not alias. - const Value *ArgOp = CB.getArgOperand(OtherArgNo); + const Value *ArgOp = ICS.getArgOperand(OtherArgNo); if (!ArgOp->getType()->isPtrOrPtrVectorTy()) return false; - auto &CBArgMemBehaviorAA = A.getAAFor( - *this, IRPosition::callsite_argument(CB, OtherArgNo), + auto &ICSArgMemBehaviorAA = A.getAAFor( + *this, IRPosition::callsite_argument(ICS, OtherArgNo), /* TrackDependence */ false); // If the argument is readnone, there is no read-write aliasing. 
- if (CBArgMemBehaviorAA.isAssumedReadNone()) { - A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + if (ICSArgMemBehaviorAA.isAssumedReadNone()) { + A.recordDependence(ICSArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); return false; } // If the argument is readonly and the underlying value is readonly, there // is no read-write aliasing. bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly(); - if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { + if (ICSArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); - A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + A.recordDependence(ICSArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); return false; } @@ -2456,10 +2457,10 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { // Check there is no other pointer argument which could alias with the // value passed at this call site. // TODO: AbstractCallSite - const auto &CB = cast(getAnchorValue()); - for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands(); + ImmutableCallSite ICS(&getAnchorValue()); + for (unsigned OtherArgNo = 0; OtherArgNo < ICS.getNumArgOperands(); OtherArgNo++) - if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo)) + if (mayAliasWithArgument(A, AAR, MemBehaviorAA, ICS, OtherArgNo)) return false; return true; @@ -2510,8 +2511,8 @@ struct AANoAliasReturned final : AANoAliasImpl { /// For now, we can only deduce noalias if we have call sites. /// FIXME: add more support. - const auto *CB = dyn_cast(&RV); - if (!CB) + ImmutableCallSite ICS(&RV); + if (!ICS) return false; const IRPosition &RVPos = IRPosition::value(RV); @@ -2983,8 +2984,8 @@ struct AAIsDeadFunction : public AAIsDead { // is a performance optimization for blocks with calls to a lot of internal // functions. It can however cause dead functions to be treated as live. for (const Instruction &I : BB) - if (const auto *CB = dyn_cast(&I)) - if (const Function *F = CB->getCalledFunction()) + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) + if (const Function *F = ICS.getCalledFunction()) if (F->hasLocalLinkage()) A.markLiveInternalFunction(*F); return true; @@ -3476,12 +3477,12 @@ static unsigned getKnownAlignForUse(Attributor &A, } MaybeAlign MA; - if (const auto *CB = dyn_cast(I)) { - if (CB->isBundleOperand(U) || CB->isCallee(U)) + if (ImmutableCallSite ICS = ImmutableCallSite(I)) { + if (ICS.isBundleOperand(U) || ICS.isCallee(U)) return 0; - unsigned ArgNo = CB->getArgOperandNo(U); - IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); + unsigned ArgNo = ICS.getArgumentNo(U); + IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); // As long as we only use known information there is no need to track // dependences here. auto &AlignAA = A.getAAFor(QueryingAA, IRP, @@ -3984,13 +3985,13 @@ struct AACaptureUseTracker final : public CaptureTracker { // For now we only use special logic for call sites. However, the tracker // itself knows about a lot of other non-capturing cases already. 
- auto *CB = dyn_cast(UInst); - if (!CB || !CB->isArgOperand(U)) + CallSite CS(UInst); + if (!CS || !CS.isArgOperand(U)) return isCapturedIn(/* Memory */ true, /* Integer */ true, /* Return */ true); - unsigned ArgNo = CB->getArgOperandNo(U); - const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo); + unsigned ArgNo = CS.getArgumentNo(U); + const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); // If we have a abstract no-capture attribute for the argument we can use // it to justify a non-capture attribute here. This allows recursion! auto &ArgNoCaptureAA = A.getAAFor(NoCaptureAA, CSArgPos); @@ -3998,7 +3999,7 @@ struct AACaptureUseTracker final : public CaptureTracker { return isCapturedIn(/* Memory */ false, /* Integer */ false, /* Return */ false); if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - addPotentialCopy(*CB); + addPotentialCopy(CS); return isCapturedIn(/* Memory */ false, /* Integer */ false, /* Return */ false); } @@ -4009,7 +4010,9 @@ struct AACaptureUseTracker final : public CaptureTracker { } /// Register \p CS as potential copy of the value we are checking. - void addPotentialCopy(CallBase &CB) { PotentialCopies.push_back(&CB); } + void addPotentialCopy(CallSite CS) { + PotentialCopies.push_back(CS.getInstruction()); + } /// See CaptureTracker::shouldExplore(...). bool shouldExplore(const Use *U) override { @@ -4989,9 +4992,10 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Helper to check if for the given call site the associated argument is // passed to a callback where the privatization would be different. - auto IsCompatiblePrivArgOfCallback = [&](CallBase &CB) { + auto IsCompatiblePrivArgOfCallback = [&](CallSite CS) { SmallVector CallbackUses; - AbstractCallSite::getCallbackUses(CB, CallbackUses); + AbstractCallSite::getCallbackUses(cast(*CS.getInstruction()), + CallbackUses); for (const Use *U : CallbackUses) { AbstractCallSite CBACS(U); assert(CBACS && CBACS.isCallbackCall()); @@ -5008,7 +5012,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { << CBArgNo << "@" << CBACS.getCalledFunction()->getName() << ")\n[AAPrivatizablePtr] " << CBArg << " : " << CBACS.getCallArgOperand(CBArg) << " vs " - << CB.getArgOperand(ArgNo) << "\n" + << CS.getArgOperand(ArgNo) << "\n" << "[AAPrivatizablePtr] " << CBArg << " : " << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n"; }); @@ -5090,7 +5094,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // here. auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) { if (ACS.isDirectCall()) - return IsCompatiblePrivArgOfCallback(*ACS.getInstruction()); + return IsCompatiblePrivArgOfCallback(CallSite(ACS.getInstruction())); if (ACS.isCallbackCall()) return IsCompatiblePrivArgOfDirectCS(ACS); return false; @@ -5723,9 +5727,9 @@ ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { // If the instruction has an own memory behavior state, use it to restrict // the local state. No further analysis is required as the other memory // state is as optimistic as it gets. 
- if (const auto *CB = dyn_cast(&I)) { + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { const auto &MemBehaviorAA = A.getAAFor( - *this, IRPosition::callsite_function(*CB)); + *this, IRPosition::callsite_function(ICS)); intersectAssumedBits(MemBehaviorAA.getAssumed()); return !isAtFixpoint(); } @@ -5823,8 +5827,8 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, // By default we follow all uses assuming UserI might leak information on U, // we have special handling for call sites operands though. - const auto *CB = dyn_cast(UserI); - if (!CB || !CB->isArgOperand(U)) + ImmutableCallSite ICS(UserI); + if (!ICS || !ICS.isArgOperand(U)) return true; // If the use is a call argument known not to be captured, the users of @@ -5834,9 +5838,9 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, // call might the argument "through return", which we allow and for which we // need to check call users. if (U->get()->getType()->isPointerTy()) { - unsigned ArgNo = CB->getArgOperandNo(U); + unsigned ArgNo = ICS.getArgumentNo(U); const auto &ArgNoCaptureAA = A.getAAFor( - *this, IRPosition::callsite_argument(*CB, ArgNo), + *this, IRPosition::callsite_argument(ICS, ArgNo), /* TrackDependence */ true, DepClassTy::OPTIONAL); return !ArgNoCaptureAA.isAssumedNoCapture(); } @@ -5870,17 +5874,17 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, case Instruction::Invoke: { // For call sites we look at the argument memory behavior attribute (this // could be recursive!) in order to restrict our own state. - const auto *CB = cast(UserI); + ImmutableCallSite ICS(UserI); // Give up on operand bundles. - if (CB->isBundleOperand(U)) { + if (ICS.isBundleOperand(U)) { indicatePessimisticFixpoint(); return; } // Calling a function does read the function pointer, maybe write it if the // function is self-modifying. - if (CB->isCallee(U)) { + if (ICS.isCallee(U)) { removeAssumedBits(NO_READS); break; } @@ -5889,9 +5893,9 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, // argument. IRPosition Pos; if (U->get()->getType()->isPointerTy()) - Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(U)); + Pos = IRPosition::callsite_argument(ICS, ICS.getArgumentNo(U)); else - Pos = IRPosition::callsite_function(*CB); + Pos = IRPosition::callsite_function(ICS); const auto &MemBehaviorAA = A.getAAFor( *this, Pos, /* TrackDependence */ true, DepClassTy::OPTIONAL); @@ -6180,9 +6184,9 @@ void AAMemoryLocationImpl::categorizePtrValue( Changed); return true; } - if (const auto *CB = dyn_cast(&V)) { + if (ImmutableCallSite ICS = ImmutableCallSite(&V)) { const auto &NoAliasAA = - A.getAAFor(*this, IRPosition::callsite_returned(*CB)); + A.getAAFor(*this, IRPosition::callsite_returned(ICS)); if (NoAliasAA.isAssumedNoAlias()) { updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_MALLOCED_MEM, &I, &V, Changed); @@ -6222,32 +6226,32 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, AAMemoryLocation::StateType AccessedLocs; AccessedLocs.intersectAssumedBits(NO_LOCATIONS); - if (auto *CB = dyn_cast(&I)) { + if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { // First check if we assume any memory is access is visible. 
- const auto &CBMemLocationAA = - A.getAAFor(*this, IRPosition::callsite_function(*CB)); + const auto &ICSMemLocationAA = + A.getAAFor(*this, IRPosition::callsite_function(ICS)); LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I - << " [" << CBMemLocationAA << "]\n"); + << " [" << ICSMemLocationAA << "]\n"); - if (CBMemLocationAA.isAssumedReadNone()) + if (ICSMemLocationAA.isAssumedReadNone()) return NO_LOCATIONS; - if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) { + if (ICSMemLocationAA.isAssumedInaccessibleMemOnly()) { updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, NO_INACCESSIBLE_MEM, &I, nullptr, Changed); return AccessedLocs.getAssumed(); } - uint32_t CBAssumedNotAccessedLocs = - CBMemLocationAA.getAssumedNotAccessedLocation(); + uint32_t ICSAssumedNotAccessedLocs = + ICSMemLocationAA.getAssumedNotAccessedLocation(); // Set the argmemonly and global bit as we handle them separately below. - uint32_t CBAssumedNotAccessedLocsNoArgMem = - CBAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; + uint32_t ICSAssumedNotAccessedLocsNoArgMem = + ICSAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) { - if (CBAssumedNotAccessedLocsNoArgMem & CurMLK) + if (ICSAssumedNotAccessedLocsNoArgMem & CurMLK) continue; updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, CurMLK, &I, nullptr, Changed); @@ -6255,7 +6259,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, // Now handle global memory if it might be accessed. This is slightly tricky // as NO_GLOBAL_MEM has multiple bits set. - bool HasGlobalAccesses = ((~CBAssumedNotAccessedLocs) & NO_GLOBAL_MEM); + bool HasGlobalAccesses = ((~ICSAssumedNotAccessedLocs) & NO_GLOBAL_MEM); if (HasGlobalAccesses) { auto AccessPred = [&](const Instruction *, const Value *Ptr, AccessKind Kind, MemoryLocationsKind MLK) { @@ -6263,7 +6267,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, Ptr, Changed); return true; }; - if (!CBMemLocationAA.checkForAllAccessesToMemoryKind( + if (!ICSMemLocationAA.checkForAllAccessesToMemoryKind( AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false))) return AccessedLocs.getWorstState(); } @@ -6273,18 +6277,18 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n"); // Now handle argument memory if it might be accessed. - bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); + bool HasArgAccesses = ((~ICSAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); if (HasArgAccesses) { - for (unsigned ArgNo = 0, e = CB->getNumArgOperands(); ArgNo < e; + for (unsigned ArgNo = 0, e = ICS.getNumArgOperands(); ArgNo < e; ++ArgNo) { // Skip non-pointer arguments. - const Value *ArgOp = CB->getArgOperand(ArgNo); + const Value *ArgOp = ICS.getArgOperand(ArgNo); if (!ArgOp->getType()->isPtrOrPtrVectorTy()) continue; // Skip readnone arguments. 
- const IRPosition &ArgOpIRP = IRPosition::callsite_argument(*CB, ArgNo); + const IRPosition &ArgOpIRP = IRPosition::callsite_argument(ICS, ArgNo); const auto &ArgOpMemLocationAA = A.getAAFor( *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 7c1610f8073a..0987cd6597a4 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1242,15 +1242,15 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { } /// This is called on every byval argument in call sites. -bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { - const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); +bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { + const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); // Find out what feeds this byval argument. - Value *ByValArg = CB.getArgOperand(ArgNo); + Value *ByValArg = CS.getArgument(ArgNo); Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom( MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true, - CB.getIterator(), CB.getParent()); + CS.getInstruction()->getIterator(), CS.getInstruction()->getParent()); if (!DepInfo.isClobber()) return false; @@ -1269,16 +1269,16 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - MaybeAlign ByValAlign = CB.getParamAlign(ArgNo); - if (!ByValAlign) return false; + unsigned ByValAlign = CS.getParamAlignment(ArgNo); + if (ByValAlign == 0) return false; // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); - if (MDep->getSourceAlign() < ByValAlign && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign->value(), DL, - &CB, &AC, &DT) < ByValAlign->value()) + if (MDep->getSourceAlignment() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, + CS.getInstruction(), &AC, &DT) < ByValAlign) return false; // The address space of the memcpy source must match the byval argument @@ -1297,14 +1297,14 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { // not just the defining memcpy. MemDepResult SourceDep = MD->getPointerDependencyFrom( MemoryLocation::getForSource(MDep), false, - CB.getIterator(), MDep->getParent()); + CS.getInstruction()->getIterator(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), - "tmpcast", &CB); + "tmpcast", CS.getInstruction()); // Set the tmpcast's DebugLoc to MDep's TmpBitCast->setDebugLoc(MDep->getDebugLoc()); TmpCast = TmpBitCast; @@ -1312,10 +1312,10 @@ bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" << " " << *MDep << "\n" - << " " << CB << "\n"); + << " " << *CS.getInstruction() << "\n"); // Otherwise we're good! Update the byval argument. 
- CB.setArgOperand(ArgNo, TmpCast); + CS.setArgument(ArgNo, TmpCast); ++NumMemCpyInstr; return true; } @@ -1349,10 +1349,10 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { RepeatInstruction = processMemCpy(M); else if (MemMoveInst *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); - else if (auto *CB = dyn_cast(I)) { - for (unsigned I = 0, E = CB->arg_size(); I != E; ++I) - if (CB->isByValArgument(I)) - MadeChange |= processByValArgument(*CB, I); + else if (auto CS = CallSite(I)) { + for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) + if (CS.isByValArgument(i)) + MadeChange |= processByValArgument(CS, i); } // Reprocess the instruction if desired. From 5793c84925fa98f00dbc57f476448babc5ee3aaa Mon Sep 17 00:00:00 2001 From: Alex Brachet Date: Fri, 17 Apr 2020 13:21:05 -0400 Subject: [PATCH 192/216] [libc] Add write(2) implementation for Linux and FDReader test utility Summary: Adds `write` for Linux and FDReader utility which should be useful for some stdio tests as well. Reviewers: sivachandra, PaulkaToast Reviewed By: sivachandra Subscribers: mgorny, tschuett, libc-commits Differential Revision: https://reviews.llvm.org/D78184 --- libc/config/linux/api.td | 18 ++++++++++++ libc/include/CMakeLists.txt | 8 ++++++ libc/include/__posix-types.h | 5 ++++ libc/include/unistd.h.def | 16 +++++++++++ libc/lib/CMakeLists.txt | 3 ++ libc/spec/posix.td | 19 +++++++++++++ libc/spec/spec.td | 1 + libc/src/CMakeLists.txt | 1 + libc/src/unistd/CMakeLists.txt | 10 +++++++ libc/src/unistd/linux/CMakeLists.txt | 12 ++++++++ libc/src/unistd/linux/write.cpp | 27 ++++++++++++++++++ libc/src/unistd/write.h | 21 ++++++++++++++ libc/test/src/CMakeLists.txt | 1 + libc/test/src/unistd/CMakeLists.txt | 15 ++++++++++ libc/test/src/unistd/write_test.cpp | 29 +++++++++++++++++++ libc/utils/testutils/CMakeLists.txt | 3 ++ libc/utils/testutils/FDReader.h | 29 +++++++++++++++++++ libc/utils/testutils/FDReaderUnix.cpp | 41 +++++++++++++++++++++++++++ 18 files changed, 259 insertions(+) create mode 100644 libc/include/unistd.h.def create mode 100644 libc/src/unistd/CMakeLists.txt create mode 100644 libc/src/unistd/linux/CMakeLists.txt create mode 100644 libc/src/unistd/linux/write.cpp create mode 100644 libc/src/unistd/write.h create mode 100644 libc/test/src/unistd/CMakeLists.txt create mode 100644 libc/test/src/unistd/write_test.cpp create mode 100644 libc/utils/testutils/FDReader.h create mode 100644 libc/utils/testutils/FDReaderUnix.cpp diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index f176caee6a4e..741899abcca8 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -12,6 +12,13 @@ def SizeT : TypeDecl<"size_t"> { }]; } +def SSizeT : TypeDecl<"ssize_t"> { + let Decl = [{ + #define __need_ssize_t + #include <__posix-types.h> + }]; +} + def OffT : TypeDecl<"off_t"> { let Decl = [{ #define __need_off_t @@ -308,3 +315,14 @@ def ThreadsAPI : PublicAPI<"threads.h"> { "thrd_join", ]; } + +def UniStdAPI : PublicAPI<"unistd.h"> { + let TypeDeclarations = [ + SSizeT, + SizeT, + ]; + + let Functions = [ + "write", + ]; +} diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 6cd192c1a52e..f9564b323494 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -91,6 +91,14 @@ add_gen_header( .llvm_libc_common_h ) +add_gen_header( + unistd + DEF_FILE unistd.h.def + GEN_HDR unistd.h + DEPENDS + .llvm_libc_common_h +) + # TODO: Not all platforms will have a include/sys directory. 
Add the sys # directory and the targets for sys/*.h files conditional to the OS requiring # them. diff --git a/libc/include/__posix-types.h b/libc/include/__posix-types.h index d891ab9a63e3..30844728633b 100644 --- a/libc/include/__posix-types.h +++ b/libc/include/__posix-types.h @@ -14,3 +14,8 @@ typedef __INT64_TYPE__ off_t; #define __llvm_libc_off_t_defined #endif // __need_off_t + +#if defined(__need_ssize_t) && !defined(__llvm_libc_ssize_t_defined) +typedef __INT64_TYPE__ ssize_t; +#define __llvm_libc_ssize_t_defined +#endif // __need_ssize_t diff --git a/libc/include/unistd.h.def b/libc/include/unistd.h.def new file mode 100644 index 000000000000..42bab396b2d6 --- /dev/null +++ b/libc/include/unistd.h.def @@ -0,0 +1,16 @@ +//===-- C standard library header unistd.h --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UNISTD_H +#define LLVM_LIBC_UNISTD_H + +#include <__llvm-libc-common.h> + +%%public_api() + +#endif // LLVM_LIBC_UNISTD_H diff --git a/libc/lib/CMakeLists.txt b/libc/lib/CMakeLists.txt index bc245cdb481e..39654d8b2267 100644 --- a/libc/lib/CMakeLists.txt +++ b/libc/lib/CMakeLists.txt @@ -35,6 +35,9 @@ add_entrypoint_library( libc.src.threads.mtx_unlock libc.src.threads.thrd_create libc.src.threads.thrd_join + + # unistd.h entrypoints + libc.src.unistd.write ) add_entrypoint_library( diff --git a/libc/spec/posix.td b/libc/spec/posix.td index 172b0c30fede..dfde4d1d114a 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -12,6 +12,7 @@ def ConstRestrictStructSigactionPtr : ConstType; def POSIX : StandardSpec<"POSIX"> { NamedType OffTType = NamedType<"off_t">; + NamedType SSizeTType = NamedType<"ssize_t">; HeaderSpec Errno = HeaderSpec< "errno.h", @@ -174,9 +175,27 @@ def POSIX : StandardSpec<"POSIX"> { ] >; + HeaderSpec UniStd = HeaderSpec< + "unistd.h", + [], // Macros + [ + SSizeTType, + SizeTType, + ], + [], // Enumerations + [ + FunctionSpec< + "write", + RetValSpec, + [ArgSpec, ArgSpec, ArgSpec] + >, + ] + >; + let Headers = [ Errno, SysMMan, Signal, + UniStd, ]; } diff --git a/libc/spec/spec.td b/libc/spec/spec.td index ee04cef277b4..3dd339b435c4 100644 --- a/libc/spec/spec.td +++ b/libc/spec/spec.td @@ -46,6 +46,7 @@ def CharType : NamedType<"char">; // Common types def VoidPtr : PtrType; +def ConstVoidPtr : ConstType; def SizeTType : NamedType<"size_t">; def FloatPtr : PtrType; diff --git a/libc/src/CMakeLists.txt b/libc/src/CMakeLists.txt index 72b4bca34af2..88d2829d656a 100644 --- a/libc/src/CMakeLists.txt +++ b/libc/src/CMakeLists.txt @@ -8,5 +8,6 @@ add_subdirectory(string) # TODO: Add this target conditional to the target OS. 
add_subdirectory(sys) add_subdirectory(threads) +add_subdirectory(unistd) add_subdirectory(__support) diff --git a/libc/src/unistd/CMakeLists.txt b/libc/src/unistd/CMakeLists.txt new file mode 100644 index 000000000000..d0e5b14d6fd9 --- /dev/null +++ b/libc/src/unistd/CMakeLists.txt @@ -0,0 +1,10 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) +endif() + +add_entrypoint_object( + write + ALIAS + DEPENDS + .${LIBC_TARGET_OS}.write +) diff --git a/libc/src/unistd/linux/CMakeLists.txt b/libc/src/unistd/linux/CMakeLists.txt new file mode 100644 index 000000000000..bd2cba708e69 --- /dev/null +++ b/libc/src/unistd/linux/CMakeLists.txt @@ -0,0 +1,12 @@ +add_entrypoint_object( + write + SRCS + write.cpp + HDRS + ../write.h + DEPENDS + libc.include.unistd + libc.config.linux.linux_syscall_h + libc.include.sys_syscall + libc.src.errno.__errno_location +) diff --git a/libc/src/unistd/linux/write.cpp b/libc/src/unistd/linux/write.cpp new file mode 100644 index 000000000000..2778346fa5b5 --- /dev/null +++ b/libc/src/unistd/linux/write.cpp @@ -0,0 +1,27 @@ +//===-- Linux implementation of write -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/unistd/write.h" + +#include "config/linux/syscall.h" // For internal syscall function. +#include "include/sys/syscall.h" // For syscall numbers. +#include "src/__support/common.h" +#include "src/errno/llvmlibc_errno.h" + +namespace __llvm_libc { + +ssize_t LLVM_LIBC_ENTRYPOINT(write)(int fd, const void *buf, size_t count) { + long ret = __llvm_libc::syscall(SYS_write, fd, buf, count); + if (ret < 0) { + llvmlibc_errno = -ret; + return -1; + } + return ret; +} + +} // namespace __llvm_libc diff --git a/libc/src/unistd/write.h b/libc/src/unistd/write.h new file mode 100644 index 000000000000..d69c10a66f0f --- /dev/null +++ b/libc/src/unistd/write.h @@ -0,0 +1,21 @@ +//===-- Implementation header for write -------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_UNISTD_WRITE_H +#define LLVM_LIBC_SRC_UNISTD_WRITE_H + +#include "include/unistd.h" +#include + +namespace __llvm_libc { + +ssize_t write(int fd, const void *buf, size_t count); + +} // namespace __llvm_libc + +#endif // LLVM_LIBC_SRC_UNISTD_WRITE_H diff --git a/libc/test/src/CMakeLists.txt b/libc/test/src/CMakeLists.txt index f92a0463b359..d333108aa02b 100644 --- a/libc/test/src/CMakeLists.txt +++ b/libc/test/src/CMakeLists.txt @@ -7,3 +7,4 @@ add_subdirectory(stdlib) add_subdirectory(string) add_subdirectory(sys) add_subdirectory(threads) +add_subdirectory(unistd) diff --git a/libc/test/src/unistd/CMakeLists.txt b/libc/test/src/unistd/CMakeLists.txt new file mode 100644 index 000000000000..7aaa50d0b11b --- /dev/null +++ b/libc/test/src/unistd/CMakeLists.txt @@ -0,0 +1,15 @@ +add_libc_testsuite(libc_unistd_unittests) + +add_libc_unittest( + write_test + SUITE + libc_unistd_unittests + SRCS + write_test.cpp + DEPENDS + libc.src.unistd.write + libc.include.errno + # TODO(sivachandra): Remove redundant deps. + libc.src.errno.__errno_location + libc.include.unistd +) diff --git a/libc/test/src/unistd/write_test.cpp b/libc/test/src/unistd/write_test.cpp new file mode 100644 index 000000000000..2a91ef6fc277 --- /dev/null +++ b/libc/test/src/unistd/write_test.cpp @@ -0,0 +1,29 @@ +//===-- Unittests for write -----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/errno.h" +#include "src/unistd/write.h" +#include "utils/UnitTest/ErrnoSetterMatcher.h" +#include "utils/UnitTest/Test.h" +#include "utils/testutils/FDReader.h" + +TEST(UniStd, WriteBasic) { + using __llvm_libc::testing::ErrnoSetterMatcher::Succeeds; + constexpr const char *hello = "hello"; + __llvm_libc::testutils::FDReader reader; + EXPECT_THAT(__llvm_libc::write(reader.getWriteFD(), hello, 5), Succeeds(5)); + EXPECT_TRUE(reader.matchWritten(hello)); +} + +TEST(UniStd, WriteFails) { + using __llvm_libc::testing::ErrnoSetterMatcher::Fails; + + EXPECT_THAT(__llvm_libc::write(-1, "", 1), Fails(EBADF)); + EXPECT_THAT(__llvm_libc::write(1, reinterpret_cast(-1), 1), + Fails(EFAULT)); +} diff --git a/libc/utils/testutils/CMakeLists.txt b/libc/utils/testutils/CMakeLists.txt index 9ee03e18c66b..80c23f4b0769 100644 --- a/libc/utils/testutils/CMakeLists.txt +++ b/libc/utils/testutils/CMakeLists.txt @@ -1,5 +1,6 @@ if(CMAKE_HOST_UNIX) set(EFFile ExecuteFunctionUnix.cpp) + set(FDReaderFile FDReaderUnix.cpp) endif() add_llvm_library( @@ -8,6 +9,8 @@ add_llvm_library( StreamWrapper.h ${EFFile} ExecuteFunction.h + ${FDReaderFile} + FDReader.h LINK_COMPONENTS Support ) diff --git a/libc/utils/testutils/FDReader.h b/libc/utils/testutils/FDReader.h new file mode 100644 index 000000000000..e25dcea290a6 --- /dev/null +++ b/libc/utils/testutils/FDReader.h @@ -0,0 +1,29 @@ +//===-- FDReader.h ----------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_UTILS_TESTUTILS_FDREADER_H +#define LLVM_LIBC_UTILS_TESTUTILS_FDREADER_H + +namespace __llvm_libc { +namespace testutils { + +class FDReader { + int pipefd[2]; + +public: + FDReader(); + ~FDReader(); + + int getWriteFD() { return pipefd[1]; } + bool matchWritten(const char *); +}; + +} // namespace testutils +} // namespace __llvm_libc + +#endif // LLVM_LIBC_UTILS_TESTUTILS_FDREADER_H diff --git a/libc/utils/testutils/FDReaderUnix.cpp b/libc/utils/testutils/FDReaderUnix.cpp new file mode 100644 index 000000000000..943d3eb5356e --- /dev/null +++ b/libc/utils/testutils/FDReaderUnix.cpp @@ -0,0 +1,41 @@ +//===-- FDReader.cpp ------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "FDReader.h" +#include "llvm/Support/MemoryBuffer.h" +#include +#include +#include + +namespace __llvm_libc { +namespace testutils { + +FDReader::FDReader() { + int err = ::pipe(pipefd); + assert(!err && "pipe(2) failed"); +} + +FDReader::~FDReader() { + ::close(pipefd[0]); + ::close(pipefd[1]); +} + +bool FDReader::matchWritten(const char *str) { + llvm::ErrorOr> bufOrErr = + llvm::MemoryBuffer::getOpenFile(pipefd[0], "", + /* FileSize (irrelevant) */ 0); + if (!bufOrErr) { + assert(0 && "Error reading from pipe"); + return false; + } + const llvm::MemoryBuffer &buf = **bufOrErr; + return !std::strncmp(buf.getBufferStart(), str, buf.getBufferSize()); +} + +} // namespace testutils +} // namespace __llvm_libc From a4b88c044980337bb14390be654fe76864aa60ec Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Fri, 17 Apr 2020 09:43:55 -0700 Subject: [PATCH 193/216] Revert "Implement _ExtInt as an extended int type specifier." This reverts commit 61ba1481e200b5b35baa81ffcff81acb678e8508. I'm reverting this because it breaks the lldb build with incomplete switch coverage warnings. I would fix it forward, but am not familiar enough with lldb to determine the correct fix. 
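For reference, a forward-fix would most likely have added the two new
enumerators to each switch flagged below. The following is only a hypothetical
sketch of that shape (it assumes the reverted commit were kept and that lldb
would treat the new type classes as unsupported for now); it is not the actual
change, and the enumerator names are taken from the diagnostics quoted after it:

  // Hypothetical sketch, not part of this patch.
  #include "clang/AST/Type.h"

  static bool isUnsupportedExtIntTypeClass(const clang::Type *T) {
    switch (T->getTypeClass()) {
    case clang::Type::ExtInt:          // added by the reverted commit
    case clang::Type::DependentExtInt: // added by the reverted commit
      return true; // no lldb handling yet; route to the unsupported path
    default:
      return false;
    }
  }

The build errors were: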
lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp:3958:11: error: enumeration values 'DependentExtInt' and 'ExtInt' not handled in switch [-Werror,-Wswitch] switch (qual_type->getTypeClass()) { ^ lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp:4633:11: error: enumeration values 'DependentExtInt' and 'ExtInt' not handled in switch [-Werror,-Wswitch] switch (qual_type->getTypeClass()) { ^ lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp:4889:11: error: enumeration values 'DependentExtInt' and 'ExtInt' not handled in switch [-Werror,-Wswitch] switch (qual_type->getTypeClass()) { --- clang/docs/LanguageExtensions.rst | 53 --- clang/docs/ReleaseNotes.rst | 8 - clang/include/clang/AST/ASTContext.h | 10 - clang/include/clang/AST/RecursiveASTVisitor.h | 9 - clang/include/clang/AST/Type.h | 70 +-- clang/include/clang/AST/TypeLoc.h | 6 - clang/include/clang/AST/TypeProperties.td | 25 - .../clang/Basic/DiagnosticSemaKinds.td | 14 +- clang/include/clang/Basic/Specifiers.h | 1 - clang/include/clang/Basic/TokenKinds.def | 1 - clang/include/clang/Basic/TypeNodes.td | 2 - clang/include/clang/Parse/Parser.h | 1 - clang/include/clang/Sema/DeclSpec.h | 6 +- clang/include/clang/Sema/Sema.h | 1 - .../clang/Serialization/TypeBitCodes.def | 2 - clang/lib/AST/ASTContext.cpp | 67 --- clang/lib/AST/ASTStructuralEquivalence.cpp | 18 - clang/lib/AST/ExprConstant.cpp | 1 - clang/lib/AST/ItaniumMangle.cpp | 24 - clang/lib/AST/MicrosoftMangle.cpp | 24 - clang/lib/AST/Type.cpp | 63 +-- clang/lib/AST/TypePrinter.cpp | 24 - clang/lib/CodeGen/CGDebugInfo.cpp | 13 - clang/lib/CodeGen/CGDebugInfo.h | 1 - clang/lib/CodeGen/CGExprScalar.cpp | 26 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 3 +- clang/lib/CodeGen/CodeGenFunction.cpp | 2 - clang/lib/CodeGen/CodeGenTBAA.cpp | 9 - clang/lib/CodeGen/CodeGenTypes.cpp | 21 +- clang/lib/CodeGen/CodeGenTypes.h | 2 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 6 +- clang/lib/Parse/ParseDecl.cpp | 30 -- clang/lib/Parse/ParseExpr.cpp | 1 - clang/lib/Parse/ParseExprCXX.cpp | 13 - clang/lib/Parse/ParseTentative.cpp | 20 - clang/lib/Sema/DeclSpec.cpp | 28 +- clang/lib/Sema/SemaChecking.cpp | 6 - clang/lib/Sema/SemaDecl.cpp | 8 +- clang/lib/Sema/SemaDeclAttr.cpp | 5 +- clang/lib/Sema/SemaExpr.cpp | 15 +- clang/lib/Sema/SemaLookup.cpp | 1 - clang/lib/Sema/SemaTemplate.cpp | 17 +- clang/lib/Sema/SemaTemplateDeduction.cpp | 34 -- clang/lib/Sema/SemaTemplateVariadic.cpp | 1 - clang/lib/Sema/SemaType.cpp | 66 --- clang/lib/Sema/TreeTransform.h | 76 --- clang/lib/Serialization/ASTReader.cpp | 9 - clang/lib/Serialization/ASTWriter.cpp | 8 - clang/test/CodeGen/ext-int-sanitizer.cpp | 265 ----------- clang/test/CodeGen/ext-int.c | 44 -- clang/test/CodeGenCXX/ext-int.cpp | 432 ------------------ clang/test/CodeGenOpenCL/ext-int-shift.cl | 21 - clang/test/Parser/ext-int.cpp | 15 - clang/test/SemaCXX/ext-int.cpp | 278 ----------- clang/tools/libclang/CIndex.cpp | 2 - 55 files changed, 36 insertions(+), 1872 deletions(-) delete mode 100644 clang/test/CodeGen/ext-int-sanitizer.cpp delete mode 100644 clang/test/CodeGen/ext-int.c delete mode 100644 clang/test/CodeGenCXX/ext-int.cpp delete mode 100644 clang/test/CodeGenOpenCL/ext-int-shift.cl delete mode 100644 clang/test/Parser/ext-int.cpp delete mode 100644 clang/test/SemaCXX/ext-int.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 07062a191ce8..929cd1c67e73 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3461,56 +3461,3 @@ Since the size of 
``buffer`` can't be known at compile time, Clang will fold ``__builtin_object_size(buffer, 0)`` into ``-1``. However, if this was written as ``__builtin_dynamic_object_size(buffer, 0)``, Clang will fold it into ``size``, providing some extra runtime safety. - -Extended Integer Types -====================== - -Clang supports a set of extended integer types under the syntax ``_ExtInt(N)`` -where ``N`` is an integer that specifies the number of bits that are used to represent -the type, including the sign bit. The keyword ``_ExtInt`` is a type specifier, thus -it can be used in any place a type can, including as a non-type-template-parameter, -as the type of a bitfield, and as the underlying type of an enumeration. - -An extended integer can be declared either signed, or unsigned by using the -``signed``/``unsigned`` keywords. If no sign specifier is used or if the ``signed`` -keyword is used, the extended integer type is a signed integer and can represent -negative values. - -The ``N`` expression is an integer constant expression, which specifies the number -of bits used to represent the type, following normal integer representations for -both signed and unsigned types. Both a signed and unsigned extended integer of the -same ``N`` value will have the same number of bits in its representation. Many -architectures don't have a way of representing non power-of-2 integers, so these -architectures emulate these types using larger integers. In these cases, they are -expected to follow the 'as-if' rule and do math 'as-if' they were done at the -specified number of bits. - -In order to be consistent with the C language specification, and make the extended -integer types useful for their intended purpose, extended integers follow the C -standard integer conversion ranks. An extended integer type has a greater rank than -any integer type with less precision. However, they have lower rank than any -of the built in or other integer types (such as __int128). Usual arithmetic conversions -also work the same, where the smaller ranked integer is converted to the larger. - -The one exception to the C rules for integers for these types is Integer Promotion. -Unary +, -, and ~ operators typically will promote operands to ``int``. Doing these -promotions would inflate the size of required hardware on some platforms, so extended -integer types aren't subject to the integer promotion rules in these cases. - -In languages (such as OpenCL) that define shift by-out-of-range behavior as a mask, -non-power-of-two versions of these types use an unsigned remainder operation to constrain -the value to the proper range, preventing undefined behavior. - -Extended integer types are aligned to the next greatest power-of-2 up to 64 bits. -The size of these types for the purposes of layout and ``sizeof`` are the number of -bits aligned to this calculated alignment. This permits the use of these types in -allocated arrays using common ``sizeof(Array)/sizeof(ElementType)`` pattern. - -Extended integer types work with the C _Atomic type modifier, however only precisions -that are powers-of-2 greater than 8 bit are accepted. - -Extended integer types align with existing calling conventions. They have the same size -and alignment as the smallest basic type that can contain them. Types that are larger -than 64 bits are handled in the same way as _int128 is handled; they are conceptually -treated as struct of register size chunks. 
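A brief usage sketch of the behavior described in this (removed) documentation,
illustrative only and assuming a Clang build that still carries the feature on
a typical 64-bit target:

  _ExtInt(12) mul12(_ExtInt(12) A, _ExtInt(12) B) {
    return A * B; // no promotion to int; arithmetic stays at the 12-bit width
  }
  unsigned _ExtInt(5) Mask = 31; // unsigned, 5-bit value
  static_assert(sizeof(_ExtInt(33)) == 8,
                "33 bits round up to the 64-bit alignment for layout/sizeof");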
They number of chunks are the smallest -number that can contain the types which does not necessarily mean a power-of-2 size. diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 54deba7bbd0e..a8163cad9fde 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -62,14 +62,6 @@ Non-comprehensive list of changes in this release in the Arm C Language Extensions. -* clang adds support for a set of extended integer types (``_ExtInt(N)``) that - permit non-power of 2 integers, exposing the LLVM integer types. Since a major - motivating use case for these types is to limit 'bit' usage, these types don't - automatically promote to 'int' when operations are done between two ``ExtInt(N)`` - types, instead math occurs at the size of the largest ``ExtInt(N)`` type. - - - New Compiler Flags ------------------ diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h index dedbd857819d..ac742fefc109 100644 --- a/clang/include/clang/AST/ASTContext.h +++ b/clang/include/clang/AST/ASTContext.h @@ -224,8 +224,6 @@ class ASTContext : public RefCountedBase { mutable llvm::FoldingSet AtomicTypes; llvm::FoldingSet AttributedTypes; mutable llvm::FoldingSet PipeTypes; - mutable llvm::FoldingSet ExtIntTypes; - mutable llvm::FoldingSet DependentExtIntTypes; mutable llvm::FoldingSet QualifiedTemplateNames; mutable llvm::FoldingSet DependentTemplateNames; @@ -1205,14 +1203,6 @@ class ASTContext : public RefCountedBase { /// Return a write_only pipe type for the specified type. QualType getWritePipeType(QualType T) const; - /// Return an extended integer type with the specified signedness and bit - /// count. - QualType getExtIntType(bool Unsigned, unsigned NumBits) const; - - /// Return a dependent extended integer type with the specified signedness and - /// bit count. - QualType getDependentExtIntType(bool Unsigned, Expr *BitsExpr) const; - /// Gets the struct used to keep track of the extended descriptor for /// pointer to blocks. 
QualType getBlockDescriptorExtendedType() const; diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index b71f7994e2fa..85eb6259a419 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1115,10 +1115,6 @@ DEF_TRAVERSE_TYPE(AtomicType, { TRY_TO(TraverseType(T->getValueType())); }) DEF_TRAVERSE_TYPE(PipeType, { TRY_TO(TraverseType(T->getElementType())); }) -DEF_TRAVERSE_TYPE(ExtIntType, {}) -DEF_TRAVERSE_TYPE(DependentExtIntType, - { TRY_TO(TraverseStmt(T->getNumBitsExpr())); }) - #undef DEF_TRAVERSE_TYPE // ----------------- TypeLoc traversal ----------------- @@ -1389,11 +1385,6 @@ DEF_TRAVERSE_TYPELOC(AtomicType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) DEF_TRAVERSE_TYPELOC(PipeType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) -DEF_TRAVERSE_TYPELOC(ExtIntType, {}) -DEF_TRAVERSE_TYPELOC(DependentExtIntType, { - TRY_TO(TraverseStmt(TL.getTypePtr()->getNumBitsExpr())); -}) - #undef DEF_TRAVERSE_TYPELOC // ----------------- Decl traversal ----------------- diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 322b14ce641a..f78d9d7670a7 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2101,7 +2101,6 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { bool isOCLExtOpaqueType() const; // Any OpenCL extension type bool isPipeType() const; // OpenCL pipe type - bool isExtIntType() const; // Extended Int Type bool isOpenCLSpecificType() const; // Any OpenCL specific type /// Determines if this type, which must satisfy @@ -6128,64 +6127,6 @@ class PipeType : public Type, public llvm::FoldingSetNode { bool isReadOnly() const { return isRead; } }; -/// A fixed int type of a specified bitwidth. -class ExtIntType final : public Type, public llvm::FoldingSetNode { - friend class ASTContext; - unsigned IsUnsigned : 1; - unsigned NumBits : 24; - -protected: - ExtIntType(bool isUnsigned, unsigned NumBits); - -public: - bool isUnsigned() const { return IsUnsigned; } - bool isSigned() const { return !IsUnsigned; } - unsigned getNumBits() const { return NumBits; } - - bool isSugared() const { return false; } - QualType desugar() const { return QualType(this, 0); } - - void Profile(llvm::FoldingSetNodeID &ID) { - Profile(ID, isUnsigned(), getNumBits()); - } - - static void Profile(llvm::FoldingSetNodeID &ID, bool IsUnsigned, - unsigned NumBits) { - ID.AddBoolean(IsUnsigned); - ID.AddInteger(NumBits); - } - - static bool classof(const Type *T) { return T->getTypeClass() == ExtInt; } -}; - -class DependentExtIntType final : public Type, public llvm::FoldingSetNode { - friend class ASTContext; - const ASTContext &Context; - llvm::PointerIntPair ExprAndUnsigned; - -protected: - DependentExtIntType(const ASTContext &Context, bool IsUnsigned, - Expr *NumBits); - -public: - bool isUnsigned() const; - bool isSigned() const { return !isUnsigned(); } - Expr *getNumBitsExpr() const; - - bool isSugared() const { return false; } - QualType desugar() const { return QualType(this, 0); } - - void Profile(llvm::FoldingSetNodeID &ID) { - Profile(ID, Context, isUnsigned(), getNumBitsExpr()); - } - static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, - bool IsUnsigned, Expr *NumBitsExpr); - - static bool classof(const Type *T) { - return T->getTypeClass() == DependentExtInt; - } -}; - /// A qualifier set is used to build a set of qualifiers. 
class QualifierCollector : public Qualifiers { public: @@ -6705,10 +6646,6 @@ inline bool Type::isPipeType() const { return isa(CanonicalType); } -inline bool Type::isExtIntType() const { - return isa(CanonicalType); -} - #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ inline bool Type::is##Id##Type() const { \ return isSpecificBuiltinType(BuiltinType::Id); \ @@ -6804,7 +6741,7 @@ inline bool Type::isIntegerType() const { return IsEnumDeclComplete(ET->getDecl()) && !IsEnumDeclScoped(ET->getDecl()); } - return isExtIntType(); + return false; } inline bool Type::isFixedPointType() const { @@ -6861,8 +6798,7 @@ inline bool Type::isScalarType() const { isa(CanonicalType) || isa(CanonicalType) || isa(CanonicalType) || - isa(CanonicalType) || - isExtIntType(); + isa(CanonicalType); } inline bool Type::isIntegralOrEnumerationType() const { @@ -6875,7 +6811,7 @@ inline bool Type::isIntegralOrEnumerationType() const { if (const auto *ET = dyn_cast(CanonicalType)) return IsEnumDeclComplete(ET->getDecl()); - return isExtIntType(); + return false; } inline bool Type::isBooleanType() const { diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index 2221485983b2..3fc53d823c37 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -2450,12 +2450,6 @@ inline T TypeLoc::getAsAdjusted() const { } return Cur.getAs(); } -class ExtIntTypeLoc final - : public InheritingConcreteTypeLoc {}; -class DependentExtIntTypeLoc final - : public InheritingConcreteTypeLoc {}; } // namespace clang diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 12bc5a4ee8a3..994f932170ae 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -833,28 +833,3 @@ let Class = PipeType in { return ctx.getPipeType(elementType, isReadOnly); }]>; } - -let Class = ExtIntType in { - def : Property<"isUnsigned", Bool> { - let Read = [{ node->isUnsigned() }]; - } - def : Property <"numBits", UInt32> { - let Read = [{ node->getNumBits() }]; - } - - def : Creator<[{ - return ctx.getExtIntType(isUnsigned, numBits); - }]>; -} - -let Class = DependentExtIntType in { - def : Property<"isUnsigned", Bool> { - let Read = [{ node->isUnsigned() }]; - } - def : Property <"numBitsExpr", ExprRef> { - let Read = [{ node->getNumBitsExpr() }]; - } - def : Creator<[{ - return ctx.getDependentExtIntType(isUnsigned, numBitsExpr); - }]>; -} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 97ad1a6c7920..35a7a05667fc 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5947,12 +5947,10 @@ def err_block_return_missing_expr : Error< "non-void block should return a value">; def err_func_def_incomplete_result : Error< "incomplete result type %0 in function definition">; -def err_atomic_specifier_bad_type - : Error<"_Atomic cannot be applied to " - "%select{incomplete |array |function |reference |atomic |qualified " - "|sizeless ||integer |integer }0type " - "%1 %select{|||||||which is not trivially copyable|with less than " - "1 byte of precision|with a non power of 2 precision}0">; +def err_atomic_specifier_bad_type : Error< + "_Atomic cannot be applied to " + "%select{incomplete |array |function |reference |atomic |qualified |sizeless |}0type " + "%1 %select{|||||||which is not trivially copyable}0">; // Expressions. 
def select_unary_expr_or_type_trait_kind : TextSubstitution< @@ -10713,8 +10711,4 @@ def warn_sycl_kernel_return_type : Warning< "function template with 'sycl_kernel' attribute must have a 'void' return type">, InGroup; -def err_ext_int_bad_size : Error<"%select{signed|unsigned}0 _ExtInt must " - "have a bit size of at least %select{2|1}0">; -def err_ext_int_max_size : Error<"%select{signed|unsigned}0 _ExtInt of bit " - "sizes greater than %1 not supported">; } // end of sema component. diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index e6c2cb39566c..73823dc01ec7 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -67,7 +67,6 @@ namespace clang { TST_char32, // C++11 char32_t TST_int, TST_int128, - TST_extint, // Extended Int types. TST_half, // OpenCL half, ARM NEON __fp16 TST_Float16, // C11 extension ISO/IEC TS 18661-3 TST_Accum, // ISO/IEC JTC1 SC22 WG14 N1169 Extension diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 1da24a8fd38b..3b1062e48767 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -285,7 +285,6 @@ KEYWORD(goto , KEYALL) KEYWORD(if , KEYALL) KEYWORD(inline , KEYC99|KEYCXX|KEYGNU) KEYWORD(int , KEYALL) -KEYWORD(_ExtInt , KEYALL) KEYWORD(long , KEYALL) KEYWORD(register , KEYALL) KEYWORD(restrict , KEYC99) diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td index cd15a498642f..96d9472a488a 100644 --- a/clang/include/clang/Basic/TypeNodes.td +++ b/clang/include/clang/Basic/TypeNodes.td @@ -104,5 +104,3 @@ def ObjCInterfaceType : TypeNode, LeafType; def ObjCObjectPointerType : TypeNode; def PipeType : TypeNode; def AtomicType : TypeNode; -def ExtIntType : TypeNode; -def DependentExtIntType : TypeNode, AlwaysDependent; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index b4e96a5b85de..3f73a1b90268 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2721,7 +2721,6 @@ class Parser : public CodeCompletionHandler { SourceLocation &EllipsisLoc); void ParseAlignmentSpecifier(ParsedAttributes &Attrs, SourceLocation *endLoc = nullptr); - ExprResult ParseExtIntegerArgument(); VirtSpecifiers::Specifier isCXX11VirtSpecifier(const Token &Tok) const; VirtSpecifiers::Specifier isCXX11VirtSpecifier() const { diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 5bc13fe343f4..0e95e237e974 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -278,7 +278,6 @@ class DeclSpec { static const TST TST_char32 = clang::TST_char32; static const TST TST_int = clang::TST_int; static const TST TST_int128 = clang::TST_int128; - static const TST TST_extint = clang::TST_extint; static const TST TST_half = clang::TST_half; static const TST TST_float = clang::TST_float; static const TST TST_double = clang::TST_double; @@ -414,7 +413,7 @@ class DeclSpec { T == TST_underlyingType || T == TST_atomic); } static bool isExprRep(TST T) { - return (T == TST_typeofExpr || T == TST_decltype || T == TST_extint); + return (T == TST_typeofExpr || T == TST_decltype); } static bool isTemplateIdRep(TST T) { return (T == TST_auto || T == TST_decltype_auto); @@ -705,9 +704,6 @@ class DeclSpec { bool SetTypePipe(bool isPipe, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy); - bool 
SetExtIntType(SourceLocation KWLoc, Expr *BitWidth, - const char *&PrevSpec, unsigned &DiagID, - const PrintingPolicy &Policy); bool SetTypeSpecSat(SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID); bool SetTypeSpecError(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index af58b0ec4e82..a1a0b854a85b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1678,7 +1678,6 @@ class Sema final { SourceLocation Loc); QualType BuildWritePipeType(QualType T, SourceLocation Loc); - QualType BuildExtIntType(bool IsUnsigned, Expr *BitWidth, SourceLocation Loc); TypeSourceInfo *GetTypeForDeclarator(Declarator &D, Scope *S); TypeSourceInfo *GetTypeForDeclaratorCast(Declarator &D, QualType FromTy); diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def index 561c8869ead6..38c73ccb7daf 100644 --- a/clang/include/clang/Serialization/TypeBitCodes.def +++ b/clang/include/clang/Serialization/TypeBitCodes.def @@ -58,7 +58,5 @@ TYPE_BIT_CODE(DependentSizedExtVector, DEPENDENT_SIZED_EXT_VECTOR, 46) TYPE_BIT_CODE(DependentAddressSpace, DEPENDENT_ADDRESS_SPACE, 47) TYPE_BIT_CODE(DependentVector, DEPENDENT_SIZED_VECTOR, 48) TYPE_BIT_CODE(MacroQualified, MACRO_QUALIFIED, 49) -TYPE_BIT_CODE(ExtInt, EXT_INT, 50) -TYPE_BIT_CODE(DependentExtInt, DEPENDENT_EXT_INT, 51) #undef TYPE_BIT_CODE diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 8734dd390247..34bb07cd3f78 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2180,15 +2180,6 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = toBits(Layout.getAlignment()); break; } - case Type::ExtInt: { - const auto *EIT = cast(T); - Align = - std::min(static_cast(std::max( - getCharWidth(), llvm::PowerOf2Ceil(EIT->getNumBits()))), - Target->getLongLongAlign()); - Width = llvm::alignTo(EIT->getNumBits(), Align); - break; - } case Type::Record: case Type::Enum: { const auto *TT = cast(T); @@ -3385,8 +3376,6 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::Auto: case Type::DeducedTemplateSpecialization: case Type::PackExpansion: - case Type::ExtInt: - case Type::DependentExtInt: llvm_unreachable("type should never be variably-modified"); // These types can be variably-modified but should never need to @@ -4081,39 +4070,6 @@ QualType ASTContext::getWritePipeType(QualType T) const { return getPipeType(T, false); } -QualType ASTContext::getExtIntType(bool IsUnsigned, unsigned NumBits) const { - llvm::FoldingSetNodeID ID; - ExtIntType::Profile(ID, IsUnsigned, NumBits); - - void *InsertPos = nullptr; - if (ExtIntType *EIT = ExtIntTypes.FindNodeOrInsertPos(ID, InsertPos)) - return QualType(EIT, 0); - - auto *New = new (*this, TypeAlignment) ExtIntType(IsUnsigned, NumBits); - ExtIntTypes.InsertNode(New, InsertPos); - Types.push_back(New); - return QualType(New, 0); -} - -QualType ASTContext::getDependentExtIntType(bool IsUnsigned, - Expr *NumBitsExpr) const { - assert(NumBitsExpr->isInstantiationDependent() && "Only good for dependent"); - llvm::FoldingSetNodeID ID; - DependentExtIntType::Profile(ID, *this, IsUnsigned, NumBitsExpr); - - void *InsertPos = nullptr; - if (DependentExtIntType *Existing = - DependentExtIntTypes.FindNodeOrInsertPos(ID, InsertPos)) - return QualType(Existing, 0); - - auto *New = new (*this, TypeAlignment) - DependentExtIntType(*this, IsUnsigned, NumBitsExpr); - DependentExtIntTypes.InsertNode(New, InsertPos); - - 
Types.push_back(New); - return QualType(New, 0); -} - #ifndef NDEBUG static bool NeedsInjectedClassNameType(const RecordDecl *D) { if (!isa(D)) return false; @@ -5949,11 +5905,6 @@ int ASTContext::getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const { unsigned ASTContext::getIntegerRank(const Type *T) const { assert(T->isCanonicalUnqualified() && "T should be canonicalized"); - // Results in this 'losing' to any type of the same size, but winning if - // larger. - if (const auto *EIT = dyn_cast(T)) - return 0 + (EIT->getNumBits() << 3); - switch (cast(T)->getKind()) { default: llvm_unreachable("getIntegerRank(): not a built-in integer"); case BuiltinType::Bool: @@ -7337,7 +7288,6 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, return; case Type::Pipe: - case Type::ExtInt: #define ABSTRACT_TYPE(KIND, BASE) #define TYPE(KIND, BASE) #define DEPENDENT_TYPE(KIND, BASE) \ @@ -9431,21 +9381,6 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, assert(LHS != RHS && "Equivalent pipe types should have already been handled!"); return {}; - case Type::ExtInt: { - // Merge two ext-int types, while trying to preserve typedef info. - bool LHSUnsigned = LHS->castAs()->isUnsigned(); - bool RHSUnsigned = RHS->castAs()->isUnsigned(); - unsigned LHSBits = LHS->castAs()->getNumBits(); - unsigned RHSBits = RHS->castAs()->getNumBits(); - - // Like unsigned/int, shouldn't have a type if they dont match. - if (LHSUnsigned != RHSUnsigned) - return {}; - - if (LHSBits != RHSBits) - return {}; - return LHS; - } } llvm_unreachable("Invalid Type::Class!"); @@ -9586,8 +9521,6 @@ unsigned ASTContext::getIntWidth(QualType T) const { T = ET->getDecl()->getIntegerType(); if (T->isBooleanType()) return 1; - if(const auto *EIT = T->getAs()) - return EIT->getNumBits(); // For builtin types, just use the standard type sizing method return (unsigned)getTypeSize(T); } diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index c562830c41e1..c29b7b2f5907 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -949,24 +949,6 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, cast(T2)->getElementType())) return false; break; - case Type::ExtInt: { - const auto *Int1 = cast(T1); - const auto *Int2 = cast(T2); - - if (Int1->isUnsigned() != Int2->isUnsigned() || - Int1->getNumBits() != Int2->getNumBits()) - return false; - break; - } - case Type::DependentExtInt: { - const auto *Int1 = cast(T1); - const auto *Int2 = cast(T2); - - if (Int1->isUnsigned() != Int2->isUnsigned() || - !IsStructurallyEquivalent(Context, Int1->getNumBitsExpr(), - Int2->getNumBitsExpr())) - return false; - } } // end switch return true; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 8bc7a1128e7a..5b3866d0a471 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10354,7 +10354,6 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: - case Type::ExtInt: // GCC classifies vectors as None. We follow its lead and classify all // other types that don't fit into the regular classification the same way. 
return GCCTypeClass::None; diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index d60cacf07534..535bb86f0d5b 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2093,8 +2093,6 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::Atomic: case Type::Pipe: case Type::MacroQualified: - case Type::ExtInt: - case Type::DependentExtInt: llvm_unreachable("type is illegal as a nested name specifier"); case Type::SubstTemplateTypeParmPack: @@ -3553,28 +3551,6 @@ void CXXNameMangler::mangleType(const PipeType *T) { Out << "8ocl_pipe"; } -void CXXNameMangler::mangleType(const ExtIntType *T) { - Out << "U7_ExtInt"; - llvm::APSInt BW(32, true); - BW = T->getNumBits(); - TemplateArgument TA(Context.getASTContext(), BW, getASTContext().IntTy); - mangleTemplateArgs(&TA, 1); - if (T->isUnsigned()) - Out << "j"; - else - Out << "i"; -} - -void CXXNameMangler::mangleType(const DependentExtIntType *T) { - Out << "U7_ExtInt"; - TemplateArgument TA(T->getNumBitsExpr()); - mangleTemplateArgs(&TA, 1); - if (T->isUnsigned()) - Out << "j"; - else - Out << "i"; -} - void CXXNameMangler::mangleIntegerLiteral(QualType T, const llvm::APSInt &Value) { // ::= L E # integer literal diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index dc5c15fbef68..af51ae07bc57 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2953,30 +2953,6 @@ void MicrosoftMangleContextImpl::mangleCXXName(GlobalDecl GD, return Mangler.mangle(D); } -void MicrosoftCXXNameMangler::mangleType(const ExtIntType *T, Qualifiers, - SourceRange Range) { - llvm::SmallString<64> TemplateMangling; - llvm::raw_svector_ostream Stream(TemplateMangling); - MicrosoftCXXNameMangler Extra(Context, Stream); - Stream << "?$"; - if (T->isUnsigned()) - Extra.mangleSourceName("_UExtInt"); - else - Extra.mangleSourceName("_ExtInt"); - Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumBits()), - /*IsBoolean=*/false); - - mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"}); -} - -void MicrosoftCXXNameMangler::mangleType(const DependentExtIntType *T, - Qualifiers, SourceRange Range) { - DiagnosticsEngine &Diags = Context.getDiags(); - unsigned DiagID = Diags.getCustomDiagID( - DiagnosticsEngine::Error, "cannot mangle this DependentExtInt type yet"); - Diags.Report(Range.getBegin(), DiagID) << Range; -} - // ::= | | // // ::= A # private near diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 982aa8962f03..3428437c3146 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -293,39 +293,6 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, VectorTypeBits.NumElements = nElements; } -ExtIntType::ExtIntType(bool IsUnsigned, unsigned NumBits) - : Type(ExtInt, QualType{}, TypeDependence::None), IsUnsigned(IsUnsigned), - NumBits(NumBits) {} - -DependentExtIntType::DependentExtIntType(const ASTContext &Context, - bool IsUnsigned, Expr *NumBitsExpr) - : Type(DependentExtInt, QualType{}, - ((NumBitsExpr->isValueDependent() || NumBitsExpr->isTypeDependent()) - ? TypeDependence::Dependent - : TypeDependence::None) | - (NumBitsExpr->isInstantiationDependent() - ? TypeDependence::Instantiation - : TypeDependence::None) | - (NumBitsExpr->containsUnexpandedParameterPack() - ? 
TypeDependence::VariablyModified - : TypeDependence::None)), - Context(Context), ExprAndUnsigned(NumBitsExpr, IsUnsigned) {} - -bool DependentExtIntType::isUnsigned() const { - return ExprAndUnsigned.getInt(); -} - -clang::Expr *DependentExtIntType::getNumBitsExpr() const { - return ExprAndUnsigned.getPointer(); -} - -void DependentExtIntType::Profile(llvm::FoldingSetNodeID &ID, - const ASTContext &Context, bool IsUnsigned, - Expr *NumBitsExpr) { - ID.AddBoolean(IsUnsigned); - NumBitsExpr->Profile(ID, Context, true); -} - /// getArrayElementTypeNoTypeQual - If this is an array type, return the /// element type of the array, potentially with type qualifiers missing. /// This method should never be used when type qualifiers are meaningful. @@ -1869,17 +1836,13 @@ bool Type::isIntegralType(const ASTContext &Ctx) const { if (const auto *ET = dyn_cast(CanonicalType)) return ET->getDecl()->isComplete(); - return isExtIntType(); + return false; } bool Type::isIntegralOrUnscopedEnumerationType() const { if (const auto *BT = dyn_cast(CanonicalType)) return BT->getKind() >= BuiltinType::Bool && BT->getKind() <= BuiltinType::Int128; - - if (isExtIntType()) - return true; - return isUnscopedEnumerationType(); } @@ -1960,9 +1923,6 @@ bool Type::isSignedIntegerType() const { return ET->getDecl()->getIntegerType()->isSignedIntegerType(); } - if (const ExtIntType *IT = dyn_cast(CanonicalType)) - return IT->isSigned(); - return false; } @@ -1977,10 +1937,6 @@ bool Type::isSignedIntegerOrEnumerationType() const { return ET->getDecl()->getIntegerType()->isSignedIntegerType(); } - if (const ExtIntType *IT = dyn_cast(CanonicalType)) - return IT->isSigned(); - - return false; } @@ -2007,9 +1963,6 @@ bool Type::isUnsignedIntegerType() const { return ET->getDecl()->getIntegerType()->isUnsignedIntegerType(); } - if (const ExtIntType *IT = dyn_cast(CanonicalType)) - return IT->isUnsigned(); - return false; } @@ -2024,9 +1977,6 @@ bool Type::isUnsignedIntegerOrEnumerationType() const { return ET->getDecl()->getIntegerType()->isUnsignedIntegerType(); } - if (const ExtIntType *IT = dyn_cast(CanonicalType)) - return IT->isUnsigned(); - return false; } @@ -2065,7 +2015,7 @@ bool Type::isRealType() const { BT->getKind() <= BuiltinType::Float128; if (const auto *ET = dyn_cast(CanonicalType)) return ET->getDecl()->isComplete() && !ET->getDecl()->isScoped(); - return isExtIntType(); + return false; } bool Type::isArithmeticType() const { @@ -2080,7 +2030,7 @@ bool Type::isArithmeticType() const { // false for scoped enumerations since that will disable any // unwanted implicit conversions. return !ET->getDecl()->isScoped() && ET->getDecl()->isComplete(); - return isa(CanonicalType) || isExtIntType(); + return isa(CanonicalType); } Type::ScalarTypeKind Type::getScalarTypeKind() const { @@ -2109,8 +2059,6 @@ Type::ScalarTypeKind Type::getScalarTypeKind() const { if (CT->getElementType()->isRealFloatingType()) return STK_FloatingComplex; return STK_IntegralComplex; - } else if (isExtIntType()) { - return STK_Integral; } llvm_unreachable("unknown scalar type"); @@ -2276,7 +2224,6 @@ bool QualType::isCXX98PODType(const ASTContext &Context) const { case Type::MemberPointer: case Type::Vector: case Type::ExtVector: - case Type::ExtInt: return true; case Type::Enum: @@ -3696,7 +3643,6 @@ static CachedProperties computeCachedProperties(const Type *T) { // here in error recovery. 
return CachedProperties(ExternalLinkage, false); - case Type::ExtInt: case Type::Builtin: // C++ [basic.link]p8: // A type is said to have linkage if and only if: @@ -3794,7 +3740,6 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) { assert(T->isInstantiationDependentType()); return LinkageInfo::external(); - case Type::ExtInt: case Type::Builtin: return LinkageInfo::external(); @@ -4003,8 +3948,6 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::ObjCInterface: case Type::Atomic: case Type::Pipe: - case Type::ExtInt: - case Type::DependentExtInt: return false; } llvm_unreachable("bad type kind!"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index f000e1f6c932..4cc0d735ed6a 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -227,8 +227,6 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::ObjCInterface: case Type::Atomic: case Type::Pipe: - case Type::ExtInt: - case Type::DependentExtInt: CanPrefixQualifiers = true; break; @@ -1116,28 +1114,6 @@ void TypePrinter::printPipeBefore(const PipeType *T, raw_ostream &OS) { void TypePrinter::printPipeAfter(const PipeType *T, raw_ostream &OS) {} -void TypePrinter::printExtIntBefore(const ExtIntType *T, raw_ostream &OS) { - if (T->isUnsigned()) - OS << "unsigned "; - OS << "_ExtInt(" << T->getNumBits() << ")"; - spaceBeforePlaceHolder(OS); -} - -void TypePrinter::printExtIntAfter(const ExtIntType *T, raw_ostream &OS) {} - -void TypePrinter::printDependentExtIntBefore(const DependentExtIntType *T, - raw_ostream &OS) { - if (T->isUnsigned()) - OS << "unsigned "; - OS << "_ExtInt("; - T->getNumBitsExpr()->printPretty(OS, nullptr, Policy); - OS << ")"; - spaceBeforePlaceHolder(OS); -} - -void TypePrinter::printDependentExtIntAfter(const DependentExtIntType *T, - raw_ostream &OS) {} - /// Appends the given scope to the end of a string. void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) { if (DC->isTranslationUnit()) return; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index e6422a7ff1c3..4ea3fbca2144 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -826,17 +826,6 @@ llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) { return DBuilder.createUnspecifiedType("auto"); } -llvm::DIType *CGDebugInfo::CreateType(const ExtIntType *Ty) { - - StringRef Name = Ty->isUnsigned() ? "unsigned _ExtInt" : "_ExtInt"; - llvm::dwarf::TypeKind Encoding = Ty->isUnsigned() - ? llvm::dwarf::DW_ATE_unsigned - : llvm::dwarf::DW_ATE_signed; - - return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty), - Encoding); -} - llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { // Bit size and offset of the type. 
llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float; @@ -3170,8 +3159,6 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Atomic: return CreateType(cast(Ty), Unit); - case Type::ExtInt: - return CreateType(cast(Ty)); case Type::Pipe: return CreateType(cast(Ty), Unit); diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 34164fbec90e..4915e19753c6 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -168,7 +168,6 @@ class CGDebugInfo { llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); llvm::DIType *CreateType(const AutoType *Ty); - llvm::DIType *CreateType(const ExtIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TemplateSpecializationType *Ty, diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 62a0f6c0efe6..ce0904234333 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -760,11 +760,6 @@ class ScalarExprEmitter llvm::Value *Zero,bool isDiv); // Common helper for getting how wide LHS of shift is. static Value *GetWidthMinusOneValue(Value* LHS,Value* RHS); - - // Used for shifting constraints for OpenCL, do mask for powers of 2, URem for - // non powers of two. - Value *ConstrainShiftValue(Value *LHS, Value *RHS, const Twine &Name); - Value *EmitDiv(const BinOpInfo &Ops); Value *EmitRem(const BinOpInfo &Ops); Value *EmitAdd(const BinOpInfo &Ops); @@ -3767,21 +3762,6 @@ Value *ScalarExprEmitter::GetWidthMinusOneValue(Value* LHS,Value* RHS) { return llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth() - 1); } -Value *ScalarExprEmitter::ConstrainShiftValue(Value *LHS, Value *RHS, - const Twine &Name) { - llvm::IntegerType *Ty; - if (auto *VT = dyn_cast(LHS->getType())) - Ty = cast(VT->getElementType()); - else - Ty = cast(LHS->getType()); - - if (llvm::isPowerOf2_64(Ty->getBitWidth())) - return Builder.CreateAnd(RHS, GetWidthMinusOneValue(LHS, RHS), Name); - - return Builder.CreateURem( - RHS, llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth()), Name); -} - Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { // LLVM requires the LHS and RHS to be the same type: promote or truncate the // RHS to the same size as the LHS. @@ -3796,7 +3776,8 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) - RHS = ConstrainShiftValue(Ops.LHS, RHS, "shl.mask"); + RHS = + Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask"); else if ((SanitizeBase || SanitizeExponent) && isa(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); @@ -3858,7 +3839,8 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { // OpenCL 6.3j: shift values are effectively % word size of LHS. 
if (CGF.getLangOpts().OpenCL) - RHS = ConstrainShiftValue(Ops.LHS, RHS, "shr.mask"); + RHS = + Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask"); else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) && isa(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 75af05623b03..4de64a32f2ac 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -385,8 +385,7 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Run = FieldEnd; continue; } - llvm::Type *Type = - Types.ConvertTypeForMem(Field->getType(), /*ForBitFields=*/true); + llvm::Type *Type = Types.ConvertTypeForMem(Field->getType()); // If we don't have a run yet, or don't live within the previous run's // allocated storage then we allocate some storage and start a new run. if (Run == FieldEnd || BitOffset >= Tail) { diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 9929c154e37b..05bf70e5cb22 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -257,7 +257,6 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::Enum: case Type::ObjCObjectPointer: case Type::Pipe: - case Type::ExtInt: return TEK_Scalar; // Complexes. @@ -2011,7 +2010,6 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: - case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index f4ebe6885675..8cc8c162dfbe 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -209,15 +209,6 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { return createScalarTypeNode(OutName, getChar(), Size); } - if (const auto *EIT = dyn_cast(Ty)) { - SmallString<256> OutName; - llvm::raw_svector_ostream Out(OutName); - // Don't specify signed/unsigned since integer types can alias despite sign - // differences. - Out << "_ExtInt(" << EIT->getNumBits() << ')'; - return createScalarTypeNode(OutName, getChar(), Size); - } - // For now, handle any other kind of type conservatively. return getChar(); } diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index d6d84a3ff051..29adc2c7adb3 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -83,19 +83,19 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. -llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { +llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { llvm::Type *R = ConvertType(T); - // If this is a bool type, or an ExtIntType in a bitfield representation, - // map this integer to the target-specified size. - if ((ForBitField && T->isExtIntType()) || R->isIntegerTy(1)) - return llvm::IntegerType::get(getLLVMContext(), - (unsigned)Context.getTypeSize(T)); + // If this is a non-bool type, don't map it. + if (!R->isIntegerTy(1)) + return R; - // Else, don't map it. 
- return R; + // Otherwise, return an integer of the target-specified size. + return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); } + /// isRecordLayoutComplete - Return true if the specified type is already /// completely laid out. bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const { @@ -731,11 +731,6 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = CGM.getOpenCLRuntime().getPipeType(cast(Ty)); break; } - case Type::ExtInt: { - const auto &EIT = cast(Ty); - ResultType = llvm::Type::getIntNTy(getLLVMContext(), EIT->getNumBits()); - break; - } } assert(ResultType && "Didn't convert a type?"); diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 394e2fdf8d65..03102329507e 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -134,7 +134,7 @@ class CodeGenTypes { /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. - llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false); + llvm::Type *ConvertTypeForMem(QualType T); /// GetFunctionType - Get the LLVM function type for \arg Info. llvm::FunctionType *GetFunctionType(const CGFunctionInfo &Info); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 4a591cf7aac5..c8a73c2757ab 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3219,7 +3219,6 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("Pipe types shouldn't get here"); case Type::Builtin: - case Type::ExtInt: // GCC treats vector and complex types as fundamental types. case Type::Vector: case Type::ExtVector: @@ -3473,10 +3472,7 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: - break; - - case Type::ExtInt: - break; + llvm_unreachable("Pipe type shouldn't get here"); case Type::ConstantArray: case Type::IncompleteArray: diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index fe00199c1f8f..8bd7571f1242 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2880,25 +2880,6 @@ void Parser::ParseAlignmentSpecifier(ParsedAttributes &Attrs, ParsedAttr::AS_Keyword, EllipsisLoc); } -ExprResult Parser::ParseExtIntegerArgument() { - assert(Tok.is(tok::kw__ExtInt) && "Not an extended int type"); - ConsumeToken(); - - BalancedDelimiterTracker T(*this, tok::l_paren); - if (T.expectAndConsume()) - return ExprError(); - - ExprResult ER = ParseConstantExpression(); - if (ER.isInvalid()) { - T.skipToEnd(); - return ExprError(); - } - - if(T.consumeClose()) - return ExprError(); - return ER; -} - /// Determine whether we're looking at something that might be a declarator /// in a simple-declaration. 
If it can't possibly be a declarator, maybe /// diagnose a missing semicolon after a prior tag definition in the decl @@ -3826,14 +3807,6 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, PrevSpec, DiagID, Policy); break; - case tok::kw__ExtInt: { - ExprResult ER = ParseExtIntegerArgument(); - if (ER.isInvalid()) - continue; - isInvalid = DS.SetExtIntType(Loc, ER.get(), PrevSpec, DiagID, Policy); - ConsumedEnd = PrevTokLocation; - break; - } case tok::kw___int128: isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int128, Loc, PrevSpec, DiagID, Policy); @@ -4917,7 +4890,6 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: - case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: @@ -4997,7 +4969,6 @@ bool Parser::isTypeSpecifierQualifier() { case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: - case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: @@ -5164,7 +5135,6 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_char32_t: case tok::kw_int: - case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 29e583fcb84e..0c6939b04319 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1492,7 +1492,6 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, case tok::kw_long: case tok::kw___int64: case tok::kw___int128: - case tok::kw__ExtInt: case tok::kw_signed: case tok::kw_unsigned: case tok::kw_half: diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 32e9370b0e00..c5e895d090a5 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -2156,19 +2156,6 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) { return; } - case tok::kw__ExtInt: { - ExprResult ER = ParseExtIntegerArgument(); - if (ER.isInvalid()) - DS.SetTypeSpecError(); - else - DS.SetExtIntType(Loc, ER.get(), PrevSpec, DiagID, Policy); - - // Do this here because we have already consumed the close paren. 
- DS.SetRangeEnd(PrevTokLocation); - DS.Finish(Actions, Policy); - return; - } - // builtin types case tok::kw_short: DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec, DiagID, Policy); diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index 61a82664bf71..529e3f321054 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1141,7 +1141,6 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw_half: case tok::kw_float: case tok::kw_int: - case tok::kw__ExtInt: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: @@ -1779,24 +1778,6 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw__Atomic: return TPResult::True; - case tok::kw__ExtInt: { - if (NextToken().isNot(tok::l_paren)) - return TPResult::Error; - RevertingTentativeParsingAction PA(*this); - ConsumeToken(); - ConsumeParen(); - - if (!SkipUntil(tok::r_paren, StopAtSemi)) - return TPResult::Error; - - if (Tok.is(tok::l_paren)) - return TPResult::Ambiguous; - - if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) - return BracedCastResult; - - return TPResult::True; - } default: return TPResult::False; } @@ -1829,7 +1810,6 @@ bool Parser::isCXXDeclarationSpecifierAType() { case tok::kw_bool: case tok::kw_short: case tok::kw_int: - case tok::kw__ExtInt: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index 276e35a3497e..ae4a78a4556d 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -360,7 +360,6 @@ bool Declarator::isDeclarationOfFunction() const { case TST_half: case TST_int: case TST_int128: - case TST_extint: case TST_struct: case TST_interface: case TST_union: @@ -539,7 +538,6 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T, case DeclSpec::TST_char32: return "char32_t"; case DeclSpec::TST_int: return "int"; case DeclSpec::TST_int128: return "__int128"; - case DeclSpec::TST_extint: return "_ExtInt"; case DeclSpec::TST_half: return "half"; case DeclSpec::TST_float: return "float"; case DeclSpec::TST_double: return "double"; @@ -915,27 +913,6 @@ bool DeclSpec::SetTypeSpecError() { return false; } -bool DeclSpec::SetExtIntType(SourceLocation KWLoc, Expr *BitsExpr, - const char *&PrevSpec, unsigned &DiagID, - const PrintingPolicy &Policy) { - assert(BitsExpr && "no expression provided!"); - if (TypeSpecType == TST_error) - return false; - - if (TypeSpecType != TST_unspecified) { - PrevSpec = DeclSpec::getSpecifierName((TST) TypeSpecType, Policy); - DiagID = diag::err_invalid_decl_spec_combination; - return true; - } - - TypeSpecType = TST_extint; - ExprRep = BitsExpr; - TSTLoc = KWLoc; - TSTNameLoc = KWLoc; - TypeSpecOwned = false; - return false; -} - bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const LangOptions &Lang) { // Duplicates are permitted in C99 onwards, but are not permitted in C89 or @@ -1217,7 +1194,7 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { TypeSpecType = TST_int; // unsigned -> unsigned int, signed -> signed int. else if (TypeSpecType != TST_int && TypeSpecType != TST_int128 && TypeSpecType != TST_char && TypeSpecType != TST_wchar && - !IsFixedPointType && TypeSpecType != TST_extint) { + !IsFixedPointType) { S.Diag(TSSLoc, diag::err_invalid_sign_spec) << getSpecifierName((TST)TypeSpecType, Policy); // signed double -> double. 
@@ -1264,8 +1241,7 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { S.getLocForEndOfToken(getTypeSpecComplexLoc()), " double"); TypeSpecType = TST_double; // _Complex -> _Complex double. - } else if (TypeSpecType == TST_int || TypeSpecType == TST_char || - TypeSpecType == TST_extint) { + } else if (TypeSpecType == TST_int || TypeSpecType == TST_char) { // Note that this intentionally doesn't include _Complex _Bool. if (!S.getLangOpts().CPlusPlus) S.Diag(TSTLoc, diag::ext_integer_complex); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 037e9c332412..e7bc4994e540 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -9793,9 +9793,6 @@ struct IntRange { false/*NonNegative*/); } - if (const auto *EIT = dyn_cast(T)) - return IntRange(EIT->getNumBits(), EIT->isUnsigned()); - const BuiltinType *BT = cast(T); assert(BT->isInteger()); @@ -9819,9 +9816,6 @@ struct IntRange { if (const EnumType *ET = dyn_cast(T)) T = C.getCanonicalType(ET->getDecl()->getIntegerType()).getTypePtr(); - if (const auto *EIT = dyn_cast(T)) - return IntRange(EIT->getNumBits(), EIT->isUnsigned()); - const BuiltinType *BT = cast(T); assert(BT->isInteger()); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 27c8365ab8be..ed082dbaf986 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14811,16 +14811,12 @@ bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) { if (T->isDependentType()) return false; - // This doesn't use 'isIntegralType' despite the error message mentioning - // integral type because isIntegralType would also allow enum types in C. if (const BuiltinType *BT = T->getAs()) if (BT->isInteger()) return false; - if (T->isExtIntType()) - return false; - - return Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T; + Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T; + return true; } /// Check whether this is a valid redeclaration of a previous enumeration. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 869ae5cbc40b..3205b4472db2 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4087,9 +4087,8 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI, Diag(AttrLoc, diag::err_enum_mode_vector_type) << Name << CI.getRange(); return; } - bool IntegralOrAnyEnumType = (OldElemTy->isIntegralOrEnumerationType() && - !OldElemTy->isExtIntType()) || - OldElemTy->getAs(); + bool IntegralOrAnyEnumType = + OldElemTy->isIntegralOrEnumerationType() || OldElemTy->getAs(); if (!OldElemTy->getAs() && !OldElemTy->isComplexType() && !IntegralOrAnyEnumType) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index fbb5d4b05bbf..31d694857e9c 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1482,11 +1482,6 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, if (LHSType == RHSType) return LHSType; - // ExtInt types aren't subject to conversions between them or normal integers, - // so this fails. - if(LHSType->isExtIntType() || RHSType->isExtIntType()) - return QualType(); - // At this point, we have two different arithmetic types. 
// Diagnose attempts to convert between __float128 and long double where @@ -4266,7 +4261,6 @@ static void captureVariablyModifiedType(ASTContext &Context, QualType T, case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: - case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); @@ -10437,19 +10431,14 @@ static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, << RHS.get()->getSourceRange()); return; } - - QualType LHSExprType = LHS.get()->getType(); - uint64_t LeftSize = LHSExprType->isExtIntType() - ? S.Context.getIntWidth(LHSExprType) - : S.Context.getTypeSize(LHSExprType); - llvm::APInt LeftBits(Right.getBitWidth(), LeftSize); + llvm::APInt LeftBits(Right.getBitWidth(), + S.Context.getTypeSize(LHS.get()->getType())); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } - if (Opc != BO_Shl) return; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 08d29fa51e6e..82a197196576 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -2967,7 +2967,6 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Vector: case Type::ExtVector: case Type::Complex: - case Type::ExtInt: break; // Non-deduced auto types only get here for error cases. diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index 7bd12913aec4..f425ec742b7b 100755 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5998,15 +5998,6 @@ bool UnnamedLocalNoLinkageFinder::VisitPipeType(const PipeType* T) { return false; } -bool UnnamedLocalNoLinkageFinder::VisitExtIntType(const ExtIntType *T) { - return false; -} - -bool UnnamedLocalNoLinkageFinder::VisitDependentExtIntType( - const DependentExtIntType *T) { - return false; -} - bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { if (Tag->getDeclContext()->isFunctionOrMethod()) { S.Diag(SR.getBegin(), @@ -6900,9 +6891,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType IntegerType = ParamType; if (const EnumType *Enum = IntegerType->getAs()) IntegerType = Enum->getDecl()->getIntegerType(); - Value = Value.extOrTrunc(IntegerType->isExtIntType() - ? Context.getIntWidth(IntegerType) - : Context.getTypeSize(IntegerType)); + Value = Value.extOrTrunc(Context.getTypeSize(IntegerType)); Converted = TemplateArgument(Context, Value, Context.getCanonicalType(ParamType)); @@ -6996,9 +6985,7 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // Coerce the template argument's value to the value it will have // based on the template parameter's type. - unsigned AllowedBits = IntegerType->isExtIntType() - ? 
Context.getIntWidth(IntegerType) - : Context.getTypeSize(IntegerType); + unsigned AllowedBits = Context.getTypeSize(IntegerType); if (Value.getBitWidth() != AllowedBits) Value = Value.extOrTrunc(AllowedBits); Value.setIsSigned(IntegerType->isSignedIntegerOrEnumerationType()); diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index e1d438fcb724..8e3c61819571 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1515,7 +1515,6 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: - case Type::ExtInt: if (TDF & TDF_SkipNonDependent) return Sema::TDK_Success; @@ -2107,33 +2106,6 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, return Sema::TDK_NonDeducedMismatch; } - case Type::DependentExtInt: { - const auto *IntParam = cast(Param); - - if (const auto *IntArg = dyn_cast(Arg)){ - if (IntParam->isUnsigned() != IntArg->isUnsigned()) - return Sema::TDK_NonDeducedMismatch; - - NonTypeTemplateParmDecl *NTTP = - getDeducedParameterFromExpr(Info, IntParam->getNumBitsExpr()); - if (!NTTP) - return Sema::TDK_Success; - - llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false); - ArgSize = IntArg->getNumBits(); - - return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize, - S.Context.IntTy, true, Info, - Deduced); - } - - if (const auto *IntArg = dyn_cast(Arg)) { - if (IntParam->isUnsigned() != IntArg->isUnsigned()) - return Sema::TDK_NonDeducedMismatch; - return Sema::TDK_Success; - } - return Sema::TDK_NonDeducedMismatch; - } case Type::TypeOfExpr: case Type::TypeOf: @@ -5878,11 +5850,6 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, cast(T)->getDeducedType(), OnlyDeduced, Depth, Used); break; - case Type::DependentExtInt: - MarkUsedTemplateParameters(Ctx, - cast(T)->getNumBitsExpr(), - OnlyDeduced, Depth, Used); - break; // None of these types have any template parameters in them. 
case Type::Builtin: @@ -5895,7 +5862,6 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::ObjCObjectPointer: case Type::UnresolvedUsing: case Type::Pipe: - case Type::ExtInt: #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 466cb084e7c3..825b062c0054 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -847,7 +847,6 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case TST_typeofExpr: case TST_decltype: - case TST_extint: if (DS.getRepAsExpr() && DS.getRepAsExpr()->containsUnexpandedParameterPack()) return true; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 4ecd36209e5b..53e4366f673b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -35,7 +35,6 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" -#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -1442,15 +1441,6 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { } break; } - case DeclSpec::TST_extint: { - Result = S.BuildExtIntType(DS.getTypeSpecSign() == TSS_unsigned, - DS.getRepAsExpr(), DS.getBeginLoc()); - if (Result.isNull()) { - Result = Context.IntTy; - declarator.setInvalidType(true); - } - break; - } case DeclSpec::TST_accum: { switch (DS.getTypeSpecWidth()) { case DeclSpec::TSW_short: @@ -2170,45 +2160,6 @@ QualType Sema::BuildWritePipeType(QualType T, SourceLocation Loc) { return Context.getWritePipeType(T); } -/// Build a extended int type. -/// -/// \param IsUnsigned Boolean representing the signedness of the type. -/// -/// \param BitWidth Size of this int type in bits, or an expression representing -/// that. -/// -/// \param Loc Location of the keyword. -QualType Sema::BuildExtIntType(bool IsUnsigned, Expr *BitWidth, - SourceLocation Loc) { - if (BitWidth->isInstantiationDependent()) - return Context.getDependentExtIntType(IsUnsigned, BitWidth); - - llvm::APSInt Bits(32); - ExprResult ICE = VerifyIntegerConstantExpression(BitWidth, &Bits); - - if (ICE.isInvalid()) - return QualType(); - - int64_t NumBits = Bits.getSExtValue(); - if (!IsUnsigned && NumBits < 2) { - Diag(Loc, diag::err_ext_int_bad_size) << 0; - return QualType(); - } - - if (IsUnsigned && NumBits < 1) { - Diag(Loc, diag::err_ext_int_bad_size) << 1; - return QualType(); - } - - if (NumBits > llvm::IntegerType::MAX_INT_BITS) { - Diag(Loc, diag::err_ext_int_max_size) << IsUnsigned - << llvm::IntegerType::MAX_INT_BITS; - return QualType(); - } - - return Context.getExtIntType(IsUnsigned, NumBits); -} - /// Check whether the specified array size makes the array type a VLA. If so, /// return true, if not, return the size of the array in SizeVal. static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) { @@ -5823,14 +5774,6 @@ namespace { TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); } - void VisitExtIntTypeLoc(ExtIntTypeLoc TL) { - TL.setNameLoc(DS.getTypeSpecTypeLoc()); - } - - void VisitDependentExtIntTypeLoc(DependentExtIntTypeLoc TL) { - TL.setNameLoc(DS.getTypeSpecTypeLoc()); - } - void VisitTypeLoc(TypeLoc TL) { // FIXME: add other typespec types and change this to an assert. 
TL.initialize(Context, DS.getTypeSpecTypeLoc()); @@ -5957,9 +5900,6 @@ namespace { assert(Chunk.Kind == DeclaratorChunk::Pipe); TL.setKWLoc(Chunk.Loc); } - void VisitExtIntTypeLoc(ExtIntTypeLoc TL) { - TL.setNameLoc(Chunk.Loc); - } void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) { TL.setExpansionLoc(Chunk.Loc); } @@ -8691,12 +8631,6 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) { else if (!T.isTriviallyCopyableType(Context)) // Some other non-trivially-copyable type (probably a C++ class) DisallowedKind = 7; - else if (auto *ExtTy = T->getAs()) { - if (ExtTy->getNumBits() < 8) - DisallowedKind = 8; - else if (!llvm::isPowerOf2_32(ExtTy->getNumBits())) - DisallowedKind = 9; - } if (DisallowedKind != -1) { Diag(Loc, diag::err_atomic_specifier_bad_type) << DisallowedKind << T; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index abde968bed8c..87b07897ec28 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1183,14 +1183,6 @@ class TreeTransform { QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc, bool isReadPipe); - /// Build an extended int given its value type. - QualType RebuildExtIntType(bool IsUnsigned, unsigned NumBits, - SourceLocation Loc); - - /// Build a dependent extended int given its value type. - QualType RebuildDependentExtIntType(bool IsUnsigned, Expr *NumBitsExpr, - SourceLocation Loc); - /// Build a new template name given a nested name specifier, a flag /// indicating whether the "template" keyword was provided, and the template /// that the template name refers to. @@ -6128,57 +6120,6 @@ QualType TreeTransform::TransformPipeType(TypeLocBuilder &TLB, return Result; } -template -QualType TreeTransform::TransformExtIntType(TypeLocBuilder &TLB, - ExtIntTypeLoc TL) { - const ExtIntType *EIT = TL.getTypePtr(); - QualType Result = TL.getType(); - - if (getDerived().AlwaysRebuild()) { - Result = getDerived().RebuildExtIntType(EIT->isUnsigned(), - EIT->getNumBits(), TL.getNameLoc()); - if (Result.isNull()) - return QualType(); - } - - ExtIntTypeLoc NewTL = TLB.push(Result); - NewTL.setNameLoc(TL.getNameLoc()); - return Result; -} - -template -QualType TreeTransform::TransformDependentExtIntType( - TypeLocBuilder &TLB, DependentExtIntTypeLoc TL) { - const DependentExtIntType *EIT = TL.getTypePtr(); - - EnterExpressionEvaluationContext Unevaluated( - SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); - ExprResult BitsExpr = getDerived().TransformExpr(EIT->getNumBitsExpr()); - BitsExpr = SemaRef.ActOnConstantExpression(BitsExpr); - - if (BitsExpr.isInvalid()) - return QualType(); - - QualType Result = TL.getType(); - - if (getDerived().AlwaysRebuild() || BitsExpr.get() != EIT->getNumBitsExpr()) { - Result = getDerived().RebuildDependentExtIntType( - EIT->isUnsigned(), BitsExpr.get(), TL.getNameLoc()); - - if (Result.isNull()) - return QualType(); - } - - if (isa(Result)) { - DependentExtIntTypeLoc NewTL = TLB.push(Result); - NewTL.setNameLoc(TL.getNameLoc()); - } else { - ExtIntTypeLoc NewTL = TLB.push(Result); - NewTL.setNameLoc(TL.getNameLoc()); - } - return Result; -} - /// Simple iterator that traverses the template arguments in a /// container that provides a \c getArgLoc() member function. 
/// @@ -13841,23 +13782,6 @@ QualType TreeTransform::RebuildPipeType(QualType ValueType, : SemaRef.BuildWritePipeType(ValueType, KWLoc); } -template -QualType TreeTransform::RebuildExtIntType(bool IsUnsigned, - unsigned NumBits, - SourceLocation Loc) { - llvm::APInt NumBitsAP(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy), - NumBits, true); - IntegerLiteral *Bits = IntegerLiteral::Create(SemaRef.Context, NumBitsAP, - SemaRef.Context.IntTy, Loc); - return SemaRef.BuildExtIntType(IsUnsigned, Bits, Loc); -} - -template -QualType TreeTransform::RebuildDependentExtIntType( - bool IsUnsigned, Expr *NumBitsExpr, SourceLocation Loc) { - return SemaRef.BuildExtIntType(IsUnsigned, NumBitsExpr, Loc); -} - template TemplateName TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 62dd233aab3f..f0e9bbd4dcea 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6719,15 +6719,6 @@ void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) { TL.setKWLoc(readSourceLocation()); } -void TypeLocReader::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) { - TL.setNameLoc(readSourceLocation()); -} -void TypeLocReader::VisitDependentExtIntTypeLoc( - clang::DependentExtIntTypeLoc TL) { - TL.setNameLoc(readSourceLocation()); -} - - void ASTRecordReader::readTypeLoc(TypeLoc TL) { TypeLocReader TLR(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 18a92aaadd52..c8ce3edda60b 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -476,14 +476,6 @@ void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { Record.AddSourceLocation(TL.getKWLoc()); } -void TypeLocWriter::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); -} -void TypeLocWriter::VisitDependentExtIntTypeLoc( - clang::DependentExtIntTypeLoc TL) { - Record.AddSourceLocation(TL.getNameLoc()); -} - void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; diff --git a/clang/test/CodeGen/ext-int-sanitizer.cpp b/clang/test/CodeGen/ext-int-sanitizer.cpp deleted file mode 100644 index ddf3180e1a1b..000000000000 --- a/clang/test/CodeGen/ext-int-sanitizer.cpp +++ /dev/null @@ -1,265 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-gnu-linux -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s - - -// CHECK: define void @_Z6BoundsRA10_KiU7_ExtIntILi15EEi -void Bounds(const int (&Array)[10], _ExtInt(15) Index) { - int I1 = Array[Index]; - // CHECK: %[[SEXT:.+]] = sext i15 %{{.+}} to i64 - // CHECK: %[[CMP:.+]] = icmp ult i64 %[[SEXT]], 10 - // CHECK: br i1 %[[CMP]] - // CHECK: call void @__ubsan_handle_out_of_bounds -} - -// CHECK: define void @_Z4Enumv -void Enum() { - enum E1 { e1a = 0, e1b = 127 } - e1; - enum E2 { e2a = -1, e2b = 64 } - e2; - enum E3 { e3a = (1u << 31) - 1 } - e3; - - _ExtInt(34) a = e1; - // CHECK: %[[E1:.+]] = icmp ule i32 %{{.*}}, 127 - // CHECK: br i1 %[[E1]] - // CHECK: call void @__ubsan_handle_load_invalid_value_abort - _ExtInt(34) b = e2; - // CHECK: %[[E2HI:.*]] = icmp sle i32 {{.*}}, 127 - // CHECK: %[[E2LO:.*]] = icmp sge i32 {{.*}}, -128 - // CHECK: %[[E2:.*]] = and i1 %[[E2HI]], 
%[[E2LO]] - // CHECK: br i1 %[[E2]] - // CHECK: call void @__ubsan_handle_load_invalid_value_abort - _ExtInt(34) c = e3; - // CHECK: %[[E3:.*]] = icmp ule i32 {{.*}}, 2147483647 - // CHECK: br i1 %[[E3]] - // CHECK: call void @__ubsan_handle_load_invalid_value_abort -} - -// CHECK: define void @_Z13FloatOverflowfd -void FloatOverflow(float f, double d) { - _ExtInt(10) E = f; - // CHECK: fcmp ogt float %{{.+}}, -5.130000e+02 - // CHECK: fcmp olt float %{{.+}}, 5.120000e+02 - _ExtInt(10) E2 = d; - // CHECK: fcmp ogt double %{{.+}}, -5.130000e+02 - // CHECK: fcmp olt double %{{.+}}, 5.120000e+02 - _ExtInt(7) E3 = f; - // CHECK: fcmp ogt float %{{.+}}, -6.500000e+01 - // CHECK: fcmp olt float %{{.+}}, 6.400000e+01 - _ExtInt(7) E4 = d; - // CHECK: fcmp ogt double %{{.+}}, -6.500000e+01 - // CHECK: fcmp olt double %{{.+}}, 6.400000e+01 -} - -// CHECK: define void @_Z14UIntTruncationU7_ExtIntILi35EEjjy -void UIntTruncation(unsigned _ExtInt(35) E, unsigned int i, unsigned long long ll) { - - i = E; - // CHECK: %[[LOADE:.+]] = load i35 - // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 - // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 - // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort - - E = ll; - // CHECK: %[[LOADLL:.+]] = load i64 - // CHECK: %[[CONV:.+]] = trunc i64 %[[LOADLL]] to i35 - // CHECK: %[[EXT:.+]] = zext i35 %[[CONV]] to i64 - // CHECK: %[[CHECK:.+]] = icmp eq i64 %[[EXT]], %[[LOADLL]] - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort -} - -// CHECK: define void @_Z13IntTruncationU7_ExtIntILi35EEiU7_ExtIntILi42EEjij -void IntTruncation(_ExtInt(35) E, unsigned _ExtInt(42) UE, int i, unsigned j) { - - j = E; - // CHECK: %[[LOADE:.+]] = load i35 - // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 - // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 - // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort - - j = UE; - // CHECK: %[[LOADUE:.+]] = load i42 - // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32 - // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i42 - // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort - - // Note: also triggers sign change check. - i = UE; - // CHECK: %[[LOADUE:.+]] = load i42 - // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32 - // CHECK: %[[NEG:.+]] = icmp slt i32 %[[CONV]], 0 - // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] - // CHECK: %[[EXT:.+]] = sext i32 %[[CONV]] to i42 - // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] - // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]] - // CHECK: br i1 %[[CHECKBOTH]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort - - // Note: also triggers sign change check. 
- E = UE; - // CHECK: %[[LOADUE:.+]] = load i42 - // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i35 - // CHECK: %[[NEG:.+]] = icmp slt i35 %[[CONV]], 0 - // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] - // CHECK: %[[EXT:.+]] = sext i35 %[[CONV]] to i42 - // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] - // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]] - // CHECK: br i1 %[[CHECKBOTH]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort -} - -// CHECK: define void @_Z15SignChangeCheckU7_ExtIntILi39EEjU7_ExtIntILi39EEi -void SignChangeCheck(unsigned _ExtInt(39) UE, _ExtInt(39) E) { - UE = E; - // CHECK: %[[LOADE:.+]] = load i39 - // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADE]], 0 - // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 %[[NEG]], false - // CHECK: br i1 %[[SIGNCHECK]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort - - - E = UE; - // CHECK: %[[LOADUE:.+]] = load i39 - // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADUE]], 0 - // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] - // CHECK: br i1 %[[SIGNCHECK]] - // CHECK: call void @__ubsan_handle_implicit_conversion_abort -} - -// CHECK: define void @_Z9DivByZeroU7_ExtIntILi11EEii -void DivByZero(_ExtInt(11) E, int i) { - - // Also triggers signed integer overflow. - E / E; - // CHECK: %[[E:.+]] = load i11, i11* - // CHECK: %[[E2:.+]] = load i11, i11* - // CHECK: %[[NEZERO:.+]] = icmp ne i11 %[[E2]], 0 - // CHECK: %[[NEMIN:.+]] = icmp ne i11 %[[E]], -1024 - // CHECK: %[[NENEG1:.+]] = icmp ne i11 %[[E2]], -1 - // CHECK: %[[OR:.+]] = or i1 %[[NEMIN]], %[[NENEG1]] - // CHECK: %[[AND:.+]] = and i1 %[[NEZERO]], %[[OR]] - // CHECK: br i1 %[[AND]] - // CHECK: call void @__ubsan_handle_divrem_overflow_abort -} - -// TODO: -//-fsanitize=shift: (shift-base, shift-exponent) Shift operators where the amount shifted is greater or equal to the promoted bit-width of the left hand side or less than zero, or where the left hand side is negative. For a signed left shift, also checks for signed overflow in C, and for unsigned overflow in C++. You can use -fsanitize=shift-base or -fsanitize=shift-exponent to check only left-hand side or right-hand side of shift operation, respectively. 
-// CHECK: define void @_Z6ShiftsU7_ExtIntILi9EEi -void Shifts(_ExtInt(9) E) { - E >> E; - // CHECK: %[[LHSE:.+]] = load i9, i9* - // CHECK: %[[RHSE:.+]] = load i9, i9* - // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 - // CHECK: br i1 %[[CMP]] - // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort - - E << E; - // CHECK: %[[LHSE:.+]] = load i9, i9* - // CHECK: %[[RHSE:.+]] = load i9, i9* - // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 - // CHECK: br i1 %[[CMP]] - // CHECK: %[[ZEROS:.+]] = sub nuw nsw i9 8, %[[RHSE]] - // CHECK: %[[CHECK:.+]] = lshr i9 %[[LHSE]], %[[ZEROS]] - // CHECK: %[[SKIPSIGN:.+]] = lshr i9 %[[CHECK]], 1 - // CHECK: %[[CHECK:.+]] = icmp eq i9 %[[SKIPSIGN]] - // CHECK: %[[PHI:.+]] = phi i1 [ true, %{{.+}} ], [ %[[CHECK]], %{{.+}} ] - // CHECK: and i1 %[[CMP]], %[[PHI]] - // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort -} - -// CHECK: define void @_Z21SignedIntegerOverflowU7_ExtIntILi93EEiU7_ExtIntILi4EEiU7_ExtIntILi31EEi -void SignedIntegerOverflow(_ExtInt(93) BiggestE, - _ExtInt(4) SmallestE, - _ExtInt(31) JustRightE) { - BiggestE + BiggestE; - // CHECK: %[[LOAD1:.+]] = load i93, i93* - // CHECK: %[[LOAD2:.+]] = load i93, i93* - // CHECK: %[[OFCALL:.+]] = call { i93, i1 } @llvm.sadd.with.overflow.i93(i93 %[[LOAD1]], i93 %[[LOAD2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_add_overflow_abort - - SmallestE - SmallestE; - // CHECK: %[[LOAD1:.+]] = load i4, i4* - // CHECK: %[[LOAD2:.+]] = load i4, i4* - // CHECK: %[[OFCALL:.+]] = call { i4, i1 } @llvm.ssub.with.overflow.i4(i4 %[[LOAD1]], i4 %[[LOAD2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_sub_overflow_abort - - JustRightE * JustRightE; - // CHECK: %[[LOAD1:.+]] = load i31, i31* - // CHECK: %[[LOAD2:.+]] = load i31, i31* - // CHECK: %[[OFCALL:.+]] = call { i31, i1 } @llvm.smul.with.overflow.i31(i31 %[[LOAD1]], i31 %[[LOAD2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_mul_overflow_abort -} - -// CHECK: define void @_Z23UnsignedIntegerOverflowjU7_ExtIntILi23EEjU7_ExtIntILi35EEj -void UnsignedIntegerOverflow(unsigned u, - unsigned _ExtInt(23) SmallE, - unsigned _ExtInt(35) BigE) { - u = SmallE + SmallE; - // CHECK: %[[LOADE1:.+]] = load i23, i23* - // CHECK: %[[LOADE2:.+]] = load i23, i23* - // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_add_overflow_abort - - SmallE = u + u; - // CHECK: %[[LOADU1:.+]] = load i32, i32* - // CHECK: %[[LOADU2:.+]] = load i32, i32* - // CHECK: %[[OFCALL:.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %[[LOADU1]], i32 %[[LOADU2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i32, i1 } 
%[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i32, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_add_overflow_abort - - SmallE = SmallE + SmallE; - // CHECK: %[[LOADE1:.+]] = load i23, i23* - // CHECK: %[[LOADE2:.+]] = load i23, i23* - // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_add_overflow_abort - - SmallE = BigE + BigE; - // CHECK: %[[LOADE1:.+]] = load i35, i35* - // CHECK: %[[LOADE2:.+]] = load i35, i35* - // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_add_overflow_abort - - BigE = BigE + BigE; - // CHECK: %[[LOADE1:.+]] = load i35, i35* - // CHECK: %[[LOADE2:.+]] = load i35, i35* - // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]]) - // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0 - // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1 - // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true - // CHECK: br i1 %[[CHECK]] - // CHECK: call void @__ubsan_handle_add_overflow_abort -} diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c deleted file mode 100644 index ef48dd331652..000000000000 --- a/clang/test/CodeGen/ext-int.c +++ /dev/null @@ -1,44 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK - - -void GenericTest(_ExtInt(3) a, unsigned _ExtInt(3) b, _ExtInt(4) c) { - // CHECK: define {{.*}}void @GenericTest - int which = _Generic(a, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); - // CHECK: store i32 1 - int which2 = _Generic(b, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); - // CHECK: store i32 2 - int which3 = _Generic(c, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); - // CHECK: store i32 3 -} - -void VLATest(_ExtInt(3) A, _ExtInt(99) B, _ExtInt(123456) C) { - // CHECK: define {{.*}}void @VLATest - int AR1[A]; - // CHECK: %[[A:.+]] = zext i3 %{{.+}} to i64 - // CHECK: %[[VLA1:.+]] = alloca i32, i64 %[[A]] - int AR2[B]; - // CHECK: %[[B:.+]] = trunc i99 %{{.+}} to i64 - // CHECK: %[[VLA2:.+]] = alloca i32, i64 %[[B]] - int AR3[C]; - // CHECK: %[[C:.+]] = trunc i123456 %{{.+}} to i64 - // CHECK: %[[VLA3:.+]] = alloca i32, i64 %[[C]] -} - -struct S { - _ExtInt(17) A; - _ExtInt(16777200) B; - _ExtInt(17) C; -}; - -void OffsetOfTest() { - // CHECK: define {{.*}}void @OffsetOfTest - int A = __builtin_offsetof(struct S,A); - // CHECK: store i32 0, i32* %{{.+}} - int B = __builtin_offsetof(struct S,B); - // CHECK: store i32 8, i32* %{{.+}} - int C = __builtin_offsetof(struct S,C); - // CHECK: store i32 2097160, i32* %{{.+}} -} - - diff --git a/clang/test/CodeGenCXX/ext-int.cpp 
b/clang/test/CodeGenCXX/ext-int.cpp deleted file mode 100644 index 4e0c58fe1e40..000000000000 --- a/clang/test/CodeGenCXX/ext-int.cpp +++ /dev/null @@ -1,432 +0,0 @@ -// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -I%S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN,NoNewStructPathTBAA -// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -I%S -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN,NewStructPathTBAA - -// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -I%S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN,NoNewStructPathTBAA -// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -I%S -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN,NewStructPathTBAA - -#include - -// Ensure that the layout for these structs is the same as the normal bitfield -// layouts. -struct BitFieldsByte { - _ExtInt(7) A : 3; - _ExtInt(7) B : 3; - _ExtInt(7) C : 2; -}; -// CHECK: %struct.BitFieldsByte = type { i8 } - -struct BitFieldsShort { - _ExtInt(15) A : 3; - _ExtInt(15) B : 3; - _ExtInt(15) C : 2; -}; -// LIN: %struct.BitFieldsShort = type { i8, i8 } -// WIN: %struct.BitFieldsShort = type { i16 } - -struct BitFieldsInt { - _ExtInt(31) A : 3; - _ExtInt(31) B : 3; - _ExtInt(31) C : 2; -}; -// LIN: %struct.BitFieldsInt = type { i8, [3 x i8] } -// WIN: %struct.BitFieldsInt = type { i32 } - -struct BitFieldsLong { - _ExtInt(63) A : 3; - _ExtInt(63) B : 3; - _ExtInt(63) C : 2; -}; -// LIN: %struct.BitFieldsLong = type { i8, [7 x i8] } -// WIN: %struct.BitFieldsLong = type { i64 } - -struct HasExtIntFirst { - _ExtInt(35) A; - int B; -}; -// CHECK: %struct.HasExtIntFirst = type { i35, i32 } - -struct HasExtIntLast { - int A; - _ExtInt(35) B; -}; -// CHECK: %struct.HasExtIntLast = type { i32, i35 } - -struct HasExtIntMiddle { - int A; - _ExtInt(35) B; - int C; -}; -// CHECK: %struct.HasExtIntMiddle = type { i32, i35, i32 } - -// Force emitting of the above structs. -void StructEmit() { - BitFieldsByte A; - BitFieldsShort B; - BitFieldsInt C; - BitFieldsLong D; - - HasExtIntFirst E; - HasExtIntLast F; - HasExtIntMiddle G; -} - -void BitfieldAssignment() { - // LIN: define void @_Z18BitfieldAssignmentv - // WIN: define dso_local void @"?BitfieldAssignment@@YAXXZ" - BitFieldsByte B; - B.A = 3; - B.B = 2; - B.C = 1; - // First one is used for the lifetime start, skip that. 
- // CHECK: bitcast %struct.BitFieldsByte* - // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* - // CHECK: %[[LOADA:.+]] = load i8, i8* %[[BFType]] - // CHECK: %[[CLEARA:.+]] = and i8 %[[LOADA]], -8 - // CHECK: %[[SETA:.+]] = or i8 %[[CLEARA]], 3 - // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* - // CHECK: %[[LOADB:.+]] = load i8, i8* %[[BFType]] - // CHECK: %[[CLEARB:.+]] = and i8 %[[LOADB]], -57 - // CHECK: %[[SETB:.+]] = or i8 %[[CLEARB]], 16 - // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* - // CHECK: %[[LOADC:.+]] = load i8, i8* %[[BFType]] - // CHECK: %[[CLEARC:.+]] = and i8 %[[LOADC]], 63 - // CHECK: %[[SETC:.+]] = or i8 %[[CLEARC]], 64 -} - -enum AsEnumUnderlyingType : _ExtInt(9) { - A,B,C -}; - -void UnderlyingTypeUsage(AsEnumUnderlyingType Param) { - // LIN: define void @_Z19UnderlyingTypeUsage20AsEnumUnderlyingType(i9 % - // WIN: define dso_local void @"?UnderlyingTypeUsage@@YAXW4AsEnumUnderlyingType@@@Z"(i9 % - AsEnumUnderlyingType Var; - // CHECK: alloca i9, align 2 - // CHECK: store i9 %{{.*}}, align 2 -} - -unsigned _ExtInt(33) ManglingTestRetParam(unsigned _ExtInt(33) Param) { -// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEj(i33 % -// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_UExtInt@$0CB@@__clang@@U12@@Z"(i33 - return 0; -} - -_ExtInt(33) ManglingTestRetParam(_ExtInt(33) Param) { -// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEi(i33 % -// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_ExtInt@$0CB@@__clang@@U12@@Z"(i33 - return 0; -} - -template -void ManglingTestTemplateParam(T&); -template<_ExtInt(99) T> -void ManglingTestNTTP(); - -void ManglingInstantiator() { - // LIN: define void @_Z20ManglingInstantiatorv() - // WIN: define dso_local void @"?ManglingInstantiator@@YAXXZ"() - _ExtInt(93) A; - ManglingTestTemplateParam(A); -// LIN: call void @_Z25ManglingTestTemplateParamIU7_ExtIntILi93EEiEvRT_(i93* -// WIN: call void @"??$ManglingTestTemplateParam@U?$_ExtInt@$0FN@@__clang@@@@YAXAEAU?$_ExtInt@$0FN@@__clang@@@Z"(i93* - constexpr _ExtInt(93) B = 993; - ManglingTestNTTP<38>(); -// LIN: call void @_Z16ManglingTestNTTPILU7_ExtIntILi99EEi38EEvv() -// WIN: call void @"??$ManglingTestNTTP@$0CG@@@YAXXZ"() - ManglingTestNTTP(); -// LIN: call void @_Z16ManglingTestNTTPILU7_ExtIntILi99EEi993EEvv() -// WIN: call void @"??$ManglingTestNTTP@$0DOB@@@YAXXZ"() -} - -void TakesVarargs(int i, ...) { - // LIN: define void @_Z12TakesVarargsiz(i32 %i, ...) - // WIN: define dso_local void @"?TakesVarargs@@YAXHZZ"(i32 %i, ...) 
- - __builtin_va_list args; - // LIN: %[[ARGS:.+]] = alloca [1 x %struct.__va_list_tag] - // WIN: %[[ARGS:.+]] = alloca i8* - __builtin_va_start(args, i); - // LIN: %[[STARTAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[STARTAD1:.+]] = bitcast %struct.__va_list_tag* %[[STARTAD]] to i8* - // LIN: call void @llvm.va_start(i8* %[[STARTAD1]]) - // WIN: %[[ARGSLLIFETIMESTART:.+]] = bitcast i8** %[[ARGS]] to i8* - // WIN: %[[ARGSSTART:.+]] = bitcast i8** %[[ARGS]] to i8* - // WIN: call void @llvm.va_start(i8* %[[ARGSSTART]]) - - _ExtInt(92) A = __builtin_va_arg(args, _ExtInt(92)); - // LIN: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P1:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD1]], i32 0, i32 2 - // LIN: %[[OFA1:.+]] = load i8*, i8** %[[OFA_P1]] - // LIN: %[[BC1:.+]] = bitcast i8* %[[OFA1]] to i92* - // LIN: %[[OFANEXT1:.+]] = getelementptr i8, i8* %[[OFA1]], i32 16 - // LIN: store i8* %[[OFANEXT1]], i8** %[[OFA_P1]] - // LIN: %[[LOAD1:.+]] = load i92, i92* %[[BC1]] - // LIN: store i92 %[[LOAD1]], i92* - // WIN: %[[CUR1:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT1:.+]] = getelementptr inbounds i8, i8* %[[CUR1]], i64 16 - // WIN: store i8* %[[NEXT1]], i8** %[[ARGS]] - // WIN: %[[BC1:.+]] = bitcast i8* %[[CUR1]] to i92* - // WIN: %[[LOADV1:.+]] = load i92, i92* %[[BC1]] - // WIN: store i92 %[[LOADV1]], i92* - - _ExtInt(31) B = __builtin_va_arg(args, _ExtInt(31)); - // LIN: %[[AD2:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P2:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD2]], i32 0, i32 2 - // LIN: %[[OFA2:.+]] = load i8*, i8** %[[OFA_P2]] - // LIN: %[[BC2:.+]] = bitcast i8* %[[OFA2]] to i31* - // LIN: %[[OFANEXT2:.+]] = getelementptr i8, i8* %[[OFA2]], i32 8 - // LIN: store i8* %[[OFANEXT2]], i8** %[[OFA_P2]] - // LIN: %[[LOAD2:.+]] = load i31, i31* %[[BC2]] - // LIN: store i31 %[[LOAD2]], i31* - // WIN: %[[CUR2:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT2:.+]] = getelementptr inbounds i8, i8* %[[CUR2]], i64 8 - // WIN: store i8* %[[NEXT2]], i8** %[[ARGS]] - // WIN: %[[BC2:.+]] = bitcast i8* %[[CUR2]] to i31* - // WIN: %[[LOADV2:.+]] = load i31, i31* %[[BC2]] - // WIN: store i31 %[[LOADV2]], i31* - - _ExtInt(16) C = __builtin_va_arg(args, _ExtInt(16)); - // LIN: %[[AD3:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P3:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD3]], i32 0, i32 2 - // LIN: %[[OFA3:.+]] = load i8*, i8** %[[OFA_P3]] - // LIN: %[[BC3:.+]] = bitcast i8* %[[OFA3]] to i16* - // LIN: %[[OFANEXT3:.+]] = getelementptr i8, i8* %[[OFA3]], i32 8 - // LIN: store i8* %[[OFANEXT3]], i8** %[[OFA_P3]] - // LIN: %[[LOAD3:.+]] = load i16, i16* %[[BC3]] - // LIN: store i16 %[[LOAD3]], i16* - // WIN: %[[CUR3:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT3:.+]] = getelementptr inbounds i8, i8* %[[CUR3]], i64 8 - // WIN: store i8* %[[NEXT3]], i8** %[[ARGS]] - // WIN: %[[BC3:.+]] = bitcast i8* %[[CUR3]] to i16* - // WIN: %[[LOADV3:.+]] = load i16, i16* %[[BC3]] - // WIN: store i16 %[[LOADV3]], i16* - - _ExtInt(129) D = __builtin_va_arg(args, _ExtInt(129)); - // LIN: %[[AD4:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P4:.+]] = 
getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD4]], i32 0, i32 2 - // LIN: %[[OFA4:.+]] = load i8*, i8** %[[OFA_P4]] - // LIN: %[[BC4:.+]] = bitcast i8* %[[OFA4]] to i129* - // LIN: %[[OFANEXT4:.+]] = getelementptr i8, i8* %[[OFA4]], i32 24 - // LIN: store i8* %[[OFANEXT4]], i8** %[[OFA_P4]] - // LIN: %[[LOAD4:.+]] = load i129, i129* %[[BC4]] - // LIN: store i129 %[[LOAD4]], i129* - // WIN: %[[CUR4:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT4:.+]] = getelementptr inbounds i8, i8* %[[CUR4]], i64 24 - // WIN: store i8* %[[NEXT4]], i8** %[[ARGS]] - // WIN: %[[BC4:.+]] = bitcast i8* %[[CUR4]] to i129* - // WIN: %[[LOADV4:.+]] = load i129, i129* %[[BC4]] - // WIN: store i129 %[[LOADV4]], i129* - - _ExtInt(16777200) E = __builtin_va_arg(args, _ExtInt(16777200)); - // LIN: %[[AD5:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[OFA_P5:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD5]], i32 0, i32 2 - // LIN: %[[OFA5:.+]] = load i8*, i8** %[[OFA_P5]] - // LIN: %[[BC5:.+]] = bitcast i8* %[[OFA5]] to i16777200* - // LIN: %[[OFANEXT5:.+]] = getelementptr i8, i8* %[[OFA5]], i32 2097152 - // LIN: store i8* %[[OFANEXT5]], i8** %[[OFA_P5]] - // LIN: %[[LOAD5:.+]] = load i16777200, i16777200* %[[BC5]] - // LIN: store i16777200 %[[LOAD5]], i16777200* - // WIN: %[[CUR5:.+]] = load i8*, i8** %[[ARGS]] - // WIN: %[[NEXT5:.+]] = getelementptr inbounds i8, i8* %[[CUR5]], i64 2097152 - // WIN: store i8* %[[NEXT5]], i8** %[[ARGS]] - // WIN: %[[BC5:.+]] = bitcast i8* %[[CUR5]] to i16777200* - // WIN: %[[LOADV5:.+]] = load i16777200, i16777200* %[[BC5]] - // WIN: store i16777200 %[[LOADV5]], i16777200* - - __builtin_va_end(args); - // LIN: %[[ENDAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] - // LIN: %[[ENDAD1:.+]] = bitcast %struct.__va_list_tag* %[[ENDAD]] to i8* - // LIN: call void @llvm.va_end(i8* %[[ENDAD1]]) - // WIN: %[[ARGSEND:.+]] = bitcast i8** %[[ARGS]] to i8* - // WIN: call void @llvm.va_end(i8* %[[ARGSEND]]) -} -void typeid_tests() { - // LIN: define void @_Z12typeid_testsv() - // WIN: define dso_local void @"?typeid_tests@@YAXXZ"() - unsigned _ExtInt(33) U33_1, U33_2; - _ExtInt(33) S33_1, S33_2; - _ExtInt(32) S32_1, S32_2; - - auto A = typeid(U33_1); - // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEj to %"class.std::type_info"*)) - // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor28* @"??_R0U?$_UExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) - auto B = typeid(U33_2); - // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEj to %"class.std::type_info"*)) - // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor28* @"??_R0U?$_UExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) - auto C = typeid(S33_1); - // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEi to %"class.std::type_info"*)) - // WIN: call 
%"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) - auto D = typeid(S33_2); - // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEi to %"class.std::type_info"*)) - // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) - auto E = typeid(S32_1); - // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi32EEi to %"class.std::type_info"*)) - // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CA@@__clang@@@8" to %"class.std::type_info"*)) - auto F = typeid(S32_2); - // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi32EEi to %"class.std::type_info"*)) - // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CA@@__clang@@@8" to %"class.std::type_info"*)) -} - -void ExplicitCasts() { - // LIN: define void @_Z13ExplicitCastsv() - // WIN: define dso_local void @"?ExplicitCasts@@YAXXZ"() - - _ExtInt(33) a; - _ExtInt(31) b; - int i; - - a = i; - // CHECK: %[[CONV:.+]] = sext i32 %{{.+}} to i33 - b = i; - // CHECK: %[[CONV:.+]] = trunc i32 %{{.+}} to i31 - i = a; - // CHECK: %[[CONV:.+]] = trunc i33 %{{.+}} to i32 - i = b; - // CHECK: %[[CONV:.+]] = sext i31 %{{.+}} to i32 -} - -struct S { - _ExtInt(17) A; - _ExtInt(16777200) B; - _ExtInt(17) C; -}; - -void OffsetOfTest() { - // LIN: define void @_Z12OffsetOfTestv() - // WIN: define dso_local void @"?OffsetOfTest@@YAXXZ"() - - auto A = __builtin_offsetof(S,A); - // CHECK: store i64 0, i64* %{{.+}} - auto B = __builtin_offsetof(S,B); - // CHECK: store i64 8, i64* %{{.+}} - auto C = __builtin_offsetof(S,C); - // CHECK: store i64 2097160, i64* %{{.+}} -} - - -void ShiftExtIntByConstant(_ExtInt(28) Ext) { -// LIN: define void @_Z21ShiftExtIntByConstantU7_ExtIntILi28EEi -// WIN: define dso_local void @"?ShiftExtIntByConstant@@YAXU?$_ExtInt@$0BM@@__clang@@@Z" - Ext << 7; - // CHECK: shl i28 %{{.+}}, 7 - Ext >> 7; - // CHECK: ashr i28 %{{.+}}, 7 - Ext << -7; - // CHECK: shl i28 %{{.+}}, -7 - Ext >> -7; - // CHECK: ashr i28 %{{.+}}, -7 - - // UB in C/C++, Defined in OpenCL. 
- Ext << 29; - // CHECK: shl i28 %{{.+}}, 29 - Ext >> 29; - // CHECK: ashr i28 %{{.+}}, 29 -} - -void ConstantShiftByExtInt(_ExtInt(28) Ext, _ExtInt(65) LargeExt) { - // LIN: define void @_Z21ConstantShiftByExtIntU7_ExtIntILi28EEiU7_ExtIntILi65EEi - // WIN: define dso_local void @"?ConstantShiftByExtInt@@YAXU?$_ExtInt@$0BM@@__clang@@U?$_ExtInt@$0EB@@2@@Z" - 10 << Ext; - // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 - // CHECK: shl i32 10, %[[PROMO]] - 10 >> Ext; - // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 - // CHECK: ashr i32 10, %[[PROMO]] - 10 << LargeExt; - // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 - // CHECK: shl i32 10, %[[PROMO]] - 10 >> LargeExt; - // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 - // CHECK: ashr i32 10, %[[PROMO]] -} - -void Shift(_ExtInt(28) Ext, _ExtInt(65) LargeExt, int i) { - // LIN: define void @_Z5ShiftU7_ExtIntILi28EEiU7_ExtIntILi65EEii - // WIN: define dso_local void @"?Shift@@YAXU?$_ExtInt@$0BM@@__clang@@U?$_ExtInt@$0EB@@2@H@Z" - i << Ext; - // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 - // CHECK: shl i32 {{.+}}, %[[PROMO]] - i >> Ext; - // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 - // CHECK: ashr i32 {{.+}}, %[[PROMO]] - - i << LargeExt; - // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 - // CHECK: shl i32 {{.+}}, %[[PROMO]] - i >> LargeExt; - // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 - // CHECK: ashr i32 {{.+}}, %[[PROMO]] - - Ext << i; - // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28 - // CHECK: shl i28 {{.+}}, %[[PROMO]] - Ext >> i; - // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28 - // CHECK: ashr i28 {{.+}}, %[[PROMO]] - - LargeExt << i; - // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65 - // CHECK: shl i65 {{.+}}, %[[PROMO]] - LargeExt >> i; - // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65 - // CHECK: ashr i65 {{.+}}, %[[PROMO]] - - Ext << LargeExt; - // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i28 - // CHECK: shl i28 {{.+}}, %[[PROMO]] - Ext >> LargeExt; - // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i28 - // CHECK: ashr i28 {{.+}}, %[[PROMO]] - - LargeExt << Ext; - // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i65 - // CHECK: shl i65 {{.+}}, %[[PROMO]] - LargeExt >> Ext; - // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i65 - // CHECK: ashr i65 {{.+}}, %[[PROMO]] -} - -void ComplexTest(_Complex _ExtInt(12) first, - _Complex _ExtInt(33) second) { - // LIN: define void @_Z11ComplexTestCU7_ExtIntILi12EEiCU7_ExtIntILi33EEi - // WIN: define dso_local void @"?ComplexTest@@YAXU?$_Complex@U?$_ExtInt@$0M@@__clang@@@__clang@@U?$_Complex@U?$_ExtInt@$0CB@@__clang@@@2@@Z" - first + second; - // CHECK: %[[FIRST_REALP:.+]] = getelementptr inbounds { i12, i12 }, { i12, i12 }* %{{.+}}, i32 0, i32 0 - // CHECK: %[[FIRST_REAL:.+]] = load i12, i12* %[[FIRST_REALP]] - // CHECK: %[[FIRST_IMAGP:.+]] = getelementptr inbounds { i12, i12 }, { i12, i12 }* %{{.+}}, i32 0, i32 1 - // CHECK: %[[FIRST_IMAG:.+]] = load i12, i12* %[[FIRST_IMAGP]] - // CHECK: %[[FIRST_REAL_CONV:.+]] = sext i12 %[[FIRST_REAL]] - // CHECK: %[[FIRST_IMAG_CONV:.+]] = sext i12 %[[FIRST_IMAG]] - // CHECK: %[[SECOND_REALP:.+]] = getelementptr inbounds { i33, i33 }, { i33, i33 }* %{{.+}}, i32 0, i32 0 - // CHECK: %[[SECOND_REAL:.+]] = load i33, i33* %[[SECOND_REALP]] - // CHECK: %[[SECOND_IMAGP:.+]] = getelementptr inbounds { i33, i33 }, { i33, i33 }* %{{.+}}, i32 0, i32 1 - // CHECK: %[[SECOND_IMAG:.+]] = load i33, i33* %[[SECOND_IMAGP]] - // CHECK: %[[REAL:.+]] = add i33 %[[FIRST_REAL_CONV]], %[[SECOND_REAL]] - // CHECK: %[[IMAG:.+]] = add i33 
%[[FIRST_IMAG_CONV]], %[[SECOND_IMAG]] -} - -// Ensure that these types don't alias the normal int types. -void TBAATest(_ExtInt(sizeof(int) * 8) ExtInt, - unsigned _ExtInt(sizeof(int) * 8) ExtUInt, - _ExtInt(6) Other) { - // CHECK-DAG: store i32 %{{.+}}, i32* %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA:.+]] - // CHECK-DAG: store i32 %{{.+}}, i32* %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA]] - // CHECK-DAG: store i6 %{{.+}}, i6* %{{.+}}, align 1, !tbaa ![[EXTINT6_TBAA:.+]] - ExtInt = 5; - ExtUInt = 5; - Other = 5; -} - -// NoNewStructPathTBAA-DAG: ![[CHAR_TBAA_ROOT:.+]] = !{!"omnipotent char", ![[TBAA_ROOT:.+]], i64 0} -// NoNewStructPathTBAA-DAG: ![[TBAA_ROOT]] = !{!"Simple C++ TBAA"} -// NoNewStructPathTBAA-DAG: ![[EXTINT_TBAA]] = !{![[EXTINT_TBAA_ROOT:.+]], ![[EXTINT_TBAA_ROOT]], i64 0} -// NoNewStructPathTBAA-DAG: ![[EXTINT_TBAA_ROOT]] = !{!"_ExtInt(32)", ![[CHAR_TBAA_ROOT]], i64 0} -// NoNewStructPathTBAA-DAG: ![[EXTINT6_TBAA]] = !{![[EXTINT6_TBAA_ROOT:.+]], ![[EXTINT6_TBAA_ROOT]], i64 0} -// NoNewStructPathTBAA-DAG: ![[EXTINT6_TBAA_ROOT]] = !{!"_ExtInt(6)", ![[CHAR_TBAA_ROOT]], i64 0} - -// NewStructPathTBAA-DAG: ![[CHAR_TBAA_ROOT:.+]] = !{![[TBAA_ROOT:.+]], i64 1, !"omnipotent char"} -// NewStructPathTBAA-DAG: ![[TBAA_ROOT]] = !{!"Simple C++ TBAA"} -// NewStructPathTBAA-DAG: ![[EXTINT_TBAA]] = !{![[EXTINT_TBAA_ROOT:.+]], ![[EXTINT_TBAA_ROOT]], i64 0, i64 4} -// NewStructPathTBAA-DAG: ![[EXTINT_TBAA_ROOT]] = !{![[CHAR_TBAA_ROOT]], i64 4, !"_ExtInt(32)"} -// NewStructPathTBAA-DAG: ![[EXTINT6_TBAA]] = !{![[EXTINT6_TBAA_ROOT:.+]], ![[EXTINT6_TBAA_ROOT]], i64 0, i64 1} -// NewStructPathTBAA-DAG: ![[EXTINT6_TBAA_ROOT]] = !{![[CHAR_TBAA_ROOT]], i64 1, !"_ExtInt(6)"} diff --git a/clang/test/CodeGenOpenCL/ext-int-shift.cl b/clang/test/CodeGenOpenCL/ext-int-shift.cl deleted file mode 100644 index 4d2292daac77..000000000000 --- a/clang/test/CodeGenOpenCL/ext-int-shift.cl +++ /dev/null @@ -1,21 +0,0 @@ -// RUN: %clang -cc1 -triple x86_64-linux-pc -O3 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s - -void Shifts(_ExtInt(12) E, int i) { - E << 99; - // CHECK: shl i12 %{{.+}}, 3 - - 77 << E; - // CHECK: %[[PROM:.+]] = zext i12 %{{.+}} to i32 - // CHECK: %[[MASK:.+]] = and i32 %[[PROM]], 31 - // CHECK: shl i32 77, %[[MASK]] - - E << i; - // CHECK: %[[PROM:.+]] = trunc i32 %{{.+}} to i12 - // CHECK: %[[MASK:.+]] = urem i12 %[[PROM]], 12 - // CHECK: shl i12 %{{.+}}, %[[MASK]] - - i << E; - // CHECK: %[[PROM:.+]] = zext i12 %{{.+}} to i32 - // CHECK: %[[MASK:.+]] = and i32 %[[PROM]], 31 - // CHECK: shl i32 %{{.+}}, %[[MASK]] -} diff --git a/clang/test/Parser/ext-int.cpp b/clang/test/Parser/ext-int.cpp deleted file mode 100644 index 4926d5190587..000000000000 --- a/clang/test/Parser/ext-int.cpp +++ /dev/null @@ -1,15 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s - -// expected-error@+5{{expected ')'}} -// expected-note@+4{{to match this '('}} -// expected-error@+3{{expected unqualified-id}} -// expected-error@+2{{extraneous closing brace}} -// expected-error@+1{{C++ requires a type specifier for all declarations}} -_ExtInt(32} a; -// expected-error@+2{{expected expression}} -// expected-error@+1{{C++ requires a type specifier for all declarations}} -_ExtInt(32* ) b; -// expected-error@+3{{expected '('}} -// expected-error@+2{{expected unqualified-id}} -// expected-error@+1{{C++ requires a type specifier for all declarations}} -_ExtInt{32} c; diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp deleted file mode 100644 index 6a06280dceec..000000000000 --- 
a/clang/test/SemaCXX/ext-int.cpp +++ /dev/null @@ -1,278 +0,0 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s -Wimplicit-int-conversion - -template -struct HasExtInt { - _ExtInt(Bounds) b; - unsigned _ExtInt(Bounds) b2; -}; - -// Delcaring variables: -_ExtInt(33) Declarations(_ExtInt(48) &Param) { // Useable in params and returns. - short _ExtInt(43) a; // expected-error {{'short _ExtInt' is invalid}} - _ExtInt(43) long b; // expected-error {{'long _ExtInt' is invalid}} - - // These should all be fine: - const _ExtInt(5) c = 3; - const unsigned _ExtInt(5) d; // expected-error {{default initialization of an object of const type 'const unsigned _ExtInt(5)'}} - unsigned _ExtInt(5) e = 5; - _ExtInt(5) unsigned f; - - _ExtInt(-3) g; // expected-error{{signed _ExtInt must have a bit size of at least 2}} - _ExtInt(0) h; // expected-error{{signed _ExtInt must have a bit size of at least 2}} - _ExtInt(1) i; // expected-error{{signed _ExtInt must have a bit size of at least 2}} - _ExtInt(2) j;; - unsigned _ExtInt(0) k;// expected-error{{unsigned _ExtInt must have a bit size of at least 1}} - unsigned _ExtInt(1) l; - signed _ExtInt(1) m; // expected-error{{signed _ExtInt must have a bit size of at least 2}} - - constexpr _ExtInt(6) n = 33; // expected-warning{{implicit conversion from 'int' to 'const _ExtInt(6)' changes value from 33 to -31}} - constexpr _ExtInt(7) o = 33; - - // Check LLVM imposed max size. - _ExtInt(0xFFFFFFFFFF) p; // expected-error {{signed _ExtInt of bit sizes greater than 16777215 not supported}} - unsigned _ExtInt(0xFFFFFFFFFF) q; // expected-error {{unsigned _ExtInt of bit sizes greater than 16777215 not supported}} - -// Ensure template params are instantiated correctly. - // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} - // expected-error@6{{unsigned _ExtInt must have a bit size of at least 1}} - // expected-note@+1{{in instantiation of template class }} - HasExtInt<-1> r; - // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} - // expected-error@6{{unsigned _ExtInt must have a bit size of at least 1}} - // expected-note@+1{{in instantiation of template class }} - HasExtInt<0> s; - // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} - // expected-note@+1{{in instantiation of template class }} - HasExtInt<1> t; - HasExtInt<2> u; - - _ExtInt(-3.0) v; // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'double'}} - _ExtInt(3.0) x; // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'double'}} - - return 0; -} - -template <_ExtInt(5) I> -struct ExtIntTemplParam { - static constexpr _ExtInt(5) Var = I; -}; - -template -void deduced_whole_type(T){} -template -void deduced_bound(_ExtInt(I)){} - -// Ensure ext-int can be used in template places. 
-void Templates() { - ExtIntTemplParam<13> a; - constexpr _ExtInt(3) b = 1; - ExtIntTemplParam c; - constexpr _ExtInt(9) d = 1; - ExtIntTemplParam e; - - deduced_whole_type(b); - deduced_bound(b); -} - -template -struct is_same { - static constexpr bool value = false; -}; -template -struct is_same { - static constexpr bool value = true; -}; - -// Reject vector types: -// expected-error@+1{{invalid vector element type '_ExtInt(32)'}} -typedef _ExtInt(32) __attribute__((vector_size(16))) VecTy; - -// Allow _Complex: -_Complex _ExtInt(3) Cmplx; - -// Reject cases of _Atomic: -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)' with less than 1 byte of precision}} -_Atomic _ExtInt(4) TooSmallAtomic; -// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)' with a non power of 2 precision}} -_Atomic _ExtInt(9) NotPow2Atomic; -_Atomic _ExtInt(128) JustRightAtomic; - -// Test result types of Unary/Bitwise/Binary Operations: -void Ops() { - _ExtInt(43) x43_s = 1, y43_s = 1; - _ExtInt(sizeof(int) * 8) x32_s = 1, y32_s = 1; - unsigned _ExtInt(sizeof(unsigned) * 8) x32_u = 1, y32_u = 1; - _ExtInt(4) x4_s = 1, y4_s = 1; - unsigned _ExtInt(43) x43_u = 1, y43_u = 1; - unsigned _ExtInt(4) x4_u = 1, y4_u = 1; - int x_int = 1, y_int = 1; - unsigned x_uint = 1, y_uint = 1; - bool b; - - // Disabling mixed conversions: - // Signed/unsigned mixed. - // expected-error@+1{{invalid operands to binary expression}} - x43_u + y43_s; - // expected-error@+1{{invalid operands to binary expression}} - x4_s - y4_u; - // expected-error@+1{{invalid operands to binary expression}} - x43_s * y43_u; - // expected-error@+1{{invalid operands to binary expression}} - x4_u / y4_s; - - // Different Sizes. - // expected-error@+1{{invalid operands to binary expression}} - x43_s + y4_s; - // expected-error@+1{{invalid operands to binary expression}} - x43_s - y4_u; - // expected-error@+1{{invalid operands to binary expression}} - x43_u * y4_u; - // expected-error@+1{{invalid operands to binary expression}} - x4_u / y43_u; - - // Mixed with standard types. - // expected-error@+1{{invalid operands to binary expression}} - x43_s + x_int; - // expected-error@+1{{invalid operands to binary expression}} - x43_u - x_int; - // expected-error@+1{{invalid operands to binary expression}} - x32_s * x_int; - // expected-error@+1{{invalid operands to binary expression}} - x32_u / x_int; - // expected-error@+1{{invalid operands to binary expression}} - x32_s * x_uint; - // expected-error@+1{{invalid operands to binary expression}} - x32_u / x_uint; - // expected-error@+1{{invalid operands to binary expression}} - x4_s + x_int; - // expected-error@+1{{invalid operands to binary expression}} - x4_u - x_int; - // expected-error@+1{{invalid operands to binary expression}} - x4_s + b; - // expected-error@+1{{invalid operands to binary expression}} - x4_u - b; - // expected-error@+1{{invalid operands to binary expression}} - x43_s + b; - // expected-error@+1{{invalid operands to binary expression}} - x43_u - b; - - // Bitwise checks. - // expected-error@+1{{invalid operands to binary expression}} - x43_s % y4_u; - // expected-error@+1{{invalid operands to binary expression}} - x43_u % y4_s; - // expected-error@+1{{invalid operands to binary expression}} - x4_s | y43_u; - // expected-error@+1{{invalid operands to binary expression}} - x4_u | y43_s; - - // compassign. - // expected-error@+1{{invalid operands to binary expression}} - x43_s += 33; - - // Comparisons. 
- // expected-error@+1{{invalid operands to binary expression}} - x43_s > 33; - // expected-error@+1{{invalid operands to binary expression}} - x4_s > 33; - - // Same size/sign ops don't change type. - static_assert(is_same::value,""); - static_assert(is_same::value,""); - static_assert(is_same::value,""); - static_assert(is_same::value,""); - - // Unary ops shouldn't go through integer promotions. - static_assert(is_same::value,""); - static_assert(is_same::value,""); - static_assert(is_same::value,""); - static_assert(is_same::value,""); - static_assert(is_same::value,""); - static_assert(is_same::value,""); - // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} - static_assert(is_same::value,""); - // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} - static_assert(is_same::value,""); - // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} - static_assert(is_same::value,""); - // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} - static_assert(is_same::value,""); - static_assert(is_same> 1), _ExtInt(4)>::value,""); - static_assert(is_same::value,""); - - static_assert(sizeof(x43_s) == 8, ""); - static_assert(sizeof(x4_s) == 1, ""); - - static_assert(sizeof(_ExtInt(3340)) == 424, ""); // 424 * 8 == 3392. - static_assert(sizeof(_ExtInt(1049)) == 136, ""); // 136 * 8 == 1088. - - static_assert(alignof(decltype(x43_s)) == 8, ""); - static_assert(alignof(decltype(x4_s)) == 1, ""); - - static_assert(alignof(_ExtInt(3340)) == 8, ""); - static_assert(alignof(_ExtInt(1049)) == 8, ""); -} - -constexpr int func() { return 42;} - -void ConstexprBitsize() { - _ExtInt(func()) F; - static_assert(is_same::value, ""); -} - -// Useable as an underlying type. -enum AsEnumUnderlyingType : _ExtInt(33) { -}; - -void overloaded(int); -void overloaded(_ExtInt(32)); -void overloaded(_ExtInt(33)); -void overloaded(short); -//expected-note@+1{{candidate function}} -void overloaded2(_ExtInt(32)); -//expected-note@+1{{candidate function}} -void overloaded2(_ExtInt(33)); -//expected-note@+1{{candidate function}} -void overloaded2(short); - -void overload_use() { - int i; - _ExtInt(32) i32; - _ExtInt(33) i33; - short s; - - // All of these get their corresponding exact matches. - overloaded(i); - overloaded(i32); - overloaded(i33); - overloaded(s); - - overloaded2(i); // expected-error{{call to 'overloaded2' is ambiguous}} - - overloaded2(i32); - - overloaded2(s); -} - -// no errors expected, this should 'just work'. 
-struct UsedAsBitField { - _ExtInt(3) F : 3; - _ExtInt(3) G : 3; - _ExtInt(3) H : 3; -}; - -// expected-error@+1{{mode attribute only supported for integer and floating-point types}} -typedef _ExtInt(33) IllegalMode __attribute__((mode(DI))); - -void ImplicitCasts(_ExtInt(31) s31, _ExtInt(33) s33, int i) { - // expected-warning@+1{{implicit conversion loses integer precision}} - s31 = i; - // expected-warning@+1{{implicit conversion loses integer precision}} - s31 = s33; - s33 = i; - s33 = s31; - i = s31; - // expected-warning@+1{{implicit conversion loses integer precision}} - i = s33; -} - diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index dafe4ccda05f..fc9d8db62b2d 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1793,8 +1793,6 @@ DEFAULT_TYPELOC_IMPL(Enum, TagType) DEFAULT_TYPELOC_IMPL(SubstTemplateTypeParm, Type) DEFAULT_TYPELOC_IMPL(SubstTemplateTypeParmPack, Type) DEFAULT_TYPELOC_IMPL(Auto, Type) -DEFAULT_TYPELOC_IMPL(ExtInt, Type) -DEFAULT_TYPELOC_IMPL(DependentExtInt, Type) bool CursorVisitor::VisitCXXRecordDecl(CXXRecordDecl *D) { // Visit the nested-name-specifier, if present. From 0feaba683edacc22d2f16d4645705676e499ec96 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Apr 2020 10:12:15 -0700 Subject: [PATCH 194/216] [CallSite removal][MemCpyOptimizer] Replace CallSite with CallBase. NFC There are also some adjustments to use MaybeAlign in here due to CallBase::getParamAlignment() being deprecated. It would be a little cleaner if getOrEnforceKnownAlignment was migrated to Align/MaybeAlign. Differential Revision: https://reviews.llvm.org/D78345 --- .../llvm/Transforms/Scalar/MemCpyOptimizer.h | 3 +- .../lib/Transforms/Scalar/MemCpyOptimizer.cpp | 34 +++++++++---------- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h index 5386f58b2b82..41180c5c678d 100644 --- a/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h +++ b/llvm/include/llvm/Transforms/Scalar/MemCpyOptimizer.h @@ -16,7 +16,6 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/IR/BasicBlock.h" -#include "llvm/IR/CallSite.h" #include "llvm/IR/PassManager.h" #include #include @@ -66,7 +65,7 @@ class MemCpyOptPass : public PassInfoMixin { bool processMemCpyMemCpyDependence(MemCpyInst *M, MemCpyInst *MDep); bool processMemSetMemCpyDependence(MemCpyInst *M, MemSetInst *MDep); bool performMemCpyToMemSetOptzn(MemCpyInst *M, MemSetInst *MDep); - bool processByValArgument(CallSite CS, unsigned ArgNo); + bool processByValArgument(CallBase &CB, unsigned ArgNo); Instruction *tryMergingIntoMemset(Instruction *I, Value *StartPtr, Value *ByteVal); diff --git a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp index 0987cd6597a4..0a93d3e52904 100644 --- a/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp +++ b/llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp @@ -1242,15 +1242,15 @@ bool MemCpyOptPass::processMemMove(MemMoveInst *M) { } /// This is called on every byval argument in call sites. -bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { - const DataLayout &DL = CS.getCaller()->getParent()->getDataLayout(); +bool MemCpyOptPass::processByValArgument(CallBase &CB, unsigned ArgNo) { + const DataLayout &DL = CB.getCaller()->getParent()->getDataLayout(); // Find out what feeds this byval argument. 
- Value *ByValArg = CS.getArgument(ArgNo); + Value *ByValArg = CB.getArgOperand(ArgNo); Type *ByValTy = cast(ByValArg->getType())->getElementType(); uint64_t ByValSize = DL.getTypeAllocSize(ByValTy); MemDepResult DepInfo = MD->getPointerDependencyFrom( MemoryLocation(ByValArg, LocationSize::precise(ByValSize)), true, - CS.getInstruction()->getIterator(), CS.getInstruction()->getParent()); + CB.getIterator(), CB.getParent()); if (!DepInfo.isClobber()) return false; @@ -1269,16 +1269,16 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // Get the alignment of the byval. If the call doesn't specify the alignment, // then it is some target specific value that we can't know. - unsigned ByValAlign = CS.getParamAlignment(ArgNo); - if (ByValAlign == 0) return false; + MaybeAlign ByValAlign = CB.getParamAlign(ArgNo); + if (!ByValAlign) return false; // If it is greater than the memcpy, then we check to see if we can force the // source of the memcpy to the alignment we need. If we fail, we bail out. AssumptionCache &AC = LookupAssumptionCache(); DominatorTree &DT = LookupDomTree(); - if (MDep->getSourceAlignment() < ByValAlign && - getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign, DL, - CS.getInstruction(), &AC, &DT) < ByValAlign) + if (MDep->getSourceAlign() < ByValAlign && + getOrEnforceKnownAlignment(MDep->getSource(), ByValAlign->value(), DL, + &CB, &AC, &DT) < ByValAlign->value()) return false; // The address space of the memcpy source must match the byval argument @@ -1297,14 +1297,14 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { // not just the defining memcpy. MemDepResult SourceDep = MD->getPointerDependencyFrom( MemoryLocation::getForSource(MDep), false, - CS.getInstruction()->getIterator(), MDep->getParent()); + CB.getIterator(), MDep->getParent()); if (!SourceDep.isClobber() || SourceDep.getInst() != MDep) return false; Value *TmpCast = MDep->getSource(); if (MDep->getSource()->getType() != ByValArg->getType()) { BitCastInst *TmpBitCast = new BitCastInst(MDep->getSource(), ByValArg->getType(), - "tmpcast", CS.getInstruction()); + "tmpcast", &CB); // Set the tmpcast's DebugLoc to MDep's TmpBitCast->setDebugLoc(MDep->getDebugLoc()); TmpCast = TmpBitCast; @@ -1312,10 +1312,10 @@ bool MemCpyOptPass::processByValArgument(CallSite CS, unsigned ArgNo) { LLVM_DEBUG(dbgs() << "MemCpyOptPass: Forwarding memcpy to byval:\n" << " " << *MDep << "\n" - << " " << *CS.getInstruction() << "\n"); + << " " << CB << "\n"); // Otherwise we're good! Update the byval argument. - CS.setArgument(ArgNo, TmpCast); + CB.setArgOperand(ArgNo, TmpCast); ++NumMemCpyInstr; return true; } @@ -1349,10 +1349,10 @@ bool MemCpyOptPass::iterateOnFunction(Function &F) { RepeatInstruction = processMemCpy(M); else if (MemMoveInst *M = dyn_cast(I)) RepeatInstruction = processMemMove(M); - else if (auto CS = CallSite(I)) { - for (unsigned i = 0, e = CS.arg_size(); i != e; ++i) - if (CS.isByValArgument(i)) - MadeChange |= processByValArgument(CS, i); + else if (auto *CB = dyn_cast(I)) { + for (unsigned i = 0, e = CB->arg_size(); i != e; ++i) + if (CB->isByValArgument(i)) + MadeChange |= processByValArgument(*CB, i); } // Reprocess the instruction if desired. From d1ef44982f36b8783119fa18d6ef9edbd9617027 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Fri, 17 Apr 2020 19:43:06 +0200 Subject: [PATCH 195/216] [AArch64] Fold one-use variables into assert Avoids unused variable warnings in Release builds. 
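For illustration, the cleanup below amounts to the following before/after
(paraphrasing the hunk in LowerINTRINSIC_WO_CHAIN further down; Op, Ty,
ShiftAmount and ElemSizeInBits are the names already used there, nothing new
is introduced):

    // Before: both temporaries are read only inside the assert, so Release
    // (NDEBUG) builds, where the assert expands to nothing, warn that the
    // variables are unused.
    uint64_t ShiftAmount = Op.getConstantOperandVal(3);
    unsigned ElemSizeInBits = Ty.getScalarSizeInBits();
    assert(ShiftAmount <= ElemSizeInBits);

    // After: fold the single-use expressions into the assert itself.
    assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits());

An alternative would have been to keep the named variables and silence the
warning with (void) casts, but folding reads more simply when each value has
exactly one use.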
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a52dd21d44b1..a0c17a9f0704 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3150,9 +3150,7 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, if (!Ty.isVector()) report_fatal_error("Unexpected type for aarch64_neon_vsli"); - uint64_t ShiftAmount = Op.getConstantOperandVal(3); - unsigned ElemSizeInBits = Ty.getScalarSizeInBits(); - assert(ShiftAmount <= ElemSizeInBits); + assert(Op.getConstantOperandVal(3) <= Ty.getScalarSizeInBits()); bool IsShiftRight = IntNo == Intrinsic::aarch64_neon_vsri; unsigned Opcode = IsShiftRight ? AArch64ISD::VSRI : AArch64ISD::VSLI; From 5f6d93c7d3165938ed4602013f4b89929b1f9682 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Apr 2020 10:33:59 -0700 Subject: [PATCH 196/216] [CallSite removal][Attributor] Replaces use of CallSite with CallBase. NFC Differential Revision: https://reviews.llvm.org/D78343 --- llvm/include/llvm/Transforms/IPO/Attributor.h | 27 +-- llvm/lib/Transforms/IPO/Attributor.cpp | 91 +++++---- .../Transforms/IPO/AttributorAttributes.cpp | 173 +++++++++--------- 3 files changed, 134 insertions(+), 157 deletions(-) diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index 2363a74d211b..99989775bde6 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -217,23 +217,6 @@ struct IRPosition { return IRPosition(const_cast(CB), Kind(ArgNo)); } - /// Create a position describing the function scope of \p ICS. - static const IRPosition callsite_function(ImmutableCallSite ICS) { - return IRPosition::callsite_function(cast(*ICS.getInstruction())); - } - - /// Create a position describing the returned value of \p ICS. - static const IRPosition callsite_returned(ImmutableCallSite ICS) { - return IRPosition::callsite_returned(cast(*ICS.getInstruction())); - } - - /// Create a position describing the argument of \p ICS at position \p ArgNo. - static const IRPosition callsite_argument(ImmutableCallSite ICS, - unsigned ArgNo) { - return IRPosition::callsite_argument(cast(*ICS.getInstruction()), - ArgNo); - } - /// Create a position describing the argument of \p ACS at position \p ArgNo. 
static const IRPosition callsite_argument(AbstractCallSite ACS, unsigned ArgNo) { @@ -418,9 +401,9 @@ struct IRPosition { return; AttributeList AttrList; - CallSite CS = CallSite(&getAnchorValue()); - if (CS) - AttrList = CS.getAttributes(); + auto *CB = dyn_cast(&getAnchorValue()); + if (CB) + AttrList = CB->getAttributes(); else AttrList = getAssociatedFunction()->getAttributes(); @@ -428,8 +411,8 @@ struct IRPosition { for (Attribute::AttrKind AK : AKs) AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); - if (CS) - CS.setAttributes(AttrList); + if (CB) + CB->setAttributes(AttrList); else getAssociatedFunction()->setAttributes(AttrList); } diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index da1cbcc90be5..bf2e0c3c864b 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -228,7 +228,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - Attrs = ImmutableCallSite(&IRP.getAnchorValue()).getAttributes(); + Attrs = cast(IRP.getAnchorValue()).getAttributes(); break; } @@ -253,7 +253,7 @@ IRAttributeManifest::manifestAttrs(Attributor &A, const IRPosition &IRP, case IRPosition::IRP_CALL_SITE: case IRPosition::IRP_CALL_SITE_RETURNED: case IRPosition::IRP_CALL_SITE_ARGUMENT: - CallSite(&IRP.getAnchorValue()).setAttributes(Attrs); + cast(IRP.getAnchorValue()).setAttributes(Attrs); break; case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -269,7 +269,7 @@ const IRPosition IRPosition::TombstoneKey(256); SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRP); - ImmutableCallSite ICS(&IRP.getAnchorValue()); + const auto *CB = dyn_cast(&IRP.getAnchorValue()); switch (IRP.getPositionKind()) { case IRPosition::IRP_INVALID: case IRPosition::IRP_FLOAT: @@ -280,41 +280,40 @@ SubsumingPositionIterator::SubsumingPositionIterator(const IRPosition &IRP) { IRPositions.emplace_back(IRPosition::function(*IRP.getAnchorScope())); return; case IRPosition::IRP_CALL_SITE: - assert(ICS && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!ICS.hasOperandBundles()) - if (const Function *Callee = ICS.getCalledFunction()) + if (!CB->hasOperandBundles()) + if (const Function *Callee = CB->getCalledFunction()) IRPositions.emplace_back(IRPosition::function(*Callee)); return; case IRPosition::IRP_CALL_SITE_RETURNED: - assert(ICS && "Expected call site!"); + assert(CB && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. 
- if (!ICS.hasOperandBundles()) { - if (const Function *Callee = ICS.getCalledFunction()) { + if (!CB->hasOperandBundles()) { + if (const Function *Callee = CB->getCalledFunction()) { IRPositions.emplace_back(IRPosition::returned(*Callee)); IRPositions.emplace_back(IRPosition::function(*Callee)); for (const Argument &Arg : Callee->args()) if (Arg.hasReturnedAttr()) { IRPositions.emplace_back( - IRPosition::callsite_argument(ICS, Arg.getArgNo())); + IRPosition::callsite_argument(*CB, Arg.getArgNo())); IRPositions.emplace_back( - IRPosition::value(*ICS.getArgOperand(Arg.getArgNo()))); + IRPosition::value(*CB->getArgOperand(Arg.getArgNo()))); IRPositions.emplace_back(IRPosition::argument(Arg)); } } } - IRPositions.emplace_back( - IRPosition::callsite_function(cast(*ICS.getInstruction()))); + IRPositions.emplace_back(IRPosition::callsite_function(*CB)); return; case IRPosition::IRP_CALL_SITE_ARGUMENT: { int ArgNo = IRP.getArgNo(); - assert(ICS && ArgNo >= 0 && "Expected call site!"); + assert(CB && ArgNo >= 0 && "Expected call site!"); // TODO: We need to look at the operand bundles similar to the redirection // in CallBase. - if (!ICS.hasOperandBundles()) { - const Function *Callee = ICS.getCalledFunction(); + if (!CB->hasOperandBundles()) { + const Function *Callee = CB->getCalledFunction(); if (Callee && Callee->arg_size() > unsigned(ArgNo)) IRPositions.emplace_back(IRPosition::argument(*Callee->getArg(ArgNo))); if (Callee) @@ -369,8 +368,8 @@ bool IRPosition::getAttrsFromIRAttr(Attribute::AttrKind AK, return false; AttributeList AttrList; - if (ImmutableCallSite ICS = ImmutableCallSite(&getAnchorValue())) - AttrList = ICS.getAttributes(); + if (const auto *CB = dyn_cast(&getAnchorValue())) + AttrList = CB->getAttributes(); else AttrList = getAssociatedFunction()->getAttributes(); @@ -510,12 +509,12 @@ bool Attributor::isAssumedDead(const Use &U, return isAssumedDead(IRPosition::value(*U.get()), QueryingAA, FnLivenessAA, CheckBBLivenessOnly, DepClass); - if (CallSite CS = CallSite(UserI)) { + if (auto *CB = dyn_cast(UserI)) { // For call site argument uses we can check if the argument is // unused/dead. - if (CS.isArgOperand(&U)) { + if (CB->isArgOperand(&U)) { const IRPosition &CSArgPos = - IRPosition::callsite_argument(CS, CS.getArgumentNo(&U)); + IRPosition::callsite_argument(*CB, CB->getArgOperandNo(&U)); return isAssumedDead(CSArgPos, QueryingAA, FnLivenessAA, CheckBBLivenessOnly, DepClass); } @@ -1617,8 +1616,8 @@ void InformationCache::initializeInformationCache(const Function &CF, // Note: There are no concrete attributes now so this is initially empty. 
switch (I.getOpcode()) { default: - assert((!ImmutableCallSite(&I)) && (!isa(&I)) && - "New call site/base instruction type needs to be known in the " + assert(!isa(&I) && + "New call base instruction type needs to be known in the " "Attributor."); break; case Instruction::Call: @@ -1687,8 +1686,8 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { InformationCache::FunctionInfo &FI = InfoCache.getFunctionInfo(F); if (!isModulePass() && !FI.CalledViaMustTail) { for (const Use &U : F.uses()) - if (ImmutableCallSite ICS = ImmutableCallSite(U.getUser())) - if (ICS.isCallee(&U) && ICS.isMustTailCall()) + if (const auto *CB = dyn_cast(U.getUser())) + if (CB->isCallee(&U) && CB->isMustTailCall()) FI.CalledViaMustTail = true; } @@ -1800,14 +1799,14 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { } auto CallSitePred = [&](Instruction &I) -> bool { - CallSite CS(&I); - IRPosition CSRetPos = IRPosition::callsite_returned(CS); + auto *CB = dyn_cast(&I); + IRPosition CBRetPos = IRPosition::callsite_returned(*CB); // Call sites might be dead if they do not have side effects and no live // users. The return value might be dead if there are no live users. - getOrCreateAAFor(CSRetPos); + getOrCreateAAFor(CBRetPos); - Function *Callee = CS.getCalledFunction(); + Function *Callee = CB->getCalledFunction(); // TODO: Even if the callee is not known now we might be able to simplify // the call/callee. if (!Callee) @@ -1819,46 +1818,46 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) { !Callee->hasMetadata(LLVMContext::MD_callback)) return true; - if (!Callee->getReturnType()->isVoidTy() && !CS->use_empty()) { + if (!Callee->getReturnType()->isVoidTy() && !CB->use_empty()) { - IRPosition CSRetPos = IRPosition::callsite_returned(CS); + IRPosition CBRetPos = IRPosition::callsite_returned(*CB); // Call site return integer values might be limited by a constant range. if (Callee->getReturnType()->isIntegerTy()) - getOrCreateAAFor(CSRetPos); + getOrCreateAAFor(CBRetPos); } - for (int i = 0, e = CS.getNumArgOperands(); i < e; i++) { + for (int I = 0, E = CB->getNumArgOperands(); I < E; ++I) { - IRPosition CSArgPos = IRPosition::callsite_argument(CS, i); + IRPosition CBArgPos = IRPosition::callsite_argument(*CB, I); // Every call site argument might be dead. - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument might be simplified. - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); - if (!CS.getArgument(i)->getType()->isPointerTy()) + if (!CB->getArgOperand(I)->getType()->isPointerTy()) continue; // Call site argument attribute "non-null". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "no-alias". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "dereferenceable". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "align". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute // "readnone/readonly/writeonly/..." - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); // Call site argument attribute "nofree". - getOrCreateAAFor(CSArgPos); + getOrCreateAAFor(CBArgPos); } return true; }; @@ -1983,9 +1982,9 @@ static bool runAttributorOnFunctions(InformationCache &InfoCache, // do it eagerly. 
if (F->hasLocalLinkage()) { if (llvm::all_of(F->uses(), [&Functions](const Use &U) { - ImmutableCallSite ICS(U.getUser()); - return ICS && ICS.isCallee(&U) && - Functions.count(const_cast(ICS.getCaller())); + const auto *CB = dyn_cast(U.getUser()); + return CB && CB->isCallee(&U) && + Functions.count(const_cast(CB->getCaller())); })) continue; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 9ea314f06888..fb9a3e2e6f9c 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -280,11 +280,11 @@ static bool genericValueTraversal( if (V->getType()->isPointerTy()) { NewV = V->stripPointerCasts(); } else { - CallSite CS(V); - if (CS && CS.getCalledFunction()) { - for (Argument &Arg : CS.getCalledFunction()->args()) + auto *CB = dyn_cast(V); + if (CB && CB->getCalledFunction()) { + for (Argument &Arg : CB->getCalledFunction()->args()) if (Arg.hasReturnedAttr()) { - NewV = CS.getArgOperand(Arg.getArgNo()); + NewV = CB->getArgOperand(Arg.getArgNo()); break; } } @@ -688,9 +688,9 @@ struct AANoUnwindImpl : AANoUnwind { if (!I.mayThrow()) return true; - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (const auto *CB = dyn_cast(&I)) { const auto &NoUnwindAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(*CB)); return NoUnwindAA.isAssumedNoUnwind(); } return false; @@ -1273,8 +1273,7 @@ bool AANoSyncImpl::isNoSyncIntrinsic(Instruction *I) { } bool AANoSyncImpl::isVolatile(Instruction *I) { - assert(!ImmutableCallSite(I) && !isa(I) && - "Calls should not be checked here"); + assert(!isa(I) && "Calls should not be checked here"); switch (I->getOpcode()) { case Instruction::AtomicRMW: @@ -1299,12 +1298,12 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { if (isa(&I) && isNoSyncIntrinsic(&I)) return true; - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { - if (ICS.hasFnAttr(Attribute::NoSync)) + if (const auto *CB = dyn_cast(&I)) { + if (CB->hasFnAttr(Attribute::NoSync)) return true; const auto &NoSyncAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(*CB)); if (NoSyncAA.isAssumedNoSync()) return true; return false; @@ -1323,7 +1322,7 @@ ChangeStatus AANoSyncImpl::updateImpl(Attributor &A) { return true; // non-convergent and readnone imply nosync. - return !ImmutableCallSite(&I).isConvergent(); + return !cast(I).isConvergent(); }; if (!A.checkForAllReadWriteInstructions(CheckRWInstForNoSync, *this) || @@ -1377,12 +1376,12 @@ struct AANoFreeImpl : public AANoFree { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto CheckForNoFree = [&](Instruction &I) { - ImmutableCallSite ICS(&I); - if (ICS.hasFnAttr(Attribute::NoFree)) + const auto &CB = cast(I); + if (CB.hasFnAttr(Attribute::NoFree)) return true; const auto &NoFreeAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(CB)); return NoFreeAA.isAssumedNoFree(); }; @@ -1559,17 +1558,17 @@ static int64_t getKnownNonNullAndDerefBytesForUse( bool NullPointerIsDefined = F ? 
llvm::NullPointerIsDefined(F, PtrTy->getPointerAddressSpace()) : true; const DataLayout &DL = A.getInfoCache().getDL(); - if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - if (ICS.isBundleOperand(U)) + if (const auto *CB = dyn_cast(I)) { + if (CB->isBundleOperand(U)) return 0; - if (ICS.isCallee(U)) { + if (CB->isCallee(U)) { IsNonNull |= !NullPointerIsDefined; return 0; } - unsigned ArgNo = ICS.getArgumentNo(U); - IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + unsigned ArgNo = CB->getArgOperandNo(U); + IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); // As long as we only use known information there is no need to track // dependences here. auto &DerefAA = A.getAAFor(QueryingAA, IRP, @@ -1803,17 +1802,17 @@ struct AANoRecurseFunction final : AANoRecurseImpl { // If the above check does not hold anymore we look at the calls. auto CheckForNoRecurse = [&](Instruction &I) { - ImmutableCallSite ICS(&I); - if (ICS.hasFnAttr(Attribute::NoRecurse)) + const auto &CB = cast(I); + if (CB.hasFnAttr(Attribute::NoRecurse)) return true; const auto &NoRecurseAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + A.getAAFor(*this, IRPosition::callsite_function(CB)); if (!NoRecurseAA.isAssumedNoRecurse()) return false; // Recursion to the same function - if (ICS.getCalledFunction() == getAnchorScope()) + if (CB.getCalledFunction() == getAnchorScope()) return false; return true; @@ -2114,7 +2113,7 @@ struct AAWillReturnImpl : public AAWillReturn { /// See AbstractAttribute::updateImpl(...). ChangeStatus updateImpl(Attributor &A) override { auto CheckForWillReturn = [&](Instruction &I) { - IRPosition IPos = IRPosition::callsite_function(ImmutableCallSite(&I)); + IRPosition IPos = IRPosition::callsite_function(cast(I)); const auto &WillReturnAA = A.getAAFor(*this, IPos); if (WillReturnAA.isKnownWillReturn()) return true; @@ -2321,8 +2320,8 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { /// See AbstractAttribute::initialize(...). void initialize(Attributor &A) override { // See callsite argument attribute and callee argument attribute. - ImmutableCallSite ICS(&getAnchorValue()); - if (ICS.paramHasAttr(getArgNo(), Attribute::NoAlias)) + const auto &CB = cast(getAnchorValue()); + if (CB.paramHasAttr(getArgNo(), Attribute::NoAlias)) indicateOptimisticFixpoint(); Value &Val = getAssociatedValue(); if (isa(Val) && @@ -2335,32 +2334,32 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { /// \p OtherArgNo of \p ICS (= the underlying call site). bool mayAliasWithArgument(Attributor &A, AAResults *&AAR, const AAMemoryBehavior &MemBehaviorAA, - ImmutableCallSite ICS, unsigned OtherArgNo) { + const CallBase &CB, unsigned OtherArgNo) { // We do not need to worry about aliasing with the underlying IRP. if (this->getArgNo() == (int)OtherArgNo) return false; // If it is not a pointer or pointer vector we do not alias. - const Value *ArgOp = ICS.getArgOperand(OtherArgNo); + const Value *ArgOp = CB.getArgOperand(OtherArgNo); if (!ArgOp->getType()->isPtrOrPtrVectorTy()) return false; - auto &ICSArgMemBehaviorAA = A.getAAFor( - *this, IRPosition::callsite_argument(ICS, OtherArgNo), + auto &CBArgMemBehaviorAA = A.getAAFor( + *this, IRPosition::callsite_argument(CB, OtherArgNo), /* TrackDependence */ false); // If the argument is readnone, there is no read-write aliasing. 
- if (ICSArgMemBehaviorAA.isAssumedReadNone()) { - A.recordDependence(ICSArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + if (CBArgMemBehaviorAA.isAssumedReadNone()) { + A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); return false; } // If the argument is readonly and the underlying value is readonly, there // is no read-write aliasing. bool IsReadOnly = MemBehaviorAA.isAssumedReadOnly(); - if (ICSArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { + if (CBArgMemBehaviorAA.isAssumedReadOnly() && IsReadOnly) { A.recordDependence(MemBehaviorAA, *this, DepClassTy::OPTIONAL); - A.recordDependence(ICSArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); + A.recordDependence(CBArgMemBehaviorAA, *this, DepClassTy::OPTIONAL); return false; } @@ -2457,10 +2456,10 @@ struct AANoAliasCallSiteArgument final : AANoAliasImpl { // Check there is no other pointer argument which could alias with the // value passed at this call site. // TODO: AbstractCallSite - ImmutableCallSite ICS(&getAnchorValue()); - for (unsigned OtherArgNo = 0; OtherArgNo < ICS.getNumArgOperands(); + const auto &CB = cast(getAnchorValue()); + for (unsigned OtherArgNo = 0; OtherArgNo < CB.getNumArgOperands(); OtherArgNo++) - if (mayAliasWithArgument(A, AAR, MemBehaviorAA, ICS, OtherArgNo)) + if (mayAliasWithArgument(A, AAR, MemBehaviorAA, CB, OtherArgNo)) return false; return true; @@ -2511,8 +2510,7 @@ struct AANoAliasReturned final : AANoAliasImpl { /// For now, we can only deduce noalias if we have call sites. /// FIXME: add more support. - ImmutableCallSite ICS(&RV); - if (!ICS) + if (!isa(&RV)) return false; const IRPosition &RVPos = IRPosition::value(RV); @@ -2984,8 +2982,8 @@ struct AAIsDeadFunction : public AAIsDead { // is a performance optimization for blocks with calls to a lot of internal // functions. It can however cause dead functions to be treated as live. for (const Instruction &I : BB) - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) - if (const Function *F = ICS.getCalledFunction()) + if (const auto *CB = dyn_cast(&I)) + if (const Function *F = CB->getCalledFunction()) if (F->hasLocalLinkage()) A.markLiveInternalFunction(*F); return true; @@ -3477,12 +3475,12 @@ static unsigned getKnownAlignForUse(Attributor &A, } MaybeAlign MA; - if (ImmutableCallSite ICS = ImmutableCallSite(I)) { - if (ICS.isBundleOperand(U) || ICS.isCallee(U)) + if (const auto *CB = dyn_cast(I)) { + if (CB->isBundleOperand(U) || CB->isCallee(U)) return 0; - unsigned ArgNo = ICS.getArgumentNo(U); - IRPosition IRP = IRPosition::callsite_argument(ICS, ArgNo); + unsigned ArgNo = CB->getArgOperandNo(U); + IRPosition IRP = IRPosition::callsite_argument(*CB, ArgNo); // As long as we only use known information there is no need to track // dependences here. auto &AlignAA = A.getAAFor(QueryingAA, IRP, @@ -3985,13 +3983,13 @@ struct AACaptureUseTracker final : public CaptureTracker { // For now we only use special logic for call sites. However, the tracker // itself knows about a lot of other non-capturing cases already. 
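The capture-tracker hunk that follows records the call itself as a potential copy whenever the callee might return its argument. A source-level illustration of why that is necessary; the functions are hypothetical and not taken from the tests in this series:

    // The callee may return its argument, so the call result can alias it.
    static int *identity(int *P) { return P; }

    void caller(int *Q) {
      int *R = identity(Q); // R is a potential copy of Q ...
      *R = 0;               // ... so uses of R must be tracked like uses of Q
    }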
- CallSite CS(UInst); - if (!CS || !CS.isArgOperand(U)) + auto *CB = dyn_cast(UInst); + if (!CB || !CB->isArgOperand(U)) return isCapturedIn(/* Memory */ true, /* Integer */ true, /* Return */ true); - unsigned ArgNo = CS.getArgumentNo(U); - const IRPosition &CSArgPos = IRPosition::callsite_argument(CS, ArgNo); + unsigned ArgNo = CB->getArgOperandNo(U); + const IRPosition &CSArgPos = IRPosition::callsite_argument(*CB, ArgNo); // If we have a abstract no-capture attribute for the argument we can use // it to justify a non-capture attribute here. This allows recursion! auto &ArgNoCaptureAA = A.getAAFor(NoCaptureAA, CSArgPos); @@ -3999,7 +3997,7 @@ struct AACaptureUseTracker final : public CaptureTracker { return isCapturedIn(/* Memory */ false, /* Integer */ false, /* Return */ false); if (ArgNoCaptureAA.isAssumedNoCaptureMaybeReturned()) { - addPotentialCopy(CS); + addPotentialCopy(*CB); return isCapturedIn(/* Memory */ false, /* Integer */ false, /* Return */ false); } @@ -4010,9 +4008,7 @@ struct AACaptureUseTracker final : public CaptureTracker { } /// Register \p CS as potential copy of the value we are checking. - void addPotentialCopy(CallSite CS) { - PotentialCopies.push_back(CS.getInstruction()); - } + void addPotentialCopy(CallBase &CB) { PotentialCopies.push_back(&CB); } /// See CaptureTracker::shouldExplore(...). bool shouldExplore(const Use *U) override { @@ -4992,10 +4988,9 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // Helper to check if for the given call site the associated argument is // passed to a callback where the privatization would be different. - auto IsCompatiblePrivArgOfCallback = [&](CallSite CS) { + auto IsCompatiblePrivArgOfCallback = [&](CallBase &CB) { SmallVector CallbackUses; - AbstractCallSite::getCallbackUses(cast(*CS.getInstruction()), - CallbackUses); + AbstractCallSite::getCallbackUses(CB, CallbackUses); for (const Use *U : CallbackUses) { AbstractCallSite CBACS(U); assert(CBACS && CBACS.isCallbackCall()); @@ -5012,7 +5007,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { << CBArgNo << "@" << CBACS.getCalledFunction()->getName() << ")\n[AAPrivatizablePtr] " << CBArg << " : " << CBACS.getCallArgOperand(CBArg) << " vs " - << CS.getArgOperand(ArgNo) << "\n" + << CB.getArgOperand(ArgNo) << "\n" << "[AAPrivatizablePtr] " << CBArg << " : " << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n"; }); @@ -5094,7 +5089,7 @@ struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl { // here. auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) { if (ACS.isDirectCall()) - return IsCompatiblePrivArgOfCallback(CallSite(ACS.getInstruction())); + return IsCompatiblePrivArgOfCallback(*ACS.getInstruction()); if (ACS.isCallbackCall()) return IsCompatiblePrivArgOfDirectCS(ACS); return false; @@ -5727,9 +5722,9 @@ ChangeStatus AAMemoryBehaviorFunction::updateImpl(Attributor &A) { // If the instruction has an own memory behavior state, use it to restrict // the local state. No further analysis is required as the other memory // state is as optimistic as it gets. 
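The hunk that follows lets a function's own memory-behavior state be narrowed by whatever is assumed for each call site it contains. A tiny source-level illustration of the effect, assuming a hypothetical pair of functions:

    // g only reads memory (GCC/Clang 'pure'); f's sole memory access is the
    // call to g, so f can be deduced readonly from that call site alone.
    extern int g(const int *P) __attribute__((pure));

    int f(const int *P) {
      return g(P);
    }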
- if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (const auto *CB = dyn_cast(&I)) { const auto &MemBehaviorAA = A.getAAFor( - *this, IRPosition::callsite_function(ICS)); + *this, IRPosition::callsite_function(*CB)); intersectAssumedBits(MemBehaviorAA.getAssumed()); return !isAtFixpoint(); } @@ -5827,8 +5822,8 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, // By default we follow all uses assuming UserI might leak information on U, // we have special handling for call sites operands though. - ImmutableCallSite ICS(UserI); - if (!ICS || !ICS.isArgOperand(U)) + const auto *CB = dyn_cast(UserI); + if (!CB || !CB->isArgOperand(U)) return true; // If the use is a call argument known not to be captured, the users of @@ -5838,9 +5833,9 @@ bool AAMemoryBehaviorFloating::followUsersOfUseIn(Attributor &A, const Use *U, // call might the argument "through return", which we allow and for which we // need to check call users. if (U->get()->getType()->isPointerTy()) { - unsigned ArgNo = ICS.getArgumentNo(U); + unsigned ArgNo = CB->getArgOperandNo(U); const auto &ArgNoCaptureAA = A.getAAFor( - *this, IRPosition::callsite_argument(ICS, ArgNo), + *this, IRPosition::callsite_argument(*CB, ArgNo), /* TrackDependence */ true, DepClassTy::OPTIONAL); return !ArgNoCaptureAA.isAssumedNoCapture(); } @@ -5874,17 +5869,17 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, case Instruction::Invoke: { // For call sites we look at the argument memory behavior attribute (this // could be recursive!) in order to restrict our own state. - ImmutableCallSite ICS(UserI); + const auto *CB = cast(UserI); // Give up on operand bundles. - if (ICS.isBundleOperand(U)) { + if (CB->isBundleOperand(U)) { indicatePessimisticFixpoint(); return; } // Calling a function does read the function pointer, maybe write it if the // function is self-modifying. - if (ICS.isCallee(U)) { + if (CB->isCallee(U)) { removeAssumedBits(NO_READS); break; } @@ -5893,9 +5888,9 @@ void AAMemoryBehaviorFloating::analyzeUseIn(Attributor &A, const Use *U, // argument. IRPosition Pos; if (U->get()->getType()->isPointerTy()) - Pos = IRPosition::callsite_argument(ICS, ICS.getArgumentNo(U)); + Pos = IRPosition::callsite_argument(*CB, CB->getArgOperandNo(U)); else - Pos = IRPosition::callsite_function(ICS); + Pos = IRPosition::callsite_function(*CB); const auto &MemBehaviorAA = A.getAAFor( *this, Pos, /* TrackDependence */ true, DepClassTy::OPTIONAL); @@ -6184,9 +6179,9 @@ void AAMemoryLocationImpl::categorizePtrValue( Changed); return true; } - if (ImmutableCallSite ICS = ImmutableCallSite(&V)) { + if (const auto *CB = dyn_cast(&V)) { const auto &NoAliasAA = - A.getAAFor(*this, IRPosition::callsite_returned(ICS)); + A.getAAFor(*this, IRPosition::callsite_returned(*CB)); if (NoAliasAA.isAssumedNoAlias()) { updateStateAndAccessesMap(T, AccessKindAccessesMap, NO_MALLOCED_MEM, &I, &V, Changed); @@ -6226,32 +6221,32 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, AAMemoryLocation::StateType AccessedLocs; AccessedLocs.intersectAssumedBits(NO_LOCATIONS); - if (ImmutableCallSite ICS = ImmutableCallSite(&I)) { + if (auto *CB = dyn_cast(&I)) { // First check if we assume any memory is access is visible. 
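In the call-site handling that follows, the global- and argument-memory checks are written as ((~NotAccessed) & Mask) rather than !(NotAccessed & Mask) because masks such as NO_GLOBAL_MEM cover more than one location bit. A standalone sketch with illustrative bit values (not the pass's real constants) showing the difference:

    #include <cassert>

    int main() {
      constexpr unsigned NO_INTERNAL_GLOBAL_MEM = 1u << 2; // illustrative bits
      constexpr unsigned NO_EXTERNAL_GLOBAL_MEM = 1u << 3;
      constexpr unsigned NO_GLOBAL_MEM =
          NO_INTERNAL_GLOBAL_MEM | NO_EXTERNAL_GLOBAL_MEM;

      // A call that touches internal globals only: one "not accessed" bit set.
      unsigned NotAccessed = NO_EXTERNAL_GLOBAL_MEM;

      bool Negated = !(NotAccessed & NO_GLOBAL_MEM);          // false: misses it
      bool Inverted = ((~NotAccessed) & NO_GLOBAL_MEM) != 0;  // true: catches it
      assert(!Negated && Inverted);
      return 0;
    }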
- const auto &ICSMemLocationAA = - A.getAAFor(*this, IRPosition::callsite_function(ICS)); + const auto &CBMemLocationAA = + A.getAAFor(*this, IRPosition::callsite_function(*CB)); LLVM_DEBUG(dbgs() << "[AAMemoryLocation] Categorize call site: " << I - << " [" << ICSMemLocationAA << "]\n"); + << " [" << CBMemLocationAA << "]\n"); - if (ICSMemLocationAA.isAssumedReadNone()) + if (CBMemLocationAA.isAssumedReadNone()) return NO_LOCATIONS; - if (ICSMemLocationAA.isAssumedInaccessibleMemOnly()) { + if (CBMemLocationAA.isAssumedInaccessibleMemOnly()) { updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, NO_INACCESSIBLE_MEM, &I, nullptr, Changed); return AccessedLocs.getAssumed(); } - uint32_t ICSAssumedNotAccessedLocs = - ICSMemLocationAA.getAssumedNotAccessedLocation(); + uint32_t CBAssumedNotAccessedLocs = + CBMemLocationAA.getAssumedNotAccessedLocation(); // Set the argmemonly and global bit as we handle them separately below. - uint32_t ICSAssumedNotAccessedLocsNoArgMem = - ICSAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; + uint32_t CBAssumedNotAccessedLocsNoArgMem = + CBAssumedNotAccessedLocs | NO_ARGUMENT_MEM | NO_GLOBAL_MEM; for (MemoryLocationsKind CurMLK = 1; CurMLK < NO_LOCATIONS; CurMLK *= 2) { - if (ICSAssumedNotAccessedLocsNoArgMem & CurMLK) + if (CBAssumedNotAccessedLocsNoArgMem & CurMLK) continue; updateStateAndAccessesMap(AccessedLocs, AccessKindAccessesMap, CurMLK, &I, nullptr, Changed); @@ -6259,7 +6254,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, // Now handle global memory if it might be accessed. This is slightly tricky // as NO_GLOBAL_MEM has multiple bits set. - bool HasGlobalAccesses = ((~ICSAssumedNotAccessedLocs) & NO_GLOBAL_MEM); + bool HasGlobalAccesses = ((~CBAssumedNotAccessedLocs) & NO_GLOBAL_MEM); if (HasGlobalAccesses) { auto AccessPred = [&](const Instruction *, const Value *Ptr, AccessKind Kind, MemoryLocationsKind MLK) { @@ -6267,7 +6262,7 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, Ptr, Changed); return true; }; - if (!ICSMemLocationAA.checkForAllAccessesToMemoryKind( + if (!CBMemLocationAA.checkForAllAccessesToMemoryKind( AccessPred, inverseLocation(NO_GLOBAL_MEM, false, false))) return AccessedLocs.getWorstState(); } @@ -6277,18 +6272,18 @@ AAMemoryLocationImpl::categorizeAccessedLocations(Attributor &A, Instruction &I, << getMemoryLocationsAsStr(AccessedLocs.getAssumed()) << "\n"); // Now handle argument memory if it might be accessed. - bool HasArgAccesses = ((~ICSAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); + bool HasArgAccesses = ((~CBAssumedNotAccessedLocs) & NO_ARGUMENT_MEM); if (HasArgAccesses) { - for (unsigned ArgNo = 0, e = ICS.getNumArgOperands(); ArgNo < e; + for (unsigned ArgNo = 0, E = CB->getNumArgOperands(); ArgNo < E; ++ArgNo) { // Skip non-pointer arguments. - const Value *ArgOp = ICS.getArgOperand(ArgNo); + const Value *ArgOp = CB->getArgOperand(ArgNo); if (!ArgOp->getType()->isPtrOrPtrVectorTy()) continue; // Skip readnone arguments. - const IRPosition &ArgOpIRP = IRPosition::callsite_argument(ICS, ArgNo); + const IRPosition &ArgOpIRP = IRPosition::callsite_argument(*CB, ArgNo); const auto &ArgOpMemLocationAA = A.getAAFor( *this, ArgOpIRP, /* TrackDependence */ true, DepClassTy::OPTIONAL); From 5f0903e9bec97e67bf34d887bcbe9d05790de934 Mon Sep 17 00:00:00 2001 From: Erich Keane Date: Fri, 17 Apr 2020 10:44:19 -0700 Subject: [PATCH 197/216] Reland Implement _ExtInt as an extended int type specifier. 
I fixed the LLDB issue, so re-applying the patch. This reverts commit a4b88c044980337bb14390be654fe76864aa60ec. --- clang/docs/LanguageExtensions.rst | 53 +++ clang/docs/ReleaseNotes.rst | 8 + clang/include/clang/AST/ASTContext.h | 10 + clang/include/clang/AST/RecursiveASTVisitor.h | 9 + clang/include/clang/AST/Type.h | 70 ++- clang/include/clang/AST/TypeLoc.h | 6 + clang/include/clang/AST/TypeProperties.td | 25 + .../clang/Basic/DiagnosticSemaKinds.td | 14 +- clang/include/clang/Basic/Specifiers.h | 1 + clang/include/clang/Basic/TokenKinds.def | 1 + clang/include/clang/Basic/TypeNodes.td | 2 + clang/include/clang/Parse/Parser.h | 1 + clang/include/clang/Sema/DeclSpec.h | 6 +- clang/include/clang/Sema/Sema.h | 1 + .../clang/Serialization/TypeBitCodes.def | 2 + clang/lib/AST/ASTContext.cpp | 67 +++ clang/lib/AST/ASTStructuralEquivalence.cpp | 18 + clang/lib/AST/ExprConstant.cpp | 1 + clang/lib/AST/ItaniumMangle.cpp | 24 + clang/lib/AST/MicrosoftMangle.cpp | 24 + clang/lib/AST/Type.cpp | 63 ++- clang/lib/AST/TypePrinter.cpp | 24 + clang/lib/CodeGen/CGDebugInfo.cpp | 13 + clang/lib/CodeGen/CGDebugInfo.h | 1 + clang/lib/CodeGen/CGExprScalar.cpp | 26 +- clang/lib/CodeGen/CGRecordLayoutBuilder.cpp | 3 +- clang/lib/CodeGen/CodeGenFunction.cpp | 2 + clang/lib/CodeGen/CodeGenTBAA.cpp | 9 + clang/lib/CodeGen/CodeGenTypes.cpp | 21 +- clang/lib/CodeGen/CodeGenTypes.h | 2 +- clang/lib/CodeGen/ItaniumCXXABI.cpp | 6 +- clang/lib/Parse/ParseDecl.cpp | 30 ++ clang/lib/Parse/ParseExpr.cpp | 1 + clang/lib/Parse/ParseExprCXX.cpp | 13 + clang/lib/Parse/ParseTentative.cpp | 20 + clang/lib/Sema/DeclSpec.cpp | 28 +- clang/lib/Sema/SemaChecking.cpp | 6 + clang/lib/Sema/SemaDecl.cpp | 8 +- clang/lib/Sema/SemaDeclAttr.cpp | 5 +- clang/lib/Sema/SemaExpr.cpp | 15 +- clang/lib/Sema/SemaLookup.cpp | 1 + clang/lib/Sema/SemaTemplate.cpp | 17 +- clang/lib/Sema/SemaTemplateDeduction.cpp | 34 ++ clang/lib/Sema/SemaTemplateVariadic.cpp | 1 + clang/lib/Sema/SemaType.cpp | 66 +++ clang/lib/Sema/TreeTransform.h | 76 +++ clang/lib/Serialization/ASTReader.cpp | 9 + clang/lib/Serialization/ASTWriter.cpp | 8 + clang/test/CodeGen/ext-int-sanitizer.cpp | 265 +++++++++++ clang/test/CodeGen/ext-int.c | 44 ++ clang/test/CodeGenCXX/ext-int.cpp | 432 ++++++++++++++++++ clang/test/CodeGenOpenCL/ext-int-shift.cl | 21 + clang/test/Parser/ext-int.cpp | 15 + clang/test/SemaCXX/ext-int.cpp | 278 +++++++++++ clang/tools/libclang/CIndex.cpp | 2 + .../TypeSystem/Clang/TypeSystemClang.cpp | 15 +- 56 files changed, 1886 insertions(+), 37 deletions(-) create mode 100644 clang/test/CodeGen/ext-int-sanitizer.cpp create mode 100644 clang/test/CodeGen/ext-int.c create mode 100644 clang/test/CodeGenCXX/ext-int.cpp create mode 100644 clang/test/CodeGenOpenCL/ext-int-shift.cl create mode 100644 clang/test/Parser/ext-int.cpp create mode 100644 clang/test/SemaCXX/ext-int.cpp diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 929cd1c67e73..07062a191ce8 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -3461,3 +3461,56 @@ Since the size of ``buffer`` can't be known at compile time, Clang will fold ``__builtin_object_size(buffer, 0)`` into ``-1``. However, if this was written as ``__builtin_dynamic_object_size(buffer, 0)``, Clang will fold it into ``size``, providing some extra runtime safety. 
+
+Extended Integer Types
+======================
+
+Clang supports a set of extended integer types under the syntax ``_ExtInt(N)``
+where ``N`` is an integer that specifies the number of bits used to represent
+the type, including the sign bit. The keyword ``_ExtInt`` is a type specifier, thus
+it can be used in any place a type can, including as a non-type-template-parameter,
+as the type of a bitfield, and as the underlying type of an enumeration.
+
+An extended integer can be declared either signed or unsigned by using the
+``signed``/``unsigned`` keywords. If no sign specifier is used or if the ``signed``
+keyword is used, the extended integer type is a signed integer and can represent
+negative values.
+
+The ``N`` expression is an integer constant expression, which specifies the number
+of bits used to represent the type, following normal integer representations for
+both signed and unsigned types. A signed and an unsigned extended integer of the
+same ``N`` value have the same number of bits in their representation. Many
+architectures don't have a way of representing non-power-of-2 integers, so these
+architectures emulate these types using larger integers. In these cases, they are
+expected to follow the 'as-if' rule and do the math 'as-if' it were done at the
+specified number of bits.
+
+In order to be consistent with the C language specification, and to make the extended
+integer types useful for their intended purpose, extended integers follow the C
+standard integer conversion ranks. An extended integer type has a greater rank than
+any integer type with less precision. However, it has a lower rank than any
+of the built-in or other integer types (such as ``__int128``). Usual arithmetic conversions
+also work the same way, where the smaller-ranked integer is converted to the larger.
+
+The one exception to the C integer rules for these types is integer promotion.
+Unary +, -, and ~ operators typically promote their operands to ``int``. Doing these
+promotions would inflate the size of required hardware on some platforms, so extended
+integer types aren't subject to the integer promotion rules in these cases.
+
+In languages (such as OpenCL) that define shift-by-out-of-range behavior as a mask,
+non-power-of-two versions of these types use an unsigned remainder operation to constrain
+the value to the proper range, preventing undefined behavior.
+
+Extended integer types are aligned to the next greatest power-of-2 up to 64 bits.
+The size of these types for the purposes of layout and ``sizeof`` is the number of
+bits aligned to this calculated alignment. This permits the use of these types in
+allocated arrays using the common ``sizeof(Array)/sizeof(ElementType)`` pattern.
+
+Extended integer types work with the C ``_Atomic`` type modifier; however, only
+precisions that are powers of 2 greater than 8 bits are accepted.
+
+Extended integer types align with existing calling conventions. They have the same size
+and alignment as the smallest basic type that can contain them. Types that are larger
+than 64 bits are handled in the same way as ``__int128``; they are conceptually
+treated as a struct of register-size chunks. The number of chunks is the smallest
+number that can contain the type, which is not necessarily a power-of-2 size.
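A short usage sketch of the rules described above. The sizes assume a typical 64-bit target, and the snippet is an illustration rather than an excerpt from the tests added later in this patch:

    #include <type_traits>

    _ExtInt(13)         A; // signed, 13 bits
    unsigned _ExtInt(4) B; // unsigned, 4 bits
    _ExtInt(5)          X;
    _ExtInt(9)          Y;

    // Usual arithmetic conversions by rank: the smaller operand converts up,
    // and the math happens at 9 bits rather than at 'int'.
    static_assert(std::is_same<decltype(X + Y), _ExtInt(9)>::value, "");

    // Layout: alignment is the next power of 2 (capped at 64 bits), and the
    // size is the bit count rounded up to that alignment.
    static_assert(sizeof(_ExtInt(13)) == 2, ""); // 16-bit storage
    static_assert(sizeof(_ExtInt(33)) == 8, ""); // 64-bit storage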
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index a8163cad9fde..54deba7bbd0e 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -62,6 +62,14 @@ Non-comprehensive list of changes in this release
   in the Arm C Language Extensions.
+* Clang adds support for a set of extended integer types (``_ExtInt(N)``) that
+  permit non-power-of-2 integers, exposing the LLVM integer types. Since a major
+  motivating use case for these types is to limit 'bit' usage, these types don't
+  automatically promote to 'int' when operations are done between two ``_ExtInt(N)``
+  types; instead, math occurs at the size of the largest ``_ExtInt(N)`` type.
+
+
+
 New Compiler Flags
 ------------------
diff --git a/clang/include/clang/AST/ASTContext.h b/clang/include/clang/AST/ASTContext.h
index ac742fefc109..dedbd857819d 100644
--- a/clang/include/clang/AST/ASTContext.h
+++ b/clang/include/clang/AST/ASTContext.h
@@ -224,6 +224,8 @@ class ASTContext : public RefCountedBase<ASTContext> {
   mutable llvm::FoldingSet<AtomicType> AtomicTypes;
   llvm::FoldingSet<AttributedType> AttributedTypes;
   mutable llvm::FoldingSet<PipeType> PipeTypes;
+  mutable llvm::FoldingSet<ExtIntType> ExtIntTypes;
+  mutable llvm::FoldingSet<DependentExtIntType> DependentExtIntTypes;
   mutable llvm::FoldingSet<QualifiedTemplateName> QualifiedTemplateNames;
   mutable llvm::FoldingSet<DependentTemplateName> DependentTemplateNames;
@@ -1203,6 +1205,14 @@ class ASTContext : public RefCountedBase<ASTContext> {
   /// Return a write_only pipe type for the specified type.
   QualType getWritePipeType(QualType T) const;
+  /// Return an extended integer type with the specified signedness and bit
+  /// count.
+  QualType getExtIntType(bool Unsigned, unsigned NumBits) const;
+
+  /// Return a dependent extended integer type with the specified signedness
+  /// and bit count.
+  QualType getDependentExtIntType(bool Unsigned, Expr *BitsExpr) const;
+
   /// Gets the struct used to keep track of the extended descriptor for
   /// pointer to blocks.
QualType getBlockDescriptorExtendedType() const; diff --git a/clang/include/clang/AST/RecursiveASTVisitor.h b/clang/include/clang/AST/RecursiveASTVisitor.h index 85eb6259a419..b71f7994e2fa 100644 --- a/clang/include/clang/AST/RecursiveASTVisitor.h +++ b/clang/include/clang/AST/RecursiveASTVisitor.h @@ -1115,6 +1115,10 @@ DEF_TRAVERSE_TYPE(AtomicType, { TRY_TO(TraverseType(T->getValueType())); }) DEF_TRAVERSE_TYPE(PipeType, { TRY_TO(TraverseType(T->getElementType())); }) +DEF_TRAVERSE_TYPE(ExtIntType, {}) +DEF_TRAVERSE_TYPE(DependentExtIntType, + { TRY_TO(TraverseStmt(T->getNumBitsExpr())); }) + #undef DEF_TRAVERSE_TYPE // ----------------- TypeLoc traversal ----------------- @@ -1385,6 +1389,11 @@ DEF_TRAVERSE_TYPELOC(AtomicType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) DEF_TRAVERSE_TYPELOC(PipeType, { TRY_TO(TraverseTypeLoc(TL.getValueLoc())); }) +DEF_TRAVERSE_TYPELOC(ExtIntType, {}) +DEF_TRAVERSE_TYPELOC(DependentExtIntType, { + TRY_TO(TraverseStmt(TL.getTypePtr()->getNumBitsExpr())); +}) + #undef DEF_TRAVERSE_TYPELOC // ----------------- Decl traversal ----------------- diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index f78d9d7670a7..322b14ce641a 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2101,6 +2101,7 @@ class alignas(8) Type : public ExtQualsTypeCommonBase { bool isOCLExtOpaqueType() const; // Any OpenCL extension type bool isPipeType() const; // OpenCL pipe type + bool isExtIntType() const; // Extended Int Type bool isOpenCLSpecificType() const; // Any OpenCL specific type /// Determines if this type, which must satisfy @@ -6127,6 +6128,64 @@ class PipeType : public Type, public llvm::FoldingSetNode { bool isReadOnly() const { return isRead; } }; +/// A fixed int type of a specified bitwidth. +class ExtIntType final : public Type, public llvm::FoldingSetNode { + friend class ASTContext; + unsigned IsUnsigned : 1; + unsigned NumBits : 24; + +protected: + ExtIntType(bool isUnsigned, unsigned NumBits); + +public: + bool isUnsigned() const { return IsUnsigned; } + bool isSigned() const { return !IsUnsigned; } + unsigned getNumBits() const { return NumBits; } + + bool isSugared() const { return false; } + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, isUnsigned(), getNumBits()); + } + + static void Profile(llvm::FoldingSetNodeID &ID, bool IsUnsigned, + unsigned NumBits) { + ID.AddBoolean(IsUnsigned); + ID.AddInteger(NumBits); + } + + static bool classof(const Type *T) { return T->getTypeClass() == ExtInt; } +}; + +class DependentExtIntType final : public Type, public llvm::FoldingSetNode { + friend class ASTContext; + const ASTContext &Context; + llvm::PointerIntPair ExprAndUnsigned; + +protected: + DependentExtIntType(const ASTContext &Context, bool IsUnsigned, + Expr *NumBits); + +public: + bool isUnsigned() const; + bool isSigned() const { return !isUnsigned(); } + Expr *getNumBitsExpr() const; + + bool isSugared() const { return false; } + QualType desugar() const { return QualType(this, 0); } + + void Profile(llvm::FoldingSetNodeID &ID) { + Profile(ID, Context, isUnsigned(), getNumBitsExpr()); + } + static void Profile(llvm::FoldingSetNodeID &ID, const ASTContext &Context, + bool IsUnsigned, Expr *NumBitsExpr); + + static bool classof(const Type *T) { + return T->getTypeClass() == DependentExtInt; + } +}; + /// A qualifier set is used to build a set of qualifiers. 
class QualifierCollector : public Qualifiers { public: @@ -6646,6 +6705,10 @@ inline bool Type::isPipeType() const { return isa(CanonicalType); } +inline bool Type::isExtIntType() const { + return isa(CanonicalType); +} + #define EXT_OPAQUE_TYPE(ExtType, Id, Ext) \ inline bool Type::is##Id##Type() const { \ return isSpecificBuiltinType(BuiltinType::Id); \ @@ -6741,7 +6804,7 @@ inline bool Type::isIntegerType() const { return IsEnumDeclComplete(ET->getDecl()) && !IsEnumDeclScoped(ET->getDecl()); } - return false; + return isExtIntType(); } inline bool Type::isFixedPointType() const { @@ -6798,7 +6861,8 @@ inline bool Type::isScalarType() const { isa(CanonicalType) || isa(CanonicalType) || isa(CanonicalType) || - isa(CanonicalType); + isa(CanonicalType) || + isExtIntType(); } inline bool Type::isIntegralOrEnumerationType() const { @@ -6811,7 +6875,7 @@ inline bool Type::isIntegralOrEnumerationType() const { if (const auto *ET = dyn_cast(CanonicalType)) return IsEnumDeclComplete(ET->getDecl()); - return false; + return isExtIntType(); } inline bool Type::isBooleanType() const { diff --git a/clang/include/clang/AST/TypeLoc.h b/clang/include/clang/AST/TypeLoc.h index 3fc53d823c37..2221485983b2 100644 --- a/clang/include/clang/AST/TypeLoc.h +++ b/clang/include/clang/AST/TypeLoc.h @@ -2450,6 +2450,12 @@ inline T TypeLoc::getAsAdjusted() const { } return Cur.getAs(); } +class ExtIntTypeLoc final + : public InheritingConcreteTypeLoc {}; +class DependentExtIntTypeLoc final + : public InheritingConcreteTypeLoc {}; } // namespace clang diff --git a/clang/include/clang/AST/TypeProperties.td b/clang/include/clang/AST/TypeProperties.td index 994f932170ae..12bc5a4ee8a3 100644 --- a/clang/include/clang/AST/TypeProperties.td +++ b/clang/include/clang/AST/TypeProperties.td @@ -833,3 +833,28 @@ let Class = PipeType in { return ctx.getPipeType(elementType, isReadOnly); }]>; } + +let Class = ExtIntType in { + def : Property<"isUnsigned", Bool> { + let Read = [{ node->isUnsigned() }]; + } + def : Property <"numBits", UInt32> { + let Read = [{ node->getNumBits() }]; + } + + def : Creator<[{ + return ctx.getExtIntType(isUnsigned, numBits); + }]>; +} + +let Class = DependentExtIntType in { + def : Property<"isUnsigned", Bool> { + let Read = [{ node->isUnsigned() }]; + } + def : Property <"numBitsExpr", ExprRef> { + let Read = [{ node->getNumBitsExpr() }]; + } + def : Creator<[{ + return ctx.getDependentExtIntType(isUnsigned, numBitsExpr); + }]>; +} diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 35a7a05667fc..97ad1a6c7920 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5947,10 +5947,12 @@ def err_block_return_missing_expr : Error< "non-void block should return a value">; def err_func_def_incomplete_result : Error< "incomplete result type %0 in function definition">; -def err_atomic_specifier_bad_type : Error< - "_Atomic cannot be applied to " - "%select{incomplete |array |function |reference |atomic |qualified |sizeless |}0type " - "%1 %select{|||||||which is not trivially copyable}0">; +def err_atomic_specifier_bad_type + : Error<"_Atomic cannot be applied to " + "%select{incomplete |array |function |reference |atomic |qualified " + "|sizeless ||integer |integer }0type " + "%1 %select{|||||||which is not trivially copyable|with less than " + "1 byte of precision|with a non power of 2 precision}0">; // Expressions. 
def select_unary_expr_or_type_trait_kind : TextSubstitution< @@ -10711,4 +10713,8 @@ def warn_sycl_kernel_return_type : Warning< "function template with 'sycl_kernel' attribute must have a 'void' return type">, InGroup; +def err_ext_int_bad_size : Error<"%select{signed|unsigned}0 _ExtInt must " + "have a bit size of at least %select{2|1}0">; +def err_ext_int_max_size : Error<"%select{signed|unsigned}0 _ExtInt of bit " + "sizes greater than %1 not supported">; } // end of sema component. diff --git a/clang/include/clang/Basic/Specifiers.h b/clang/include/clang/Basic/Specifiers.h index 73823dc01ec7..e6c2cb39566c 100644 --- a/clang/include/clang/Basic/Specifiers.h +++ b/clang/include/clang/Basic/Specifiers.h @@ -67,6 +67,7 @@ namespace clang { TST_char32, // C++11 char32_t TST_int, TST_int128, + TST_extint, // Extended Int types. TST_half, // OpenCL half, ARM NEON __fp16 TST_Float16, // C11 extension ISO/IEC TS 18661-3 TST_Accum, // ISO/IEC JTC1 SC22 WG14 N1169 Extension diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 3b1062e48767..1da24a8fd38b 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -285,6 +285,7 @@ KEYWORD(goto , KEYALL) KEYWORD(if , KEYALL) KEYWORD(inline , KEYC99|KEYCXX|KEYGNU) KEYWORD(int , KEYALL) +KEYWORD(_ExtInt , KEYALL) KEYWORD(long , KEYALL) KEYWORD(register , KEYALL) KEYWORD(restrict , KEYC99) diff --git a/clang/include/clang/Basic/TypeNodes.td b/clang/include/clang/Basic/TypeNodes.td index 96d9472a488a..cd15a498642f 100644 --- a/clang/include/clang/Basic/TypeNodes.td +++ b/clang/include/clang/Basic/TypeNodes.td @@ -104,3 +104,5 @@ def ObjCInterfaceType : TypeNode, LeafType; def ObjCObjectPointerType : TypeNode; def PipeType : TypeNode; def AtomicType : TypeNode; +def ExtIntType : TypeNode; +def DependentExtIntType : TypeNode, AlwaysDependent; diff --git a/clang/include/clang/Parse/Parser.h b/clang/include/clang/Parse/Parser.h index 3f73a1b90268..b4e96a5b85de 100644 --- a/clang/include/clang/Parse/Parser.h +++ b/clang/include/clang/Parse/Parser.h @@ -2721,6 +2721,7 @@ class Parser : public CodeCompletionHandler { SourceLocation &EllipsisLoc); void ParseAlignmentSpecifier(ParsedAttributes &Attrs, SourceLocation *endLoc = nullptr); + ExprResult ParseExtIntegerArgument(); VirtSpecifiers::Specifier isCXX11VirtSpecifier(const Token &Tok) const; VirtSpecifiers::Specifier isCXX11VirtSpecifier() const { diff --git a/clang/include/clang/Sema/DeclSpec.h b/clang/include/clang/Sema/DeclSpec.h index 0e95e237e974..5bc13fe343f4 100644 --- a/clang/include/clang/Sema/DeclSpec.h +++ b/clang/include/clang/Sema/DeclSpec.h @@ -278,6 +278,7 @@ class DeclSpec { static const TST TST_char32 = clang::TST_char32; static const TST TST_int = clang::TST_int; static const TST TST_int128 = clang::TST_int128; + static const TST TST_extint = clang::TST_extint; static const TST TST_half = clang::TST_half; static const TST TST_float = clang::TST_float; static const TST TST_double = clang::TST_double; @@ -413,7 +414,7 @@ class DeclSpec { T == TST_underlyingType || T == TST_atomic); } static bool isExprRep(TST T) { - return (T == TST_typeofExpr || T == TST_decltype); + return (T == TST_typeofExpr || T == TST_decltype || T == TST_extint); } static bool isTemplateIdRep(TST T) { return (T == TST_auto || T == TST_decltype_auto); @@ -704,6 +705,9 @@ class DeclSpec { bool SetTypePipe(bool isPipe, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const PrintingPolicy &Policy); + bool 
SetExtIntType(SourceLocation KWLoc, Expr *BitWidth, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy); bool SetTypeSpecSat(SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID); bool SetTypeSpecError(); diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index a1a0b854a85b..af58b0ec4e82 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1678,6 +1678,7 @@ class Sema final { SourceLocation Loc); QualType BuildWritePipeType(QualType T, SourceLocation Loc); + QualType BuildExtIntType(bool IsUnsigned, Expr *BitWidth, SourceLocation Loc); TypeSourceInfo *GetTypeForDeclarator(Declarator &D, Scope *S); TypeSourceInfo *GetTypeForDeclaratorCast(Declarator &D, QualType FromTy); diff --git a/clang/include/clang/Serialization/TypeBitCodes.def b/clang/include/clang/Serialization/TypeBitCodes.def index 38c73ccb7daf..561c8869ead6 100644 --- a/clang/include/clang/Serialization/TypeBitCodes.def +++ b/clang/include/clang/Serialization/TypeBitCodes.def @@ -58,5 +58,7 @@ TYPE_BIT_CODE(DependentSizedExtVector, DEPENDENT_SIZED_EXT_VECTOR, 46) TYPE_BIT_CODE(DependentAddressSpace, DEPENDENT_ADDRESS_SPACE, 47) TYPE_BIT_CODE(DependentVector, DEPENDENT_SIZED_VECTOR, 48) TYPE_BIT_CODE(MacroQualified, MACRO_QUALIFIED, 49) +TYPE_BIT_CODE(ExtInt, EXT_INT, 50) +TYPE_BIT_CODE(DependentExtInt, DEPENDENT_EXT_INT, 51) #undef TYPE_BIT_CODE diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 34bb07cd3f78..8734dd390247 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -2180,6 +2180,15 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const { Align = toBits(Layout.getAlignment()); break; } + case Type::ExtInt: { + const auto *EIT = cast(T); + Align = + std::min(static_cast(std::max( + getCharWidth(), llvm::PowerOf2Ceil(EIT->getNumBits()))), + Target->getLongLongAlign()); + Width = llvm::alignTo(EIT->getNumBits(), Align); + break; + } case Type::Record: case Type::Enum: { const auto *TT = cast(T); @@ -3376,6 +3385,8 @@ QualType ASTContext::getVariableArrayDecayedType(QualType type) const { case Type::Auto: case Type::DeducedTemplateSpecialization: case Type::PackExpansion: + case Type::ExtInt: + case Type::DependentExtInt: llvm_unreachable("type should never be variably-modified"); // These types can be variably-modified but should never need to @@ -4070,6 +4081,39 @@ QualType ASTContext::getWritePipeType(QualType T) const { return getPipeType(T, false); } +QualType ASTContext::getExtIntType(bool IsUnsigned, unsigned NumBits) const { + llvm::FoldingSetNodeID ID; + ExtIntType::Profile(ID, IsUnsigned, NumBits); + + void *InsertPos = nullptr; + if (ExtIntType *EIT = ExtIntTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(EIT, 0); + + auto *New = new (*this, TypeAlignment) ExtIntType(IsUnsigned, NumBits); + ExtIntTypes.InsertNode(New, InsertPos); + Types.push_back(New); + return QualType(New, 0); +} + +QualType ASTContext::getDependentExtIntType(bool IsUnsigned, + Expr *NumBitsExpr) const { + assert(NumBitsExpr->isInstantiationDependent() && "Only good for dependent"); + llvm::FoldingSetNodeID ID; + DependentExtIntType::Profile(ID, *this, IsUnsigned, NumBitsExpr); + + void *InsertPos = nullptr; + if (DependentExtIntType *Existing = + DependentExtIntTypes.FindNodeOrInsertPos(ID, InsertPos)) + return QualType(Existing, 0); + + auto *New = new (*this, TypeAlignment) + DependentExtIntType(*this, IsUnsigned, NumBitsExpr); + DependentExtIntTypes.InsertNode(New, InsertPos); + + 
Types.push_back(New); + return QualType(New, 0); +} + #ifndef NDEBUG static bool NeedsInjectedClassNameType(const RecordDecl *D) { if (!isa(D)) return false; @@ -5905,6 +5949,11 @@ int ASTContext::getFloatingTypeSemanticOrder(QualType LHS, QualType RHS) const { unsigned ASTContext::getIntegerRank(const Type *T) const { assert(T->isCanonicalUnqualified() && "T should be canonicalized"); + // Results in this 'losing' to any type of the same size, but winning if + // larger. + if (const auto *EIT = dyn_cast(T)) + return 0 + (EIT->getNumBits() << 3); + switch (cast(T)->getKind()) { default: llvm_unreachable("getIntegerRank(): not a built-in integer"); case BuiltinType::Bool: @@ -7288,6 +7337,7 @@ void ASTContext::getObjCEncodingForTypeImpl(QualType T, std::string &S, return; case Type::Pipe: + case Type::ExtInt: #define ABSTRACT_TYPE(KIND, BASE) #define TYPE(KIND, BASE) #define DEPENDENT_TYPE(KIND, BASE) \ @@ -9381,6 +9431,21 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, assert(LHS != RHS && "Equivalent pipe types should have already been handled!"); return {}; + case Type::ExtInt: { + // Merge two ext-int types, while trying to preserve typedef info. + bool LHSUnsigned = LHS->castAs()->isUnsigned(); + bool RHSUnsigned = RHS->castAs()->isUnsigned(); + unsigned LHSBits = LHS->castAs()->getNumBits(); + unsigned RHSBits = RHS->castAs()->getNumBits(); + + // Like unsigned/int, shouldn't have a type if they dont match. + if (LHSUnsigned != RHSUnsigned) + return {}; + + if (LHSBits != RHSBits) + return {}; + return LHS; + } } llvm_unreachable("Invalid Type::Class!"); @@ -9521,6 +9586,8 @@ unsigned ASTContext::getIntWidth(QualType T) const { T = ET->getDecl()->getIntegerType(); if (T->isBooleanType()) return 1; + if(const auto *EIT = T->getAs()) + return EIT->getNumBits(); // For builtin types, just use the standard type sizing method return (unsigned)getTypeSize(T); } diff --git a/clang/lib/AST/ASTStructuralEquivalence.cpp b/clang/lib/AST/ASTStructuralEquivalence.cpp index c29b7b2f5907..c562830c41e1 100644 --- a/clang/lib/AST/ASTStructuralEquivalence.cpp +++ b/clang/lib/AST/ASTStructuralEquivalence.cpp @@ -949,6 +949,24 @@ static bool IsStructurallyEquivalent(StructuralEquivalenceContext &Context, cast(T2)->getElementType())) return false; break; + case Type::ExtInt: { + const auto *Int1 = cast(T1); + const auto *Int2 = cast(T2); + + if (Int1->isUnsigned() != Int2->isUnsigned() || + Int1->getNumBits() != Int2->getNumBits()) + return false; + break; + } + case Type::DependentExtInt: { + const auto *Int1 = cast(T1); + const auto *Int2 = cast(T2); + + if (Int1->isUnsigned() != Int2->isUnsigned() || + !IsStructurallyEquivalent(Context, Int1->getNumBitsExpr(), + Int2->getNumBitsExpr())) + return false; + } } // end switch return true; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5b3866d0a471..8bc7a1128e7a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -10354,6 +10354,7 @@ EvaluateBuiltinClassifyType(QualType T, const LangOptions &LangOpts) { case Type::ObjCInterface: case Type::ObjCObjectPointer: case Type::Pipe: + case Type::ExtInt: // GCC classifies vectors as None. We follow its lead and classify all // other types that don't fit into the regular classification the same way. 
return GCCTypeClass::None; diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 535bb86f0d5b..d60cacf07534 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -2093,6 +2093,8 @@ bool CXXNameMangler::mangleUnresolvedTypeOrSimpleId(QualType Ty, case Type::Atomic: case Type::Pipe: case Type::MacroQualified: + case Type::ExtInt: + case Type::DependentExtInt: llvm_unreachable("type is illegal as a nested name specifier"); case Type::SubstTemplateTypeParmPack: @@ -3551,6 +3553,28 @@ void CXXNameMangler::mangleType(const PipeType *T) { Out << "8ocl_pipe"; } +void CXXNameMangler::mangleType(const ExtIntType *T) { + Out << "U7_ExtInt"; + llvm::APSInt BW(32, true); + BW = T->getNumBits(); + TemplateArgument TA(Context.getASTContext(), BW, getASTContext().IntTy); + mangleTemplateArgs(&TA, 1); + if (T->isUnsigned()) + Out << "j"; + else + Out << "i"; +} + +void CXXNameMangler::mangleType(const DependentExtIntType *T) { + Out << "U7_ExtInt"; + TemplateArgument TA(T->getNumBitsExpr()); + mangleTemplateArgs(&TA, 1); + if (T->isUnsigned()) + Out << "j"; + else + Out << "i"; +} + void CXXNameMangler::mangleIntegerLiteral(QualType T, const llvm::APSInt &Value) { // ::= L E # integer literal diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index af51ae07bc57..dc5c15fbef68 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2953,6 +2953,30 @@ void MicrosoftMangleContextImpl::mangleCXXName(GlobalDecl GD, return Mangler.mangle(D); } +void MicrosoftCXXNameMangler::mangleType(const ExtIntType *T, Qualifiers, + SourceRange Range) { + llvm::SmallString<64> TemplateMangling; + llvm::raw_svector_ostream Stream(TemplateMangling); + MicrosoftCXXNameMangler Extra(Context, Stream); + Stream << "?$"; + if (T->isUnsigned()) + Extra.mangleSourceName("_UExtInt"); + else + Extra.mangleSourceName("_ExtInt"); + Extra.mangleIntegerLiteral(llvm::APSInt::getUnsigned(T->getNumBits()), + /*IsBoolean=*/false); + + mangleArtificialTagType(TTK_Struct, TemplateMangling, {"__clang"}); +} + +void MicrosoftCXXNameMangler::mangleType(const DependentExtIntType *T, + Qualifiers, SourceRange Range) { + DiagnosticsEngine &Diags = Context.getDiags(); + unsigned DiagID = Diags.getCustomDiagID( + DiagnosticsEngine::Error, "cannot mangle this DependentExtInt type yet"); + Diags.Report(Range.getBegin(), DiagID) << Range; +} + // ::= | | // // ::= A # private near diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp index 3428437c3146..982aa8962f03 100644 --- a/clang/lib/AST/Type.cpp +++ b/clang/lib/AST/Type.cpp @@ -293,6 +293,39 @@ VectorType::VectorType(TypeClass tc, QualType vecType, unsigned nElements, VectorTypeBits.NumElements = nElements; } +ExtIntType::ExtIntType(bool IsUnsigned, unsigned NumBits) + : Type(ExtInt, QualType{}, TypeDependence::None), IsUnsigned(IsUnsigned), + NumBits(NumBits) {} + +DependentExtIntType::DependentExtIntType(const ASTContext &Context, + bool IsUnsigned, Expr *NumBitsExpr) + : Type(DependentExtInt, QualType{}, + ((NumBitsExpr->isValueDependent() || NumBitsExpr->isTypeDependent()) + ? TypeDependence::Dependent + : TypeDependence::None) | + (NumBitsExpr->isInstantiationDependent() + ? TypeDependence::Instantiation + : TypeDependence::None) | + (NumBitsExpr->containsUnexpandedParameterPack() + ? 
TypeDependence::VariablyModified + : TypeDependence::None)), + Context(Context), ExprAndUnsigned(NumBitsExpr, IsUnsigned) {} + +bool DependentExtIntType::isUnsigned() const { + return ExprAndUnsigned.getInt(); +} + +clang::Expr *DependentExtIntType::getNumBitsExpr() const { + return ExprAndUnsigned.getPointer(); +} + +void DependentExtIntType::Profile(llvm::FoldingSetNodeID &ID, + const ASTContext &Context, bool IsUnsigned, + Expr *NumBitsExpr) { + ID.AddBoolean(IsUnsigned); + NumBitsExpr->Profile(ID, Context, true); +} + /// getArrayElementTypeNoTypeQual - If this is an array type, return the /// element type of the array, potentially with type qualifiers missing. /// This method should never be used when type qualifiers are meaningful. @@ -1836,13 +1869,17 @@ bool Type::isIntegralType(const ASTContext &Ctx) const { if (const auto *ET = dyn_cast(CanonicalType)) return ET->getDecl()->isComplete(); - return false; + return isExtIntType(); } bool Type::isIntegralOrUnscopedEnumerationType() const { if (const auto *BT = dyn_cast(CanonicalType)) return BT->getKind() >= BuiltinType::Bool && BT->getKind() <= BuiltinType::Int128; + + if (isExtIntType()) + return true; + return isUnscopedEnumerationType(); } @@ -1923,6 +1960,9 @@ bool Type::isSignedIntegerType() const { return ET->getDecl()->getIntegerType()->isSignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isSigned(); + return false; } @@ -1937,6 +1977,10 @@ bool Type::isSignedIntegerOrEnumerationType() const { return ET->getDecl()->getIntegerType()->isSignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isSigned(); + + return false; } @@ -1963,6 +2007,9 @@ bool Type::isUnsignedIntegerType() const { return ET->getDecl()->getIntegerType()->isUnsignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isUnsigned(); + return false; } @@ -1977,6 +2024,9 @@ bool Type::isUnsignedIntegerOrEnumerationType() const { return ET->getDecl()->getIntegerType()->isUnsignedIntegerType(); } + if (const ExtIntType *IT = dyn_cast(CanonicalType)) + return IT->isUnsigned(); + return false; } @@ -2015,7 +2065,7 @@ bool Type::isRealType() const { BT->getKind() <= BuiltinType::Float128; if (const auto *ET = dyn_cast(CanonicalType)) return ET->getDecl()->isComplete() && !ET->getDecl()->isScoped(); - return false; + return isExtIntType(); } bool Type::isArithmeticType() const { @@ -2030,7 +2080,7 @@ bool Type::isArithmeticType() const { // false for scoped enumerations since that will disable any // unwanted implicit conversions. return !ET->getDecl()->isScoped() && ET->getDecl()->isComplete(); - return isa(CanonicalType); + return isa(CanonicalType) || isExtIntType(); } Type::ScalarTypeKind Type::getScalarTypeKind() const { @@ -2059,6 +2109,8 @@ Type::ScalarTypeKind Type::getScalarTypeKind() const { if (CT->getElementType()->isRealFloatingType()) return STK_FloatingComplex; return STK_IntegralComplex; + } else if (isExtIntType()) { + return STK_Integral; } llvm_unreachable("unknown scalar type"); @@ -2224,6 +2276,7 @@ bool QualType::isCXX98PODType(const ASTContext &Context) const { case Type::MemberPointer: case Type::Vector: case Type::ExtVector: + case Type::ExtInt: return true; case Type::Enum: @@ -3643,6 +3696,7 @@ static CachedProperties computeCachedProperties(const Type *T) { // here in error recovery. 
return CachedProperties(ExternalLinkage, false); + case Type::ExtInt: case Type::Builtin: // C++ [basic.link]p8: // A type is said to have linkage if and only if: @@ -3740,6 +3794,7 @@ LinkageInfo LinkageComputer::computeTypeLinkageInfo(const Type *T) { assert(T->isInstantiationDependentType()); return LinkageInfo::external(); + case Type::ExtInt: case Type::Builtin: return LinkageInfo::external(); @@ -3948,6 +4003,8 @@ bool Type::canHaveNullability(bool ResultIfUnknown) const { case Type::ObjCInterface: case Type::Atomic: case Type::Pipe: + case Type::ExtInt: + case Type::DependentExtInt: return false; } llvm_unreachable("bad type kind!"); diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 4cc0d735ed6a..f000e1f6c932 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -227,6 +227,8 @@ bool TypePrinter::canPrefixQualifiers(const Type *T, case Type::ObjCInterface: case Type::Atomic: case Type::Pipe: + case Type::ExtInt: + case Type::DependentExtInt: CanPrefixQualifiers = true; break; @@ -1114,6 +1116,28 @@ void TypePrinter::printPipeBefore(const PipeType *T, raw_ostream &OS) { void TypePrinter::printPipeAfter(const PipeType *T, raw_ostream &OS) {} +void TypePrinter::printExtIntBefore(const ExtIntType *T, raw_ostream &OS) { + if (T->isUnsigned()) + OS << "unsigned "; + OS << "_ExtInt(" << T->getNumBits() << ")"; + spaceBeforePlaceHolder(OS); +} + +void TypePrinter::printExtIntAfter(const ExtIntType *T, raw_ostream &OS) {} + +void TypePrinter::printDependentExtIntBefore(const DependentExtIntType *T, + raw_ostream &OS) { + if (T->isUnsigned()) + OS << "unsigned "; + OS << "_ExtInt("; + T->getNumBitsExpr()->printPretty(OS, nullptr, Policy); + OS << ")"; + spaceBeforePlaceHolder(OS); +} + +void TypePrinter::printDependentExtIntAfter(const DependentExtIntType *T, + raw_ostream &OS) {} + /// Appends the given scope to the end of a string. void TypePrinter::AppendScope(DeclContext *DC, raw_ostream &OS) { if (DC->isTranslationUnit()) return; diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 4ea3fbca2144..e6422a7ff1c3 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -826,6 +826,17 @@ llvm::DIType *CGDebugInfo::CreateType(const AutoType *Ty) { return DBuilder.createUnspecifiedType("auto"); } +llvm::DIType *CGDebugInfo::CreateType(const ExtIntType *Ty) { + + StringRef Name = Ty->isUnsigned() ? "unsigned _ExtInt" : "_ExtInt"; + llvm::dwarf::TypeKind Encoding = Ty->isUnsigned() + ? llvm::dwarf::DW_ATE_unsigned + : llvm::dwarf::DW_ATE_signed; + + return DBuilder.createBasicType(Name, CGM.getContext().getTypeSize(Ty), + Encoding); +} + llvm::DIType *CGDebugInfo::CreateType(const ComplexType *Ty) { // Bit size and offset of the type. 
llvm::dwarf::TypeKind Encoding = llvm::dwarf::DW_ATE_complex_float; @@ -3159,6 +3170,8 @@ llvm::DIType *CGDebugInfo::CreateTypeNode(QualType Ty, llvm::DIFile *Unit) { case Type::Atomic: return CreateType(cast(Ty), Unit); + case Type::ExtInt: + return CreateType(cast(Ty)); case Type::Pipe: return CreateType(cast(Ty), Unit); diff --git a/clang/lib/CodeGen/CGDebugInfo.h b/clang/lib/CodeGen/CGDebugInfo.h index 4915e19753c6..34164fbec90e 100644 --- a/clang/lib/CodeGen/CGDebugInfo.h +++ b/clang/lib/CodeGen/CGDebugInfo.h @@ -168,6 +168,7 @@ class CGDebugInfo { llvm::DIType *CreateType(const BuiltinType *Ty); llvm::DIType *CreateType(const ComplexType *Ty); llvm::DIType *CreateType(const AutoType *Ty); + llvm::DIType *CreateType(const ExtIntType *Ty); llvm::DIType *CreateQualifiedType(QualType Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TypedefType *Ty, llvm::DIFile *Fg); llvm::DIType *CreateType(const TemplateSpecializationType *Ty, diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index ce0904234333..62a0f6c0efe6 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -760,6 +760,11 @@ class ScalarExprEmitter llvm::Value *Zero,bool isDiv); // Common helper for getting how wide LHS of shift is. static Value *GetWidthMinusOneValue(Value* LHS,Value* RHS); + + // Used for shifting constraints for OpenCL, do mask for powers of 2, URem for + // non powers of two. + Value *ConstrainShiftValue(Value *LHS, Value *RHS, const Twine &Name); + Value *EmitDiv(const BinOpInfo &Ops); Value *EmitRem(const BinOpInfo &Ops); Value *EmitAdd(const BinOpInfo &Ops); @@ -3762,6 +3767,21 @@ Value *ScalarExprEmitter::GetWidthMinusOneValue(Value* LHS,Value* RHS) { return llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth() - 1); } +Value *ScalarExprEmitter::ConstrainShiftValue(Value *LHS, Value *RHS, + const Twine &Name) { + llvm::IntegerType *Ty; + if (auto *VT = dyn_cast(LHS->getType())) + Ty = cast(VT->getElementType()); + else + Ty = cast(LHS->getType()); + + if (llvm::isPowerOf2_64(Ty->getBitWidth())) + return Builder.CreateAnd(RHS, GetWidthMinusOneValue(LHS, RHS), Name); + + return Builder.CreateURem( + RHS, llvm::ConstantInt::get(RHS->getType(), Ty->getBitWidth()), Name); +} + Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { // LLVM requires the LHS and RHS to be the same type: promote or truncate the // RHS to the same size as the LHS. @@ -3776,8 +3796,7 @@ Value *ScalarExprEmitter::EmitShl(const BinOpInfo &Ops) { bool SanitizeExponent = CGF.SanOpts.has(SanitizerKind::ShiftExponent); // OpenCL 6.3j: shift values are effectively % word size of LHS. if (CGF.getLangOpts().OpenCL) - RHS = - Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shl.mask"); + RHS = ConstrainShiftValue(Ops.LHS, RHS, "shl.mask"); else if ((SanitizeBase || SanitizeExponent) && isa(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); @@ -3839,8 +3858,7 @@ Value *ScalarExprEmitter::EmitShr(const BinOpInfo &Ops) { // OpenCL 6.3j: shift values are effectively % word size of LHS. 
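The ConstrainShiftValue helper above only keeps the old mask when the bit width is a power of 2 and otherwise falls back to an unsigned remainder. A standalone sketch of why the mask alone is insufficient for the new non-power-of-2 widths:

    // For a power-of-2 width the mask and the remainder agree:
    unsigned constrain32(unsigned S) { return S & (32u - 1u); } // same as S % 32

    // For, e.g., _ExtInt(24) the mask would be S & 31, which still allows the
    // out-of-range shift amounts 24..31, so a remainder is required instead:
    unsigned constrain24(unsigned S) { return S % 24u; }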
if (CGF.getLangOpts().OpenCL) - RHS = - Builder.CreateAnd(RHS, GetWidthMinusOneValue(Ops.LHS, RHS), "shr.mask"); + RHS = ConstrainShiftValue(Ops.LHS, RHS, "shr.mask"); else if (CGF.SanOpts.has(SanitizerKind::ShiftExponent) && isa(Ops.LHS->getType())) { CodeGenFunction::SanitizerScope SanScope(&CGF); diff --git a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp index 4de64a32f2ac..75af05623b03 100644 --- a/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp +++ b/clang/lib/CodeGen/CGRecordLayoutBuilder.cpp @@ -385,7 +385,8 @@ CGRecordLowering::accumulateBitFields(RecordDecl::field_iterator Field, Run = FieldEnd; continue; } - llvm::Type *Type = Types.ConvertTypeForMem(Field->getType()); + llvm::Type *Type = + Types.ConvertTypeForMem(Field->getType(), /*ForBitFields=*/true); // If we don't have a run yet, or don't live within the previous run's // allocated storage then we allocate some storage and start a new run. if (Run == FieldEnd || BitOffset >= Tail) { diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 05bf70e5cb22..9929c154e37b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -257,6 +257,7 @@ TypeEvaluationKind CodeGenFunction::getEvaluationKind(QualType type) { case Type::Enum: case Type::ObjCObjectPointer: case Type::Pipe: + case Type::ExtInt: return TEK_Scalar; // Complexes. @@ -2010,6 +2011,7 @@ void CodeGenFunction::EmitVariablyModifiedType(QualType type) { case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: + case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: diff --git a/clang/lib/CodeGen/CodeGenTBAA.cpp b/clang/lib/CodeGen/CodeGenTBAA.cpp index 8cc8c162dfbe..f4ebe6885675 100644 --- a/clang/lib/CodeGen/CodeGenTBAA.cpp +++ b/clang/lib/CodeGen/CodeGenTBAA.cpp @@ -209,6 +209,15 @@ llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { return createScalarTypeNode(OutName, getChar(), Size); } + if (const auto *EIT = dyn_cast(Ty)) { + SmallString<256> OutName; + llvm::raw_svector_ostream Out(OutName); + // Don't specify signed/unsigned since integer types can alias despite sign + // differences. + Out << "_ExtInt(" << EIT->getNumBits() << ')'; + return createScalarTypeNode(OutName, getChar(), Size); + } + // For now, handle any other kind of type conservatively. return getChar(); } diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 29adc2c7adb3..d6d84a3ff051 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -83,19 +83,19 @@ void CodeGenTypes::addRecordTypeName(const RecordDecl *RD, /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. -llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T) { +llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { llvm::Type *R = ConvertType(T); - // If this is a non-bool type, don't map it. - if (!R->isIntegerTy(1)) - return R; + // If this is a bool type, or an ExtIntType in a bitfield representation, + // map this integer to the target-specified size. + if ((ForBitField && T->isExtIntType()) || R->isIntegerTy(1)) + return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); - // Otherwise, return an integer of the target-specified size. 
- return llvm::IntegerType::get(getLLVMContext(), - (unsigned)Context.getTypeSize(T)); + // Else, don't map it. + return R; } - /// isRecordLayoutComplete - Return true if the specified type is already /// completely laid out. bool CodeGenTypes::isRecordLayoutComplete(const Type *Ty) const { @@ -731,6 +731,11 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { ResultType = CGM.getOpenCLRuntime().getPipeType(cast(Ty)); break; } + case Type::ExtInt: { + const auto &EIT = cast(Ty); + ResultType = llvm::Type::getIntNTy(getLLVMContext(), EIT->getNumBits()); + break; + } } assert(ResultType && "Didn't convert a type?"); diff --git a/clang/lib/CodeGen/CodeGenTypes.h b/clang/lib/CodeGen/CodeGenTypes.h index 03102329507e..394e2fdf8d65 100644 --- a/clang/lib/CodeGen/CodeGenTypes.h +++ b/clang/lib/CodeGen/CodeGenTypes.h @@ -134,7 +134,7 @@ class CodeGenTypes { /// ConvertType in that it is used to convert to the memory representation for /// a type. For example, the scalar representation for _Bool is i1, but the /// memory representation is usually i8 or i32, depending on the target. - llvm::Type *ConvertTypeForMem(QualType T); + llvm::Type *ConvertTypeForMem(QualType T, bool ForBitField = false); /// GetFunctionType - Get the LLVM function type for \arg Info. llvm::FunctionType *GetFunctionType(const CGFunctionInfo &Info); diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index c8a73c2757ab..4a591cf7aac5 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -3219,6 +3219,7 @@ void ItaniumRTTIBuilder::BuildVTablePointer(const Type *Ty) { llvm_unreachable("Pipe types shouldn't get here"); case Type::Builtin: + case Type::ExtInt: // GCC treats vector and complex types as fundamental types. case Type::Vector: case Type::ExtVector: @@ -3472,7 +3473,10 @@ llvm::Constant *ItaniumRTTIBuilder::BuildTypeInfo( llvm_unreachable("Undeduced type shouldn't get here"); case Type::Pipe: - llvm_unreachable("Pipe type shouldn't get here"); + break; + + case Type::ExtInt: + break; case Type::ConstantArray: case Type::IncompleteArray: diff --git a/clang/lib/Parse/ParseDecl.cpp b/clang/lib/Parse/ParseDecl.cpp index 8bd7571f1242..fe00199c1f8f 100644 --- a/clang/lib/Parse/ParseDecl.cpp +++ b/clang/lib/Parse/ParseDecl.cpp @@ -2880,6 +2880,25 @@ void Parser::ParseAlignmentSpecifier(ParsedAttributes &Attrs, ParsedAttr::AS_Keyword, EllipsisLoc); } +ExprResult Parser::ParseExtIntegerArgument() { + assert(Tok.is(tok::kw__ExtInt) && "Not an extended int type"); + ConsumeToken(); + + BalancedDelimiterTracker T(*this, tok::l_paren); + if (T.expectAndConsume()) + return ExprError(); + + ExprResult ER = ParseConstantExpression(); + if (ER.isInvalid()) { + T.skipToEnd(); + return ExprError(); + } + + if(T.consumeClose()) + return ExprError(); + return ER; +} + /// Determine whether we're looking at something that might be a declarator /// in a simple-declaration. 
If it can't possibly be a declarator, maybe /// diagnose a missing semicolon after a prior tag definition in the decl @@ -3807,6 +3826,14 @@ void Parser::ParseDeclarationSpecifiers(DeclSpec &DS, isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int, Loc, PrevSpec, DiagID, Policy); break; + case tok::kw__ExtInt: { + ExprResult ER = ParseExtIntegerArgument(); + if (ER.isInvalid()) + continue; + isInvalid = DS.SetExtIntType(Loc, ER.get(), PrevSpec, DiagID, Policy); + ConsumedEnd = PrevTokLocation; + break; + } case tok::kw___int128: isInvalid = DS.SetTypeSpecType(DeclSpec::TST_int128, Loc, PrevSpec, DiagID, Policy); @@ -4890,6 +4917,7 @@ bool Parser::isKnownToBeTypeSpecifier(const Token &Tok) const { case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: @@ -4969,6 +4997,7 @@ bool Parser::isTypeSpecifierQualifier() { case tok::kw_char16_t: case tok::kw_char32_t: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: @@ -5135,6 +5164,7 @@ bool Parser::isDeclarationSpecifier(bool DisambiguatingWithExpression) { case tok::kw_char32_t: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_half: case tok::kw_float: case tok::kw_double: diff --git a/clang/lib/Parse/ParseExpr.cpp b/clang/lib/Parse/ParseExpr.cpp index 0c6939b04319..29e583fcb84e 100644 --- a/clang/lib/Parse/ParseExpr.cpp +++ b/clang/lib/Parse/ParseExpr.cpp @@ -1492,6 +1492,7 @@ ExprResult Parser::ParseCastExpression(CastParseKind ParseKind, case tok::kw_long: case tok::kw___int64: case tok::kw___int128: + case tok::kw__ExtInt: case tok::kw_signed: case tok::kw_unsigned: case tok::kw_half: diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index c5e895d090a5..32e9370b0e00 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -2156,6 +2156,19 @@ void Parser::ParseCXXSimpleTypeSpecifier(DeclSpec &DS) { return; } + case tok::kw__ExtInt: { + ExprResult ER = ParseExtIntegerArgument(); + if (ER.isInvalid()) + DS.SetTypeSpecError(); + else + DS.SetExtIntType(Loc, ER.get(), PrevSpec, DiagID, Policy); + + // Do this here because we have already consumed the close paren. 
+ DS.SetRangeEnd(PrevTokLocation); + DS.Finish(Actions, Policy); + return; + } + // builtin types case tok::kw_short: DS.SetTypeSpecWidth(DeclSpec::TSW_short, Loc, PrevSpec, DiagID, Policy); diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp index 529e3f321054..61a82664bf71 100644 --- a/clang/lib/Parse/ParseTentative.cpp +++ b/clang/lib/Parse/ParseTentative.cpp @@ -1141,6 +1141,7 @@ Parser::isExpressionOrTypeSpecifierSimple(tok::TokenKind Kind) { case tok::kw_half: case tok::kw_float: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: @@ -1778,6 +1779,24 @@ Parser::isCXXDeclarationSpecifier(Parser::TPResult BracedCastResult, case tok::kw__Atomic: return TPResult::True; + case tok::kw__ExtInt: { + if (NextToken().isNot(tok::l_paren)) + return TPResult::Error; + RevertingTentativeParsingAction PA(*this); + ConsumeToken(); + ConsumeParen(); + + if (!SkipUntil(tok::r_paren, StopAtSemi)) + return TPResult::Error; + + if (Tok.is(tok::l_paren)) + return TPResult::Ambiguous; + + if (getLangOpts().CPlusPlus11 && Tok.is(tok::l_brace)) + return BracedCastResult; + + return TPResult::True; + } default: return TPResult::False; } @@ -1810,6 +1829,7 @@ bool Parser::isCXXDeclarationSpecifierAType() { case tok::kw_bool: case tok::kw_short: case tok::kw_int: + case tok::kw__ExtInt: case tok::kw_long: case tok::kw___int64: case tok::kw___int128: diff --git a/clang/lib/Sema/DeclSpec.cpp b/clang/lib/Sema/DeclSpec.cpp index ae4a78a4556d..276e35a3497e 100644 --- a/clang/lib/Sema/DeclSpec.cpp +++ b/clang/lib/Sema/DeclSpec.cpp @@ -360,6 +360,7 @@ bool Declarator::isDeclarationOfFunction() const { case TST_half: case TST_int: case TST_int128: + case TST_extint: case TST_struct: case TST_interface: case TST_union: @@ -538,6 +539,7 @@ const char *DeclSpec::getSpecifierName(DeclSpec::TST T, case DeclSpec::TST_char32: return "char32_t"; case DeclSpec::TST_int: return "int"; case DeclSpec::TST_int128: return "__int128"; + case DeclSpec::TST_extint: return "_ExtInt"; case DeclSpec::TST_half: return "half"; case DeclSpec::TST_float: return "float"; case DeclSpec::TST_double: return "double"; @@ -913,6 +915,27 @@ bool DeclSpec::SetTypeSpecError() { return false; } +bool DeclSpec::SetExtIntType(SourceLocation KWLoc, Expr *BitsExpr, + const char *&PrevSpec, unsigned &DiagID, + const PrintingPolicy &Policy) { + assert(BitsExpr && "no expression provided!"); + if (TypeSpecType == TST_error) + return false; + + if (TypeSpecType != TST_unspecified) { + PrevSpec = DeclSpec::getSpecifierName((TST) TypeSpecType, Policy); + DiagID = diag::err_invalid_decl_spec_combination; + return true; + } + + TypeSpecType = TST_extint; + ExprRep = BitsExpr; + TSTLoc = KWLoc; + TSTNameLoc = KWLoc; + TypeSpecOwned = false; + return false; +} + bool DeclSpec::SetTypeQual(TQ T, SourceLocation Loc, const char *&PrevSpec, unsigned &DiagID, const LangOptions &Lang) { // Duplicates are permitted in C99 onwards, but are not permitted in C89 or @@ -1194,7 +1217,7 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { TypeSpecType = TST_int; // unsigned -> unsigned int, signed -> signed int. else if (TypeSpecType != TST_int && TypeSpecType != TST_int128 && TypeSpecType != TST_char && TypeSpecType != TST_wchar && - !IsFixedPointType) { + !IsFixedPointType && TypeSpecType != TST_extint) { S.Diag(TSSLoc, diag::err_invalid_sign_spec) << getSpecifierName((TST)TypeSpecType, Policy); // signed double -> double. 
@@ -1241,7 +1264,8 @@ void DeclSpec::Finish(Sema &S, const PrintingPolicy &Policy) { S.getLocForEndOfToken(getTypeSpecComplexLoc()), " double"); TypeSpecType = TST_double; // _Complex -> _Complex double. - } else if (TypeSpecType == TST_int || TypeSpecType == TST_char) { + } else if (TypeSpecType == TST_int || TypeSpecType == TST_char || + TypeSpecType == TST_extint) { // Note that this intentionally doesn't include _Complex _Bool. if (!S.getLangOpts().CPlusPlus) S.Diag(TSTLoc, diag::ext_integer_complex); diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index e7bc4994e540..037e9c332412 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -9793,6 +9793,9 @@ struct IntRange { false/*NonNegative*/); } + if (const auto *EIT = dyn_cast(T)) + return IntRange(EIT->getNumBits(), EIT->isUnsigned()); + const BuiltinType *BT = cast(T); assert(BT->isInteger()); @@ -9816,6 +9819,9 @@ struct IntRange { if (const EnumType *ET = dyn_cast(T)) T = C.getCanonicalType(ET->getDecl()->getIntegerType()).getTypePtr(); + if (const auto *EIT = dyn_cast(T)) + return IntRange(EIT->getNumBits(), EIT->isUnsigned()); + const BuiltinType *BT = cast(T); assert(BT->isInteger()); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ed082dbaf986..27c8365ab8be 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -14811,12 +14811,16 @@ bool Sema::CheckEnumUnderlyingType(TypeSourceInfo *TI) { if (T->isDependentType()) return false; + // This doesn't use 'isIntegralType' despite the error message mentioning + // integral type because isIntegralType would also allow enum types in C. if (const BuiltinType *BT = T->getAs()) if (BT->isInteger()) return false; - Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T; - return true; + if (T->isExtIntType()) + return false; + + return Diag(UnderlyingLoc, diag::err_enum_invalid_underlying) << T; } /// Check whether this is a valid redeclaration of a previous enumeration. diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 3205b4472db2..869ae5cbc40b 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -4087,8 +4087,9 @@ void Sema::AddModeAttr(Decl *D, const AttributeCommonInfo &CI, Diag(AttrLoc, diag::err_enum_mode_vector_type) << Name << CI.getRange(); return; } - bool IntegralOrAnyEnumType = - OldElemTy->isIntegralOrEnumerationType() || OldElemTy->getAs(); + bool IntegralOrAnyEnumType = (OldElemTy->isIntegralOrEnumerationType() && + !OldElemTy->isExtIntType()) || + OldElemTy->getAs(); if (!OldElemTy->getAs() && !OldElemTy->isComplexType() && !IntegralOrAnyEnumType) diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 31d694857e9c..fbb5d4b05bbf 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -1482,6 +1482,11 @@ QualType Sema::UsualArithmeticConversions(ExprResult &LHS, ExprResult &RHS, if (LHSType == RHSType) return LHSType; + // ExtInt types aren't subject to conversions between them or normal integers, + // so this fails. + if(LHSType->isExtIntType() || RHSType->isExtIntType()) + return QualType(); + // At this point, we have two different arithmetic types. 
// Diagnose attempts to convert between __float128 and long double where @@ -4261,6 +4266,7 @@ static void captureVariablyModifiedType(ASTContext &Context, QualType T, case Type::ObjCObjectPointer: case Type::ObjCTypeParam: case Type::Pipe: + case Type::ExtInt: llvm_unreachable("type class is never variably-modified!"); case Type::Adjusted: T = cast(Ty)->getOriginalType(); @@ -10431,14 +10437,19 @@ static void DiagnoseBadShiftValues(Sema& S, ExprResult &LHS, ExprResult &RHS, << RHS.get()->getSourceRange()); return; } - llvm::APInt LeftBits(Right.getBitWidth(), - S.Context.getTypeSize(LHS.get()->getType())); + + QualType LHSExprType = LHS.get()->getType(); + uint64_t LeftSize = LHSExprType->isExtIntType() + ? S.Context.getIntWidth(LHSExprType) + : S.Context.getTypeSize(LHSExprType); + llvm::APInt LeftBits(Right.getBitWidth(), LeftSize); if (Right.uge(LeftBits)) { S.DiagRuntimeBehavior(Loc, RHS.get(), S.PDiag(diag::warn_shift_gt_typewidth) << RHS.get()->getSourceRange()); return; } + if (Opc != BO_Shl) return; diff --git a/clang/lib/Sema/SemaLookup.cpp b/clang/lib/Sema/SemaLookup.cpp index 82a197196576..08d29fa51e6e 100644 --- a/clang/lib/Sema/SemaLookup.cpp +++ b/clang/lib/Sema/SemaLookup.cpp @@ -2967,6 +2967,7 @@ addAssociatedClassesAndNamespaces(AssociatedLookup &Result, QualType Ty) { case Type::Vector: case Type::ExtVector: case Type::Complex: + case Type::ExtInt: break; // Non-deduced auto types only get here for error cases. diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index f425ec742b7b..7bd12913aec4 100755 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -5998,6 +5998,15 @@ bool UnnamedLocalNoLinkageFinder::VisitPipeType(const PipeType* T) { return false; } +bool UnnamedLocalNoLinkageFinder::VisitExtIntType(const ExtIntType *T) { + return false; +} + +bool UnnamedLocalNoLinkageFinder::VisitDependentExtIntType( + const DependentExtIntType *T) { + return false; +} + bool UnnamedLocalNoLinkageFinder::VisitTagDecl(const TagDecl *Tag) { if (Tag->getDeclContext()->isFunctionOrMethod()) { S.Diag(SR.getBegin(), @@ -6891,7 +6900,9 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, QualType IntegerType = ParamType; if (const EnumType *Enum = IntegerType->getAs()) IntegerType = Enum->getDecl()->getIntegerType(); - Value = Value.extOrTrunc(Context.getTypeSize(IntegerType)); + Value = Value.extOrTrunc(IntegerType->isExtIntType() + ? Context.getIntWidth(IntegerType) + : Context.getTypeSize(IntegerType)); Converted = TemplateArgument(Context, Value, Context.getCanonicalType(ParamType)); @@ -6985,7 +6996,9 @@ ExprResult Sema::CheckTemplateArgument(NonTypeTemplateParmDecl *Param, // Coerce the template argument's value to the value it will have // based on the template parameter's type. - unsigned AllowedBits = Context.getTypeSize(IntegerType); + unsigned AllowedBits = IntegerType->isExtIntType() + ? 
Context.getIntWidth(IntegerType) + : Context.getTypeSize(IntegerType); if (Value.getBitWidth() != AllowedBits) Value = Value.extOrTrunc(AllowedBits); Value.setIsSigned(IntegerType->isSignedIntegerOrEnumerationType()); diff --git a/clang/lib/Sema/SemaTemplateDeduction.cpp b/clang/lib/Sema/SemaTemplateDeduction.cpp index 8e3c61819571..e1d438fcb724 100644 --- a/clang/lib/Sema/SemaTemplateDeduction.cpp +++ b/clang/lib/Sema/SemaTemplateDeduction.cpp @@ -1515,6 +1515,7 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, case Type::ObjCObject: case Type::ObjCInterface: case Type::ObjCObjectPointer: + case Type::ExtInt: if (TDF & TDF_SkipNonDependent) return Sema::TDK_Success; @@ -2106,6 +2107,33 @@ DeduceTemplateArgumentsByTypeMatch(Sema &S, return Sema::TDK_NonDeducedMismatch; } + case Type::DependentExtInt: { + const auto *IntParam = cast(Param); + + if (const auto *IntArg = dyn_cast(Arg)){ + if (IntParam->isUnsigned() != IntArg->isUnsigned()) + return Sema::TDK_NonDeducedMismatch; + + NonTypeTemplateParmDecl *NTTP = + getDeducedParameterFromExpr(Info, IntParam->getNumBitsExpr()); + if (!NTTP) + return Sema::TDK_Success; + + llvm::APSInt ArgSize(S.Context.getTypeSize(S.Context.IntTy), false); + ArgSize = IntArg->getNumBits(); + + return DeduceNonTypeTemplateArgument(S, TemplateParams, NTTP, ArgSize, + S.Context.IntTy, true, Info, + Deduced); + } + + if (const auto *IntArg = dyn_cast(Arg)) { + if (IntParam->isUnsigned() != IntArg->isUnsigned()) + return Sema::TDK_NonDeducedMismatch; + return Sema::TDK_Success; + } + return Sema::TDK_NonDeducedMismatch; + } case Type::TypeOfExpr: case Type::TypeOf: @@ -5850,6 +5878,11 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, cast(T)->getDeducedType(), OnlyDeduced, Depth, Used); break; + case Type::DependentExtInt: + MarkUsedTemplateParameters(Ctx, + cast(T)->getNumBitsExpr(), + OnlyDeduced, Depth, Used); + break; // None of these types have any template parameters in them. 
case Type::Builtin: @@ -5862,6 +5895,7 @@ MarkUsedTemplateParameters(ASTContext &Ctx, QualType T, case Type::ObjCObjectPointer: case Type::UnresolvedUsing: case Type::Pipe: + case Type::ExtInt: #define TYPE(Class, Base) #define ABSTRACT_TYPE(Class, Base) #define DEPENDENT_TYPE(Class, Base) diff --git a/clang/lib/Sema/SemaTemplateVariadic.cpp b/clang/lib/Sema/SemaTemplateVariadic.cpp index 825b062c0054..466cb084e7c3 100644 --- a/clang/lib/Sema/SemaTemplateVariadic.cpp +++ b/clang/lib/Sema/SemaTemplateVariadic.cpp @@ -847,6 +847,7 @@ bool Sema::containsUnexpandedParameterPacks(Declarator &D) { case TST_typeofExpr: case TST_decltype: + case TST_extint: if (DS.getRepAsExpr() && DS.getRepAsExpr()->containsUnexpandedParameterPack()) return true; diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 53e4366f673b..4ecd36209e5b 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -35,6 +35,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringSwitch.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" using namespace clang; @@ -1441,6 +1442,15 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { } break; } + case DeclSpec::TST_extint: { + Result = S.BuildExtIntType(DS.getTypeSpecSign() == TSS_unsigned, + DS.getRepAsExpr(), DS.getBeginLoc()); + if (Result.isNull()) { + Result = Context.IntTy; + declarator.setInvalidType(true); + } + break; + } case DeclSpec::TST_accum: { switch (DS.getTypeSpecWidth()) { case DeclSpec::TSW_short: @@ -2160,6 +2170,45 @@ QualType Sema::BuildWritePipeType(QualType T, SourceLocation Loc) { return Context.getWritePipeType(T); } +/// Build a extended int type. +/// +/// \param IsUnsigned Boolean representing the signedness of the type. +/// +/// \param BitWidth Size of this int type in bits, or an expression representing +/// that. +/// +/// \param Loc Location of the keyword. +QualType Sema::BuildExtIntType(bool IsUnsigned, Expr *BitWidth, + SourceLocation Loc) { + if (BitWidth->isInstantiationDependent()) + return Context.getDependentExtIntType(IsUnsigned, BitWidth); + + llvm::APSInt Bits(32); + ExprResult ICE = VerifyIntegerConstantExpression(BitWidth, &Bits); + + if (ICE.isInvalid()) + return QualType(); + + int64_t NumBits = Bits.getSExtValue(); + if (!IsUnsigned && NumBits < 2) { + Diag(Loc, diag::err_ext_int_bad_size) << 0; + return QualType(); + } + + if (IsUnsigned && NumBits < 1) { + Diag(Loc, diag::err_ext_int_bad_size) << 1; + return QualType(); + } + + if (NumBits > llvm::IntegerType::MAX_INT_BITS) { + Diag(Loc, diag::err_ext_int_max_size) << IsUnsigned + << llvm::IntegerType::MAX_INT_BITS; + return QualType(); + } + + return Context.getExtIntType(IsUnsigned, NumBits); +} + /// Check whether the specified array size makes the array type a VLA. If so, /// return true, if not, return the size of the array in SizeVal. static bool isArraySizeVLA(Sema &S, Expr *ArraySize, llvm::APSInt &SizeVal) { @@ -5774,6 +5823,14 @@ namespace { TL.getValueLoc().initializeFullCopy(TInfo->getTypeLoc()); } + void VisitExtIntTypeLoc(ExtIntTypeLoc TL) { + TL.setNameLoc(DS.getTypeSpecTypeLoc()); + } + + void VisitDependentExtIntTypeLoc(DependentExtIntTypeLoc TL) { + TL.setNameLoc(DS.getTypeSpecTypeLoc()); + } + void VisitTypeLoc(TypeLoc TL) { // FIXME: add other typespec types and change this to an assert. 
TL.initialize(Context, DS.getTypeSpecTypeLoc()); @@ -5900,6 +5957,9 @@ namespace { assert(Chunk.Kind == DeclaratorChunk::Pipe); TL.setKWLoc(Chunk.Loc); } + void VisitExtIntTypeLoc(ExtIntTypeLoc TL) { + TL.setNameLoc(Chunk.Loc); + } void VisitMacroQualifiedTypeLoc(MacroQualifiedTypeLoc TL) { TL.setExpansionLoc(Chunk.Loc); } @@ -8631,6 +8691,12 @@ QualType Sema::BuildAtomicType(QualType T, SourceLocation Loc) { else if (!T.isTriviallyCopyableType(Context)) // Some other non-trivially-copyable type (probably a C++ class) DisallowedKind = 7; + else if (auto *ExtTy = T->getAs()) { + if (ExtTy->getNumBits() < 8) + DisallowedKind = 8; + else if (!llvm::isPowerOf2_32(ExtTy->getNumBits())) + DisallowedKind = 9; + } if (DisallowedKind != -1) { Diag(Loc, diag::err_atomic_specifier_bad_type) << DisallowedKind << T; diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index 87b07897ec28..abde968bed8c 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -1183,6 +1183,14 @@ class TreeTransform { QualType RebuildPipeType(QualType ValueType, SourceLocation KWLoc, bool isReadPipe); + /// Build an extended int given its value type. + QualType RebuildExtIntType(bool IsUnsigned, unsigned NumBits, + SourceLocation Loc); + + /// Build a dependent extended int given its value type. + QualType RebuildDependentExtIntType(bool IsUnsigned, Expr *NumBitsExpr, + SourceLocation Loc); + /// Build a new template name given a nested name specifier, a flag /// indicating whether the "template" keyword was provided, and the template /// that the template name refers to. @@ -6120,6 +6128,57 @@ QualType TreeTransform::TransformPipeType(TypeLocBuilder &TLB, return Result; } +template +QualType TreeTransform::TransformExtIntType(TypeLocBuilder &TLB, + ExtIntTypeLoc TL) { + const ExtIntType *EIT = TL.getTypePtr(); + QualType Result = TL.getType(); + + if (getDerived().AlwaysRebuild()) { + Result = getDerived().RebuildExtIntType(EIT->isUnsigned(), + EIT->getNumBits(), TL.getNameLoc()); + if (Result.isNull()) + return QualType(); + } + + ExtIntTypeLoc NewTL = TLB.push(Result); + NewTL.setNameLoc(TL.getNameLoc()); + return Result; +} + +template +QualType TreeTransform::TransformDependentExtIntType( + TypeLocBuilder &TLB, DependentExtIntTypeLoc TL) { + const DependentExtIntType *EIT = TL.getTypePtr(); + + EnterExpressionEvaluationContext Unevaluated( + SemaRef, Sema::ExpressionEvaluationContext::ConstantEvaluated); + ExprResult BitsExpr = getDerived().TransformExpr(EIT->getNumBitsExpr()); + BitsExpr = SemaRef.ActOnConstantExpression(BitsExpr); + + if (BitsExpr.isInvalid()) + return QualType(); + + QualType Result = TL.getType(); + + if (getDerived().AlwaysRebuild() || BitsExpr.get() != EIT->getNumBitsExpr()) { + Result = getDerived().RebuildDependentExtIntType( + EIT->isUnsigned(), BitsExpr.get(), TL.getNameLoc()); + + if (Result.isNull()) + return QualType(); + } + + if (isa(Result)) { + DependentExtIntTypeLoc NewTL = TLB.push(Result); + NewTL.setNameLoc(TL.getNameLoc()); + } else { + ExtIntTypeLoc NewTL = TLB.push(Result); + NewTL.setNameLoc(TL.getNameLoc()); + } + return Result; +} + /// Simple iterator that traverses the template arguments in a /// container that provides a \c getArgLoc() member function. 
/// @@ -13782,6 +13841,23 @@ QualType TreeTransform::RebuildPipeType(QualType ValueType, : SemaRef.BuildWritePipeType(ValueType, KWLoc); } +template +QualType TreeTransform::RebuildExtIntType(bool IsUnsigned, + unsigned NumBits, + SourceLocation Loc) { + llvm::APInt NumBitsAP(SemaRef.Context.getIntWidth(SemaRef.Context.IntTy), + NumBits, true); + IntegerLiteral *Bits = IntegerLiteral::Create(SemaRef.Context, NumBitsAP, + SemaRef.Context.IntTy, Loc); + return SemaRef.BuildExtIntType(IsUnsigned, Bits, Loc); +} + +template +QualType TreeTransform::RebuildDependentExtIntType( + bool IsUnsigned, Expr *NumBitsExpr, SourceLocation Loc) { + return SemaRef.BuildExtIntType(IsUnsigned, NumBitsExpr, Loc); +} + template TemplateName TreeTransform::RebuildTemplateName(CXXScopeSpec &SS, diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index f0e9bbd4dcea..62dd233aab3f 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6719,6 +6719,15 @@ void TypeLocReader::VisitPipeTypeLoc(PipeTypeLoc TL) { TL.setKWLoc(readSourceLocation()); } +void TypeLocReader::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) { + TL.setNameLoc(readSourceLocation()); +} +void TypeLocReader::VisitDependentExtIntTypeLoc( + clang::DependentExtIntTypeLoc TL) { + TL.setNameLoc(readSourceLocation()); +} + + void ASTRecordReader::readTypeLoc(TypeLoc TL) { TypeLocReader TLR(*this); for (; !TL.isNull(); TL = TL.getNextTypeLoc()) diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index c8ce3edda60b..18a92aaadd52 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -476,6 +476,14 @@ void TypeLocWriter::VisitPipeTypeLoc(PipeTypeLoc TL) { Record.AddSourceLocation(TL.getKWLoc()); } +void TypeLocWriter::VisitExtIntTypeLoc(clang::ExtIntTypeLoc TL) { + Record.AddSourceLocation(TL.getNameLoc()); +} +void TypeLocWriter::VisitDependentExtIntTypeLoc( + clang::DependentExtIntTypeLoc TL) { + Record.AddSourceLocation(TL.getNameLoc()); +} + void ASTWriter::WriteTypeAbbrevs() { using namespace llvm; diff --git a/clang/test/CodeGen/ext-int-sanitizer.cpp b/clang/test/CodeGen/ext-int-sanitizer.cpp new file mode 100644 index 000000000000..ddf3180e1a1b --- /dev/null +++ b/clang/test/CodeGen/ext-int-sanitizer.cpp @@ -0,0 +1,265 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -fsanitize=array-bounds,enum,float-cast-overflow,integer-divide-by-zero,implicit-unsigned-integer-truncation,implicit-signed-integer-truncation,implicit-integer-sign-change,unsigned-integer-overflow,signed-integer-overflow,shift-base,shift-exponent -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + + +// CHECK: define void @_Z6BoundsRA10_KiU7_ExtIntILi15EEi +void Bounds(const int (&Array)[10], _ExtInt(15) Index) { + int I1 = Array[Index]; + // CHECK: %[[SEXT:.+]] = sext i15 %{{.+}} to i64 + // CHECK: %[[CMP:.+]] = icmp ult i64 %[[SEXT]], 10 + // CHECK: br i1 %[[CMP]] + // CHECK: call void @__ubsan_handle_out_of_bounds +} + +// CHECK: define void @_Z4Enumv +void Enum() { + enum E1 { e1a = 0, e1b = 127 } + e1; + enum E2 { e2a = -1, e2b = 64 } + e2; + enum E3 { e3a = (1u << 31) - 1 } + e3; + + _ExtInt(34) a = e1; + // CHECK: %[[E1:.+]] = icmp ule i32 %{{.*}}, 127 + // CHECK: br i1 %[[E1]] + // CHECK: call void @__ubsan_handle_load_invalid_value_abort + _ExtInt(34) b = e2; + // CHECK: %[[E2HI:.*]] = icmp sle i32 {{.*}}, 127 + // CHECK: %[[E2LO:.*]] = icmp sge i32 {{.*}}, -128 + // CHECK: %[[E2:.*]] = and i1 %[[E2HI]], 
%[[E2LO]] + // CHECK: br i1 %[[E2]] + // CHECK: call void @__ubsan_handle_load_invalid_value_abort + _ExtInt(34) c = e3; + // CHECK: %[[E3:.*]] = icmp ule i32 {{.*}}, 2147483647 + // CHECK: br i1 %[[E3]] + // CHECK: call void @__ubsan_handle_load_invalid_value_abort +} + +// CHECK: define void @_Z13FloatOverflowfd +void FloatOverflow(float f, double d) { + _ExtInt(10) E = f; + // CHECK: fcmp ogt float %{{.+}}, -5.130000e+02 + // CHECK: fcmp olt float %{{.+}}, 5.120000e+02 + _ExtInt(10) E2 = d; + // CHECK: fcmp ogt double %{{.+}}, -5.130000e+02 + // CHECK: fcmp olt double %{{.+}}, 5.120000e+02 + _ExtInt(7) E3 = f; + // CHECK: fcmp ogt float %{{.+}}, -6.500000e+01 + // CHECK: fcmp olt float %{{.+}}, 6.400000e+01 + _ExtInt(7) E4 = d; + // CHECK: fcmp ogt double %{{.+}}, -6.500000e+01 + // CHECK: fcmp olt double %{{.+}}, 6.400000e+01 +} + +// CHECK: define void @_Z14UIntTruncationU7_ExtIntILi35EEjjy +void UIntTruncation(unsigned _ExtInt(35) E, unsigned int i, unsigned long long ll) { + + i = E; + // CHECK: %[[LOADE:.+]] = load i35 + // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 + // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 + // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + E = ll; + // CHECK: %[[LOADLL:.+]] = load i64 + // CHECK: %[[CONV:.+]] = trunc i64 %[[LOADLL]] to i35 + // CHECK: %[[EXT:.+]] = zext i35 %[[CONV]] to i64 + // CHECK: %[[CHECK:.+]] = icmp eq i64 %[[EXT]], %[[LOADLL]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort +} + +// CHECK: define void @_Z13IntTruncationU7_ExtIntILi35EEiU7_ExtIntILi42EEjij +void IntTruncation(_ExtInt(35) E, unsigned _ExtInt(42) UE, int i, unsigned j) { + + j = E; + // CHECK: %[[LOADE:.+]] = load i35 + // CHECK: %[[CONV:.+]] = trunc i35 %[[LOADE]] to i32 + // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i35 + // CHECK: %[[CHECK:.+]] = icmp eq i35 %[[EXT]], %[[LOADE]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + j = UE; + // CHECK: %[[LOADUE:.+]] = load i42 + // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32 + // CHECK: %[[EXT:.+]] = zext i32 %[[CONV]] to i42 + // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + // Note: also triggers sign change check. + i = UE; + // CHECK: %[[LOADUE:.+]] = load i42 + // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i32 + // CHECK: %[[NEG:.+]] = icmp slt i32 %[[CONV]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] + // CHECK: %[[EXT:.+]] = sext i32 %[[CONV]] to i42 + // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] + // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]] + // CHECK: br i1 %[[CHECKBOTH]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + // Note: also triggers sign change check. 
+ E = UE; + // CHECK: %[[LOADUE:.+]] = load i42 + // CHECK: %[[CONV:.+]] = trunc i42 %[[LOADUE]] to i35 + // CHECK: %[[NEG:.+]] = icmp slt i35 %[[CONV]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] + // CHECK: %[[EXT:.+]] = sext i35 %[[CONV]] to i42 + // CHECK: %[[CHECK:.+]] = icmp eq i42 %[[EXT]], %[[LOADUE]] + // CHECK: %[[CHECKBOTH:.+]] = and i1 %[[SIGNCHECK]], %[[CHECK]] + // CHECK: br i1 %[[CHECKBOTH]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort +} + +// CHECK: define void @_Z15SignChangeCheckU7_ExtIntILi39EEjU7_ExtIntILi39EEi +void SignChangeCheck(unsigned _ExtInt(39) UE, _ExtInt(39) E) { + UE = E; + // CHECK: %[[LOADE:.+]] = load i39 + // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADE]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 %[[NEG]], false + // CHECK: br i1 %[[SIGNCHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort + + + E = UE; + // CHECK: %[[LOADUE:.+]] = load i39 + // CHECK: %[[NEG:.+]] = icmp slt i39 %[[LOADUE]], 0 + // CHECK: %[[SIGNCHECK:.+]] = icmp eq i1 false, %[[NEG]] + // CHECK: br i1 %[[SIGNCHECK]] + // CHECK: call void @__ubsan_handle_implicit_conversion_abort +} + +// CHECK: define void @_Z9DivByZeroU7_ExtIntILi11EEii +void DivByZero(_ExtInt(11) E, int i) { + + // Also triggers signed integer overflow. + E / E; + // CHECK: %[[E:.+]] = load i11, i11* + // CHECK: %[[E2:.+]] = load i11, i11* + // CHECK: %[[NEZERO:.+]] = icmp ne i11 %[[E2]], 0 + // CHECK: %[[NEMIN:.+]] = icmp ne i11 %[[E]], -1024 + // CHECK: %[[NENEG1:.+]] = icmp ne i11 %[[E2]], -1 + // CHECK: %[[OR:.+]] = or i1 %[[NEMIN]], %[[NENEG1]] + // CHECK: %[[AND:.+]] = and i1 %[[NEZERO]], %[[OR]] + // CHECK: br i1 %[[AND]] + // CHECK: call void @__ubsan_handle_divrem_overflow_abort +} + +// TODO: +//-fsanitize=shift: (shift-base, shift-exponent) Shift operators where the amount shifted is greater or equal to the promoted bit-width of the left hand side or less than zero, or where the left hand side is negative. For a signed left shift, also checks for signed overflow in C, and for unsigned overflow in C++. You can use -fsanitize=shift-base or -fsanitize=shift-exponent to check only left-hand side or right-hand side of shift operation, respectively. 
+// CHECK: define void @_Z6ShiftsU7_ExtIntILi9EEi +void Shifts(_ExtInt(9) E) { + E >> E; + // CHECK: %[[LHSE:.+]] = load i9, i9* + // CHECK: %[[RHSE:.+]] = load i9, i9* + // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 + // CHECK: br i1 %[[CMP]] + // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort + + E << E; + // CHECK: %[[LHSE:.+]] = load i9, i9* + // CHECK: %[[RHSE:.+]] = load i9, i9* + // CHECK: %[[CMP:.+]] = icmp ule i9 %[[RHSE]], 8 + // CHECK: br i1 %[[CMP]] + // CHECK: %[[ZEROS:.+]] = sub nuw nsw i9 8, %[[RHSE]] + // CHECK: %[[CHECK:.+]] = lshr i9 %[[LHSE]], %[[ZEROS]] + // CHECK: %[[SKIPSIGN:.+]] = lshr i9 %[[CHECK]], 1 + // CHECK: %[[CHECK:.+]] = icmp eq i9 %[[SKIPSIGN]] + // CHECK: %[[PHI:.+]] = phi i1 [ true, %{{.+}} ], [ %[[CHECK]], %{{.+}} ] + // CHECK: and i1 %[[CMP]], %[[PHI]] + // CHECK: call void @__ubsan_handle_shift_out_of_bounds_abort +} + +// CHECK: define void @_Z21SignedIntegerOverflowU7_ExtIntILi93EEiU7_ExtIntILi4EEiU7_ExtIntILi31EEi +void SignedIntegerOverflow(_ExtInt(93) BiggestE, + _ExtInt(4) SmallestE, + _ExtInt(31) JustRightE) { + BiggestE + BiggestE; + // CHECK: %[[LOAD1:.+]] = load i93, i93* + // CHECK: %[[LOAD2:.+]] = load i93, i93* + // CHECK: %[[OFCALL:.+]] = call { i93, i1 } @llvm.sadd.with.overflow.i93(i93 %[[LOAD1]], i93 %[[LOAD2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i93, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallestE - SmallestE; + // CHECK: %[[LOAD1:.+]] = load i4, i4* + // CHECK: %[[LOAD2:.+]] = load i4, i4* + // CHECK: %[[OFCALL:.+]] = call { i4, i1 } @llvm.ssub.with.overflow.i4(i4 %[[LOAD1]], i4 %[[LOAD2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i4, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_sub_overflow_abort + + JustRightE * JustRightE; + // CHECK: %[[LOAD1:.+]] = load i31, i31* + // CHECK: %[[LOAD2:.+]] = load i31, i31* + // CHECK: %[[OFCALL:.+]] = call { i31, i1 } @llvm.smul.with.overflow.i31(i31 %[[LOAD1]], i31 %[[LOAD2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i31, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_mul_overflow_abort +} + +// CHECK: define void @_Z23UnsignedIntegerOverflowjU7_ExtIntILi23EEjU7_ExtIntILi35EEj +void UnsignedIntegerOverflow(unsigned u, + unsigned _ExtInt(23) SmallE, + unsigned _ExtInt(35) BigE) { + u = SmallE + SmallE; + // CHECK: %[[LOADE1:.+]] = load i23, i23* + // CHECK: %[[LOADE2:.+]] = load i23, i23* + // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallE = u + u; + // CHECK: %[[LOADU1:.+]] = load i32, i32* + // CHECK: %[[LOADU2:.+]] = load i32, i32* + // CHECK: %[[OFCALL:.+]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %[[LOADU1]], i32 %[[LOADU2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i32, i1 } 
%[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i32, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallE = SmallE + SmallE; + // CHECK: %[[LOADE1:.+]] = load i23, i23* + // CHECK: %[[LOADE2:.+]] = load i23, i23* + // CHECK: %[[OFCALL:.+]] = call { i23, i1 } @llvm.uadd.with.overflow.i23(i23 %[[LOADE1]], i23 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i23, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + SmallE = BigE + BigE; + // CHECK: %[[LOADE1:.+]] = load i35, i35* + // CHECK: %[[LOADE2:.+]] = load i35, i35* + // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort + + BigE = BigE + BigE; + // CHECK: %[[LOADE1:.+]] = load i35, i35* + // CHECK: %[[LOADE2:.+]] = load i35, i35* + // CHECK: %[[OFCALL:.+]] = call { i35, i1 } @llvm.uadd.with.overflow.i35(i35 %[[LOADE1]], i35 %[[LOADE2]]) + // CHECK: %[[EXRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 0 + // CHECK: %[[OFRESULT:.+]] = extractvalue { i35, i1 } %[[OFCALL]], 1 + // CHECK: %[[CHECK:.+]] = xor i1 %[[OFRESULT]], true + // CHECK: br i1 %[[CHECK]] + // CHECK: call void @__ubsan_handle_add_overflow_abort +} diff --git a/clang/test/CodeGen/ext-int.c b/clang/test/CodeGen/ext-int.c new file mode 100644 index 000000000000..ef48dd331652 --- /dev/null +++ b/clang/test/CodeGen/ext-int.c @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK + + +void GenericTest(_ExtInt(3) a, unsigned _ExtInt(3) b, _ExtInt(4) c) { + // CHECK: define {{.*}}void @GenericTest + int which = _Generic(a, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); + // CHECK: store i32 1 + int which2 = _Generic(b, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); + // CHECK: store i32 2 + int which3 = _Generic(c, _ExtInt(3): 1, unsigned _ExtInt(3) : 2, _ExtInt(4) : 3); + // CHECK: store i32 3 +} + +void VLATest(_ExtInt(3) A, _ExtInt(99) B, _ExtInt(123456) C) { + // CHECK: define {{.*}}void @VLATest + int AR1[A]; + // CHECK: %[[A:.+]] = zext i3 %{{.+}} to i64 + // CHECK: %[[VLA1:.+]] = alloca i32, i64 %[[A]] + int AR2[B]; + // CHECK: %[[B:.+]] = trunc i99 %{{.+}} to i64 + // CHECK: %[[VLA2:.+]] = alloca i32, i64 %[[B]] + int AR3[C]; + // CHECK: %[[C:.+]] = trunc i123456 %{{.+}} to i64 + // CHECK: %[[VLA3:.+]] = alloca i32, i64 %[[C]] +} + +struct S { + _ExtInt(17) A; + _ExtInt(16777200) B; + _ExtInt(17) C; +}; + +void OffsetOfTest() { + // CHECK: define {{.*}}void @OffsetOfTest + int A = __builtin_offsetof(struct S,A); + // CHECK: store i32 0, i32* %{{.+}} + int B = __builtin_offsetof(struct S,B); + // CHECK: store i32 8, i32* %{{.+}} + int C = __builtin_offsetof(struct S,C); + // CHECK: store i32 2097160, i32* %{{.+}} +} + + diff --git a/clang/test/CodeGenCXX/ext-int.cpp 
b/clang/test/CodeGenCXX/ext-int.cpp new file mode 100644 index 000000000000..4e0c58fe1e40 --- /dev/null +++ b/clang/test/CodeGenCXX/ext-int.cpp @@ -0,0 +1,432 @@ +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -I%S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN,NoNewStructPathTBAA +// RUN: %clang_cc1 -triple x86_64-gnu-linux -O3 -disable-llvm-passes -I%S -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,LIN,NewStructPathTBAA + +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -I%S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN,NoNewStructPathTBAA +// RUN: %clang_cc1 -triple x86_64-windows-pc -O3 -disable-llvm-passes -I%S -new-struct-path-tbaa -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,WIN,NewStructPathTBAA + +#include + +// Ensure that the layout for these structs is the same as the normal bitfield +// layouts. +struct BitFieldsByte { + _ExtInt(7) A : 3; + _ExtInt(7) B : 3; + _ExtInt(7) C : 2; +}; +// CHECK: %struct.BitFieldsByte = type { i8 } + +struct BitFieldsShort { + _ExtInt(15) A : 3; + _ExtInt(15) B : 3; + _ExtInt(15) C : 2; +}; +// LIN: %struct.BitFieldsShort = type { i8, i8 } +// WIN: %struct.BitFieldsShort = type { i16 } + +struct BitFieldsInt { + _ExtInt(31) A : 3; + _ExtInt(31) B : 3; + _ExtInt(31) C : 2; +}; +// LIN: %struct.BitFieldsInt = type { i8, [3 x i8] } +// WIN: %struct.BitFieldsInt = type { i32 } + +struct BitFieldsLong { + _ExtInt(63) A : 3; + _ExtInt(63) B : 3; + _ExtInt(63) C : 2; +}; +// LIN: %struct.BitFieldsLong = type { i8, [7 x i8] } +// WIN: %struct.BitFieldsLong = type { i64 } + +struct HasExtIntFirst { + _ExtInt(35) A; + int B; +}; +// CHECK: %struct.HasExtIntFirst = type { i35, i32 } + +struct HasExtIntLast { + int A; + _ExtInt(35) B; +}; +// CHECK: %struct.HasExtIntLast = type { i32, i35 } + +struct HasExtIntMiddle { + int A; + _ExtInt(35) B; + int C; +}; +// CHECK: %struct.HasExtIntMiddle = type { i32, i35, i32 } + +// Force emitting of the above structs. +void StructEmit() { + BitFieldsByte A; + BitFieldsShort B; + BitFieldsInt C; + BitFieldsLong D; + + HasExtIntFirst E; + HasExtIntLast F; + HasExtIntMiddle G; +} + +void BitfieldAssignment() { + // LIN: define void @_Z18BitfieldAssignmentv + // WIN: define dso_local void @"?BitfieldAssignment@@YAXXZ" + BitFieldsByte B; + B.A = 3; + B.B = 2; + B.C = 1; + // First one is used for the lifetime start, skip that. 
+ // CHECK: bitcast %struct.BitFieldsByte* + // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* + // CHECK: %[[LOADA:.+]] = load i8, i8* %[[BFType]] + // CHECK: %[[CLEARA:.+]] = and i8 %[[LOADA]], -8 + // CHECK: %[[SETA:.+]] = or i8 %[[CLEARA]], 3 + // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* + // CHECK: %[[LOADB:.+]] = load i8, i8* %[[BFType]] + // CHECK: %[[CLEARB:.+]] = and i8 %[[LOADB]], -57 + // CHECK: %[[SETB:.+]] = or i8 %[[CLEARB]], 16 + // CHECK: %[[BFType:.+]] = bitcast %struct.BitFieldsByte* + // CHECK: %[[LOADC:.+]] = load i8, i8* %[[BFType]] + // CHECK: %[[CLEARC:.+]] = and i8 %[[LOADC]], 63 + // CHECK: %[[SETC:.+]] = or i8 %[[CLEARC]], 64 +} + +enum AsEnumUnderlyingType : _ExtInt(9) { + A,B,C +}; + +void UnderlyingTypeUsage(AsEnumUnderlyingType Param) { + // LIN: define void @_Z19UnderlyingTypeUsage20AsEnumUnderlyingType(i9 % + // WIN: define dso_local void @"?UnderlyingTypeUsage@@YAXW4AsEnumUnderlyingType@@@Z"(i9 % + AsEnumUnderlyingType Var; + // CHECK: alloca i9, align 2 + // CHECK: store i9 %{{.*}}, align 2 +} + +unsigned _ExtInt(33) ManglingTestRetParam(unsigned _ExtInt(33) Param) { +// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEj(i33 % +// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_UExtInt@$0CB@@__clang@@U12@@Z"(i33 + return 0; +} + +_ExtInt(33) ManglingTestRetParam(_ExtInt(33) Param) { +// LIN: define i33 @_Z20ManglingTestRetParamU7_ExtIntILi33EEi(i33 % +// WIN: define dso_local i33 @"?ManglingTestRetParam@@YAU?$_ExtInt@$0CB@@__clang@@U12@@Z"(i33 + return 0; +} + +template +void ManglingTestTemplateParam(T&); +template<_ExtInt(99) T> +void ManglingTestNTTP(); + +void ManglingInstantiator() { + // LIN: define void @_Z20ManglingInstantiatorv() + // WIN: define dso_local void @"?ManglingInstantiator@@YAXXZ"() + _ExtInt(93) A; + ManglingTestTemplateParam(A); +// LIN: call void @_Z25ManglingTestTemplateParamIU7_ExtIntILi93EEiEvRT_(i93* +// WIN: call void @"??$ManglingTestTemplateParam@U?$_ExtInt@$0FN@@__clang@@@@YAXAEAU?$_ExtInt@$0FN@@__clang@@@Z"(i93* + constexpr _ExtInt(93) B = 993; + ManglingTestNTTP<38>(); +// LIN: call void @_Z16ManglingTestNTTPILU7_ExtIntILi99EEi38EEvv() +// WIN: call void @"??$ManglingTestNTTP@$0CG@@@YAXXZ"() + ManglingTestNTTP(); +// LIN: call void @_Z16ManglingTestNTTPILU7_ExtIntILi99EEi993EEvv() +// WIN: call void @"??$ManglingTestNTTP@$0DOB@@@YAXXZ"() +} + +void TakesVarargs(int i, ...) { + // LIN: define void @_Z12TakesVarargsiz(i32 %i, ...) + // WIN: define dso_local void @"?TakesVarargs@@YAXHZZ"(i32 %i, ...) 
+ + __builtin_va_list args; + // LIN: %[[ARGS:.+]] = alloca [1 x %struct.__va_list_tag] + // WIN: %[[ARGS:.+]] = alloca i8* + __builtin_va_start(args, i); + // LIN: %[[STARTAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[STARTAD1:.+]] = bitcast %struct.__va_list_tag* %[[STARTAD]] to i8* + // LIN: call void @llvm.va_start(i8* %[[STARTAD1]]) + // WIN: %[[ARGSLLIFETIMESTART:.+]] = bitcast i8** %[[ARGS]] to i8* + // WIN: %[[ARGSSTART:.+]] = bitcast i8** %[[ARGS]] to i8* + // WIN: call void @llvm.va_start(i8* %[[ARGSSTART]]) + + _ExtInt(92) A = __builtin_va_arg(args, _ExtInt(92)); + // LIN: %[[AD1:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P1:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD1]], i32 0, i32 2 + // LIN: %[[OFA1:.+]] = load i8*, i8** %[[OFA_P1]] + // LIN: %[[BC1:.+]] = bitcast i8* %[[OFA1]] to i92* + // LIN: %[[OFANEXT1:.+]] = getelementptr i8, i8* %[[OFA1]], i32 16 + // LIN: store i8* %[[OFANEXT1]], i8** %[[OFA_P1]] + // LIN: %[[LOAD1:.+]] = load i92, i92* %[[BC1]] + // LIN: store i92 %[[LOAD1]], i92* + // WIN: %[[CUR1:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT1:.+]] = getelementptr inbounds i8, i8* %[[CUR1]], i64 16 + // WIN: store i8* %[[NEXT1]], i8** %[[ARGS]] + // WIN: %[[BC1:.+]] = bitcast i8* %[[CUR1]] to i92* + // WIN: %[[LOADV1:.+]] = load i92, i92* %[[BC1]] + // WIN: store i92 %[[LOADV1]], i92* + + _ExtInt(31) B = __builtin_va_arg(args, _ExtInt(31)); + // LIN: %[[AD2:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P2:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD2]], i32 0, i32 2 + // LIN: %[[OFA2:.+]] = load i8*, i8** %[[OFA_P2]] + // LIN: %[[BC2:.+]] = bitcast i8* %[[OFA2]] to i31* + // LIN: %[[OFANEXT2:.+]] = getelementptr i8, i8* %[[OFA2]], i32 8 + // LIN: store i8* %[[OFANEXT2]], i8** %[[OFA_P2]] + // LIN: %[[LOAD2:.+]] = load i31, i31* %[[BC2]] + // LIN: store i31 %[[LOAD2]], i31* + // WIN: %[[CUR2:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT2:.+]] = getelementptr inbounds i8, i8* %[[CUR2]], i64 8 + // WIN: store i8* %[[NEXT2]], i8** %[[ARGS]] + // WIN: %[[BC2:.+]] = bitcast i8* %[[CUR2]] to i31* + // WIN: %[[LOADV2:.+]] = load i31, i31* %[[BC2]] + // WIN: store i31 %[[LOADV2]], i31* + + _ExtInt(16) C = __builtin_va_arg(args, _ExtInt(16)); + // LIN: %[[AD3:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P3:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD3]], i32 0, i32 2 + // LIN: %[[OFA3:.+]] = load i8*, i8** %[[OFA_P3]] + // LIN: %[[BC3:.+]] = bitcast i8* %[[OFA3]] to i16* + // LIN: %[[OFANEXT3:.+]] = getelementptr i8, i8* %[[OFA3]], i32 8 + // LIN: store i8* %[[OFANEXT3]], i8** %[[OFA_P3]] + // LIN: %[[LOAD3:.+]] = load i16, i16* %[[BC3]] + // LIN: store i16 %[[LOAD3]], i16* + // WIN: %[[CUR3:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT3:.+]] = getelementptr inbounds i8, i8* %[[CUR3]], i64 8 + // WIN: store i8* %[[NEXT3]], i8** %[[ARGS]] + // WIN: %[[BC3:.+]] = bitcast i8* %[[CUR3]] to i16* + // WIN: %[[LOADV3:.+]] = load i16, i16* %[[BC3]] + // WIN: store i16 %[[LOADV3]], i16* + + _ExtInt(129) D = __builtin_va_arg(args, _ExtInt(129)); + // LIN: %[[AD4:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P4:.+]] = 
getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD4]], i32 0, i32 2 + // LIN: %[[OFA4:.+]] = load i8*, i8** %[[OFA_P4]] + // LIN: %[[BC4:.+]] = bitcast i8* %[[OFA4]] to i129* + // LIN: %[[OFANEXT4:.+]] = getelementptr i8, i8* %[[OFA4]], i32 24 + // LIN: store i8* %[[OFANEXT4]], i8** %[[OFA_P4]] + // LIN: %[[LOAD4:.+]] = load i129, i129* %[[BC4]] + // LIN: store i129 %[[LOAD4]], i129* + // WIN: %[[CUR4:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT4:.+]] = getelementptr inbounds i8, i8* %[[CUR4]], i64 24 + // WIN: store i8* %[[NEXT4]], i8** %[[ARGS]] + // WIN: %[[BC4:.+]] = bitcast i8* %[[CUR4]] to i129* + // WIN: %[[LOADV4:.+]] = load i129, i129* %[[BC4]] + // WIN: store i129 %[[LOADV4]], i129* + + _ExtInt(16777200) E = __builtin_va_arg(args, _ExtInt(16777200)); + // LIN: %[[AD5:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[OFA_P5:.+]] = getelementptr inbounds %struct.__va_list_tag, %struct.__va_list_tag* %[[AD5]], i32 0, i32 2 + // LIN: %[[OFA5:.+]] = load i8*, i8** %[[OFA_P5]] + // LIN: %[[BC5:.+]] = bitcast i8* %[[OFA5]] to i16777200* + // LIN: %[[OFANEXT5:.+]] = getelementptr i8, i8* %[[OFA5]], i32 2097152 + // LIN: store i8* %[[OFANEXT5]], i8** %[[OFA_P5]] + // LIN: %[[LOAD5:.+]] = load i16777200, i16777200* %[[BC5]] + // LIN: store i16777200 %[[LOAD5]], i16777200* + // WIN: %[[CUR5:.+]] = load i8*, i8** %[[ARGS]] + // WIN: %[[NEXT5:.+]] = getelementptr inbounds i8, i8* %[[CUR5]], i64 2097152 + // WIN: store i8* %[[NEXT5]], i8** %[[ARGS]] + // WIN: %[[BC5:.+]] = bitcast i8* %[[CUR5]] to i16777200* + // WIN: %[[LOADV5:.+]] = load i16777200, i16777200* %[[BC5]] + // WIN: store i16777200 %[[LOADV5]], i16777200* + + __builtin_va_end(args); + // LIN: %[[ENDAD:.+]] = getelementptr inbounds [1 x %struct.__va_list_tag], [1 x %struct.__va_list_tag]* %[[ARGS]] + // LIN: %[[ENDAD1:.+]] = bitcast %struct.__va_list_tag* %[[ENDAD]] to i8* + // LIN: call void @llvm.va_end(i8* %[[ENDAD1]]) + // WIN: %[[ARGSEND:.+]] = bitcast i8** %[[ARGS]] to i8* + // WIN: call void @llvm.va_end(i8* %[[ARGSEND]]) +} +void typeid_tests() { + // LIN: define void @_Z12typeid_testsv() + // WIN: define dso_local void @"?typeid_tests@@YAXXZ"() + unsigned _ExtInt(33) U33_1, U33_2; + _ExtInt(33) S33_1, S33_2; + _ExtInt(32) S32_1, S32_2; + + auto A = typeid(U33_1); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEj to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor28* @"??_R0U?$_UExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto B = typeid(U33_2); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEj to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor28* @"??_R0U?$_UExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto C = typeid(S33_1); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEi to %"class.std::type_info"*)) + // WIN: call 
%"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto D = typeid(S33_2); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi33EEi to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CB@@__clang@@@8" to %"class.std::type_info"*)) + auto E = typeid(S32_1); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi32EEi to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CA@@__clang@@@8" to %"class.std::type_info"*)) + auto F = typeid(S32_2); + // LIN: call void @_ZNSt9type_infoC1ERKS_(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast ({ i8*, i8* }* @_ZTIU7_ExtIntILi32EEi to %"class.std::type_info"*)) + // WIN: call %"class.std::type_info"* @"??0type_info@std@@QEAA@AEBV01@@Z"(%"class.std::type_info"* %{{.+}}, %"class.std::type_info"* dereferenceable(16) bitcast (%rtti.TypeDescriptor27* @"??_R0U?$_ExtInt@$0CA@@__clang@@@8" to %"class.std::type_info"*)) +} + +void ExplicitCasts() { + // LIN: define void @_Z13ExplicitCastsv() + // WIN: define dso_local void @"?ExplicitCasts@@YAXXZ"() + + _ExtInt(33) a; + _ExtInt(31) b; + int i; + + a = i; + // CHECK: %[[CONV:.+]] = sext i32 %{{.+}} to i33 + b = i; + // CHECK: %[[CONV:.+]] = trunc i32 %{{.+}} to i31 + i = a; + // CHECK: %[[CONV:.+]] = trunc i33 %{{.+}} to i32 + i = b; + // CHECK: %[[CONV:.+]] = sext i31 %{{.+}} to i32 +} + +struct S { + _ExtInt(17) A; + _ExtInt(16777200) B; + _ExtInt(17) C; +}; + +void OffsetOfTest() { + // LIN: define void @_Z12OffsetOfTestv() + // WIN: define dso_local void @"?OffsetOfTest@@YAXXZ"() + + auto A = __builtin_offsetof(S,A); + // CHECK: store i64 0, i64* %{{.+}} + auto B = __builtin_offsetof(S,B); + // CHECK: store i64 8, i64* %{{.+}} + auto C = __builtin_offsetof(S,C); + // CHECK: store i64 2097160, i64* %{{.+}} +} + + +void ShiftExtIntByConstant(_ExtInt(28) Ext) { +// LIN: define void @_Z21ShiftExtIntByConstantU7_ExtIntILi28EEi +// WIN: define dso_local void @"?ShiftExtIntByConstant@@YAXU?$_ExtInt@$0BM@@__clang@@@Z" + Ext << 7; + // CHECK: shl i28 %{{.+}}, 7 + Ext >> 7; + // CHECK: ashr i28 %{{.+}}, 7 + Ext << -7; + // CHECK: shl i28 %{{.+}}, -7 + Ext >> -7; + // CHECK: ashr i28 %{{.+}}, -7 + + // UB in C/C++, Defined in OpenCL. 
+ Ext << 29; + // CHECK: shl i28 %{{.+}}, 29 + Ext >> 29; + // CHECK: ashr i28 %{{.+}}, 29 +} + +void ConstantShiftByExtInt(_ExtInt(28) Ext, _ExtInt(65) LargeExt) { + // LIN: define void @_Z21ConstantShiftByExtIntU7_ExtIntILi28EEiU7_ExtIntILi65EEi + // WIN: define dso_local void @"?ConstantShiftByExtInt@@YAXU?$_ExtInt@$0BM@@__clang@@U?$_ExtInt@$0EB@@2@@Z" + 10 << Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: shl i32 10, %[[PROMO]] + 10 >> Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: ashr i32 10, %[[PROMO]] + 10 << LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: shl i32 10, %[[PROMO]] + 10 >> LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: ashr i32 10, %[[PROMO]] +} + +void Shift(_ExtInt(28) Ext, _ExtInt(65) LargeExt, int i) { + // LIN: define void @_Z5ShiftU7_ExtIntILi28EEiU7_ExtIntILi65EEii + // WIN: define dso_local void @"?Shift@@YAXU?$_ExtInt@$0BM@@__clang@@U?$_ExtInt@$0EB@@2@H@Z" + i << Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: shl i32 {{.+}}, %[[PROMO]] + i >> Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i32 + // CHECK: ashr i32 {{.+}}, %[[PROMO]] + + i << LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: shl i32 {{.+}}, %[[PROMO]] + i >> LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i32 + // CHECK: ashr i32 {{.+}}, %[[PROMO]] + + Ext << i; + // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28 + // CHECK: shl i28 {{.+}}, %[[PROMO]] + Ext >> i; + // CHECK: %[[PROMO:.+]] = trunc i32 %{{.+}} to i28 + // CHECK: ashr i28 {{.+}}, %[[PROMO]] + + LargeExt << i; + // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65 + // CHECK: shl i65 {{.+}}, %[[PROMO]] + LargeExt >> i; + // CHECK: %[[PROMO:.+]] = zext i32 %{{.+}} to i65 + // CHECK: ashr i65 {{.+}}, %[[PROMO]] + + Ext << LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i28 + // CHECK: shl i28 {{.+}}, %[[PROMO]] + Ext >> LargeExt; + // CHECK: %[[PROMO:.+]] = trunc i65 %{{.+}} to i28 + // CHECK: ashr i28 {{.+}}, %[[PROMO]] + + LargeExt << Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i65 + // CHECK: shl i65 {{.+}}, %[[PROMO]] + LargeExt >> Ext; + // CHECK: %[[PROMO:.+]] = zext i28 %{{.+}} to i65 + // CHECK: ashr i65 {{.+}}, %[[PROMO]] +} + +void ComplexTest(_Complex _ExtInt(12) first, + _Complex _ExtInt(33) second) { + // LIN: define void @_Z11ComplexTestCU7_ExtIntILi12EEiCU7_ExtIntILi33EEi + // WIN: define dso_local void @"?ComplexTest@@YAXU?$_Complex@U?$_ExtInt@$0M@@__clang@@@__clang@@U?$_Complex@U?$_ExtInt@$0CB@@__clang@@@2@@Z" + first + second; + // CHECK: %[[FIRST_REALP:.+]] = getelementptr inbounds { i12, i12 }, { i12, i12 }* %{{.+}}, i32 0, i32 0 + // CHECK: %[[FIRST_REAL:.+]] = load i12, i12* %[[FIRST_REALP]] + // CHECK: %[[FIRST_IMAGP:.+]] = getelementptr inbounds { i12, i12 }, { i12, i12 }* %{{.+}}, i32 0, i32 1 + // CHECK: %[[FIRST_IMAG:.+]] = load i12, i12* %[[FIRST_IMAGP]] + // CHECK: %[[FIRST_REAL_CONV:.+]] = sext i12 %[[FIRST_REAL]] + // CHECK: %[[FIRST_IMAG_CONV:.+]] = sext i12 %[[FIRST_IMAG]] + // CHECK: %[[SECOND_REALP:.+]] = getelementptr inbounds { i33, i33 }, { i33, i33 }* %{{.+}}, i32 0, i32 0 + // CHECK: %[[SECOND_REAL:.+]] = load i33, i33* %[[SECOND_REALP]] + // CHECK: %[[SECOND_IMAGP:.+]] = getelementptr inbounds { i33, i33 }, { i33, i33 }* %{{.+}}, i32 0, i32 1 + // CHECK: %[[SECOND_IMAG:.+]] = load i33, i33* %[[SECOND_IMAGP]] + // CHECK: %[[REAL:.+]] = add i33 %[[FIRST_REAL_CONV]], %[[SECOND_REAL]] + // CHECK: %[[IMAG:.+]] = add i33 
%[[FIRST_IMAG_CONV]], %[[SECOND_IMAG]] +} + +// Ensure that these types don't alias the normal int types. +void TBAATest(_ExtInt(sizeof(int) * 8) ExtInt, + unsigned _ExtInt(sizeof(int) * 8) ExtUInt, + _ExtInt(6) Other) { + // CHECK-DAG: store i32 %{{.+}}, i32* %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA:.+]] + // CHECK-DAG: store i32 %{{.+}}, i32* %{{.+}}, align 4, !tbaa ![[EXTINT_TBAA]] + // CHECK-DAG: store i6 %{{.+}}, i6* %{{.+}}, align 1, !tbaa ![[EXTINT6_TBAA:.+]] + ExtInt = 5; + ExtUInt = 5; + Other = 5; +} + +// NoNewStructPathTBAA-DAG: ![[CHAR_TBAA_ROOT:.+]] = !{!"omnipotent char", ![[TBAA_ROOT:.+]], i64 0} +// NoNewStructPathTBAA-DAG: ![[TBAA_ROOT]] = !{!"Simple C++ TBAA"} +// NoNewStructPathTBAA-DAG: ![[EXTINT_TBAA]] = !{![[EXTINT_TBAA_ROOT:.+]], ![[EXTINT_TBAA_ROOT]], i64 0} +// NoNewStructPathTBAA-DAG: ![[EXTINT_TBAA_ROOT]] = !{!"_ExtInt(32)", ![[CHAR_TBAA_ROOT]], i64 0} +// NoNewStructPathTBAA-DAG: ![[EXTINT6_TBAA]] = !{![[EXTINT6_TBAA_ROOT:.+]], ![[EXTINT6_TBAA_ROOT]], i64 0} +// NoNewStructPathTBAA-DAG: ![[EXTINT6_TBAA_ROOT]] = !{!"_ExtInt(6)", ![[CHAR_TBAA_ROOT]], i64 0} + +// NewStructPathTBAA-DAG: ![[CHAR_TBAA_ROOT:.+]] = !{![[TBAA_ROOT:.+]], i64 1, !"omnipotent char"} +// NewStructPathTBAA-DAG: ![[TBAA_ROOT]] = !{!"Simple C++ TBAA"} +// NewStructPathTBAA-DAG: ![[EXTINT_TBAA]] = !{![[EXTINT_TBAA_ROOT:.+]], ![[EXTINT_TBAA_ROOT]], i64 0, i64 4} +// NewStructPathTBAA-DAG: ![[EXTINT_TBAA_ROOT]] = !{![[CHAR_TBAA_ROOT]], i64 4, !"_ExtInt(32)"} +// NewStructPathTBAA-DAG: ![[EXTINT6_TBAA]] = !{![[EXTINT6_TBAA_ROOT:.+]], ![[EXTINT6_TBAA_ROOT]], i64 0, i64 1} +// NewStructPathTBAA-DAG: ![[EXTINT6_TBAA_ROOT]] = !{![[CHAR_TBAA_ROOT]], i64 1, !"_ExtInt(6)"} diff --git a/clang/test/CodeGenOpenCL/ext-int-shift.cl b/clang/test/CodeGenOpenCL/ext-int-shift.cl new file mode 100644 index 000000000000..4d2292daac77 --- /dev/null +++ b/clang/test/CodeGenOpenCL/ext-int-shift.cl @@ -0,0 +1,21 @@ +// RUN: %clang -cc1 -triple x86_64-linux-pc -O3 -disable-llvm-passes %s -emit-llvm -o - | FileCheck %s + +void Shifts(_ExtInt(12) E, int i) { + E << 99; + // CHECK: shl i12 %{{.+}}, 3 + + 77 << E; + // CHECK: %[[PROM:.+]] = zext i12 %{{.+}} to i32 + // CHECK: %[[MASK:.+]] = and i32 %[[PROM]], 31 + // CHECK: shl i32 77, %[[MASK]] + + E << i; + // CHECK: %[[PROM:.+]] = trunc i32 %{{.+}} to i12 + // CHECK: %[[MASK:.+]] = urem i12 %[[PROM]], 12 + // CHECK: shl i12 %{{.+}}, %[[MASK]] + + i << E; + // CHECK: %[[PROM:.+]] = zext i12 %{{.+}} to i32 + // CHECK: %[[MASK:.+]] = and i32 %[[PROM]], 31 + // CHECK: shl i32 %{{.+}}, %[[MASK]] +} diff --git a/clang/test/Parser/ext-int.cpp b/clang/test/Parser/ext-int.cpp new file mode 100644 index 000000000000..4926d5190587 --- /dev/null +++ b/clang/test/Parser/ext-int.cpp @@ -0,0 +1,15 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +// expected-error@+5{{expected ')'}} +// expected-note@+4{{to match this '('}} +// expected-error@+3{{expected unqualified-id}} +// expected-error@+2{{extraneous closing brace}} +// expected-error@+1{{C++ requires a type specifier for all declarations}} +_ExtInt(32} a; +// expected-error@+2{{expected expression}} +// expected-error@+1{{C++ requires a type specifier for all declarations}} +_ExtInt(32* ) b; +// expected-error@+3{{expected '('}} +// expected-error@+2{{expected unqualified-id}} +// expected-error@+1{{C++ requires a type specifier for all declarations}} +_ExtInt{32} c; diff --git a/clang/test/SemaCXX/ext-int.cpp b/clang/test/SemaCXX/ext-int.cpp new file mode 100644 index 000000000000..6a06280dceec --- /dev/null +++ 
b/clang/test/SemaCXX/ext-int.cpp @@ -0,0 +1,278 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s -Wimplicit-int-conversion + +template <int Bounds> +struct HasExtInt { + _ExtInt(Bounds) b; + unsigned _ExtInt(Bounds) b2; +}; + +// Declaring variables: +_ExtInt(33) Declarations(_ExtInt(48) &Param) { // Useable in params and returns. + short _ExtInt(43) a; // expected-error {{'short _ExtInt' is invalid}} + _ExtInt(43) long b; // expected-error {{'long _ExtInt' is invalid}} + + // These should all be fine: + const _ExtInt(5) c = 3; + const unsigned _ExtInt(5) d; // expected-error {{default initialization of an object of const type 'const unsigned _ExtInt(5)'}} + unsigned _ExtInt(5) e = 5; + _ExtInt(5) unsigned f; + + _ExtInt(-3) g; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + _ExtInt(0) h; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + _ExtInt(1) i; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + _ExtInt(2) j;; + unsigned _ExtInt(0) k;// expected-error{{unsigned _ExtInt must have a bit size of at least 1}} + unsigned _ExtInt(1) l; + signed _ExtInt(1) m; // expected-error{{signed _ExtInt must have a bit size of at least 2}} + + constexpr _ExtInt(6) n = 33; // expected-warning{{implicit conversion from 'int' to 'const _ExtInt(6)' changes value from 33 to -31}} + constexpr _ExtInt(7) o = 33; + + // Check LLVM imposed max size. + _ExtInt(0xFFFFFFFFFF) p; // expected-error {{signed _ExtInt of bit sizes greater than 16777215 not supported}} + unsigned _ExtInt(0xFFFFFFFFFF) q; // expected-error {{unsigned _ExtInt of bit sizes greater than 16777215 not supported}} + +// Ensure template params are instantiated correctly. + // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} + // expected-error@6{{unsigned _ExtInt must have a bit size of at least 1}} + // expected-note@+1{{in instantiation of template class }} + HasExtInt<-1> r; + // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} + // expected-error@6{{unsigned _ExtInt must have a bit size of at least 1}} + // expected-note@+1{{in instantiation of template class }} + HasExtInt<0> s; + // expected-error@5{{signed _ExtInt must have a bit size of at least 2}} + // expected-note@+1{{in instantiation of template class }} + HasExtInt<1> t; + HasExtInt<2> u; + + _ExtInt(-3.0) v; // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'double'}} + _ExtInt(3.0) x; // expected-error {{integral constant expression must have integral or unscoped enumeration type, not 'double'}} + + return 0; +} + +template <_ExtInt(5) I> +struct ExtIntTemplParam { + static constexpr _ExtInt(5) Var = I; +}; + +template <typename T> +void deduced_whole_type(T){} +template <int I> +void deduced_bound(_ExtInt(I)){} + +// Ensure ext-int can be used in template places.
+void Templates() { + ExtIntTemplParam<13> a; + constexpr _ExtInt(3) b = 1; + ExtIntTemplParam c; + constexpr _ExtInt(9) d = 1; + ExtIntTemplParam e; + + deduced_whole_type(b); + deduced_bound(b); +} + +template +struct is_same { + static constexpr bool value = false; +}; +template +struct is_same { + static constexpr bool value = true; +}; + +// Reject vector types: +// expected-error@+1{{invalid vector element type '_ExtInt(32)'}} +typedef _ExtInt(32) __attribute__((vector_size(16))) VecTy; + +// Allow _Complex: +_Complex _ExtInt(3) Cmplx; + +// Reject cases of _Atomic: +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(4)' with less than 1 byte of precision}} +_Atomic _ExtInt(4) TooSmallAtomic; +// expected-error@+1{{_Atomic cannot be applied to integer type '_ExtInt(9)' with a non power of 2 precision}} +_Atomic _ExtInt(9) NotPow2Atomic; +_Atomic _ExtInt(128) JustRightAtomic; + +// Test result types of Unary/Bitwise/Binary Operations: +void Ops() { + _ExtInt(43) x43_s = 1, y43_s = 1; + _ExtInt(sizeof(int) * 8) x32_s = 1, y32_s = 1; + unsigned _ExtInt(sizeof(unsigned) * 8) x32_u = 1, y32_u = 1; + _ExtInt(4) x4_s = 1, y4_s = 1; + unsigned _ExtInt(43) x43_u = 1, y43_u = 1; + unsigned _ExtInt(4) x4_u = 1, y4_u = 1; + int x_int = 1, y_int = 1; + unsigned x_uint = 1, y_uint = 1; + bool b; + + // Disabling mixed conversions: + // Signed/unsigned mixed. + // expected-error@+1{{invalid operands to binary expression}} + x43_u + y43_s; + // expected-error@+1{{invalid operands to binary expression}} + x4_s - y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x43_s * y43_u; + // expected-error@+1{{invalid operands to binary expression}} + x4_u / y4_s; + + // Different Sizes. + // expected-error@+1{{invalid operands to binary expression}} + x43_s + y4_s; + // expected-error@+1{{invalid operands to binary expression}} + x43_s - y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x43_u * y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x4_u / y43_u; + + // Mixed with standard types. + // expected-error@+1{{invalid operands to binary expression}} + x43_s + x_int; + // expected-error@+1{{invalid operands to binary expression}} + x43_u - x_int; + // expected-error@+1{{invalid operands to binary expression}} + x32_s * x_int; + // expected-error@+1{{invalid operands to binary expression}} + x32_u / x_int; + // expected-error@+1{{invalid operands to binary expression}} + x32_s * x_uint; + // expected-error@+1{{invalid operands to binary expression}} + x32_u / x_uint; + // expected-error@+1{{invalid operands to binary expression}} + x4_s + x_int; + // expected-error@+1{{invalid operands to binary expression}} + x4_u - x_int; + // expected-error@+1{{invalid operands to binary expression}} + x4_s + b; + // expected-error@+1{{invalid operands to binary expression}} + x4_u - b; + // expected-error@+1{{invalid operands to binary expression}} + x43_s + b; + // expected-error@+1{{invalid operands to binary expression}} + x43_u - b; + + // Bitwise checks. + // expected-error@+1{{invalid operands to binary expression}} + x43_s % y4_u; + // expected-error@+1{{invalid operands to binary expression}} + x43_u % y4_s; + // expected-error@+1{{invalid operands to binary expression}} + x4_s | y43_u; + // expected-error@+1{{invalid operands to binary expression}} + x4_u | y43_s; + + // compassign. + // expected-error@+1{{invalid operands to binary expression}} + x43_s += 33; + + // Comparisons. 
+ // expected-error@+1{{invalid operands to binary expression}} + x43_s > 33; + // expected-error@+1{{invalid operands to binary expression}} + x4_s > 33; + + // Same size/sign ops don't change type. + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + + // Unary ops shouldn't go through integer promotions. + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + // expected-warning@+1{{expression with side effects has no effect in an unevaluated context}} + static_assert(is_same::value,""); + static_assert(is_same> 1), _ExtInt(4)>::value,""); + static_assert(is_same::value,""); + + static_assert(sizeof(x43_s) == 8, ""); + static_assert(sizeof(x4_s) == 1, ""); + + static_assert(sizeof(_ExtInt(3340)) == 424, ""); // 424 * 8 == 3392. + static_assert(sizeof(_ExtInt(1049)) == 136, ""); // 136 * 8 == 1088. + + static_assert(alignof(decltype(x43_s)) == 8, ""); + static_assert(alignof(decltype(x4_s)) == 1, ""); + + static_assert(alignof(_ExtInt(3340)) == 8, ""); + static_assert(alignof(_ExtInt(1049)) == 8, ""); +} + +constexpr int func() { return 42;} + +void ConstexprBitsize() { + _ExtInt(func()) F; + static_assert(is_same::value, ""); +} + +// Useable as an underlying type. +enum AsEnumUnderlyingType : _ExtInt(33) { +}; + +void overloaded(int); +void overloaded(_ExtInt(32)); +void overloaded(_ExtInt(33)); +void overloaded(short); +//expected-note@+1{{candidate function}} +void overloaded2(_ExtInt(32)); +//expected-note@+1{{candidate function}} +void overloaded2(_ExtInt(33)); +//expected-note@+1{{candidate function}} +void overloaded2(short); + +void overload_use() { + int i; + _ExtInt(32) i32; + _ExtInt(33) i33; + short s; + + // All of these get their corresponding exact matches. + overloaded(i); + overloaded(i32); + overloaded(i33); + overloaded(s); + + overloaded2(i); // expected-error{{call to 'overloaded2' is ambiguous}} + + overloaded2(i32); + + overloaded2(s); +} + +// no errors expected, this should 'just work'. 
+struct UsedAsBitField { + _ExtInt(3) F : 3; + _ExtInt(3) G : 3; + _ExtInt(3) H : 3; +}; + +// expected-error@+1{{mode attribute only supported for integer and floating-point types}} +typedef _ExtInt(33) IllegalMode __attribute__((mode(DI))); + +void ImplicitCasts(_ExtInt(31) s31, _ExtInt(33) s33, int i) { + // expected-warning@+1{{implicit conversion loses integer precision}} + s31 = i; + // expected-warning@+1{{implicit conversion loses integer precision}} + s31 = s33; + s33 = i; + s33 = s31; + i = s31; + // expected-warning@+1{{implicit conversion loses integer precision}} + i = s33; +} + diff --git a/clang/tools/libclang/CIndex.cpp b/clang/tools/libclang/CIndex.cpp index fc9d8db62b2d..dafe4ccda05f 100644 --- a/clang/tools/libclang/CIndex.cpp +++ b/clang/tools/libclang/CIndex.cpp @@ -1793,6 +1793,8 @@ DEFAULT_TYPELOC_IMPL(Enum, TagType) DEFAULT_TYPELOC_IMPL(SubstTemplateTypeParm, Type) DEFAULT_TYPELOC_IMPL(SubstTemplateTypeParmPack, Type) DEFAULT_TYPELOC_IMPL(Auto, Type) +DEFAULT_TYPELOC_IMPL(ExtInt, Type) +DEFAULT_TYPELOC_IMPL(DependentExtInt, Type) bool CursorVisitor::VisitCXXRecordDecl(CXXRecordDecl *D) { // Visit the nested-name-specifier, if present. diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index 0a3e8735c448..cbd8e78695f4 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -3987,6 +3987,9 @@ TypeSystemClang::GetTypeClass(lldb::opaque_compiler_type_t type) { case clang::Type::Vector: return lldb::eTypeClassVector; case clang::Type::Builtin: + // Ext-Int is just an integer type. + case clang::Type::ExtInt: + case clang::Type::DependentExtInt: return lldb::eTypeClassBuiltin; case clang::Type::ObjCObjectPointer: return lldb::eTypeClassObjCObjectPointer; @@ -4661,6 +4664,11 @@ lldb::Encoding TypeSystemClang::GetEncoding(lldb::opaque_compiler_type_t type, // TODO: Set this to more than one??? break; + case clang::Type::ExtInt: + case clang::Type::DependentExtInt: + return qual_type->isUnsignedIntegerType() ? lldb::eEncodingUint + : lldb::eEncodingSint; + case clang::Type::Builtin: switch (llvm::cast(qual_type)->getKind()) { case clang::BuiltinType::Void: @@ -4915,6 +4923,11 @@ lldb::Format TypeSystemClang::GetFormat(lldb::opaque_compiler_type_t type) { case clang::Type::Vector: break; + case clang::Type::ExtInt: + case clang::Type::DependentExtInt: + return qual_type->isUnsignedIntegerType() ? lldb::eFormatUnsigned + : lldb::eFormatDecimal; + case clang::Type::Builtin: switch (llvm::cast(qual_type)->getKind()) { case clang::BuiltinType::UnknownAny: @@ -7358,7 +7371,7 @@ clang::CXXMethodDecl *TypeSystemClang::AddMethodToCXXRecordType( function_type->getReturnType()))); cxx_conversion_decl->setType(method_qual_type); cxx_conversion_decl->setInlineSpecified(is_inline); - cxx_conversion_decl->setExplicitSpecifier(explicit_spec); + cxx_conversion_decl->setExplicitSpecifier(explicit_spec); cxx_conversion_decl->setConstexprKind(CSK_unspecified); cxx_method_decl = cxx_conversion_decl; } From dd24fb388ba82fec4bf610b19e9cf3d1e3535a16 Mon Sep 17 00:00:00 2001 From: Christopher Tetreault Date: Fri, 17 Apr 2020 10:39:33 -0700 Subject: [PATCH 198/216] Clean up usages of asserting vector getters in Type Summary: Remove usages of asserting vector getters in Type in preparation for the VectorType refactor. The existence of these functions complicates the refactor while adding little value. 
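As a rough illustration of the mechanical pattern applied throughout this patch (the helper name getNumVectorElementsOf and the scalar fallback are made up for the example and are not part of the change):

    #include "llvm/IR/DerivedTypes.h" // llvm::Type, llvm::VectorType
    #include "llvm/Support/Casting.h" // llvm::cast, llvm::dyn_cast
    using namespace llvm;

    // Hypothetical helper showing the before/after shape of the rewrite.
    unsigned getNumVectorElementsOf(Type *Ty) {
      // Before: Ty->getVectorNumElements() asserted internally that Ty is a vector.
      // After: the vector assumption is spelled out at the call site.
      if (auto *VTy = dyn_cast<VectorType>(Ty))
        return VTy->getNumElements();
      return 1; // Scalar type; callers that already know Ty is a vector use cast<VectorType> instead.
    }

Call sites below that are guaranteed to see a vector type simply replace Ty->getVectorNumElements() with cast<VectorType>(Ty)->getNumElements().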
Reviewers: craig.topper, sdesmalen, efriedma, RKSimon Reviewed By: efriedma Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77264 --- llvm/lib/Target/X86/X86InterleavedAccess.cpp | 14 +- llvm/lib/Target/X86/X86PartialReduction.cpp | 29 ++-- .../X86/X86ShuffleDecodeConstantPool.cpp | 8 +- .../lib/Target/X86/X86TargetTransformInfo.cpp | 153 +++++++++--------- 4 files changed, 107 insertions(+), 97 deletions(-) diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp index aa25cb9d2d08..de84874f715d 100644 --- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp +++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp @@ -127,7 +127,7 @@ class X86InterleavedAccessGroup { bool X86InterleavedAccessGroup::isSupported() const { VectorType *ShuffleVecTy = Shuffles[0]->getType(); - Type *ShuffleEltTy = ShuffleVecTy->getVectorElementType(); + Type *ShuffleEltTy = ShuffleVecTy->getElementType(); unsigned ShuffleElemSize = DL.getTypeSizeInBits(ShuffleEltTy); unsigned WideInstSize; @@ -186,7 +186,7 @@ void X86InterleavedAccessGroup::decompose( DecomposedVectors.push_back( cast(Builder.CreateShuffleVector( Op0, Op1, - createSequentialMask(Indices[i], SubVecTy->getVectorNumElements(), + createSequentialMask(Indices[i], SubVecTy->getNumElements(), 0)))); return; } @@ -727,8 +727,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { // Try to generate target-sized register(/instruction). decompose(Inst, Factor, ShuffleTy, DecomposedVectors); - Type *ShuffleEltTy = Inst->getType(); - unsigned NumSubVecElems = ShuffleEltTy->getVectorNumElements() / Factor; + auto *ShuffleEltTy = cast(Inst->getType()); + unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor; // Perform matrix-transposition in order to compute interleaved // results by generating some sort of (optimized) target-specific // instructions. @@ -756,8 +756,8 @@ bool X86InterleavedAccessGroup::lowerIntoOptimizedSequence() { return true; } - Type *ShuffleEltTy = ShuffleTy->getVectorElementType(); - unsigned NumSubVecElems = ShuffleTy->getVectorNumElements() / Factor; + Type *ShuffleEltTy = ShuffleTy->getElementType(); + unsigned NumSubVecElems = ShuffleTy->getNumElements() / Factor; // Lower the interleaved stores: // 1. Decompose the interleaved wide shuffle into individual shuffle @@ -825,7 +825,7 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI, assert(Factor >= 2 && Factor <= getMaxSupportedInterleaveFactor() && "Invalid interleave factor"); - assert(SVI->getType()->getVectorNumElements() % Factor == 0 && + assert(SVI->getType()->getNumElements() % Factor == 0 && "Invalid interleaved store"); // Holds the indices of SVI that correspond to the starting index of each diff --git a/llvm/lib/Target/X86/X86PartialReduction.cpp b/llvm/lib/Target/X86/X86PartialReduction.cpp index 503ea777920b..3cc4d2682235 100644 --- a/llvm/lib/Target/X86/X86PartialReduction.cpp +++ b/llvm/lib/Target/X86/X86PartialReduction.cpp @@ -89,7 +89,7 @@ static bool isVectorReductionOp(const BinaryOperator &BO) { return false; } - unsigned ElemNum = BO.getType()->getVectorNumElements(); + unsigned ElemNum = cast(BO.getType())->getNumElements(); // Ensure the reduction size is a power of 2. if (!isPowerOf2_32(ElemNum)) return false; @@ -141,7 +141,7 @@ static bool isVectorReductionOp(const BinaryOperator &BO) { // ElemNumToReduce / 2 elements, and store the result in // ElemNumToReduce / 2 elements in another vector. 
- unsigned ResultElements = ShufInst->getType()->getVectorNumElements(); + unsigned ResultElements = ShufInst->getType()->getNumElements(); if (ResultElements < ElemNum) return false; @@ -236,8 +236,8 @@ bool X86PartialReduction::tryMAddReplacement(Value *Op, BinaryOperator *Add) { IRBuilder<> Builder(Add); - Type *MulTy = Op->getType(); - unsigned NumElts = MulTy->getVectorNumElements(); + auto *MulTy = cast(Op->getType()); + unsigned NumElts = MulTy->getNumElements(); // Extract even elements and odd elements and add them together. This will // be pattern matched by SelectionDAG to pmaddwd. This instruction will be @@ -272,11 +272,11 @@ bool X86PartialReduction::tryMAddPattern(BinaryOperator *BO) { return false; // Need at least 8 elements. - if (BO->getType()->getVectorNumElements() < 8) + if (cast(BO->getType())->getNumElements() < 8) return false; // Element type should be i32. - if (!BO->getType()->getVectorElementType()->isIntegerTy(32)) + if (!cast(BO->getType())->getElementType()->isIntegerTy(32)) return false; bool Changed = false; @@ -305,7 +305,9 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { // Look for zero extend from i8. auto getZeroExtendedVal = [](Value *Op) -> Value * { if (auto *ZExt = dyn_cast(Op)) - if (ZExt->getOperand(0)->getType()->getVectorElementType()->isIntegerTy(8)) + if (cast(ZExt->getOperand(0)->getType()) + ->getElementType() + ->isIntegerTy(8)) return ZExt->getOperand(0); return nullptr; @@ -319,8 +321,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { IRBuilder<> Builder(Add); - Type *OpTy = Op->getType(); - unsigned NumElts = OpTy->getVectorNumElements(); + auto *OpTy = cast(Op->getType()); + unsigned NumElts = OpTy->getNumElements(); unsigned IntrinsicNumElts; Intrinsic::ID IID; @@ -371,7 +373,8 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { assert(isPowerOf2_32(NumSplits) && "Expected power of 2 splits"); unsigned Stages = Log2_32(NumSplits); for (unsigned s = Stages; s > 0; --s) { - unsigned NumConcatElts = Ops[0]->getType()->getVectorNumElements() * 2; + unsigned NumConcatElts = + cast(Ops[0]->getType())->getNumElements() * 2; for (unsigned i = 0; i != 1U << (s - 1); ++i) { SmallVector ConcatMask(NumConcatElts); std::iota(ConcatMask.begin(), ConcatMask.end(), 0); @@ -381,13 +384,13 @@ bool X86PartialReduction::trySADReplacement(Value *Op, BinaryOperator *Add) { // At this point the final value should be in Ops[0]. Now we need to adjust // it to the final original type. - NumElts = OpTy->getVectorNumElements(); + NumElts = cast(OpTy)->getNumElements(); if (NumElts == 2) { // Extract down to 2 elements. Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ArrayRef{0, 1}); } else if (NumElts >= 8) { SmallVector ConcatMask(NumElts); - unsigned SubElts = Ops[0]->getType()->getVectorNumElements(); + unsigned SubElts = cast(Ops[0]->getType())->getNumElements(); for (unsigned i = 0; i != SubElts; ++i) ConcatMask[i] = i; for (unsigned i = SubElts; i != NumElts; ++i) @@ -411,7 +414,7 @@ bool X86PartialReduction::trySADPattern(BinaryOperator *BO) { // TODO: There's nothing special about i32, any integer type above i16 should // work just as well. 
- if (!BO->getType()->getVectorElementType()->isIntegerTy(32)) + if (!cast(BO->getType())->getElementType()->isIntegerTy(32)) return false; bool Changed = false; diff --git a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp index 0a0322f9e13e..3408dab03656 100644 --- a/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp +++ b/llvm/lib/Target/X86/X86ShuffleDecodeConstantPool.cpp @@ -36,17 +36,17 @@ static bool extractConstantMask(const Constant *C, unsigned MaskEltSizeInBits, // // <4 x i32> - Type *CstTy = C->getType(); - if (!CstTy->isVectorTy()) + auto *CstTy = dyn_cast(C->getType()); + if (!CstTy) return false; - Type *CstEltTy = CstTy->getVectorElementType(); + Type *CstEltTy = CstTy->getElementType(); if (!CstEltTy->isIntegerTy()) return false; unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); - unsigned NumCstElts = CstTy->getVectorNumElements(); + unsigned NumCstElts = CstTy->getNumElements(); assert((CstSizeInBits % MaskEltSizeInBits) == 0 && "Unaligned shuffle mask size"); diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 6aa5b27242a1..4530a6a0ab46 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -925,8 +925,9 @@ int X86TTIImpl::getArithmeticInstrCost(unsigned Opcode, Type *Ty, return BaseT::getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); } -int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, +int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *BaseTp, int Index, Type *SubTp) { + auto *Tp = cast(BaseTp); // 64-bit packed float vectors (v2f32) are widened to type v4f32. // 64-bit packed integer vectors (v2i32) are widened to type v4i32. std::pair LT = TLI->getTypeLegalizationCost(DL, Tp); @@ -958,18 +959,18 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // FIXME: Remove some of the alignment restrictions. // FIXME: We can use permq for 64-bit or larger extracts from 256-bit // vectors. 
- int OrigSubElts = SubTp->getVectorNumElements(); - if (NumSubElts > OrigSubElts && - (Index % OrigSubElts) == 0 && (NumSubElts % OrigSubElts) == 0 && + int OrigSubElts = cast(SubTp)->getNumElements(); + if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 && + (NumSubElts % OrigSubElts) == 0 && LT.second.getVectorElementType() == - SubLT.second.getVectorElementType() && + SubLT.second.getVectorElementType() && LT.second.getVectorElementType().getSizeInBits() == - Tp->getVectorElementType()->getPrimitiveSizeInBits()) { + Tp->getElementType()->getPrimitiveSizeInBits()) { assert(NumElts >= NumSubElts && NumElts > OrigSubElts && "Unexpected number of elements!"); - Type *VecTy = VectorType::get(Tp->getVectorElementType(), + Type *VecTy = VectorType::get(Tp->getElementType(), LT.second.getVectorNumElements()); - Type *SubTy = VectorType::get(Tp->getVectorElementType(), + Type *SubTy = VectorType::get(Tp->getElementType(), SubLT.second.getVectorNumElements()); int ExtractIndex = alignDown((Index % NumElts), NumSubElts); int ExtractCost = getShuffleCost(TTI::SK_ExtractSubvector, VecTy, @@ -1031,8 +1032,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, MVT LegalVT = LT.second; if (LegalVT.isVector() && LegalVT.getVectorElementType().getSizeInBits() == - Tp->getVectorElementType()->getPrimitiveSizeInBits() && - LegalVT.getVectorNumElements() < Tp->getVectorNumElements()) { + Tp->getElementType()->getPrimitiveSizeInBits() && + LegalVT.getVectorNumElements() < Tp->getNumElements()) { unsigned VecTySize = DL.getTypeStoreSize(Tp); unsigned LegalVTSize = LegalVT.getStoreSize(); @@ -1041,8 +1042,8 @@ int X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, Type *Tp, int Index, // Number of destination vectors after legalization: unsigned NumOfDests = LT.first; - Type *SingleOpTy = VectorType::get(Tp->getVectorElementType(), - LegalVT.getVectorNumElements()); + Type *SingleOpTy = + VectorType::get(Tp->getElementType(), LegalVT.getVectorNumElements()); unsigned NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; return NumOfShuffles * @@ -2675,7 +2676,7 @@ int X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, const Instruction *I) { // Handle non-power-of-two vectors such as <3 x float> if (VectorType *VTy = dyn_cast(Src)) { - unsigned NumElem = VTy->getVectorNumElements(); + unsigned NumElem = VTy->getNumElements(); // Handle a few common cases: // <3 x float> @@ -2725,7 +2726,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, // To calculate scalar take the regular cost, without mask return getMemoryOpCost(Opcode, SrcTy, MaybeAlign(Alignment), AddressSpace); - unsigned NumElem = SrcVTy->getVectorNumElements(); + unsigned NumElem = SrcVTy->getNumElements(); VectorType *MaskTy = VectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem); if ((IsLoad && !isLegalMaskedLoad(SrcVTy, MaybeAlign(Alignment))) || @@ -2756,7 +2757,7 @@ int X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, 0, nullptr); else if (LT.second.getVectorNumElements() > NumElem) { - VectorType *NewMaskTy = VectorType::get(MaskTy->getVectorElementType(), + VectorType *NewMaskTy = VectorType::get(MaskTy->getElementType(), LT.second.getVectorNumElements()); // Expanding requires fill mask with zeroes Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, 0, MaskTy); @@ -2861,12 +2862,14 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, MVT MTy = LT.second; + auto *ValVTy = cast(ValTy); + 
unsigned ArithmeticCost = 0; if (LT.first != 1 && MTy.isVector() && - MTy.getVectorNumElements() < ValTy->getVectorNumElements()) { + MTy.getVectorNumElements() < ValVTy->getNumElements()) { // Type needs to be split. We need LT.first - 1 arithmetic ops. - Type *SingleOpTy = VectorType::get(ValTy->getVectorElementType(), - MTy.getVectorNumElements()); + Type *SingleOpTy = + VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy); ArithmeticCost *= LT.first - 1; } @@ -2930,13 +2933,13 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, }; // Handle bool allof/anyof patterns. - if (ValTy->getVectorElementType()->isIntegerTy(1)) { + if (ValVTy->getElementType()->isIntegerTy(1)) { unsigned ArithmeticCost = 0; if (LT.first != 1 && MTy.isVector() && - MTy.getVectorNumElements() < ValTy->getVectorNumElements()) { + MTy.getVectorNumElements() < ValVTy->getNumElements()) { // Type needs to be split. We need LT.first - 1 arithmetic ops. - Type *SingleOpTy = VectorType::get(ValTy->getVectorElementType(), - MTy.getVectorNumElements()); + Type *SingleOpTy = + VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy); ArithmeticCost *= LT.first - 1; } @@ -2954,25 +2957,24 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy)) return ArithmeticCost + Entry->Cost; - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise); } - unsigned NumVecElts = ValTy->getVectorNumElements(); - unsigned ScalarSize = ValTy->getScalarSizeInBits(); + unsigned NumVecElts = ValVTy->getNumElements(); + unsigned ScalarSize = ValVTy->getScalarSizeInBits(); // Special case power of 2 reductions where the scalar type isn't changed // by type legalization. if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits()) - return BaseT::getArithmeticReductionCost(Opcode, ValTy, IsPairwise); + return BaseT::getArithmeticReductionCost(Opcode, ValVTy, IsPairwise); unsigned ReductionCost = 0; - Type *Ty = ValTy; + auto *Ty = ValVTy; if (LT.first != 1 && MTy.isVector() && - MTy.getVectorNumElements() < ValTy->getVectorNumElements()) { + MTy.getVectorNumElements() < ValVTy->getNumElements()) { // Type needs to be split. We need LT.first - 1 arithmetic ops. - Ty = VectorType::get(ValTy->getVectorElementType(), - MTy.getVectorNumElements()); + Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); ReductionCost = getArithmeticInstrCost(Opcode, Ty); ReductionCost *= LT.first - 1; NumVecElts = MTy.getVectorNumElements(); @@ -2986,32 +2988,32 @@ int X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, Type *ValTy, NumVecElts /= 2; // If we're reducing from 256/512 bits, use an extract_subvector. if (Size > 128) { - Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts); + auto *SubTy = VectorType::get(ValVTy->getElementType(), NumVecElts); ReductionCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy); Ty = SubTy; } else if (Size == 128) { // Reducing from 128 bits is a permute of v2f64/v2i64. 
- Type *ShufTy; - if (ValTy->isFloatingPointTy()) - ShufTy = VectorType::get(Type::getDoubleTy(ValTy->getContext()), 2); + VectorType *ShufTy; + if (ValVTy->isFloatingPointTy()) + ShufTy = VectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2); else - ShufTy = VectorType::get(Type::getInt64Ty(ValTy->getContext()), 2); + ShufTy = VectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2); ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr); } else if (Size == 64) { // Reducing from 64 bits is a shuffle of v4f32/v4i32. - Type *ShufTy; - if (ValTy->isFloatingPointTy()) - ShufTy = VectorType::get(Type::getFloatTy(ValTy->getContext()), 4); + VectorType *ShufTy; + if (ValVTy->isFloatingPointTy()) + ShufTy = VectorType::get(Type::getFloatTy(ValVTy->getContext()), 4); else - ShufTy = VectorType::get(Type::getInt32Ty(ValTy->getContext()), 4); + ShufTy = VectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4); ReductionCost += getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, 0, nullptr); } else { // Reducing from smaller size is a shift by immediate. - Type *ShiftTy = VectorType::get( - Type::getIntNTy(ValTy->getContext(), Size), 128 / Size); + auto *ShiftTy = VectorType::get( + Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size); ReductionCost += getArithmeticInstrCost( Instruction::LShr, ShiftTy, TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OK_UniformConstantValue, @@ -3230,17 +3232,17 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, return Entry->Cost; } - unsigned NumVecElts = ValTy->getVectorNumElements(); + auto *ValVTy = cast(ValTy); + unsigned NumVecElts = ValVTy->getNumElements(); - Type *Ty = ValTy; + auto *Ty = ValVTy; unsigned MinMaxCost = 0; if (LT.first != 1 && MTy.isVector() && - MTy.getVectorNumElements() < ValTy->getVectorNumElements()) { + MTy.getVectorNumElements() < ValVTy->getNumElements()) { // Type needs to be split. We need LT.first - 1 operations ops. - Ty = VectorType::get(ValTy->getVectorElementType(), - MTy.getVectorNumElements()); - Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(), - MTy.getVectorNumElements()); + Ty = VectorType::get(ValVTy->getElementType(), MTy.getVectorNumElements()); + Type *SubCondTy = VectorType::get( + cast(CondTy)->getElementType(), MTy.getVectorNumElements()); MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned); MinMaxCost *= LT.first - 1; NumVecElts = MTy.getVectorNumElements(); @@ -3266,7 +3268,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, // Special case power of 2 reductions where the scalar type isn't changed // by type legalization. - if (!isPowerOf2_32(ValTy->getVectorNumElements()) || + if (!isPowerOf2_32(ValVTy->getNumElements()) || ScalarSize != MTy.getScalarSizeInBits()) return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsPairwise, IsUnsigned); @@ -3278,7 +3280,7 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, NumVecElts /= 2; // If we're reducing from 256/512 bits, use an extract_subvector. if (Size > 128) { - Type *SubTy = VectorType::get(ValTy->getVectorElementType(), NumVecElts); + auto *SubTy = VectorType::get(ValVTy->getElementType(), NumVecElts); MinMaxCost += getShuffleCost(TTI::SK_ExtractSubvector, Ty, NumVecElts, SubTy); Ty = SubTy; @@ -3311,8 +3313,8 @@ int X86TTIImpl::getMinMaxReductionCost(Type *ValTy, Type *CondTy, } // Add the arithmetic op for this level. 
- Type *SubCondTy = VectorType::get(CondTy->getVectorElementType(), - Ty->getVectorNumElements()); + auto *SubCondTy = VectorType::get( + cast(CondTy)->getElementType(), Ty->getNumElements()); MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned); } @@ -3519,7 +3521,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, unsigned Alignment, unsigned AddressSpace) { assert(isa(SrcVTy) && "Unexpected type in getGSVectorCost"); - unsigned VF = SrcVTy->getVectorNumElements(); + unsigned VF = cast(SrcVTy)->getNumElements(); // Try to reduce index size from 64 bit (default for GEP) // to 32. It is essential for VF 16. If the index can't be reduced to 32, the @@ -3540,8 +3542,8 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, if (isa(GEP->getOperand(i))) continue; Type *IndxTy = GEP->getOperand(i)->getType(); - if (IndxTy->isVectorTy()) - IndxTy = IndxTy->getVectorElementType(); + if (auto *IndexVTy = dyn_cast(IndxTy)) + IndxTy = IndexVTy->getElementType(); if ((IndxTy->getPrimitiveSizeInBits() == 64 && !isa(GEP->getOperand(i))) || ++NumOfVarIndices > 1) @@ -3589,7 +3591,7 @@ int X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, Value *Ptr, int X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, bool VariableMask, unsigned Alignment, unsigned AddressSpace) { - unsigned VF = SrcVTy->getVectorNumElements(); + unsigned VF = cast(SrcVTy)->getNumElements(); int MaskUnpackCost = 0; if (VariableMask) { @@ -3628,10 +3630,11 @@ int X86TTIImpl::getGatherScatterOpCost(unsigned Opcode, Type *SrcVTy, unsigned Alignment, const Instruction *I = nullptr) { assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); - unsigned VF = SrcVTy->getVectorNumElements(); + unsigned VF = cast(SrcVTy)->getNumElements(); PointerType *PtrTy = dyn_cast(Ptr->getType()); if (!PtrTy && Ptr->getType()->isVectorTy()) - PtrTy = dyn_cast(Ptr->getType()->getVectorElementType()); + PtrTy = dyn_cast( + cast(Ptr->getType())->getElementType()); assert(PtrTy && "Unexpected type for Ptr argument"); unsigned AddressSpace = PtrTy->getAddressSpace(); @@ -3677,7 +3680,8 @@ bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, MaybeAlign Alignment) { return false; // The backend can't handle a single element vector. - if (isa(DataTy) && DataTy->getVectorNumElements() == 1) + if (isa(DataTy) && + cast(DataTy)->getNumElements() == 1) return false; Type *ScalarTy = DataTy->getScalarType(); @@ -3742,10 +3746,10 @@ bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { return false; // The backend can't handle a single element vector. - if (DataTy->getVectorNumElements() == 1) + if (cast(DataTy)->getNumElements() == 1) return false; - Type *ScalarTy = DataTy->getVectorElementType(); + Type *ScalarTy = cast(DataTy)->getElementType(); if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) return true; @@ -3781,8 +3785,8 @@ bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, MaybeAlign Alignment) { // In this case we can reject non-power-of-2 vectors. // We also reject single element vectors as the type legalizer can't // scalarize it. 
- if (isa(DataTy)) { - unsigned NumElts = DataTy->getVectorNumElements(); + if (auto *DataVTy = dyn_cast(DataTy)) { + unsigned NumElts = DataVTy->getNumElements(); if (NumElts == 1 || !isPowerOf2_32(NumElts)) return false; } @@ -3921,8 +3925,8 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, Alignment, AddressSpace); - unsigned VF = VecTy->getVectorNumElements() / Factor; - Type *ScalarTy = VecTy->getVectorElementType(); + unsigned VF = cast(VecTy)->getNumElements() / Factor; + Type *ScalarTy = cast(VecTy)->getElementType(); // Calculate the number of memory operations (NumOfMemOps), required // for load/store the VecTy. @@ -3931,8 +3935,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX2(unsigned Opcode, Type *VecTy, unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; // Get the cost of one memory operation. - Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), - LegalVT.getVectorNumElements()); + Type *SingleMemOpTy = + VectorType::get(cast(VecTy)->getElementType(), + LegalVT.getVectorNumElements()); unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace); @@ -4031,12 +4036,13 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; // Get the cost of one memory operation. - Type *SingleMemOpTy = VectorType::get(VecTy->getVectorElementType(), - LegalVT.getVectorNumElements()); + Type *SingleMemOpTy = + VectorType::get(cast(VecTy)->getElementType(), + LegalVT.getVectorNumElements()); unsigned MemOpCost = getMemoryOpCost(Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace); - unsigned VF = VecTy->getVectorNumElements() / Factor; + unsigned VF = cast(VecTy)->getNumElements() / Factor; MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); if (Opcode == Instruction::Load) { @@ -4068,8 +4074,9 @@ int X86TTIImpl::getInterleavedMemoryOpCostAVX512(unsigned Opcode, Type *VecTy, unsigned NumOfLoadsInInterleaveGrp = Indices.size() ? Indices.size() : Factor; - Type *ResultTy = VectorType::get(VecTy->getVectorElementType(), - VecTy->getVectorNumElements() / Factor); + Type *ResultTy = + VectorType::get(cast(VecTy)->getElementType(), + cast(VecTy)->getNumElements() / Factor); unsigned NumOfResults = getTLI()->getTypeLegalizationCost(DL, ResultTy).first * NumOfLoadsInInterleaveGrp; @@ -4139,7 +4146,7 @@ int X86TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, bool UseMaskForCond, bool UseMaskForGaps) { auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { - Type *EltTy = VecTy->getVectorElementType(); + Type *EltTy = cast(VecTy)->getElementType(); if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || EltTy->isIntegerTy(32) || EltTy->isPointerTy()) return true; From 681466f5e6412350a0b066791450e72325c2c074 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Thu, 16 Apr 2020 14:10:23 -0700 Subject: [PATCH 199/216] Allow lldb-test to combine -find with -dump-clang-ast This patch threads an lldb::DescriptionLevel through the typesystem to allow dumping the full Clang AST (level=verbose) of any lldb::Type in addition to the human-readable source description (default level=full). This type dumping interface is currently not exposed through the SBAPI. The application is to let lldb-test dump the clang AST of search results. 
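For example, with this patch a single search result can be dumped as an AST using the combination exercised by the updated module-ownership.mm test below (%t.o is the object file produced by the test's compile step):

    lldb-test symbols -dump-clang-ast -find type --language=ObjC++ \
        -compiler-context 'Module:A,Typedef:Typedef' %t.o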
I need this to test lazy type completion of clang types in subsequent patches. Differential Revision: https://reviews.llvm.org/D78329 --- lldb/include/lldb/Symbol/CompilerType.h | 12 ++- lldb/include/lldb/Symbol/Type.h | 3 +- lldb/include/lldb/Symbol/TypeMap.h | 3 +- lldb/include/lldb/Symbol/TypeSystem.h | 17 ++-- .../TypeSystem/Clang/TypeSystemClang.cpp | 89 +++++++++++-------- .../TypeSystem/Clang/TypeSystemClang.h | 8 +- lldb/source/Symbol/CompilerType.cpp | 9 +- lldb/source/Symbol/Type.cpp | 4 +- lldb/source/Symbol/TypeMap.cpp | 4 +- .../DWARF/Inputs/ModuleOwnership/A.h | 1 + .../SymbolFile/DWARF/module-ownership.mm | 21 +++-- lldb/tools/lldb-test/lldb-test.cpp | 44 ++++++--- 12 files changed, 139 insertions(+), 76 deletions(-) diff --git a/lldb/include/lldb/Symbol/CompilerType.h b/lldb/include/lldb/Symbol/CompilerType.h index b0a7953190f8..280966a327ec 100644 --- a/lldb/include/lldb/Symbol/CompilerType.h +++ b/lldb/include/lldb/Symbol/CompilerType.h @@ -371,9 +371,15 @@ class CompilerType { size_t data_byte_size); /// Dump to stdout. - void DumpTypeDescription() const; - - void DumpTypeDescription(Stream *s) const; + void DumpTypeDescription(lldb::DescriptionLevel level = + lldb::eDescriptionLevelFull) const; + + /// Print a description of the type to a stream. The exact implementation + /// varies, but the expectation is that eDescriptionLevelFull returns a + /// source-like representation of the type, whereas eDescriptionLevelVerbose + /// does a dump of the underlying AST if applicable. + void DumpTypeDescription(Stream *s, lldb::DescriptionLevel level = + lldb::eDescriptionLevelFull) const; /// \} bool GetValueAsScalar(const DataExtractor &data, lldb::offset_t data_offset, diff --git a/lldb/include/lldb/Symbol/Type.h b/lldb/include/lldb/Symbol/Type.h index dfff30029168..8735d016bb22 100644 --- a/lldb/include/lldb/Symbol/Type.h +++ b/lldb/include/lldb/Symbol/Type.h @@ -103,7 +103,8 @@ class Type : public std::enable_shared_from_this, public UserID { // they get an error. Type(); - void Dump(Stream *s, bool show_context); + void Dump(Stream *s, bool show_context, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull); void DumpTypeName(Stream *s); diff --git a/lldb/include/lldb/Symbol/TypeMap.h b/lldb/include/lldb/Symbol/TypeMap.h index dd9dbc69f404..67bb65b5faec 100644 --- a/lldb/include/lldb/Symbol/TypeMap.h +++ b/lldb/include/lldb/Symbol/TypeMap.h @@ -26,7 +26,8 @@ class TypeMap { void Clear(); - void Dump(Stream *s, bool show_context); + void Dump(Stream *s, bool show_context, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull); TypeMap FindTypes(ConstString name); diff --git a/lldb/include/lldb/Symbol/TypeSystem.h b/lldb/include/lldb/Symbol/TypeSystem.h index ba2bbfaf4650..e188f29354b8 100644 --- a/lldb/include/lldb/Symbol/TypeSystem.h +++ b/lldb/include/lldb/Symbol/TypeSystem.h @@ -374,11 +374,18 @@ class TypeSystem : public PluginInterface { uint32_t bitfield_bit_offset, ExecutionContextScope *exe_scope) = 0; - virtual void - DumpTypeDescription(lldb::opaque_compiler_type_t type) = 0; // Dump to stdout - - virtual void DumpTypeDescription(lldb::opaque_compiler_type_t type, - Stream *s) = 0; + /// Dump the type to stdout. + virtual void DumpTypeDescription( + lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0; + + /// Print a description of the type to a stream. 
The exact implementation + /// varies, but the expectation is that eDescriptionLevelFull returns a + /// source-like representation of the type, whereas eDescriptionLevelVerbose + /// does a dump of the underlying AST if applicable. + virtual void DumpTypeDescription( + lldb::opaque_compiler_type_t type, Stream *s, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) = 0; // TODO: These methods appear unused. Should they be removed? diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp index cbd8e78695f4..56c6e3502c0e 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.cpp @@ -8787,9 +8787,10 @@ void TypeSystemClang::DumpSummary(lldb::opaque_compiler_type_t type, } } -void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type) { +void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level) { StreamFile s(stdout, false); - DumpTypeDescription(type, &s); + DumpTypeDescription(type, &s, level); CompilerType ct(this, type); const clang::Type *clang_type = ClangUtil::GetQualType(ct).getTypePtr(); @@ -8800,7 +8801,8 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type) { } void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, - Stream *s) { + Stream *s, + lldb::DescriptionLevel level) { if (type) { clang::QualType qual_type = RemoveWrappingTypes(GetQualType(type), {clang::Type::Typedef}); @@ -8814,24 +8816,31 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, case clang::Type::ObjCInterface: { GetCompleteType(type); - const clang::ObjCObjectType *objc_class_type = + auto *objc_class_type = llvm::dyn_cast(qual_type.getTypePtr()); assert(objc_class_type); - if (objc_class_type) { - clang::ObjCInterfaceDecl *class_interface_decl = + if (!objc_class_type) + break; + clang::ObjCInterfaceDecl *class_interface_decl = objc_class_type->getInterface(); - if (class_interface_decl) { - clang::PrintingPolicy policy = getASTContext().getPrintingPolicy(); - class_interface_decl->print(llvm_ostrm, policy, s->GetIndentLevel()); - } - } + if (!class_interface_decl) + break; + if (level == eDescriptionLevelVerbose) + class_interface_decl->dump(llvm_ostrm); + else + class_interface_decl->print(llvm_ostrm, + getASTContext().getPrintingPolicy(), + s->GetIndentLevel()); } break; case clang::Type::Typedef: { - const clang::TypedefType *typedef_type = - qual_type->getAs(); - if (typedef_type) { - const clang::TypedefNameDecl *typedef_decl = typedef_type->getDecl(); + auto *typedef_type = qual_type->getAs(); + if (!typedef_type) + break; + const clang::TypedefNameDecl *typedef_decl = typedef_type->getDecl(); + if (level == eDescriptionLevelVerbose) + typedef_decl->dump(llvm_ostrm); + else { std::string clang_typedef_name( typedef_decl->getQualifiedNameAsString()); if (!clang_typedef_name.empty()) { @@ -8844,31 +8853,39 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, case clang::Type::Record: { GetCompleteType(type); - const clang::RecordType *record_type = - llvm::cast(qual_type.getTypePtr()); + auto *record_type = llvm::cast(qual_type.getTypePtr()); const clang::RecordDecl *record_decl = record_type->getDecl(); - const clang::CXXRecordDecl *cxx_record_decl = - llvm::dyn_cast(record_decl); - - if (cxx_record_decl) - cxx_record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), - 
s->GetIndentLevel()); - else - record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), - s->GetIndentLevel()); + if (level == eDescriptionLevelVerbose) + record_decl->dump(llvm_ostrm); + else { + if (auto *cxx_record_decl = + llvm::dyn_cast(record_decl)) + cxx_record_decl->print(llvm_ostrm, + getASTContext().getPrintingPolicy(), + s->GetIndentLevel()); + else + record_decl->print(llvm_ostrm, getASTContext().getPrintingPolicy(), + s->GetIndentLevel()); + } } break; default: { - const clang::TagType *tag_type = - llvm::dyn_cast(qual_type.getTypePtr()); - if (tag_type) { - clang::TagDecl *tag_decl = tag_type->getDecl(); - if (tag_decl) - tag_decl->print(llvm_ostrm, 0); + if (auto *tag_type = + llvm::dyn_cast(qual_type.getTypePtr())) { + if (clang::TagDecl *tag_decl = tag_type->getDecl()) { + if (level == eDescriptionLevelVerbose) + tag_decl->dump(llvm_ostrm); + else + tag_decl->print(llvm_ostrm, 0); + } } else { - std::string clang_type_name(qual_type.getAsString()); - if (!clang_type_name.empty()) - s->PutCString(clang_type_name); + if (level == eDescriptionLevelVerbose) + qual_type->dump(llvm_ostrm); + else { + std::string clang_type_name(qual_type.getAsString()); + if (!clang_type_name.empty()) + s->PutCString(clang_type_name); + } } } } @@ -8876,7 +8893,7 @@ void TypeSystemClang::DumpTypeDescription(lldb::opaque_compiler_type_t type, if (buf.size() > 0) { s->Write(buf.data(), buf.size()); } - } +} } void TypeSystemClang::DumpTypeName(const CompilerType &type) { diff --git a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h index b326ee56cb8a..920d7cb4c23d 100644 --- a/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h +++ b/lldb/source/Plugins/TypeSystem/Clang/TypeSystemClang.h @@ -975,10 +975,12 @@ class TypeSystemClang : public TypeSystem { lldb::offset_t data_offset, size_t data_byte_size) override; void DumpTypeDescription( - lldb::opaque_compiler_type_t type) override; // Dump to stdout + lldb::opaque_compiler_type_t type, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; - void DumpTypeDescription(lldb::opaque_compiler_type_t type, - Stream *s) override; + void DumpTypeDescription( + lldb::opaque_compiler_type_t type, Stream *s, + lldb::DescriptionLevel level = lldb::eDescriptionLevelFull) override; static void DumpTypeName(const CompilerType &type); diff --git a/lldb/source/Symbol/CompilerType.cpp b/lldb/source/Symbol/CompilerType.cpp index f24d9939e6cb..ce64763dd815 100644 --- a/lldb/source/Symbol/CompilerType.cpp +++ b/lldb/source/Symbol/CompilerType.cpp @@ -744,14 +744,15 @@ void CompilerType::DumpSummary(ExecutionContext *exe_ctx, Stream *s, data_byte_size); } -void CompilerType::DumpTypeDescription() const { +void CompilerType::DumpTypeDescription(lldb::DescriptionLevel level) const { if (IsValid()) - m_type_system->DumpTypeDescription(m_type); + m_type_system->DumpTypeDescription(m_type, level); } -void CompilerType::DumpTypeDescription(Stream *s) const { +void CompilerType::DumpTypeDescription(Stream *s, + lldb::DescriptionLevel level) const { if (IsValid()) { - m_type_system->DumpTypeDescription(m_type, s); + m_type_system->DumpTypeDescription(m_type, s, level); } } diff --git a/lldb/source/Symbol/Type.cpp b/lldb/source/Symbol/Type.cpp index 058d4c714634..307e99ac84b6 100644 --- a/lldb/source/Symbol/Type.cpp +++ b/lldb/source/Symbol/Type.cpp @@ -234,7 +234,7 @@ void Type::GetDescription(Stream *s, lldb::DescriptionLevel level, } } -void Type::Dump(Stream *s, bool 
show_context) { +void Type::Dump(Stream *s, bool show_context, lldb::DescriptionLevel level) { s->Printf("%p: ", static_cast(this)); s->Indent(); *s << "Type" << static_cast(*this) << ' '; @@ -255,7 +255,7 @@ void Type::Dump(Stream *s, bool show_context) { if (m_compiler_type.IsValid()) { *s << ", compiler_type = " << m_compiler_type.GetOpaqueQualType() << ' '; - GetForwardCompilerType().DumpTypeDescription(s); + GetForwardCompilerType().DumpTypeDescription(s, level); } else if (m_encoding_uid != LLDB_INVALID_UID) { s->Format(", type_data = {0:x-16}", m_encoding_uid); switch (m_encoding_uid_type) { diff --git a/lldb/source/Symbol/TypeMap.cpp b/lldb/source/Symbol/TypeMap.cpp index a7a29a30df12..e810d3020073 100644 --- a/lldb/source/Symbol/TypeMap.cpp +++ b/lldb/source/Symbol/TypeMap.cpp @@ -121,9 +121,9 @@ bool TypeMap::Remove(const lldb::TypeSP &type_sp) { return false; } -void TypeMap::Dump(Stream *s, bool show_context) { +void TypeMap::Dump(Stream *s, bool show_context, lldb::DescriptionLevel level) { for (iterator pos = m_types.begin(), end = m_types.end(); pos != end; ++pos) { - pos->second->Dump(s, show_context); + pos->second->Dump(s, show_context, level); } } diff --git a/lldb/test/Shell/SymbolFile/DWARF/Inputs/ModuleOwnership/A.h b/lldb/test/Shell/SymbolFile/DWARF/Inputs/ModuleOwnership/A.h index 4b223cafdcba..0394d0a59de7 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/Inputs/ModuleOwnership/A.h +++ b/lldb/test/Shell/SymbolFile/DWARF/Inputs/ModuleOwnership/A.h @@ -18,6 +18,7 @@ typedef enum Enum_e { a = 0 } Enum; @interface SomeClass { } +@property (readonly) int number; @end template struct Template { T field; }; diff --git a/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm b/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm index 6a876d1ea578..5fccc44c34ef 100644 --- a/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm +++ b/lldb/test/Shell/SymbolFile/DWARF/module-ownership.mm @@ -1,17 +1,22 @@ // RUN: %clang --target=x86_64-apple-macosx -g -gmodules \ // RUN: -fmodules -fmodules-cache-path=%t.cache \ // RUN: -c -o %t.o %s -I%S/Inputs -// RUN: lldb-test symbols -dump-clang-ast %t.o | FileCheck %s // Verify that the owning module information from DWARF is preserved in the AST. 
@import A; Typedef t1; -// CHECK-DAG: TypedefDecl {{.*}} imported in A Typedef +// RUN: lldb-test symbols -dump-clang-ast -find type --language=ObjC++ \ +// RUN: -compiler-context 'Module:A,Typedef:Typedef' %t.o \ +// RUN: | FileCheck %s --check-prefix=CHECK-TYPEDEF +// CHECK-TYPEDEF: TypedefDecl {{.*}} imported in A Typedef TopLevelStruct s1; -// CHECK-DAG: CXXRecordDecl {{.*}} imported in A struct TopLevelStruct -// CHECK-DAG: -FieldDecl {{.*}} in A a 'int' +// RUN: lldb-test symbols -dump-clang-ast -find type --language=ObjC++ \ +// RUN: -compiler-context 'Module:A,Struct:TopLevelStruct' %t.o \ +// RUN: | FileCheck %s --check-prefix=CHECK-TOPLEVELSTRUCT +// CHECK-TOPLEVELSTRUCT: CXXRecordDecl {{.*}} imported in A struct TopLevelStruct +// CHECK-TOPLEVELSTRUCT: -FieldDecl {{.*}} in A a 'int' Struct s2; // CHECK-DAG: CXXRecordDecl {{.*}} imported in A struct @@ -29,7 +34,13 @@ // FIXME: -EnumConstantDecl {{.*}} imported in A a SomeClass *obj1; -// CHECK-DAG: ObjCInterfaceDecl {{.*}} imported in A {{.*}} SomeClass +// RUN: lldb-test symbols -dump-clang-ast -find type --language=ObjC++ \ +// RUN: -compiler-context 'Module:A,Struct:SomeClass' %t.o \ +// RUN: | FileCheck %s --check-prefix=CHECK-OBJC +// CHECK-OBJC: ObjCInterfaceDecl {{.*}} imported in A SomeClass +// CHECK-OBJC: |-ObjCPropertyDecl {{.*}} imported in A number 'int' readonly +// CHECK-OBJC: | `-getter ObjCMethod {{.*}} 'number' +// CHECK-OBJC: `-ObjCMethodDecl {{.*}} imported in A implicit - number 'int' // Template specializations are not yet supported, so they lack the ownership info: Template t2; diff --git a/lldb/tools/lldb-test/lldb-test.cpp b/lldb/tools/lldb-test/lldb-test.cpp index 5b28afbf8972..6c765db8da5a 100644 --- a/lldb/tools/lldb-test/lldb-test.cpp +++ b/lldb/tools/lldb-test/lldb-test.cpp @@ -169,10 +169,13 @@ static FunctionNameType getFunctionNameFlags() { static cl::opt DumpAST("dump-ast", cl::desc("Dump AST restored from symbols."), cl::sub(SymbolsSubcommand)); -static cl::opt - DumpClangAST("dump-clang-ast", - cl::desc("Dump clang AST restored from symbols."), - cl::sub(SymbolsSubcommand)); +static cl::opt DumpClangAST( + "dump-clang-ast", + cl::desc("Dump clang AST restored from symbols. When used on its own this " + "will dump the entire AST of all loaded symbols. When combined " + "with -find, it changes the presentation of the search results " + "from pretty-printing the types to an AST dump."), + cl::sub(SymbolsSubcommand)); static cl::opt Verify("verify", cl::desc("Verify symbol information."), cl::sub(SymbolsSubcommand)); @@ -192,7 +195,7 @@ static Error findTypes(lldb_private::Module &Module); static Error findVariables(lldb_private::Module &Module); static Error dumpModule(lldb_private::Module &Module); static Error dumpAST(lldb_private::Module &Module); -static Error dumpClangAST(lldb_private::Module &Module); +static Error dumpEntireClangAST(lldb_private::Module &Module); static Error verify(lldb_private::Module &Module); static Expected getAction(); @@ -404,6 +407,10 @@ opts::symbols::getDeclContext(SymbolFile &Symfile) { return List.GetVariableAtIndex(0)->GetDeclContext(); } +static lldb::DescriptionLevel GetDescriptionLevel() { + return opts::symbols::DumpClangAST ? 
eDescriptionLevelVerbose : eDescriptionLevelFull; +} + Error opts::symbols::findFunctions(lldb_private::Module &Module) { SymbolFile &Symfile = *Module.GetSymbolFile(); SymbolContextList List; @@ -534,7 +541,12 @@ Error opts::symbols::findTypes(lldb_private::Module &Module) { outs() << formatv("Found {0} types:\n", Map.GetSize()); StreamString Stream; - Map.Dump(&Stream, false); + // Resolve types to force-materialize typedef types. + Map.ForEach([&](TypeSP &type) { + type->GetFullCompilerType(); + return false; + }); + Map.Dump(&Stream, false, GetDescriptionLevel()); outs() << Stream.GetData() << "\n"; return Error::success(); } @@ -615,7 +627,7 @@ Error opts::symbols::dumpAST(lldb_private::Module &Module) { return Error::success(); } -Error opts::symbols::dumpClangAST(lldb_private::Module &Module) { +Error opts::symbols::dumpEntireClangAST(lldb_private::Module &Module) { Module.ParseAllDebugSymbols(); SymbolFile *symfile = Module.GetSymbolFile(); @@ -719,13 +731,17 @@ Expected opts::symbols::getAction() { } if (DumpClangAST) { - if (Find != FindType::None) - return make_string_error("Cannot both search and dump clang AST."); - if (Regex || !Context.empty() || !File.empty() || Line != 0) - return make_string_error( - "-regex, -context, -name, -file and -line options are not " - "applicable for dumping clang AST."); - return dumpClangAST; + if (Find == FindType::None) { + if (Regex || !Context.empty() || !File.empty() || Line != 0) + return make_string_error( + "-regex, -context, -name, -file and -line options are not " + "applicable for dumping the entire clang AST. Either combine with " + "-find, or use -dump-clang-ast as a standalone option."); + return dumpEntireClangAST; + } + if (Find != FindType::Type) + return make_string_error("This combination of -dump-clang-ast and -find " + " is not yet implemented."); } if (Regex && !Context.empty()) From 48879c02bfc40017ec7d4dcfbd920ef949b53cf6 Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Fri, 17 Apr 2020 19:05:31 +0100 Subject: [PATCH 200/216] [llvm][CodeGen] Fix issue for SVE gather prefetch. Summary: This change is fixing an issue where the dagcombine incorrectly used an addressing mode with scaled offsets (indices), instead of unscaled offsets. Those addressing modes do not exist for `prfh` , `prfw` and `prfd`, hence we can reuse `prfb` because that has unscaled offsets, and because the pseudo-code in the XML spec suggests that the element size is not used for the amount of data that is prefetched by the instruction. FWIW, GCC also emits a `prfb` for these cases. 
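As a concrete example of the effect, the updated gather-prefetch test below switches the expected codegen for a prfh intrinsic with a runtime scalar offset from the scaled form to the unscaled prfb form (both lines taken from the test diff):

    ; before: offset incorrectly scaled by the element size
    prfh pldl1strm, p0, [x0, z0.s, uxtw #1]
    ; after: offset used unscaled
    prfb pldl1strm, p0, [x0, z0.s, uxtw]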
Reviewers: sdesmalen, andwar, rengolin Reviewed By: sdesmalen Subscribers: tschuett, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D78069 --- .../Target/AArch64/AArch64ISelLowering.cpp | 34 +++++-------- ...prefetches-vect-base-invalid-imm-offset.ll | 50 +++++++++---------- 2 files changed, 38 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a0c17a9f0704..375e2681d100 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -13032,13 +13032,12 @@ static SDValue legalizeSVEGatherPrefetchOffsVec(SDNode *N, SelectionDAG &DAG) { return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); } -/// Combines a node carrying the intrinsic `aarch64_sve_prf_gather` into a -/// node that uses `aarch64_sve_prf_gather_scaled_uxtw` when the scalar -/// offset passed to `aarch64_sve_prf_gather` is not a valid immediate for -/// the sve gather prefetch instruction with vector plus immediate addressing -/// mode. +/// Combines a node carrying the intrinsic +/// `aarch64_sve_prf_gather_scalar_offset` into a node that uses +/// `aarch64_sve_prfb_gather_uxtw_index` when the scalar offset passed to +/// `aarch64_sve_prf_gather_scalar_offset` is not a valid immediate for the +/// sve gather prefetch instruction with vector plus immediate addressing mode. static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, - unsigned NewIID, unsigned ScalarSizeInBytes) { const unsigned ImmPos = 4, OffsetPos = 3; // No need to combine the node if the immediate is valid... @@ -13048,10 +13047,11 @@ static SDValue combineSVEPrefetchVecBaseImmOff(SDNode *N, SelectionDAG &DAG, // ...otherwise swap the offset base with the offset... SmallVector Ops(N->op_begin(), N->op_end()); std::swap(Ops[ImmPos], Ops[OffsetPos]); - // ...and remap the intrinsic `aarch64_sve_prf_gather` to - // `aarch64_sve_prf_gather_scaled_uxtw`. + // ...and remap the intrinsic `aarch64_sve_prf_gather_scalar_offset` to + // `aarch64_sve_prfb_gather_uxtw_index`. 
SDLoc DL(N); - Ops[1] = DAG.getConstant(NewIID, DL, MVT::i64); + Ops[1] = DAG.getConstant(Intrinsic::aarch64_sve_prfb_gather_uxtw_index, DL, + MVT::i64); return DAG.getNode(N->getOpcode(), DL, DAG.getVTList(MVT::Other), Ops); } @@ -13121,21 +13121,13 @@ SDValue AArch64TargetLowering::PerformDAGCombine(SDNode *N, case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { case Intrinsic::aarch64_sve_prfb_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfb_gather_uxtw_index, - 1 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 1 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfh_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfh_gather_uxtw_index, - 2 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 2 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfw_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfw_gather_uxtw_index, - 4 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 4 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfd_gather_scalar_offset: - return combineSVEPrefetchVecBaseImmOff( - N, DAG, Intrinsic::aarch64_sve_prfd_gather_uxtw_index, - 8 /*=ScalarSizeInBytes*/); + return combineSVEPrefetchVecBaseImmOff(N, DAG, 8 /*=ScalarSizeInBytes*/); case Intrinsic::aarch64_sve_prfb_gather_uxtw_index: case Intrinsic::aarch64_sve_prfb_gather_sxtw_index: case Intrinsic::aarch64_sve_prfh_gather_uxtw_index: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll index c525cec6598a..91da03719670 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-gather-prefetches-vect-base-invalid-imm-offset.ll @@ -59,7 +59,7 @@ define void @llvm_aarch64_sve_prfb_gather_scalar_offset_nx2vi64_invalid_immediat ; PRFH , , [.S{, #}] -> 32-bit element, imm = 0, 2, ..., 62 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset: -; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 %offset, i32 1) ret void @@ -68,7 +68,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_runtime_offset(< define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #63 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 63, i32 1) ret void @@ -77,7 +77,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov 
x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 -1, i32 1) ret void @@ -86,7 +86,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_2: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 33, i32 1) ret void @@ -95,8 +95,8 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx4vi32_invalid_immediat ; PRFH , , [.D{, #}] -> 64-bit element, imm = 0, 2, ..., 62 define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset: -; CHECK-NEXT: prfh pldl1strm, p0, [x0, z0.d, uxtw #1] -; CHECK-NEXT: ret +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw] +; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 %offset, i32 1) ret void } @@ -104,7 +104,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_runtime_offset(< define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #63 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N]], z0.d, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 63, i32 1) ret void @@ -113,7 +113,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 -1, i32 1) ret void @@ -122,7 +122,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_2: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfh pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #1] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfh.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 33, i32 1) ret void @@ -133,7 +133,7 @@ define void @llvm_aarch64_sve_prfh_gather_scalar_offset_nx2vi64_invalid_immediat ; PRFW , , [.S{, #}] -> 32-bit element, imm = 0, 4, ..., 124 define void 
@llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset: -; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 %offset, i32 1) ret void @@ -142,7 +142,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_runtime_offset(< define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 125, i32 1) ret void @@ -151,7 +151,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 -1, i32 1) ret void @@ -160,7 +160,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_4: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 33, i32 1) ret void @@ -169,7 +169,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx4vi32_invalid_immediat ; PRFW , , [.D{, #}] -> 64-bit element, imm = 0, 4, ..., 124 define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset: -; CHECK-NEXT: prfw pldl1strm, p0, [x0, z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 %offset, i32 1) ret void @@ -178,7 +178,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_runtime_offset(< define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N]], z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 125, i32 1) ret void @@ -187,7 +187,7 @@ define void 
@llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 -1, i32 1) ret void @@ -196,7 +196,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_4: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfw pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #2] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfw.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 33, i32 1) ret void @@ -207,7 +207,7 @@ define void @llvm_aarch64_sve_prfw_gather_scalar_offset_nx2vi64_invalid_immediat ; PRFD , , [.S{, #}] -> 32-bit element, imm = 0, 8, ..., 248 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset: -; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 %offset, i32 1) ret void @@ -216,7 +216,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_runtime_offset(< define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 125, i32 1) ret void @@ -225,7 +225,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 -1, i32 1) ret void @@ -234,7 +234,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediate_offset_inbound_not_multiple_of_8: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.s, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.s, 
uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx4vi32( %Pg, %bases, i64 33, i32 1) ret void @@ -243,7 +243,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx4vi32_invalid_immediat ; PRFD , , [.D{, #}] -> 64-bit element, imm = 0, 4, ..., 248 define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset( %bases, i64 %offset, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset: -; CHECK-NEXT: prfd pldl1strm, p0, [x0, z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x0, z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 %offset, i32 1) ret void @@ -252,7 +252,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_runtime_offset(< define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_upper_bound: ; CHECK-NEXT: mov w[[N:[0-9]+]], #125 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N]], z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 125, i32 1) ret void @@ -261,7 +261,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediat define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_lower_bound: ; CHECK-NEXT: mov x[[N:[0-9]+]], #-1 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 -1, i32 1) ret void @@ -270,7 +270,7 @@ define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediat define void @llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8( %bases, %Pg) nounwind { ; CHECK-LABEL: llvm_aarch64_sve_prfd_gather_scalar_offset_nx2vi64_invalid_immediate_offset_inbound_not_multiple_of_8: ; CHECK-NEXT: mov w[[N:[0-9]+]], #33 -; CHECK-NEXT: prfd pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw #3] +; CHECK-NEXT: prfb pldl1strm, p0, [x[[N:[0-9]+]], z0.d, uxtw] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.prfd.gather.scalar.offset.nx2vi64( %Pg, %bases, i64 33, i32 1) ret void From fc4e954ed5c0825cdfe3a590ff1904ef38bc47db Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Fri, 17 Apr 2020 19:26:28 +0100 Subject: [PATCH 201/216] [llvm][CodeGen] Addressing modes for SVE stN. 
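Summary:
Select reg+imm and reg+reg addressing modes for the SVE st2/st3/st4
intrinsics. A sketch of the two forms, taken from the new tests added in
this patch:

  reg+imm, offset is an in-range multiple of the number of vectors:
    st2b { z0.b, z1.b }, p0, [x0, #2, mul vl]
  reg+reg, any other offset:
    st2b { z0.b, z1.b }, p0, [x0, x1]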
Reviewers: efriedma, sdesmalen, c-rhodes, ctetreau Reviewed By: c-rhodes Subscribers: tschuett, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D77435 --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 91 ++- .../Target/AArch64/AArch64ISelLowering.cpp | 30 + .../sve-intrinsics-stN-reg-imm-addr-mode.ll | 614 ++++++++++++++++++ .../sve-intrinsics-stN-reg-reg-addr-mode.ll | 367 +++++++++++ 4 files changed, 1084 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 5bc22abde3fa..6636b38fd887 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -261,7 +261,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc); + template + void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr, + const unsigned Opc_ri); + template + std::tuple + findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr, + const unsigned Opc_ri, const SDValue &OldBase, + const SDValue &OldOffset); bool tryBitfieldExtractOp(SDNode *N); bool tryBitfieldExtractOpFromSExt(SDNode *N); @@ -1408,6 +1415,30 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +/// Optimize \param OldBase and \param OldOffset selecting the best addressing +/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the +/// new Base and an SDValue representing the new offset. +template +std::tuple +AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr, + const unsigned Opc_ri, + const SDValue &OldBase, + const SDValue &OldOffset) { + SDValue NewBase = OldBase; + SDValue NewOffset = OldOffset; + // Detect a possible Reg+Imm addressing mode. + const bool IsRegImm = SelectAddrModeIndexedSVE( + N, OldBase, NewBase, NewOffset); + + // Detect a possible reg+reg addressing mode, but only if we haven't already + // detected a Reg+Imm one. + const bool IsRegReg = + !IsRegImm && SelectSVERegRegAddrMode(OldBase, NewBase, NewOffset); + + // Select the instruction. + return {IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset}; +} + void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); @@ -1428,18 +1459,27 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, ReplaceNode(N, St); } +template void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, - const unsigned Opc) { + const unsigned Opc_rr, + const unsigned Opc_ri) { SDLoc dl(N); // Form a REG_SEQUENCE to force register allocation. SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); SDValue RegSeq = createZTuple(Regs); - SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate - N->getOperand(NumVecs + 3), // address - CurDAG->getTargetConstant(0, dl, MVT::i64), // offset - N->getOperand(0)}; // chain + // Optimize addressing mode. 
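+  // Use the reg+imm form when the address folds into a valid immediate (a
+  // multiple of the number of vectors within range); otherwise try the
+  // reg+reg form; if neither matches, keep the base register with a #0
+  // immediate (see findAddrModeSVELoadStore above).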
+ unsigned Opc; + SDValue Offset, Base; + std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( + N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), + CurDAG->getTargetConstant(0, dl, MVT::i64)); + + SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate + Base, // address + Offset, // offset + N->getOperand(0)}; // chain SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); ReplaceNode(N, St); @@ -3910,48 +3950,60 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } case Intrinsic::aarch64_sve_st2: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 2, AArch64::ST2B_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2B, + AArch64::ST2B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 2, AArch64::ST2H_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2H, + AArch64::ST2H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 2, AArch64::ST2W_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2W, + AArch64::ST2W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 2, AArch64::ST2D_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2D, + AArch64::ST2D_IMM); return; } break; } case Intrinsic::aarch64_sve_st3: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 3, AArch64::ST3B_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3B, + AArch64::ST3B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 3, AArch64::ST3H_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3H, + AArch64::ST3H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 3, AArch64::ST3W_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3W, + AArch64::ST3W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 3, AArch64::ST3D_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3D, + AArch64::ST3D_IMM); return; } break; } case Intrinsic::aarch64_sve_st4: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 4, AArch64::ST4B_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4B, + AArch64::ST4B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 4, AArch64::ST4H_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4H, + AArch64::ST4H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 4, AArch64::ST4W_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4W, + AArch64::ST4W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 4, AArch64::ST4D_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4D, + AArch64::ST4D_IMM); return; } break; @@ -4587,6 +4639,9 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { if (isa(Root)) return cast(Root)->getMemoryVT(); + if (isa(Root)) + return cast(Root)->getMemoryVT(); + const unsigned Opcode = Root->getOpcode(); // For custom ISD nodes, we have to look at them individually to extract the // type of the data moved to/from memory. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 375e2681d100..8ceb970efcf4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8913,6 +8913,30 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, DL, VT); } +/// Set the IntrinsicInfo for the `aarch64_sve_st` intrinsics. 
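+/// The call stores NumVecs vectors of the same type; memVT is set to
+/// NumVecs times the type of the first vector operand, and ptrVal to the
+/// last (pointer) argument of the call.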
+template +static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info, + const CallInst &CI) { + Info.opc = ISD::INTRINSIC_VOID; + // Retrieve EC from first vector argument. + const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType()); + ElementCount EC = VT.getVectorElementCount(); +#ifndef NDEBUG + // Check the assumption that all input vectors are the same type. + for (unsigned I = 0; I < NumVecs; ++I) + assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) && + "Invalid type."); +#endif + // memVT is `NumVecs * VT`. + Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(), + EC * NumVecs); + Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1); + Info.offset = 0; + Info.align.reset(); + Info.flags = MachineMemOperand::MOStore; + return true; +} + /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. @@ -8922,6 +8946,12 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned Intrinsic) const { auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { + case Intrinsic::aarch64_sve_st2: + return setInfoSVEStN<2>(Info, I); + case Intrinsic::aarch64_sve_st3: + return setInfoSVEStN<3>(Info, I); + case Intrinsic::aarch64_sve_st4: + return setInfoSVEStN<4>(Info, I); case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: case Intrinsic::aarch64_neon_ld4: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll new file mode 100644 index 000000000000..8ef27dc7ed70 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -0,0 +1,614 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; NOTE: invalid, upper and lower bound immediate values of the reg+imm +; addressing mode are checked only for the byte version of each +; instruction (`stb`), as the code for detecting the immediate is +; common to all instructions, and varies only for the number of +; elements of the structured store, which is = 2, 3, 4. 
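+; NOTE: for stNb the valid immediates are multiples of N in the range
+; [-8*N, 7*N], i.e. [-16, 14] for st2b, [-24, 21] for st3b and [-32, 28]
+; for st4b; any other offset falls back to the reg+reg addressing mode.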
+ +; +; ST2B +; + +define void @st2b_i8_valid_imm( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_valid_imm: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 2 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2: +; CHECK: rdvl x[[N:[0-9]+]], #3 +; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 3 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound: +; CHECK: rdvl x[[N:[0-9]+]], #-18 +; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -18 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_invalid_imm_out_of_upper_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound: +; CHECK: rdvl x[[N:[0-9]+]], #16 +; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 16 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_valid_imm_lower_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_valid_imm_lower_bound: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_valid_imm_upper_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_valid_imm_upper_bound: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 14 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2H +; + +define void @st2h_i16( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2h_i16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 2 + call void @llvm.aarch64.sve.st2.nxv8i16( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2h_f16( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2h_f16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 2 + call void @llvm.aarch64.sve.st2.nxv8f16( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2W +; + +define void @st2w_i32( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2w_i32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 4 + call void @llvm.aarch64.sve.st2.nxv4i32( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2w_f32( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2w_f32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 6 + call void @llvm.aarch64.sve.st2.nxv4f32( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2D +; + +define void @st2d_i64( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2d_i64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 8 + call void @llvm.aarch64.sve.st2.nxv2i64( %v0, + %v1, + %pred, + * %base) + ret void +} + 
+define void @st2d_f64( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2d_f64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 10 + call void @llvm.aarch64.sve.st2.nxv2f64( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST3B +; + +define void @st3b_i8_valid_imm( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_valid_imm: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 3 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01: +; CHECK: rdvl x[[N:[0-9]+]], #4 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 4 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_not_multiple_of_3_02( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02: +; CHECK: rdvl x[[N:[0-9]+]], #5 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 5 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound: +; CHECK: rdvl x[[N:[0-9]+]], #-27 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -27 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_out_of_upper_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound: +; CHECK: rdvl x[[N:[0-9]+]], #24 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 24 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_valid_imm_lower_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_valid_imm_lower_bound: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_valid_imm_upper_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_valid_imm_upper_bound: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3H +; + +define void @st3h_i16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3h_i16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 6 + call void @llvm.aarch64.sve.st3.nxv8i16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3h_f16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3h_f16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 9 + call void @llvm.aarch64.sve.st3.nxv8f16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3W +; + +define void @st3w_i32( %v0, %v1, 
%v2, %pred, * %addr) { +; CHECK-LABEL: st3w_i32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 12 + call void @llvm.aarch64.sve.st3.nxv4i32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3w_f32( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3w_f32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 15 + call void @llvm.aarch64.sve.st3.nxv4f32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3D +; + +define void @st3d_i64( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3d_i64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 18 + call void @llvm.aarch64.sve.st3.nxv2i64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3d_f64( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3d_f64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -3 + call void @llvm.aarch64.sve.st3.nxv2f64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST4B +; + +define void @st4b_i8_valid_imm( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_valid_imm: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 4 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01: +; CHECK: rdvl x[[N:[0-9]+]], #5 +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 5 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_not_multiple_of_4_02( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02: +; CHECK: rdvl x[[N:[0-9]+]], #6 +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 6 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_not_multiple_of_4_03( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03: +; CHECK: rdvl x[[N:[0-9]+]], #7 +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 7 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound: +; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9) +; xM = -9 * 2^6 +; xP = RDVL * 2^-4 +; xBASE = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36 +; CHECK: rdvl x[[N:[0-9]+]], #1 +; CHECK-DAG: mov x[[M:[0-9]+]], #-576 +; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4 +; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]] +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -36 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_out_of_upper_bound( %v0, 
%v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound: +; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1) +; xM = 2^9 +; xP = RDVL * 2^-4 +; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32 +; CHECK: rdvl x[[N:[0-9]+]], #1 +; CHECK-DAG: mov w[[M:[0-9]+]], #512 +; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4 +; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]] +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 32 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_valid_imm_lower_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_valid_imm_lower_bound: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_valid_imm_upper_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_valid_imm_upper_bound: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4H +; + +define void @st4h_i16( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4h_i16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 8 + call void @llvm.aarch64.sve.st4.nxv8i16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4h_f16( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4h_f16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 12 + call void @llvm.aarch64.sve.st4.nxv8f16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4W +; + +define void @st4w_i32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4w_i32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 16 + call void @llvm.aarch64.sve.st4.nxv4i32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4w_f32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4w_f32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 20 + call void @llvm.aarch64.sve.st4.nxv4f32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4D +; + +define void @st4d_i64( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4d_i64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 24 + call void @llvm.aarch64.sve.st4.nxv2i64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4d_f64( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4d_f64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4.nxv2f64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +declare void @llvm.aarch64.sve.st2.nxv16i8(, , , *) +declare void @llvm.aarch64.sve.st2.nxv8i16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4i32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2i64(, , , *) +declare void 
@llvm.aarch64.sve.st2.nxv8f16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4f32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2f64(, , , *) + +declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4i32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2i64(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , *) + +declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2i64(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , *) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll new file mode 100644 index 000000000000..4945fdca9498 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -0,0 +1,367 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; +; ST2B +; + +define void @st2b_i8( %v0, %v1, %pred, i8* %addr, i64 %offset) { +; CHECK-LABEL: st2b_i8: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1] +; CHECK-NEXT: ret + %1 = getelementptr i8, i8* %addr, i64 %offset + %base = bitcast i8* %1 to * + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2H +; + +define void @st2h_i16( %v0, %v1, %pred, i16* %addr, i64 %offset) { +; CHECK-LABEL: st2h_i16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr i16, i16* %addr, i64 %offset + %base = bitcast i16* %1 to * + call void @llvm.aarch64.sve.st2.nxv8i16( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2h_f16( %v0, %v1, %pred, half* %addr, i64 %offset) { +; CHECK-LABEL: st2h_f16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr half, half* %addr, i64 %offset + %base = bitcast half* %1 to * + call void @llvm.aarch64.sve.st2.nxv8f16( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2W +; + +define void @st2w_i32( %v0, %v1, %pred, i32* %addr, i64 %offset) { +; CHECK-LABEL: st2w_i32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr i32, i32* %addr, i64 %offset + %base = bitcast i32* %1 to * + call void @llvm.aarch64.sve.st2.nxv4i32( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2w_f32( %v0, %v1, %pred, float* %addr, i64 %offset) { +; CHECK-LABEL: st2w_f32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr float, float* %addr, i64 %offset + %base = bitcast float* %1 to * + call void @llvm.aarch64.sve.st2.nxv4f32( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2D +; + +define void @st2d_i64( %v0, %v1, %pred, i64* %addr, i64 %offset) { +; CHECK-LABEL: st2d_i64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr i64, i64* %addr, i64 %offset + %base = bitcast i64* %1 to * + call void @llvm.aarch64.sve.st2.nxv2i64( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2d_f64( %v0, %v1, %pred, double* %addr, i64 %offset) { +; CHECK-LABEL: st2d_f64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl 
#3] +; CHECK-NEXT: ret + %1 = getelementptr double, double* %addr, i64 %offset + %base = bitcast double* %1 to * + call void @llvm.aarch64.sve.st2.nxv2f64( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST3B +; + +define void @st3b_i8( %v0, %v1, %v2, %pred, i8* %addr, i64 %offset) { +; CHECK-LABEL: st3b_i8: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1] +; CHECK-NEXT: ret + %1 = getelementptr i8, i8* %addr, i64 %offset + %base = bitcast i8* %1 to * + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3H +; + +define void @st3h_i16( %v0, %v1, %v2, %pred, i16* %addr, i64 %offset) { +; CHECK-LABEL: st3h_i16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr i16, i16* %addr, i64 %offset + %base = bitcast i16* %1 to * + call void @llvm.aarch64.sve.st3.nxv8i16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3h_f16( %v0, %v1, %v2, %pred, half* %addr, i64 %offset) { +; CHECK-LABEL: st3h_f16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr half, half* %addr, i64 %offset + %base = bitcast half* %1 to * + call void @llvm.aarch64.sve.st3.nxv8f16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3W +; + +define void @st3w_i32( %v0, %v1, %v2, %pred, i32* %addr, i64 %offset) { +; CHECK-LABEL: st3w_i32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr i32, i32* %addr, i64 %offset + %base = bitcast i32* %1 to * + call void @llvm.aarch64.sve.st3.nxv4i32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3w_f32( %v0, %v1, %v2, %pred, float* %addr, i64 %offset) { +; CHECK-LABEL: st3w_f32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr float, float* %addr, i64 %offset + %base = bitcast float* %1 to * + call void @llvm.aarch64.sve.st3.nxv4f32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3D +; + +define void @st3d_i64( %v0, %v1, %v2, %pred, i64* %addr, i64 %offset) { +; CHECK-LABEL: st3d_i64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr i64, i64* %addr, i64 %offset + %base = bitcast i64* %1 to * + call void @llvm.aarch64.sve.st3.nxv2i64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3d_f64( %v0, %v1, %v2, %pred, double* %addr, i64 %offset) { +; CHECK-LABEL: st3d_f64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr double, double* %addr, i64 %offset + %base = bitcast double* %1 to * + call void @llvm.aarch64.sve.st3.nxv2f64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST4B +; + +define void @st4b_i8( %v0, %v1, %v2, %v3, %pred, i8* %addr, i64 %offset) { +; CHECK-LABEL: st4b_i8: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1] +; CHECK-NEXT: ret + %1 = getelementptr i8, i8* %addr, i64 %offset + %base = bitcast i8* %1 to * + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4H +; + +define void @st4h_i16( %v0, %v1, %v2, %v3, %pred, i16* %addr, i64 %offset) { +; CHECK-LABEL: st4h_i16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr i16, i16* %addr, i64 %offset + %base = bitcast i16* %1 to * + call void @llvm.aarch64.sve.st4.nxv8i16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4h_f16( 
%v0, %v1, %v2, %v3, %pred, half* %addr, i64 %offset) { +; CHECK-LABEL: st4h_f16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr half, half* %addr, i64 %offset + %base = bitcast half* %1 to * + call void @llvm.aarch64.sve.st4.nxv8f16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4W +; + +define void @st4w_i32( %v0, %v1, %v2, %v3, %pred, i32* %addr, i64 %offset) { +; CHECK-LABEL: st4w_i32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr i32, i32* %addr, i64 %offset + %base = bitcast i32* %1 to * + call void @llvm.aarch64.sve.st4.nxv4i32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4w_f32( %v0, %v1, %v2, %v3, %pred, float* %addr, i64 %offset) { +; CHECK-LABEL: st4w_f32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr float, float* %addr, i64 %offset + %base = bitcast float* %1 to * + call void @llvm.aarch64.sve.st4.nxv4f32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4D +; + +define void @st4d_i64( %v0, %v1, %v2, %v3, %pred, i64* %addr, i64 %offset) { +; CHECK-LABEL: st4d_i64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr i64, i64* %addr, i64 %offset + %base = bitcast i64* %1 to * + call void @llvm.aarch64.sve.st4.nxv2i64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4d_f64( %v0, %v1, %v2, %v3, %pred, double* %addr, i64 %offset) { +; CHECK-LABEL: st4d_f64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr double, double* %addr, i64 %offset + %base = bitcast double* %1 to * + call void @llvm.aarch64.sve.st4.nxv2f64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +declare void @llvm.aarch64.sve.st2.nxv16i8(, , , *) +declare void @llvm.aarch64.sve.st2.nxv8i16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4i32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2i64(, , , *) +declare void @llvm.aarch64.sve.st2.nxv8f16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4f32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2f64(, , , *) + +declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4i32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2i64(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , *) + +declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2i64(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , *) From fde2aefa22b9be803628888a21067288c8e2636d Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 15 Apr 2020 15:40:13 -0700 Subject: [PATCH 202/216] [AMDGPU] Use SDWA for 16 bit subreg copy This simplifies the logic and allows to use it on GFX8. 
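For example (from the updated MIR tests), a copy from the low half of one
VGPR to the high half of another previously required a shift plus
v_alignbyte_b32 and now lowers to a single SDWA move that writes only the
selected word:

  before: v_lshrrev_b32_e32 v1, 16, v1
          v_alignbyte_b32 v1, v0, v1, 2
  after:  v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0

Since SDWA is available from GFX8, the tests now also run for gfx802 and
gfx1010.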
Differential Revision: https://reviews.llvm.org/D78150 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 65 ++++--------------- .../CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir | 55 +++++++--------- 2 files changed, 36 insertions(+), 84 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 6e5907b12510..92d0440a5806 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -683,16 +683,6 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || AMDGPU::VGPR_HI16RegClass.contains(SrcReg)); - // d s - // l -> l : hhhhxxxx : xxxxllll -> v_alignbyte_b32 d, s, d, 2 - // llllhhhh : xxxxllll -> v_alignbyte_b32 d, d, d, 2 - // l -> h : xxxxllll : xxxxhhhh -> v_lshlrev_b32 d, 16, d - // llll0000 : xxxxhhhh -> v_alignbyte_b32 d, s, d, 2 - // h -> l : hhhhxxxx : llllxxxx -> v_lshrrev_b32 d, 16, d - // 0000hhhh : llllxxxx -> v_alignbyte_b32 d, d, s, 2 - // h -> h : xxxxllll : hhhhxxxx -> v_alignbyte_b32 d, d, s, 2 - // llllhhhh : hhhhxxxx -> v_alignbyte_b32 d, d, d, 2 - bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass; bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg); DestReg = RI.getMatchingSuperReg(DestReg, @@ -702,49 +692,18 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, SrcLow ? AMDGPU::lo16 : AMDGPU::hi16, &AMDGPU::VGPR_32RegClass); - if (DestReg == SrcReg) { - // l -> h : v_pk_add_u16 v1, v1, 0 op_sel_hi:[0,0] - // h -> l : v_pk_add_u16 v1, v1, 0 op_sel:[1,0] op_sel_hi:[1,0] - if (DstLow == SrcLow) - return; - BuildMI(MBB, MI, DL, get(AMDGPU::V_PK_ADD_U16), DestReg) - .addImm(DstLow ? SISrcMods::OP_SEL_0 | SISrcMods::OP_SEL_1 : 0) - .addReg(DestReg, RegState::Undef) - .addImm(0) // src1_mod - .addImm(0) // src1 - .addImm(0) - .addImm(0) - .addImm(0) - .addImm(0) - .addImm(0); - - return; - } - - // Last instruction first: - auto Last = BuildMI(MBB, MI, DL, get(AMDGPU::V_ALIGNBYTE_B32), DestReg) - .addReg((SrcLow && !DstLow) ? SrcReg : DestReg, - (SrcLow && !DstLow) ? getKillRegState(KillSrc) : 0) - .addReg((!SrcLow && DstLow) ? SrcReg : DestReg, - (!SrcLow && DstLow) ? getKillRegState(KillSrc) : 0) - .addImm(2); - - unsigned OpcFirst = (DstLow == SrcLow) ? AMDGPU::V_ALIGNBYTE_B32 - : SrcLow ? AMDGPU::V_LSHRREV_B32_e32 - : AMDGPU::V_LSHLREV_B32_e32; - auto First = BuildMI(MBB, &*Last, DL, get(OpcFirst), DestReg); - if (DstLow == SrcLow) { // alignbyte - First - .addReg(SrcLow ? SrcReg : DestReg, - SrcLow ? getKillRegState(KillSrc) : unsigned(RegState::Undef)) - .addReg(SrcLow ? DestReg : SrcReg, - SrcLow ? unsigned(RegState::Undef) : getKillRegState(KillSrc)) - .addImm(2); - } else { - First.addImm(16) - .addReg(DestReg, RegState::Undef); - } - + auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg) + .addImm(0) // src0_modifiers + .addReg(SrcReg) + .addImm(0) // clamp + .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 + : AMDGPU::SDWA::SdwaSel::WORD_1) + .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) + .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 + : AMDGPU::SDWA::SdwaSel::WORD_1) + .addReg(DestReg, RegState::Implicit | RegState::Undef); + // First implicit operand is $exec. 
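+  // Copy via a single SDWA move: src0_sel selects the source half, dst_sel
+  // selects the destination half, and UNUSED_PRESERVE keeps the other 16
+  // bits of the destination register unchanged.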
+ MIB->tieOperands(0, MIB->getNumOperands() - 1); return; } diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir index 79d2f48421fd..f5c507be361d 100644 --- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir @@ -1,8 +1,9 @@ +# RUN: llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s # RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefixes=GCN,GFX10 %s # GCN-LABEL: {{^}}lo_to_lo: -# GCN: v_alignbyte_b32 v1, v0, v1, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 name: lo_to_lo tracksRegLiveness: true body: | @@ -13,8 +14,7 @@ body: | ... # GCN-LABEL: {{^}}lo_to_hi: -# GCN: v_lshrrev_b32_e32 v1, 16, v1 -# GCN-NEXT: v_alignbyte_b32 v1, v0, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 name: lo_to_hi tracksRegLiveness: true body: | @@ -25,8 +25,7 @@ body: | ... # GCN-LABEL: {{^}}hi_to_lo: -# GCN: v_lshlrev_b32_e32 v1, 16, v1 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v0, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 name: hi_to_lo tracksRegLiveness: true body: | @@ -37,8 +36,7 @@ body: | ... # GCN-LABEL: {{^}}hi_to_hi: -# GCN: v_alignbyte_b32 v1, v1, v0, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 name: hi_to_hi tracksRegLiveness: true body: | @@ -49,8 +47,9 @@ body: | ... # GCN-LABEL: {{^}}lo_to_lo_samereg: -# GCN: s_waitcnt -# GCN-NEXT: s_endpgm +# GCN: s_waitcnt +# GFX10-NEXT: s_waitcnt_vscnt +# GCN-NEXT: s_endpgm name: lo_to_lo_samereg tracksRegLiveness: true body: | @@ -61,7 +60,7 @@ body: | ... # GCN-LABEL: {{^}}lo_to_hi_samereg: -# GCN: v_pk_add_u16 v0, v0, 0 op_sel_hi:[0,0] +# GCN: v_mov_b32_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 name: lo_to_hi_samereg tracksRegLiveness: true body: | @@ -72,7 +71,7 @@ body: | ... # GCN-LABEL: {{^}}hi_to_lo_samereg: -# GCN: v_pk_add_u16 v0, v0, 0 op_sel:[1,0] op_sel_hi:[1,0] +# GCN: v_mov_b32_sdwa v0, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 name: hi_to_lo_samereg tracksRegLiveness: true body: | @@ -84,6 +83,7 @@ body: | # GCN-LABEL: {{^}}hi_to_hi_samereg: # GCN: s_waitcnt +# GFX10-NEXT: s_waitcnt_vscnt # GCN-NEXT: s_endpgm name: hi_to_hi_samereg tracksRegLiveness: true @@ -95,8 +95,7 @@ body: | ... # GCN-LABEL: {{^}}lo_to_lo_def_livein: -# GCN: v_alignbyte_b32 v1, v0, v1, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 name: lo_to_lo_def_livein tracksRegLiveness: true body: | @@ -109,8 +108,7 @@ body: | ... # GCN-LABEL: {{^}}lo_to_hi_def_livein: -# GCN: v_lshrrev_b32_e32 v1, 16, v1 -# GCN-NEXT: v_alignbyte_b32 v1, v0, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 name: lo_to_hi_def_livein tracksRegLiveness: true body: | @@ -123,8 +121,7 @@ body: | ... 
# GCN-LABEL: {{^}}hi_to_lo_def_livein: -# GCN: v_lshlrev_b32_e32 v1, 16, v1 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v0, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 name: hi_to_lo_def_livein tracksRegLiveness: true body: | @@ -137,8 +134,7 @@ body: | ... # GCN-LABEL: {{^}}hi_to_hi_def_livein: -# GCN: v_alignbyte_b32 v1, v1, v0, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 name: hi_to_hi_def_livein tracksRegLiveness: true body: | @@ -152,10 +148,8 @@ body: | # TODO: This can be coalesced into a VGPR_32 copy # GCN-LABEL: {{^}}lo_to_lo_hi_to_hi: -# GCN: v_alignbyte_b32 v1, v0, v1, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v1, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v0, 2 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +# GCN-NEXT: v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 # GCN-NEXT: v_mov_b32_e32 v2, v1 # GCN-NEXT: s_endpgm name: lo_to_lo_hi_to_hi @@ -170,10 +164,8 @@ body: | ... # GCN-LABEL: {{^}}lo_to_hi_hi_to_lo: -# GCN: v_lshlrev_b32_e32 v1, 16, v1 -# GCN-NEXT: v_alignbyte_b32 v1, v1, v0, 2 -# GCN-NEXT: v_lshrrev_b32_e32 v1, 16, v1 -# GCN-NEXT: v_alignbyte_b32 v1, v0, v1, 2 +# GCN: v_mov_b32_sdwa v1, v0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_1 +# GCN-NEXT: v_mov_b32_sdwa v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 # GCN-NEXT: v_mov_b32_e32 v2, v1 # GCN-NEXT: s_endpgm name: lo_to_hi_hi_to_lo @@ -189,9 +181,10 @@ body: | # NB: copy of undef just killed instead of expansion # GCN-LABEL: {{^}}lo_to_lo_undef: -# GCN: s_waitcnt -# GCN-NEXT: v_mov_b32_e32 v2, v1 -# GCN-NEXT: s_endpgm +# GCN: s_waitcnt +# GFX10-NEXT: s_waitcnt_vscnt +# GCN-NEXT: v_mov_b32_e32 v2, v1 +# GCN-NEXT: s_endpgm name: lo_to_lo_undef tracksRegLiveness: true body: | From 7d4546e3cf410cc5f9295ed57a8462ccbb5aa2a6 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Apr 2020 14:44:22 -0400 Subject: [PATCH 203/216] [libc++] Split features for platform detection into its own function This will allow refactoring how the locales are figured out more easily. --- libcxx/utils/libcxx/test/config.py | 6 ++---- libcxx/utils/libcxx/test/target_info.py | 7 +++++++ 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/libcxx/utils/libcxx/test/config.py b/libcxx/utils/libcxx/test/config.py index 92833ceca567..c31a47fb4f5d 100644 --- a/libcxx/utils/libcxx/test/config.py +++ b/libcxx/utils/libcxx/test/config.py @@ -355,8 +355,6 @@ def configure_features(self): self.config.available_features.add(f.strip()) self.target_info.add_locale_features(self.config.available_features) - target_platform = self.target_info.platform() - # Write an "available feature" that combines the triple when # use_system_cxx_lib is enabled. This is so that we can easily write # XFAIL markers for tests that are known to fail with versions of @@ -379,8 +377,8 @@ def configure_features(self): self.config.available_features.add('availability=%s' % name) self.config.available_features.add('availability=%s%s' % (name, version)) - # Insert the platform name into the available features as a lower case. - self.config.available_features.add(target_platform) + # Insert the platform name and version into the available features. 
+ self.target_info.add_platform_features(self.config.available_features) # Simulator testing can take a really long time for some of these tests # so add a feature check so we can REQUIRES: long_tests in them diff --git a/libcxx/utils/libcxx/test/target_info.py b/libcxx/utils/libcxx/test/target_info.py index 00af3b8a56e5..caa161ec93be 100644 --- a/libcxx/utils/libcxx/test/target_info.py +++ b/libcxx/utils/libcxx/test/target_info.py @@ -40,6 +40,9 @@ def configure_env(self, env): pass def allow_cxxabi_link(self): return True def use_lit_shell_default(self): return False + def add_platform_features(self, features): + features.add(self.platform()) + def add_path(self, dest_env, new_path): if not new_path: return @@ -229,6 +232,10 @@ def platform_ver(self): def add_locale_features(self, features): self.add_common_locales(features) + + def add_platform_features(self, features): + super(LinuxLocalTI, self).add_platform_features(features) + # Some linux distributions have different locale data than others. # Insert the distributions name and name-version into the available # features to allow tests to XFAIL on them. From 4623c2ffa4cf39bd94295e8ad1b8ebaaf01a16cc Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 16 Apr 2020 14:32:15 -0700 Subject: [PATCH 204/216] Fix interaction of static plugins with -DLLVM_LINK_LLVM_DYLIB=ON. We should link static plugins into libLLVM.so; they shouldn't depend on libLLVM.so. Fixes https://bugs.llvm.org/show_bug.cgi?id=45571 Differential Revision: https://reviews.llvm.org/D78332 --- llvm/cmake/modules/AddLLVM.cmake | 2 +- polly/lib/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/cmake/modules/AddLLVM.cmake b/llvm/cmake/modules/AddLLVM.cmake index 633a3b0c0514..625d1c74458b 100644 --- a/llvm/cmake/modules/AddLLVM.cmake +++ b/llvm/cmake/modules/AddLLVM.cmake @@ -883,7 +883,7 @@ function(add_llvm_pass_plugin name) list(REMOVE_ITEM ARG_UNPARSED_ARGUMENTS BUILDTREE_ONLY) # process_llvm_pass_plugins takes care of the actual linking, just create an # object library as of now - add_llvm_library(${name} OBJECT ${ARG_UNPARSED_ARGUMENTS}) + add_llvm_component_library(${name} OBJECT ${ARG_UNPARSED_ARGUMENTS}) target_compile_definitions(${name} PRIVATE LLVM_${name_upper}_LINK_INTO_TOOLS) set_property(TARGET ${name} APPEND PROPERTY COMPILE_DEFINITIONS LLVM_LINK_INTO_TOOLS) if (TARGET intrinsics_gen) diff --git a/polly/lib/CMakeLists.txt b/polly/lib/CMakeLists.txt index 35614973a5dd..e754a3103a12 100644 --- a/polly/lib/CMakeLists.txt +++ b/polly/lib/CMakeLists.txt @@ -107,7 +107,7 @@ if (GPU_CODEGEN) llvm_map_components_to_libnames(nvptx_libs NVPTX) endif () -if (LLVM_LINK_LLVM_DYLIB) +if (LLVM_LINK_LLVM_DYLIB AND NOT LLVM_POLLY_LINK_INTO_TOOLS) # The shlib/dylib contains all the LLVM components # (including NVPTX is enabled) already. 
Adding them to target_link_libraries # would cause them being twice in the address space From 992fbce4e9b034e752dcb4e1be0306b49bbc6b19 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Wed, 15 Apr 2020 16:16:13 -0700 Subject: [PATCH 205/216] [AMDGPU] copyPhysReg() for 16 bit SGPR subregs Differential Revision: https://reviews.llvm.org/D78255 --- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 69 ++++++++++++++----- .../CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir | 31 +++++++++ .../CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir | 11 +++ .../AMDGPU/lo16-lo16-physreg-copy-sgpr.mir | 26 +++++++ 4 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir create mode 100644 llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp index 92d0440a5806..60569dfbbe1d 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -510,11 +510,10 @@ bool SIInstrInfo::shouldScheduleLoadsNear(SDNode *Load0, SDNode *Load1, static void reportIllegalCopy(const SIInstrInfo *TII, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, MCRegister DestReg, - MCRegister SrcReg, bool KillSrc) { + MCRegister SrcReg, bool KillSrc, + const char *Msg = "illegal SGPR to VGPR copy") { MachineFunction *MF = MBB.getParent(); - DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), - "illegal SGPR to VGPR copy", - DL, DS_Error); + DiagnosticInfoUnsupported IllegalCopy(MF->getFunction(), Msg, DL, DS_Error); LLVMContext &C = MF->getFunction().getContext(); C.diagnose(IllegalCopy); @@ -679,29 +678,61 @@ void SIInstrInfo::copyPhysReg(MachineBasicBlock &MBB, return; } - if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass) { + if (RC == &AMDGPU::VGPR_LO16RegClass || RC == &AMDGPU::VGPR_HI16RegClass || + RC == &AMDGPU::SGPR_LO16RegClass) { assert(AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || - AMDGPU::VGPR_HI16RegClass.contains(SrcReg)); - - bool DstLow = RC == &AMDGPU::VGPR_LO16RegClass; - bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg); - DestReg = RI.getMatchingSuperReg(DestReg, - DstLow ? AMDGPU::lo16 : AMDGPU::hi16, - &AMDGPU::VGPR_32RegClass); - SrcReg = RI.getMatchingSuperReg(SrcReg, - SrcLow ? AMDGPU::lo16 : AMDGPU::hi16, - &AMDGPU::VGPR_32RegClass); - - auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), DestReg) + AMDGPU::VGPR_HI16RegClass.contains(SrcReg) || + AMDGPU::SGPR_LO16RegClass.contains(SrcReg)); + + bool IsSGPRDst = AMDGPU::SGPR_LO16RegClass.contains(DestReg); + bool IsSGPRSrc = AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + bool DstLow = (RC == &AMDGPU::VGPR_LO16RegClass || + RC == &AMDGPU::SGPR_LO16RegClass); + bool SrcLow = AMDGPU::VGPR_LO16RegClass.contains(SrcReg) || + AMDGPU::SGPR_LO16RegClass.contains(SrcReg); + const TargetRegisterClass *DstRC = IsSGPRDst ? &AMDGPU::SGPR_32RegClass + : &AMDGPU::VGPR_32RegClass; + const TargetRegisterClass *SrcRC = IsSGPRSrc ? &AMDGPU::SGPR_32RegClass + : &AMDGPU::VGPR_32RegClass; + MCRegister NewDestReg = + RI.getMatchingSuperReg(DestReg, DstLow ? AMDGPU::lo16 : AMDGPU::hi16, + DstRC); + MCRegister NewSrcReg = + RI.getMatchingSuperReg(SrcReg, SrcLow ? 
AMDGPU::lo16 : AMDGPU::hi16, + SrcRC); + + if (IsSGPRDst) { + if (!IsSGPRSrc) { + reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc); + return; + } + + BuildMI(MBB, MI, DL, get(AMDGPU::S_MOV_B32), NewDestReg) + .addReg(NewSrcReg, getKillRegState(KillSrc)); + return; + } + + if (IsSGPRSrc && !ST.hasSDWAScalar()) { + if (!DstLow || !SrcLow) { + reportIllegalCopy(this, MBB, MI, DL, DestReg, SrcReg, KillSrc, + "Cannot use hi16 subreg on VI!"); + } + + BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_e32), NewDestReg) + .addReg(NewSrcReg, getKillRegState(KillSrc)); + return; + } + + auto MIB = BuildMI(MBB, MI, DL, get(AMDGPU::V_MOV_B32_sdwa), NewDestReg) .addImm(0) // src0_modifiers - .addReg(SrcReg) + .addReg(NewSrcReg) .addImm(0) // clamp .addImm(DstLow ? AMDGPU::SDWA::SdwaSel::WORD_0 : AMDGPU::SDWA::SdwaSel::WORD_1) .addImm(AMDGPU::SDWA::DstUnused::UNUSED_PRESERVE) .addImm(SrcLow ? AMDGPU::SDWA::SdwaSel::WORD_0 : AMDGPU::SDWA::SdwaSel::WORD_1) - .addReg(DestReg, RegState::Implicit | RegState::Undef); + .addReg(NewDestReg, RegState::Implicit | RegState::Undef); // First implicit operand is $exec. MIB->tieOperands(0, MIB->getNumOperands() - 1); return; diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir new file mode 100644 index 000000000000..40bfd60e8ccf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-illegal-copy.mir @@ -0,0 +1,31 @@ +# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=ERR,GFX8-ERR %s +# RUN: not llc -march=amdgcn -mcpu=gfx802 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=GCN %s +# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefix=ERR %s +# RUN: not llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - 2>&1 | FileCheck -check-prefixes=GCN,GFX9 %s + +# Note: GFX8 did not allow SDWA SGPR sources. Therefor no HI16 subregs can be used there. + +# GCN-LABEL: {{^}}lo_to_lo_illegal_vgpr_to_sgpr: +# GCN: ; illegal copy v0.l to s1.l +# ERR: error: :0:0: in function lo_to_lo_illegal_vgpr_to_sgpr void (): illegal SGPR to VGPR copy +name: lo_to_lo_illegal_vgpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $vgpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $vgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr: +# GFX8: ; illegal copy s0.l to v1.h +# GFX9: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +# GFX8-ERR: error: :0:0: in function lo_to_hi_sgpr_to_vgpr void (): Cannot use hi16 subreg on VI! +name: lo_to_hi_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY killed $sgpr0_lo16 + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir index f5c507be361d..f5b7f110ea20 100644 --- a/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir +++ b/llvm/test/CodeGen/AMDGPU/lo16-hi16-physreg-copy.mir @@ -193,3 +193,14 @@ body: | $vgpr2 = COPY killed $vgpr1 S_ENDPGM 0 ... + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_sgpr: +# GCN: s_mov_b32 s1, s0 +name: lo_to_lo_sgpr_to_sgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $sgpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... 
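
The MIR tests above and the new SGPR test file below exercise the cases the updated copyPhysReg() now distinguishes for 16-bit physical subregister copies. As a rough orientation, here is a minimal C++ sketch of that dispatch; the enum and function names are invented for illustration and are not the actual LLVM code, and the subtarget flag is assumed to correspond to ST.hasSDWAScalar().

  enum class Copy16 {
    SMovB32,           // SGPR -> SGPR: s_mov_b32 on the 32-bit super registers
    VMovB32,           // SGPR lo16 -> VGPR lo16 without SDWA scalar: v_mov_b32_e32
    VMovB32Sdwa,       // general case: v_mov_b32_sdwa with dst_sel/src0_sel
    IllegalVGPRToSGPR, // diagnosed via reportIllegalCopy()
    IllegalHi16OnVI    // "Cannot use hi16 subreg on VI!"
  };

  // Sketch only: mirrors the checks added above, nothing more.
  Copy16 classify16BitCopy(bool DstIsSGPR, bool SrcIsSGPR, bool DstLow,
                           bool SrcLow, bool HasSDWAScalarSrc) {
    if (DstIsSGPR)
      return SrcIsSGPR ? Copy16::SMovB32 : Copy16::IllegalVGPRToSGPR;
    if (SrcIsSGPR && !HasSDWAScalarSrc)
      return (DstLow && SrcLow) ? Copy16::VMovB32 : Copy16::IllegalHi16OnVI;
    return Copy16::VMovB32Sdwa;
  }

This matches what the tests expect: s_mov_b32 s1, s0 for the SGPR-to-SGPR copy, v_mov_b32_sdwa with the appropriate WORD_0/WORD_1 selects when a VGPR is involved, and an "illegal copy" diagnostic otherwise.
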
diff --git a/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir new file mode 100644 index 000000000000..21fc79d499bf --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/lo16-lo16-physreg-copy-sgpr.mir @@ -0,0 +1,26 @@ +# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s +# RUN: llc -march=amdgcn -mcpu=gfx1010 -start-before postrapseudos -asm-verbose=0 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s + +# Note: GFX8 did not allow SDWA SGPR sources. Therefor no HI16 subregs can be used there. + +# GCN-LABEL: {{^}}lo_to_lo_sgpr_to_vgpr: +# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_0 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +name: lo_to_lo_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_lo16 = COPY $sgpr0_lo16 + S_ENDPGM 0 +... + +# GCN-LABEL: {{^}}lo_to_hi_sgpr_to_vgpr: +# GCN: v_mov_b32_sdwa v1, s0 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 +name: lo_to_hi_sgpr_to_vgpr +tracksRegLiveness: true +body: | + bb.0: + $sgpr0 = IMPLICIT_DEF + $vgpr1_hi16 = COPY killed $sgpr0_lo16 + S_ENDPGM 0 +... From 17b1869b72f30f2702cb1abd7222027082e49eb6 Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Fri, 17 Apr 2020 20:02:03 +0100 Subject: [PATCH 206/216] Revert "[llvm][CodeGen] Addressing modes for SVE stN." This reverts commit fc4e954ed5c0825cdfe3a590ff1904ef38bc47db. The commit reported the following failure: http://lab.llvm.org:8011/builders/clang-armv7-linux-build-cache/builds/29420 FAILED: lib/Target/AArch64/CMakeFiles/LLVMAArch64CodeGen.dir/AArch64ISelDAGToDAG.cpp.o /usr/bin/c++ -DGTEST_HAS_RTTI=0 -D_DEBUG -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS -D__STDC_LIMIT_MACROS -Ilib/Target/AArch64 -I/home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/llvm/lib/Target/AArch64 -I/usr/include/libxml2 -Iinclude -I/home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/llvm/include -mthumb -fPIC -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wstring-conversion -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -fvisibility=hidden -fno-exceptions -fno-rtti -UNDEBUG -std=c++14 -MMD -MT lib/Target/AArch64/CMakeFiles/LLVMAArch64CodeGen.dir/AArch64ISelDAGToDAG.cpp.o -MF lib/Target/AArch64/CMakeFiles/LLVMAArch64CodeGen.dir/AArch64ISelDAGToDAG.cpp.o.d -o lib/Target/AArch64/CMakeFiles/LLVMAArch64CodeGen.dir/AArch64ISelDAGToDAG.cpp.o -c /home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp /home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp:1439:10: error: chosen constructor is explicit in copy-initialization return {IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset}; ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/bin/../lib/gcc/arm-linux-gnueabihf/5.4.0/../../../../include/c++/5.4.0/tuple:479:19: note: explicit constructor declared here constexpr tuple(_UElements&&... 
__elements) --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 91 +-- .../Target/AArch64/AArch64ISelLowering.cpp | 30 - .../sve-intrinsics-stN-reg-imm-addr-mode.ll | 614 ------------------ .../sve-intrinsics-stN-reg-reg-addr-mode.ll | 367 ----------- 4 files changed, 18 insertions(+), 1084 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll delete mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 6636b38fd887..5bc22abde3fa 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -261,14 +261,7 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - template - void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr, - const unsigned Opc_ri); - template - std::tuple - findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr, - const unsigned Opc_ri, const SDValue &OldBase, - const SDValue &OldOffset); + void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc); bool tryBitfieldExtractOp(SDNode *N); bool tryBitfieldExtractOpFromSExt(SDNode *N); @@ -1415,30 +1408,6 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } -/// Optimize \param OldBase and \param OldOffset selecting the best addressing -/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the -/// new Base and an SDValue representing the new offset. -template -std::tuple -AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr, - const unsigned Opc_ri, - const SDValue &OldBase, - const SDValue &OldOffset) { - SDValue NewBase = OldBase; - SDValue NewOffset = OldOffset; - // Detect a possible Reg+Imm addressing mode. - const bool IsRegImm = SelectAddrModeIndexedSVE( - N, OldBase, NewBase, NewOffset); - - // Detect a possible reg+reg addressing mode, but only if we haven't already - // detected a Reg+Imm one. - const bool IsRegReg = - !IsRegImm && SelectSVERegRegAddrMode(OldBase, NewBase, NewOffset); - - // Select the instruction. - return {IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset}; -} - void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); @@ -1459,27 +1428,18 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, ReplaceNode(N, St); } -template void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, - const unsigned Opc_rr, - const unsigned Opc_ri) { + const unsigned Opc) { SDLoc dl(N); // Form a REG_SEQUENCE to force register allocation. SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); SDValue RegSeq = createZTuple(Regs); - // Optimize addressing mode. 
- unsigned Opc; - SDValue Offset, Base; - std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( - N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), - CurDAG->getTargetConstant(0, dl, MVT::i64)); - - SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate - Base, // address - Offset, // offset - N->getOperand(0)}; // chain + SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate + N->getOperand(NumVecs + 3), // address + CurDAG->getTargetConstant(0, dl, MVT::i64), // offset + N->getOperand(0)}; // chain SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); ReplaceNode(N, St); @@ -3950,60 +3910,48 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } case Intrinsic::aarch64_sve_st2: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 2, AArch64::ST2B, - AArch64::ST2B_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 2, AArch64::ST2H, - AArch64::ST2H_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 2, AArch64::ST2W, - AArch64::ST2W_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 2, AArch64::ST2D, - AArch64::ST2D_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2D_IMM); return; } break; } case Intrinsic::aarch64_sve_st3: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 3, AArch64::ST3B, - AArch64::ST3B_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 3, AArch64::ST3H, - AArch64::ST3H_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 3, AArch64::ST3W, - AArch64::ST3W_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 3, AArch64::ST3D, - AArch64::ST3D_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3D_IMM); return; } break; } case Intrinsic::aarch64_sve_st4: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 4, AArch64::ST4B, - AArch64::ST4B_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 4, AArch64::ST4H, - AArch64::ST4H_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 4, AArch64::ST4W, - AArch64::ST4W_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 4, AArch64::ST4D, - AArch64::ST4D_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4D_IMM); return; } break; @@ -4639,9 +4587,6 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { if (isa(Root)) return cast(Root)->getMemoryVT(); - if (isa(Root)) - return cast(Root)->getMemoryVT(); - const unsigned Opcode = Root->getOpcode(); // For custom ISD nodes, we have to look at them individually to extract the // type of the data moved to/from memory. 
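
The error quoted in the commit message above comes from returning a std::tuple through a braced initializer list: with older libstdc++ (the log names 5.4), the tuple constructor selected for that return is declared explicit, so copy-list-initialization is rejected. The following is a minimal standalone sketch of the problem and of the std::make_tuple workaround that the re-land in the next patch adopts; the function and its parameters are placeholders, not the reverted LLVM code, which returns an opcode and two SDValues.

  #include <tuple>

  std::tuple<unsigned, int, int> pickOpcode(bool IsRegReg, unsigned OpcRR,
                                            unsigned OpcRI, int Base, int Off) {
    // Rejected by older libstdc++ such as 5.4: the tuple constructor selected
    // here is 'explicit', and a braced return uses copy-list-initialization.
    //   return {IsRegReg ? OpcRR : OpcRI, Base, Off};
    // Accepted everywhere: direct construction through std::make_tuple.
    return std::make_tuple(IsRegReg ? OpcRR : OpcRI, Base, Off);
  }
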
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 8ceb970efcf4..375e2681d100 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8913,30 +8913,6 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, DL, VT); } -/// Set the IntrinsicInfo for the `aarch64_sve_st` intrinsics. -template -static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info, - const CallInst &CI) { - Info.opc = ISD::INTRINSIC_VOID; - // Retrieve EC from first vector argument. - const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType()); - ElementCount EC = VT.getVectorElementCount(); -#ifndef NDEBUG - // Check the assumption that all input vectors are the same type. - for (unsigned I = 0; I < NumVecs; ++I) - assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) && - "Invalid type."); -#endif - // memVT is `NumVecs * VT`. - Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(), - EC * NumVecs); - Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1); - Info.offset = 0; - Info.align.reset(); - Info.flags = MachineMemOperand::MOStore; - return true; -} - /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. @@ -8946,12 +8922,6 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned Intrinsic) const { auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { - case Intrinsic::aarch64_sve_st2: - return setInfoSVEStN<2>(Info, I); - case Intrinsic::aarch64_sve_st3: - return setInfoSVEStN<3>(Info, I); - case Intrinsic::aarch64_sve_st4: - return setInfoSVEStN<4>(Info, I); case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: case Intrinsic::aarch64_neon_ld4: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll deleted file mode 100644 index 8ef27dc7ed70..000000000000 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ /dev/null @@ -1,614 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s - -; NOTE: invalid, upper and lower bound immediate values of the reg+imm -; addressing mode are checked only for the byte version of each -; instruction (`stb`), as the code for detecting the immediate is -; common to all instructions, and varies only for the number of -; elements of the structured store, which is = 2, 3, 4. 
- -; -; ST2B -; - -define void @st2b_i8_valid_imm( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2b_i8_valid_imm: -; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 2 - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2: -; CHECK: rdvl x[[N:[0-9]+]], #3 -; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 3 - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound: -; CHECK: rdvl x[[N:[0-9]+]], #-18 -; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -18 - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2b_i8_invalid_imm_out_of_upper_bound( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound: -; CHECK: rdvl x[[N:[0-9]+]], #16 -; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 16 - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2b_i8_valid_imm_lower_bound( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2b_i8_valid_imm_lower_bound: -; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -16 - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2b_i8_valid_imm_upper_bound( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2b_i8_valid_imm_upper_bound: -; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 14 - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST2H -; - -define void @st2h_i16( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2h_i16: -; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 2 - call void @llvm.aarch64.sve.st2.nxv8i16( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2h_f16( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2h_f16: -; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 2 - call void @llvm.aarch64.sve.st2.nxv8f16( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST2W -; - -define void @st2w_i32( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2w_i32: -; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 4 - call void @llvm.aarch64.sve.st2.nxv4i32( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2w_f32( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2w_f32: -; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 6 - call void @llvm.aarch64.sve.st2.nxv4f32( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST2D -; - -define void @st2d_i64( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2d_i64: -; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 8 - call void @llvm.aarch64.sve.st2.nxv2i64( %v0, - %v1, - %pred, - * %base) - ret void -} - 
-define void @st2d_f64( %v0, %v1, %pred, * %addr) { -; CHECK-LABEL: st2d_f64: -; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 10 - call void @llvm.aarch64.sve.st2.nxv2f64( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST3B -; - -define void @st3b_i8_valid_imm( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_valid_imm: -; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 3 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01: -; CHECK: rdvl x[[N:[0-9]+]], #4 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 4 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3b_i8_invalid_imm_not_multiple_of_3_02( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02: -; CHECK: rdvl x[[N:[0-9]+]], #5 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 5 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound: -; CHECK: rdvl x[[N:[0-9]+]], #-27 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -27 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3b_i8_invalid_imm_out_of_upper_bound( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound: -; CHECK: rdvl x[[N:[0-9]+]], #24 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 24 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3b_i8_valid_imm_lower_bound( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_valid_imm_lower_bound: -; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -24 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3b_i8_valid_imm_upper_bound( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3b_i8_valid_imm_upper_bound: -; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 21 - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST3H -; - -define void @st3h_i16( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3h_i16: -; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 6 - call void @llvm.aarch64.sve.st3.nxv8i16( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3h_f16( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3h_f16: -; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 9 - call void @llvm.aarch64.sve.st3.nxv8f16( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST3W -; - -define void @st3w_i32( %v0, %v1, 
%v2, %pred, * %addr) { -; CHECK-LABEL: st3w_i32: -; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 12 - call void @llvm.aarch64.sve.st3.nxv4i32( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3w_f32( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3w_f32: -; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 15 - call void @llvm.aarch64.sve.st3.nxv4f32( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST3D -; - -define void @st3d_i64( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3d_i64: -; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 18 - call void @llvm.aarch64.sve.st3.nxv2i64( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3d_f64( %v0, %v1, %v2, %pred, * %addr) { -; CHECK-LABEL: st3d_f64: -; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -3 - call void @llvm.aarch64.sve.st3.nxv2f64( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST4B -; - -define void @st4b_i8_valid_imm( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_valid_imm: -; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 4 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01: -; CHECK: rdvl x[[N:[0-9]+]], #5 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 5 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_invalid_imm_not_multiple_of_4_02( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02: -; CHECK: rdvl x[[N:[0-9]+]], #6 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 6 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_invalid_imm_not_multiple_of_4_03( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03: -; CHECK: rdvl x[[N:[0-9]+]], #7 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 7 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound: -; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9) -; xM = -9 * 2^6 -; xP = RDVL * 2^-4 -; xBASE = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36 -; CHECK: rdvl x[[N:[0-9]+]], #1 -; CHECK-DAG: mov x[[M:[0-9]+]], #-576 -; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4 -; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]] -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -36 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_invalid_imm_out_of_upper_bound( %v0, 
%v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound: -; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1) -; xM = 2^9 -; xP = RDVL * 2^-4 -; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32 -; CHECK: rdvl x[[N:[0-9]+]], #1 -; CHECK-DAG: mov w[[M:[0-9]+]], #512 -; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4 -; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]] -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 32 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_valid_imm_lower_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_valid_imm_lower_bound: -; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 -32 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4b_i8_valid_imm_upper_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4b_i8_valid_imm_upper_bound: -; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 28 - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -; -; ST4H -; - -define void @st4h_i16( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4h_i16: -; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 8 - call void @llvm.aarch64.sve.st4.nxv8i16( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4h_f16( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4h_f16: -; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 12 - call void @llvm.aarch64.sve.st4.nxv8f16( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -; -; ST4W -; - -define void @st4w_i32( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4w_i32: -; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 16 - call void @llvm.aarch64.sve.st4.nxv4i32( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4w_f32( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4w_f32: -; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 20 - call void @llvm.aarch64.sve.st4.nxv4f32( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -; -; ST4D -; - -define void @st4d_i64( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4d_i64: -; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 24 - call void @llvm.aarch64.sve.st4.nxv2i64( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4d_f64( %v0, %v1, %v2, %v3, %pred, * %addr) { -; CHECK-LABEL: st4d_f64: -; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl] -; CHECK-NEXT: ret - %base = getelementptr , * %addr, i64 28 - call void @llvm.aarch64.sve.st4.nxv2f64( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -declare void @llvm.aarch64.sve.st2.nxv16i8(, , , *) -declare void @llvm.aarch64.sve.st2.nxv8i16(, , , *) -declare void @llvm.aarch64.sve.st2.nxv4i32(, , , *) -declare void @llvm.aarch64.sve.st2.nxv2i64(, , , *) -declare void 
@llvm.aarch64.sve.st2.nxv8f16(, , , *) -declare void @llvm.aarch64.sve.st2.nxv4f32(, , , *) -declare void @llvm.aarch64.sve.st2.nxv2f64(, , , *) - -declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv4i32(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv2i64(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , *) - -declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv2i64(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , *) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll deleted file mode 100644 index 4945fdca9498..000000000000 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ /dev/null @@ -1,367 +0,0 @@ -; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s - -; -; ST2B -; - -define void @st2b_i8( %v0, %v1, %pred, i8* %addr, i64 %offset) { -; CHECK-LABEL: st2b_i8: -; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1] -; CHECK-NEXT: ret - %1 = getelementptr i8, i8* %addr, i64 %offset - %base = bitcast i8* %1 to * - call void @llvm.aarch64.sve.st2.nxv16i8( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST2H -; - -define void @st2h_i16( %v0, %v1, %pred, i16* %addr, i64 %offset) { -; CHECK-LABEL: st2h_i16: -; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - %1 = getelementptr i16, i16* %addr, i64 %offset - %base = bitcast i16* %1 to * - call void @llvm.aarch64.sve.st2.nxv8i16( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2h_f16( %v0, %v1, %pred, half* %addr, i64 %offset) { -; CHECK-LABEL: st2h_f16: -; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - %1 = getelementptr half, half* %addr, i64 %offset - %base = bitcast half* %1 to * - call void @llvm.aarch64.sve.st2.nxv8f16( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST2W -; - -define void @st2w_i32( %v0, %v1, %pred, i32* %addr, i64 %offset) { -; CHECK-LABEL: st2w_i32: -; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] -; CHECK-NEXT: ret - %1 = getelementptr i32, i32* %addr, i64 %offset - %base = bitcast i32* %1 to * - call void @llvm.aarch64.sve.st2.nxv4i32( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2w_f32( %v0, %v1, %pred, float* %addr, i64 %offset) { -; CHECK-LABEL: st2w_f32: -; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] -; CHECK-NEXT: ret - %1 = getelementptr float, float* %addr, i64 %offset - %base = bitcast float* %1 to * - call void @llvm.aarch64.sve.st2.nxv4f32( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST2D -; - -define void @st2d_i64( %v0, %v1, %pred, i64* %addr, i64 %offset) { -; CHECK-LABEL: st2d_i64: -; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] -; CHECK-NEXT: ret - %1 = getelementptr i64, i64* %addr, i64 %offset - %base = bitcast i64* %1 to * - call void @llvm.aarch64.sve.st2.nxv2i64( %v0, - %v1, - %pred, - * %base) - ret void -} - -define void @st2d_f64( %v0, %v1, %pred, double* %addr, i64 %offset) { -; CHECK-LABEL: st2d_f64: -; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, 
lsl #3] -; CHECK-NEXT: ret - %1 = getelementptr double, double* %addr, i64 %offset - %base = bitcast double* %1 to * - call void @llvm.aarch64.sve.st2.nxv2f64( %v0, - %v1, - %pred, - * %base) - ret void -} - -; -; ST3B -; - -define void @st3b_i8( %v0, %v1, %v2, %pred, i8* %addr, i64 %offset) { -; CHECK-LABEL: st3b_i8: -; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1] -; CHECK-NEXT: ret - %1 = getelementptr i8, i8* %addr, i64 %offset - %base = bitcast i8* %1 to * - call void @llvm.aarch64.sve.st3.nxv16i8( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST3H -; - -define void @st3h_i16( %v0, %v1, %v2, %pred, i16* %addr, i64 %offset) { -; CHECK-LABEL: st3h_i16: -; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - %1 = getelementptr i16, i16* %addr, i64 %offset - %base = bitcast i16* %1 to * - call void @llvm.aarch64.sve.st3.nxv8i16( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3h_f16( %v0, %v1, %v2, %pred, half* %addr, i64 %offset) { -; CHECK-LABEL: st3h_f16: -; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - %1 = getelementptr half, half* %addr, i64 %offset - %base = bitcast half* %1 to * - call void @llvm.aarch64.sve.st3.nxv8f16( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST3W -; - -define void @st3w_i32( %v0, %v1, %v2, %pred, i32* %addr, i64 %offset) { -; CHECK-LABEL: st3w_i32: -; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] -; CHECK-NEXT: ret - %1 = getelementptr i32, i32* %addr, i64 %offset - %base = bitcast i32* %1 to * - call void @llvm.aarch64.sve.st3.nxv4i32( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3w_f32( %v0, %v1, %v2, %pred, float* %addr, i64 %offset) { -; CHECK-LABEL: st3w_f32: -; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] -; CHECK-NEXT: ret - %1 = getelementptr float, float* %addr, i64 %offset - %base = bitcast float* %1 to * - call void @llvm.aarch64.sve.st3.nxv4f32( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST3D -; - -define void @st3d_i64( %v0, %v1, %v2, %pred, i64* %addr, i64 %offset) { -; CHECK-LABEL: st3d_i64: -; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] -; CHECK-NEXT: ret - %1 = getelementptr i64, i64* %addr, i64 %offset - %base = bitcast i64* %1 to * - call void @llvm.aarch64.sve.st3.nxv2i64( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -define void @st3d_f64( %v0, %v1, %v2, %pred, double* %addr, i64 %offset) { -; CHECK-LABEL: st3d_f64: -; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] -; CHECK-NEXT: ret - %1 = getelementptr double, double* %addr, i64 %offset - %base = bitcast double* %1 to * - call void @llvm.aarch64.sve.st3.nxv2f64( %v0, - %v1, - %v2, - %pred, - * %base) - ret void -} - -; -; ST4B -; - -define void @st4b_i8( %v0, %v1, %v2, %v3, %pred, i8* %addr, i64 %offset) { -; CHECK-LABEL: st4b_i8: -; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1] -; CHECK-NEXT: ret - %1 = getelementptr i8, i8* %addr, i64 %offset - %base = bitcast i8* %1 to * - call void @llvm.aarch64.sve.st4.nxv16i8( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -; -; ST4H -; - -define void @st4h_i16( %v0, %v1, %v2, %v3, %pred, i16* %addr, i64 %offset) { -; CHECK-LABEL: st4h_i16: -; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - %1 = getelementptr i16, i16* %addr, i64 %offset - %base = bitcast i16* %1 to * - call void @llvm.aarch64.sve.st4.nxv8i16( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4h_f16( 
%v0, %v1, %v2, %v3, %pred, half* %addr, i64 %offset) { -; CHECK-LABEL: st4h_f16: -; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] -; CHECK-NEXT: ret - %1 = getelementptr half, half* %addr, i64 %offset - %base = bitcast half* %1 to * - call void @llvm.aarch64.sve.st4.nxv8f16( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -; -; ST4W -; - -define void @st4w_i32( %v0, %v1, %v2, %v3, %pred, i32* %addr, i64 %offset) { -; CHECK-LABEL: st4w_i32: -; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] -; CHECK-NEXT: ret - %1 = getelementptr i32, i32* %addr, i64 %offset - %base = bitcast i32* %1 to * - call void @llvm.aarch64.sve.st4.nxv4i32( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4w_f32( %v0, %v1, %v2, %v3, %pred, float* %addr, i64 %offset) { -; CHECK-LABEL: st4w_f32: -; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] -; CHECK-NEXT: ret - %1 = getelementptr float, float* %addr, i64 %offset - %base = bitcast float* %1 to * - call void @llvm.aarch64.sve.st4.nxv4f32( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -; -; ST4D -; - -define void @st4d_i64( %v0, %v1, %v2, %v3, %pred, i64* %addr, i64 %offset) { -; CHECK-LABEL: st4d_i64: -; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] -; CHECK-NEXT: ret - %1 = getelementptr i64, i64* %addr, i64 %offset - %base = bitcast i64* %1 to * - call void @llvm.aarch64.sve.st4.nxv2i64( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -define void @st4d_f64( %v0, %v1, %v2, %v3, %pred, double* %addr, i64 %offset) { -; CHECK-LABEL: st4d_f64: -; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] -; CHECK-NEXT: ret - %1 = getelementptr double, double* %addr, i64 %offset - %base = bitcast double* %1 to * - call void @llvm.aarch64.sve.st4.nxv2f64( %v0, - %v1, - %v2, - %v3, - %pred, - * %base) - ret void -} - -declare void @llvm.aarch64.sve.st2.nxv16i8(, , , *) -declare void @llvm.aarch64.sve.st2.nxv8i16(, , , *) -declare void @llvm.aarch64.sve.st2.nxv4i32(, , , *) -declare void @llvm.aarch64.sve.st2.nxv2i64(, , , *) -declare void @llvm.aarch64.sve.st2.nxv8f16(, , , *) -declare void @llvm.aarch64.sve.st2.nxv4f32(, , , *) -declare void @llvm.aarch64.sve.st2.nxv2f64(, , , *) - -declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv4i32(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv2i64(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , *) -declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , *) - -declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv2i64(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , *) -declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , *) From 897fdec586d9ad4c101738caa723bacdda15a769 Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Fri, 17 Apr 2020 20:18:02 +0100 Subject: [PATCH 207/216] [llvm][CodeGen] Addressing modes for SVE stN. This reverts commit 17b1869b72f30f2702cb1abd7222027082e49eb6. 
It is an attempt to fix the failure reported at The patch differs from the original one reviwed at https://reviews.llvm.org/D77435 only for the use of the std::make_tuple in building the return value of `findAddrModeSVELoadStore`: - return {IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset}; + return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, the original patch submitted at https://github.com/llvm/llvm-project/commit/fc4e954ed5c0825cdfe3a590ff1904ef38bc47db was failing the following build: http://lab.llvm.org:8011/builders/clang-armv7-linux-build-cache/builds/29420/ with error: /home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp /home/buildslave/buildslave/clang-armv7-linux-build-cache/llvm/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp:1439:10: error: chosen constructor is explicit in copy-initialization return {IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset}; ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ /usr/bin/../lib/gcc/arm-linux-gnueabihf/5.4.0/../../../../include/c++/5.4.0/tuple:479:19: note: explicit constructor declared here constexpr tuple(_UElements&&... __elements) ^ 1 error generated. --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 91 ++- .../Target/AArch64/AArch64ISelLowering.cpp | 30 + .../sve-intrinsics-stN-reg-imm-addr-mode.ll | 614 ++++++++++++++++++ .../sve-intrinsics-stN-reg-reg-addr-mode.ll | 367 +++++++++++ 4 files changed, 1084 insertions(+), 18 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll create mode 100644 llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 5bc22abde3fa..6e7896e9497e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -261,7 +261,14 @@ class AArch64DAGToDAGISel : public SelectionDAGISel { void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc); - void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc); + template + void SelectPredicatedStore(SDNode *N, unsigned NumVecs, const unsigned Opc_rr, + const unsigned Opc_ri); + template + std::tuple + findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr, + const unsigned Opc_ri, const SDValue &OldBase, + const SDValue &OldOffset); bool tryBitfieldExtractOp(SDNode *N); bool tryBitfieldExtractOpFromSExt(SDNode *N); @@ -1408,6 +1415,30 @@ void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +/// Optimize \param OldBase and \param OldOffset selecting the best addressing +/// mode. Returns a tuple consisting of an Opcode, an SDValue representing the +/// new Base and an SDValue representing the new offset. +template +std::tuple +AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, const unsigned Opc_rr, + const unsigned Opc_ri, + const SDValue &OldBase, + const SDValue &OldOffset) { + SDValue NewBase = OldBase; + SDValue NewOffset = OldOffset; + // Detect a possible Reg+Imm addressing mode. + const bool IsRegImm = SelectAddrModeIndexedSVE( + N, OldBase, NewBase, NewOffset); + + // Detect a possible reg+reg addressing mode, but only if we haven't already + // detected a Reg+Imm one. 
+ const bool IsRegReg = + !IsRegImm && SelectSVERegRegAddrMode(OldBase, NewBase, NewOffset); + + // Select the instruction. + return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); +} + void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc) { SDLoc dl(N); @@ -1428,18 +1459,27 @@ void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, ReplaceNode(N, St); } +template void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, - const unsigned Opc) { + const unsigned Opc_rr, + const unsigned Opc_ri) { SDLoc dl(N); // Form a REG_SEQUENCE to force register allocation. SmallVector Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); SDValue RegSeq = createZTuple(Regs); - SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate - N->getOperand(NumVecs + 3), // address - CurDAG->getTargetConstant(0, dl, MVT::i64), // offset - N->getOperand(0)}; // chain + // Optimize addressing mode. + unsigned Opc; + SDValue Offset, Base; + std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( + N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), + CurDAG->getTargetConstant(0, dl, MVT::i64)); + + SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate + Base, // address + Offset, // offset + N->getOperand(0)}; // chain SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); ReplaceNode(N, St); @@ -3910,48 +3950,60 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { } case Intrinsic::aarch64_sve_st2: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 2, AArch64::ST2B_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2B, + AArch64::ST2B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 2, AArch64::ST2H_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2H, + AArch64::ST2H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 2, AArch64::ST2W_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2W, + AArch64::ST2W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 2, AArch64::ST2D_IMM); + SelectPredicatedStore(Node, 2, AArch64::ST2D, + AArch64::ST2D_IMM); return; } break; } case Intrinsic::aarch64_sve_st3: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 3, AArch64::ST3B_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3B, + AArch64::ST3B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 3, AArch64::ST3H_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3H, + AArch64::ST3H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 3, AArch64::ST3W_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3W, + AArch64::ST3W_IMM); return; } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 3, AArch64::ST3D_IMM); + SelectPredicatedStore(Node, 3, AArch64::ST3D, + AArch64::ST3D_IMM); return; } break; } case Intrinsic::aarch64_sve_st4: { if (VT == MVT::nxv16i8) { - SelectPredicatedStore(Node, 4, AArch64::ST4B_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4B, + AArch64::ST4B_IMM); return; } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16) { - SelectPredicatedStore(Node, 4, AArch64::ST4H_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4H, + AArch64::ST4H_IMM); return; } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { - SelectPredicatedStore(Node, 4, AArch64::ST4W_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4W, + AArch64::ST4W_IMM); return; 
} else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { - SelectPredicatedStore(Node, 4, AArch64::ST4D_IMM); + SelectPredicatedStore(Node, 4, AArch64::ST4D, + AArch64::ST4D_IMM); return; } break; @@ -4587,6 +4639,9 @@ static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { if (isa(Root)) return cast(Root)->getMemoryVT(); + if (isa(Root)) + return cast(Root)->getMemoryVT(); + const unsigned Opcode = Root->getOpcode(); // For custom ISD nodes, we have to look at them individually to extract the // type of the data moved to/from memory. diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 375e2681d100..8ceb970efcf4 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -8913,6 +8913,30 @@ SDValue AArch64TargetLowering::LowerVSCALE(SDValue Op, DL, VT); } +/// Set the IntrinsicInfo for the `aarch64_sve_st` intrinsics. +template +static bool setInfoSVEStN(AArch64TargetLowering::IntrinsicInfo &Info, + const CallInst &CI) { + Info.opc = ISD::INTRINSIC_VOID; + // Retrieve EC from first vector argument. + const EVT VT = EVT::getEVT(CI.getArgOperand(0)->getType()); + ElementCount EC = VT.getVectorElementCount(); +#ifndef NDEBUG + // Check the assumption that all input vectors are the same type. + for (unsigned I = 0; I < NumVecs; ++I) + assert(VT == EVT::getEVT(CI.getArgOperand(I)->getType()) && + "Invalid type."); +#endif + // memVT is `NumVecs * VT`. + Info.memVT = EVT::getVectorVT(CI.getType()->getContext(), VT.getScalarType(), + EC * NumVecs); + Info.ptrVal = CI.getArgOperand(CI.getNumArgOperands() - 1); + Info.offset = 0; + Info.align.reset(); + Info.flags = MachineMemOperand::MOStore; + return true; +} + /// getTgtMemIntrinsic - Represent NEON load and store intrinsics as /// MemIntrinsicNodes. The associated MachineMemOperands record the alignment /// specified in the intrinsic calls. @@ -8922,6 +8946,12 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, unsigned Intrinsic) const { auto &DL = I.getModule()->getDataLayout(); switch (Intrinsic) { + case Intrinsic::aarch64_sve_st2: + return setInfoSVEStN<2>(Info, I); + case Intrinsic::aarch64_sve_st3: + return setInfoSVEStN<3>(Info, I); + case Intrinsic::aarch64_sve_st4: + return setInfoSVEStN<4>(Info, I); case Intrinsic::aarch64_neon_ld2: case Intrinsic::aarch64_neon_ld3: case Intrinsic::aarch64_neon_ld4: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll new file mode 100644 index 000000000000..8ef27dc7ed70 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -0,0 +1,614 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; NOTE: invalid, upper and lower bound immediate values of the reg+imm +; addressing mode are checked only for the byte version of each +; instruction (`stb`), as the code for detecting the immediate is +; common to all instructions, and varies only for the number of +; elements of the structured store, which is = 2, 3, 4. 
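+;
+; In short (as the cases below exercise): for a structured store of N vectors
+; the reg+imm form encodes the VL-scaled offset as "#imm, mul vl", where imm
+; must be a multiple of N in the range [-8*N, 7*N] (e.g. [-16, 14] for st2b,
+; [-24, 21] for st3b, [-32, 28] for st4b). Any other VL-scaled offset is
+; materialized into a register (rdvl, possibly with extra arithmetic) and the
+; store is selected through the reg+reg form instead.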
+ +; +; ST2B +; + +define void @st2b_i8_valid_imm( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_valid_imm: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 2 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_invalid_imm_not_multiple_of_2( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_invalid_imm_not_multiple_of_2: +; CHECK: rdvl x[[N:[0-9]+]], #3 +; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 3 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_invalid_imm_out_of_lower_bound: +; CHECK: rdvl x[[N:[0-9]+]], #-18 +; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -18 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_invalid_imm_out_of_upper_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_invalid_imm_out_of_upper_bound: +; CHECK: rdvl x[[N:[0-9]+]], #16 +; CHECK-NEXT: st2b { z0.b, z1.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 16 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_valid_imm_lower_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_valid_imm_lower_bound: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, #-16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -16 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2b_i8_valid_imm_upper_bound( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2b_i8_valid_imm_upper_bound: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, #14, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 14 + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2H +; + +define void @st2h_i16( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2h_i16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 2 + call void @llvm.aarch64.sve.st2.nxv8i16( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2h_f16( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2h_f16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, #2, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 2 + call void @llvm.aarch64.sve.st2.nxv8f16( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2W +; + +define void @st2w_i32( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2w_i32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, #4, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 4 + call void @llvm.aarch64.sve.st2.nxv4i32( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2w_f32( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2w_f32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, #6, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 6 + call void @llvm.aarch64.sve.st2.nxv4f32( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2D +; + +define void @st2d_i64( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2d_i64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, #8, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 8 + call void @llvm.aarch64.sve.st2.nxv2i64( %v0, + %v1, + %pred, + * %base) + ret void +} + 
+define void @st2d_f64( %v0, %v1, %pred, * %addr) { +; CHECK-LABEL: st2d_f64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, #10, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 10 + call void @llvm.aarch64.sve.st2.nxv2f64( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST3B +; + +define void @st3b_i8_valid_imm( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_valid_imm: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 3 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_not_multiple_of_3_01( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_01: +; CHECK: rdvl x[[N:[0-9]+]], #4 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 4 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_not_multiple_of_3_02( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_not_multiple_of_3_02: +; CHECK: rdvl x[[N:[0-9]+]], #5 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 5 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_out_of_lower_bound: +; CHECK: rdvl x[[N:[0-9]+]], #-27 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -27 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_invalid_imm_out_of_upper_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_invalid_imm_out_of_upper_bound: +; CHECK: rdvl x[[N:[0-9]+]], #24 +; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 24 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_valid_imm_lower_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_valid_imm_lower_bound: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -24 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3b_i8_valid_imm_upper_bound( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3b_i8_valid_imm_upper_bound: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 21 + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3H +; + +define void @st3h_i16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3h_i16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 6 + call void @llvm.aarch64.sve.st3.nxv8i16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3h_f16( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3h_f16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 9 + call void @llvm.aarch64.sve.st3.nxv8f16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3W +; + +define void @st3w_i32( %v0, %v1, 
%v2, %pred, * %addr) { +; CHECK-LABEL: st3w_i32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 12 + call void @llvm.aarch64.sve.st3.nxv4i32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3w_f32( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3w_f32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 15 + call void @llvm.aarch64.sve.st3.nxv4f32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3D +; + +define void @st3d_i64( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3d_i64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 18 + call void @llvm.aarch64.sve.st3.nxv2i64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3d_f64( %v0, %v1, %v2, %pred, * %addr) { +; CHECK-LABEL: st3d_f64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -3 + call void @llvm.aarch64.sve.st3.nxv2f64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST4B +; + +define void @st4b_i8_valid_imm( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_valid_imm: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 4 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_not_multiple_of_4_01( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_01: +; CHECK: rdvl x[[N:[0-9]+]], #5 +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 5 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_not_multiple_of_4_02( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_02: +; CHECK: rdvl x[[N:[0-9]+]], #6 +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 6 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_not_multiple_of_4_03( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_not_multiple_of_4_03: +; CHECK: rdvl x[[N:[0-9]+]], #7 +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[N]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 7 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_out_of_lower_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound: +; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9) +; xM = -9 * 2^6 +; xP = RDVL * 2^-4 +; xBASE = RDVL * 2^-4 * -9 * 2^6 = RDVL * -36 +; CHECK: rdvl x[[N:[0-9]+]], #1 +; CHECK-DAG: mov x[[M:[0-9]+]], #-576 +; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4 +; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]] +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -36 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_invalid_imm_out_of_upper_bound( %v0, 
%v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound: +; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1) +; xM = 2^9 +; xP = RDVL * 2^-4 +; xOFFSET = RDVL * 2^-4 * 2^9 = RDVL * 32 +; CHECK: rdvl x[[N:[0-9]+]], #1 +; CHECK-DAG: mov w[[M:[0-9]+]], #512 +; CHECK-DAG: lsr x[[P:[0-9]+]], x[[N]], #4 +; CHECK-DAG: mul x[[OFFSET:[0-9]+]], x[[P]], x[[M]] +; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x[[OFFSET]]] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 32 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_valid_imm_lower_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_valid_imm_lower_bound: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 -32 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4b_i8_valid_imm_upper_bound( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4b_i8_valid_imm_upper_bound: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4H +; + +define void @st4h_i16( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4h_i16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 8 + call void @llvm.aarch64.sve.st4.nxv8i16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4h_f16( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4h_f16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 12 + call void @llvm.aarch64.sve.st4.nxv8f16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4W +; + +define void @st4w_i32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4w_i32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 16 + call void @llvm.aarch64.sve.st4.nxv4i32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4w_f32( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4w_f32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 20 + call void @llvm.aarch64.sve.st4.nxv4f32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4D +; + +define void @st4d_i64( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4d_i64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 24 + call void @llvm.aarch64.sve.st4.nxv2i64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4d_f64( %v0, %v1, %v2, %v3, %pred, * %addr) { +; CHECK-LABEL: st4d_f64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl] +; CHECK-NEXT: ret + %base = getelementptr , * %addr, i64 28 + call void @llvm.aarch64.sve.st4.nxv2f64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +declare void @llvm.aarch64.sve.st2.nxv16i8(, , , *) +declare void @llvm.aarch64.sve.st2.nxv8i16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4i32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2i64(, , , *) +declare void 
@llvm.aarch64.sve.st2.nxv8f16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4f32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2f64(, , , *) + +declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4i32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2i64(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , *) + +declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2i64(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , *) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll new file mode 100644 index 000000000000..4945fdca9498 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -0,0 +1,367 @@ +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -asm-verbose=0 < %s | FileCheck %s + +; +; ST2B +; + +define void @st2b_i8( %v0, %v1, %pred, i8* %addr, i64 %offset) { +; CHECK-LABEL: st2b_i8: +; CHECK: st2b { z0.b, z1.b }, p0, [x0, x1] +; CHECK-NEXT: ret + %1 = getelementptr i8, i8* %addr, i64 %offset + %base = bitcast i8* %1 to * + call void @llvm.aarch64.sve.st2.nxv16i8( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2H +; + +define void @st2h_i16( %v0, %v1, %pred, i16* %addr, i64 %offset) { +; CHECK-LABEL: st2h_i16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr i16, i16* %addr, i64 %offset + %base = bitcast i16* %1 to * + call void @llvm.aarch64.sve.st2.nxv8i16( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2h_f16( %v0, %v1, %pred, half* %addr, i64 %offset) { +; CHECK-LABEL: st2h_f16: +; CHECK: st2h { z0.h, z1.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr half, half* %addr, i64 %offset + %base = bitcast half* %1 to * + call void @llvm.aarch64.sve.st2.nxv8f16( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2W +; + +define void @st2w_i32( %v0, %v1, %pred, i32* %addr, i64 %offset) { +; CHECK-LABEL: st2w_i32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr i32, i32* %addr, i64 %offset + %base = bitcast i32* %1 to * + call void @llvm.aarch64.sve.st2.nxv4i32( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2w_f32( %v0, %v1, %pred, float* %addr, i64 %offset) { +; CHECK-LABEL: st2w_f32: +; CHECK: st2w { z0.s, z1.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr float, float* %addr, i64 %offset + %base = bitcast float* %1 to * + call void @llvm.aarch64.sve.st2.nxv4f32( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST2D +; + +define void @st2d_i64( %v0, %v1, %pred, i64* %addr, i64 %offset) { +; CHECK-LABEL: st2d_i64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr i64, i64* %addr, i64 %offset + %base = bitcast i64* %1 to * + call void @llvm.aarch64.sve.st2.nxv2i64( %v0, + %v1, + %pred, + * %base) + ret void +} + +define void @st2d_f64( %v0, %v1, %pred, double* %addr, i64 %offset) { +; CHECK-LABEL: st2d_f64: +; CHECK: st2d { z0.d, z1.d }, p0, [x0, x1, lsl 
#3] +; CHECK-NEXT: ret + %1 = getelementptr double, double* %addr, i64 %offset + %base = bitcast double* %1 to * + call void @llvm.aarch64.sve.st2.nxv2f64( %v0, + %v1, + %pred, + * %base) + ret void +} + +; +; ST3B +; + +define void @st3b_i8( %v0, %v1, %v2, %pred, i8* %addr, i64 %offset) { +; CHECK-LABEL: st3b_i8: +; CHECK: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1] +; CHECK-NEXT: ret + %1 = getelementptr i8, i8* %addr, i64 %offset + %base = bitcast i8* %1 to * + call void @llvm.aarch64.sve.st3.nxv16i8( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3H +; + +define void @st3h_i16( %v0, %v1, %v2, %pred, i16* %addr, i64 %offset) { +; CHECK-LABEL: st3h_i16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr i16, i16* %addr, i64 %offset + %base = bitcast i16* %1 to * + call void @llvm.aarch64.sve.st3.nxv8i16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3h_f16( %v0, %v1, %v2, %pred, half* %addr, i64 %offset) { +; CHECK-LABEL: st3h_f16: +; CHECK: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr half, half* %addr, i64 %offset + %base = bitcast half* %1 to * + call void @llvm.aarch64.sve.st3.nxv8f16( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3W +; + +define void @st3w_i32( %v0, %v1, %v2, %pred, i32* %addr, i64 %offset) { +; CHECK-LABEL: st3w_i32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr i32, i32* %addr, i64 %offset + %base = bitcast i32* %1 to * + call void @llvm.aarch64.sve.st3.nxv4i32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3w_f32( %v0, %v1, %v2, %pred, float* %addr, i64 %offset) { +; CHECK-LABEL: st3w_f32: +; CHECK: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr float, float* %addr, i64 %offset + %base = bitcast float* %1 to * + call void @llvm.aarch64.sve.st3.nxv4f32( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST3D +; + +define void @st3d_i64( %v0, %v1, %v2, %pred, i64* %addr, i64 %offset) { +; CHECK-LABEL: st3d_i64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr i64, i64* %addr, i64 %offset + %base = bitcast i64* %1 to * + call void @llvm.aarch64.sve.st3.nxv2i64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +define void @st3d_f64( %v0, %v1, %v2, %pred, double* %addr, i64 %offset) { +; CHECK-LABEL: st3d_f64: +; CHECK: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr double, double* %addr, i64 %offset + %base = bitcast double* %1 to * + call void @llvm.aarch64.sve.st3.nxv2f64( %v0, + %v1, + %v2, + %pred, + * %base) + ret void +} + +; +; ST4B +; + +define void @st4b_i8( %v0, %v1, %v2, %v3, %pred, i8* %addr, i64 %offset) { +; CHECK-LABEL: st4b_i8: +; CHECK: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1] +; CHECK-NEXT: ret + %1 = getelementptr i8, i8* %addr, i64 %offset + %base = bitcast i8* %1 to * + call void @llvm.aarch64.sve.st4.nxv16i8( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4H +; + +define void @st4h_i16( %v0, %v1, %v2, %v3, %pred, i16* %addr, i64 %offset) { +; CHECK-LABEL: st4h_i16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr i16, i16* %addr, i64 %offset + %base = bitcast i16* %1 to * + call void @llvm.aarch64.sve.st4.nxv8i16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4h_f16( 
%v0, %v1, %v2, %v3, %pred, half* %addr, i64 %offset) { +; CHECK-LABEL: st4h_f16: +; CHECK: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: ret + %1 = getelementptr half, half* %addr, i64 %offset + %base = bitcast half* %1 to * + call void @llvm.aarch64.sve.st4.nxv8f16( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4W +; + +define void @st4w_i32( %v0, %v1, %v2, %v3, %pred, i32* %addr, i64 %offset) { +; CHECK-LABEL: st4w_i32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr i32, i32* %addr, i64 %offset + %base = bitcast i32* %1 to * + call void @llvm.aarch64.sve.st4.nxv4i32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4w_f32( %v0, %v1, %v2, %v3, %pred, float* %addr, i64 %offset) { +; CHECK-LABEL: st4w_f32: +; CHECK: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: ret + %1 = getelementptr float, float* %addr, i64 %offset + %base = bitcast float* %1 to * + call void @llvm.aarch64.sve.st4.nxv4f32( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +; +; ST4D +; + +define void @st4d_i64( %v0, %v1, %v2, %v3, %pred, i64* %addr, i64 %offset) { +; CHECK-LABEL: st4d_i64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr i64, i64* %addr, i64 %offset + %base = bitcast i64* %1 to * + call void @llvm.aarch64.sve.st4.nxv2i64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +define void @st4d_f64( %v0, %v1, %v2, %v3, %pred, double* %addr, i64 %offset) { +; CHECK-LABEL: st4d_f64: +; CHECK: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: ret + %1 = getelementptr double, double* %addr, i64 %offset + %base = bitcast double* %1 to * + call void @llvm.aarch64.sve.st4.nxv2f64( %v0, + %v1, + %v2, + %v3, + %pred, + * %base) + ret void +} + +declare void @llvm.aarch64.sve.st2.nxv16i8(, , , *) +declare void @llvm.aarch64.sve.st2.nxv8i16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4i32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2i64(, , , *) +declare void @llvm.aarch64.sve.st2.nxv8f16(, , , *) +declare void @llvm.aarch64.sve.st2.nxv4f32(, , , *) +declare void @llvm.aarch64.sve.st2.nxv2f64(, , , *) + +declare void @llvm.aarch64.sve.st3.nxv16i8(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8i16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4i32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2i64(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv8f16(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv4f32(, , , , *) +declare void @llvm.aarch64.sve.st3.nxv2f64(, , , , *) + +declare void @llvm.aarch64.sve.st4.nxv16i8(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8i16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4i32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2i64(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv8f16(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv4f32(, , , , , *) +declare void @llvm.aarch64.sve.st4.nxv2f64(, , , , , *) From 4e7e414ec943b264e5104c797b2f4a7af5151f71 Mon Sep 17 00:00:00 2001 From: Bjorn Pettersson Date: Fri, 10 Apr 2020 19:24:11 +0200 Subject: [PATCH 208/216] [Float2Int] Make iteration over Roots deterministic Summary: Use a SmallSetVector instead of a SmallPtrSet when collecting and storing Roots. The iteration order for a SmallPtrSet is not deterministic, so in the past the order of items inserted in the WorkList inside walkBackwards has been non-deterministic. 
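For illustration only, a minimal standalone sketch (not code from this patch) of
the property the container change relies on: a SetVector iterates in insertion
order, while a SmallPtrSet iterates in an order derived from the pointer values
it happens to hold.

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/SmallPtrSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/IR/Instruction.h"

  using namespace llvm;

  SmallVector<Instruction *, 8> seedWorkList(ArrayRef<Instruction *> Roots) {
    SmallPtrSet<Instruction *, 8> Unordered;
    SmallSetVector<Instruction *, 8> Ordered;
    for (Instruction *I : Roots) {
      Unordered.insert(I); // iteration order depends on where I was allocated
      Ordered.insert(I);   // iteration order is the insertion order
    }
    // Seeding the worklist from Ordered gives the same visit order (and hence
    // the same rewrite order and value naming) on every run.
    return SmallVector<Instruction *, 8>(Ordered.begin(), Ordered.end());
  }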
This patch intends to make the order of rewrites done in Float2Int deterministic by changing the container for the Roots set. The semantics result of the transformation should not be any different afaict. But at least naming of IR variables (when outputting the result as an ll file) should be more stable now. Reviewers: craig.topper, spatel, cameron.mcinally Reviewed By: spatel Subscribers: mgrang, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D74534 --- llvm/include/llvm/Transforms/Scalar/Float2Int.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/Transforms/Scalar/Float2Int.h b/llvm/include/llvm/Transforms/Scalar/Float2Int.h index d7f36456fc2b..5fb47af6f795 100644 --- a/llvm/include/llvm/Transforms/Scalar/Float2Int.h +++ b/llvm/include/llvm/Transforms/Scalar/Float2Int.h @@ -16,6 +16,7 @@ #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SetVector.h" #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" @@ -42,7 +43,7 @@ class Float2IntPass : public PassInfoMixin { void cleanup(); MapVector SeenInsts; - SmallPtrSet Roots; + SmallSetVector Roots; EquivalenceClasses ECs; MapVector ConvertedInsts; LLVMContext *Ctx; From 8e20516540444618ad32dd11e835c05804053697 Mon Sep 17 00:00:00 2001 From: Raul Tambre Date: Fri, 17 Apr 2020 12:22:04 -0700 Subject: [PATCH 209/216] [CUDA] Define __CUDACC__ before standard library headers libstdc++ since version 7 when GNU extensions are enabled (e.g. -std=gnu++11) use it to avoid defining overloads using `__float128`. This fixes compiling with GNU extensions failing due to `__float128` being used. Discovered at https://gitlab.kitware.com/cmake/cmake/-/merge_requests/4442#note_737136. Differential Revision: https://reviews.llvm.org/D78392 --- clang/lib/Headers/__clang_cuda_runtime_wrapper.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h index 63404c9bdeb5..f43ed55de489 100644 --- a/clang/lib/Headers/__clang_cuda_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_cuda_runtime_wrapper.h @@ -31,11 +31,17 @@ // Include some forward declares that must come before cmath. #include <__clang_cuda_math_forward_declares.h> +// Define __CUDACC__ early as libstdc++ standard headers with GNU extensions +// enabled depend on it to avoid using __float128, which is unsupported in +// CUDA. +#define __CUDACC__ + // Include some standard headers to avoid CUDA headers including them // while some required macros (like __THROW) are in a weird state. #include #include #include +#undef __CUDACC__ // Preserve common macros that will be changed below by us or by CUDA // headers. From 10b60dde767011c250649dc6a305379034c5b5c5 Mon Sep 17 00:00:00 2001 From: Lei Huang Date: Fri, 17 Apr 2020 15:19:46 -0500 Subject: [PATCH 210/216] [PowerPC] Refactor ppcUserFeaturesCheck() Summary: This function keeps growing, refactor to use lambda. 
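One behavioural consequence worth noting, exercised by the new CHECK-NVSX-MULTI
run line added below: the rewritten check no longer returns after the first
conflicting option, so every conflicting option now gets its own diagnostic. A
minimal sketch of that shape change, where conflicts() and report() are
hypothetical stand-ins rather than real clang APIs:

  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/StringRef.h"

  using namespace llvm;

  // Hypothetical stand-ins for the real feature lookup and Diags.Report call.
  bool conflicts(StringRef Opt);
  void report(StringRef Opt);

  // Before: bail out on the first conflict, so at most one error is emitted.
  bool checkEarlyReturn(ArrayRef<StringRef> Opts) {
    for (StringRef O : Opts)
      if (conflicts(O)) {
        report(O);
        return false;
      }
    return true;
  }

  // After: accumulate, so every conflict is reported before failing once.
  bool checkAccumulate(ArrayRef<StringRef> Opts) {
    bool Found = false;
    for (StringRef O : Opts)
      if (conflicts(O)) {
        report(O);
        Found = true;
      }
    return !Found;
  }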
Reviewers: nemanjai, stefanp Subscribers: kbarton, shchenz, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D78308 --- clang/lib/Basic/Targets/PPC.cpp | 40 +++++++++------------ clang/test/Driver/ppc-dependent-options.cpp | 6 ++++ 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 1877d4a5ef70..81c13a8104e8 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -228,33 +228,25 @@ void PPCTargetInfo::getTargetDefines(const LangOptions &Opts, static bool ppcUserFeaturesCheck(DiagnosticsEngine &Diags, const std::vector &FeaturesVec) { - if (llvm::find(FeaturesVec, "-vsx") != FeaturesVec.end()) { - if (llvm::find(FeaturesVec, "+power8-vector") != FeaturesVec.end()) { - Diags.Report(diag::err_opt_not_valid_with_opt) << "-mpower8-vector" - << "-mno-vsx"; - return false; - } - - if (llvm::find(FeaturesVec, "+direct-move") != FeaturesVec.end()) { - Diags.Report(diag::err_opt_not_valid_with_opt) << "-mdirect-move" - << "-mno-vsx"; - return false; - } - - if (llvm::find(FeaturesVec, "+float128") != FeaturesVec.end()) { - Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfloat128" - << "-mno-vsx"; - return false; + // vsx was not explicitly turned off. + if (llvm::find(FeaturesVec, "-vsx") == FeaturesVec.end()) + return true; + + auto FindVSXSubfeature = [&](StringRef Feature, StringRef Option) { + if (llvm::find(FeaturesVec, Feature) != FeaturesVec.end()) { + Diags.Report(diag::err_opt_not_valid_with_opt) << Option << "-mno-vsx"; + return true; } + return false; + }; - if (llvm::find(FeaturesVec, "+power9-vector") != FeaturesVec.end()) { - Diags.Report(diag::err_opt_not_valid_with_opt) << "-mpower9-vector" - << "-mno-vsx"; - return false; - } - } + bool Found = FindVSXSubfeature("+power8-vector", "-mpower8-vector"); + Found |= FindVSXSubfeature("+direct-move", "-mdirect-move"); + Found |= FindVSXSubfeature("+float128", "-mfloat128"); + Found |= FindVSXSubfeature("+power9-vector", "-mpower9-vector"); - return true; + // Return false if any vsx subfeatures was found. 
+ return !Found; } bool PPCTargetInfo::initFeatureMap( diff --git a/clang/test/Driver/ppc-dependent-options.cpp b/clang/test/Driver/ppc-dependent-options.cpp index 9fb812dc218f..c089422c68b9 100644 --- a/clang/test/Driver/ppc-dependent-options.cpp +++ b/clang/test/Driver/ppc-dependent-options.cpp @@ -54,6 +54,10 @@ // RUN: -mcpu=power9 -std=c++11 -mno-vsx -mfloat128 %s 2>&1 | \ // RUN: FileCheck %s -check-prefix=CHECK-NVSX-FLT128 +// RUN: not %clang -target powerpc64le-unknown-unknown -fsyntax-only \ +// RUN: -mcpu=power9 -std=c++11 -mno-vsx -mfloat128 -mpower9-vector %s 2>&1 | \ +// RUN: FileCheck %s -check-prefix=CHECK-NVSX-MULTI + #ifdef __VSX__ static_assert(false, "VSX enabled"); #endif @@ -78,5 +82,7 @@ static_assert(false, "Neither enabled"); // CHECK-NVSX-P9V: error: option '-mpower9-vector' cannot be specified with '-mno-vsx' // CHECK-NVSX-FLT128: error: option '-mfloat128' cannot be specified with '-mno-vsx' // CHECK-NVSX-DMV: error: option '-mdirect-move' cannot be specified with '-mno-vsx' +// CHECK-NVSX-MULTI: error: option '-mfloat128' cannot be specified with '-mno-vsx' +// CHECK-NVSX-MULTI: error: option '-mpower9-vector' cannot be specified with '-mno-vsx' // CHECK-NVSX: Neither enabled // CHECK-VSX: VSX enabled From 5f69e53e5592141bf81f41fdf92106bf5cc3821c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Fri, 17 Apr 2020 13:37:47 -0700 Subject: [PATCH 211/216] [X86] Remove single incoming value phis from tests for the loop SAD pattern. NFC InstCombine should ensure these don't exist. I'm looking at making some changes to how we detect these patterns and not having to worry about these phis will help. --- .../CodeGen/X86/min-legal-vector-width.ll | 10 ++++------ llvm/test/CodeGen/X86/sad.ll | 20 ++++++++----------- 2 files changed, 12 insertions(+), 18 deletions(-) diff --git a/llvm/test/CodeGen/X86/min-legal-vector-width.ll b/llvm/test/CodeGen/X86/min-legal-vector-width.ll index 81de0ee4ebaa..443d57a1ad54 100644 --- a/llvm/test/CodeGen/X86/min-legal-vector-width.ll +++ b/llvm/test/CodeGen/X86/min-legal-vector-width.ll @@ -358,9 +358,8 @@ vector.body: br i1 %11, label %middle.block, label %vector.body middle.block: - %.lcssa = phi <16 x i32> [ %10, %vector.body ] - %rdx.shuf = shufflevector <16 x i32> %.lcssa, <16 x i32> undef, <16 x i32> - %bin.rdx = add <16 x i32> %.lcssa, %rdx.shuf + %rdx.shuf = shufflevector <16 x i32> %10, <16 x i32> undef, <16 x i32> + %bin.rdx = add <16 x i32> %10, %rdx.shuf %rdx.shuf2 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> %bin.rdx2 = add <16 x i32> %bin.rdx, %rdx.shuf2 %rdx.shuf3 = shufflevector <16 x i32> %bin.rdx2, <16 x i32> undef, <16 x i32> @@ -420,9 +419,8 @@ vector.body: br i1 %11, label %middle.block, label %vector.body middle.block: - %.lcssa = phi <16 x i32> [ %10, %vector.body ] - %rdx.shuf = shufflevector <16 x i32> %.lcssa, <16 x i32> undef, <16 x i32> - %bin.rdx = add <16 x i32> %.lcssa, %rdx.shuf + %rdx.shuf = shufflevector <16 x i32> %10, <16 x i32> undef, <16 x i32> + %bin.rdx = add <16 x i32> %10, %rdx.shuf %rdx.shuf2 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> %bin.rdx2 = add <16 x i32> %bin.rdx, %rdx.shuf2 %rdx.shuf3 = shufflevector <16 x i32> %bin.rdx2, <16 x i32> undef, <16 x i32> diff --git a/llvm/test/CodeGen/X86/sad.ll b/llvm/test/CodeGen/X86/sad.ll index 6a742068f701..011f1db9d2c3 100644 --- a/llvm/test/CodeGen/X86/sad.ll +++ b/llvm/test/CodeGen/X86/sad.ll @@ -135,9 +135,8 @@ vector.body: br i1 %11, label %middle.block, label %vector.body middle.block: - %.lcssa = phi <16 
x i32> [ %10, %vector.body ] - %rdx.shuf = shufflevector <16 x i32> %.lcssa, <16 x i32> undef, <16 x i32> - %bin.rdx = add <16 x i32> %.lcssa, %rdx.shuf + %rdx.shuf = shufflevector <16 x i32> %10, <16 x i32> undef, <16 x i32> + %bin.rdx = add <16 x i32> %10, %rdx.shuf %rdx.shuf2 = shufflevector <16 x i32> %bin.rdx, <16 x i32> undef, <16 x i32> %bin.rdx2 = add <16 x i32> %bin.rdx, %rdx.shuf2 %rdx.shuf3 = shufflevector <16 x i32> %bin.rdx2, <16 x i32> undef, <16 x i32> @@ -293,9 +292,8 @@ vector.body: br i1 %11, label %middle.block, label %vector.body middle.block: - %.lcssa = phi <32 x i32> [ %10, %vector.body ] - %rdx.shuf = shufflevector <32 x i32> %.lcssa, <32 x i32> undef, <32 x i32> - %bin.rdx = add <32 x i32> %.lcssa, %rdx.shuf + %rdx.shuf = shufflevector <32 x i32> %10, <32 x i32> undef, <32 x i32> + %bin.rdx = add <32 x i32> %10, %rdx.shuf %rdx.shuf2 = shufflevector <32 x i32> %bin.rdx, <32 x i32> undef, <32 x i32> %bin.rdx2 = add <32 x i32> %bin.rdx, %rdx.shuf2 %rdx.shuf3 = shufflevector <32 x i32> %bin.rdx2, <32 x i32> undef, <32 x i32> @@ -525,9 +523,8 @@ vector.body: br i1 %11, label %middle.block, label %vector.body middle.block: - %.lcssa = phi <64 x i32> [ %10, %vector.body ] - %rdx.shuf = shufflevector <64 x i32> %.lcssa, <64 x i32> undef, <64 x i32> - %bin.rdx = add <64 x i32> %.lcssa, %rdx.shuf + %rdx.shuf = shufflevector <64 x i32> %10, <64 x i32> undef, <64 x i32> + %bin.rdx = add <64 x i32> %10, %rdx.shuf %rdx.shuf2 = shufflevector <64 x i32> %bin.rdx, <64 x i32> undef, <64 x i32> %bin.rdx2 = add <64 x i32> %bin.rdx, %rdx.shuf2 %rdx.shuf3 = shufflevector <64 x i32> %bin.rdx2, <64 x i32> undef, <64 x i32> @@ -611,9 +608,8 @@ vector.body: br i1 %11, label %middle.block, label %vector.body middle.block: - %.lcssa = phi <2 x i32> [ %10, %vector.body ] - %rdx.shuf = shufflevector <2 x i32> %.lcssa, <2 x i32> undef, <2 x i32> - %bin.rdx = add <2 x i32> %.lcssa, %rdx.shuf + %rdx.shuf = shufflevector <2 x i32> %10, <2 x i32> undef, <2 x i32> + %bin.rdx = add <2 x i32> %10, %rdx.shuf %12 = extractelement <2 x i32> %bin.rdx, i32 0 ret i32 %12 } From 7f7f98b154484a90d4f2850a19dfc0a6d03ef70c Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 17 Apr 2020 11:25:51 -0700 Subject: [PATCH 212/216] [globalisel][legalizer] Fix --verify-legalizer-debug-locs values It was using the enum class name, like so: =DebugLocVerifyLevel::None - No verification Changed it to: =none - No verification --- llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 4b6d24ba73e8..823dceeae288 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -52,11 +52,13 @@ enum class DebugLocVerifyLevel { static cl::opt VerifyDebugLocs( "verify-legalizer-debug-locs", cl::desc("Verify that debug locations are handled"), - cl::values(clEnumVal(DebugLocVerifyLevel::None, "No verification"), - clEnumVal(DebugLocVerifyLevel::Legalizations, - "Verify legalizations"), - clEnumVal(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners, - "Verify legalizations and artifact combines")), + cl::values( + clEnumValN(DebugLocVerifyLevel::None, "none", "No verification"), + clEnumValN(DebugLocVerifyLevel::Legalizations, "legalizations", + "Verify legalizations"), + clEnumValN(DebugLocVerifyLevel::LegalizationsAndArtifactCombiners, + "legalizations+artifactcombiners", + "Verify legalizations and artifact combines")), 
cl::init(DebugLocVerifyLevel::Legalizations)); #else // Always disable it for release builds by preventing the observer from being From 5ef64bbf7ac47028d80f0ee71b7636e80b6b419b Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 17 Apr 2020 11:39:54 -0700 Subject: [PATCH 213/216] [globalisel][legalizer] Include newly-dead code in artifact combine checks for DebugLoc loss This dead code deletion is part of the combine and the combine results should account for their locations. --- llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index 823dceeae288..e00d4fb7428a 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -283,15 +283,14 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, if (ArtCombiner.tryCombineInstruction(MI, DeadInstructions, WrapperObserver)) { WorkListObserver.printNewInstrs(); - LocObserver.checkpoint( - VerifyDebugLocs == - DebugLocVerifyLevel::LegalizationsAndArtifactCombiners); for (auto *DeadMI : DeadInstructions) { LLVM_DEBUG(dbgs() << *DeadMI << "Is dead\n"); RemoveDeadInstFromLists(DeadMI); DeadMI->eraseFromParentAndMarkDBGValuesForRemoval(); } - LocObserver.checkpoint(); + LocObserver.checkpoint( + VerifyDebugLocs == + DebugLocVerifyLevel::LegalizationsAndArtifactCombiners); Changed = true; continue; } From 701af684f65ddb91b7eef54c0a0d2e93cc2ea4cf Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Fri, 17 Apr 2020 11:42:53 -0700 Subject: [PATCH 214/216] [globalisel][legalizer] Expect to lose DebugLocs in dead code There's not really anything else that can be done with them. Fortunately, this dead code cleanup doesn't seem to trigger very often. --- llvm/lib/CodeGen/GlobalISel/Legalizer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp index e00d4fb7428a..0a2d71c275d5 100644 --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -225,7 +225,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, if (isTriviallyDead(MI, MRI)) { LLVM_DEBUG(dbgs() << MI << "Is dead; erasing.\n"); MI.eraseFromParentAndMarkDBGValuesForRemoval(); - LocObserver.checkpoint(); + LocObserver.checkpoint(false); continue; } @@ -275,7 +275,7 @@ Legalizer::legalizeMachineFunction(MachineFunction &MF, const LegalizerInfo &LI, LLVM_DEBUG(dbgs() << MI << "Is dead\n"); RemoveDeadInstFromLists(&MI); MI.eraseFromParentAndMarkDBGValuesForRemoval(); - LocObserver.checkpoint(); + LocObserver.checkpoint(false); continue; } SmallVector DeadInstructions; From f8452ddfcc3336e42544a35481507f0b3bae423e Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Fri, 17 Apr 2020 16:43:35 -0400 Subject: [PATCH 215/216] [libc++] Use proper shell escaping in the executors --- libcxx/utils/run.py | 6 ++++-- libcxx/utils/ssh.py | 4 +++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/libcxx/utils/run.py b/libcxx/utils/run.py index 7cdf65264ec0..e9f9859807b3 100644 --- a/libcxx/utils/run.py +++ b/libcxx/utils/run.py @@ -14,6 +14,7 @@ import argparse import os +import pipes import shutil import subprocess import sys @@ -57,8 +58,9 @@ def main(): else: shutil.copy2(dep, args.execdir) - # Run the executable with the given environment in the execution directory. 
- return subprocess.call(' '.join(remaining), cwd=args.execdir, env=env, shell=True) + # Run the command line with the given environment in the execution directory. + commandLine = (pipes.quote(x) for x in remaining) + return subprocess.call(' '.join(commandLine), cwd=args.execdir, env=env, shell=True) finally: shutil.rmtree(args.execdir) diff --git a/libcxx/utils/ssh.py b/libcxx/utils/ssh.py index c7d8c97a1407..f9bcabe3c321 100644 --- a/libcxx/utils/ssh.py +++ b/libcxx/utils/ssh.py @@ -15,6 +15,7 @@ import argparse import os +import pipes import posixpath import subprocess import sys @@ -97,10 +98,11 @@ def main(): # host by transforming the path of test-executables to their path in the # temporary directory, where we know they have been copied when we handled # test dependencies above. + commandLine = (pathOnRemote(x) if isTestExe(x) else x for x in commandLine) remoteCommands += [ 'cd {}'.format(tmp), 'export {}'.format(' '.join(args.env)), - ' '.join(pathOnRemote(x) if isTestExe(x) else x for x in commandLine) + ' '.join(pipes.quote(x) for x in commandLine) ] # Finally, SSH to the remote host and execute all the commands. From c858debebc1308e748de882c745e179b1a398fa0 Mon Sep 17 00:00:00 2001 From: Christopher Tetreault Date: Fri, 17 Apr 2020 13:29:38 -0700 Subject: [PATCH 216/216] Remove asserting getters from base Type Summary: Remove asserting vector getters from Type in preparation for the VectorType refactor. The existence of these functions complicates the refactor while adding little value. Reviewers: dexonsmith, sdesmalen, efriedma Reviewed By: efriedma Subscribers: cfe-commits, hiraditya, llvm-commits Tags: #llvm, #clang Differential Revision: https://reviews.llvm.org/D77278 --- clang/lib/CodeGen/CGBuiltin.cpp | 3 +-- llvm/include/llvm/IR/DerivedTypes.h | 22 +++++++------------ llvm/include/llvm/IR/Type.h | 18 +++------------ llvm/lib/CodeGen/CodeGenPrepare.cpp | 2 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 2 +- llvm/unittests/IR/VPIntrinsicTest.cpp | 2 +- 6 files changed, 15 insertions(+), 34 deletions(-) diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index f4832ef4afb2..8ee69740f15c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7573,8 +7573,7 @@ Value *CodeGenFunction::EmitSVEMaskedStore(const CallExpr *E, // The vector type that is stored may be different from the // eventual type stored to memory. auto VectorTy = cast(Ops.back()->getType()); - auto MemoryTy = - llvm::VectorType::get(MemEltTy, VectorTy->getVectorElementCount()); + auto MemoryTy = llvm::VectorType::get(MemEltTy, VectorTy->getElementCount()); Value *Predicate = EmitSVEPredicateCast(Ops[0], MemoryTy); Value *BasePtr = Builder.CreateBitCast(Ops[1], MemoryTy->getPointerTo()); diff --git a/llvm/include/llvm/IR/DerivedTypes.h b/llvm/include/llvm/IR/DerivedTypes.h index 92017448fe0d..186430754303 100644 --- a/llvm/include/llvm/IR/DerivedTypes.h +++ b/llvm/include/llvm/IR/DerivedTypes.h @@ -531,18 +531,6 @@ class VectorType : public Type { } }; -unsigned Type::getVectorNumElements() const { - return cast(this)->getNumElements(); -} - -bool Type::getVectorIsScalable() const { - return cast(this)->isScalable(); -} - -ElementCount Type::getVectorElementCount() const { - return cast(this)->getElementCount(); -} - bool Type::isVectorTy() const { return isa(this); } /// Class to represent pointers. 
@@ -597,8 +585,8 @@ Type *Type::getWithNewBitWidth(unsigned NewBitWidth) const {
       isIntOrIntVectorTy() &&
       "Original type expected to be a vector of integers or a scalar integer.");
   Type *NewType = getIntNTy(getContext(), NewBitWidth);
-  if (isVectorTy())
-    NewType = VectorType::get(NewType, getVectorElementCount());
+  if (auto *VTy = dyn_cast<VectorType>(this))
+    NewType = VectorType::get(NewType, VTy->getElementCount());
   return NewType;
 }
 
@@ -606,6 +594,12 @@ unsigned Type::getPointerAddressSpace() const {
   return cast<PointerType>(getScalarType())->getAddressSpace();
 }
 
+Type *Type::getScalarType() const {
+  if (isVectorTy())
+    return cast<VectorType>(this)->getElementType();
+  return const_cast<Type *>(this);
+}
+
 } // end namespace llvm
 
 #endif // LLVM_IR_DERIVEDTYPES_H
diff --git a/llvm/include/llvm/IR/Type.h b/llvm/include/llvm/IR/Type.h
index 79d6964e3b3e..67be3ef480b7 100644
--- a/llvm/include/llvm/IR/Type.h
+++ b/llvm/include/llvm/IR/Type.h
@@ -300,11 +300,7 @@ class Type {
 
   /// If this is a vector type, return the element type, otherwise return
   /// 'this'.
-  Type *getScalarType() const {
-    if (isVectorTy())
-      return getVectorElementType();
-    return const_cast<Type*>(this);
-  }
+  inline Type *getScalarType() const;
 
   //===--------------------------------------------------------------------===//
   // Type Iteration support.
 
@@ -339,8 +335,8 @@ class Type {
 
   //===--------------------------------------------------------------------===//
   // Helper methods corresponding to subclass methods. This forces a cast to
-  // the specified subclass and calls its accessor. "getVectorNumElements" (for
-  // example) is shorthand for cast<VectorType>(Ty)->getNumElements(). This is
+  // the specified subclass and calls its accessor. "getArrayNumElements" (for
+  // example) is shorthand for cast<ArrayType>(Ty)->getNumElements(). This is
   // only intended to cover the core methods that are frequently used, helper
   // methods should not be added here.
@@ -361,14 +357,6 @@ class Type { return ContainedTys[0]; } - inline bool getVectorIsScalable() const; - inline unsigned getVectorNumElements() const; - inline ElementCount getVectorElementCount() const; - Type *getVectorElementType() const { - assert(getTypeID() == VectorTyID); - return ContainedTys[0]; - } - Type *getPointerElementType() const { assert(getTypeID() == PointerTyID); return ContainedTys[0]; diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 5eb772d12abf..d6a216f9f12c 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -5262,7 +5262,7 @@ bool CodeGenPrepare::optimizeGatherScatterInst(Instruction *MemoryInst, if (!RewriteGEP && Ops.size() == 2) return false; - unsigned NumElts = Ptr->getType()->getVectorNumElements(); + unsigned NumElts = cast(Ptr->getType())->getNumElements(); IRBuilder<> Builder(MemoryInst); diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f8c7f784bf11..a05b375d5279 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -4263,7 +4263,7 @@ static bool getUniformBase(const Value *Ptr, SDValue &Base, SDValue &Index, Base = SDB->getValue(C); - unsigned NumElts = Ptr->getType()->getVectorNumElements(); + unsigned NumElts = cast(Ptr->getType())->getNumElements(); EVT VT = EVT::getVectorVT(*DAG.getContext(), TLI.getPointerTy(DL), NumElts); Index = DAG.getConstant(0, SDB->getCurSDLoc(), VT); IndexType = ISD::SIGNED_SCALED; diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp index 919bac4ef266..35a1f3e9b4d7 100644 --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -107,7 +107,7 @@ TEST_F(VPIntrinsicTest, GetParamPos) { if (MaskParamPos.hasValue()) { Type *MaskParamType = F.getArg(MaskParamPos.getValue())->getType(); ASSERT_TRUE(MaskParamType->isVectorTy()); - ASSERT_TRUE(MaskParamType->getVectorElementType()->isIntegerTy(1)); + ASSERT_TRUE(cast(MaskParamType)->getElementType()->isIntegerTy(1)); } Optional VecLenParamPos =
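For call sites outside the files touched above, the rewrite this patch applies
is mechanical; a minimal standalone sketch of the pattern (hypothetical helper
functions, not code from the patch):

  #include "llvm/IR/DerivedTypes.h"
  #include "llvm/Support/Casting.h"

  using namespace llvm;

  // Before this patch, Ty->getVectorNumElements() did the cast and assert
  // internally; after it, the cast is spelled out at the call site.
  unsigned numLanes(Type *Ty) {
    return cast<VectorType>(Ty)->getNumElements(); // asserts if not a vector
  }

  // When the type may not be a vector, use dyn_cast and handle the scalar case.
  unsigned numLanesOrOne(Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty))
      return VTy->getNumElements();
    return 1;
  }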