From a851a30192f18a9e4ea4677b24f4c88568b39e4a Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Tue, 21 Aug 2018 17:46:15 -0400
Subject: [PATCH 1/3] prep patch list for llvm 7.0.0

---
 deps/llvm.mk | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/deps/llvm.mk b/deps/llvm.mk
index e3c9f6c5823a1..58d01c1ce602f 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -415,8 +415,8 @@ $(eval $(call LLVM_PATCH,llvm-rL332694)) # remove for 7.0
 endif
 $(eval $(call LLVM_PATCH,llvm-rL327898)) # remove for 7.0
 $(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm-OProfile-line-num))
-$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration))
+$(eval $(call LLVM_PATCH,llvm-OProfile-line-num)) # Remove for 7.0
+$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration)) # Remove for 7.0
 $(eval $(call LLVM_PATCH,llvm-D49832-SCEVPred)) # Remove for 7.0
 $(eval $(call LLVM_PATCH,llvm-rL323946-LSRTy)) # Remove for 7.0
 $(eval $(call LLVM_PATCH,llvm-D50010-VNCoercion-ni))
@@ -425,6 +425,15 @@ $(eval $(call LLVM_PATCH,llvm-rL326967-aligned-load)) # remove for 7.0
 ifeq ($(LLVM_VER_PATCH), 0)
 $(eval $(call LLVM_PATCH,llvm-windows-race))
 endif
+else ifeq ($(LLVM_VER_SHORT),7.0)
+$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1))
+$(eval $(call LLVM_PATCH,llvm-D34078-vectorize-fdiv))
+$(eval $(call LLVM_PATCH,llvm-6.0-NVPTX-addrspaces)) # NVPTX
+$(eval $(call LLVM_PATCH,llvm-6.0-D44650)) # mingw32 build fix
+$(eval $(call LLVM_PATCH,llvm-D46460))
+$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
+$(eval $(call LLVM_PATCH,llvm-D50010-VNCoercion-ni))
+$(eval $(call LLVM_PATCH,llvm-D50167-scev-umin))
 endif # LLVM_VER
 
 # Independent to the llvm version add a JL prefix to the version map

From f0dc0a3fca4094a9a012fa64d03adaacd137e39e Mon Sep 17 00:00:00 2001
From: Valentin Churavy <v.churavy@gmail.com>
Date: Tue, 21 Aug 2018 17:46:53 -0400
Subject: [PATCH 2/3] bump FORCE_ELF hack to < 8.0.0

---
 src/codegen.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/codegen.cpp b/src/codegen.cpp
index 4d00a02d96a11..bd978ba1abe4e 100644
--- a/src/codegen.cpp
+++ b/src/codegen.cpp
@@ -3,7 +3,7 @@
 #include "llvm-version.h"
 #include "platform.h"
 #include "options.h"
-#if defined(_OS_WINDOWS_) && JL_LLVM_VERSION < 70000
+#if defined(_OS_WINDOWS_) && JL_LLVM_VERSION < 80000
 // trick llvm into skipping the generation of _chkstk calls
 //   since it has some codegen issues associated with them:
 //   (a) assumed to be within 32-bit offset

From 12d147356e11b8d136731aafdf10f5091bca6a25 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Wed, 22 Aug 2018 14:16:31 -0400
Subject: [PATCH 3/3] Add 7.0 versions of scev patches

---
 deps/llvm.mk                                  |    4 +-
 .../llvm-7.0-D50010-VNCoercion-ni.patch       |   93 +
 deps/patches/llvm-7.0-D50167-scev-umin.patch  | 1861 +++++++++++++++++
 3 files changed, 1956 insertions(+), 2 deletions(-)
 create mode 100644 deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch
 create mode 100644 deps/patches/llvm-7.0-D50167-scev-umin.patch

diff --git a/deps/llvm.mk b/deps/llvm.mk
index 58d01c1ce602f..35444c6dbba90 100644
--- a/deps/llvm.mk
+++ b/deps/llvm.mk
@@ -432,8 +432,8 @@ $(eval $(call LLVM_PATCH,llvm-6.0-NVPTX-addrspaces)) # NVPTX
 $(eval $(call LLVM_PATCH,llvm-6.0-D44650)) # mingw32 build fix
 $(eval $(call LLVM_PATCH,llvm-D46460))
 $(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS))
-$(eval $(call LLVM_PATCH,llvm-D50010-VNCoercion-ni))
-$(eval $(call LLVM_PATCH,llvm-D50167-scev-umin))
+$(eval $(call LLVM_PATCH,llvm-7.0-D50010-VNCoercion-ni))
+$(eval $(call LLVM_PATCH,llvm-7.0-D50167-scev-umin))
 endif # LLVM_VER
 
 # Independent to the llvm version add a JL prefix to the version map
diff --git a/deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch b/deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch
new file mode 100644
index 0000000000000..368eca1e1d3b9
--- /dev/null
+++ b/deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch
@@ -0,0 +1,93 @@
+commit 847c686d09bd6171569f6997bdab12719ab6fe88
+Author: Keno Fischer <keno@juliacomputing.com>
+Date:   Wed Aug 22 14:03:29 2018 -0400
+
+    [VNCoercion] Disallow coercion between different ni addrspaces
+    
+    Summary:
+    I'm not sure if it would be legal by the IR reference to introduce
+    an addrspacecast here, since the IR reference is a bit vague on
+    the exact semantics, but at least for our usage of it (and I
+    suspect for many others' usage) it is not. For us, addrspacecasts
+    between non-integral address spaces carry frontend information that the
+    optimizer cannot deduce afterwards in a generic way (though we
+    have frontend specific passes in our pipeline that do propagate
+    these). In any case, I'm sure nobody is using it this way at
+    the moment, since it would have introduced inttoptrs, which
+    are definitely illegal.
+    
+    Fixes PR38375
+    
+    Reviewers: sanjoy, reames, dberlin
+    
+    Subscribers: vchuravy, llvm-commits
+    
+    Differential Revision: https://reviews.llvm.org/D50010
+
+diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp
+index 948d9bd5baa..fbd5b9bb3be 100644
+--- a/lib/Transforms/Utils/VNCoercion.cpp
++++ b/lib/Transforms/Utils/VNCoercion.cpp
+@@ -20,7 +20,8 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+       StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy())
+     return false;
+ 
+-  uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType());
++  Type *StoredValTy = StoredVal->getType();
++  uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy);
+ 
+   // The store size must be byte-aligned to support future type casts.
+   if (llvm::alignTo(StoreSize, 8) != StoreSize)
+@@ -30,10 +31,15 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy,
+   if (StoreSize < DL.getTypeSizeInBits(LoadTy))
+     return false;
+ 
+-  // Don't coerce non-integral pointers to integers or vice versa.
+-  if (DL.isNonIntegralPointerType(StoredVal->getType()) !=
+-      DL.isNonIntegralPointerType(LoadTy))
++  bool StoredNI = DL.isNonIntegralPointerType(StoredValTy);
++  bool LoadNI = DL.isNonIntegralPointerType(LoadTy);
++  if (StoredNI != LoadNI) {
++    return false;
++  } else if (StoredNI && LoadNI &&
++             cast<PointerType>(StoredValTy)->getAddressSpace() !=
++                 cast<PointerType>(LoadTy)->getAddressSpace()) {
+     return false;
++  }
+ 
+   return true;
+ }
+diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll
+index 9ae4132231d..5217fc1a06a 100644
+--- a/test/Transforms/GVN/non-integral-pointers.ll
++++ b/test/Transforms/GVN/non-integral-pointers.ll
+@@ -1,6 +1,6 @@
+ ; RUN: opt -gvn -S < %s | FileCheck %s
+ 
+-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4"
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5"
+ target triple = "x86_64-unknown-linux-gnu"
+ 
+ define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) {
+@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
+  alwaysTaken:
+   ret i64 42
+ }
++
++ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) {
++ ; CHECK-LABEL: @multini(
++ ; CHECK-NOT: inttoptr
++ ; CHECK-NOT: ptrtoint
++ ; CHECK-NOT: addrspacecast
++  entry:
++   store i8 addrspace(4)* %val, i8 addrspace(4)** %loc
++   br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken
++
++  neverTaken:
++   %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)**
++   %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc
++   ret i8 addrspace(5)* %differentas
++
++  alwaysTaken:
++   ret i8 addrspace(5)* null
++ }
diff --git a/deps/patches/llvm-7.0-D50167-scev-umin.patch b/deps/patches/llvm-7.0-D50167-scev-umin.patch
new file mode 100644
index 0000000000000..81576a18b7633
--- /dev/null
+++ b/deps/patches/llvm-7.0-D50167-scev-umin.patch
@@ -0,0 +1,1861 @@
+commit d68d9140287ad41d11df3fe6038b79c2b8c96bbf
+Author: Keno Fischer <keno@juliacomputing.com>
+Date:   Sat Aug 11 05:44:38 2018 -0400
+
+    RFC: [SCEV] Add explicit representations of umin/smin
+    
+    Summary:
+    Currently we express umin as `~umax(~x, ~y)`. However, this becomes
+    a problem for operands in non-integral pointer spaces, because `~x`
+    is not something we can compute for `x` non-integral. However, since
+    comparisons are generally still allowed, we are actually able to
+    express `umin(x, y)` directly as long as we don't try to express it
+    as a umax. Support this by adding an explicit umin/smin representation
+    to SCEV. We do this by factoring the existing getUMax/getSMax functions
+    into a new function that does all four. The previous two functions
+    were largely identical, except that the SMax variant used `isKnownPredicate`
+    while the UMax variant used `isKnownViaNonRecursiveReasoning`.
+    
+    Trying to make the UMax variant also use `isKnownPredicate` leads to
+    an infinite recursion, while trying to make the `SMax` variant use
+    `isKnownViaNonRecursiveReasoning` causes
+    `Transforms/IndVarSimplify/backedge-on-min-max.ll` to fail.
+    
+    I would appreciate any insight into which predicate is correct here.
+    
+    Reviewers: reames, sanjoy, mkazantsev
+    
+    Subscribers: dmgreen, vchuravy, javed.absar, llvm-commits
+    
+    Differential Revision: https://reviews.llvm.org/D50167
+
+diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h
+index 89918e3c205..73e6cc36254 100644
+--- a/include/llvm/Analysis/ScalarEvolution.h
++++ b/include/llvm/Analysis/ScalarEvolution.h
+@@ -582,6 +582,8 @@ public:
+   /// \p IndexExprs The expressions for the indices.
+   const SCEV *getGEPExpr(GEPOperator *GEP,
+                          const SmallVectorImpl<const SCEV *> &IndexExprs);
++  const SCEV *getUSMinMaxExpr(unsigned Kind,
++                              SmallVectorImpl<const SCEV *> &Operands);
+   const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
+   const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
+   const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
+diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h
+index 58d42680d6b..57d658b157d 100644
+--- a/include/llvm/Analysis/ScalarEvolutionExpander.h
++++ b/include/llvm/Analysis/ScalarEvolutionExpander.h
+@@ -368,6 +368,10 @@ namespace llvm {
+ 
+     Value *visitUMaxExpr(const SCEVUMaxExpr *S);
+ 
++    Value *visitSMinExpr(const SCEVSMinExpr *S);
++
++    Value *visitUMinExpr(const SCEVUMinExpr *S);
++
+     Value *visitUnknown(const SCEVUnknown *S) {
+       return S->getValue();
+     }
+diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h
+index 42e76094eb2..a4fc607b0c3 100644
+--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h
++++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h
+@@ -36,33 +36,42 @@ class ConstantRange;
+ class Loop;
+ class Type;
+ 
+-  enum SCEVTypes {
+-    // These should be ordered in terms of increasing complexity to make the
+-    // folders simpler.
+-    scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
+-    scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
+-    scUnknown, scCouldNotCompute
+-  };
+-
+-  /// This class represents a constant integer value.
+-  class SCEVConstant : public SCEV {
+-    friend class ScalarEvolution;
+-
+-    ConstantInt *V;
+-
+-    SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) :
+-      SCEV(ID, scConstant), V(v) {}
+-
+-  public:
+-    ConstantInt *getValue() const { return V; }
+-    const APInt &getAPInt() const { return getValue()->getValue(); }
+-
+-    Type *getType() const { return V->getType(); }
+-
+-    /// Methods for support type inquiry through isa, cast, and dyn_cast:
+-    static bool classof(const SCEV *S) {
+-      return S->getSCEVType() == scConstant;
+-    }
++enum SCEVTypes {
++  // These should be ordered in terms of increasing complexity to make the
++  // folders simpler.
++  scConstant,
++  scTruncate,
++  scZeroExtend,
++  scSignExtend,
++  scAddExpr,
++  scMulExpr,
++  scUDivExpr,
++  scAddRecExpr,
++  scUMaxExpr,
++  scSMaxExpr,
++  scUMinExpr,
++  scSMinExpr,
++  scUnknown,
++  scCouldNotCompute
++};
++
++/// This class represents a constant integer value.
++class SCEVConstant : public SCEV {
++  friend class ScalarEvolution;
++
++  ConstantInt *V;
++
++  SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v)
++      : SCEV(ID, scConstant), V(v) {}
++
++public:
++  ConstantInt *getValue() const { return V; }
++  const APInt &getAPInt() const { return getValue()->getValue(); }
++
++  Type *getType() const { return V->getType(); }
++
++  /// Methods for support type inquiry through isa, cast, and dyn_cast:
++  static bool classof(const SCEV *S) { return S->getSCEVType() == scConstant; }
+   };
+ 
+   /// This is the base class for unary cast operator classes.
+@@ -183,10 +192,9 @@ class Type;
+ 
+     /// Methods for support type inquiry through isa, cast, and dyn_cast:
+     static bool classof(const SCEV *S) {
+-      return S->getSCEVType() == scAddExpr ||
+-             S->getSCEVType() == scMulExpr ||
+-             S->getSCEVType() == scSMaxExpr ||
+-             S->getSCEVType() == scUMaxExpr ||
++      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
++             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
++             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr ||
+              S->getSCEVType() == scAddRecExpr;
+     }
+   };
+@@ -201,10 +209,9 @@ class Type;
+   public:
+     /// Methods for support type inquiry through isa, cast, and dyn_cast:
+     static bool classof(const SCEV *S) {
+-      return S->getSCEVType() == scAddExpr ||
+-             S->getSCEVType() == scMulExpr ||
+-             S->getSCEVType() == scSMaxExpr ||
+-             S->getSCEVType() == scUMaxExpr;
++      return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
++             S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
++             S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr;
+     }
+ 
+     /// Set flags for a non-recurrence without clearing previously set flags.
+@@ -394,6 +401,40 @@ class Type;
+     }
+   };
+ 
++  /// This class represents a signed minimum selection.
++  class SCEVSMinExpr : public SCEVCommutativeExpr {
++    friend class ScalarEvolution;
++
++    SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
++        : SCEVCommutativeExpr(ID, scSMinExpr, O, N) {
++      // Min never overflows.
++      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
++    }
++
++  public:
++    /// Methods for support type inquiry through isa, cast, and dyn_cast:
++    static bool classof(const SCEV *S) {
++      return S->getSCEVType() == scSMinExpr;
++    }
++  };
++
++  /// This class represents an unsigned minimum selection.
++  class SCEVUMinExpr : public SCEVCommutativeExpr {
++    friend class ScalarEvolution;
++
++    SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
++        : SCEVCommutativeExpr(ID, scUMinExpr, O, N) {
++      // Min never overflows.
++      setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
++    }
++
++  public:
++    /// Methods for support type inquiry through isa, cast, and dyn_cast:
++    static bool classof(const SCEV *S) {
++      return S->getSCEVType() == scUMinExpr;
++    }
++  };
++
+   /// This means that we are dealing with an entirely unknown SCEV
+   /// value, and only represent it as its LLVM Value.  This is the
+   /// "bottom" value for the analysis.
+@@ -466,6 +507,10 @@ class Type;
+         return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
+       case scUMaxExpr:
+         return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
++      case scSMinExpr:
++        return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S);
++      case scUMinExpr:
++        return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S);
+       case scUnknown:
+         return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
+       case scCouldNotCompute:
+@@ -519,6 +564,8 @@ class Type;
+         case scMulExpr:
+         case scSMaxExpr:
+         case scUMaxExpr:
++        case scSMinExpr:
++        case scUMinExpr:
+         case scAddRecExpr:
+           for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
+             push(Op);
+@@ -681,6 +728,26 @@ class Type;
+       return !Changed ? Expr : SE.getUMaxExpr(Operands);
+     }
+ 
++    const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
++      SmallVector<const SCEV *, 2> Operands;
++      bool Changed = false;
++      for (auto *Op : Expr->operands()) {
++        Operands.push_back(((SC *)this)->visit(Op));
++        Changed |= Op != Operands.back();
++      }
++      return !Changed ? Expr : SE.getSMinExpr(Operands);
++    }
++
++    const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
++      SmallVector<const SCEV *, 2> Operands;
++      bool Changed = false;
++      for (auto *Op : Expr->operands()) {
++        Operands.push_back(((SC *)this)->visit(Op));
++        Changed |= Op != Operands.back();
++      }
++      return !Changed ? Expr : SE.getUMinExpr(Operands);
++    }
++
+     const SCEV *visitUnknown(const SCEVUnknown *Expr) {
+       return Expr;
+     }
+diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
+index 0e715b8814f..19dab859a00 100644
+--- a/lib/Analysis/ScalarEvolution.cpp
++++ b/lib/Analysis/ScalarEvolution.cpp
+@@ -267,7 +267,9 @@ void SCEV::print(raw_ostream &OS) const {
+   case scAddExpr:
+   case scMulExpr:
+   case scUMaxExpr:
+-  case scSMaxExpr: {
++  case scSMaxExpr:
++  case scUMinExpr:
++  case scSMinExpr: {
+     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(this);
+     const char *OpStr = nullptr;
+     switch (NAry->getSCEVType()) {
+@@ -275,6 +277,12 @@ void SCEV::print(raw_ostream &OS) const {
+     case scMulExpr: OpStr = " * "; break;
+     case scUMaxExpr: OpStr = " umax "; break;
+     case scSMaxExpr: OpStr = " smax "; break;
++    case scUMinExpr:
++      OpStr = " umin ";
++      break;
++    case scSMinExpr:
++      OpStr = " smin ";
++      break;
+     }
+     OS << "(";
+     for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end();
+@@ -343,6 +351,8 @@ Type *SCEV::getType() const {
+   case scMulExpr:
+   case scUMaxExpr:
+   case scSMaxExpr:
++  case scUMinExpr:
++  case scSMinExpr:
+     return cast<SCEVNAryExpr>(this)->getType();
+   case scAddExpr:
+     return cast<SCEVAddExpr>(this)->getType();
+@@ -711,7 +721,9 @@ static int CompareSCEVComplexity(
+   case scAddExpr:
+   case scMulExpr:
+   case scSMaxExpr:
+-  case scUMaxExpr: {
++  case scUMaxExpr:
++  case scSMinExpr:
++  case scUMinExpr: {
+     const SCEVNAryExpr *LC = cast<SCEVNAryExpr>(LHS);
+     const SCEVNAryExpr *RC = cast<SCEVNAryExpr>(RHS);
+ 
+@@ -915,6 +927,8 @@ public:
+   void visitUDivExpr(const SCEVUDivExpr *Numerator) {}
+   void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {}
+   void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {}
++  void visitSMinExpr(const SCEVSMinExpr *Numerator) {}
++  void visitUMinExpr(const SCEVUMinExpr *Numerator) {}
+   void visitUnknown(const SCEVUnknown *Numerator) {}
+   void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {}
+ 
+@@ -3488,23 +3502,21 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP,
+   return getAddExpr(BaseExpr, TotalOffset, Wrap);
+ }
+ 
+-const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS,
+-                                         const SCEV *RHS) {
+-  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+-  return getSMaxExpr(Ops);
+-}
+-
+ const SCEV *
+-ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+-  assert(!Ops.empty() && "Cannot get empty smax!");
++ScalarEvolution::getUSMinMaxExpr(unsigned Kind,
++                                 SmallVectorImpl<const SCEV *> &Ops) {
++  assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!");
+   if (Ops.size() == 1) return Ops[0];
+ #ifndef NDEBUG
+   Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+   for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+     assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+-           "SCEVSMaxExpr operand types don't match!");
++           "Operand types don't match!");
+ #endif
+ 
++  bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr;
++  bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr;
++
+   // Sort by complexity, this groups all similar expression types together.
+   GroupByComplexity(Ops, &LI, DT);
+ 
+@@ -3513,61 +3525,91 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+   if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+     ++Idx;
+     assert(Idx < Ops.size());
++    auto &FoldOp =
++        Kind == scSMaxExpr
++            ? APIntOps::smax
++            : Kind == scSMinExpr
++                  ? APIntOps::smin
++                  : Kind == scUMaxExpr ? APIntOps::umax : APIntOps::umin;
+     while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+       // We found two constants, fold them together!
+       ConstantInt *Fold = ConstantInt::get(
+-          getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt()));
++          getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt()));
+       Ops[0] = getConstant(Fold);
+       Ops.erase(Ops.begin()+1);  // Erase the folded element
+       if (Ops.size() == 1) return Ops[0];
+       LHSC = cast<SCEVConstant>(Ops[0]);
+     }
+ 
+-    // If we are left with a constant minimum-int, strip it off.
+-    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(true)) {
+-      Ops.erase(Ops.begin());
+-      --Idx;
+-    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(true)) {
+-      // If we have an smax with a constant maximum-int, it will always be
+-      // maximum-int.
+-      return Ops[0];
++    if (IsMax) {
++      // If we are left with a constant minimum-int, strip it off.
++      if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(IsSigned)) {
++        Ops.erase(Ops.begin());
++        --Idx;
++      } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(IsSigned)) {
++        // If we have an smax with a constant maximum-int, it will always be
++        // maximum-int.
++        return Ops[0];
++      }
++    } else {
++      // If we are left with a constant maximum-int, strip it off.
++      if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(IsSigned)) {
++        Ops.erase(Ops.begin());
++        --Idx;
++      } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(IsSigned)) {
++        // If we have a min with a constant minimum-int, it will always be
++        // minimum-int.
++        return Ops[0];
++      }
+     }
+ 
+     if (Ops.size() == 1) return Ops[0];
+   }
+ 
+-  // Find the first SMax
+-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr)
++  // Find the first operation of the same kind
++  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() != Kind)
+     ++Idx;
+ 
+   // Check to see if one of the operands is an SMax. If so, expand its operands
+   // onto our operand list, and recurse to simplify.
+   if (Idx < Ops.size()) {
+-    bool DeletedSMax = false;
+-    while (const SCEVSMaxExpr *SMax = dyn_cast<SCEVSMaxExpr>(Ops[Idx])) {
++    bool DeletedAny = false;
++    while (Ops[Idx]->getSCEVType() == Kind) {
++      const SCEVCommutativeExpr *SCE = cast<SCEVCommutativeExpr>(Ops[Idx]);
+       Ops.erase(Ops.begin()+Idx);
+-      Ops.append(SMax->op_begin(), SMax->op_end());
+-      DeletedSMax = true;
++      Ops.append(SCE->op_begin(), SCE->op_end());
++      DeletedAny = true;
+     }
+ 
+-    if (DeletedSMax)
+-      return getSMaxExpr(Ops);
++    if (DeletedAny)
++      return getUSMinMaxExpr(Kind, Ops);
+   }
+ 
+   // Okay, check to see if the same value occurs in the operand list twice.  If
+   // so, delete one.  Since we sorted the list, these values are required to
+   // be adjacent.
+-  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+-    //  X smax Y smax Y  -->  X smax Y
+-    //  X smax Y         -->  X, if X is always greater than Y
+-    if (Ops[i] == Ops[i+1] ||
+-        isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) {
+-      Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2);
+-      --i; --e;
+-    } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) {
+-      Ops.erase(Ops.begin()+i, Ops.begin()+i+1);
+-      --i; --e;
++  llvm::CmpInst::Predicate GEPred =
++      IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
++  llvm::CmpInst::Predicate LEPred =
++      IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
++  llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred;
++  llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred;
++  for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) {
++    if (Ops[i] == Ops[i + 1] ||
++        isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) {
++      //  X op Y op Y  -->  X op Y
++      //  X op Y       -->  X, if we know X, Y are ordered appropriately
++      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
++      --i;
++      --e;
++    } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i],
++                                               Ops[i + 1])) {
++      //  X op Y       -->  Y, if we know X, Y are ordered appropriately
++      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
++      --i;
++      --e;
+     }
++  }
+ 
+   if (Ops.size() == 1) return Ops[0];
+ 
+@@ -3576,121 +3618,51 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+   // Okay, it looks like we really DO need an smax expr.  Check to see if we
+   // already have one, otherwise create a new one.
+   FoldingSetNodeID ID;
+-  ID.AddInteger(scSMaxExpr);
++  ID.AddInteger(Kind);
+   for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+     ID.AddPointer(Ops[i]);
+   void *IP = nullptr;
+   if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+   const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+   std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+-  SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator),
+-                                             O, Ops.size());
++  SCEV *S = nullptr;
++
++  if (Kind == scSMaxExpr) {
++    S = new (SCEVAllocator)
++        SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size());
++  } else if (Kind == scUMaxExpr) {
++    S = new (SCEVAllocator)
++        SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size());
++  } else if (Kind == scSMinExpr) {
++    S = new (SCEVAllocator)
++        SCEVSMinExpr(ID.Intern(SCEVAllocator), O, Ops.size());
++  } else {
++    assert(Kind == scUMinExpr);
++    S = new (SCEVAllocator)
++        SCEVUMinExpr(ID.Intern(SCEVAllocator), O, Ops.size());
++  }
++
+   UniqueSCEVs.InsertNode(S, IP);
+   addToLoopUseLists(S);
+   return S;
+ }
+ 
+-const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS,
+-                                         const SCEV *RHS) {
++const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) {
+   SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
+-  return getUMaxExpr(Ops);
++  return getSMaxExpr(Ops);
+ }
+ 
+-const SCEV *
+-ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
+-  assert(!Ops.empty() && "Cannot get empty umax!");
+-  if (Ops.size() == 1) return Ops[0];
+-#ifndef NDEBUG
+-  Type *ETy = getEffectiveSCEVType(Ops[0]->getType());
+-  for (unsigned i = 1, e = Ops.size(); i != e; ++i)
+-    assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy &&
+-           "SCEVUMaxExpr operand types don't match!");
+-#endif
+-
+-  // Sort by complexity, this groups all similar expression types together.
+-  GroupByComplexity(Ops, &LI, DT);
+-
+-  // If there are any constants, fold them together.
+-  unsigned Idx = 0;
+-  if (const SCEVConstant *LHSC = dyn_cast<SCEVConstant>(Ops[0])) {
+-    ++Idx;
+-    assert(Idx < Ops.size());
+-    while (const SCEVConstant *RHSC = dyn_cast<SCEVConstant>(Ops[Idx])) {
+-      // We found two constants, fold them together!
+-      ConstantInt *Fold = ConstantInt::get(
+-          getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt()));
+-      Ops[0] = getConstant(Fold);
+-      Ops.erase(Ops.begin()+1);  // Erase the folded element
+-      if (Ops.size() == 1) return Ops[0];
+-      LHSC = cast<SCEVConstant>(Ops[0]);
+-    }
+-
+-    // If we are left with a constant minimum-int, strip it off.
+-    if (cast<SCEVConstant>(Ops[0])->getValue()->isMinValue(false)) {
+-      Ops.erase(Ops.begin());
+-      --Idx;
+-    } else if (cast<SCEVConstant>(Ops[0])->getValue()->isMaxValue(false)) {
+-      // If we have an umax with a constant maximum-int, it will always be
+-      // maximum-int.
+-      return Ops[0];
+-    }
+-
+-    if (Ops.size() == 1) return Ops[0];
+-  }
+-
+-  // Find the first UMax
+-  while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr)
+-    ++Idx;
+-
+-  // Check to see if one of the operands is a UMax. If so, expand its operands
+-  // onto our operand list, and recurse to simplify.
+-  if (Idx < Ops.size()) {
+-    bool DeletedUMax = false;
+-    while (const SCEVUMaxExpr *UMax = dyn_cast<SCEVUMaxExpr>(Ops[Idx])) {
+-      Ops.erase(Ops.begin()+Idx);
+-      Ops.append(UMax->op_begin(), UMax->op_end());
+-      DeletedUMax = true;
+-    }
+-
+-    if (DeletedUMax)
+-      return getUMaxExpr(Ops);
+-  }
+-
+-  // Okay, check to see if the same value occurs in the operand list twice.  If
+-  // so, delete one.  Since we sorted the list, these values are required to
+-  // be adjacent.
+-  for (unsigned i = 0, e = Ops.size()-1; i != e; ++i)
+-    //  X umax Y umax Y  -->  X umax Y
+-    //  X umax Y         -->  X, if X is always greater than Y
+-    if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning(
+-                                    ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) {
+-      Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2);
+-      --i; --e;
+-    } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i],
+-                                               Ops[i + 1])) {
+-      Ops.erase(Ops.begin() + i, Ops.begin() + i + 1);
+-      --i; --e;
+-    }
+-
+-  if (Ops.size() == 1) return Ops[0];
++const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
++  return getUSMinMaxExpr(scSMaxExpr, Ops);
++}
+ 
+-  assert(!Ops.empty() && "Reduced umax down to nothing!");
++const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) {
++  SmallVector<const SCEV *, 2> Ops = {LHS, RHS};
++  return getUMaxExpr(Ops);
++}
+ 
+-  // Okay, it looks like we really DO need a umax expr.  Check to see if we
+-  // already have one, otherwise create a new one.
+-  FoldingSetNodeID ID;
+-  ID.AddInteger(scUMaxExpr);
+-  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
+-    ID.AddPointer(Ops[i]);
+-  void *IP = nullptr;
+-  if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S;
+-  const SCEV **O = SCEVAllocator.Allocate<const SCEV *>(Ops.size());
+-  std::uninitialized_copy(Ops.begin(), Ops.end(), O);
+-  SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator),
+-                                             O, Ops.size());
+-  UniqueSCEVs.InsertNode(S, IP);
+-  addToLoopUseLists(S);
+-  return S;
++const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl<const SCEV *> &Ops) {
++  return getUSMinMaxExpr(scUMaxExpr, Ops);
+ }
+ 
+ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+@@ -3700,11 +3672,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS,
+ }
+ 
+ const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
+-  // ~smax(~x, ~y, ~z) == smin(x, y, z).
+-  SmallVector<const SCEV *, 2> NotOps;
+-  for (auto *S : Ops)
+-    NotOps.push_back(getNotSCEV(S));
+-  return getNotSCEV(getSMaxExpr(NotOps));
++  return getUSMinMaxExpr(scSMinExpr, Ops);
+ }
+ 
+ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+@@ -3714,16 +3682,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS,
+ }
+ 
+ const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl<const SCEV *> &Ops) {
+-  assert(!Ops.empty() && "At least one operand must be!");
+-  // Trivial case.
+-  if (Ops.size() == 1)
+-    return Ops[0];
+-
+-  // ~umax(~x, ~y, ~z) == umin(x, y, z).
+-  SmallVector<const SCEV *, 2> NotOps;
+-  for (auto *S : Ops)
+-    NotOps.push_back(getNotSCEV(S));
+-  return getNotSCEV(getUMaxExpr(NotOps));
++  return getUSMinMaxExpr(scUMinExpr, Ops);
+ }
+ 
+ const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) {
+@@ -5191,6 +5150,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S,
+       switch (S->getSCEVType()) {
+       case scConstant: case scTruncate: case scZeroExtend: case scSignExtend:
+       case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr:
++      case scUMinExpr:
++      case scSMinExpr:
+         // These expressions are available if their operand(s) is/are.
+         return true;
+ 
+@@ -8070,7 +8031,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) {
+     }
+     case scSMaxExpr:
+     case scUMaxExpr:
+-      break; // TODO: smax, umax.
++    case scSMinExpr:
++    case scUMinExpr:
++      break; // TODO: smax, umax, smin, umin.
+   }
+   return nullptr;
+ }
+@@ -8200,6 +8163,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
+           return getSMaxExpr(NewOps);
+         if (isa<SCEVUMaxExpr>(Comm))
+           return getUMaxExpr(NewOps);
++        if (isa<SCEVSMinExpr>(Comm))
++          return getSMinExpr(NewOps);
++        if (isa<SCEVUMinExpr>(Comm))
++          return getUMinExpr(NewOps);
+         llvm_unreachable("Unknown commutative SCEV type!");
+       }
+     }
+@@ -9859,26 +9826,28 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) {
+   return AddRHS->getOperand(1);
+ }
+ 
+-/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values?
+-template<typename MaxExprType>
+-static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr,
+-                              const SCEV *Candidate) {
+-  const MaxExprType *MaxExpr = dyn_cast<MaxExprType>(MaybeMaxExpr);
+-  if (!MaxExpr) return false;
++/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values?
++template <typename MinMaxExprType>
++static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr,
++                                 const SCEV *Candidate) {
++  const MinMaxExprType *MinMaxExpr = dyn_cast<MinMaxExprType>(MaybeMinMaxExpr);
++  if (!MinMaxExpr)
++    return false;
+ 
+-  return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end();
++  return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end();
+ }
+ 
+-/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values?
+-template<typename MaxExprType>
+-static bool IsMinConsistingOf(ScalarEvolution &SE,
+-                              const SCEV *MaybeMinExpr,
+-                              const SCEV *Candidate) {
+-  const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr);
+-  if (!MaybeMaxExpr)
++/// Is MaybeMinMaxExpr an ~(U|S)(Min|Max) of ~Candidate and some other values?
++template <typename MinMaxExprType>
++static bool IsMinMaxConsistingOfByNegation(ScalarEvolution &SE,
++                                           const SCEV *MaybeMinMaxExpr,
++                                           const SCEV *Candidate) {
++  const SCEV *MinMaxExpr = MatchNotExpr(MaybeMinMaxExpr);
++  if (!MinMaxExpr)
+     return false;
+ 
+-  return IsMaxConsistingOf<MaxExprType>(MaybeMaxExpr, SE.getNotSCEV(Candidate));
++  return IsMinMaxConsistingOf<MinMaxExprType>(MinMaxExpr,
++                                              SE.getNotSCEV(Candidate));
+ }
+ 
+ static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE,
+@@ -9927,20 +9896,24 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE,
+     LLVM_FALLTHROUGH;
+   case ICmpInst::ICMP_SLE:
+     return
+-      // min(A, ...) <= A
+-      IsMinConsistingOf<SCEVSMaxExpr>(SE, LHS, RHS) ||
+-      // A <= max(A, ...)
+-      IsMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
++        // min(A, ...) <= A
++        IsMinMaxConsistingOfByNegation<SCEVSMaxExpr>(SE, LHS, RHS) ||
++        IsMinMaxConsistingOf<SCEVSMinExpr>(LHS, RHS) ||
++        // A <= max(A, ...)
++        IsMinMaxConsistingOfByNegation<SCEVSMinExpr>(SE, RHS, LHS) ||
++        IsMinMaxConsistingOf<SCEVSMaxExpr>(RHS, LHS);
+ 
+   case ICmpInst::ICMP_UGE:
+     std::swap(LHS, RHS);
+     LLVM_FALLTHROUGH;
+   case ICmpInst::ICMP_ULE:
+     return
+-      // min(A, ...) <= A
+-      IsMinConsistingOf<SCEVUMaxExpr>(SE, LHS, RHS) ||
+-      // A <= max(A, ...)
+-      IsMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
++        // min(A, ...) <= A
++        IsMinMaxConsistingOfByNegation<SCEVUMaxExpr>(SE, LHS, RHS) ||
++        IsMinMaxConsistingOf<SCEVUMinExpr>(LHS, RHS) ||
++        // A <= max(A, ...)
++        IsMinMaxConsistingOfByNegation<SCEVUMinExpr>(SE, RHS, LHS) ||
++        IsMinMaxConsistingOf<SCEVUMaxExpr>(RHS, LHS);
+   }
+ 
+   llvm_unreachable("covered switch fell through?!");
+@@ -11451,7 +11424,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) {
+   case scAddExpr:
+   case scMulExpr:
+   case scUMaxExpr:
+-  case scSMaxExpr: {
++  case scSMaxExpr:
++  case scUMinExpr:
++  case scSMinExpr: {
+     bool HasVarying = false;
+     for (auto *Op : cast<SCEVNAryExpr>(S)->operands()) {
+       LoopDisposition D = getLoopDisposition(Op, L);
+@@ -11538,7 +11513,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) {
+   case scAddExpr:
+   case scMulExpr:
+   case scUMaxExpr:
+-  case scSMaxExpr: {
++  case scSMaxExpr:
++  case scUMinExpr:
++  case scSMinExpr: {
+     const SCEVNAryExpr *NAry = cast<SCEVNAryExpr>(S);
+     bool Proper = true;
+     for (const SCEV *NAryOp : NAry->operands()) {
+diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
+index 8f89389c4b5..07b8156f61b 100644
+--- a/lib/Analysis/ScalarEvolutionExpander.cpp
++++ b/lib/Analysis/ScalarEvolutionExpander.cpp
+@@ -1634,14 +1634,15 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
+   for (int i = S->getNumOperands()-2; i >= 0; --i) {
+     // In the case of mixed integer and pointer types, do the
+     // rest of the comparisons as integer.
+-    if (S->getOperand(i)->getType() != Ty) {
++    Type *OpTy = S->getOperand(i)->getType();
++    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+       Ty = SE.getEffectiveSCEVType(Ty);
+       LHS = InsertNoopCastOfTo(LHS, Ty);
+     }
+     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+     Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
+     rememberInstruction(ICmp);
+-    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
++    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
+     rememberInstruction(Sel);
+     LHS = Sel;
+   }
+@@ -1658,13 +1659,64 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
+   for (int i = S->getNumOperands()-2; i >= 0; --i) {
+     // In the case of mixed integer and pointer types, do the
+     // rest of the comparisons as integer.
+-    if (S->getOperand(i)->getType() != Ty) {
++    Type *OpTy = S->getOperand(i)->getType();
++    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
+       Ty = SE.getEffectiveSCEVType(Ty);
+       LHS = InsertNoopCastOfTo(LHS, Ty);
+     }
+     Value *RHS = expandCodeFor(S->getOperand(i), Ty);
+     Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
+     rememberInstruction(ICmp);
++    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
++    rememberInstruction(Sel);
++    LHS = Sel;
++  }
++  // In the case of mixed integer and pointer types, cast the
++  // final result back to the pointer type.
++  if (LHS->getType() != S->getType())
++    LHS = InsertNoopCastOfTo(LHS, S->getType());
++  return LHS;
++}
++
++Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
++  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
++  Type *Ty = LHS->getType();
++  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
++    // In the case of mixed integer and pointer types, do the
++    // rest of the comparisons as integer.
++    Type *OpTy = S->getOperand(i)->getType();
++    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
++      Ty = SE.getEffectiveSCEVType(Ty);
++      LHS = InsertNoopCastOfTo(LHS, Ty);
++    }
++    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
++    Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
++    rememberInstruction(ICmp);
++    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
++    rememberInstruction(Sel);
++    LHS = Sel;
++  }
++  // In the case of mixed integer and pointer types, cast the
++  // final result back to the pointer type.
++  if (LHS->getType() != S->getType())
++    LHS = InsertNoopCastOfTo(LHS, S->getType());
++  return LHS;
++}
++
++Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
++  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
++  Type *Ty = LHS->getType();
++  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
++    // In the case of mixed integer and pointer types, do the
++    // rest of the comparisons as integer.
++    Type *OpTy = S->getOperand(i)->getType();
++    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
++      Ty = SE.getEffectiveSCEVType(Ty);
++      LHS = InsertNoopCastOfTo(LHS, Ty);
++    }
++    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
++    Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
++    rememberInstruction(ICmp);
+     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
+     rememberInstruction(Sel);
+     LHS = Sel;
+diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll
+new file mode 100644
+index 00000000000..a08632f38d1
+--- /dev/null
++++ b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll
+@@ -0,0 +1,50 @@
++; RUN: opt -loop-versioning -S < %s | FileCheck %s
++
++; NB: addrspaces 10-13 are non-integral
++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
++
++%jl_value_t = type opaque
++%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 }
++
++define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) {
++; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]]
++; CHECK: [[SELECT:%[^ ]*]] = select i1 [[CMP]], double addrspace(13)* [[A]], double addrspace(13)* [[B]]
++top:
++  %1 = alloca [3 x i64], align 8 
++  %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8
++  %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1
++  %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8
++  %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0
++  store i64 1, i64* %5, align 8
++  %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1
++  %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8
++  %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)*
++  %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)*
++  %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8
++  %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)*
++  %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)*
++  %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8
++  %14 = load i64, i64* %6, align 8
++  br label %L74
++
++L74:
++  %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ]
++  %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ]
++  %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ]
++  %15 = add i64 %value_phi21, -1
++  %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15
++  %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)*
++  %18 = load i64, i64 addrspace(13)* %17, align 8
++  %19 = add i64 %value_phi20, -1
++  %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19
++  %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)*
++  store i64 %18, i64 addrspace(13)* %21, align 8
++  %22 = add i64 %value_phi20, 1
++  %23 = add i64 %14, %value_phi21
++  %24 = icmp eq i64 %value_phi22, %7
++  %25 = add i64 %value_phi22, 1
++  br i1 %24, label %L94, label %L74
++
++L94:
++  ret void 
++}
+diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
+index 405a47554e4..4285ef0f117 100644
+--- a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
++++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll
+@@ -58,7 +58,7 @@ for.end:                                          ; preds = %for.body
+ 
+ ; Here it is not obvious what the limits are, since 'step' could be negative.
+ 
+-; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a)))))
++; CHECK: Low: ((60000 + %a)<nsw> umin (60000 + (-40000 * %step) + %a)) 
+ ; CHECK: High: (4 + ((60000 + %a)<nsw> umax (60000 + (-40000 * %step) + %a)))
+ 
+ define void @g(i64 %step) {
+diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
+index 3542ad2a41e..53e024a68fb 100644
+--- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
++++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll
+@@ -22,5 +22,5 @@ afterfor:		; preds = %forinc, %entry
+ 	ret i32 %j.0.lcssa
+ }
+ 
+-; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}}))))
++; CHECK: backedge-taken count is (-2147483633 + (-1 * (%x smin %y)))
+ 
+diff --git a/test/Analysis/ScalarEvolution/min-max-exprs.ll b/test/Analysis/ScalarEvolution/min-max-exprs.ll
+index e8c1e33e095..51f72c643cc 100644
+--- a/test/Analysis/ScalarEvolution/min-max-exprs.ll
++++ b/test/Analysis/ScalarEvolution/min-max-exprs.ll
+@@ -33,7 +33,7 @@ bb2:                                              ; preds = %bb1
+   %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6
+ ;                  min(N, i+3)
+ ; CHECK:           select i1 %tmp4, i64 %tmp5, i64 %tmp6
+-; CHECK-NEXT:  --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<nuw><%bb1> to i64))<nsw>)<nsw> smax (-1 + (-1 * (sext i32 %N to i64))<nsw>)<nsw>))<nsw>)<nsw>
++; CHECK-NEXT:  --> ((sext i32 {3,+,1}<nuw><%bb1> to i64) smin (sext i32 %N to i64))
+   %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9
+   %tmp12 = load i32, i32* %tmp11, align 4
+   %tmp13 = shl nsw i32 %tmp12, 1
+diff --git a/test/Analysis/ScalarEvolution/pr28705.ll b/test/Analysis/ScalarEvolution/pr28705.ll
+index 8fbc08e3ca6..7d797a15bd5 100644
+--- a/test/Analysis/ScalarEvolution/pr28705.ll
++++ b/test/Analysis/ScalarEvolution/pr28705.ll
+@@ -5,7 +5,7 @@
+ ; with "%.sroa.speculated + 1".
+ ;
+ ; CHECK-LABEL: @foo(
+-; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1
++; CHECK: %[[EXIT:.+]] = add i32 %.sroa.speculated, 1
+ ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ]
+ ;
+ define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr {
+diff --git a/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/test/Analysis/ScalarEvolution/predicated-trip-count.ll
+index a0afcf457d2..b07662ed95f 100644
+--- a/test/Analysis/ScalarEvolution/predicated-trip-count.ll
++++ b/test/Analysis/ScalarEvolution/predicated-trip-count.ll
+@@ -80,7 +80,7 @@ return:         ; preds = %bb5
+ ; CHECK-NEXT:    -->  (sext i16 {%Start,+,-1}<%bb3> to i32)
+ ; CHECK:       Loop %bb3: Unpredictable backedge-taken count.
+ ; CHECK-NEXT:  Loop %bb3: Unpredictable max backedge-taken count.
+-; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))<nsw>) smax (-1 + (-1 * %M))))
++; CHECK-NEXT:  Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32))<nsw> smin %M)))
+ ; CHECK-NEXT:  Predicates:
+ ; CHECK-NEXT:    {%Start,+,-1}<%bb3> Added Flags: <nssw>
+ 
+diff --git a/test/Analysis/ScalarEvolution/trip-count14.ll b/test/Analysis/ScalarEvolution/trip-count14.ll
+index 5e6cfe85101..15080613881 100644
+--- a/test/Analysis/ScalarEvolution/trip-count14.ll
++++ b/test/Analysis/ScalarEvolution/trip-count14.ll
+@@ -81,7 +81,7 @@ if.end:
+   br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+ 
+ ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit
+-; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n))))
++; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (((-1 * %n) + ((2 + %n) smax %n)) umin ((-1 * %n) + %x))
+ ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
+ 
+ do.end:
+@@ -169,7 +169,7 @@ if.end:
+   br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times
+ 
+ ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit
+-; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) umax %n)) + %n) umax (-1 + (-1 * %x) + %n))))
++; CHECK-NEXT: Loop %do.body: <multiple exits> backedge-taken count is (((-1 * %n) + ((2 + %n) umax %n)) umin ((-1 * %n) + %x))
+ ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}}
+ 
+ do.end:
+diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll
+index df6637a4ced..e10012c0c32 100644
+--- a/test/Analysis/ScalarEvolution/trip-count3.ll
++++ b/test/Analysis/ScalarEvolution/trip-count3.ll
+@@ -4,7 +4,7 @@
+ ; dividing by the stride will have a remainder. This could theoretically
+ ; be teaching it how to use a more elaborate trip count computation.
+ 
+-; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64)
++; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64)
+ ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431
+ 
+ %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] }
+diff --git a/test/Transforms/IRCE/conjunctive-checks.ll b/test/Transforms/IRCE/conjunctive-checks.ll
+index 4e3cf354125..8711c1b00e8 100644
+--- a/test/Transforms/IRCE/conjunctive-checks.ll
++++ b/test/Transforms/IRCE/conjunctive-checks.ll
+@@ -5,17 +5,15 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
+ ; CHECK-LABEL: @f_0(
+ 
+ ; CHECK: loop.preheader:
+-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
+-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]]
+-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]]
+-; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
++; CHECK: [[len_sub:[^ ]+]] = add i32 %len, -4
++; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]]
++; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]]
+ ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
+ ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
+ ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
+-; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit
++; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit
+ 
+-; CHECK: loop.preheader2:
++; CHECK: [[loop_preheader2]]:
+ ; CHECK: br label %loop
+ 
+  entry:
+@@ -35,9 +33,9 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
+ ; CHECK: loop:
+ ; CHECK:  %cond = load volatile i1, i1* %cond_buf
+ ; CHECK:  %abc = and i1 %cond, true
+-; CHECK:  br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit3, !prof !1
++; CHECK:  br i1 %abc, label %in.bounds, label %[[loop_exit:[^ ,]+]], !prof !1
+ 
+-; CHECK: out.of.bounds.loopexit:
++; CHECK: [[loop_exit]]:
+ ; CHECK:  br label %out.of.bounds
+ 
+  in.bounds:
+@@ -58,14 +56,10 @@ define void @f_1(
+ ; CHECK-LABEL: @f_1(
+ 
+ ; CHECK: loop.preheader:
+-; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
+-; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
+-; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
+-; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
+-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+-; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
+-; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
+-; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
++; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a
++; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a
++; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n 
++; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n
+ ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
+ ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
+ 
+@@ -85,9 +79,9 @@ define void @f_1(
+ 
+ ; CHECK: loop:
+ ; CHECK:   %abc = and i1 true, true
+-; CHECK:   br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit4, !prof !1
++; CHECK:   br i1 %abc, label %in.bounds, label %[[oob_loopexit:[^ ,]+]], !prof !1
+ 
+-; CHECK: out.of.bounds.loopexit:
++; CHECK: [[oob_loopexit]]:
+ ; CHECK-NEXT:  br label %out.of.bounds
+ 
+ 
+diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll
+index 4c82cd3e341..2994a432a71 100644
+--- a/test/Transforms/IRCE/decrementing-loop.ll
++++ b/test/Transforms/IRCE/decrementing-loop.ll
+@@ -29,11 +29,8 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
+   ret void
+ 
+ ; CHECK: loop.preheader:
+-; CHECK:   [[not_len:[^ ]+]] = sub i32 -1, %len
+-; CHECK:   [[not_n:[^ ]+]] = sub i32 -1, %n
+-; CHECK:   [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
+-; CHECK:   [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]]
+-; CHECK:   [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]]
++; CHECK:   [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n
++; CHECK:   [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n
+ ; CHECK:   [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0
+ ; CHECK:   [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0
+ ; CHECK:   %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1
+diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll
+index 000d1ab36f2..3bde9bd8668 100644
+--- a/test/Transforms/IRCE/multiple-access-no-preloop.ll
++++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll
+@@ -38,14 +38,10 @@ define void @multiple_access_no_preloop(
+ ; CHECK-LABEL: @multiple_access_no_preloop(
+ 
+ ; CHECK: loop.preheader:
+-; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
+-; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
+-; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
+-; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
+-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+-; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
+-; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
+-; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
++; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a
++; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a
++; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n
++; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n
+ ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
+ ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
+ 
+diff --git a/test/Transforms/IRCE/ranges_of_different_types.ll b/test/Transforms/IRCE/ranges_of_different_types.ll
+index 5c8161369f2..46bd94ce687 100644
+--- a/test/Transforms/IRCE/ranges_of_different_types.ll
++++ b/test/Transforms/IRCE/ranges_of_different_types.ll
+@@ -23,12 +23,11 @@ define void @test_01(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-NOT:     preloop
+ ; CHECK:         entry:
+ ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
+-; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+-; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+-; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
+-; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+-; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
++; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, -13
++; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
++; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
++; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0
+ ; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+ ; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
+ ; CHECK:         loop
+@@ -83,13 +82,11 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
+ ; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
+ ; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
+-; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
+-; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+-; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
+-; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
+-; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+-; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
++; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
++; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
++; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
++; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0
+ ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+ ; CHECK:         loop.preloop:
+ ; CHECK-NEXT:      %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ]
+@@ -151,14 +148,11 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-NOT:     preloop
+ ; CHECK:         entry:
+ ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
+-; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
+-; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
+-; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
+-; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
+-; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
+-; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
+-; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
++; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13
++; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13
++; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]]
++; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101
+ ; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
+ ; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
+ ; CHECK:         postloop:
+@@ -208,10 +202,9 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-LABEL: test_04(
+ ; CHECK:         entry:
+ ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
+-; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
+-; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+-; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
++; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, 13
++; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
+ ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+ ; CHECK:         in.bounds.preloop:
+ ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+@@ -252,12 +245,11 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-NOT:     preloop
+ ; CHECK:         entry:
+ ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 12, %len
+-; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+-; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+-; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]]
+-; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+-; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0
++; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, -13
++; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
++; CHECK-NEXT:      [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101
++; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0
+ ; CHECK-NEXT:      [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at
+ ; CHECK-NEXT:      br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit
+ ; CHECK:         loop
+@@ -297,13 +289,11 @@ define void @test_06(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-NEXT:      [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647
+ ; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13
+ ; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13
+-; CHECK-NEXT:      [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len
+-; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102
+-; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102
+-; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]]
+-; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0
+-; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0
++; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]]
++; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101
++; CHECK-NEXT:      [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101
++; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0
+ ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+ ; CHECK:         in.bounds.preloop:
+ ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+@@ -344,14 +334,11 @@ define void @test_07(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-NOT:     preloop
+ ; CHECK:         entry:
+ ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -2, %len
+-; CHECK-NEXT:      [[SUB2:%[^ ]+]] = sub i32 -1, %len
+-; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14
+-; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14
+-; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]]
+-; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102
+-; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102
+-; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
++; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13
++; CHECK-NEXT:      [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13
++; CHECK-NEXT:      [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]]
++; CHECK-NEXT:      [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101
+ ; CHECK-NEXT:      [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at
+ ; CHECK-NEXT:      br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit
+ ; CHECK:         loop
+@@ -388,10 +375,9 @@ define void @test_08(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK-LABEL: test_08(
+ ; CHECK:         entry:
+ ; CHECK-NEXT:      %len = load i32, i32* %a_len_ptr, !range !0
+-; CHECK-NEXT:      [[SUB1:%[^ ]+]] = sub i32 -14, %len
+-; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102
+-; CHECK-NEXT:      [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102
+-; CHECK-NEXT:      %exit.mainloop.at = sub i32 -1, [[UMAX1]]
++; CHECK-NEXT:      [[SUB1:%[^ ]+]] = add i32 %len, 13
++; CHECK-NEXT:      [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101
++; CHECK-NEXT:      %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101
+ ; CHECK-NEXT:      br i1 true, label %loop.preloop.preheader
+ ; CHECK:         in.bounds.preloop:
+ ; CHECK-NEXT:      %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop
+diff --git a/test/Transforms/IRCE/rc-negative-bound.ll b/test/Transforms/IRCE/rc-negative-bound.ll
+index bfc0cd14778..d226bffeaae 100644
+--- a/test/Transforms/IRCE/rc-negative-bound.ll
++++ b/test/Transforms/IRCE/rc-negative-bound.ll
+@@ -114,49 +114,44 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) {
+ ; CHECK:       loop.preheader:
+ ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
+ ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+-; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
+-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
+-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
+-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
+-; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
+-; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
+-; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
+-; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
+-; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
+-; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
+-; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
+-; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
+-; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
+-; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
+-; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
+-; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
+-; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
+-; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+-; CHECK:       loop.preheader5:
++; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
++; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
++; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
++; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
++; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
++; CHECK-NEXT:    [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1
++; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SMIN1]], 1
++; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]]
++; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]]
++; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]]
++; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0
++; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0
++; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
++; CHECK:       loop.preheader4:
+ ; CHECK-NEXT:    br label [[LOOP:%.*]]
+ ; CHECK:       loop:
+-; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
++; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
+ ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+ ; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
+-; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
++; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0
+ ; CHECK:       in.bounds:
+ ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+ ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+ ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
+-; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
++; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+ ; CHECK:       main.exit.selector:
+ ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+-; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
+-; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
++; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
++; CHECK-NEXT:    br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+ ; CHECK:       main.pseudo.exit:
+ ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+ ; CHECK:       out.of.bounds.loopexit:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+-; CHECK:       out.of.bounds.loopexit6:
++; CHECK:       out.of.bounds.loopexit5:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+ ; CHECK:       out.of.bounds:
+ ; CHECK-NEXT:    ret void
+@@ -211,47 +206,41 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) {
+ ; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+ ; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+ ; CHECK:       loop.preheader:
+-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
+-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
+-; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
+-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
+-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+-; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
+-; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
+-; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
+-; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
+-; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
+-; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
+-; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
+-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
+-; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+-; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
+-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+-; CHECK:       loop.preheader2:
++; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0
++; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0
++; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]]
++; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1
++; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1
++; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SMIN]], 1
++; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]]
++; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]]
++; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]]
++; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
++; CHECK:       loop.preheader1:
+ ; CHECK-NEXT:    br label [[LOOP:%.*]]
+ ; CHECK:       loop:
+-; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
++; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ]
+ ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+ ; CHECK-NEXT:    [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]]
+-; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
++; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0
+ ; CHECK:       in.bounds:
+ ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+ ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+ ; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
+-; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
++; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+ ; CHECK:       main.exit.selector:
+ ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
+-; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
++; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
++; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+ ; CHECK:       main.pseudo.exit:
+ ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+ ; CHECK:       out.of.bounds.loopexit:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+-; CHECK:       out.of.bounds.loopexit3:
++; CHECK:       out.of.bounds.loopexit2:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+ ; CHECK:       out.of.bounds:
+ ; CHECK-NEXT:    ret void
+@@ -413,49 +402,44 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) {
+ ; CHECK:       loop.preheader:
+ ; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647
+ ; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0
+-; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
+-; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]]
+-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[BOUND]]
+-; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1
+-; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1
+-; CHECK-NEXT:    [[TMP5:%.*]] = sub i32 -1, [[SMAX1]]
+-; CHECK-NEXT:    [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1
+-; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1
+-; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[SMAX2]], 1
+-; CHECK-NEXT:    [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]]
+-; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[TMP8]]
+-; CHECK-NEXT:    [[TMP10:%.*]] = sub i32 -1, [[N]]
+-; CHECK-NEXT:    [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]]
+-; CHECK-NEXT:    [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]]
+-; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[SMAX3]]
+-; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0
+-; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0
+-; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+-; CHECK:       loop.preheader5:
++; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0
++; CHECK-NEXT:    [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]]
++; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0
++; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0
++; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1
++; CHECK-NEXT:    [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1
++; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[SMIN1]], 1
++; CHECK-NEXT:    [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]]
++; CHECK-NEXT:    [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]]
++; CHECK-NEXT:    [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]]
++; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0
++; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0
++; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
++; CHECK:       loop.preheader4:
+ ; CHECK-NEXT:    br label [[LOOP:%.*]]
+ ; CHECK:       loop:
+-; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ]
++; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ]
+ ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+ ; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
+-; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0
++; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0
+ ; CHECK:       in.bounds:
+ ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+ ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+ ; CHECK-NEXT:    [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]]
+-; CHECK-NEXT:    [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
++; CHECK-NEXT:    [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+ ; CHECK:       main.exit.selector:
+ ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+-; CHECK-NEXT:    [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
+-; CHECK-NEXT:    br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
++; CHECK-NEXT:    [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]]
++; CHECK-NEXT:    br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+ ; CHECK:       main.pseudo.exit:
+ ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+ ; CHECK:       out.of.bounds.loopexit:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+-; CHECK:       out.of.bounds.loopexit6:
++; CHECK:       out.of.bounds.loopexit5:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+ ; CHECK:       out.of.bounds:
+ ; CHECK-NEXT:    ret void
+@@ -512,47 +496,41 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) {
+ ; CHECK-NEXT:    [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0
+ ; CHECK-NEXT:    br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
+ ; CHECK:       loop.preheader:
+-; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]]
+-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1
+-; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1
+-; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]]
+-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 1
+-; CHECK-NEXT:    [[TMP4:%.*]] = sub i32 -1, [[SMAX]]
+-; CHECK-NEXT:    [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1
+-; CHECK-NEXT:    [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1
+-; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[SMAX1]], 1
+-; CHECK-NEXT:    [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]]
+-; CHECK-NEXT:    [[TMP8:%.*]] = sub i32 -1, [[TMP7]]
+-; CHECK-NEXT:    [[TMP9:%.*]] = sub i32 -1, [[N]]
+-; CHECK-NEXT:    [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]]
+-; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]]
+-; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]]
+-; CHECK-NEXT:    [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
+-; CHECK:       loop.preheader2:
++; CHECK-NEXT:    [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0
++; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0
++; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]]
++; CHECK-NEXT:    [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1
++; CHECK-NEXT:    [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1
++; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[SMIN]], 1
++; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]]
++; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]]
++; CHECK-NEXT:    [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]]
++; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]]
++; CHECK:       loop.preheader1:
+ ; CHECK-NEXT:    br label [[LOOP:%.*]]
+ ; CHECK:       loop:
+-; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ]
++; CHECK-NEXT:    [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ]
+ ; CHECK-NEXT:    [[IDX_NEXT]] = add i32 [[IDX]], 1
+ ; CHECK-NEXT:    [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]]
+-; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0
++; CHECK-NEXT:    br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0
+ ; CHECK:       in.bounds:
+ ; CHECK-NEXT:    [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]]
+ ; CHECK-NEXT:    store i32 0, i32* [[ADDR]]
+ ; CHECK-NEXT:    [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]]
+-; CHECK-NEXT:    [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
+-; CHECK-NEXT:    br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
++; CHECK-NEXT:    [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]]
++; CHECK-NEXT:    br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]]
+ ; CHECK:       main.exit.selector:
+ ; CHECK-NEXT:    [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ]
+-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
+-; CHECK-NEXT:    br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
++; CHECK-NEXT:    [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]]
++; CHECK-NEXT:    br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]]
+ ; CHECK:       main.pseudo.exit:
+ ; CHECK-NEXT:    [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ]
+ ; CHECK-NEXT:    br label [[POSTLOOP:%.*]]
+ ; CHECK:       out.of.bounds.loopexit:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS:%.*]]
+-; CHECK:       out.of.bounds.loopexit3:
++; CHECK:       out.of.bounds.loopexit2:
+ ; CHECK-NEXT:    br label [[OUT_OF_BOUNDS]]
+ ; CHECK:       out.of.bounds:
+ ; CHECK-NEXT:    ret void
+diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll
+index acca948a7ab..7bf36f7c254 100644
+--- a/test/Transforms/IRCE/single-access-no-preloop.ll
++++ b/test/Transforms/IRCE/single-access-no-preloop.ll
+@@ -86,15 +86,13 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3
+ ; CHECK-LABEL: @single_access_no_preloop_with_offset(
+ 
+ ; CHECK: loop.preheader:
+-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
+-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]]
+-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]]
+-; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
++; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4
++; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]]
++; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]]
+ ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
+ ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
+ ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
+-; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit
++; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader:[^ ,]+]], label %main.pseudo.exit
+ 
+ ; CHECK: loop:
+ ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds
+diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll
+index 6f3b0324e39..bd235aa4a73 100644
+--- a/test/Transforms/IRCE/single-access-with-preloop.ll
++++ b/test/Transforms/IRCE/single-access-with-preloop.ll
+@@ -34,11 +34,9 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
+ ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647
+ ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647
+ ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
+-; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1
+-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+-; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]]
+-; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]]
+-; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]]
++; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]]
++; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]]
++; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]]
+ ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0
+ ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0
+ 
+@@ -46,17 +44,15 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
+ ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647
+ ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]]
+ ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]]
+-; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1
+ ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version.
+-; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len
+-; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]]
+-; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]]
++; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]]
++; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]]
++; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]]
+ ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0
+ ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0
+-; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648
+-; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]]
+-; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]]
+-; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]]
++; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]]
++; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]]
++; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]]
+ ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0
+ ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0
+ 
+@@ -67,7 +63,7 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32
+ ; CHECK: %abc.high = icmp slt i32 %array.idx, %len
+ ; CHECK: %abc.low = icmp sge i32 %array.idx, 0
+ ; CHECK: %abc = and i1 true, true
+-; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11
++; CHECK: br i1 %abc, label %in.bounds, label %[[loopexit:[^ ,]+]]
+ 
+ ; CHECK: in.bounds:
+ ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]]
+diff --git a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
+index 8f00c733569..3451d65c7bb 100644
+--- a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
++++ b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll
+@@ -58,8 +58,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK:        entry:
+ ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
+ ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
+-; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
+-; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
++; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
++; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
+ ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
+ ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+ ; CHECK:        mainloop:
+@@ -149,8 +149,8 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK:        entry:
+ ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
+ ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
+-; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
+-; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
++; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
++; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
+ ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
+ ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+ ; CHECK:        mainloop:
+diff --git a/test/Transforms/IRCE/unsigned_comparisons_ult.ll b/test/Transforms/IRCE/unsigned_comparisons_ult.ll
+index dc59c11df1b..aca3c3d192e 100644
+--- a/test/Transforms/IRCE/unsigned_comparisons_ult.ll
++++ b/test/Transforms/IRCE/unsigned_comparisons_ult.ll
+@@ -61,8 +61,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK:        entry:
+ ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
+ ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
+-; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
+-; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
++; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
++; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
+ ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at
+ ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+ ; CHECK:        mainloop:
+@@ -194,8 +194,8 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 {
+ ; CHECK:        entry:
+ ; CHECK-NEXT:     %len = load i32, i32* %a_len_ptr, !range !0
+ ; CHECK-NEXT:     [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1
+-; CHECK-NEXT:     %umax = select i1 [[COND1]], i32 %len, i32 1
+-; CHECK-NEXT:     %exit.preloop.at = add i32 %umax, -1
++; CHECK-NEXT:     [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1
++; CHECK-NEXT:     %exit.preloop.at = add i32 [[UMIN]], -1
+ ; CHECK-NEXT:     [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at
+ ; CHECK-NEXT:     br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit
+ ; CHECK:        mainloop:
+diff --git a/test/Transforms/IndVarSimplify/eliminate-trunc.ll b/test/Transforms/IndVarSimplify/eliminate-trunc.ll
+index 7e0971f9f31..c83a48723ca 100644
+--- a/test/Transforms/IndVarSimplify/eliminate-trunc.ll
++++ b/test/Transforms/IndVarSimplify/eliminate-trunc.ll
+@@ -459,15 +459,17 @@ exit:
+ define void @test_10(i32 %n) {
+ ; CHECK-LABEL: @test_10(
+ ; CHECK-NEXT:  entry:
+-; CHECK-NEXT:    [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64
++; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], 100
++; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
++; CHECK-NEXT:    [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90
++; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90
++; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[UMAX]], -99
+ ; CHECK-NEXT:    br label [[LOOP:%.*]]
+ ; CHECK:       loop:
+ ; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+ ; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+-; CHECK-NEXT:    [[TMP0:%.*]] = icmp ne i64 [[IV]], [[SEXT]]
+-; CHECK-NEXT:    [[NEGCMP:%.*]] = icmp slt i64 [[IV]], -10
+-; CHECK-NEXT:    [[CMP:%.*]] = and i1 [[TMP0]], [[NEGCMP]]
+-; CHECK-NEXT:    br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]]
++; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[TMP3]]
++; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]]
+ ; CHECK:       exit:
+ ; CHECK-NEXT:    ret void
+ ;
+diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+index ea3f6077231..d5232e1874c 100644
+--- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
++++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll
+@@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
+ ; current LSR cost model.
+ ; CHECK-NOT: = ptrtoint i8* undef to i64
+ ; CHECK: .lr.ph
+-; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1
+-; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}}
+ ; CHECK: ret void
+ define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 {
+ bb:
+diff --git a/test/Transforms/LoopVectorize/X86/pr35432.ll b/test/Transforms/LoopVectorize/X86/pr35432.ll
+index b8760cb8d50..6aaa13c183a 100644
+--- a/test/Transforms/LoopVectorize/X86/pr35432.ll
++++ b/test/Transforms/LoopVectorize/X86/pr35432.ll
+@@ -27,7 +27,6 @@ define i32 @main() local_unnamed_addr #0 {
+ ; CHECK-NEXT:    [[CMP8:%.*]] = icmp eq i32 [[CONV17]], 0
+ ; CHECK-NEXT:    br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END12:%.*]]
+ ; CHECK:       for.body.lr.ph:
+-; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 -1, [[TMP2]]
+ ; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
+ ; CHECK:       for.body:
+ ; CHECK-NEXT:    [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ]
+@@ -37,77 +36,74 @@ define i32 @main() local_unnamed_addr #0 {
+ ; CHECK:       for.body8.lr.ph:
+ ; CHECK-NEXT:    [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8
+ ; CHECK-NEXT:    [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
+-; CHECK-NEXT:    [[TMP4:%.*]] = add i8 [[CONV3]], -1
+-; CHECK-NEXT:    [[TMP5:%.*]] = zext i8 [[TMP4]] to i32
+-; CHECK-NEXT:    [[TMP6:%.*]] = sub i32 -1, [[TMP5]]
+-; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP3]]
+-; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 [[TMP3]]
+-; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[UMAX]], 2
+-; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]]
+-; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8
++; CHECK-NEXT:    [[TMP3:%.*]] = add i8 [[CONV3]], -1
++; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
++; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], 1
++; CHECK-NEXT:    [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]]
++; CHECK-NEXT:    [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]]
++; CHECK-NEXT:    [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]]
++; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8
+ ; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
+ ; CHECK:       vector.scevcheck:
+-; CHECK-NEXT:    [[TMP10:%.*]] = add i8 [[CONV3]], -1
+-; CHECK-NEXT:    [[TMP11:%.*]] = zext i8 [[TMP10]] to i32
+-; CHECK-NEXT:    [[TMP12:%.*]] = sub i32 -1, [[TMP11]]
+-; CHECK-NEXT:    [[TMP13:%.*]] = icmp ugt i32 [[TMP12]], [[TMP3]]
+-; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 [[TMP3]]
+-; CHECK-NEXT:    [[TMP14:%.*]] = add i32 [[UMAX1]], 1
+-; CHECK-NEXT:    [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]]
+-; CHECK-NEXT:    [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8
+-; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP16]])
++; CHECK-NEXT:    [[TMP8:%.*]] = add i8 [[CONV3]], -1
++; CHECK-NEXT:    [[TMP9:%.*]] = zext i8 [[TMP8]] to i32
++; CHECK-NEXT:    [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]]
++; CHECK-NEXT:    [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]]
++; CHECK-NEXT:    [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]]
++; CHECK-NEXT:    [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8
++; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]])
+ ; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
+ ; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
+-; CHECK-NEXT:    [[TMP17:%.*]] = add i8 [[TMP10]], [[MUL_RESULT]]
+-; CHECK-NEXT:    [[TMP18:%.*]] = sub i8 [[TMP10]], [[MUL_RESULT]]
+-; CHECK-NEXT:    [[TMP19:%.*]] = icmp ugt i8 [[TMP18]], [[TMP10]]
+-; CHECK-NEXT:    [[TMP20:%.*]] = icmp ult i8 [[TMP17]], [[TMP10]]
+-; CHECK-NEXT:    [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]]
+-; CHECK-NEXT:    [[TMP22:%.*]] = icmp ugt i32 [[TMP15]], 255
+-; CHECK-NEXT:    [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]]
+-; CHECK-NEXT:    [[TMP24:%.*]] = or i1 [[TMP23]], [[MUL_OVERFLOW]]
+-; CHECK-NEXT:    [[TMP25:%.*]] = or i1 false, [[TMP24]]
+-; CHECK-NEXT:    br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
++; CHECK-NEXT:    [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]]
++; CHECK-NEXT:    [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]]
++; CHECK-NEXT:    [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]]
++; CHECK-NEXT:    [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]]
++; CHECK-NEXT:    [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]]
++; CHECK-NEXT:    [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255
++; CHECK-NEXT:    [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]]
++; CHECK-NEXT:    [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]]
++; CHECK-NEXT:    [[TMP21:%.*]] = or i1 false, [[TMP20]]
++; CHECK-NEXT:    br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+ ; CHECK:       vector.ph:
+-; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 8
+-; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]]
++; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8
++; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]]
+ ; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
+ ; CHECK-NEXT:    [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]]
+-; CHECK-NEXT:    [[TMP26:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
++; CHECK-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0
+ ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+ ; CHECK:       vector.body:
+ ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+-; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP26]], [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ]
+-; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ]
+-; CHECK-NEXT:    [[TMP27:%.*]] = trunc i32 [[INDEX]] to i8
+-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP27]]
++; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ]
++; CHECK-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ]
++; CHECK-NEXT:    [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8
++; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]]
+ ; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0
+ ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer
+ ; CHECK-NEXT:    [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 0, i8 -1, i8 -2, i8 -3>
+ ; CHECK-NEXT:    [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], <i8 -4, i8 -5, i8 -6, i8 -7>
+-; CHECK-NEXT:    [[TMP28:%.*]] = add i8 [[OFFSET_IDX]], 0
+-; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[OFFSET_IDX]], -4
+-; CHECK-NEXT:    [[TMP30]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
+-; CHECK-NEXT:    [[TMP31]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
+-; CHECK-NEXT:    [[TMP32:%.*]] = add i8 [[TMP28]], -1
+-; CHECK-NEXT:    [[TMP33:%.*]] = add i8 [[TMP29]], -1
+-; CHECK-NEXT:    [[TMP34:%.*]] = zext i8 [[TMP32]] to i32
+-; CHECK-NEXT:    [[TMP35:%.*]] = zext i8 [[TMP33]] to i32
++; CHECK-NEXT:    [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0
++; CHECK-NEXT:    [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4
++; CHECK-NEXT:    [[TMP26]] = add <4 x i32> [[VEC_PHI]], <i32 1, i32 1, i32 1, i32 1>
++; CHECK-NEXT:    [[TMP27]] = add <4 x i32> [[VEC_PHI2]], <i32 1, i32 1, i32 1, i32 1>
++; CHECK-NEXT:    [[TMP28:%.*]] = add i8 [[TMP24]], -1
++; CHECK-NEXT:    [[TMP29:%.*]] = add i8 [[TMP25]], -1
++; CHECK-NEXT:    [[TMP30:%.*]] = zext i8 [[TMP28]] to i32
++; CHECK-NEXT:    [[TMP31:%.*]] = zext i8 [[TMP29]] to i32
+ ; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
+-; CHECK-NEXT:    [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
+-; CHECK-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
++; CHECK-NEXT:    [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
++; CHECK-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
+ ; CHECK:       middle.block:
+-; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP31]], [[TMP30]]
++; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]]
+ ; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
+ ; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]]
+ ; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
+ ; CHECK-NEXT:    [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]]
+-; CHECK-NEXT:    [[TMP37:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
+-; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]]
++; CHECK-NEXT:    [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0
++; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]]
+ ; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
+ ; CHECK:       scalar.ph:
+ ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ]
+-; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
++; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
+ ; CHECK-NEXT:    br label [[FOR_BODY8:%.*]]
+ ; CHECK:       for.body8:
+ ; CHECK-NEXT:    [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ]
+@@ -118,7 +114,7 @@ define i32 @main() local_unnamed_addr #0 {
+ ; CHECK-NEXT:    [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]]
+ ; CHECK-NEXT:    br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2
+ ; CHECK:       for.cond4.for.inc9_crit_edge:
+-; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
++; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ]
+ ; CHECK-NEXT:    store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16
+ ; CHECK-NEXT:    br label [[FOR_INC9]]
+ ; CHECK:       for.inc9: