From a851a30192f18a9e4ea4677b24f4c88568b39e4a Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 21 Aug 2018 17:46:15 -0400 Subject: [PATCH 1/3] prep patch list for llvm 7.0.0 --- deps/llvm.mk | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/deps/llvm.mk b/deps/llvm.mk index e3c9f6c5823a1..58d01c1ce602f 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -415,8 +415,8 @@ $(eval $(call LLVM_PATCH,llvm-rL332694)) # remove for 7.0 endif $(eval $(call LLVM_PATCH,llvm-rL327898)) # remove for 7.0 $(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS)) -$(eval $(call LLVM_PATCH,llvm-OProfile-line-num)) -$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration)) +$(eval $(call LLVM_PATCH,llvm-OProfile-line-num)) # Remove for 7.0 +$(eval $(call LLVM_PATCH,llvm-D44892-Perf-integration)) # Remove for 7.0 $(eval $(call LLVM_PATCH,llvm-D49832-SCEVPred)) # Remove for 7.0 $(eval $(call LLVM_PATCH,llvm-rL323946-LSRTy)) # Remove for 7.0 $(eval $(call LLVM_PATCH,llvm-D50010-VNCoercion-ni)) @@ -425,6 +425,15 @@ $(eval $(call LLVM_PATCH,llvm-rL326967-aligned-load)) # remove for 7.0 ifeq ($(LLVM_VER_PATCH), 0) $(eval $(call LLVM_PATCH,llvm-windows-race)) endif +else ifeq ($(LLVM_VER_SHORT),7.0) +$(eval $(call LLVM_PATCH,llvm-D27629-AArch64-large_model_6.0.1)) +$(eval $(call LLVM_PATCH,llvm-D34078-vectorize-fdiv)) +$(eval $(call LLVM_PATCH,llvm-6.0-NVPTX-addrspaces)) # NVPTX +$(eval $(call LLVM_PATCH,llvm-6.0-D44650)) # mingw32 build fix +$(eval $(call LLVM_PATCH,llvm-D46460)) +$(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS)) +$(eval $(call LLVM_PATCH,llvm-D50010-VNCoercion-ni)) +$(eval $(call LLVM_PATCH,llvm-D50167-scev-umin)) endif # LLVM_VER # Independent to the llvm version add a JL prefix to the version map From f0dc0a3fca4094a9a012fa64d03adaacd137e39e Mon Sep 17 00:00:00 2001 From: Valentin Churavy Date: Tue, 21 Aug 2018 17:46:53 -0400 Subject: [PATCH 2/3] bump FORCE_ELF hack to < 8.0.0 --- src/codegen.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/codegen.cpp b/src/codegen.cpp index 4d00a02d96a11..bd978ba1abe4e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -3,7 +3,7 @@ #include "llvm-version.h" #include "platform.h" #include "options.h" -#if defined(_OS_WINDOWS_) && JL_LLVM_VERSION < 70000 +#if defined(_OS_WINDOWS_) && JL_LLVM_VERSION < 80000 // trick llvm into skipping the generation of _chkstk calls // since it has some codegen issues associated with them: // (a) assumed to be within 32-bit offset From 12d147356e11b8d136731aafdf10f5091bca6a25 Mon Sep 17 00:00:00 2001 From: Keno Fischer Date: Wed, 22 Aug 2018 14:16:31 -0400 Subject: [PATCH 3/3] Add 7.0 versions of scev patches --- deps/llvm.mk | 4 +- .../llvm-7.0-D50010-VNCoercion-ni.patch | 93 + deps/patches/llvm-7.0-D50167-scev-umin.patch | 1861 +++++++++++++++++ 3 files changed, 1956 insertions(+), 2 deletions(-) create mode 100644 deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch create mode 100644 deps/patches/llvm-7.0-D50167-scev-umin.patch diff --git a/deps/llvm.mk b/deps/llvm.mk index 58d01c1ce602f..35444c6dbba90 100644 --- a/deps/llvm.mk +++ b/deps/llvm.mk @@ -432,8 +432,8 @@ $(eval $(call LLVM_PATCH,llvm-6.0-NVPTX-addrspaces)) # NVPTX $(eval $(call LLVM_PATCH,llvm-6.0-D44650)) # mingw32 build fix $(eval $(call LLVM_PATCH,llvm-D46460)) $(eval $(call LLVM_PATCH,llvm-6.0-DISABLE_ABI_CHECKS)) -$(eval $(call LLVM_PATCH,llvm-D50010-VNCoercion-ni)) -$(eval $(call LLVM_PATCH,llvm-D50167-scev-umin)) +$(eval $(call LLVM_PATCH,llvm-7.0-D50010-VNCoercion-ni)) +$(eval $(call LLVM_PATCH,llvm-7.0-D50167-scev-umin)) endif # LLVM_VER # Independent to the llvm version add a JL prefix to the version map diff --git a/deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch b/deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch new file mode 100644 index 0000000000000..368eca1e1d3b9 --- /dev/null +++ b/deps/patches/llvm-7.0-D50010-VNCoercion-ni.patch @@ -0,0 +1,93 @@ +commit 847c686d09bd6171569f6997bdab12719ab6fe88 +Author: Keno Fischer +Date: Wed Aug 22 14:03:29 2018 -0400 + + [VNCoercion] Disallow coercion between different ni addrspaces + + Summary: + I'm not sure if it would be legal by the IR reference to introduce + an addrspacecast here, since the IR reference is a bit vague on + the exact semantics, but at least for our usage of it (and I + suspect for many other's usage) it is not. For us, addrspacecasts + between non-integral address spaces carry frontend information that the + optimizer cannot deduce afterwards in a generic way (though we + have frontend specific passes in our pipline that do propagate + these). In any case, I'm sure nobody is using it this way at + the moment, since it would have introduced inttoptrs, which + are definitely illegal. + + Fixes PR38375 + + Reviewers: sanjoy, reames, dberlin + + Subscribers: vchuravy, llvm-commits + + Differential Revision: https://reviews.llvm.org/D50010 + +diff --git a/lib/Transforms/Utils/VNCoercion.cpp b/lib/Transforms/Utils/VNCoercion.cpp +index 948d9bd5baa..fbd5b9bb3be 100644 +--- a/lib/Transforms/Utils/VNCoercion.cpp ++++ b/lib/Transforms/Utils/VNCoercion.cpp +@@ -20,7 +20,8 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + StoredVal->getType()->isStructTy() || StoredVal->getType()->isArrayTy()) + return false; + +- uint64_t StoreSize = DL.getTypeSizeInBits(StoredVal->getType()); ++ Type *StoredValTy = StoredVal->getType(); ++ uint64_t StoreSize = DL.getTypeSizeInBits(StoredValTy); + + // The store size must be byte-aligned to support future type casts. + if (llvm::alignTo(StoreSize, 8) != StoreSize) +@@ -30,10 +31,15 @@ bool canCoerceMustAliasedValueToLoad(Value *StoredVal, Type *LoadTy, + if (StoreSize < DL.getTypeSizeInBits(LoadTy)) + return false; + +- // Don't coerce non-integral pointers to integers or vice versa. +- if (DL.isNonIntegralPointerType(StoredVal->getType()) != +- DL.isNonIntegralPointerType(LoadTy)) ++ bool StoredNI = DL.isNonIntegralPointerType(StoredValTy); ++ bool LoadNI = DL.isNonIntegralPointerType(LoadTy); ++ if (StoredNI != LoadNI) { ++ return false; ++ } else if (StoredNI && LoadNI && ++ cast(StoredValTy)->getAddressSpace() != ++ cast(LoadTy)->getAddressSpace()) { + return false; ++ } + + return true; + } +diff --git a/test/Transforms/GVN/non-integral-pointers.ll b/test/Transforms/GVN/non-integral-pointers.ll +index 9ae4132231d..5217fc1a06a 100644 +--- a/test/Transforms/GVN/non-integral-pointers.ll ++++ b/test/Transforms/GVN/non-integral-pointers.ll +@@ -1,6 +1,6 @@ + ; RUN: opt -gvn -S < %s | FileCheck %s + +-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4" ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:4:5" + target triple = "x86_64-unknown-linux-gnu" + + define void @f0(i1 %alwaysFalse, i64 %val, i64* %loc) { +@@ -37,3 +37,21 @@ define i64 @f1(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { + alwaysTaken: + ret i64 42 + } ++ ++ define i8 addrspace(5)* @multini(i1 %alwaysFalse, i8 addrspace(4)* %val, i8 addrspace(4)** %loc) { ++ ; CHECK-LABEL: @multini( ++ ; CHECK-NOT: inttoptr ++ ; CHECK-NOT: ptrtoint ++ ; CHECK-NOT: addrspacecast ++ entry: ++ store i8 addrspace(4)* %val, i8 addrspace(4)** %loc ++ br i1 %alwaysFalse, label %neverTaken, label %alwaysTaken ++ ++ neverTaken: ++ %loc.bc = bitcast i8 addrspace(4)** %loc to i8 addrspace(5)** ++ %differentas = load i8 addrspace(5)*, i8 addrspace(5)** %loc.bc ++ ret i8 addrspace(5)* %differentas ++ ++ alwaysTaken: ++ ret i8 addrspace(5)* null ++ } diff --git a/deps/patches/llvm-7.0-D50167-scev-umin.patch b/deps/patches/llvm-7.0-D50167-scev-umin.patch new file mode 100644 index 0000000000000..81576a18b7633 --- /dev/null +++ b/deps/patches/llvm-7.0-D50167-scev-umin.patch @@ -0,0 +1,1861 @@ +commit d68d9140287ad41d11df3fe6038b79c2b8c96bbf +Author: Keno Fischer +Date: Sat Aug 11 05:44:38 2018 -0400 + + RFC: [SCEV] Add explicit representations of umin/smin + + Summary: + Currently we express umin as `~umax(~x, ~y)`. However, this becomes + a problem for operands in non-integral pointer spaces, because `~x` + is not something we can compute for `x` non-integral. However, since + comparisons are generally still allowed, we are actually able to + express `umin(x, y)` directly as long as we don't try to express is + as a umax. Support this by adding an explicit umin/smin representation + to SCEV. We do this by factoring the existing getUMax/getSMax functions + into a new function that does all four. The previous two functions + were largely identical, except that the SMax variant used `isKnownPredicate` + while the UMax variant used `isKnownViaNonRecursiveReasoning`. + + Trying to make the UMax variant also use `isKnownPredicate` yields to + an infinite recursion, while trying to make the `SMax` variant use + `isKnownViaNonRecursiveReasoning` causes + `Transforms/IndVarSimplify/backedge-on-min-max.ll` to fail. + + I would appreciate any insight into which predicate is correct here. + + Reviewers: reames, sanjoy, mkazantsev + + Subscribers: dmgreen, vchuravy, javed.absar, llvm-commits + + Differential Revision: https://reviews.llvm.org/D50167 + +diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h +index 89918e3c205..73e6cc36254 100644 +--- a/include/llvm/Analysis/ScalarEvolution.h ++++ b/include/llvm/Analysis/ScalarEvolution.h +@@ -582,6 +582,8 @@ public: + /// \p IndexExprs The expressions for the indices. + const SCEV *getGEPExpr(GEPOperator *GEP, + const SmallVectorImpl &IndexExprs); ++ const SCEV *getUSMinMaxExpr(unsigned Kind, ++ SmallVectorImpl &Operands); + const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS); + const SCEV *getSMaxExpr(SmallVectorImpl &Operands); + const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS); +diff --git a/include/llvm/Analysis/ScalarEvolutionExpander.h b/include/llvm/Analysis/ScalarEvolutionExpander.h +index 58d42680d6b..57d658b157d 100644 +--- a/include/llvm/Analysis/ScalarEvolutionExpander.h ++++ b/include/llvm/Analysis/ScalarEvolutionExpander.h +@@ -368,6 +368,10 @@ namespace llvm { + + Value *visitUMaxExpr(const SCEVUMaxExpr *S); + ++ Value *visitSMinExpr(const SCEVSMinExpr *S); ++ ++ Value *visitUMinExpr(const SCEVUMinExpr *S); ++ + Value *visitUnknown(const SCEVUnknown *S) { + return S->getValue(); + } +diff --git a/include/llvm/Analysis/ScalarEvolutionExpressions.h b/include/llvm/Analysis/ScalarEvolutionExpressions.h +index 42e76094eb2..a4fc607b0c3 100644 +--- a/include/llvm/Analysis/ScalarEvolutionExpressions.h ++++ b/include/llvm/Analysis/ScalarEvolutionExpressions.h +@@ -36,33 +36,42 @@ class ConstantRange; + class Loop; + class Type; + +- enum SCEVTypes { +- // These should be ordered in terms of increasing complexity to make the +- // folders simpler. +- scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr, +- scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, +- scUnknown, scCouldNotCompute +- }; +- +- /// This class represents a constant integer value. +- class SCEVConstant : public SCEV { +- friend class ScalarEvolution; +- +- ConstantInt *V; +- +- SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) : +- SCEV(ID, scConstant), V(v) {} +- +- public: +- ConstantInt *getValue() const { return V; } +- const APInt &getAPInt() const { return getValue()->getValue(); } +- +- Type *getType() const { return V->getType(); } +- +- /// Methods for support type inquiry through isa, cast, and dyn_cast: +- static bool classof(const SCEV *S) { +- return S->getSCEVType() == scConstant; +- } ++enum SCEVTypes { ++ // These should be ordered in terms of increasing complexity to make the ++ // folders simpler. ++ scConstant, ++ scTruncate, ++ scZeroExtend, ++ scSignExtend, ++ scAddExpr, ++ scMulExpr, ++ scUDivExpr, ++ scAddRecExpr, ++ scUMaxExpr, ++ scSMaxExpr, ++ scUMinExpr, ++ scSMinExpr, ++ scUnknown, ++ scCouldNotCompute ++}; ++ ++/// This class represents a constant integer value. ++class SCEVConstant : public SCEV { ++ friend class ScalarEvolution; ++ ++ ConstantInt *V; ++ ++ SCEVConstant(const FoldingSetNodeIDRef ID, ConstantInt *v) ++ : SCEV(ID, scConstant), V(v) {} ++ ++public: ++ ConstantInt *getValue() const { return V; } ++ const APInt &getAPInt() const { return getValue()->getValue(); } ++ ++ Type *getType() const { return V->getType(); } ++ ++ /// Methods for support type inquiry through isa, cast, and dyn_cast: ++ static bool classof(const SCEV *S) { return S->getSCEVType() == scConstant; } + }; + + /// This is the base class for unary cast operator classes. +@@ -183,10 +192,9 @@ class Type; + + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { +- return S->getSCEVType() == scAddExpr || +- S->getSCEVType() == scMulExpr || +- S->getSCEVType() == scSMaxExpr || +- S->getSCEVType() == scUMaxExpr || ++ return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || ++ S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr || ++ S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr || + S->getSCEVType() == scAddRecExpr; + } + }; +@@ -201,10 +209,9 @@ class Type; + public: + /// Methods for support type inquiry through isa, cast, and dyn_cast: + static bool classof(const SCEV *S) { +- return S->getSCEVType() == scAddExpr || +- S->getSCEVType() == scMulExpr || +- S->getSCEVType() == scSMaxExpr || +- S->getSCEVType() == scUMaxExpr; ++ return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr || ++ S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr || ++ S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr; + } + + /// Set flags for a non-recurrence without clearing previously set flags. +@@ -394,6 +401,40 @@ class Type; + } + }; + ++ /// This class represents a signed minimum selection. ++ class SCEVSMinExpr : public SCEVCommutativeExpr { ++ friend class ScalarEvolution; ++ ++ SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) ++ : SCEVCommutativeExpr(ID, scSMinExpr, O, N) { ++ // Min never overflows. ++ setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); ++ } ++ ++ public: ++ /// Methods for support type inquiry through isa, cast, and dyn_cast: ++ static bool classof(const SCEV *S) { ++ return S->getSCEVType() == scSMinExpr; ++ } ++ }; ++ ++ /// This class represents an unsigned minimum selection. ++ class SCEVUMinExpr : public SCEVCommutativeExpr { ++ friend class ScalarEvolution; ++ ++ SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N) ++ : SCEVCommutativeExpr(ID, scUMinExpr, O, N) { ++ // Min never overflows. ++ setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW)); ++ } ++ ++ public: ++ /// Methods for support type inquiry through isa, cast, and dyn_cast: ++ static bool classof(const SCEV *S) { ++ return S->getSCEVType() == scUMinExpr; ++ } ++ }; ++ + /// This means that we are dealing with an entirely unknown SCEV + /// value, and only represent it as its LLVM Value. This is the + /// "bottom" value for the analysis. +@@ -466,6 +507,10 @@ class Type; + return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S); + case scUMaxExpr: + return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S); ++ case scSMinExpr: ++ return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S); ++ case scUMinExpr: ++ return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S); + case scUnknown: + return ((SC*)this)->visitUnknown((const SCEVUnknown*)S); + case scCouldNotCompute: +@@ -519,6 +564,8 @@ class Type; + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: ++ case scSMinExpr: ++ case scUMinExpr: + case scAddRecExpr: + for (const auto *Op : cast(S)->operands()) + push(Op); +@@ -681,6 +728,26 @@ class Type; + return !Changed ? Expr : SE.getUMaxExpr(Operands); + } + ++ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) { ++ SmallVector Operands; ++ bool Changed = false; ++ for (auto *Op : Expr->operands()) { ++ Operands.push_back(((SC *)this)->visit(Op)); ++ Changed |= Op != Operands.back(); ++ } ++ return !Changed ? Expr : SE.getSMinExpr(Operands); ++ } ++ ++ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) { ++ SmallVector Operands; ++ bool Changed = false; ++ for (auto *Op : Expr->operands()) { ++ Operands.push_back(((SC *)this)->visit(Op)); ++ Changed |= Op != Operands.back(); ++ } ++ return !Changed ? Expr : SE.getUMinExpr(Operands); ++ } ++ + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + return Expr; + } +diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp +index 0e715b8814f..19dab859a00 100644 +--- a/lib/Analysis/ScalarEvolution.cpp ++++ b/lib/Analysis/ScalarEvolution.cpp +@@ -267,7 +267,9 @@ void SCEV::print(raw_ostream &OS) const { + case scAddExpr: + case scMulExpr: + case scUMaxExpr: +- case scSMaxExpr: { ++ case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: { + const SCEVNAryExpr *NAry = cast(this); + const char *OpStr = nullptr; + switch (NAry->getSCEVType()) { +@@ -275,6 +277,12 @@ void SCEV::print(raw_ostream &OS) const { + case scMulExpr: OpStr = " * "; break; + case scUMaxExpr: OpStr = " umax "; break; + case scSMaxExpr: OpStr = " smax "; break; ++ case scUMinExpr: ++ OpStr = " umin "; ++ break; ++ case scSMinExpr: ++ OpStr = " smin "; ++ break; + } + OS << "("; + for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); +@@ -343,6 +351,8 @@ Type *SCEV::getType() const { + case scMulExpr: + case scUMaxExpr: + case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: + return cast(this)->getType(); + case scAddExpr: + return cast(this)->getType(); +@@ -711,7 +721,9 @@ static int CompareSCEVComplexity( + case scAddExpr: + case scMulExpr: + case scSMaxExpr: +- case scUMaxExpr: { ++ case scUMaxExpr: ++ case scSMinExpr: ++ case scUMinExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); + +@@ -915,6 +927,8 @@ public: + void visitUDivExpr(const SCEVUDivExpr *Numerator) {} + void visitSMaxExpr(const SCEVSMaxExpr *Numerator) {} + void visitUMaxExpr(const SCEVUMaxExpr *Numerator) {} ++ void visitSMinExpr(const SCEVSMinExpr *Numerator) {} ++ void visitUMinExpr(const SCEVUMinExpr *Numerator) {} + void visitUnknown(const SCEVUnknown *Numerator) {} + void visitCouldNotCompute(const SCEVCouldNotCompute *Numerator) {} + +@@ -3488,23 +3502,21 @@ ScalarEvolution::getGEPExpr(GEPOperator *GEP, + return getAddExpr(BaseExpr, TotalOffset, Wrap); + } + +-const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, +- const SCEV *RHS) { +- SmallVector Ops = {LHS, RHS}; +- return getSMaxExpr(Ops); +-} +- + const SCEV * +-ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { +- assert(!Ops.empty() && "Cannot get empty smax!"); ++ScalarEvolution::getUSMinMaxExpr(unsigned Kind, ++ SmallVectorImpl &Ops) { ++ assert(!Ops.empty() && "Cannot get empty (u|s)(min|max)!"); + if (Ops.size() == 1) return Ops[0]; + #ifndef NDEBUG + Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); + for (unsigned i = 1, e = Ops.size(); i != e; ++i) + assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && +- "SCEVSMaxExpr operand types don't match!"); ++ "Operand types don't match!"); + #endif + ++ bool IsSigned = Kind == scSMaxExpr || Kind == scSMinExpr; ++ bool IsMax = Kind == scSMaxExpr || Kind == scUMaxExpr; ++ + // Sort by complexity, this groups all similar expression types together. + GroupByComplexity(Ops, &LI, DT); + +@@ -3513,61 +3525,91 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { + if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { + ++Idx; + assert(Idx < Ops.size()); ++ auto &FoldOp = ++ Kind == scSMaxExpr ++ ? APIntOps::smax ++ : Kind == scSMinExpr ++ ? APIntOps::smin ++ : Kind == scUMaxExpr ? APIntOps::umax : APIntOps::umin; + while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { + // We found two constants, fold them together! + ConstantInt *Fold = ConstantInt::get( +- getContext(), APIntOps::smax(LHSC->getAPInt(), RHSC->getAPInt())); ++ getContext(), FoldOp(LHSC->getAPInt(), RHSC->getAPInt())); + Ops[0] = getConstant(Fold); + Ops.erase(Ops.begin()+1); // Erase the folded element + if (Ops.size() == 1) return Ops[0]; + LHSC = cast(Ops[0]); + } + +- // If we are left with a constant minimum-int, strip it off. +- if (cast(Ops[0])->getValue()->isMinValue(true)) { +- Ops.erase(Ops.begin()); +- --Idx; +- } else if (cast(Ops[0])->getValue()->isMaxValue(true)) { +- // If we have an smax with a constant maximum-int, it will always be +- // maximum-int. +- return Ops[0]; ++ if (IsMax) { ++ // If we are left with a constant minimum-int, strip it off. ++ if (cast(Ops[0])->getValue()->isMinValue(IsSigned)) { ++ Ops.erase(Ops.begin()); ++ --Idx; ++ } else if (cast(Ops[0])->getValue()->isMaxValue(IsSigned)) { ++ // If we have an smax with a constant maximum-int, it will always be ++ // maximum-int. ++ return Ops[0]; ++ } ++ } else { ++ // If we are left with a constant maximum-int, strip it off. ++ if (cast(Ops[0])->getValue()->isMaxValue(IsSigned)) { ++ Ops.erase(Ops.begin()); ++ --Idx; ++ } else if (cast(Ops[0])->getValue()->isMinValue(IsSigned)) { ++ // If we have an smax with a constant minimum-int, it will always be ++ // maximum-int. ++ return Ops[0]; ++ } + } + + if (Ops.size() == 1) return Ops[0]; + } + +- // Find the first SMax +- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scSMaxExpr) ++ // Find the first operation of the same kind ++ while (Idx < Ops.size() && Ops[Idx]->getSCEVType() != Kind) + ++Idx; + + // Check to see if one of the operands is an SMax. If so, expand its operands + // onto our operand list, and recurse to simplify. + if (Idx < Ops.size()) { +- bool DeletedSMax = false; +- while (const SCEVSMaxExpr *SMax = dyn_cast(Ops[Idx])) { ++ bool DeletedAny = false; ++ while (Ops[Idx]->getSCEVType() == Kind) { ++ const SCEVCommutativeExpr *SCE = cast(Ops[Idx]); + Ops.erase(Ops.begin()+Idx); +- Ops.append(SMax->op_begin(), SMax->op_end()); +- DeletedSMax = true; ++ Ops.append(SCE->op_begin(), SCE->op_end()); ++ DeletedAny = true; + } + +- if (DeletedSMax) +- return getSMaxExpr(Ops); ++ if (DeletedAny) ++ return getUSMinMaxExpr(Kind, Ops); + } + + // Okay, check to see if the same value occurs in the operand list twice. If + // so, delete one. Since we sorted the list, these values are required to + // be adjacent. +- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) +- // X smax Y smax Y --> X smax Y +- // X smax Y --> X, if X is always greater than Y +- if (Ops[i] == Ops[i+1] || +- isKnownPredicate(ICmpInst::ICMP_SGE, Ops[i], Ops[i+1])) { +- Ops.erase(Ops.begin()+i+1, Ops.begin()+i+2); +- --i; --e; +- } else if (isKnownPredicate(ICmpInst::ICMP_SLE, Ops[i], Ops[i+1])) { +- Ops.erase(Ops.begin()+i, Ops.begin()+i+1); +- --i; --e; ++ llvm::CmpInst::Predicate GEPred = ++ IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; ++ llvm::CmpInst::Predicate LEPred = ++ IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; ++ llvm::CmpInst::Predicate FirstPred = IsMax ? GEPred : LEPred; ++ llvm::CmpInst::Predicate SecondPred = IsMax ? LEPred : GEPred; ++ for (unsigned i = 0, e = Ops.size() - 1; i != e; ++i) { ++ if (Ops[i] == Ops[i + 1] || ++ isKnownViaNonRecursiveReasoning(FirstPred, Ops[i], Ops[i + 1])) { ++ // X op Y op Y --> X op Y ++ // X op Y --> X, if we know X, Y are ordered appropriately ++ Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); ++ --i; ++ --e; ++ } else if (isKnownViaNonRecursiveReasoning(SecondPred, Ops[i], ++ Ops[i + 1])) { ++ // X op Y --> Y, if we know X, Y are ordered appropriately ++ Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); ++ --i; ++ --e; + } ++ } + + if (Ops.size() == 1) return Ops[0]; + +@@ -3576,121 +3618,51 @@ ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { + // Okay, it looks like we really DO need an smax expr. Check to see if we + // already have one, otherwise create a new one. + FoldingSetNodeID ID; +- ID.AddInteger(scSMaxExpr); ++ ID.AddInteger(Kind); + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + ID.AddPointer(Ops[i]); + void *IP = nullptr; + if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; + const SCEV **O = SCEVAllocator.Allocate(Ops.size()); + std::uninitialized_copy(Ops.begin(), Ops.end(), O); +- SCEV *S = new (SCEVAllocator) SCEVSMaxExpr(ID.Intern(SCEVAllocator), +- O, Ops.size()); ++ SCEV *S = nullptr; ++ ++ if (Kind == scSMaxExpr) { ++ S = new (SCEVAllocator) ++ SCEVSMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); ++ } else if (Kind == scUMaxExpr) { ++ S = new (SCEVAllocator) ++ SCEVUMaxExpr(ID.Intern(SCEVAllocator), O, Ops.size()); ++ } else if (Kind == scSMinExpr) { ++ S = new (SCEVAllocator) ++ SCEVSMinExpr(ID.Intern(SCEVAllocator), O, Ops.size()); ++ } else { ++ assert(Kind == scUMinExpr); ++ S = new (SCEVAllocator) ++ SCEVUMinExpr(ID.Intern(SCEVAllocator), O, Ops.size()); ++ } ++ + UniqueSCEVs.InsertNode(S, IP); + addToLoopUseLists(S); + return S; + } + +-const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, +- const SCEV *RHS) { ++const SCEV *ScalarEvolution::getSMaxExpr(const SCEV *LHS, const SCEV *RHS) { + SmallVector Ops = {LHS, RHS}; +- return getUMaxExpr(Ops); ++ return getSMaxExpr(Ops); + } + +-const SCEV * +-ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { +- assert(!Ops.empty() && "Cannot get empty umax!"); +- if (Ops.size() == 1) return Ops[0]; +-#ifndef NDEBUG +- Type *ETy = getEffectiveSCEVType(Ops[0]->getType()); +- for (unsigned i = 1, e = Ops.size(); i != e; ++i) +- assert(getEffectiveSCEVType(Ops[i]->getType()) == ETy && +- "SCEVUMaxExpr operand types don't match!"); +-#endif +- +- // Sort by complexity, this groups all similar expression types together. +- GroupByComplexity(Ops, &LI, DT); +- +- // If there are any constants, fold them together. +- unsigned Idx = 0; +- if (const SCEVConstant *LHSC = dyn_cast(Ops[0])) { +- ++Idx; +- assert(Idx < Ops.size()); +- while (const SCEVConstant *RHSC = dyn_cast(Ops[Idx])) { +- // We found two constants, fold them together! +- ConstantInt *Fold = ConstantInt::get( +- getContext(), APIntOps::umax(LHSC->getAPInt(), RHSC->getAPInt())); +- Ops[0] = getConstant(Fold); +- Ops.erase(Ops.begin()+1); // Erase the folded element +- if (Ops.size() == 1) return Ops[0]; +- LHSC = cast(Ops[0]); +- } +- +- // If we are left with a constant minimum-int, strip it off. +- if (cast(Ops[0])->getValue()->isMinValue(false)) { +- Ops.erase(Ops.begin()); +- --Idx; +- } else if (cast(Ops[0])->getValue()->isMaxValue(false)) { +- // If we have an umax with a constant maximum-int, it will always be +- // maximum-int. +- return Ops[0]; +- } +- +- if (Ops.size() == 1) return Ops[0]; +- } +- +- // Find the first UMax +- while (Idx < Ops.size() && Ops[Idx]->getSCEVType() < scUMaxExpr) +- ++Idx; +- +- // Check to see if one of the operands is a UMax. If so, expand its operands +- // onto our operand list, and recurse to simplify. +- if (Idx < Ops.size()) { +- bool DeletedUMax = false; +- while (const SCEVUMaxExpr *UMax = dyn_cast(Ops[Idx])) { +- Ops.erase(Ops.begin()+Idx); +- Ops.append(UMax->op_begin(), UMax->op_end()); +- DeletedUMax = true; +- } +- +- if (DeletedUMax) +- return getUMaxExpr(Ops); +- } +- +- // Okay, check to see if the same value occurs in the operand list twice. If +- // so, delete one. Since we sorted the list, these values are required to +- // be adjacent. +- for (unsigned i = 0, e = Ops.size()-1; i != e; ++i) +- // X umax Y umax Y --> X umax Y +- // X umax Y --> X, if X is always greater than Y +- if (Ops[i] == Ops[i + 1] || isKnownViaNonRecursiveReasoning( +- ICmpInst::ICMP_UGE, Ops[i], Ops[i + 1])) { +- Ops.erase(Ops.begin() + i + 1, Ops.begin() + i + 2); +- --i; --e; +- } else if (isKnownViaNonRecursiveReasoning(ICmpInst::ICMP_ULE, Ops[i], +- Ops[i + 1])) { +- Ops.erase(Ops.begin() + i, Ops.begin() + i + 1); +- --i; --e; +- } +- +- if (Ops.size() == 1) return Ops[0]; ++const SCEV *ScalarEvolution::getSMaxExpr(SmallVectorImpl &Ops) { ++ return getUSMinMaxExpr(scSMaxExpr, Ops); ++} + +- assert(!Ops.empty() && "Reduced umax down to nothing!"); ++const SCEV *ScalarEvolution::getUMaxExpr(const SCEV *LHS, const SCEV *RHS) { ++ SmallVector Ops = {LHS, RHS}; ++ return getUMaxExpr(Ops); ++} + +- // Okay, it looks like we really DO need a umax expr. Check to see if we +- // already have one, otherwise create a new one. +- FoldingSetNodeID ID; +- ID.AddInteger(scUMaxExpr); +- for (unsigned i = 0, e = Ops.size(); i != e; ++i) +- ID.AddPointer(Ops[i]); +- void *IP = nullptr; +- if (const SCEV *S = UniqueSCEVs.FindNodeOrInsertPos(ID, IP)) return S; +- const SCEV **O = SCEVAllocator.Allocate(Ops.size()); +- std::uninitialized_copy(Ops.begin(), Ops.end(), O); +- SCEV *S = new (SCEVAllocator) SCEVUMaxExpr(ID.Intern(SCEVAllocator), +- O, Ops.size()); +- UniqueSCEVs.InsertNode(S, IP); +- addToLoopUseLists(S); +- return S; ++const SCEV *ScalarEvolution::getUMaxExpr(SmallVectorImpl &Ops) { ++ return getUSMinMaxExpr(scUMaxExpr, Ops); + } + + const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, +@@ -3700,11 +3672,7 @@ const SCEV *ScalarEvolution::getSMinExpr(const SCEV *LHS, + } + + const SCEV *ScalarEvolution::getSMinExpr(SmallVectorImpl &Ops) { +- // ~smax(~x, ~y, ~z) == smin(x, y, z). +- SmallVector NotOps; +- for (auto *S : Ops) +- NotOps.push_back(getNotSCEV(S)); +- return getNotSCEV(getSMaxExpr(NotOps)); ++ return getUSMinMaxExpr(scSMinExpr, Ops); + } + + const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, +@@ -3714,16 +3682,7 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, + } + + const SCEV *ScalarEvolution::getUMinExpr(SmallVectorImpl &Ops) { +- assert(!Ops.empty() && "At least one operand must be!"); +- // Trivial case. +- if (Ops.size() == 1) +- return Ops[0]; +- +- // ~umax(~x, ~y, ~z) == umin(x, y, z). +- SmallVector NotOps; +- for (auto *S : Ops) +- NotOps.push_back(getNotSCEV(S)); +- return getNotSCEV(getUMaxExpr(NotOps)); ++ return getUSMinMaxExpr(scUMinExpr, Ops); + } + + const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { +@@ -5191,6 +5150,8 @@ static bool IsAvailableOnEntry(const Loop *L, DominatorTree &DT, const SCEV *S, + switch (S->getSCEVType()) { + case scConstant: case scTruncate: case scZeroExtend: case scSignExtend: + case scAddExpr: case scMulExpr: case scUMaxExpr: case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: + // These expressions are available if their operand(s) is/are. + return true; + +@@ -8070,7 +8031,9 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { + } + case scSMaxExpr: + case scUMaxExpr: +- break; // TODO: smax, umax. ++ case scSMinExpr: ++ case scUMinExpr: ++ break; // TODO: smax, umax, smin, umax. + } + return nullptr; + } +@@ -8200,6 +8163,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) { + return getSMaxExpr(NewOps); + if (isa(Comm)) + return getUMaxExpr(NewOps); ++ if (isa(Comm)) ++ return getSMinExpr(NewOps); ++ if (isa(Comm)) ++ return getUMinExpr(NewOps); + llvm_unreachable("Unknown commutative SCEV type!"); + } + } +@@ -9859,26 +9826,28 @@ static const SCEV *MatchNotExpr(const SCEV *Expr) { + return AddRHS->getOperand(1); + } + +-/// Is MaybeMaxExpr an SMax or UMax of Candidate and some other values? +-template +-static bool IsMaxConsistingOf(const SCEV *MaybeMaxExpr, +- const SCEV *Candidate) { +- const MaxExprType *MaxExpr = dyn_cast(MaybeMaxExpr); +- if (!MaxExpr) return false; ++/// Is MaybeMinMaxExpr an (U|S)(Min|Max) of Candidate and some other values? ++template ++static bool IsMinMaxConsistingOf(const SCEV *MaybeMinMaxExpr, ++ const SCEV *Candidate) { ++ const MinMaxExprType *MinMaxExpr = dyn_cast(MaybeMinMaxExpr); ++ if (!MinMaxExpr) ++ return false; + +- return find(MaxExpr->operands(), Candidate) != MaxExpr->op_end(); ++ return find(MinMaxExpr->operands(), Candidate) != MinMaxExpr->op_end(); + } + +-/// Is MaybeMinExpr an SMin or UMin of Candidate and some other values? +-template +-static bool IsMinConsistingOf(ScalarEvolution &SE, +- const SCEV *MaybeMinExpr, +- const SCEV *Candidate) { +- const SCEV *MaybeMaxExpr = MatchNotExpr(MaybeMinExpr); +- if (!MaybeMaxExpr) ++/// Is MaybeMinMaxExpr an ~(U|S)(Min|Max) of ~Candidate and some other values? ++template ++static bool IsMinMaxConsistingOfByNegation(ScalarEvolution &SE, ++ const SCEV *MaybeMinMaxExpr, ++ const SCEV *Candidate) { ++ const SCEV *MinMaxExpr = MatchNotExpr(MaybeMinMaxExpr); ++ if (!MinMaxExpr) + return false; + +- return IsMaxConsistingOf(MaybeMaxExpr, SE.getNotSCEV(Candidate)); ++ return IsMinMaxConsistingOf(MinMaxExpr, ++ SE.getNotSCEV(Candidate)); + } + + static bool IsKnownPredicateViaAddRecStart(ScalarEvolution &SE, +@@ -9927,20 +9896,24 @@ static bool IsKnownPredicateViaMinOrMax(ScalarEvolution &SE, + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_SLE: + return +- // min(A, ...) <= A +- IsMinConsistingOf(SE, LHS, RHS) || +- // A <= max(A, ...) +- IsMaxConsistingOf(RHS, LHS); ++ // min(A, ...) <= A ++ IsMinMaxConsistingOfByNegation(SE, LHS, RHS) || ++ IsMinMaxConsistingOf(LHS, RHS) || ++ // A <= max(A, ...) ++ IsMinMaxConsistingOfByNegation(SE, RHS, LHS) || ++ IsMinMaxConsistingOf(RHS, LHS); + + case ICmpInst::ICMP_UGE: + std::swap(LHS, RHS); + LLVM_FALLTHROUGH; + case ICmpInst::ICMP_ULE: + return +- // min(A, ...) <= A +- IsMinConsistingOf(SE, LHS, RHS) || +- // A <= max(A, ...) +- IsMaxConsistingOf(RHS, LHS); ++ // min(A, ...) <= A ++ IsMinMaxConsistingOfByNegation(SE, LHS, RHS) || ++ IsMinMaxConsistingOf(LHS, RHS) || ++ // A <= max(A, ...) ++ IsMinMaxConsistingOfByNegation(SE, RHS, LHS) || ++ IsMinMaxConsistingOf(RHS, LHS); + } + + llvm_unreachable("covered switch fell through?!"); +@@ -11451,7 +11424,9 @@ ScalarEvolution::computeLoopDisposition(const SCEV *S, const Loop *L) { + case scAddExpr: + case scMulExpr: + case scUMaxExpr: +- case scSMaxExpr: { ++ case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: { + bool HasVarying = false; + for (auto *Op : cast(S)->operands()) { + LoopDisposition D = getLoopDisposition(Op, L); +@@ -11538,7 +11513,9 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { + case scAddExpr: + case scMulExpr: + case scUMaxExpr: +- case scSMaxExpr: { ++ case scSMaxExpr: ++ case scUMinExpr: ++ case scSMinExpr: { + const SCEVNAryExpr *NAry = cast(S); + bool Proper = true; + for (const SCEV *NAryOp : NAry->operands()) { +diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp +index 8f89389c4b5..07b8156f61b 100644 +--- a/lib/Analysis/ScalarEvolutionExpander.cpp ++++ b/lib/Analysis/ScalarEvolutionExpander.cpp +@@ -1634,14 +1634,15 @@ Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. +- if (S->getOperand(i)->getType() != Ty) { ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); + rememberInstruction(ICmp); +- Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); ++ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); + rememberInstruction(Sel); + LHS = Sel; + } +@@ -1658,13 +1659,64 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { + for (int i = S->getNumOperands()-2; i >= 0; --i) { + // In the case of mixed integer and pointer types, do the + // rest of the comparisons as integer. +- if (S->getOperand(i)->getType() != Ty) { ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { + Ty = SE.getEffectiveSCEVType(Ty); + LHS = InsertNoopCastOfTo(LHS, Ty); + } + Value *RHS = expandCodeFor(S->getOperand(i), Ty); + Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); + rememberInstruction(ICmp); ++ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin"); ++ rememberInstruction(Sel); ++ LHS = Sel; ++ } ++ // In the case of mixed integer and pointer types, cast the ++ // final result back to the pointer type. ++ if (LHS->getType() != S->getType()) ++ LHS = InsertNoopCastOfTo(LHS, S->getType()); ++ return LHS; ++} ++ ++Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { ++ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); ++ Type *Ty = LHS->getType(); ++ for (int i = S->getNumOperands() - 2; i >= 0; --i) { ++ // In the case of mixed integer and pointer types, do the ++ // rest of the comparisons as integer. ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { ++ Ty = SE.getEffectiveSCEVType(Ty); ++ LHS = InsertNoopCastOfTo(LHS, Ty); ++ } ++ Value *RHS = expandCodeFor(S->getOperand(i), Ty); ++ Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); ++ rememberInstruction(ICmp); ++ Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); ++ rememberInstruction(Sel); ++ LHS = Sel; ++ } ++ // In the case of mixed integer and pointer types, cast the ++ // final result back to the pointer type. ++ if (LHS->getType() != S->getType()) ++ LHS = InsertNoopCastOfTo(LHS, S->getType()); ++ return LHS; ++} ++ ++Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { ++ Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); ++ Type *Ty = LHS->getType(); ++ for (int i = S->getNumOperands() - 2; i >= 0; --i) { ++ // In the case of mixed integer and pointer types, do the ++ // rest of the comparisons as integer. ++ Type *OpTy = S->getOperand(i)->getType(); ++ if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { ++ Ty = SE.getEffectiveSCEVType(Ty); ++ LHS = InsertNoopCastOfTo(LHS, Ty); ++ } ++ Value *RHS = expandCodeFor(S->getOperand(i), Ty); ++ Value *ICmp = Builder.CreateICmpULT(LHS, RHS); ++ rememberInstruction(ICmp); + Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); + rememberInstruction(Sel); + LHS = Sel; +diff --git a/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll +new file mode 100644 +index 00000000000..a08632f38d1 +--- /dev/null ++++ b/test/Analysis/LoopAccessAnalysis/memcheck-ni.ll +@@ -0,0 +1,50 @@ ++; RUN: opt -loop-versioning -S < %s | FileCheck %s ++ ++; NB: addrspaces 10-13 are non-integral ++target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13" ++ ++%jl_value_t = type opaque ++%jl_array_t = type { i8 addrspace(13)*, i64, i16, i16, i32 } ++ ++define void @"japi1_permutedims!_33509"(%jl_value_t addrspace(10)**) { ++; CHECK: [[CMP:%[^ ]*]] = icmp ult double addrspace(13)* [[A:%[^ ]*]], [[B:%[^ ]*]] ++; CHECK: [[SELECT:%[^ ]*]] = select i1 %18, double addrspace(13)* [[A]], double addrspace(13)* [[B]] ++top: ++ %1 = alloca [3 x i64], align 8 ++ %2 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, align 8 ++ %3 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %0, i64 1 ++ %4 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %3, align 8 ++ %5 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 0 ++ store i64 1, i64* %5, align 8 ++ %6 = getelementptr inbounds [3 x i64], [3 x i64]* %1, i64 0, i64 1 ++ %7 = load i64, i64* inttoptr (i64 24 to i64*), align 8 ++ %8 = addrspacecast %jl_value_t addrspace(10)* %4 to %jl_value_t addrspace(11)* ++ %9 = bitcast %jl_value_t addrspace(11)* %8 to double addrspace(13)* addrspace(11)* ++ %10 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %9, align 8 ++ %11 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)* ++ %12 = bitcast %jl_value_t addrspace(11)* %11 to double addrspace(13)* addrspace(11)* ++ %13 = load double addrspace(13)*, double addrspace(13)* addrspace(11)* %12, align 8 ++ %14 = load i64, i64* %6, align 8 ++ br label %L74 ++ ++L74: ++ %value_phi20 = phi i64 [ 1, %top ], [ %22, %L74 ] ++ %value_phi21 = phi i64 [ 1, %top ], [ %23, %L74 ] ++ %value_phi22 = phi i64 [ 1, %top ], [ %25, %L74 ] ++ %15 = add i64 %value_phi21, -1 ++ %16 = getelementptr inbounds double, double addrspace(13)* %10, i64 %15 ++ %17 = bitcast double addrspace(13)* %16 to i64 addrspace(13)* ++ %18 = load i64, i64 addrspace(13)* %17, align 8 ++ %19 = add i64 %value_phi20, -1 ++ %20 = getelementptr inbounds double, double addrspace(13)* %13, i64 %19 ++ %21 = bitcast double addrspace(13)* %20 to i64 addrspace(13)* ++ store i64 %18, i64 addrspace(13)* %21, align 8 ++ %22 = add i64 %value_phi20, 1 ++ %23 = add i64 %14, %value_phi21 ++ %24 = icmp eq i64 %value_phi22, %7 ++ %25 = add i64 %value_phi22, 1 ++ br i1 %24, label %L94, label %L74 ++ ++L94: ++ ret void ++} +diff --git a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +index 405a47554e4..4285ef0f117 100644 +--- a/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll ++++ b/test/Analysis/LoopAccessAnalysis/reverse-memcheck-bounds.ll +@@ -58,7 +58,7 @@ for.end: ; preds = %for.body + + ; Here it is not obvious what the limits are, since 'step' could be negative. + +-; CHECK: Low: (-1 + (-1 * ((-60001 + (-1 * %a)) umax (-60001 + (40000 * %step) + (-1 * %a))))) ++; CHECK: Low: ((60000 + %a) umin (60000 + (-40000 * %step) + %a)) + ; CHECK: High: (4 + ((60000 + %a) umax (60000 + (-40000 * %step) + %a))) + + define void @g(i64 %step) { +diff --git a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +index 3542ad2a41e..53e024a68fb 100644 +--- a/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll ++++ b/test/Analysis/ScalarEvolution/2008-07-29-SMinExpr.ll +@@ -22,5 +22,5 @@ afterfor: ; preds = %forinc, %entry + ret i32 %j.0.lcssa + } + +-; CHECK: backedge-taken count is (-2147483632 + ((-1 + (-1 * %{{[xy]}})) smax (-1 + (-1 * %{{[xy]}})))) ++; CHECK: backedge-taken count is (-2147483633 + (-1 * (%x smin %y))) + +diff --git a/test/Analysis/ScalarEvolution/min-max-exprs.ll b/test/Analysis/ScalarEvolution/min-max-exprs.ll +index e8c1e33e095..51f72c643cc 100644 +--- a/test/Analysis/ScalarEvolution/min-max-exprs.ll ++++ b/test/Analysis/ScalarEvolution/min-max-exprs.ll +@@ -33,7 +33,7 @@ bb2: ; preds = %bb1 + %tmp9 = select i1 %tmp4, i64 %tmp5, i64 %tmp6 + ; min(N, i+3) + ; CHECK: select i1 %tmp4, i64 %tmp5, i64 %tmp6 +-; CHECK-NEXT: --> (-1 + (-1 * ((-1 + (-1 * (sext i32 {3,+,1}<%bb1> to i64))) smax (-1 + (-1 * (sext i32 %N to i64)))))) ++; CHECK-NEXT: --> ((sext i32 {3,+,1}<%bb1> to i64) smin (sext i32 %N to i64)) + %tmp11 = getelementptr inbounds i32, i32* %A, i64 %tmp9 + %tmp12 = load i32, i32* %tmp11, align 4 + %tmp13 = shl nsw i32 %tmp12, 1 +diff --git a/test/Analysis/ScalarEvolution/pr28705.ll b/test/Analysis/ScalarEvolution/pr28705.ll +index 8fbc08e3ca6..7d797a15bd5 100644 +--- a/test/Analysis/ScalarEvolution/pr28705.ll ++++ b/test/Analysis/ScalarEvolution/pr28705.ll +@@ -5,7 +5,7 @@ + ; with "%.sroa.speculated + 1". + ; + ; CHECK-LABEL: @foo( +-; CHECK: %[[EXIT:.+]] = sub i32 %.sroa.speculated, -1 ++; CHECK: %[[EXIT:.+]] = add i32 %.sroa.speculated, 1 + ; CHECK: %DB.sroa.9.0.lcssa = phi i32 [ 1, %entry ], [ %[[EXIT]], %loopexit ] + ; + define void @foo(i32 %sub.ptr.div.i, i8* %ref.i1174) local_unnamed_addr { +diff --git a/test/Analysis/ScalarEvolution/predicated-trip-count.ll b/test/Analysis/ScalarEvolution/predicated-trip-count.ll +index a0afcf457d2..b07662ed95f 100644 +--- a/test/Analysis/ScalarEvolution/predicated-trip-count.ll ++++ b/test/Analysis/ScalarEvolution/predicated-trip-count.ll +@@ -80,7 +80,7 @@ return: ; preds = %bb5 + ; CHECK-NEXT: --> (sext i16 {%Start,+,-1}<%bb3> to i32) + ; CHECK: Loop %bb3: Unpredictable backedge-taken count. + ; CHECK-NEXT: Loop %bb3: Unpredictable max backedge-taken count. +-; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (2 + (sext i16 %Start to i32) + ((-2 + (-1 * (sext i16 %Start to i32))) smax (-1 + (-1 * %M)))) ++; CHECK-NEXT: Loop %bb3: Predicated backedge-taken count is (1 + (sext i16 %Start to i32) + (-1 * ((1 + (sext i16 %Start to i32)) smin %M))) + ; CHECK-NEXT: Predicates: + ; CHECK-NEXT: {%Start,+,-1}<%bb3> Added Flags: + +diff --git a/test/Analysis/ScalarEvolution/trip-count14.ll b/test/Analysis/ScalarEvolution/trip-count14.ll +index 5e6cfe85101..15080613881 100644 +--- a/test/Analysis/ScalarEvolution/trip-count14.ll ++++ b/test/Analysis/ScalarEvolution/trip-count14.ll +@@ -81,7 +81,7 @@ if.end: + br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times + + ; CHECK-LABEL: Determining loop execution counts for: @s32_max2_unpredictable_exit +-; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) smax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) ++; CHECK-NEXT: Loop %do.body: backedge-taken count is (((-1 * %n) + ((2 + %n) smax %n)) umin ((-1 * %n) + %x)) + ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} + + do.end: +@@ -169,7 +169,7 @@ if.end: + br i1 %cmp1, label %do.body, label %do.end ; taken either 0 or 2 times + + ; CHECK-LABEL: Determining loop execution counts for: @u32_max2_unpredictable_exit +-; CHECK-NEXT: Loop %do.body: backedge-taken count is (-1 + (-1 * ((-1 + (-1 * ((2 + %n) umax %n)) + %n) umax (-1 + (-1 * %x) + %n)))) ++; CHECK-NEXT: Loop %do.body: backedge-taken count is (((-1 * %n) + ((2 + %n) umax %n)) umin ((-1 * %n) + %x)) + ; CHECK-NEXT: Loop %do.body: max backedge-taken count is 2{{$}} + + do.end: +diff --git a/test/Analysis/ScalarEvolution/trip-count3.ll b/test/Analysis/ScalarEvolution/trip-count3.ll +index df6637a4ced..e10012c0c32 100644 +--- a/test/Analysis/ScalarEvolution/trip-count3.ll ++++ b/test/Analysis/ScalarEvolution/trip-count3.ll +@@ -4,7 +4,7 @@ + ; dividing by the stride will have a remainder. This could theoretically + ; be teaching it how to use a more elaborate trip count computation. + +-; CHECK: Loop %bb3.i: backedge-taken count is ((64 + (-64 smax (-1 + (-1 * %0))) + %0) /u 64) ++; CHECK: Loop %bb3.i: backedge-taken count is ((63 + (-1 * (63 smin %0)) + %0) /u 64) + ; CHECK: Loop %bb3.i: max backedge-taken count is 33554431 + + %struct.FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct.FILE*, i32, i32, i64, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i64, i32, [20 x i8] } +diff --git a/test/Transforms/IRCE/conjunctive-checks.ll b/test/Transforms/IRCE/conjunctive-checks.ll +index 4e3cf354125..8711c1b00e8 100644 +--- a/test/Transforms/IRCE/conjunctive-checks.ll ++++ b/test/Transforms/IRCE/conjunctive-checks.ll +@@ -5,17 +5,15 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { + ; CHECK-LABEL: @f_0( + + ; CHECK: loop.preheader: +-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] +-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] +-; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ++; CHECK: [[len_sub:[^ ]+]] = add i32 %len, -4 ++; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[len_sub]] ++; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[len_sub]] + ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 + ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 + ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] +-; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit ++; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader2:[^ ,]+]], label %main.pseudo.exit + +-; CHECK: loop.preheader2: ++; CHECK: [[loop_preheader2]]: + ; CHECK: br label %loop + + entry: +@@ -35,9 +33,9 @@ define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) { + ; CHECK: loop: + ; CHECK: %cond = load volatile i1, i1* %cond_buf + ; CHECK: %abc = and i1 %cond, true +-; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit3, !prof !1 ++; CHECK: br i1 %abc, label %in.bounds, label %[[loop_exit:[^ ,]+]], !prof !1 + +-; CHECK: out.of.bounds.loopexit: ++; CHECK: [[loop_exit]]: + ; CHECK: br label %out.of.bounds + + in.bounds: +@@ -58,14 +56,10 @@ define void @f_1( + ; CHECK-LABEL: @f_1( + + ; CHECK: loop.preheader: +-; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b +-; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a +-; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] +-; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] +-; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] +-; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] ++; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a ++; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a ++; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n ++; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n + ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 + ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 + +@@ -85,9 +79,9 @@ define void @f_1( + + ; CHECK: loop: + ; CHECK: %abc = and i1 true, true +-; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit4, !prof !1 ++; CHECK: br i1 %abc, label %in.bounds, label %[[oob_loopexit:[^ ,]+]], !prof !1 + +-; CHECK: out.of.bounds.loopexit: ++; CHECK: [[oob_loopexit]]: + ; CHECK-NEXT: br label %out.of.bounds + + +diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll +index 4c82cd3e341..2994a432a71 100644 +--- a/test/Transforms/IRCE/decrementing-loop.ll ++++ b/test/Transforms/IRCE/decrementing-loop.ll +@@ -29,11 +29,8 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) { + ret void + + ; CHECK: loop.preheader: +-; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]] +-; CHECK: [[not_len_hiclamp:[^ ]+]] = select i1 [[not_len_hiclamp_cmp]], i32 [[not_len]], i32 [[not_n]] +-; CHECK: [[len_hiclamp:[^ ]+]] = sub i32 -1, [[not_len_hiclamp]] ++; CHECK: [[len_hiclamp_cmp:[^ ]+]] = icmp slt i32 %len, %n ++; CHECK: [[len_hiclamp:[^ ]+]] = select i1 [[len_hiclamp_cmp]], i32 %len, i32 %n + ; CHECK: [[not_exit_preloop_at_cmp:[^ ]+]] = icmp sgt i32 [[len_hiclamp]], 0 + ; CHECK: [[not_exit_preloop_at:[^ ]+]] = select i1 [[not_exit_preloop_at_cmp]], i32 [[len_hiclamp]], i32 0 + ; CHECK: %exit.preloop.at = add i32 [[not_exit_preloop_at]], -1 +diff --git a/test/Transforms/IRCE/multiple-access-no-preloop.ll b/test/Transforms/IRCE/multiple-access-no-preloop.ll +index 000d1ab36f2..3bde9bd8668 100644 +--- a/test/Transforms/IRCE/multiple-access-no-preloop.ll ++++ b/test/Transforms/IRCE/multiple-access-no-preloop.ll +@@ -38,14 +38,10 @@ define void @multiple_access_no_preloop( + ; CHECK-LABEL: @multiple_access_no_preloop( + + ; CHECK: loop.preheader: +-; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b +-; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a +-; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]] +-; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]] +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]] +-; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]] +-; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]] ++; CHECK: [[smax_len_cond:[^ ]+]] = icmp slt i32 %len.b, %len.a ++; CHECK: [[smax_len:[^ ]+]] = select i1 [[smax_len_cond]], i32 %len.b, i32 %len.a ++; CHECK: [[upper_limit_cond_loclamp:[^ ]+]] = icmp slt i32 [[smax_len]], %n ++; CHECK: [[upper_limit_loclamp:[^ ]+]] = select i1 [[upper_limit_cond_loclamp]], i32 [[smax_len]], i32 %n + ; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0 + ; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0 + +diff --git a/test/Transforms/IRCE/ranges_of_different_types.ll b/test/Transforms/IRCE/ranges_of_different_types.ll +index 5c8161369f2..46bd94ce687 100644 +--- a/test/Transforms/IRCE/ranges_of_different_types.ll ++++ b/test/Transforms/IRCE/ranges_of_different_types.ll +@@ -23,12 +23,11 @@ define void @test_01(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 + ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: loop +@@ -83,13 +82,11 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 + ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 + ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +-; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] +-; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: loop.preloop: + ; CHECK-NEXT: %idx.preloop = phi i32 [ %idx.next.preloop, %in.bounds.preloop ], [ 0, %loop.preloop.preheader ] +@@ -151,14 +148,11 @@ define void @test_03(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 +-; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 +-; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 ++; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 ++; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 + ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: postloop: +@@ -208,10 +202,9 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-LABEL: test_04( + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: in.bounds.preloop: + ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +@@ -252,12 +245,11 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 12, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, -13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SMAX]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SMAX]], i32 0 + ; CHECK-NEXT: [[GOTO_LOOP:%[^ ]+]] = icmp slt i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[GOTO_LOOP]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: loop +@@ -297,13 +289,11 @@ define void @test_06(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-NEXT: [[LEN_MINUS_SMAX:%[^ ]+]] = add i32 %len, -2147483647 + ; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[LEN_MINUS_SMAX]], -13 + ; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[LEN_MINUS_SMAX]], i32 -13 +-; CHECK-NEXT: [[ADD1:%[^ ]+]] = add i32 [[SMAX1]], -1 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 [[ADD1]], %len +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp sgt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, [[SMAX2]] +-; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SUB2]], 0 +-; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SUB2]], i32 0 ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp slt i32 [[SUB1]], 101 ++; CHECK-NEXT: [[SMAX2:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB1]], i32 101 ++; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp sgt i32 [[SMAX2]], 0 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP3]], i32 [[SMAX2]], i32 0 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: in.bounds.preloop: + ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +@@ -344,14 +334,11 @@ define void @test_07(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-NOT: preloop + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -2, %len +-; CHECK-NEXT: [[SUB2:%[^ ]+]] = sub i32 -1, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp sgt i32 [[SUB2]], -14 +-; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB2]], i32 -14 +-; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 [[SUB1]], [[SMAX1]] +-; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ugt i32 [[SUB3]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP2]], i32 [[SUB3]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp slt i32 %len, 13 ++; CHECK-NEXT: [[SMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 %len, i32 13 ++; CHECK-NEXT: [[SUB3:%[^ ]+]] = sub i32 %len, [[SMAX1]] ++; CHECK-NEXT: [[CMP2:%[^ ]+]] = icmp ult i32 [[SUB3]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP2]], i32 [[SUB3]], i32 101 + ; CHECK-NEXT: [[CMP3:%[^ ]+]] = icmp ult i32 0, %exit.mainloop.at + ; CHECK-NEXT: br i1 [[CMP3]], label %loop.preheader, label %main.pseudo.exit + ; CHECK: loop +@@ -388,10 +375,9 @@ define void @test_08(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK-LABEL: test_08( + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 +-; CHECK-NEXT: [[SUB1:%[^ ]+]] = sub i32 -14, %len +-; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ugt i32 [[SUB1]], -102 +-; CHECK-NEXT: [[UMAX1:%[^ ]+]] = select i1 [[CMP1]], i32 [[SUB1]], i32 -102 +-; CHECK-NEXT: %exit.mainloop.at = sub i32 -1, [[UMAX1]] ++; CHECK-NEXT: [[SUB1:%[^ ]+]] = add i32 %len, 13 ++; CHECK-NEXT: [[CMP1:%[^ ]+]] = icmp ult i32 [[SUB1]], 101 ++; CHECK-NEXT: %exit.mainloop.at = select i1 [[CMP1]], i32 [[SUB1]], i32 101 + ; CHECK-NEXT: br i1 true, label %loop.preloop.preheader + ; CHECK: in.bounds.preloop: + ; CHECK-NEXT: %addr.preloop = getelementptr i32, i32* %arr, i32 %idx.preloop +diff --git a/test/Transforms/IRCE/rc-negative-bound.ll b/test/Transforms/IRCE/rc-negative-bound.ll +index bfc0cd14778..d226bffeaae 100644 +--- a/test/Transforms/IRCE/rc-negative-bound.ll ++++ b/test/Transforms/IRCE/rc-negative-bound.ll +@@ -114,49 +114,44 @@ define void @test_03(i32 *%arr, i32 %n, i32 %bound) { + ; CHECK: loop.preheader: + ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 + ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 +-; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 +-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] +-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[BOUND]] +-; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1 +-; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1 +-; CHECK-NEXT: [[TMP5:%.*]] = sub i32 -1, [[SMAX1]] +-; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1 +-; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1 +-; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SMAX2]], 1 +-; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]] +-; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[TMP8]] +-; CHECK-NEXT: [[TMP10:%.*]] = sub i32 -1, [[N]] +-; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +-; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]] +-; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[SMAX3]] +-; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0 +-; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0 +-; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +-; CHECK: loop.preheader5: ++; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 ++; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]] ++; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0 ++; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0 ++; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1 ++; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1 ++; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SMIN1]], 1 ++; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]] ++; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]] ++; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]] ++; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0 ++; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0 ++; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ++; CHECK: loop.preheader4: + ; CHECK-NEXT: br label [[LOOP:%.*]] + ; CHECK: loop: +-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ] ++; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ] + ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 + ; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]] +-; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0 ++; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0 + ; CHECK: in.bounds: + ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] + ; CHECK-NEXT: store i32 0, i32* [[ADDR]] + ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] +-; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ++; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] + ; CHECK: main.exit.selector: + ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +-; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +-; CHECK-NEXT: br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ++; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] ++; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: main.pseudo.exit: + ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: br label [[POSTLOOP:%.*]] + ; CHECK: out.of.bounds.loopexit: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +-; CHECK: out.of.bounds.loopexit6: ++; CHECK: out.of.bounds.loopexit5: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] + ; CHECK: out.of.bounds: + ; CHECK-NEXT: ret void +@@ -211,47 +206,41 @@ define void @test_04(i32 *%arr, i32 %n, i32 %bound) { + ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 + ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] + ; CHECK: loop.preheader: +-; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] +-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 +-; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 +-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]] +-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +-; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[SMAX]] +-; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1 +-; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1 +-; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SMAX1]], 1 +-; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]] +-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 -1, [[TMP7]] +-; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[N]] +-; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] +-; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +-; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]] +-; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +-; CHECK: loop.preheader2: ++; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0 ++; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0 ++; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]] ++; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1 ++; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1 ++; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMIN]], 1 ++; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]] ++; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]] ++; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]] ++; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ++; CHECK: loop.preheader1: + ; CHECK-NEXT: br label [[LOOP:%.*]] + ; CHECK: loop: +-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ] ++; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ] + ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 + ; CHECK-NEXT: [[ABC:%.*]] = icmp slt i32 [[IDX]], [[BOUND]] +-; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0 ++; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0 + ; CHECK: in.bounds: + ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] + ; CHECK-NEXT: store i32 0, i32* [[ADDR]] + ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] +-; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ++; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] + ; CHECK: main.exit.selector: + ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +-; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] +-; CHECK-NEXT: br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ++; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] ++; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: main.pseudo.exit: + ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: br label [[POSTLOOP:%.*]] + ; CHECK: out.of.bounds.loopexit: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +-; CHECK: out.of.bounds.loopexit3: ++; CHECK: out.of.bounds.loopexit2: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] + ; CHECK: out.of.bounds: + ; CHECK-NEXT: ret void +@@ -413,49 +402,44 @@ define void @test_07(i32 *%arr, i32 %n, i32 %bound) { + ; CHECK: loop.preheader: + ; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[BOUND:%.*]], -2147483647 + ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], 0 +-; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 +-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMAX]] +-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[BOUND]] +-; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], -1 +-; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP4]], i32 [[TMP3]], i32 -1 +-; CHECK-NEXT: [[TMP5:%.*]] = sub i32 -1, [[SMAX1]] +-; CHECK-NEXT: [[TMP6:%.*]] = icmp sgt i32 [[TMP5]], -1 +-; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP6]], i32 [[TMP5]], i32 -1 +-; CHECK-NEXT: [[TMP7:%.*]] = add i32 [[SMAX2]], 1 +-; CHECK-NEXT: [[TMP8:%.*]] = mul i32 [[TMP2]], [[TMP7]] +-; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[TMP8]] +-; CHECK-NEXT: [[TMP10:%.*]] = sub i32 -1, [[N]] +-; CHECK-NEXT: [[TMP11:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +-; CHECK-NEXT: [[SMAX3:%.*]] = select i1 [[TMP11]], i32 [[TMP9]], i32 [[TMP10]] +-; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[SMAX3]] +-; CHECK-NEXT: [[TMP13:%.*]] = icmp sgt i32 [[TMP12]], 0 +-; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 0 +-; CHECK-NEXT: [[TMP14:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP14]], label [[LOOP_PREHEADER5:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +-; CHECK: loop.preheader5: ++; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 0 ++; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[BOUND]], [[SMIN]] ++; CHECK-NEXT: [[TMP3:%.*]] = icmp slt i32 [[BOUND]], 0 ++; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP3]], i32 [[BOUND]], i32 0 ++; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt i32 [[SMAX]], -1 ++; CHECK-NEXT: [[SMIN1:%.*]] = select i1 [[TMP4]], i32 [[SMAX]], i32 -1 ++; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[SMIN1]], 1 ++; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP2]], [[TMP5]] ++; CHECK-NEXT: [[TMP7:%.*]] = icmp slt i32 [[N]], [[TMP6]] ++; CHECK-NEXT: [[SMAX2:%.*]] = select i1 [[TMP7]], i32 [[N]], i32 [[TMP6]] ++; CHECK-NEXT: [[TMP8:%.*]] = icmp sgt i32 [[SMAX2]], 0 ++; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP8]], i32 [[SMAX2]], i32 0 ++; CHECK-NEXT: [[TMP9:%.*]] = icmp slt i32 0, [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP9]], label [[LOOP_PREHEADER4:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ++; CHECK: loop.preheader4: + ; CHECK-NEXT: br label [[LOOP:%.*]] + ; CHECK: loop: +-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER5]] ] ++; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER4]] ] + ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 + ; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]] +-; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT6:%.*]], !prof !0 ++; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT5:%.*]], !prof !0 + ; CHECK: in.bounds: + ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] + ; CHECK-NEXT: store i32 0, i32* [[ADDR]] + ; CHECK-NEXT: [[NEXT:%.*]] = icmp slt i32 [[IDX_NEXT]], [[N]] +-; CHECK-NEXT: [[TMP15:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP15]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ++; CHECK-NEXT: [[TMP10:%.*]] = icmp slt i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP10]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] + ; CHECK: main.exit.selector: + ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +-; CHECK-NEXT: [[TMP16:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] +-; CHECK-NEXT: br i1 [[TMP16]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ++; CHECK-NEXT: [[TMP11:%.*]] = icmp slt i32 [[IDX_NEXT_LCSSA]], [[N]] ++; CHECK-NEXT: br i1 [[TMP11]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: main.pseudo.exit: + ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: br label [[POSTLOOP:%.*]] + ; CHECK: out.of.bounds.loopexit: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +-; CHECK: out.of.bounds.loopexit6: ++; CHECK: out.of.bounds.loopexit5: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] + ; CHECK: out.of.bounds: + ; CHECK-NEXT: ret void +@@ -512,47 +496,41 @@ define void @test_08(i32 *%arr, i32 %n, i32 %bound) { + ; CHECK-NEXT: [[FIRST_ITR_CHECK:%.*]] = icmp sgt i32 [[N:%.*]], 0 + ; CHECK-NEXT: br i1 [[FIRST_ITR_CHECK]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]] + ; CHECK: loop.preheader: +-; CHECK-NEXT: [[TMP0:%.*]] = sub i32 -1, [[BOUND:%.*]] +-; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[TMP0]], -1 +-; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP1]], i32 [[TMP0]], i32 -1 +-; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[BOUND]], [[SMAX]] +-; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[TMP2]], 1 +-; CHECK-NEXT: [[TMP4:%.*]] = sub i32 -1, [[SMAX]] +-; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[TMP4]], -1 +-; CHECK-NEXT: [[SMAX1:%.*]] = select i1 [[TMP5]], i32 [[TMP4]], i32 -1 +-; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[SMAX1]], 1 +-; CHECK-NEXT: [[TMP7:%.*]] = mul i32 [[TMP3]], [[TMP6]] +-; CHECK-NEXT: [[TMP8:%.*]] = sub i32 -1, [[TMP7]] +-; CHECK-NEXT: [[TMP9:%.*]] = sub i32 -1, [[N]] +-; CHECK-NEXT: [[TMP10:%.*]] = icmp ugt i32 [[TMP8]], [[TMP9]] +-; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 [[TMP9]] +-; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = sub i32 -1, [[UMAX]] +-; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP11]], label [[LOOP_PREHEADER2:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] +-; CHECK: loop.preheader2: ++; CHECK-NEXT: [[TMP0:%.*]] = icmp slt i32 [[BOUND:%.*]], 0 ++; CHECK-NEXT: [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[BOUND]], i32 0 ++; CHECK-NEXT: [[TMP1:%.*]] = sub i32 [[BOUND]], [[SMAX]] ++; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i32 [[SMAX]], -1 ++; CHECK-NEXT: [[SMIN:%.*]] = select i1 [[TMP2]], i32 [[SMAX]], i32 -1 ++; CHECK-NEXT: [[TMP3:%.*]] = add i32 [[SMIN]], 1 ++; CHECK-NEXT: [[TMP4:%.*]] = mul i32 [[TMP1]], [[TMP3]] ++; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[N]], [[TMP4]] ++; CHECK-NEXT: [[EXIT_MAINLOOP_AT:%.*]] = select i1 [[TMP5]], i32 [[N]], i32 [[TMP4]] ++; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 0, [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP6]], label [[LOOP_PREHEADER1:%.*]], label [[MAIN_PSEUDO_EXIT:%.*]] ++; CHECK: loop.preheader1: + ; CHECK-NEXT: br label [[LOOP:%.*]] + ; CHECK: loop: +-; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER2]] ] ++; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ [[IDX_NEXT:%.*]], [[IN_BOUNDS:%.*]] ], [ 0, [[LOOP_PREHEADER1]] ] + ; CHECK-NEXT: [[IDX_NEXT]] = add i32 [[IDX]], 1 + ; CHECK-NEXT: [[ABC:%.*]] = icmp ult i32 [[IDX]], [[BOUND]] +-; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT3:%.*]], !prof !0 ++; CHECK-NEXT: br i1 true, label [[IN_BOUNDS]], label [[OUT_OF_BOUNDS_LOOPEXIT2:%.*]], !prof !0 + ; CHECK: in.bounds: + ; CHECK-NEXT: [[ADDR:%.*]] = getelementptr i32, i32* [[ARR:%.*]], i32 [[IDX]] + ; CHECK-NEXT: store i32 0, i32* [[ADDR]] + ; CHECK-NEXT: [[NEXT:%.*]] = icmp ult i32 [[IDX_NEXT]], [[N]] +-; CHECK-NEXT: [[TMP12:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] +-; CHECK-NEXT: br i1 [[TMP12]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] ++; CHECK-NEXT: [[TMP7:%.*]] = icmp ult i32 [[IDX_NEXT]], [[EXIT_MAINLOOP_AT]] ++; CHECK-NEXT: br i1 [[TMP7]], label [[LOOP]], label [[MAIN_EXIT_SELECTOR:%.*]] + ; CHECK: main.exit.selector: + ; CHECK-NEXT: [[IDX_NEXT_LCSSA:%.*]] = phi i32 [ [[IDX_NEXT]], [[IN_BOUNDS]] ] +-; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] +-; CHECK-NEXT: br i1 [[TMP13]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] ++; CHECK-NEXT: [[TMP8:%.*]] = icmp ult i32 [[IDX_NEXT_LCSSA]], [[N]] ++; CHECK-NEXT: br i1 [[TMP8]], label [[MAIN_PSEUDO_EXIT]], label [[EXIT_LOOPEXIT:%.*]] + ; CHECK: main.pseudo.exit: + ; CHECK-NEXT: [[IDX_COPY:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: [[INDVAR_END:%.*]] = phi i32 [ 0, [[LOOP_PREHEADER]] ], [ [[IDX_NEXT_LCSSA]], [[MAIN_EXIT_SELECTOR]] ] + ; CHECK-NEXT: br label [[POSTLOOP:%.*]] + ; CHECK: out.of.bounds.loopexit: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS:%.*]] +-; CHECK: out.of.bounds.loopexit3: ++; CHECK: out.of.bounds.loopexit2: + ; CHECK-NEXT: br label [[OUT_OF_BOUNDS]] + ; CHECK: out.of.bounds: + ; CHECK-NEXT: ret void +diff --git a/test/Transforms/IRCE/single-access-no-preloop.ll b/test/Transforms/IRCE/single-access-no-preloop.ll +index acca948a7ab..7bf36f7c254 100644 +--- a/test/Transforms/IRCE/single-access-no-preloop.ll ++++ b/test/Transforms/IRCE/single-access-no-preloop.ll +@@ -86,15 +86,13 @@ define void @single_access_no_preloop_with_offset(i32 *%arr, i32 *%a_len_ptr, i3 + ; CHECK-LABEL: @single_access_no_preloop_with_offset( + + ; CHECK: loop.preheader: +-; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_safe_range_end]], [[not_n]] +-; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_safe_range_end]], i32 [[not_n]] +-; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]] ++; CHECK: [[safe_range_end:[^ ]+]] = add i32 %len, -4 ++; CHECK: [[exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp slt i32 %n, [[safe_range_end]] ++; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[exit_main_loop_at_hiclamp_cmp]], i32 %n, i32 [[safe_range_end]] + ; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0 + ; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0 + ; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]] +-; CHECK: br i1 [[enter_main_loop]], label %loop.preheader2, label %main.pseudo.exit ++; CHECK: br i1 [[enter_main_loop]], label %[[loop_preheader:[^ ,]+]], label %main.pseudo.exit + + ; CHECK: loop: + ; CHECK: br i1 true, label %in.bounds, label %out.of.bounds +diff --git a/test/Transforms/IRCE/single-access-with-preloop.ll b/test/Transforms/IRCE/single-access-with-preloop.ll +index 6f3b0324e39..bd235aa4a73 100644 +--- a/test/Transforms/IRCE/single-access-with-preloop.ll ++++ b/test/Transforms/IRCE/single-access-with-preloop.ll +@@ -34,11 +34,9 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 + ; CHECK: [[check_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, -2147483647 + ; CHECK: [[safe_offset_preloop:[^ ]+]] = select i1 [[check_min_sint_offset]], i32 %offset, i32 -2147483647 + ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. +-; CHECK: [[not_safe_start:[^ ]+]] = add i32 [[safe_offset_preloop]], -1 +-; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n +-; CHECK: [[not_exit_preloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_start]], [[not_n]] +-; CHECK: [[not_exit_preloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_preloop_at_cond_loclamp]], i32 [[not_safe_start]], i32 [[not_n]] +-; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = sub i32 -1, [[not_exit_preloop_at_loclamp]] ++; CHECK: [[safe_start:[^ ]+]] = sub i32 0, [[safe_offset_preloop]] ++; CHECK: [[exit_preloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_start]] ++; CHECK: [[exit_preloop_at_loclamp:[^ ]+]] = select i1 [[exit_preloop_at_cond_loclamp]], i32 %n, i32 [[safe_start]] + ; CHECK: [[exit_preloop_at_cond:[^ ]+]] = icmp sgt i32 [[exit_preloop_at_loclamp]], 0 + ; CHECK: [[exit_preloop_at:[^ ]+]] = select i1 [[exit_preloop_at_cond]], i32 [[exit_preloop_at_loclamp]], i32 0 + +@@ -46,17 +44,15 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 + ; CHECK: [[len_minus_sint_max:[^ ]+]] = add i32 %len, -2147483647 + ; CHECK: [[check_len_min_sint_offset:[^ ]+]] = icmp sgt i32 %offset, [[len_minus_sint_max]] + ; CHECK: [[safe_offset_mainloop:[^ ]+]] = select i1 [[check_len_min_sint_offset]], i32 %offset, i32 [[len_minus_sint_max]] +-; CHECK: [[not_safe_start_2:[^ ]+]] = add i32 [[safe_offset_mainloop]], -1 + ; If Offset was a SINT_MIN, we could have an overflow here. That is why we calculated its safe version. +-; CHECK: [[not_safe_upper_end:[^ ]+]] = sub i32 [[not_safe_start_2]], %len +-; CHECK: [[not_exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp sgt i32 [[not_safe_upper_end]], [[not_n]] +-; CHECK: [[not_exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_loclamp]], i32 [[not_safe_upper_end]], i32 [[not_n]] ++; CHECK: [[safe_upper_end:[^ ]+]] = sub i32 %len, [[safe_offset_mainloop]] ++; CHECK: [[exit_mainloop_at_cond_loclamp:[^ ]+]] = icmp slt i32 %n, [[safe_upper_end]] ++; CHECK: [[exit_mainloop_at_loclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_loclamp]], i32 %n, i32 [[safe_upper_end]] + ; CHECK: [[check_offset_mainloop_2:[^ ]+]] = icmp sgt i32 %offset, 0 + ; CHECK: [[safe_offset_mainloop_2:[^ ]+]] = select i1 [[check_offset_mainloop_2]], i32 %offset, i32 0 +-; CHECK: [[not_safe_lower_end:[^ ]+]] = add i32 [[safe_offset_mainloop_2]], -2147483648 +-; CHECK: [[not_exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp sgt i32 [[not_exit_mainloop_at_loclamp]], [[not_safe_lower_end]] +-; CHECK: [[not_exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_mainloop_at_cond_hiclamp]], i32 [[not_exit_mainloop_at_loclamp]], i32 [[not_safe_lower_end]] +-; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_mainloop_at_hiclamp]] ++; CHECK: [[safe_lower_end:[^ ]+]] = sub i32 2147483647, [[safe_offset_mainloop_2]] ++; CHECK: [[exit_mainloop_at_cond_hiclamp:[^ ]+]] = icmp slt i32 [[exit_mainloop_at_loclamp]], [[safe_lower_end]] ++; CHECK: [[exit_mainloop_at_hiclamp:[^ ]+]] = select i1 [[exit_mainloop_at_cond_hiclamp]], i32 [[exit_mainloop_at_loclamp]], i32 [[safe_lower_end]] + ; CHECK: [[exit_mainloop_at_cmp:[^ ]+]] = icmp sgt i32 [[exit_mainloop_at_hiclamp]], 0 + ; CHECK: [[exit_mainloop_at:[^ ]+]] = select i1 [[exit_mainloop_at_cmp]], i32 [[exit_mainloop_at_hiclamp]], i32 0 + +@@ -67,7 +63,7 @@ define void @single_access_with_preloop(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 + ; CHECK: %abc.high = icmp slt i32 %array.idx, %len + ; CHECK: %abc.low = icmp sge i32 %array.idx, 0 + ; CHECK: %abc = and i1 true, true +-; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds.loopexit11 ++; CHECK: br i1 %abc, label %in.bounds, label %[[loopexit:[^ ,]+]] + + ; CHECK: in.bounds: + ; CHECK: [[continue_mainloop_cond:[^ ]+]] = icmp slt i32 %idx.next, [[exit_mainloop_at]] +diff --git a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll +index 8f00c733569..3451d65c7bb 100644 +--- a/test/Transforms/IRCE/unsigned_comparisons_ugt.ll ++++ b/test/Transforms/IRCE/unsigned_comparisons_ugt.ll +@@ -58,8 +58,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 + ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 +-; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 +-; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 ++; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 ++; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 + ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at + ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit + ; CHECK: mainloop: +@@ -149,8 +149,8 @@ define void @test_04(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 + ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 +-; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 +-; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 ++; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 ++; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 + ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at + ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit + ; CHECK: mainloop: +diff --git a/test/Transforms/IRCE/unsigned_comparisons_ult.ll b/test/Transforms/IRCE/unsigned_comparisons_ult.ll +index dc59c11df1b..aca3c3d192e 100644 +--- a/test/Transforms/IRCE/unsigned_comparisons_ult.ll ++++ b/test/Transforms/IRCE/unsigned_comparisons_ult.ll +@@ -61,8 +61,8 @@ define void @test_02(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 + ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 +-; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 +-; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 ++; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 ++; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 + ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 100, %exit.preloop.at + ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit + ; CHECK: mainloop: +@@ -194,8 +194,8 @@ define void @test_05(i32* %arr, i32* %a_len_ptr) #0 { + ; CHECK: entry: + ; CHECK-NEXT: %len = load i32, i32* %a_len_ptr, !range !0 + ; CHECK-NEXT: [[COND1:%[^ ]+]] = icmp ugt i32 %len, 1 +-; CHECK-NEXT: %umax = select i1 [[COND1]], i32 %len, i32 1 +-; CHECK-NEXT: %exit.preloop.at = add i32 %umax, -1 ++; CHECK-NEXT: [[UMIN:%[^ ]+]] = select i1 [[COND1]], i32 %len, i32 1 ++; CHECK-NEXT: %exit.preloop.at = add i32 [[UMIN]], -1 + ; CHECK-NEXT: [[COND2:%[^ ]+]] = icmp ugt i32 -2147483648, %exit.preloop.at + ; CHECK-NEXT: br i1 [[COND2]], label %loop.preloop.preheader, label %preloop.pseudo.exit + ; CHECK: mainloop: +diff --git a/test/Transforms/IndVarSimplify/eliminate-trunc.ll b/test/Transforms/IndVarSimplify/eliminate-trunc.ll +index 7e0971f9f31..c83a48723ca 100644 +--- a/test/Transforms/IndVarSimplify/eliminate-trunc.ll ++++ b/test/Transforms/IndVarSimplify/eliminate-trunc.ll +@@ -459,15 +459,17 @@ exit: + define void @test_10(i32 %n) { + ; CHECK-LABEL: @test_10( + ; CHECK-NEXT: entry: +-; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[N:%.*]] to i64 ++; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N:%.*]], 100 ++; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 ++; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i64 [[TMP1]], 90 ++; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP2]], i64 [[TMP1]], i64 90 ++; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[UMAX]], -99 + ; CHECK-NEXT: br label [[LOOP:%.*]] + ; CHECK: loop: + ; CHECK-NEXT: [[IV:%.*]] = phi i64 [ -100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] + ; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +-; CHECK-NEXT: [[TMP0:%.*]] = icmp ne i64 [[IV]], [[SEXT]] +-; CHECK-NEXT: [[NEGCMP:%.*]] = icmp slt i64 [[IV]], -10 +-; CHECK-NEXT: [[CMP:%.*]] = and i1 [[TMP0]], [[NEGCMP]] +-; CHECK-NEXT: br i1 [[CMP]], label [[LOOP]], label [[EXIT:%.*]] ++; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[IV_NEXT]], [[TMP3]] ++; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[EXIT:%.*]] + ; CHECK: exit: + ; CHECK-NEXT: ret void + ; +diff --git a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +index ea3f6077231..d5232e1874c 100644 +--- a/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll ++++ b/test/Transforms/LoopStrengthReduce/2013-01-14-ReuseCast.ll +@@ -14,8 +14,6 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 + ; current LSR cost model. + ; CHECK-NOT: = ptrtoint i8* undef to i64 + ; CHECK: .lr.ph +-; CHECK: [[TMP:%[^ ]+]] = add i64 %tmp{{[0-9]+}}, -1 +-; CHECK: sub i64 [[TMP]], %tmp{{[0-9]+}} + ; CHECK: ret void + define void @VerifyDiagnosticConsumerTest() unnamed_addr nounwind uwtable align 2 { + bb: +diff --git a/test/Transforms/LoopVectorize/X86/pr35432.ll b/test/Transforms/LoopVectorize/X86/pr35432.ll +index b8760cb8d50..6aaa13c183a 100644 +--- a/test/Transforms/LoopVectorize/X86/pr35432.ll ++++ b/test/Transforms/LoopVectorize/X86/pr35432.ll +@@ -27,7 +27,6 @@ define i32 @main() local_unnamed_addr #0 { + ; CHECK-NEXT: [[CMP8:%.*]] = icmp eq i32 [[CONV17]], 0 + ; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END12:%.*]] + ; CHECK: for.body.lr.ph: +-; CHECK-NEXT: [[TMP3:%.*]] = sub i32 -1, [[TMP2]] + ; CHECK-NEXT: br label [[FOR_BODY:%.*]] + ; CHECK: for.body: + ; CHECK-NEXT: [[STOREMERGE_IN9:%.*]] = phi i32 [ [[TMP2]], [[FOR_BODY_LR_PH]] ], [ [[ADD:%.*]], [[FOR_INC9:%.*]] ] +@@ -37,77 +36,74 @@ define i32 @main() local_unnamed_addr #0 { + ; CHECK: for.body8.lr.ph: + ; CHECK-NEXT: [[CONV3:%.*]] = trunc i32 [[STOREMERGE_IN9]] to i8 + ; CHECK-NEXT: [[DOTPROMOTED:%.*]] = load i32, i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 +-; CHECK-NEXT: [[TMP4:%.*]] = add i8 [[CONV3]], -1 +-; CHECK-NEXT: [[TMP5:%.*]] = zext i8 [[TMP4]] to i32 +-; CHECK-NEXT: [[TMP6:%.*]] = sub i32 -1, [[TMP5]] +-; CHECK-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP3]] +-; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP7]], i32 [[TMP6]], i32 [[TMP3]] +-; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[UMAX]], 2 +-; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP8]], [[TMP5]] +-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP9]], 8 ++; CHECK-NEXT: [[TMP3:%.*]] = add i8 [[CONV3]], -1 ++; CHECK-NEXT: [[TMP4:%.*]] = zext i8 [[TMP3]] to i32 ++; CHECK-NEXT: [[TMP5:%.*]] = add i32 [[TMP4]], 1 ++; CHECK-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP2]], [[TMP4]] ++; CHECK-NEXT: [[UMAX:%.*]] = select i1 [[TMP6]], i32 [[TMP2]], i32 [[TMP4]] ++; CHECK-NEXT: [[TMP7:%.*]] = sub i32 [[TMP5]], [[UMAX]] ++; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP7]], 8 + ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] + ; CHECK: vector.scevcheck: +-; CHECK-NEXT: [[TMP10:%.*]] = add i8 [[CONV3]], -1 +-; CHECK-NEXT: [[TMP11:%.*]] = zext i8 [[TMP10]] to i32 +-; CHECK-NEXT: [[TMP12:%.*]] = sub i32 -1, [[TMP11]] +-; CHECK-NEXT: [[TMP13:%.*]] = icmp ugt i32 [[TMP12]], [[TMP3]] +-; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP13]], i32 [[TMP12]], i32 [[TMP3]] +-; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[UMAX1]], 1 +-; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP14]], [[TMP11]] +-; CHECK-NEXT: [[TMP16:%.*]] = trunc i32 [[TMP15]] to i8 +-; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP16]]) ++; CHECK-NEXT: [[TMP8:%.*]] = add i8 [[CONV3]], -1 ++; CHECK-NEXT: [[TMP9:%.*]] = zext i8 [[TMP8]] to i32 ++; CHECK-NEXT: [[TMP10:%.*]] = icmp ult i32 [[TMP2]], [[TMP9]] ++; CHECK-NEXT: [[UMAX1:%.*]] = select i1 [[TMP10]], i32 [[TMP2]], i32 [[TMP9]] ++; CHECK-NEXT: [[TMP11:%.*]] = sub i32 [[TMP9]], [[UMAX1]] ++; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i8 ++; CHECK-NEXT: [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 1, i8 [[TMP12]]) + ; CHECK-NEXT: [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0 + ; CHECK-NEXT: [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1 +-; CHECK-NEXT: [[TMP17:%.*]] = add i8 [[TMP10]], [[MUL_RESULT]] +-; CHECK-NEXT: [[TMP18:%.*]] = sub i8 [[TMP10]], [[MUL_RESULT]] +-; CHECK-NEXT: [[TMP19:%.*]] = icmp ugt i8 [[TMP18]], [[TMP10]] +-; CHECK-NEXT: [[TMP20:%.*]] = icmp ult i8 [[TMP17]], [[TMP10]] +-; CHECK-NEXT: [[TMP21:%.*]] = select i1 true, i1 [[TMP19]], i1 [[TMP20]] +-; CHECK-NEXT: [[TMP22:%.*]] = icmp ugt i32 [[TMP15]], 255 +-; CHECK-NEXT: [[TMP23:%.*]] = or i1 [[TMP21]], [[TMP22]] +-; CHECK-NEXT: [[TMP24:%.*]] = or i1 [[TMP23]], [[MUL_OVERFLOW]] +-; CHECK-NEXT: [[TMP25:%.*]] = or i1 false, [[TMP24]] +-; CHECK-NEXT: br i1 [[TMP25]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] ++; CHECK-NEXT: [[TMP13:%.*]] = add i8 [[TMP8]], [[MUL_RESULT]] ++; CHECK-NEXT: [[TMP14:%.*]] = sub i8 [[TMP8]], [[MUL_RESULT]] ++; CHECK-NEXT: [[TMP15:%.*]] = icmp ugt i8 [[TMP14]], [[TMP8]] ++; CHECK-NEXT: [[TMP16:%.*]] = icmp ult i8 [[TMP13]], [[TMP8]] ++; CHECK-NEXT: [[TMP17:%.*]] = select i1 true, i1 [[TMP15]], i1 [[TMP16]] ++; CHECK-NEXT: [[TMP18:%.*]] = icmp ugt i32 [[TMP11]], 255 ++; CHECK-NEXT: [[TMP19:%.*]] = or i1 [[TMP17]], [[TMP18]] ++; CHECK-NEXT: [[TMP20:%.*]] = or i1 [[TMP19]], [[MUL_OVERFLOW]] ++; CHECK-NEXT: [[TMP21:%.*]] = or i1 false, [[TMP20]] ++; CHECK-NEXT: br i1 [[TMP21]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]] + ; CHECK: vector.ph: +-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP9]], 8 +-; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP9]], [[N_MOD_VF]] ++; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[TMP7]], 8 ++; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[TMP7]], [[N_MOD_VF]] + ; CHECK-NEXT: [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8 + ; CHECK-NEXT: [[IND_END:%.*]] = sub i8 [[CONV3]], [[CAST_CRD]] +-; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 ++; CHECK-NEXT: [[TMP22:%.*]] = insertelement <4 x i32> zeroinitializer, i32 [[DOTPROMOTED]], i32 0 + ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] + ; CHECK: vector.body: + ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +-; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP26]], [[VECTOR_PH]] ], [ [[TMP30:%.*]], [[VECTOR_BODY]] ] +-; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP31:%.*]], [[VECTOR_BODY]] ] +-; CHECK-NEXT: [[TMP27:%.*]] = trunc i32 [[INDEX]] to i8 +-; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP27]] ++; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP22]], [[VECTOR_PH]] ], [ [[TMP26:%.*]], [[VECTOR_BODY]] ] ++; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[VECTOR_BODY]] ] ++; CHECK-NEXT: [[TMP23:%.*]] = trunc i32 [[INDEX]] to i8 ++; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i8 [[CONV3]], [[TMP23]] + ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> undef, i8 [[OFFSET_IDX]], i32 0 + ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> undef, <4 x i32> zeroinitializer + ; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], + ; CHECK-NEXT: [[INDUCTION3:%.*]] = add <4 x i8> [[BROADCAST_SPLAT]], +-; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[OFFSET_IDX]], 0 +-; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[OFFSET_IDX]], -4 +-; CHECK-NEXT: [[TMP30]] = add <4 x i32> [[VEC_PHI]], +-; CHECK-NEXT: [[TMP31]] = add <4 x i32> [[VEC_PHI2]], +-; CHECK-NEXT: [[TMP32:%.*]] = add i8 [[TMP28]], -1 +-; CHECK-NEXT: [[TMP33:%.*]] = add i8 [[TMP29]], -1 +-; CHECK-NEXT: [[TMP34:%.*]] = zext i8 [[TMP32]] to i32 +-; CHECK-NEXT: [[TMP35:%.*]] = zext i8 [[TMP33]] to i32 ++; CHECK-NEXT: [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 0 ++; CHECK-NEXT: [[TMP25:%.*]] = add i8 [[OFFSET_IDX]], -4 ++; CHECK-NEXT: [[TMP26]] = add <4 x i32> [[VEC_PHI]], ++; CHECK-NEXT: [[TMP27]] = add <4 x i32> [[VEC_PHI2]], ++; CHECK-NEXT: [[TMP28:%.*]] = add i8 [[TMP24]], -1 ++; CHECK-NEXT: [[TMP29:%.*]] = add i8 [[TMP25]], -1 ++; CHECK-NEXT: [[TMP30:%.*]] = zext i8 [[TMP28]] to i32 ++; CHECK-NEXT: [[TMP31:%.*]] = zext i8 [[TMP29]] to i32 + ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +-; CHECK-NEXT: [[TMP36:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +-; CHECK-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 ++; CHECK-NEXT: [[TMP32:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ++; CHECK-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 + ; CHECK: middle.block: +-; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP31]], [[TMP30]] ++; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP27]], [[TMP26]] + ; CHECK-NEXT: [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> undef, <4 x i32> + ; CHECK-NEXT: [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF]] + ; CHECK-NEXT: [[RDX_SHUF5:%.*]] = shufflevector <4 x i32> [[BIN_RDX4]], <4 x i32> undef, <4 x i32> + ; CHECK-NEXT: [[BIN_RDX6:%.*]] = add <4 x i32> [[BIN_RDX4]], [[RDX_SHUF5]] +-; CHECK-NEXT: [[TMP37:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0 +-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP9]], [[N_VEC]] ++; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i32> [[BIN_RDX6]], i32 0 ++; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[TMP7]], [[N_VEC]] + ; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE:%.*]], label [[SCALAR_PH]] + ; CHECK: scalar.ph: + ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[CONV3]], [[FOR_BODY8_LR_PH]] ], [ [[CONV3]], [[VECTOR_SCEVCHECK]] ] +-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] ++; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[DOTPROMOTED]], [[FOR_BODY8_LR_PH]] ], [ [[DOTPROMOTED]], [[VECTOR_SCEVCHECK]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] + ; CHECK-NEXT: br label [[FOR_BODY8:%.*]] + ; CHECK: for.body8: + ; CHECK-NEXT: [[INC5:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY8]] ] +@@ -118,7 +114,7 @@ define i32 @main() local_unnamed_addr #0 { + ; CHECK-NEXT: [[CMP6:%.*]] = icmp ult i32 [[TMP2]], [[CONV5]] + ; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY8]], label [[FOR_COND4_FOR_INC9_CRIT_EDGE]], !llvm.loop !2 + ; CHECK: for.cond4.for.inc9_crit_edge: +-; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ] ++; CHECK-NEXT: [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY8]] ], [ [[TMP33]], [[MIDDLE_BLOCK]] ] + ; CHECK-NEXT: store i32 [[INC_LCSSA]], i32* getelementptr inbounds ([192 x [192 x i32]], [192 x [192 x i32]]* @a, i64 0, i64 0, i64 0), align 16 + ; CHECK-NEXT: br label [[FOR_INC9]] + ; CHECK: for.inc9: