From c85611e8583e6392d56075ebdfa60893b6284813 Mon Sep 17 00:00:00 2001 From: goldsteinn <35538541+goldsteinn@users.noreply.github.com> Date: Thu, 17 Oct 2024 11:32:55 -0400 Subject: [PATCH 01/24] [SimplifyLibCall][Attribute] Fix bug where we may keep `range` attr with incompatible type (#112649) In a variety of places we change the bitwidth of a parameter but don't update the attributes. The issue in this case is from the `range` attribute when inlining `__memset_chk`. `optimizeMemSetChk` will replace an `i32` with an `i8`, and if the `i32` had a `range` attr assosiated it will cause an error. Fixes #112633 --- llvm/include/llvm/IR/Argument.h | 2 ++ llvm/include/llvm/IR/Attributes.h | 16 +++++++++++----- llvm/include/llvm/IR/InstrTypes.h | 16 ++++++++++++---- llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 5 +++-- llvm/lib/IR/Attributes.cpp | 7 ++++++- llvm/lib/IR/AutoUpgrade.cpp | 6 ++++-- llvm/lib/IR/Function.cpp | 4 ++++ llvm/lib/IR/Verifier.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp | 3 ++- .../Transforms/IPO/DeadArgumentElimination.cpp | 8 +++++--- .../Transforms/InstCombine/InstCombineCalls.cpp | 11 +++++++---- .../Instrumentation/DataFlowSanitizer.cpp | 4 ++-- llvm/lib/Transforms/Utils/CallPromotionUtils.cpp | 6 ++++-- llvm/lib/Transforms/Utils/CloneFunction.cpp | 4 ++-- llvm/lib/Transforms/Utils/InlineFunction.cpp | 4 ++-- llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 8 +++++++- .../Transforms/InstCombine/simplify-libcalls.ll | 12 ++++++++++++ llvm/test/Verifier/range-attr.ll | 4 ++-- 18 files changed, 88 insertions(+), 34 deletions(-) diff --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h index 0ffcb05519d44f..5be58d7eca0606 100644 --- a/llvm/include/llvm/IR/Argument.h +++ b/llvm/include/llvm/IR/Argument.h @@ -182,6 +182,8 @@ class Argument final : public Value { Attribute getAttribute(Attribute::AttrKind Kind) const; + AttributeSet getAttributes() const; + /// Method for support type inquiry through isa, cast, and dyn_cast. static bool classof(const Value *V) { return V->getValueID() == ArgumentVal; diff --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h index 57db52e4879b5b..feeb3a9ddba932 100644 --- a/llvm/include/llvm/IR/Attributes.h +++ b/llvm/include/llvm/IR/Attributes.h @@ -1288,11 +1288,17 @@ enum AttributeSafetyKind : uint8_t { /// follows the same type rules as FPMathOperator. bool isNoFPClassCompatibleType(Type *Ty); -/// Which attributes cannot be applied to a type. The argument \p ASK indicates, -/// if only attributes that are known to be safely droppable are contained in -/// the mask; only attributes that might be unsafe to drop (e.g., ABI-related -/// attributes) are in the mask; or both. -AttributeMask typeIncompatible(Type *Ty, AttributeSafetyKind ASK = ASK_ALL); +/// Which attributes cannot be applied to a type. The argument \p AS +/// is used as a hint for the attributes whose compatibility is being +/// checked against \p Ty. This does not mean the return will be a +/// subset of \p AS, just that attributes that have specific dynamic +/// type compatibilities (i.e `range`) will be checked against what is +/// contained in \p AS. The argument \p ASK indicates, if only +/// attributes that are known to be safely droppable are contained in +/// the mask; only attributes that might be unsafe to drop (e.g., +/// ABI-related attributes) are in the mask; or both. +AttributeMask typeIncompatible(Type *Ty, AttributeSet AS, + AttributeSafetyKind ASK = ASK_ALL); /// Get param/return attributes which imply immediate undefined behavior if an /// invalid value is passed. For example, this includes noundef (where undef diff --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h index 86d88da3d9460e..99f72792ce4024 100644 --- a/llvm/include/llvm/IR/InstrTypes.h +++ b/llvm/include/llvm/IR/InstrTypes.h @@ -1453,14 +1453,22 @@ class CallBase : public Instruction { /// looking through to the attributes on the called function when necessary). ///@{ - /// Return the parameter attributes for this call. - /// + /// Return the attributes for this call. AttributeList getAttributes() const { return Attrs; } - /// Set the parameter attributes for this call. - /// + /// Set the attributes for this call. void setAttributes(AttributeList A) { Attrs = A; } + /// Return the return attributes for this call. + AttributeSet getRetAttributes() const { + return getAttributes().getRetAttrs(); + } + + /// Return the param attributes for this call. + AttributeSet getParamAttributes(unsigned ArgNo) const { + return getAttributes().getParamAttrs(ArgNo); + } + /// Try to intersect the attributes from 'this' CallBase and the /// 'Other' CallBase. Sets the intersected attributes to 'this' and /// return true if successful. Doesn't modify 'this' and returns diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index 182c5808f8ca1f..5a6fb5064b3166 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -7040,11 +7040,12 @@ Error BitcodeReader::materialize(GlobalValue *GV) { // Remove incompatible attributes on function calls. if (auto *CI = dyn_cast(&I)) { CI->removeRetAttrs(AttributeFuncs::typeIncompatible( - CI->getFunctionType()->getReturnType())); + CI->getFunctionType()->getReturnType(), CI->getRetAttributes())); for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ++ArgNo) CI->removeParamAttrs(ArgNo, AttributeFuncs::typeIncompatible( - CI->getArgOperand(ArgNo)->getType())); + CI->getArgOperand(ArgNo)->getType(), + CI->getParamAttributes(ArgNo))); } } diff --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp index c2fba49692c774..223c917766a457 100644 --- a/llvm/lib/IR/Attributes.cpp +++ b/llvm/lib/IR/Attributes.cpp @@ -2300,7 +2300,7 @@ bool AttributeFuncs::isNoFPClassCompatibleType(Type *Ty) { } /// Which attributes cannot be applied to a type. -AttributeMask AttributeFuncs::typeIncompatible(Type *Ty, +AttributeMask AttributeFuncs::typeIncompatible(Type *Ty, AttributeSet AS, AttributeSafetyKind ASK) { AttributeMask Incompatible; @@ -2316,6 +2316,11 @@ AttributeMask AttributeFuncs::typeIncompatible(Type *Ty, // Attributes that only apply to integers or vector of integers. if (ASK & ASK_SAFE_TO_DROP) Incompatible.addAttribute(Attribute::Range); + } else { + Attribute RangeAttr = AS.getAttribute(Attribute::Range); + if (RangeAttr.isValid() && + RangeAttr.getRange().getBitWidth() != Ty->getScalarSizeInBits()) + Incompatible.addAttribute(Attribute::Range); } if (!Ty->isPointerTy()) { diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3aceb5227bb389..bb03c9290e4cf4 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -5378,9 +5378,11 @@ void llvm::UpgradeFunctionAttributes(Function &F) { } // Remove all incompatibile attributes from function. - F.removeRetAttrs(AttributeFuncs::typeIncompatible(F.getReturnType())); + F.removeRetAttrs(AttributeFuncs::typeIncompatible( + F.getReturnType(), F.getAttributes().getRetAttrs())); for (auto &Arg : F.args()) - Arg.removeAttrs(AttributeFuncs::typeIncompatible(Arg.getType())); + Arg.removeAttrs( + AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes())); // Older versions of LLVM treated an "implicit-section-name" attribute // similarly to directly setting the section on a Function. diff --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp index 09b90713b9c793..889295956dbf7c 100644 --- a/llvm/lib/IR/Function.cpp +++ b/llvm/lib/IR/Function.cpp @@ -359,6 +359,10 @@ Attribute Argument::getAttribute(Attribute::AttrKind Kind) const { return getParent()->getParamAttribute(getArgNo(), Kind); } +AttributeSet Argument::getAttributes() const { + return getParent()->getAttributes().getParamAttrs(getArgNo()); +} + //===----------------------------------------------------------------------===// // Helper Methods in Function //===----------------------------------------------------------------------===// diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 0412b93798b94d..f34fe7594c8602 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -2012,7 +2012,7 @@ void Verifier::verifyParameterAttrs(AttributeSet Attrs, Type *Ty, Attrs.hasAttribute(Attribute::ReadOnly)), "Attributes writable and readonly are incompatible!", V); - AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty); + AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible(Ty, Attrs); for (Attribute Attr : Attrs) { if (!Attr.isStringAttribute() && IncompatibleAttrs.contains(Attr.getKindAsEnum())) { diff --git a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp index eb553ae4eb80ff..26d8ce77d9a9b2 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULibCalls.cpp @@ -788,7 +788,8 @@ bool AMDGPULibCalls::fold(CallInst *CI) { B.CreateFPToSI(FPOp->getOperand(1), PownType->getParamType(1)); // Have to drop any nofpclass attributes on the original call site. Call->removeParamAttrs( - 1, AttributeFuncs::typeIncompatible(CastedArg->getType())); + 1, AttributeFuncs::typeIncompatible(CastedArg->getType(), + Call->getParamAttributes(1))); Call->setCalledFunction(PownFunc); Call->setArgOperand(1, CastedArg); return fold_pow(FPOp, B, PownInfo) || true; diff --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp index d1548592b1ce26..ed93b4491c50e4 100644 --- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -857,9 +857,10 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) { // here. Currently, this should not be possible, but special handling might be // required when new return value attributes are added. if (NRetTy->isVoidTy()) - RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy)); + RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy, PAL.getRetAttrs())); else - assert(!RAttrs.overlaps(AttributeFuncs::typeIncompatible(NRetTy)) && + assert(!RAttrs.overlaps( + AttributeFuncs::typeIncompatible(NRetTy, PAL.getRetAttrs())) && "Return attributes no longer compatible?"); AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs); @@ -903,7 +904,8 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) { // Adjust the call return attributes in case the function was changed to // return void. AttrBuilder RAttrs(F->getContext(), CallPAL.getRetAttrs()); - RAttrs.remove(AttributeFuncs::typeIncompatible(NRetTy)); + RAttrs.remove( + AttributeFuncs::typeIncompatible(NRetTy, CallPAL.getRetAttrs())); AttributeSet RetAttrs = AttributeSet::get(F->getContext(), RAttrs); // Declare these outside of the loops, so we can reuse them for the second diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 37841e91821c33..07b9405b941d68 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4151,7 +4151,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { if (!CallerPAL.isEmpty() && !Caller->use_empty()) { AttrBuilder RAttrs(FT->getContext(), CallerPAL.getRetAttrs()); - if (RAttrs.overlaps(AttributeFuncs::typeIncompatible(NewRetTy))) + if (RAttrs.overlaps(AttributeFuncs::typeIncompatible( + NewRetTy, CallerPAL.getRetAttrs()))) return false; // Attribute not compatible with transformed value. } @@ -4197,7 +4198,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { // Check if there are any incompatible attributes we cannot drop safely. if (AttrBuilder(FT->getContext(), CallerPAL.getParamAttrs(i)) .overlaps(AttributeFuncs::typeIncompatible( - ParamTy, AttributeFuncs::ASK_UNSAFE_TO_DROP))) + ParamTy, CallerPAL.getParamAttrs(i), + AttributeFuncs::ASK_UNSAFE_TO_DROP))) return false; // Attribute not compatible with transformed value. if (Call.isInAllocaArgument(i) || @@ -4235,7 +4237,8 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { // If the return value is not being used, the type may not be compatible // with the existing attributes. Wipe out any problematic attributes. - RAttrs.remove(AttributeFuncs::typeIncompatible(NewRetTy)); + RAttrs.remove( + AttributeFuncs::typeIncompatible(NewRetTy, CallerPAL.getRetAttrs())); LLVMContext &Ctx = Call.getContext(); AI = Call.arg_begin(); @@ -4250,7 +4253,7 @@ bool InstCombinerImpl::transformConstExprCastCall(CallBase &Call) { // Add any parameter attributes except the ones incompatible with the new // type. Note that we made sure all incompatible ones are safe to drop. AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible( - ParamTy, AttributeFuncs::ASK_SAFE_TO_DROP); + ParamTy, CallerPAL.getParamAttrs(i), AttributeFuncs::ASK_SAFE_TO_DROP); ArgAttrs.push_back( CallerPAL.getParamAttrs(i).removeAttributes(Ctx, IncompatibleAttrs)); } diff --git a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp index 577647cac3f58c..e226727e64d350 100644 --- a/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp @@ -1305,8 +1305,8 @@ DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName, Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(), NewFName, F->getParent()); NewF->copyAttributesFrom(F); - NewF->removeRetAttrs( - AttributeFuncs::typeIncompatible(NewFT->getReturnType())); + NewF->removeRetAttrs(AttributeFuncs::typeIncompatible( + NewFT->getReturnType(), NewF->getAttributes().getRetAttrs())); BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF); if (F->isVarArg()) { diff --git a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp index 4ead5cdcf29c01..17cba2e642a19a 100644 --- a/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp +++ b/llvm/lib/Transforms/Utils/CallPromotionUtils.cpp @@ -529,7 +529,8 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee, // Remove any incompatible attributes for the argument. AttrBuilder ArgAttrs(Ctx, CallerPAL.getParamAttrs(ArgNo)); - ArgAttrs.remove(AttributeFuncs::typeIncompatible(FormalTy)); + ArgAttrs.remove(AttributeFuncs::typeIncompatible( + FormalTy, CallerPAL.getParamAttrs(ArgNo))); // We may have a different byval/inalloca type. if (ArgAttrs.getByValType()) @@ -549,7 +550,8 @@ CallBase &llvm::promoteCall(CallBase &CB, Function *Callee, AttrBuilder RAttrs(Ctx, CallerPAL.getRetAttrs()); if (!CallSiteRetTy->isVoidTy() && CallSiteRetTy != CalleeRetTy) { createRetBitCast(CB, CallSiteRetTy, RetBitCast); - RAttrs.remove(AttributeFuncs::typeIncompatible(CalleeRetTy)); + RAttrs.remove( + AttributeFuncs::typeIncompatible(CalleeRetTy, CallerPAL.getRetAttrs())); AttributeChanged = true; } diff --git a/llvm/lib/Transforms/Utils/CloneFunction.cpp b/llvm/lib/Transforms/Utils/CloneFunction.cpp index c6ba85bd9e57d4..5dc82a8dfb2dbe 100644 --- a/llvm/lib/Transforms/Utils/CloneFunction.cpp +++ b/llvm/lib/Transforms/Utils/CloneFunction.cpp @@ -819,9 +819,9 @@ void llvm::CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc, // Drop all incompatible return attributes that cannot be applied to NewFunc // during cloning, so as to allow instruction simplification to reason on the // old state of the function. The original attributes are restored later. - AttributeMask IncompatibleAttrs = - AttributeFuncs::typeIncompatible(OldFunc->getReturnType()); AttributeList Attrs = NewFunc->getAttributes(); + AttributeMask IncompatibleAttrs = AttributeFuncs::typeIncompatible( + OldFunc->getReturnType(), Attrs.getRetAttrs()); NewFunc->removeRetAttrs(IncompatibleAttrs); // As phi-nodes have been now remapped, allow incremental simplification of diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 13eb588e46de80..a0a93dc0dab5fc 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -3057,8 +3057,8 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, else Builder.CreateRet(NewDeoptCall); // Since the ret type is changed, remove the incompatible attributes. - NewDeoptCall->removeRetAttrs( - AttributeFuncs::typeIncompatible(NewDeoptCall->getType())); + NewDeoptCall->removeRetAttrs(AttributeFuncs::typeIncompatible( + NewDeoptCall->getType(), NewDeoptCall->getRetAttributes())); } // Leave behind the normal returns so we can merge control flow. diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index cb4ef87de1c523..79e91ad097cf00 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -342,7 +342,13 @@ static Value *copyFlags(const CallInst &Old, Value *New) { static Value *mergeAttributesAndFlags(CallInst *NewCI, const CallInst &Old) { NewCI->setAttributes(AttributeList::get( NewCI->getContext(), {NewCI->getAttributes(), Old.getAttributes()})); - NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible(NewCI->getType())); + NewCI->removeRetAttrs(AttributeFuncs::typeIncompatible( + NewCI->getType(), NewCI->getRetAttributes())); + for (unsigned I = 0; I < NewCI->arg_size(); ++I) + NewCI->removeParamAttrs( + I, AttributeFuncs::typeIncompatible(NewCI->getArgOperand(I)->getType(), + NewCI->getParamAttributes(I))); + return copyFlags(Old, NewCI); } diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll index bb2728a103ec6c..c4ae7e7e16bcf4 100644 --- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll @@ -342,5 +342,17 @@ define signext i32 @emit_stpncpy() { ret i32 0 } +define void @simplify_memset_chk_pr112633(ptr %p, i32 %conv) { +; CHECK-LABEL: @simplify_memset_chk_pr112633( +; CHECK-NEXT: [[CALL_I:%.*]] = tail call ptr @__memset_chk(ptr [[P:%.*]], i32 range(i32 0, 123) [[CONV:%.*]], i64 1, i64 1) +; CHECK-NEXT: ret void +; + %call.i = tail call ptr @__memset_chk(ptr %p, i32 range(i32 0, 123) %conv, i64 1, i64 1) + ret void +} + +declare ptr @__memset_chk(ptr, i32, i64, i64) + + attributes #0 = { nobuiltin } attributes #1 = { builtin } diff --git a/llvm/test/Verifier/range-attr.ll b/llvm/test/Verifier/range-attr.ll index f985ab696eacba..91412369b0b20d 100644 --- a/llvm/test/Verifier/range-attr.ll +++ b/llvm/test/Verifier/range-attr.ll @@ -1,12 +1,12 @@ ; RUN: not llvm-as %s -o /dev/null 2>&1 | FileCheck %s -; CHECK: Range bit width must match type bit width! +; CHECK: Attribute 'range(i8 1, 0)' applied to incompatible type! ; CHECK-NEXT: ptr @bit_widths_do_not_match define void @bit_widths_do_not_match(i32 range(i8 1, 0) %a) { ret void } -; CHECK: Range bit width must match type bit width! +; CHECK: Attribute 'range(i8 1, 0)' applied to incompatible type! ; CHECK-NEXT: ptr @bit_widths_do_not_match_vector define void @bit_widths_do_not_match_vector(<4 x i32> range(i8 1, 0) %a) { ret void From 76f377618532fe486d1fff1250598a73c55f4310 Mon Sep 17 00:00:00 2001 From: David Sherwood Date: Thu, 17 Oct 2024 16:50:59 +0100 Subject: [PATCH 02/24] [NFC][LoopVectorize] Restructure simple early exit tests (#112721) The previous simple_early_exit.ll was growing too large and difficult to manage. Instead I've decided to refactor the tests by splitting out into notional groups: 1. single_early_exit.ll: loops with a single uncountable exit that do not have live-outs from the loop. 2. single_early_exit_live_outs.ll: loops with a single uncountable exit with live-outs. 3. multi_early_exit.ll: loops with multiple early exits, i.e. a mixture of countable and uncountable exits, but with no live-outs from the loop. 4. multi_early_exit_live_outs.ll: as above, but with live-outs. 5. single_early_exit_unsafe_ptrs.ll: loops with a single uncountable exit, but with pointers that are not unconditionally dereferenceable. 6. unsupported_early_exit.ll: loops with uncountable exits that we cannot yet vectorise. 7. early_exit_legality.ll: tests the debug output from LoopVectorizationLegality to make sure we handle different scenarios correctly. Only the last test now requires asserts. Over time some of these tests should start vectorising as more support is added. I also tried to rename the multi early exit tests to make it clear there what mixture of countable and uncountable exits are present. --- .../LoopVectorize/early_exit_legality.ll | 542 +++++ .../LoopVectorize/multi_early_exit.ll | 122 + .../multi_early_exit_live_outs.ll | 128 + .../LoopVectorize/simple_early_exit.ll | 2118 ----------------- .../LoopVectorize/single_early_exit.ll | 220 ++ .../single_early_exit_live_outs.ll | 997 ++++++++ .../single_early_exit_unsafe_ptrs.ll | 143 ++ .../LoopVectorize/unsupported_early_exit.ll | 494 ++++ 8 files changed, 2646 insertions(+), 2118 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/early_exit_legality.ll create mode 100644 llvm/test/Transforms/LoopVectorize/multi_early_exit.ll create mode 100644 llvm/test/Transforms/LoopVectorize/multi_early_exit_live_outs.ll delete mode 100644 llvm/test/Transforms/LoopVectorize/simple_early_exit.ll create mode 100644 llvm/test/Transforms/LoopVectorize/single_early_exit.ll create mode 100644 llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll create mode 100644 llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll create mode 100644 llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll diff --git a/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll new file mode 100644 index 00000000000000..21433477c1d7a3 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/early_exit_legality.ll @@ -0,0 +1,542 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; REQUIRES: asserts +; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize -disable-output 2>&1 | FileCheck %s + +declare void @init_mem(ptr, i64); + +; == SOME LEGAL EXAMPLES == + +; The form of the induction variables requires SCEV predicates. +define i32 @diff_exit_block_needs_scev_check(i32 %end) { +; CHECK-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' +; CHECK: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32))) +; CHECK-NEXT: LV: We can vectorize this loop! +; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +entry: + %p1 = alloca [1024 x i32] + %p2 = alloca [1024 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + %end.clamped = and i32 %end, 1023 + br label %for.body + +for.body: + %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ] + %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ] + %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind + %1 = load i32, ptr %arrayidx2, align 4 + %cmp.early = icmp eq i32 %0, %1 + br i1 %cmp.early, label %found, label %for.inc + +for.inc: + %ind.next = add i8 %ind, 1 + %conv = zext i8 %ind.next to i32 + %gep.ind.next = add i64 %gep.ind, 1 + %cmp = icmp ult i32 %conv, %end.clamped + br i1 %cmp, label %for.body, label %exit + +found: + ret i32 1 + +exit: + ret i32 0 +} + + +define i64 @same_exit_block_pre_inc_use1() { +; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' +; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 +; CHECK-NEXT: LV: We can vectorize this loop! +; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_call() { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_safe_call' +; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 +; CHECK-NEXT: LV: We can vectorize this loop! +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index + %ld1 = load float, ptr %arrayidx, align 1 + %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1) + %cmp = fcmp fast ult float %sqrt, 3.0e+00 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_div() { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_safe_div' +; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 +; CHECK-NEXT: LV: We can vectorize this loop! +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 %ld1, 20000 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' +; CHECK: LV: Found an early exit loop with symbolic max backedge taken count: 63 +; CHECK-NEXT: LV: We can vectorize this loop! +; CHECK-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index + %ld2 = load i64, ptr %arrayidx2, align 8 + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ] + ret i64 %retval +} + + +; == SOME ILLEGAL EXAMPLES == + + +define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { +; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas' +; CHECK: LV: Not vectorizing: Loop may fault. +entry: + %p1 = alloca [42 x i8] + %p2 = alloca [42 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) { +; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_deref_ptrs' +; CHECK: LV: Not vectorizing: Loop may fault. +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) { +; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_unknown_ptrs' +; CHECK: LV: Not vectorizing: Loop may fault. +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't +; support this yet. +define i64 @uncountable_exit_on_last_block() { +; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_on_last_block' +; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %search ], [ 3, %entry ] + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop + +loop.end: + %retval = phi i64 [ 64, %loop ], [ %index, %search ] + ret i64 %retval +} + + +; We don't currently support multiple uncountable early exits. +define i64 @multiple_uncountable_exits() { +; CHECK-LABEL: LV: Checking a loop in 'multiple_uncountable_exits' +; CHECK: LV: Not vectorizing: Loop has too many uncountable exits. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %search1 + +search1: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp1 = icmp eq i8 %ld1, %ld2 + br i1 %cmp1, label %loop.end, label %search2 + +search2: + %cmp2 = icmp ult i8 %ld1, 34 + br i1 %cmp2, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %search1, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ] + ret i64 %retval +} + + +define i64 @uncountable_exit_infinite_loop() { +; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_infinite_loop' +; CHECK: LV: Not vectorizing: Cannot determine exact exit count for latch block. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br label %loop + +loop.end: + %retval = phi i64 [ %index, %loop ] + ret i64 %retval +} + + +define i64 @loop_contains_unsafe_call() { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_unsafe_call' +; CHECK: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %bad_call = call i32 @foo(i32 %ld1) #0 + %cmp = icmp eq i32 %bad_call, 34 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_unsafe_div() { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_unsafe_div' +; CHECK: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 20000, %ld1 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_store(ptr %dest) { +; CHECK-LABEL: LV: Checking a loop in 'loop_contains_store' +; CHECK: LV: Not vectorizing: Writes to memory unsupported in early exit loops +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %index + store i32 %ld1, ptr %arrayidx2, align 4 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @uncountable_exit_in_conditional_block(ptr %mask) { +; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_in_conditional_block' +; CHECK: LV: Not vectorizing: Early exit is not the latch predecessor. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx1 = getelementptr inbounds i8, ptr %mask, i64 %index + %ld1 = load i8, ptr %arrayidx1, align 1 + %cmp1 = icmp ne i8 %ld1, 0 + br i1 %cmp1, label %loop.search, label %loop.inc + +loop.search: + %arrayidx2 = getelementptr inbounds i8, ptr %p1, i64 %index + %ld2 = load i8, ptr %arrayidx2, align 1 + %arrayidx3 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld3 = load i8, ptr %arrayidx3, align 1 + %cmp2 = icmp eq i8 %ld2, %ld3 + br i1 %cmp2, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop.search ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_with_reduction() { +; CHECK-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_with_reduction' +; CHECK: LV: Not vectorizing: Found reductions or recurrences in early-exit loop. +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %red = phi i64 [ %red.next, %loop.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %ld2.zext = zext i8 %ld2 to i64 + %red.next = add i64 %red, %ld2.zext + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %final.ind = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + %retval = add i64 %red.next, %final.ind + ret i64 %retval +} + + +define i64 @uncountable_exit_has_multiple_outside_successors() { +; CHECK-LABEL: LV: Checking a loop in 'uncountable_exit_has_multiple_outside_successors' +; CHECK: LV: Not vectorizing: Loop contains an unsupported switch +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + switch i8 %ld1, label %loop.inc [ + i8 2, label %loop.end + i8 3, label %loop.surprise + ] + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.surprise: + ret i64 3 + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +declare i32 @foo(i32) readonly +declare @foo_vec() + +attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } diff --git a/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll b/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll new file mode 100644 index 00000000000000..94af5b7c7607da --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/multi_early_exit.ll @@ -0,0 +1,122 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s + +declare void @init_mem(ptr, i64); + +define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() { +; CHECK-LABEL: define i64 @one_uncountable_two_countable_same_exit_phi_of_consts() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ] + ret i64 %retval +} + + +define i64 @one_uncountable_two_countable_diff_exit_no_phis() { +; CHECK-LABEL: define i64 @one_uncountable_two_countable_diff_exit_no_phis() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end.early: +; CHECK-NEXT: ret i64 1 +; CHECK: loop.end: +; CHECK-NEXT: ret i64 0 +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end.early, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end.early: + ret i64 1 + +loop.end: + ret i64 0 +} diff --git a/llvm/test/Transforms/LoopVectorize/multi_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/multi_early_exit_live_outs.ll new file mode 100644 index 00000000000000..7759c10032e9bd --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/multi_early_exit_live_outs.ll @@ -0,0 +1,128 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s + +declare void @init_mem(ptr, i64); + +; There are multiple exit blocks - two of them have an exact representation for the +; exit-not-taken counts and the other is unknown, i.e. the "early exit". +define i64 @one_uncountable_two_countable_same_exit() { +; CHECK-LABEL: define i64 @one_uncountable_two_countable_same_exit() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ [[INDEX]], [[SEARCH]] ], [ 128, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 64, %loop ], [ %index, %search ], [ 128, %loop.inc ] + ret i64 %retval +} + + +define i64 @one_uncountable_two_countable_diff_exit() { +; CHECK-LABEL: define i64 @one_uncountable_two_countable_diff_exit() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end.early: +; CHECK-NEXT: [[RET_EARLY:%.*]] = phi i64 [ [[INDEX]], [[SEARCH]] ] +; CHECK-NEXT: ret i64 [[RET_EARLY]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ 128, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %cmp1 = icmp ne i64 %index, 64 + br i1 %cmp1, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end.early, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 128 + br i1 %exitcond, label %loop, label %loop.end + +loop.end.early: + %ret.early = phi i64 [ %index, %search ] + ret i64 %ret.early + +loop.end: + %retval = phi i64 [ 64, %loop ], [ 128, %loop.inc ] + ret i64 %retval +} diff --git a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll b/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll deleted file mode 100644 index d5e4f4d016c6e2..00000000000000 --- a/llvm/test/Transforms/LoopVectorize/simple_early_exit.ll +++ /dev/null @@ -1,2118 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 -; REQUIRES: asserts -; RUN: opt -S < %s -p loop-vectorize -debug-only=loop-vectorize 2>%t | FileCheck %s --check-prefixes=CHECK -; RUN: cat %t | FileCheck %s --check-prefix=DEBUG - -declare void @init_mem(ptr, i64); - -define i64 @same_exit_block_pre_inc_use1() { -; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1' -; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 63 -; DEBUG-NEXT: LV: We can vectorize this loop! -; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LAND_RHS:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] -; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] -; CHECK: loop.end: -; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] -; CHECK-NEXT: ret i64 [[START_0_LCSSA]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P2]], i64 0, i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds [1024 x i8], ptr %p1, i64 0, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds [1024 x i8], ptr %p2, i64 0, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [40 x i32], align 4 -; CHECK-NEXT: [[P2:%.*]] = alloca [40 x i32], align 4 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LAND_RHS:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] -; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] -; CHECK: loop.end: -; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] -; CHECK-NEXT: ret i64 [[START_0_LCSSA]] -; -entry: - %p1 = alloca [40 x i32] - %p2 = alloca [40 x i32] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use2() { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use2() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ 67, %loop ], [ %index, %loop.inc ] - ret i64 %retval -} - -define i64 @same_exit_block_pre_inc_use3() { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use3() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ] -; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - ret i64 %index -} - - -; In this example the early exit block appears in the list of ExitNotTaken -; SCEVs, but is not computable. -define i64 @same_exit_block_pre_inc_use4() { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i64], align 8 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i64] - %p2 = alloca [1024 x i64] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index - %ld1 = load i64, ptr %arrayidx, align 1 - %cmp3 = icmp ult i64 %index, %ld1 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_post_inc_use() { -; CHECK-LABEL: define i64 @same_exit_block_post_inc_use() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ %index.next, %loop.inc ] - ret i64 %retval -} - -define i64 @same_exit_block_post_inc_use2() { -; CHECK-LABEL: define i64 @same_exit_block_post_inc_use2() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %index.next = add i64 %index, 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index.next, %loop ], [ %index, %loop.inc ] - ret i64 %retval -} - -define i64 @same_exit_block_phi_of_consts() { -; CHECK-LABEL: define i64 @same_exit_block_phi_of_consts() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ 0, %loop ], [ 1, %loop.inc ] - ret i64 %retval -} - - -define i64 @diff_exit_block_pre_inc_use1() { -; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use1() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] -; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.early.exit - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.early.exit: - %retval1 = phi i64 [ %index, %loop ] - ret i64 %retval1 - -loop.end: - %retval2 = phi i64 [ 67, %loop.inc ] - ret i64 %retval2 -} - -define i64 @diff_exit_block_pre_inc_use2() { -; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use2() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] -; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.early.exit - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.early.exit: - %retval1 = phi i64 [ 67, %loop ] - ret i64 %retval1 - -loop.end: - %retval2 = phi i64 [ %index, %loop.inc ] - ret i64 %retval2 -} - -define i64 @diff_exit_block_pre_inc_use3() { -; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use3() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] -; CHECK: loop.early.exit: -; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] -; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] -; CHECK: loop.end: -; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.early.exit - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.early.exit: - ret i64 %index - -loop.end: - ret i64 %index -} - - -define i64 @diff_exit_block_phi_of_consts() { -; CHECK-LABEL: define i64 @diff_exit_block_phi_of_consts() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] -; CHECK: loop.early.exit: -; CHECK-NEXT: ret i64 0 -; CHECK: loop.end: -; CHECK-NEXT: ret i64 1 -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.early.exit - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.early.exit: - ret i64 0 - -loop.end: - ret i64 1 -} - - -define i64 @diff_exit_block_post_inc_use1() { -; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use1() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] -; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.early.exit - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.early.exit: - %retval1 = phi i64 [ %index, %loop ] - ret i64 %retval1 - -loop.end: - %retval2 = phi i64 [ %index.next, %loop.inc ] - ret i64 %retval2 -} - - -define i64 @diff_exit_block_post_inc_use2() { -; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use2() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] -; CHECK: loop.early.exit: -; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ] -; CHECK-NEXT: ret i64 [[RETVAL1]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL2]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %index.next = add i64 %index, 1 - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.early.exit - -loop.inc: - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.early.exit: - %retval1 = phi i64 [ %index.next, %loop ] - ret i64 %retval1 - -loop.end: - %retval2 = phi i64 [ %index, %loop.inc ] - ret i64 %retval2 -} - - -; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't -; support this yet. -define i64 @early_exit_on_last_block() { -; DEBUG-LABEL: LV: Checking a loop in 'early_exit_on_last_block' -; DEBUG: LV: Not vectorizing: Early exit is not the latch predecessor. -; CHECK-LABEL: define i64 @early_exit_on_last_block() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LAND_RHS:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[SEARCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: search: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]] -; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT]], label [[LAND_RHS]] -; CHECK: loop.end: -; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ 64, [[LAND_RHS]] ], [ [[INDEX]], [[SEARCH]] ] -; CHECK-NEXT: ret i64 [[START_0_LCSSA]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %search ], [ 3, %entry ] - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %search, label %loop.end - -search: - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.end, label %loop - -loop.end: - %retval = phi i64 [ 64, %loop ], [ %index, %search ] - ret i64 %retval -} - - -; There are multiple exit blocks - two of them have an exact representation for the -; exit-not-taken counts and the other is unknown, i.e. the "early exit". -define i64 @multiple_exiting_one_early_same_exit() { -; CHECK-LABEL: define i64 @multiple_exiting_one_early_same_exit() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 -; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] -; CHECK: search: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ [[INDEX]], [[SEARCH]] ], [ 128, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %cmp1 = icmp ne i64 %index, 64 - br i1 %cmp1, label %search, label %loop.end - -search: - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.end, label %loop.inc - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 128 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ 64, %loop ], [ %index, %search ], [ 128, %loop.inc ] - ret i64 %retval -} - - -define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() { -; CHECK-LABEL: define i64 @multiple_exiting_one_early_same_exit_phi_of_consts() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 -; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] -; CHECK: search: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END]], label [[LOOP_INC]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[SEARCH]] ], [ 0, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %cmp1 = icmp ne i64 %index, 64 - br i1 %cmp1, label %search, label %loop.end - -search: - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.end, label %loop.inc - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 128 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ 0, %loop ], [ 1, %search ], [ 0, %loop.inc ] - ret i64 %retval -} - - -define i64 @multiple_exiting_one_early_diff_exit() { -; CHECK-LABEL: define i64 @multiple_exiting_one_early_diff_exit() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 -; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] -; CHECK: search: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end.early: -; CHECK-NEXT: [[RET_EARLY:%.*]] = phi i64 [ [[INDEX]], [[SEARCH]] ] -; CHECK-NEXT: ret i64 [[RET_EARLY]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 64, [[LOOP]] ], [ 128, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %cmp1 = icmp ne i64 %index, 64 - br i1 %cmp1, label %search, label %loop.end - -search: - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.end.early, label %loop.inc - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 128 - br i1 %exitcond, label %loop, label %loop.end - -loop.end.early: - %ret.early = phi i64 [ %index, %search ] - ret i64 %ret.early - -loop.end: - %retval = phi i64 [ 64, %loop ], [ 128, %loop.inc ] - ret i64 %retval -} - -define i64 @multiple_exiting_one_early_diff_exit_no_phis() { -; CHECK-LABEL: define i64 @multiple_exiting_one_early_diff_exit_no_phis() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX]], 64 -; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH:%.*]], label [[LOOP_END:%.*]] -; CHECK: search: -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_END_EARLY:%.*]], label [[LOOP_INC]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end.early: -; CHECK-NEXT: ret i64 1 -; CHECK: loop.end: -; CHECK-NEXT: ret i64 0 -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %cmp1 = icmp ne i64 %index, 64 - br i1 %cmp1, label %search, label %loop.end - -search: - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.end.early, label %loop.inc - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 128 - br i1 %exitcond, label %loop, label %loop.end - -loop.end.early: - ret i64 1 - -loop.end: - ret i64 0 -} - - -; We don't currently support multiple early exits. -define i64 @multiple_early_exits() { -; DEBUG-LABEL: LV: Checking a loop in 'multiple_early_exits' -; DEBUG: LV: Not vectorizing: Loop has too many uncountable exits. -; CHECK-LABEL: define i64 @multiple_early_exits() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LAND_RHS:%.*]] -; CHECK: search1: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]] -; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT:%.*]], label [[FOR_INC:%.*]] -; CHECK: search2: -; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[TMP41]], 34 -; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_END_LOOPEXIT]], label [[FOR_INC1]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] -; CHECK: loop.end: -; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 100, [[FOR_INC]] ], [ 43, [[FOR_INC1]] ] -; CHECK-NEXT: ret i64 [[START_0_LCSSA]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %search1 - -search1: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp1 = icmp eq i8 %ld1, %ld2 - br i1 %cmp1, label %loop.end, label %search2 - -search2: - %cmp2 = icmp ult i8 %ld1, 34 - br i1 %cmp2, label %loop.end, label %loop.inc - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %search1, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ] - ret i64 %retval -} - - -define i64 @early_exit_infinite_loop() { -; DEBUG-LABEL: LV: Checking a loop in 'early_exit_infinite_loop' -; DEBUG: LV: Not vectorizing: Cannot determine exact exit count for latch block. -; CHECK-LABEL: define i64 @early_exit_infinite_loop() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LAND_RHS:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] -; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br label [[LAND_RHS]] -; CHECK: loop.end: -; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ] -; CHECK-NEXT: ret i64 [[START_0_LCSSA]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br label %loop - -loop.end: - %retval = phi i64 [ %index, %loop ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use_inv_cond(i1 %cond) { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use_inv_cond( -; CHECK-SAME: i1 [[COND:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: [[CMP4:%.*]] = select i1 [[COND]], i1 [[CMP3]], i1 false -; CHECK-NEXT: br i1 [[CMP4]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - %cmp4 = select i1 %cond, i1 %cmp3, i1 false - br i1 %cmp4, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @loop_contains_safe_call() { -; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_safe_call' -; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 63 -; DEBUG-NEXT: LV: We can vectorize this loop! -; CHECK-LABEL: define i64 @loop_contains_safe_call() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) -; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index - %ld1 = load float, ptr %arrayidx, align 1 - %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1) - %cmp = fcmp fast ult float %sqrt, 3.0e+00 - br i1 %cmp, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @loop_contains_unsafe_call() { -; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_unsafe_call' -; DEBUG: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed. -; CHECK-LABEL: define i64 @loop_contains_unsafe_call() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR2:[0-9]+]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[BAD_CALL]], 34 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index - %ld1 = load i32, ptr %arrayidx, align 1 - %bad_call = call i32 @foo(i32 %ld1) #0 - %cmp = icmp eq i32 %bad_call, 34 - br i1 %cmp, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @loop_contains_safe_div() { -; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_safe_div' -; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 63 -; DEBUG-NEXT: LV: We can vectorize this loop! -; CHECK-LABEL: define i64 @loop_contains_safe_div() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP1:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX2]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC1]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index - %ld1 = load i32, ptr %arrayidx, align 1 - %div = udiv i32 %ld1, 20000 - %cmp = icmp eq i32 %div, 1 - br i1 %cmp, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @loop_contains_unsafe_div() { -; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_unsafe_div' -; DEBUG: LV: Not vectorizing: Early exit loop contains operations that cannot be speculatively executed. -; CHECK-LABEL: define i64 @loop_contains_unsafe_div() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[DIV:%.*]] = udiv i32 20000, [[LD1]] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i32, ptr %arrayidx, align 1 - %div = udiv i32 20000, %ld1 - %cmp = icmp eq i32 %div, 1 - br i1 %cmp, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @loop_contains_store(ptr %dest) { -; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_store' -; DEBUG: LV: Not vectorizing: Writes to memory unsupported in early exit loops -; CHECK-LABEL: define i64 @loop_contains_store( -; CHECK-SAME: ptr [[DEST:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[INDEX]] -; CHECK-NEXT: store i32 [[LD1]], ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index - %ld1 = load i32, ptr %arrayidx, align 1 - %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %index - store i32 %ld1, ptr %arrayidx2, align 4 - %cmp = icmp eq i32 %ld1, 1 - br i1 %cmp, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) { -; DEBUG-LABEL: LV: Checking a loop in 'loop_contains_load_after_early_exit' -; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 63 -; DEBUG-NEXT: LV: We can vectorize this loop! -; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. -; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit( -; CHECK-SAME: ptr align 8 dereferenceable(1024) [[P2:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 -; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index - %ld1 = load i32, ptr %arrayidx, align 1 - %cmp = icmp eq i32 %ld1, 1 - br i1 %cmp, label %loop.inc, label %loop.end - -loop.inc: - %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index - %ld2 = load i64, ptr %arrayidx2, align 8 - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ] - ret i64 %retval -} - - -define i64 @early_exit_in_conditional_block(ptr %mask) { -; DEBUG-LABEL: LV: Checking a loop in 'early_exit_in_conditional_block' -; DEBUG: LV: Not vectorizing: Early exit is not the latch predecessor. -; CHECK-LABEL: define i64 @early_exit_in_conditional_block( -; CHECK-SAME: ptr [[MASK:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[MASK]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[LD1]], 0 -; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_SEARCH:%.*]], label [[LOOP_INC]] -; CHECK: loop.search: -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 -; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 -; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD2]], [[LD3]] -; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP_SEARCH]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx1 = getelementptr inbounds i8, ptr %mask, i64 %index - %ld1 = load i8, ptr %arrayidx1, align 1 - %cmp1 = icmp ne i8 %ld1, 0 - br i1 %cmp1, label %loop.search, label %loop.inc - -loop.search: - %arrayidx2 = getelementptr inbounds i8, ptr %p1, i64 %index - %ld2 = load i8, ptr %arrayidx2, align 1 - %arrayidx3 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld3 = load i8, ptr %arrayidx3, align 1 - %cmp2 = icmp eq i8 %ld2, %ld3 - br i1 %cmp2, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop.search ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_reverse() { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_reverse() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 1023, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 1023, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, -1 - %exitcond = icmp eq i64 %index.next, 0 - br i1 %exitcond, label %loop.end, label %loop - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 1024, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_with_reduction() { -; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_with_reduction' -; DEBUG: LV: Not vectorizing: Found reductions or recurrences in early-exit loop. -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_with_reduction() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LAND_RHS:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[RED_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[LD2_ZEXT:%.*]] = zext i8 [[TMP39]] to i64 -; CHECK-NEXT: [[RED_NEXT]] = add i64 [[RED]], [[LD2_ZEXT]] -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] -; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] -; CHECK: loop.end: -; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], [[FOR_INC]] ], [ [[RED_NEXT]], [[LAND_RHS]] ] -; CHECK-NEXT: [[FINAL_IND:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] -; CHECK-NEXT: [[START_0_LCSSA:%.*]] = add i64 [[RED_NEXT_LCSSA]], [[FINAL_IND]] -; CHECK-NEXT: ret i64 [[START_0_LCSSA]] -; -entry: - %p1 = alloca [1024 x i8] - %p2 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %red = phi i64 [ %red.next, %loop.inc ], [ 0, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %ld2.zext = zext i8 %ld2 to i64 - %red.next = add i64 %red, %ld2.zext - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %final.ind = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - %retval = add i64 %red.next, %final.ind - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p1, ptr dereferenceable(1024) %p2) { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_deref_ptrs( -; CHECK-SAME: ptr dereferenceable(1024) [[P1:%.*]], ptr dereferenceable(1024) [[P2:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -; The form of the induction variables requires SCEV predicates. -define i32 @diff_exit_block_needs_scev_check(i32 %end) { -; DEBUG-LABEL: LV: Checking a loop in 'diff_exit_block_needs_scev_check' -; DEBUG: Found an early exit loop with symbolic max backedge taken count: (-1 + (1 umax (zext i10 (trunc i32 %end to i10) to i32))) -; DEBUG-NEXT: LV: We can vectorize this loop! -; DEBUG-NEXT: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. -; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check( -; CHECK-SAME: i32 [[END:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4 -; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] -; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]] -; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] -; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] -; CHECK: for.inc: -; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 -; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 -; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] -; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] -; CHECK: found: -; CHECK-NEXT: ret i32 1 -; CHECK: exit: -; CHECK-NEXT: ret i32 0 -; -entry: - %p1 = alloca [1024 x i32] - %p2 = alloca [1024 x i32] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - %end.clamped = and i32 %end, 1023 - br label %for.body - -for.body: - %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ] - %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ] - %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind - %0 = load i32, ptr %arrayidx1, align 4 - %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind - %1 = load i32, ptr %arrayidx2, align 4 - %cmp.early = icmp eq i32 %0, %1 - br i1 %cmp.early, label %found, label %for.inc - -for.inc: - %ind.next = add i8 %ind, 1 - %conv = zext i8 %ind.next to i32 - %gep.ind.next = add i64 %gep.ind, 1 - %cmp = icmp ult i32 %conv, %end.clamped - br i1 %cmp, label %for.body, label %exit - -found: - ret i32 1 - -exit: - ret i32 0 -} - - -declare void @abort() - -; This is a variant of an early exit loop where the condition for leaving -; early is loop invariant. -define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { -; DEBUG-LABEL: LV: Checking a loop in 'diff_blocks_invariant_early_exit_cond' -; DEBUG: LV: Found an early exit loop with symbolic max backedge taken count: 275 -; DEBUG: LV: Not vectorizing: Auto-vectorization of loops with uncountable early exit is not yet supported. -; CHECK-LABEL: define i32 @diff_blocks_invariant_early_exit_cond( -; CHECK-SAME: ptr [[S:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[SVAL]], 0 -; CHECK-NEXT: br label [[FOR_BODY:%.*]] -; CHECK: for.body: -; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] -; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]] -; CHECK: for.inc: -; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 -; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 -; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] -; CHECK: early.exit: -; CHECK-NEXT: tail call void @abort() -; CHECK-NEXT: unreachable -; CHECK: for.end: -; CHECK-NEXT: ret i32 0 -; -entry: - %sval = load i32, ptr %s, align 4 - %cond = icmp eq i32 %sval, 0 - br label %for.body - -for.body: - %ind = phi i32 [ -10, %entry ], [ %ind.next, %for.inc ] - br i1 %cond, label %for.inc, label %early.exit - -for.inc: - %ind.next = add nsw i32 %ind, 1 - %exitcond.not = icmp eq i32 %ind.next, 266 - br i1 %exitcond.not, label %for.end, label %for.body - -early.exit: - tail call void @abort() - unreachable - -for.end: - ret i32 0 -} - - -define i64 @early_exit_has_multiple_outside_successors() { -; DEBUG-LABEL: LV: Checking a loop in 'early_exit_has_multiple_outside_successors' -; DEBUG: LV: Not vectorizing: Loop contains an unsupported switch -; CHECK-LABEL: define i64 @early_exit_has_multiple_outside_successors() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: switch i8 [[LD1]], label [[LOOP_INC]] [ -; CHECK-NEXT: i8 2, label [[LOOP_END:%.*]] -; CHECK-NEXT: i8 3, label [[LOOP_SURPRISE:%.*]] -; CHECK-NEXT: ] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.surprise: -; CHECK-NEXT: ret i64 3 -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [1024 x i8] - call void @init_mem(ptr %p1, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - switch i8 %ld1, label %loop.inc [ - i8 2, label %loop.end - i8 3, label %loop.surprise - ] - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.surprise: - ret i64 3 - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { -; DEBUG-LABEL: LV: Checking a loop in 'same_exit_block_pre_inc_use1_too_small_allocas' -; DEBUG: LV: Not vectorizing: Loop may fault. -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { -; CHECK-NEXT: entry: -; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1 -; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1 -; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) -; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - %p1 = alloca [42 x i8] - %p2 = alloca [42 x i8] - call void @init_mem(ptr %p1, i64 1024) - call void @init_mem(ptr %p2, i64 1024) - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( -; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - -define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) { -; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( -; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { -; CHECK-NEXT: entry: -; CHECK-NEXT: br label [[LOOP:%.*]] -; CHECK: loop: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] -; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 -; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] -; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 -; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] -; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] -; CHECK: loop.inc: -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 -; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] -; CHECK: loop.end: -; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] -; CHECK-NEXT: ret i64 [[RETVAL]] -; -entry: - br label %loop - -loop: - %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] - %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index - %ld1 = load i8, ptr %arrayidx, align 1 - %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index - %ld2 = load i8, ptr %arrayidx1, align 1 - %cmp3 = icmp eq i8 %ld1, %ld2 - br i1 %cmp3, label %loop.inc, label %loop.end - -loop.inc: - %index.next = add i64 %index, 1 - %exitcond = icmp ne i64 %index.next, 67 - br i1 %exitcond, label %loop, label %loop.end - -loop.end: - %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] - ret i64 %retval -} - - - -declare i32 @foo(i32) readonly -declare @foo_vec() - -attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll new file mode 100644 index 00000000000000..52f82d007de4df --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit.ll @@ -0,0 +1,220 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s + +declare void @init_mem(ptr, i64); + + +define i64 @same_exit_block_phi_of_consts() { +; CHECK-LABEL: define i64 @same_exit_block_phi_of_consts() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 0, [[LOOP]] ], [ 1, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 0, %loop ], [ 1, %loop.inc ] + ret i64 %retval +} + + +define i64 @diff_exit_block_phi_of_consts() { +; CHECK-LABEL: define i64 @diff_exit_block_phi_of_consts() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: ret i64 0 +; CHECK: loop.end: +; CHECK-NEXT: ret i64 1 +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + ret i64 0 + +loop.end: + ret i64 1 +} + + +; The form of the induction variables requires SCEV predicates. +define i32 @diff_exit_block_needs_scev_check(i32 %end) { +; CHECK-LABEL: define i32 @diff_exit_block_needs_scev_check( +; CHECK-SAME: i32 [[END:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i32], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: [[END_CLAMPED:%.*]] = and i32 [[END]], 1023 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IND:%.*]] = phi i8 [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[GEP_IND:%.*]] = phi i64 [ [[GEP_IND_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[GEP_IND]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[P2]], i64 [[GEP_IND]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP_EARLY:%.*]] = icmp eq i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: br i1 [[CMP_EARLY]], label [[FOUND:%.*]], label [[FOR_INC]] +; CHECK: for.inc: +; CHECK-NEXT: [[IND_NEXT]] = add i8 [[IND]], 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[IND_NEXT]] to i32 +; CHECK-NEXT: [[GEP_IND_NEXT]] = add i64 [[GEP_IND]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[CONV]], [[END_CLAMPED]] +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[EXIT:%.*]] +; CHECK: found: +; CHECK-NEXT: ret i32 1 +; CHECK: exit: +; CHECK-NEXT: ret i32 0 +; +entry: + %p1 = alloca [1024 x i32] + %p2 = alloca [1024 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + %end.clamped = and i32 %end, 1023 + br label %for.body + +for.body: + %ind = phi i8 [ %ind.next, %for.inc ], [ 0, %entry ] + %gep.ind = phi i64 [ %gep.ind.next, %for.inc ], [ 0, %entry ] + %arrayidx1 = getelementptr inbounds i32, ptr %p1, i64 %gep.ind + %0 = load i32, ptr %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %p2, i64 %gep.ind + %1 = load i32, ptr %arrayidx2, align 4 + %cmp.early = icmp eq i32 %0, %1 + br i1 %cmp.early, label %found, label %for.inc + +for.inc: + %ind.next = add i8 %ind, 1 + %conv = zext i8 %ind.next to i32 + %gep.ind.next = add i64 %gep.ind, 1 + %cmp = icmp ult i32 %conv, %end.clamped + br i1 %cmp, label %for.body, label %exit + +found: + ret i32 1 + +exit: + ret i32 0 +} + + +declare void @abort() + +; This is a variant of an early exit loop where the condition for leaving +; early is loop invariant. +define i32 @diff_blocks_invariant_early_exit_cond(ptr %s) { +; CHECK-LABEL: define i32 @diff_blocks_invariant_early_exit_cond( +; CHECK-SAME: ptr [[S:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[SVAL:%.*]] = load i32, ptr [[S]], align 4 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[SVAL]], 0 +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IND:%.*]] = phi i32 [ -10, [[ENTRY:%.*]] ], [ [[IND_NEXT:%.*]], [[FOR_INC:%.*]] ] +; CHECK-NEXT: br i1 [[COND]], label [[FOR_INC]], label [[EARLY_EXIT:%.*]] +; CHECK: for.inc: +; CHECK-NEXT: [[IND_NEXT]] = add nsw i32 [[IND]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IND_NEXT]], 266 +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; CHECK: early.exit: +; CHECK-NEXT: tail call void @abort() +; CHECK-NEXT: unreachable +; CHECK: for.end: +; CHECK-NEXT: ret i32 0 +; +entry: + %sval = load i32, ptr %s, align 4 + %cond = icmp eq i32 %sval, 0 + br label %for.body + +for.body: + %ind = phi i32 [ -10, %entry ], [ %ind.next, %for.inc ] + br i1 %cond, label %for.inc, label %early.exit + +for.inc: + %ind.next = add nsw i32 %ind, 1 + %exitcond.not = icmp eq i32 %ind.next, 266 + br i1 %exitcond.not, label %for.end, label %for.body + +early.exit: + tail call void @abort() + unreachable + +for.end: + ret i32 0 +} diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll new file mode 100644 index 00000000000000..7889191c4b5bae --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_live_outs.ll @@ -0,0 +1,997 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s + +declare void @init_mem(ptr, i64); + +define i64 @same_exit_block_pre_inc_use1() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc1_use_inv_cond(i1 %cond) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc1_use_inv_cond( +; CHECK-SAME: i1 [[COND:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: [[CMP4:%.*]] = select i1 [[COND]], i1 [[CMP3]], i1 false +; CHECK-NEXT: br i1 [[CMP4]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + %cmp4 = select i1 %cond, i1 %cmp3, i1 false + br i1 %cmp4, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_gep_two_indices() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P1]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[P2]], i64 0, i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds [1024 x i8], ptr %p1, i64 0, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds [1024 x i8], ptr %p2, i64 0, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_alloca_diff_type() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [40 x i32], align 4 +; CHECK-NEXT: [[P2:%.*]] = alloca [40 x i32], align 4 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [40 x i32] + %p2 = alloca [40 x i32] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use2() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ 67, [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ 67, %loop ], [ %index, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use3() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use3() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ], [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + ret i64 %index +} + + +; In this example the early exit block appears in the list of ExitNotTaken +; SCEVs, but is not computable. +define i64 @same_exit_block_pre_inc_use4() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use4() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i64], align 8 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i64], align 8 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp ult i64 [[INDEX]], [[LD1]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i64] + %p2 = alloca [1024 x i64] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i64, ptr %p1, i64 %index + %ld1 = load i64, ptr %arrayidx, align 1 + %cmp3 = icmp ult i64 %index, %ld1 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_post_inc_use() { +; CHECK-LABEL: define i64 @same_exit_block_post_inc_use() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[INDEX_NEXT]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ %index.next, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_post_inc_use2() { +; CHECK-LABEL: define i64 @same_exit_block_post_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ], [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %index.next = add i64 %index, 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index.next, %loop ], [ %index, %loop.inc ] + ret i64 %retval +} + + +define i64 @diff_exit_block_pre_inc_use1() { +; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ %index, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ 67, %loop.inc ] + ret i64 %retval2 +} + + +define i64 @diff_exit_block_pre_inc_use2() { +; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ 67, [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ 67, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ %index, %loop.inc ] + ret i64 %retval2 +} + + +define i64 @diff_exit_block_pre_inc_use3() { +; CHECK-LABEL: define i64 @diff_exit_block_pre_inc_use3() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[INDEX_LCSSA:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[INDEX_LCSSA]] +; CHECK: loop.end: +; CHECK-NEXT: [[INDEX_LCSSA1:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[INDEX_LCSSA1]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + ret i64 %index + +loop.end: + ret i64 %index +} + + +define i64 @diff_exit_block_post_inc_use1() { +; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use1() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ %index, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ %index.next, %loop.inc ] + ret i64 %retval2 +} + + +define i64 @diff_exit_block_post_inc_use2() { +; CHECK-LABEL: define i64 @diff_exit_block_post_inc_use2() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_EARLY_EXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END:%.*]] +; CHECK: loop.early.exit: +; CHECK-NEXT: [[RETVAL1:%.*]] = phi i64 [ [[INDEX_NEXT]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL1]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL2:%.*]] = phi i64 [ [[INDEX]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL2]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %index.next = add i64 %index, 1 + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.early.exit + +loop.inc: + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.early.exit: + %retval1 = phi i64 [ %index.next, %loop ] + ret i64 %retval1 + +loop.end: + %retval2 = phi i64 [ %index, %loop.inc ] + ret i64 %retval2 +} + + +define i64 @loop_contains_safe_call() { +; CHECK-LABEL: define i64 @loop_contains_safe_call() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load float, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[SQRT:%.*]] = tail call fast float @llvm.sqrt.f32(float [[LD1]]) +; CHECK-NEXT: [[CMP:%.*]] = fcmp fast ult float [[SQRT]], 3.000000e+00 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds float, ptr %p1, i64 %index + %ld1 = load float, ptr %arrayidx, align 1 + %sqrt = tail call fast float @llvm.sqrt.f32(float %ld1) + %cmp = fcmp fast ult float %sqrt, 3.0e+00 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_safe_div() { +; CHECK-LABEL: define i64 @loop_contains_safe_div() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP1:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ [[INDEX_NEXT1:%.*]], [[LOOP_INC1:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX2]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 [[LD1]], 20000 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC1]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT1]] = add i64 [[INDEX2]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT1]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX2]], [[LOOP1]] ], [ 67, [[LOOP_INC1]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 %ld1, 20000 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_load_after_early_exit(ptr dereferenceable(1024) align(8) %p2) { +; CHECK-LABEL: define i64 @loop_contains_load_after_early_exit( +; CHECK-SAME: ptr align 8 dereferenceable(1024) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8 +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ [[LD2]], [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %arrayidx2 = getelementptr inbounds i64, ptr %p2, i64 %index + %ld2 = load i64, ptr %arrayidx2, align 8 + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ %ld2, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_reverse() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_reverse() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 1023, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], -1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP_END]], label [[LOOP]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 1024, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 1023, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, -1 + %exitcond = icmp eq i64 %index.next, 0 + br i1 %exitcond, label %loop.end, label %loop + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 1024, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_deref_ptrs(ptr dereferenceable(1024) %p1, ptr dereferenceable(1024) %p2) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_deref_ptrs( +; CHECK-SAME: ptr dereferenceable(1024) [[P1:%.*]], ptr dereferenceable(1024) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +declare i32 @foo(i32) readonly +declare @foo_vec() + +attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } diff --git a/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll new file mode 100644 index 00000000000000..c68eeac19c9ecf --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/single_early_exit_unsafe_ptrs.ll @@ -0,0 +1,143 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s + +declare void @init_mem(ptr, i64); + + +define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_allocas() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [42 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [42 x i8] + %p2 = alloca [42 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs(ptr dereferenceable(42) %p1, ptr dereferenceable(42) %p2) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_too_small_deref_ptrs( +; CHECK-SAME: ptr dereferenceable(42) [[P1:%.*]], ptr dereferenceable(42) [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_unknown_ptrs(ptr %p1, ptr %p2) { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_unknown_ptrs( +; CHECK-SAME: ptr [[P1:%.*]], ptr [[P2:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} diff --git a/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll new file mode 100644 index 00000000000000..cd91d07120f9ee --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/unsupported_early_exit.ll @@ -0,0 +1,494 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S < %s -p loop-vectorize | FileCheck %s + +declare void @init_mem(ptr, i64); + + +; The early exit (i.e. unknown exit-not-taken count) is the latch - we don't +; support this yet. +define i64 @early_exit_on_last_block() { +; CHECK-LABEL: define i64 @early_exit_on_last_block() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[SEARCH:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[CMP1]], label [[SEARCH]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: search: +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP41:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP42:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP41]], [[TMP42]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_END_LOOPEXIT]], label [[LAND_RHS]] +; CHECK: loop.end: +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = phi i64 [ 64, [[LAND_RHS]] ], [ [[INDEX]], [[SEARCH]] ] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %search ], [ 3, %entry ] + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %search, label %loop.end + +search: + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.end, label %loop + +loop.end: + %retval = phi i64 [ 64, %loop ], [ %index, %search ] + ret i64 %retval +} + + +; We don't currently support multiple early exits. +define i64 @multiple_uncountable_exits() { +; CHECK-LABEL: define i64 @multiple_uncountable_exits() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[SEARCH1:%.*]] +; CHECK: search1: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_END:%.*]], label [[SEARCH2:%.*]] +; CHECK: search2: +; CHECK-NEXT: [[CMP2:%.*]] = icmp ult i8 [[LD1]], 34 +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_END]], label [[LOOP_INC]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[SEARCH1]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[SEARCH1]] ], [ 100, [[SEARCH2]] ], [ 43, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %search1 + +search1: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp1 = icmp eq i8 %ld1, %ld2 + br i1 %cmp1, label %loop.end, label %search2 + +search2: + %cmp2 = icmp ult i8 %ld1, 34 + br i1 %cmp2, label %loop.end, label %loop.inc + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %search1, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %search1 ], [ 100, %search2 ], [ 43, %loop.inc ] + ret i64 %retval +} + + +define i64 @uncountable_exit_infinite_loop() { +; CHECK-LABEL: define i64 @uncountable_exit_infinite_loop() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[LD1]], [[LD2]] +; CHECK-NEXT: br i1 [[CMP3]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br label [[LOOP]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br label %loop + +loop.end: + %retval = phi i64 [ %index, %loop ] + ret i64 %retval +} + + +define i64 @loop_contains_unsafe_call() { +; CHECK-LABEL: define i64 @loop_contains_unsafe_call() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[BAD_CALL:%.*]] = call i32 @foo(i32 [[LD1]]) #[[ATTR1:[0-9]+]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[BAD_CALL]], 34 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %bad_call = call i32 @foo(i32 %ld1) #0 + %cmp = icmp eq i32 %bad_call, 34 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_unsafe_div() { +; CHECK-LABEL: define i64 @loop_contains_unsafe_div() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[DIV:%.*]] = udiv i32 20000, [[LD1]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[DIV]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %div = udiv i32 20000, %ld1 + %cmp = icmp eq i32 %div, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @loop_contains_store(ptr %dest) { +; CHECK-LABEL: define i64 @loop_contains_store( +; CHECK-SAME: ptr [[DEST:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i32, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[DEST]], i64 [[INDEX]] +; CHECK-NEXT: store i32 [[LD1]], ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[LD1]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %p1, i64 %index + %ld1 = load i32, ptr %arrayidx, align 1 + %arrayidx2 = getelementptr inbounds i32, ptr %dest, i64 %index + store i32 %ld1, ptr %arrayidx2, align 4 + %cmp = icmp eq i32 %ld1, 1 + br i1 %cmp, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @uncountable_exit_in_conditional_block(ptr %mask) { +; CHECK-LABEL: define i64 @uncountable_exit_in_conditional_block( +; CHECK-SAME: ptr [[MASK:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[MASK]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i8 [[LD1]], 0 +; CHECK-NEXT: br i1 [[CMP1]], label [[LOOP_SEARCH:%.*]], label [[LOOP_INC]] +; CHECK: loop.search: +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD2:%.*]] = load i8, ptr [[ARRAYIDX2]], align 1 +; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[LD3:%.*]] = load i8, ptr [[ARRAYIDX3]], align 1 +; CHECK-NEXT: [[CMP2:%.*]] = icmp eq i8 [[LD2]], [[LD3]] +; CHECK-NEXT: br i1 [[CMP2]], label [[LOOP_INC]], label [[LOOP_END:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP_SEARCH]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx1 = getelementptr inbounds i8, ptr %mask, i64 %index + %ld1 = load i8, ptr %arrayidx1, align 1 + %cmp1 = icmp ne i8 %ld1, 0 + br i1 %cmp1, label %loop.search, label %loop.inc + +loop.search: + %arrayidx2 = getelementptr inbounds i8, ptr %p1, i64 %index + %ld2 = load i8, ptr %arrayidx2, align 1 + %arrayidx3 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld3 = load i8, ptr %arrayidx3, align 1 + %cmp2 = icmp eq i8 %ld2, %ld3 + br i1 %cmp2, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %retval = phi i64 [ %index, %loop.search ], [ 67, %loop.inc ] + ret i64 %retval +} + + +define i64 @same_exit_block_pre_inc_use1_with_reduction() { +; CHECK-LABEL: define i64 @same_exit_block_pre_inc_use1_with_reduction() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: [[P2:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: call void @init_mem(ptr [[P2]], i64 1024) +; CHECK-NEXT: br label [[LAND_RHS:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i64 [ [[RED_NEXT:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP38:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, ptr [[P2]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP39:%.*]] = load i8, ptr [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[LD2_ZEXT:%.*]] = zext i8 [[TMP39]] to i64 +; CHECK-NEXT: [[RED_NEXT]] = add i64 [[RED]], [[LD2_ZEXT]] +; CHECK-NEXT: [[CMP3:%.*]] = icmp eq i8 [[TMP38]], [[TMP39]] +; CHECK-NEXT: br i1 [[CMP3]], label [[FOR_INC]], label [[FOR_END_LOOPEXIT:%.*]] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LAND_RHS]], label [[FOR_END_LOOPEXIT]] +; CHECK: loop.end: +; CHECK-NEXT: [[RED_NEXT_LCSSA:%.*]] = phi i64 [ [[RED_NEXT]], [[FOR_INC]] ], [ [[RED_NEXT]], [[LAND_RHS]] ] +; CHECK-NEXT: [[FINAL_IND:%.*]] = phi i64 [ [[INDEX]], [[LAND_RHS]] ], [ 67, [[FOR_INC]] ] +; CHECK-NEXT: [[START_0_LCSSA:%.*]] = add i64 [[RED_NEXT_LCSSA]], [[FINAL_IND]] +; CHECK-NEXT: ret i64 [[START_0_LCSSA]] +; +entry: + %p1 = alloca [1024 x i8] + %p2 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + call void @init_mem(ptr %p2, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %red = phi i64 [ %red.next, %loop.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, ptr %p2, i64 %index + %ld2 = load i8, ptr %arrayidx1, align 1 + %ld2.zext = zext i8 %ld2 to i64 + %red.next = add i64 %red, %ld2.zext + %cmp3 = icmp eq i8 %ld1, %ld2 + br i1 %cmp3, label %loop.inc, label %loop.end + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.end: + %final.ind = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + %retval = add i64 %red.next, %final.ind + ret i64 %retval +} + + +define i64 @uncountable_exit_has_multiple_outside_successors() { +; CHECK-LABEL: define i64 @uncountable_exit_has_multiple_outside_successors() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P1:%.*]] = alloca [1024 x i8], align 1 +; CHECK-NEXT: call void @init_mem(ptr [[P1]], i64 1024) +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ [[INDEX_NEXT:%.*]], [[LOOP_INC:%.*]] ], [ 3, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[P1]], i64 [[INDEX]] +; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[ARRAYIDX]], align 1 +; CHECK-NEXT: switch i8 [[LD1]], label [[LOOP_INC]] [ +; CHECK-NEXT: i8 2, label [[LOOP_END:%.*]] +; CHECK-NEXT: i8 3, label [[LOOP_SURPRISE:%.*]] +; CHECK-NEXT: ] +; CHECK: loop.inc: +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDEX_NEXT]], 67 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOP_END]] +; CHECK: loop.surprise: +; CHECK-NEXT: ret i64 3 +; CHECK: loop.end: +; CHECK-NEXT: [[RETVAL:%.*]] = phi i64 [ [[INDEX]], [[LOOP]] ], [ 67, [[LOOP_INC]] ] +; CHECK-NEXT: ret i64 [[RETVAL]] +; +entry: + %p1 = alloca [1024 x i8] + call void @init_mem(ptr %p1, i64 1024) + br label %loop + +loop: + %index = phi i64 [ %index.next, %loop.inc ], [ 3, %entry ] + %arrayidx = getelementptr inbounds i8, ptr %p1, i64 %index + %ld1 = load i8, ptr %arrayidx, align 1 + switch i8 %ld1, label %loop.inc [ + i8 2, label %loop.end + i8 3, label %loop.surprise + ] + +loop.inc: + %index.next = add i64 %index, 1 + %exitcond = icmp ne i64 %index.next, 67 + br i1 %exitcond, label %loop, label %loop.end + +loop.surprise: + ret i64 3 + +loop.end: + %retval = phi i64 [ %index, %loop ], [ 67, %loop.inc ] + ret i64 %retval +} + + +declare i32 @foo(i32) readonly +declare @foo_vec() + +attributes #0 = { "vector-function-abi-variant"="_ZGVsNxv_foo(foo_vec)" } From 256bbdb3f642c37268d6fa5dc35e01cd27a67b61 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 17 Oct 2024 16:29:56 +0100 Subject: [PATCH 03/24] [DAG] visitFCEIL/FTRUNC/FFLOOR/FNEG - use FoldConstantArithmetic to attempt to constant fold Don't rely on isConstantFPBuildVectorOrConstantFP followed by getNode() will constant fold - FoldConstantArithmetic will do all of this for us. Cleanup for #112682 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 644054361dd3ee..18439b87a83b7d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18291,8 +18291,8 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { EVT VT = N->getValueType(0); // fold (fceil c1) -> fceil(c1) - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FCEIL, SDLoc(N), VT, {N0})) + return C; return SDValue(); } @@ -18302,8 +18302,8 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { EVT VT = N->getValueType(0); // fold (ftrunc c1) -> ftrunc(c1) - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FTRUNC, SDLoc(N), VT, {N0})) + return C; // fold ftrunc (known rounded int x) -> x // ftrunc is a part of fptosi/fptoui expansion on some targets, so this is @@ -18336,8 +18336,8 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { EVT VT = N->getValueType(0); // fold (ffloor c1) -> ffloor(c1) - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FFLOOR, SDLoc(N), VT, {N0})) + return C; return SDValue(); } @@ -18348,8 +18348,8 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SelectionDAG::FlagInserter FlagsInserter(DAG, N); // Constant fold FNEG. - if (DAG.isConstantFPBuildVectorOrConstantFP(N0)) - return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); + if (SDValue C = DAG.FoldConstantArithmetic(ISD::FNEG, SDLoc(N), VT, {N0})) + return C; if (SDValue NegN0 = TLI.getNegatedExpression(N0, DAG, LegalOperations, ForCodeSize)) From 6f21a7bdeeca84bcc7cf94878e17b5d7ee7b4083 Mon Sep 17 00:00:00 2001 From: Congcong Cai Date: Thu, 17 Oct 2024 17:55:37 +0200 Subject: [PATCH 04/24] [clang-tidy] insert ``static`` keyword in correct position for misc-use-internal-linkage (#108792) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: #108760 --------- Co-authored-by: Danny Mösch --- .../misc/UseInternalLinkageCheck.cpp | 7 ++-- .../clang-tidy/utils/LexerUtils.cpp | 4 ++- clang-tools-extra/docs/ReleaseNotes.rst | 4 +++ .../misc/use-internal-linkage-func.cpp | 35 +++++++++++++++++++ .../misc/use-internal-linkage-var.cpp | 12 +++++++ 5 files changed, 59 insertions(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp index c086e7721e02bd..d900978f65a944 100644 --- a/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/UseInternalLinkageCheck.cpp @@ -8,12 +8,15 @@ #include "UseInternalLinkageCheck.h" #include "../utils/FileExtensionsUtils.h" +#include "../utils/LexerUtils.h" #include "clang/AST/Decl.h" #include "clang/ASTMatchers/ASTMatchFinder.h" #include "clang/ASTMatchers/ASTMatchers.h" #include "clang/ASTMatchers/ASTMatchersMacros.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" +#include "clang/Basic/TokenKinds.h" +#include "clang/Lex/Token.h" #include "llvm/ADT/STLExtras.h" using namespace clang::ast_matchers; @@ -113,7 +116,7 @@ static constexpr StringRef Message = void UseInternalLinkageCheck::check(const MatchFinder::MatchResult &Result) { if (const auto *FD = Result.Nodes.getNodeAs("fn")) { DiagnosticBuilder DB = diag(FD->getLocation(), Message) << "function" << FD; - SourceLocation FixLoc = FD->getTypeSpecStartLoc(); + const SourceLocation FixLoc = FD->getInnerLocStart(); if (FixLoc.isInvalid() || FixLoc.isMacroID()) return; if (FixMode == FixModeKind::UseStatic) @@ -128,7 +131,7 @@ void UseInternalLinkageCheck::check(const MatchFinder::MatchResult &Result) { return; DiagnosticBuilder DB = diag(VD->getLocation(), Message) << "variable" << VD; - SourceLocation FixLoc = VD->getTypeSpecStartLoc(); + const SourceLocation FixLoc = VD->getInnerLocStart(); if (FixLoc.isInvalid() || FixLoc.isMacroID()) return; if (FixMode == FixModeKind::UseStatic) diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index df2b0bef576ca3..92c3e0ed7894e1 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -24,13 +24,15 @@ getPreviousTokenAndStart(SourceLocation Location, const SourceManager &SM, if (Location.isInvalid()) return {Token, Location}; - auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location)); + const auto StartOfFile = SM.getLocForStartOfFile(SM.getFileID(Location)); while (Location != StartOfFile) { Location = Lexer::GetBeginningOfToken(Location, SM, LangOpts); if (!Lexer::getRawToken(Location, Token, SM, LangOpts) && (!SkipComments || !Token.is(tok::comment))) { break; } + if (Location == StartOfFile) + return {Token, Location}; Location = Location.getLocWithOffset(-1); } return {Token, Location}; diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 95be0a89cd6c93..e8148e06b6af28 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -195,6 +195,10 @@ Changes in existing checks ` check to fix false positive when using loop variable in initializer of lambda capture. +- Improved :doc:`misc-use-internal-linkage + ` check to insert ``static`` keyword + before type qualifiers such as ``const`` and ``volatile``. + - Improved :doc:`modernize-min-max-use-initializer-list ` check by fixing a false positive when only an implicit conversion happened inside an diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp index 9c91389542b03d..8dc739da3a2734 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-func.cpp @@ -17,6 +17,41 @@ void func_cpp_inc(); // CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc' // CHECK-FIXES: static void func_cpp_inc(); +int* func_cpp_inc_return_ptr(); +// CHECK-MESSAGES: :[[@LINE-1]]:6: warning: function 'func_cpp_inc_return_ptr' +// CHECK-FIXES: static int* func_cpp_inc_return_ptr(); + +const int* func_cpp_inc_return_const_ptr(); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_const_ptr' +// CHECK-FIXES: static const int* func_cpp_inc_return_const_ptr(); + +int const* func_cpp_inc_return_ptr_const(); +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: function 'func_cpp_inc_return_ptr_const' +// CHECK-FIXES: static int const* func_cpp_inc_return_ptr_const(); + +int * const func_cpp_inc_return_const(); +// CHECK-MESSAGES: :[[@LINE-1]]:13: warning: function 'func_cpp_inc_return_const' +// CHECK-FIXES: static int * const func_cpp_inc_return_const(); + +volatile const int* func_cpp_inc_return_volatile_const_ptr(); +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: function 'func_cpp_inc_return_volatile_const_ptr' +// CHECK-FIXES: static volatile const int* func_cpp_inc_return_volatile_const_ptr(); + +[[nodiscard]] void func_nodiscard(); +// CHECK-MESSAGES: :[[@LINE-1]]:20: warning: function 'func_nodiscard' +// CHECK-FIXES: {{\[\[nodiscard\]\]}} static void func_nodiscard(); + +#define NDS [[nodiscard]] +#define NNDS + +NDS void func_nds(); +// CHECK-MESSAGES: :[[@LINE-1]]:10: warning: function 'func_nds' +// CHECK-FIXES: NDS static void func_nds(); + +NNDS void func_nnds(); +// CHECK-MESSAGES: :[[@LINE-1]]:11: warning: function 'func_nnds' +// CHECK-FIXES: NNDS static void func_nnds(); + #include "func_cpp.inc" void func_h_inc(); diff --git a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-var.cpp b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-var.cpp index 6777ce4bb0265e..901272e40b8f24 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-var.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/misc/use-internal-linkage-var.cpp @@ -13,6 +13,18 @@ T global_template; // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: variable 'global_template' // CHECK-FIXES: static T global_template; +int const* ptr_const_star; +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: variable 'ptr_const_star' +// CHECK-FIXES: static int const* ptr_const_star; + +const int* const_ptr_star; +// CHECK-MESSAGES: :[[@LINE-1]]:12: warning: variable 'const_ptr_star' +// CHECK-FIXES: static const int* const_ptr_star; + +const volatile int* const_volatile_ptr_star; +// CHECK-MESSAGES: :[[@LINE-1]]:21: warning: variable 'const_volatile_ptr_star' +// CHECK-FIXES: static const volatile int* const_volatile_ptr_star; + int gloabl_header; extern int global_extern; From 94643a45b4c549b27407803277ec88b78315e2d9 Mon Sep 17 00:00:00 2001 From: Lukacma Date: Thu, 17 Oct 2024 17:08:43 +0100 Subject: [PATCH 05/24] [AArch64] Add armv9.6 features to AArch64AsmParser (#112722) New features need to be added to ExtensionMap for .arch and .arch_extension to work. --- .../lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index df69c20b1359fc..a5165d45893f3e 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -3736,6 +3736,17 @@ static const struct Extension { {"sme-fa64", {AArch64::FeatureSMEFA64}}, {"cpa", {AArch64::FeatureCPA}}, {"tlbiw", {AArch64::FeatureTLBIW}}, + {"cmpbr", {AArch64::FeatureCMPBR}}, + {"f8f32mm", {AArch64::FeatureF8F32MM}}, + {"f8f16mm", {AArch64::FeatureF8F16MM}}, + {"fprcvt", {AArch64::FeatureFPRCVT}}, + {"lsfe", {AArch64::FeatureLSFE}}, + {"sme2p2", {AArch64::FeatureSME2p2}}, + {"ssve-aes", {AArch64::FeatureSSVE_AES}}, + {"sve2p2", {AArch64::FeatureSVE2p2}}, + {"sve-aes2", {AArch64::FeatureSVEAES2}}, + {"sve-bfscale", {AArch64::FeatureSVEBFSCALE}}, + {"sve-f16f32mm", {AArch64::FeatureSVE_F16F32MM}}, }; static void setRequiredFeatureString(FeatureBitset FBS, std::string &Str) { From 020566701030425f44eb80387d0ae76c5a867aa9 Mon Sep 17 00:00:00 2001 From: Shimin Cui Date: Thu, 17 Oct 2024 12:10:05 -0400 Subject: [PATCH 06/24] [LTO] Add function alias as function instead of data (#112599) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On AIX, for undefined functions, only the dotnamed symbols (the address of the function) are generated after linking (i.e., no named function symbol is generated).   Currently, all alias symbols are added as defined data symbols when parsing symbols in LTOModule (the Link Time Optimization library used by linker to optimization code at link time). On AIX, if the function alias is used in the native object, and only its dotnamed symbol is generated, the linker will have problem to match the dotnamed symbol from the native object and the defined symbol marked as data from the bitcode at LTO linktime.   This patch is to add function alias as function instead of data. --- llvm/include/llvm/LTO/legacy/LTOModule.h | 2 +- llvm/lib/LTO/LTOModule.cpp | 17 +++++-- llvm/test/LTO/PowerPC/list-symbol.ll | 16 ++++++ llvm/tools/llvm-lto/llvm-lto.cpp | 65 +++++++++++++++++++++++- 4 files changed, 93 insertions(+), 7 deletions(-) create mode 100644 llvm/test/LTO/PowerPC/list-symbol.ll diff --git a/llvm/include/llvm/LTO/legacy/LTOModule.h b/llvm/include/llvm/LTO/legacy/LTOModule.h index 1b2de3b333855c..e861a56bcbac1d 100644 --- a/llvm/include/llvm/LTO/legacy/LTOModule.h +++ b/llvm/include/llvm/LTO/legacy/LTOModule.h @@ -195,7 +195,7 @@ struct LTOModule { /// Add a function symbol as defined to the list. void addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym); - void addDefinedFunctionSymbol(StringRef Name, const Function *F); + void addDefinedFunctionSymbol(StringRef Name, const GlobalValue *F); /// Add a global symbol from module-level ASM to the defined list. void addAsmGlobalSymbol(StringRef, lto_symbol_attributes scope); diff --git a/llvm/lib/LTO/LTOModule.cpp b/llvm/lib/LTO/LTOModule.cpp index eac78069f4d2bc..00eb8adb4e1035 100644 --- a/llvm/lib/LTO/LTOModule.cpp +++ b/llvm/lib/LTO/LTOModule.cpp @@ -406,11 +406,16 @@ void LTOModule::addDefinedFunctionSymbol(ModuleSymbolTable::Symbol Sym) { Buffer.c_str(); } - const Function *F = cast(cast(Sym)); - addDefinedFunctionSymbol(Buffer, F); + auto *GV = cast(Sym); + assert((isa(GV) || + (isa(GV) && + isa(cast(GV)->getAliasee()))) && + "Not function or function alias"); + + addDefinedFunctionSymbol(Buffer, GV); } -void LTOModule::addDefinedFunctionSymbol(StringRef Name, const Function *F) { +void LTOModule::addDefinedFunctionSymbol(StringRef Name, const GlobalValue *F) { // add to list of defined symbols addDefinedSymbol(Name, F, true); } @@ -611,7 +616,11 @@ void LTOModule::parseSymbols() { } assert(isa(GV)); - addDefinedDataSymbol(Sym); + + if (isa(cast(GV)->getAliasee())) + addDefinedFunctionSymbol(Sym); + else + addDefinedDataSymbol(Sym); } // make symbols for all undefines diff --git a/llvm/test/LTO/PowerPC/list-symbol.ll b/llvm/test/LTO/PowerPC/list-symbol.ll new file mode 100644 index 00000000000000..75300b11f7f188 --- /dev/null +++ b/llvm/test/LTO/PowerPC/list-symbol.ll @@ -0,0 +1,16 @@ +; RUN: llvm-as < %s > %t +; RUN: llvm-lto -list-symbols-only %t | FileCheck %s +; REQUIRES: default_triple + +; CHECK-DAG: v { data defined default } +; CHECK-DAG: va { data defined default alias } +; CHECK-DAG: f { function defined default } +; CHECK-DAG: fa { function defined default alias } + +@v = global i32 0 +@va = alias i32, ptr @v +@fa = alias void (ptr), ptr @f + +define void @f() { + ret void +} diff --git a/llvm/tools/llvm-lto/llvm-lto.cpp b/llvm/tools/llvm-lto/llvm-lto.cpp index f5076f0b975178..4090faf4e3fd95 100644 --- a/llvm/tools/llvm-lto/llvm-lto.cpp +++ b/llvm/tools/llvm-lto/llvm-lto.cpp @@ -407,6 +407,64 @@ static void printIndexStats() { } } +/// Print the lto symbol attributes. +static void printLTOSymbolAttributes(lto_symbol_attributes Attrs) { + outs() << "{ "; + unsigned Permission = Attrs & LTO_SYMBOL_PERMISSIONS_MASK; + switch (Permission) { + case LTO_SYMBOL_PERMISSIONS_CODE: + outs() << "function "; + break; + case LTO_SYMBOL_PERMISSIONS_DATA: + outs() << "data "; + break; + case LTO_SYMBOL_PERMISSIONS_RODATA: + outs() << "constant "; + break; + } + unsigned Definition = Attrs & LTO_SYMBOL_DEFINITION_MASK; + switch (Definition) { + case LTO_SYMBOL_DEFINITION_REGULAR: + outs() << "defined "; + break; + case LTO_SYMBOL_DEFINITION_TENTATIVE: + outs() << "common "; + break; + case LTO_SYMBOL_DEFINITION_WEAK: + outs() << "weak "; + break; + case LTO_SYMBOL_DEFINITION_UNDEFINED: + outs() << "extern "; + break; + case LTO_SYMBOL_DEFINITION_WEAKUNDEF: + outs() << "extern-weak "; + break; + } + unsigned Scope = Attrs & LTO_SYMBOL_SCOPE_MASK; + switch (Scope) { + case LTO_SYMBOL_SCOPE_INTERNAL: + outs() << "internal "; + break; + case LTO_SYMBOL_SCOPE_HIDDEN: + outs() << "hidden "; + break; + case LTO_SYMBOL_SCOPE_PROTECTED: + outs() << "protected "; + break; + case LTO_SYMBOL_SCOPE_DEFAULT: + outs() << "default "; + break; + case LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN: + outs() << "omitted "; + break; + } + if (Attrs & LTO_SYMBOL_COMDAT) + outs() << "comdat "; + if (Attrs & LTO_SYMBOL_ALIAS) + outs() << "alias "; + outs() << "}"; +} + /// Load each IR file and dump certain information based on active flags. /// /// The main point here is to provide lit-testable coverage for the LTOModule @@ -421,8 +479,11 @@ static void testLTOModule(const TargetOptions &Options) { if (ListSymbolsOnly) { // List the symbols. outs() << Filename << ":\n"; - for (int I = 0, E = Module->getSymbolCount(); I != E; ++I) - outs() << Module->getSymbolName(I) << "\n"; + for (int I = 0, E = Module->getSymbolCount(); I != E; ++I) { + outs() << Module->getSymbolName(I) << " "; + printLTOSymbolAttributes(Module->getSymbolAttributes(I)); + outs() << "\n"; + } } if (QueryHasCtorDtor) outs() << Filename From bf1a554312bd011cb2016a2c9d7e75d6fe3b02af Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Thu, 17 Oct 2024 12:25:22 -0400 Subject: [PATCH 07/24] Document the requirement that commits have a public email address (#109318) See https://discourse.llvm.org/t/hidden-emails-on-github-should-we-do-something-about-it/74223 for details about why this is important to the community. Note, we currently have soft enforcement for this requirement in the form of a bot which posts comments letting patch authors know their email is private, so we're already setting expectations in practice; this PR is documenting those expectations for clarity. --- llvm/docs/DeveloperPolicy.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/llvm/docs/DeveloperPolicy.rst b/llvm/docs/DeveloperPolicy.rst index caa4b31b949c92..0ecf1423e60371 100644 --- a/llvm/docs/DeveloperPolicy.rst +++ b/llvm/docs/DeveloperPolicy.rst @@ -105,6 +105,13 @@ When submitting patches, please do not add confidentiality or non-disclosure notices to the patches themselves. These notices conflict with the LLVM licensing terms and may result in your contribution being excluded. +The LLVM project uses email to communicate to contributors outside of the +GitHub platform about their past contributions. Primarily, our buildbot +infrastructure uses emails to contact contributors about build and test +failures. Therefore, the LLVM community requires contributors to have a public +email address associated with their GitHub commits, so please ensure that "Keep +my email addresses private" is disabled in your +`account settings `_. .. _code review: From feedb35e41522b2f6c11dab4f9263fd305a2c13f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 17 Oct 2024 09:20:34 -0700 Subject: [PATCH 08/24] [RISCV][GISel] Correct RORIW patterns. We had two rotl patterns and no rotr pattern. The order was such that the incorrect rotl pattern was being used. --- llvm/lib/Target/RISCV/RISCVGISel.td | 2 +- .../RISCV/GlobalISel/instruction-select/rotate-rv64.mir | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVGISel.td b/llvm/lib/Target/RISCV/RISCVGISel.td index 0656928ca41f83..67e93b812421b4 100644 --- a/llvm/lib/Target/RISCV/RISCVGISel.td +++ b/llvm/lib/Target/RISCV/RISCVGISel.td @@ -274,7 +274,7 @@ def : Pat<(i32 (xor GPR:$rs1, (not GPR:$rs2))), (XNOR GPR:$rs1, GPR:$rs2)>; def : PatGprGpr; def : PatGprGpr; -def : Pat<(i32 (rotl GPR:$rs1, uimm5i32:$imm)), +def : Pat<(i32 (rotr GPR:$rs1, uimm5i32:$imm)), (RORIW GPR:$rs1, (i64 (as_i64imm $imm)))>; def : Pat<(i32 (rotl GPR:$rs1, uimm5i32:$rs2)), diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir index b75e926bb50c4e..50b96e0ee972e6 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rotate-rv64.mir @@ -117,7 +117,7 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[RORIW:%[0-9]+]]:gpr = RORIW [[COPY]], 15 + ; CHECK-NEXT: [[RORIW:%[0-9]+]]:gpr = RORIW [[COPY]], 17 ; CHECK-NEXT: $x10 = COPY [[RORIW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 @@ -165,9 +165,8 @@ body: | ; CHECK: liveins: $x10 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10 - ; CHECK-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 15 - ; CHECK-NEXT: [[RORW:%[0-9]+]]:gpr = RORW [[COPY]], [[ADDI]] - ; CHECK-NEXT: $x10 = COPY [[RORW]] + ; CHECK-NEXT: [[RORIW:%[0-9]+]]:gpr = RORIW [[COPY]], 15 + ; CHECK-NEXT: $x10 = COPY [[RORIW]] ; CHECK-NEXT: PseudoRET implicit $x10 %0:gprb(s64) = COPY $x10 %1:gprb(s32) = G_TRUNC %0(s64) From 87645e920528802fb1864e159da3d2be1b733432 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 17 Oct 2024 18:37:58 +0200 Subject: [PATCH 09/24] [libc][math][c23] Fix undefined behavior in expxf16.h (#112734) Fixes the left-shifting of potentially negative signed integers. --- libc/src/math/generic/expxf16.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h index 8de329bd2ab07f..aba99a2914b41d 100644 --- a/libc/src/math/generic/expxf16.h +++ b/libc/src/math/generic/expxf16.h @@ -108,8 +108,8 @@ LIBC_INLINE ExpRangeReduction exp2_range_reduction(float16 x) { float xf = x; float kf = fputil::nearest_integer(xf * 0x1.0p+3f); int x_hi_mid = static_cast(kf); - int x_hi = x_hi_mid >> 3; - int x_mid = x_hi_mid & 0x7; + unsigned x_hi = static_cast(x_hi_mid) >> 3; + unsigned x_mid = static_cast(x_hi_mid) & 0x7; // lo = x - (hi + mid) = round(x * 2^3) * (-2^(-3)) + x float lo = fputil::multiply_add(kf, -0x1.0p-3f, xf); @@ -155,8 +155,8 @@ LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) { float xf = x; float kf = fputil::nearest_integer(xf * (LOG2F_10 * 0x1.0p+3f)); int x_hi_mid = static_cast(kf); - int x_hi = x_hi_mid >> 3; - int x_mid = x_hi_mid & 0x7; + unsigned x_hi = static_cast(x_hi_mid) >> 3; + unsigned x_mid = static_cast(x_hi_mid) & 0x7; // lo = x - (hi + mid) = round(x * 2^3 * log2(10)) * log10(2) * (-2^(-3)) + x float lo = fputil::multiply_add(kf, LOG10F_2 * -0x1.0p-3f, xf); From 6d7712a70c163d2ae9e1dc928db31fcb45d9e404 Mon Sep 17 00:00:00 2001 From: Nick Riasanovsky Date: Thu, 17 Oct 2024 12:42:08 -0400 Subject: [PATCH 10/24] [clang-tidy][docs] Replace _not_ in reserved-identifier.rst with *not* (#112162) Fixes a documentation formatting error where `_not_` was used which has no special meaning in reST and replaces it with `*not*`. Closes #111691. --- .../docs/clang-tidy/checks/bugprone/reserved-identifier.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst b/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst index a498ff8409af3a..3f6cee9b3bb5aa 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/bugprone/reserved-identifier.rst @@ -28,7 +28,7 @@ Violating the naming rules above results in undefined behavior. int _g(); // disallowed in global namespace only The check can also be inverted, i.e. it can be configured to flag any -identifier that is _not_ a reserved identifier. This mode is for use by e.g. +identifier that is *not* a reserved identifier. This mode is for use by e.g. standard library implementors, to ensure they don't infringe on the user namespace. From 8f8d5f005a937bf8d5244c5bf22906095ff08c70 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Thu, 17 Oct 2024 10:02:08 -0700 Subject: [PATCH 11/24] [rtsan] Add statistics for suppression count (#112718) --- compiler-rt/lib/rtsan/rtsan_assertions.h | 9 +++++++-- compiler-rt/lib/rtsan/rtsan_flags.h | 2 ++ compiler-rt/lib/rtsan/rtsan_stats.cpp | 13 +++++++++++++ compiler-rt/lib/rtsan/rtsan_stats.h | 1 + compiler-rt/lib/rtsan/rtsan_suppressions.cpp | 2 +- compiler-rt/test/rtsan/exit_stats.cpp | 11 +++++++++++ compiler-rt/test/rtsan/exit_stats.cpp.supp | 1 + compiler-rt/test/rtsan/stack_suppressions.cpp | 5 ++++- 8 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 compiler-rt/test/rtsan/exit_stats.cpp.supp diff --git a/compiler-rt/lib/rtsan/rtsan_assertions.h b/compiler-rt/lib/rtsan/rtsan_assertions.h index 927b32e03cf026..28a272b6462372 100644 --- a/compiler-rt/lib/rtsan/rtsan_assertions.h +++ b/compiler-rt/lib/rtsan/rtsan_assertions.h @@ -15,6 +15,7 @@ #include "rtsan/rtsan.h" #include "rtsan/rtsan_context.h" #include "rtsan/rtsan_diagnostics.h" +#include "rtsan/rtsan_stats.h" #include "rtsan/rtsan_suppressions.h" #include "sanitizer_common/sanitizer_stacktrace.h" @@ -28,8 +29,10 @@ void ExpectNotRealtime(Context &context, const DiagnosticsInfo &info, if (context.InRealtimeContext() && !context.IsBypassed()) { ScopedBypass sb{context}; - if (IsFunctionSuppressed(info.func_name)) + if (IsFunctionSuppressed(info.func_name)) { + IncrementSuppressedCount(); return; + } __sanitizer::BufferedStackTrace stack; @@ -38,8 +41,10 @@ void ExpectNotRealtime(Context &context, const DiagnosticsInfo &info, stack.Unwind(info.pc, info.bp, nullptr, __sanitizer::common_flags()->fast_unwind_on_fatal); - if (IsStackTraceSuppressed(stack)) + if (IsStackTraceSuppressed(stack)) { + IncrementSuppressedCount(); return; + } OnViolation(stack, info); } diff --git a/compiler-rt/lib/rtsan/rtsan_flags.h b/compiler-rt/lib/rtsan/rtsan_flags.h index 29025c29b6fc2a..f46e04933fa528 100644 --- a/compiler-rt/lib/rtsan/rtsan_flags.h +++ b/compiler-rt/lib/rtsan/rtsan_flags.h @@ -18,6 +18,8 @@ struct Flags { Type Name{DefaultValue}; #include "rtsan_flags.inc" #undef RTSAN_FLAG + + bool ContainsSuppresionFile() { return suppressions[0] != '\0'; } }; extern Flags flags_data; diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp index dac7b23c3ef520..1562b73cf94cd8 100644 --- a/compiler-rt/lib/rtsan/rtsan_stats.cpp +++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "rtsan/rtsan_stats.h" +#include "rtsan/rtsan_flags.h" #include "sanitizer_common/sanitizer_atomic.h" #include "sanitizer_common/sanitizer_common.h" @@ -20,6 +21,7 @@ using namespace __rtsan; static atomic_uint32_t rtsan_total_error_count{0}; static atomic_uint32_t rtsan_unique_error_count{0}; +static atomic_uint32_t rtsan_suppressed_count{0}; void __rtsan::IncrementTotalErrorCount() { atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed); @@ -37,9 +39,20 @@ static u32 GetUniqueErrorCount() { return atomic_load(&rtsan_unique_error_count, memory_order_relaxed); } +void __rtsan::IncrementSuppressedCount() { + atomic_fetch_add(&rtsan_suppressed_count, 1, memory_order_relaxed); +} + +static u32 GetSuppressedCount() { + return atomic_load(&rtsan_suppressed_count, memory_order_relaxed); +} + void __rtsan::PrintStatisticsSummary() { ScopedErrorReportLock l; Printf("RealtimeSanitizer exit stats:\n"); Printf(" Total error count: %u\n", GetTotalErrorCount()); Printf(" Unique error count: %u\n", GetUniqueErrorCount()); + + if (flags().ContainsSuppresionFile()) + Printf(" Suppression count: %u\n", GetSuppressedCount()); } diff --git a/compiler-rt/lib/rtsan/rtsan_stats.h b/compiler-rt/lib/rtsan/rtsan_stats.h index a72098792c89c9..a8a67ea2a44b6d 100644 --- a/compiler-rt/lib/rtsan/rtsan_stats.h +++ b/compiler-rt/lib/rtsan/rtsan_stats.h @@ -16,6 +16,7 @@ namespace __rtsan { void IncrementTotalErrorCount(); void IncrementUniqueErrorCount(); +void IncrementSuppressedCount(); void PrintStatisticsSummary(); diff --git a/compiler-rt/lib/rtsan/rtsan_suppressions.cpp b/compiler-rt/lib/rtsan/rtsan_suppressions.cpp index a7c3d42ac68af9..2bcfbeed4195bc 100644 --- a/compiler-rt/lib/rtsan/rtsan_suppressions.cpp +++ b/compiler-rt/lib/rtsan/rtsan_suppressions.cpp @@ -56,7 +56,7 @@ void __rtsan::InitializeSuppressions() { CHECK_EQ(nullptr, suppression_ctx); // We will use suppression_ctx == nullptr as an early out - if (flags().suppressions[0] == '\0') + if (!flags().ContainsSuppresionFile()) return; suppression_ctx = new (suppression_placeholder) diff --git a/compiler-rt/test/rtsan/exit_stats.cpp b/compiler-rt/test/rtsan/exit_stats.cpp index d4d19ace778ba5..92ca58f1edde86 100644 --- a/compiler-rt/test/rtsan/exit_stats.cpp +++ b/compiler-rt/test/rtsan/exit_stats.cpp @@ -1,6 +1,7 @@ // RUN: %clangxx -fsanitize=realtime %s -o %t // RUN: %env_rtsan_opts="halt_on_error=false,print_stats_on_exit=true" %run %t 2>&1 | FileCheck %s // RUN: %env_rtsan_opts="halt_on_error=true,print_stats_on_exit=true" not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-HALT +// RUN: %env_rtsan_opts="suppressions=%s.supp,print_stats_on_exit=true" not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-SUPPRESSIONS // UNSUPPORTED: ios @@ -23,7 +24,17 @@ int main() { // CHECK: RealtimeSanitizer exit stats: // CHECK-NEXT: Total error count: 10 // CHECK-NEXT: Unique error count: 1 +// CHECK-NOT: Suppression count // CHECK-HALT: RealtimeSanitizer exit stats: // CHECK-HALT-NEXT: Total error count: 1 // CHECK-HALT-NEXT: Unique error count: 1 +// CHECK-HALT-NOT: Suppression count + +// We pass in intentionally_non_existant_function in the suppressions file +// This is just to ensure we only get the "Suppression count" metric if this +// file is passed at runtime, otherwise that statistic is omitted +// CHECK-SUPPRESSIONS: RealtimeSanitizer exit stats: +// CHECK-SUPPRESSIONS-NEXT: Total error count: 1 +// CHECK-SUPPRESSIONS-NEXT: Unique error count: 1 +// CHECK-SUPPRESSIONS-NEXT: Suppression count: 0 diff --git a/compiler-rt/test/rtsan/exit_stats.cpp.supp b/compiler-rt/test/rtsan/exit_stats.cpp.supp new file mode 100644 index 00000000000000..b720bdb770808b --- /dev/null +++ b/compiler-rt/test/rtsan/exit_stats.cpp.supp @@ -0,0 +1 @@ +function-name-matches:intentionally_non_existant_function diff --git a/compiler-rt/test/rtsan/stack_suppressions.cpp b/compiler-rt/test/rtsan/stack_suppressions.cpp index b9b2d0957636d9..be1cf4963c7f80 100644 --- a/compiler-rt/test/rtsan/stack_suppressions.cpp +++ b/compiler-rt/test/rtsan/stack_suppressions.cpp @@ -1,6 +1,6 @@ // RUN: %clangxx -fsanitize=realtime %s -o %t // RUN: %env_rtsan_opts=halt_on_error=false %run %t 2>&1 | FileCheck %s --check-prefix=CHECK-NOSUPPRESSIONS -// RUN: %env_rtsan_opts=suppressions='%s.supp' not %run %t 2>&1 | FileCheck %s +// RUN: %env_rtsan_opts=suppressions='%s.supp':print_stats_on_exit=true not %run %t 2>&1 | FileCheck %s // UNSUPPORTED: ios // Intent: Ensure that suppressions work as intended @@ -61,6 +61,9 @@ int main() { // CHECK-NOT: free // CHECK-NOT: BlockFunc +// CHECK: RealtimeSanitizer exit stats: +// CHECK: Suppression count: 7 + // CHECK-NOSUPPRESSIONS: malloc // CHECK-NOSUPPRESSIONS: vector // CHECK-NOSUPPRESSIONS: free From 1a609052b65e7b8ca78159d5ad14eafbeb039eb2 Mon Sep 17 00:00:00 2001 From: Danila Malyutin Date: Thu, 17 Oct 2024 21:04:04 +0400 Subject: [PATCH 12/24] [AArch64][InstCombine] Eliminate redundant barrier intrinsics (#112023) If there are no memory ops on the path from one dmb to another then one barrier can be eliminated. --- .../AArch64/AArch64TargetTransformInfo.cpp | 31 +++ .../InstCombine/AArch64/dmb-intrinsics.ll | 220 ++++++++++++++++++ 2 files changed, 251 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index d33d0aa5855495..7c6b789b9c1b72 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -66,6 +66,10 @@ static cl::opt BaseHistCntCost("aarch64-base-histcnt-cost", cl::init(8), cl::Hidden, cl::desc("The cost of a histcnt instruction")); +static cl::opt DMBLookaheadThreshold( + "dmb-lookahead-threshold", cl::init(10), cl::Hidden, + cl::desc("The number of instructions to search for a redundant dmb")); + namespace { class TailFoldingOption { // These bitfields will only ever be set to something non-zero in operator=, @@ -2152,6 +2156,31 @@ static std::optional instCombineSVEInsr(InstCombiner &IC, return std::nullopt; } +static std::optional instCombineDMB(InstCombiner &IC, + IntrinsicInst &II) { + // If this barrier is post-dominated by identical one we can remove it + auto *NI = II.getNextNonDebugInstruction(); + unsigned LookaheadThreshold = DMBLookaheadThreshold; + auto CanSkipOver = [](Instruction *I) { + return !I->mayReadOrWriteMemory() && !I->mayHaveSideEffects(); + }; + while (LookaheadThreshold-- && CanSkipOver(NI)) { + auto *NIBB = NI->getParent(); + NI = NI->getNextNonDebugInstruction(); + if (!NI) { + if (auto *SuccBB = NIBB->getUniqueSuccessor()) + NI = SuccBB->getFirstNonPHIOrDbgOrLifetime(); + else + break; + } + } + auto *NextII = dyn_cast_or_null(NI); + if (NextII && II.isIdenticalTo(NextII)) + return IC.eraseInstFromFunction(II); + + return std::nullopt; +} + std::optional AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const { @@ -2159,6 +2188,8 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC, switch (IID) { default: break; + case Intrinsic::aarch64_dmb: + return instCombineDMB(IC, II); case Intrinsic::aarch64_sve_fcvt_bf16f32_v2: case Intrinsic::aarch64_sve_fcvt_f16f32: case Intrinsic::aarch64_sve_fcvt_f16f64: diff --git a/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll new file mode 100644 index 00000000000000..dacdd413013658 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/AArch64/dmb-intrinsics.ll @@ -0,0 +1,220 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=instcombine < %s | FileCheck %s +; ARM64 dmb intrinsics + +target triple = "aarch64-unknown-linux-gnu" + +declare void @llvm.aarch64.dmb(i32) +declare void @llvm.aarch64.dsb(i32) +declare void @clobber() +declare void @pure() memory(none) willreturn nounwind +declare i32 @llvm.ctlz.i32(i32, i1) + +define void @simple() #0 { +; CHECK-LABEL: define void @simple() { +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.dmb(i32 10) + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +; dmb ish (0xb) is technically stronger than ishst (0xa) but we don't merge for now +define void @simple_nonmatching() #0 { +; CHECK-LABEL: define void @simple_nonmatching() { +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 11) +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.dmb(i32 10) + call void @llvm.aarch64.dmb(i32 11) + ret void +} + +define ptr @simple_safe_instruction(ptr %p) #0 { +; CHECK-LABEL: define ptr @simple_safe_instruction( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 8 +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret ptr [[RES]] +; + call void @llvm.aarch64.dmb(i32 10) + %res = getelementptr inbounds i8, ptr %p, i32 8 + call void @llvm.aarch64.dmb(i32 10) + ret ptr %res +} + +define i32 @simple_safe_intrinsic(i32 %n) #0 { +; CHECK-LABEL: define i32 @simple_safe_intrinsic( +; CHECK-SAME: i32 [[N:%.*]]) { +; CHECK-NEXT: [[RES:%.*]] = call range(i32 0, 33) i32 @llvm.ctlz.i32(i32 [[N]], i1 false) +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret i32 [[RES]] +; + call void @llvm.aarch64.dmb(i32 10) + %res = call i32 @llvm.ctlz.i32(i32 %n, i1 false) + call void @llvm.aarch64.dmb(i32 10) + ret i32 %res +} + +define void @simple_unsafe_intrinsic() #0 { +; CHECK-LABEL: define void @simple_unsafe_intrinsic() { +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: call void @llvm.aarch64.dsb(i32 10) +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.dmb(i32 10) + call void @llvm.aarch64.dsb(i32 10) + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @simple_safe_unsafe_instruction(ptr %p) #0 { +; CHECK-LABEL: define void @simple_safe_unsafe_instruction( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: store i32 42, ptr [[P]], align 4 +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.dmb(i32 10) + store i32 42, ptr %p + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @simple_safe_unsafe_call(ptr %p) #0 { +; CHECK-LABEL: define void @simple_safe_unsafe_call( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: call void @clobber() +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.dmb(i32 10) + call void @clobber() + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @simple_safe_safe_call(ptr %p) #0 { +; CHECK-LABEL: define void @simple_safe_safe_call( +; CHECK-SAME: ptr [[P:%.*]]) { +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; + call void @llvm.aarch64.dmb(i32 10) + call void @pure() + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @multiple_bbs1(i1 %f) #0 { +; CHECK-LABEL: define void @multiple_bbs1( +; CHECK-SAME: i1 [[F:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]] +; CHECK: [[BB_T]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[BB_F]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; +entry: + br i1 %f, label %bb_t, label %bb_f +bb_t: + call void @llvm.aarch64.dmb(i32 10) + br label %exit +bb_f: + call void @llvm.aarch64.dmb(i32 10) + br label %exit +exit: + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @multiple_bbs2(i1 %f) #0 { +; CHECK-LABEL: define void @multiple_bbs2( +; CHECK-SAME: i1 [[F:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]] +; CHECK: [[BB_T]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[BB_F]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; +entry: + br i1 %f, label %bb_t, label %bb_f +bb_t: + call void @llvm.aarch64.dmb(i32 10) + br label %exit +bb_f: + br label %exit +exit: + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @multiple_bbs3(i1 %f, ptr %p) #0 { +; CHECK-LABEL: define void @multiple_bbs3( +; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]] +; CHECK: [[BB_T]]: +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[BB_F]]: +; CHECK-NEXT: store i32 42, ptr [[P]], align 4 +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; +entry: + br i1 %f, label %bb_t, label %bb_f +bb_t: + call void @llvm.aarch64.dmb(i32 10) + br label %exit +bb_f: + store i32 42, ptr %p + br label %exit +exit: + call void @llvm.aarch64.dmb(i32 10) + ret void +} + +define void @multiple_bbs_unsafe(i1 %f, ptr %p) #0 { +; CHECK-LABEL: define void @multiple_bbs_unsafe( +; CHECK-SAME: i1 [[F:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: br i1 [[F]], label %[[BB_T:.*]], label %[[BB_F:.*]] +; CHECK: [[BB_T]]: +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: store i32 42, ptr [[P]], align 4 +; CHECK-NEXT: br label %[[EXIT:.*]] +; CHECK: [[BB_F]]: +; CHECK-NEXT: br label %[[EXIT]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: call void @llvm.aarch64.dmb(i32 10) +; CHECK-NEXT: ret void +; +entry: + br i1 %f, label %bb_t, label %bb_f +bb_t: + call void @llvm.aarch64.dmb(i32 10) + store i32 42, ptr %p + br label %exit +bb_f: + call void @llvm.aarch64.dmb(i32 10) + br label %exit +exit: + call void @llvm.aarch64.dmb(i32 10) + ret void +} + From 2ef24e05defb6aa470fd4234853b2c11401cd660 Mon Sep 17 00:00:00 2001 From: Xing Xue Date: Thu, 17 Oct 2024 13:07:18 -0400 Subject: [PATCH 13/24] [libunwind][AIX] Remove weak declaration "__xlcxx_personality_v0" (#112436) `__xlcxx_personality_v0` is the personality routine in `libc++abi` for the EH of applications generated by the legacy IBM C++ compiler. Since the EH info generated by the legacy compiler does not provide the location of the personality routine, this routine is hard-coded as the handler for legacy EH in the unwinder. The symbol is resolved dynamically using `dlopen()` to avoid a hard dependency of `libunwind` on `libc++abi` for cases such as non-C++ applications. The weak declaration of `__xlcxx_personality_v0` was originally intended to bypass `dlopen()` if the C++ application generated by the legacy compiler is statically linked with the new LLVM C++ compiler. Unfortunately, this causes problems with runtime linking for Clang-compiled code using the unwinder that does not link with `libc++abi`. On the other hand, the C++ runtime libraries shipped for AIX are actually stripped and statically linking is not supported. So, we can fix the problem by removing the `__xlcxx_personality_v0` weak declaration. Besides, `dlopen()` would work as long as the libc++abi shared library is available. --- libunwind/src/UnwindCursor.hpp | 52 ++++++++++-------------- libunwind/test/aix_runtime_link.pass.cpp | 20 +++++++++ 2 files changed, 42 insertions(+), 30 deletions(-) create mode 100644 libunwind/test/aix_runtime_link.pass.cpp diff --git a/libunwind/src/UnwindCursor.hpp b/libunwind/src/UnwindCursor.hpp index ce6dced535e781..2a3aba28fb6ca5 100644 --- a/libunwind/src/UnwindCursor.hpp +++ b/libunwind/src/UnwindCursor.hpp @@ -2033,7 +2033,6 @@ typedef _Unwind_Reason_Code __xlcxx_personality_v0_t(int, _Unwind_Action, uint64_t, _Unwind_Exception *, struct _Unwind_Context *); -__attribute__((__weak__)) __xlcxx_personality_v0_t __xlcxx_personality_v0; } static __xlcxx_personality_v0_t *xlcPersonalityV0; @@ -2126,42 +2125,35 @@ bool UnwindCursor::getInfoFromTBTable(pint_t pc, R ®isters) { // function __xlcxx_personality_v0(), which is the personality for the state // table and is exported from libc++abi, is directly assigned as the // handler here. When a legacy XLC++ frame is encountered, the symbol - // is resolved dynamically using dlopen() to avoid hard dependency from - // libunwind on libc++abi. + // is resolved dynamically using dlopen() to avoid a hard dependency of + // libunwind on libc++abi in cases such as non-C++ applications. // Resolve the function pointer to the state table personality if it has - // not already. + // not already been done. if (xlcPersonalityV0 == NULL) { xlcPersonalityV0InitLock.lock(); if (xlcPersonalityV0 == NULL) { - // If libc++abi is statically linked in, symbol __xlcxx_personality_v0 - // has been resolved at the link time. - xlcPersonalityV0 = &__xlcxx_personality_v0; + // Resolve __xlcxx_personality_v0 using dlopen(). + const char *libcxxabi = "libc++abi.a(libc++abi.so.1)"; + void *libHandle; + // The AIX dlopen() sets errno to 0 when it is successful, which + // clobbers the value of errno from the user code. This is an AIX + // bug because according to POSIX it should not set errno to 0. To + // workaround before AIX fixes the bug, errno is saved and restored. + int saveErrno = errno; + libHandle = dlopen(libcxxabi, RTLD_MEMBER | RTLD_NOW); + if (libHandle == NULL) { + _LIBUNWIND_TRACE_UNWINDING("dlopen() failed with errno=%d\n", errno); + assert(0 && "dlopen() failed"); + } + xlcPersonalityV0 = reinterpret_cast<__xlcxx_personality_v0_t *>( + dlsym(libHandle, "__xlcxx_personality_v0")); if (xlcPersonalityV0 == NULL) { - // libc++abi is dynamically linked. Resolve __xlcxx_personality_v0 - // using dlopen(). - const char libcxxabi[] = "libc++abi.a(libc++abi.so.1)"; - void *libHandle; - // The AIX dlopen() sets errno to 0 when it is successful, which - // clobbers the value of errno from the user code. This is an AIX - // bug because according to POSIX it should not set errno to 0. To - // workaround before AIX fixes the bug, errno is saved and restored. - int saveErrno = errno; - libHandle = dlopen(libcxxabi, RTLD_MEMBER | RTLD_NOW); - if (libHandle == NULL) { - _LIBUNWIND_TRACE_UNWINDING("dlopen() failed with errno=%d\n", - errno); - assert(0 && "dlopen() failed"); - } - xlcPersonalityV0 = reinterpret_cast<__xlcxx_personality_v0_t *>( - dlsym(libHandle, "__xlcxx_personality_v0")); - if (xlcPersonalityV0 == NULL) { - _LIBUNWIND_TRACE_UNWINDING("dlsym() failed with errno=%d\n", errno); - assert(0 && "dlsym() failed"); - } - dlclose(libHandle); - errno = saveErrno; + _LIBUNWIND_TRACE_UNWINDING("dlsym() failed with errno=%d\n", errno); + assert(0 && "dlsym() failed"); } + dlclose(libHandle); + errno = saveErrno; } xlcPersonalityV0InitLock.unlock(); } diff --git a/libunwind/test/aix_runtime_link.pass.cpp b/libunwind/test/aix_runtime_link.pass.cpp new file mode 100644 index 00000000000000..deb192c07981eb --- /dev/null +++ b/libunwind/test/aix_runtime_link.pass.cpp @@ -0,0 +1,20 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Test that libunwind loads successfully independently of libc++abi with +// runtime linking on AIX. + +// REQUIRES: target={{.+}}-aix{{.*}} +// ADDITIONAL_COMPILE_FLAGS: -Wl,-brtl + +#include +extern "C" int printf(const char *, ...); +int main(void) { + void *fp = (void *)&_Unwind_Backtrace; + printf("%p\n", fp); +} From 03888a9046167c50c20e219e790d616b42b91608 Mon Sep 17 00:00:00 2001 From: Timm Baeder Date: Thu, 17 Oct 2024 19:07:47 +0200 Subject: [PATCH 14/24] [clang][bytecode] Handle non-arrays in initElem{,Pop} (#112719) ... provided the given index is 0. Skip the atIndex() in that case. --- clang/lib/AST/ByteCode/Interp.h | 37 ++++++++++++++++++----- clang/test/AST/ByteCode/placement-new.cpp | 14 +++++++++ 2 files changed, 43 insertions(+), 8 deletions(-) diff --git a/clang/lib/AST/ByteCode/Interp.h b/clang/lib/AST/ByteCode/Interp.h index dece95971b7617..a1a92562cc5e3d 100644 --- a/clang/lib/AST/ByteCode/Interp.h +++ b/clang/lib/AST/ByteCode/Interp.h @@ -1863,13 +1863,24 @@ bool InitPop(InterpState &S, CodePtr OpPC) { template ::T> bool InitElem(InterpState &S, CodePtr OpPC, uint32_t Idx) { const T &Value = S.Stk.pop(); - const Pointer &Ptr = S.Stk.peek().atIndex(Idx); + const Pointer &Ptr = S.Stk.peek(); + if (Ptr.isUnknownSizeArray()) return false; - if (!CheckInit(S, OpPC, Ptr)) + + // In the unlikely event that we're initializing the first item of + // a non-array, skip the atIndex(). + if (Idx == 0 && !Ptr.getFieldDesc()->isArray()) { + Ptr.initialize(); + new (&Ptr.deref()) T(Value); + return true; + } + + const Pointer &ElemPtr = Ptr.atIndex(Idx); + if (!CheckInit(S, OpPC, ElemPtr)) return false; - Ptr.initialize(); - new (&Ptr.deref()) T(Value); + ElemPtr.initialize(); + new (&ElemPtr.deref()) T(Value); return true; } @@ -1877,13 +1888,23 @@ bool InitElem(InterpState &S, CodePtr OpPC, uint32_t Idx) { template ::T> bool InitElemPop(InterpState &S, CodePtr OpPC, uint32_t Idx) { const T &Value = S.Stk.pop(); - const Pointer &Ptr = S.Stk.pop().atIndex(Idx); + const Pointer &Ptr = S.Stk.pop(); if (Ptr.isUnknownSizeArray()) return false; - if (!CheckInit(S, OpPC, Ptr)) + + // In the unlikely event that we're initializing the first item of + // a non-array, skip the atIndex(). + if (Idx == 0 && !Ptr.getFieldDesc()->isArray()) { + Ptr.initialize(); + new (&Ptr.deref()) T(Value); + return true; + } + + const Pointer &ElemPtr = Ptr.atIndex(Idx); + if (!CheckInit(S, OpPC, ElemPtr)) return false; - Ptr.initialize(); - new (&Ptr.deref()) T(Value); + ElemPtr.initialize(); + new (&ElemPtr.deref()) T(Value); return true; } diff --git a/clang/test/AST/ByteCode/placement-new.cpp b/clang/test/AST/ByteCode/placement-new.cpp index caf3ac97fd1c04..6bd83f2372eab6 100644 --- a/clang/test/AST/ByteCode/placement-new.cpp +++ b/clang/test/AST/ByteCode/placement-new.cpp @@ -52,6 +52,20 @@ consteval auto ok4() { } static_assert(ok4() == 37); +consteval int ok5() { + int i; + new (&i) int[1]{1}; + + struct S { + int a; int b; + } s; + new (&s) S[1]{{12, 13}}; + + return 25; + // return s.a + s.b; FIXME: Broken in the current interpreter. +} +static_assert(ok5() == 25); + /// FIXME: Broken in both interpreters. #if 0 consteval int ok5() { From f35a14dd507b6fc90fe8e0b606c2f787d7dfedea Mon Sep 17 00:00:00 2001 From: Helena Kotas Date: Thu, 17 Oct 2024 10:17:09 -0700 Subject: [PATCH 15/24] [HLSL] Simplify debug check in ResourceBindings::addDeclBindingInfo (#112661) Follow-up for https://github.com/llvm/llvm-project/pull/111203#pullrequestreview-2373679837. --- clang/lib/Sema/SemaHLSL.cpp | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp index 0d23c4935e9196..efb0fbaa432d76 100644 --- a/clang/lib/Sema/SemaHLSL.cpp +++ b/clang/lib/Sema/SemaHLSL.cpp @@ -110,15 +110,7 @@ DeclBindingInfo *ResourceBindings::addDeclBindingInfo(const VarDecl *VD, ResourceClass ResClass) { assert(getDeclBindingInfo(VD, ResClass) == nullptr && "DeclBindingInfo already added"); -#ifndef NDEBUG - // Verify that existing bindings for this decl are stored sequentially - // and at the end of the BindingsList - auto I = DeclToBindingListIndex.find(VD); - if (I != DeclToBindingListIndex.end()) { - for (unsigned Index = I->getSecond(); Index < BindingsList.size(); ++Index) - assert(BindingsList[Index].Decl == VD); - } -#endif + assert(!hasBindingInfoForDecl(VD) || BindingsList.back().Decl == VD); // VarDecl may have multiple entries for different resource classes. // DeclToBindingListIndex stores the index of the first binding we saw // for this decl. If there are any additional ones then that index From dea213cb9b2e1ce7a6032ae4bc5306f74ebfc604 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 17 Oct 2024 10:37:32 -0700 Subject: [PATCH 16/24] Add atan2 test case for prior change in X86SelLowering.cpp (#112616) When updating X86SelLowering.cpp for atan2, based on #96222, it was known that a needed change was missing which was merged later in #101268. However, the corresponding test update to `fp-strict-libcalls-msvc32.ll` was missed. This change rectifies that oversight. This also adds a missing label to the tanh test, since it's produced by update_llc_test_checks.py Part of: Implement the atan2 HLSL Function #70096. --- .../CodeGen/X86/fp-strict-libcalls-msvc32.ll | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll index 5d4e86afc8aceb..74291fbb75e81a 100644 --- a/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll +++ b/llvm/test/CodeGen/X86/fp-strict-libcalls-msvc32.ll @@ -228,6 +228,26 @@ define float @atan(float %x) #0 { ret float %result } +define float @atan2(float %x, float %y) #0 { +; CHECK-LABEL: atan2: +; CHECK: # %bb.0: +; CHECK-NEXT: subl $20, %esp +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: fxch %st(1) +; CHECK-NEXT: fstpl {{[0-9]+}}(%esp) +; CHECK-NEXT: fstpl (%esp) +; CHECK-NEXT: wait +; CHECK-NEXT: calll _atan2 +; CHECK-NEXT: fstps {{[0-9]+}}(%esp) +; CHECK-NEXT: flds {{[0-9]+}}(%esp) +; CHECK-NEXT: wait +; CHECK-NEXT: addl $20, %esp +; CHECK-NEXT: retl + %result = call float @llvm.experimental.constrained.atan2.f32(float %x, float %y, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + ret float %result +} + define float @cosh(float %x) #0 { ; CHECK-LABEL: cosh: ; CHECK: # %bb.0: @@ -263,6 +283,7 @@ define float @sinh(float %x) #0 { } define float @tanh(float %x) #0 { +; CHECK-LABEL: tanh: ; CHECK: # %bb.0: ; CHECK-NEXT: subl $12, %esp ; CHECK-NEXT: flds {{[0-9]+}}(%esp) @@ -293,6 +314,7 @@ declare float @llvm.experimental.constrained.tan.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.acos.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.asin.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.atan.f32(float, metadata, metadata) +declare float @llvm.experimental.constrained.atan2.f32(float, float, metadata, metadata) declare float @llvm.experimental.constrained.cosh.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.sinh.f32(float, metadata, metadata) declare float @llvm.experimental.constrained.tanh.f32(float, metadata, metadata) From 2bebeea2a1c74b78d1be32dbe3a7d724da1af102 Mon Sep 17 00:00:00 2001 From: Tex Riddell Date: Thu, 17 Oct 2024 10:39:36 -0700 Subject: [PATCH 17/24] [WebAssembly] Add atan2 to RuntimeLibcallSignatureTable (#112613) This change is part of this proposal: https://discourse.llvm.org/t/rfc-all-the-math-intrinsics/78294 - `WebAssemblyRuntimeLibcallSignatures.cpp`: Add `RTLIB::ATAN2*` to RuntimeLibcallSignatureTable - Add atan2 calls to `CodeGen/WebAssembly/libcalls-trig.ll` and update test checks Part of: Implement the atan2 HLSL Function #70096. --- .../WebAssemblyRuntimeLibcallSignatures.cpp | 3 + .../test/CodeGen/WebAssembly/libcalls-trig.ll | 316 ++++++++++-------- 2 files changed, 173 insertions(+), 146 deletions(-) diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp index ba3ab5164af267..aaa52256707210 100644 --- a/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp +++ b/llvm/lib/Target/WebAssembly/WebAssemblyRuntimeLibcallSignatures.cpp @@ -213,6 +213,9 @@ struct RuntimeLibcallSignatureTable { Table[RTLIB::ATAN_F32] = f32_func_f32; Table[RTLIB::ATAN_F64] = f64_func_f64; Table[RTLIB::ATAN_F128] = i64_i64_func_i64_i64; + Table[RTLIB::ATAN2_F32] = f32_func_f32_f32; + Table[RTLIB::ATAN2_F64] = f64_func_f64_f64; + Table[RTLIB::ATAN2_F128] = i64_i64_func_i64_i64_i64_i64; Table[RTLIB::SINH_F32] = f32_func_f32; Table[RTLIB::SINH_F64] = f64_func_f64; Table[RTLIB::SINH_F128] = i64_i64_func_i64_i64; diff --git a/llvm/test/CodeGen/WebAssembly/libcalls-trig.ll b/llvm/test/CodeGen/WebAssembly/libcalls-trig.ll index 8bc9c043fcf8ae..7850559b49b7d7 100644 --- a/llvm/test/CodeGen/WebAssembly/libcalls-trig.ll +++ b/llvm/test/CodeGen/WebAssembly/libcalls-trig.ll @@ -10,10 +10,11 @@ declare fp128 @llvm.cos.f128(fp128) declare fp128 @llvm.tan.f128(fp128) declare fp128 @llvm.asin.f128(fp128) declare fp128 @llvm.acos.f128(fp128) -declare fp128 @llvm.atan.f128.i32(fp128) +declare fp128 @llvm.atan.f128(fp128) declare fp128 @llvm.sinh.f128(fp128) declare fp128 @llvm.cosh.f128(fp128) declare fp128 @llvm.tanh.f128(fp128) +declare fp128 @llvm.atan2.f128(fp128, fp128) declare double @llvm.sin.f64(double) declare double @llvm.cos.f64(double) @@ -24,6 +25,7 @@ declare double @llvm.atan.f64(double) declare double @llvm.sinh.f64(double) declare double @llvm.cosh.f64(double) declare double @llvm.tanh.f64(double) +declare double @llvm.atan2.f64(double, double) declare float @llvm.sin.f32(float) declare float @llvm.cos.f32(float) @@ -34,6 +36,7 @@ declare float @llvm.atan.f32(float) declare float @llvm.sinh.f32(float) declare float @llvm.cosh.f32(float) declare float @llvm.tanh.f32(float) +declare float @llvm.atan2.f32(float, float) define fp128 @fp128libcalls(fp128 %x) { @@ -42,154 +45,171 @@ define fp128 @fp128libcalls(fp128 %x) { ; CHECK: .functype fp128libcalls (i32, i64, i64) -> () ; CHECK-NEXT: .local i32 ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: global.get $push28=, __stack_pointer -; CHECK-NEXT: i32.const $push29=, 144 -; CHECK-NEXT: i32.sub $push73=, $pop28, $pop29 -; CHECK-NEXT: local.tee $push72=, 3, $pop73 -; CHECK-NEXT: global.set __stack_pointer, $pop72 -; CHECK-NEXT: local.get $push74=, 3 -; CHECK-NEXT: i32.const $push62=, 128 -; CHECK-NEXT: i32.add $push63=, $pop74, $pop62 -; CHECK-NEXT: local.get $push76=, 1 -; CHECK-NEXT: local.get $push75=, 2 -; CHECK-NEXT: call sinl, $pop63, $pop76, $pop75 -; CHECK-NEXT: local.get $push77=, 3 -; CHECK-NEXT: i32.const $push58=, 112 -; CHECK-NEXT: i32.add $push59=, $pop77, $pop58 -; CHECK-NEXT: local.get $push78=, 3 -; CHECK-NEXT: i64.load $push3=, 128($pop78) -; CHECK-NEXT: local.get $push79=, 3 -; CHECK-NEXT: i32.const $push60=, 128 -; CHECK-NEXT: i32.add $push61=, $pop79, $pop60 -; CHECK-NEXT: i32.const $push0=, 8 -; CHECK-NEXT: i32.add $push1=, $pop61, $pop0 -; CHECK-NEXT: i64.load $push2=, 0($pop1) -; CHECK-NEXT: call cosl, $pop59, $pop3, $pop2 -; CHECK-NEXT: local.get $push80=, 3 -; CHECK-NEXT: i32.const $push54=, 96 -; CHECK-NEXT: i32.add $push55=, $pop80, $pop54 -; CHECK-NEXT: local.get $push81=, 3 -; CHECK-NEXT: i64.load $push6=, 112($pop81) -; CHECK-NEXT: local.get $push82=, 3 -; CHECK-NEXT: i32.const $push56=, 112 -; CHECK-NEXT: i32.add $push57=, $pop82, $pop56 -; CHECK-NEXT: i32.const $push71=, 8 -; CHECK-NEXT: i32.add $push4=, $pop57, $pop71 -; CHECK-NEXT: i64.load $push5=, 0($pop4) -; CHECK-NEXT: call tanl, $pop55, $pop6, $pop5 -; CHECK-NEXT: local.get $push83=, 3 -; CHECK-NEXT: i32.const $push50=, 80 -; CHECK-NEXT: i32.add $push51=, $pop83, $pop50 -; CHECK-NEXT: local.get $push84=, 3 -; CHECK-NEXT: i64.load $push9=, 96($pop84) -; CHECK-NEXT: local.get $push85=, 3 -; CHECK-NEXT: i32.const $push52=, 96 -; CHECK-NEXT: i32.add $push53=, $pop85, $pop52 -; CHECK-NEXT: i32.const $push70=, 8 -; CHECK-NEXT: i32.add $push7=, $pop53, $pop70 -; CHECK-NEXT: i64.load $push8=, 0($pop7) -; CHECK-NEXT: call asinl, $pop51, $pop9, $pop8 -; CHECK-NEXT: local.get $push86=, 3 -; CHECK-NEXT: i32.const $push46=, 64 -; CHECK-NEXT: i32.add $push47=, $pop86, $pop46 -; CHECK-NEXT: local.get $push87=, 3 -; CHECK-NEXT: i64.load $push12=, 80($pop87) -; CHECK-NEXT: local.get $push88=, 3 -; CHECK-NEXT: i32.const $push48=, 80 -; CHECK-NEXT: i32.add $push49=, $pop88, $pop48 -; CHECK-NEXT: i32.const $push69=, 8 -; CHECK-NEXT: i32.add $push10=, $pop49, $pop69 -; CHECK-NEXT: i64.load $push11=, 0($pop10) -; CHECK-NEXT: call acosl, $pop47, $pop12, $pop11 -; CHECK-NEXT: local.get $push89=, 3 -; CHECK-NEXT: i32.const $push42=, 48 -; CHECK-NEXT: i32.add $push43=, $pop89, $pop42 -; CHECK-NEXT: local.get $push90=, 3 -; CHECK-NEXT: i64.load $push15=, 64($pop90) -; CHECK-NEXT: local.get $push91=, 3 -; CHECK-NEXT: i32.const $push44=, 64 -; CHECK-NEXT: i32.add $push45=, $pop91, $pop44 -; CHECK-NEXT: i32.const $push68=, 8 -; CHECK-NEXT: i32.add $push13=, $pop45, $pop68 -; CHECK-NEXT: i64.load $push14=, 0($pop13) -; CHECK-NEXT: call atanl, $pop43, $pop15, $pop14 -; CHECK-NEXT: local.get $push92=, 3 -; CHECK-NEXT: i32.const $push38=, 32 -; CHECK-NEXT: i32.add $push39=, $pop92, $pop38 -; CHECK-NEXT: local.get $push93=, 3 -; CHECK-NEXT: i64.load $push18=, 48($pop93) -; CHECK-NEXT: local.get $push94=, 3 -; CHECK-NEXT: i32.const $push40=, 48 -; CHECK-NEXT: i32.add $push41=, $pop94, $pop40 -; CHECK-NEXT: i32.const $push67=, 8 -; CHECK-NEXT: i32.add $push16=, $pop41, $pop67 -; CHECK-NEXT: i64.load $push17=, 0($pop16) -; CHECK-NEXT: call sinhl, $pop39, $pop18, $pop17 -; CHECK-NEXT: local.get $push95=, 3 -; CHECK-NEXT: i32.const $push34=, 16 -; CHECK-NEXT: i32.add $push35=, $pop95, $pop34 -; CHECK-NEXT: local.get $push96=, 3 -; CHECK-NEXT: i64.load $push21=, 32($pop96) -; CHECK-NEXT: local.get $push97=, 3 -; CHECK-NEXT: i32.const $push36=, 32 -; CHECK-NEXT: i32.add $push37=, $pop97, $pop36 -; CHECK-NEXT: i32.const $push66=, 8 -; CHECK-NEXT: i32.add $push19=, $pop37, $pop66 -; CHECK-NEXT: i64.load $push20=, 0($pop19) -; CHECK-NEXT: call coshl, $pop35, $pop21, $pop20 -; CHECK-NEXT: local.get $push100=, 3 -; CHECK-NEXT: local.get $push98=, 3 -; CHECK-NEXT: i64.load $push24=, 16($pop98) -; CHECK-NEXT: local.get $push99=, 3 -; CHECK-NEXT: i32.const $push32=, 16 -; CHECK-NEXT: i32.add $push33=, $pop99, $pop32 -; CHECK-NEXT: i32.const $push65=, 8 -; CHECK-NEXT: i32.add $push22=, $pop33, $pop65 -; CHECK-NEXT: i64.load $push23=, 0($pop22) -; CHECK-NEXT: call tanhl, $pop100, $pop24, $pop23 -; CHECK-NEXT: local.get $push102=, 0 -; CHECK-NEXT: local.get $push101=, 3 -; CHECK-NEXT: i32.const $push64=, 8 -; CHECK-NEXT: i32.add $push25=, $pop101, $pop64 -; CHECK-NEXT: i64.load $push26=, 0($pop25) -; CHECK-NEXT: i64.store 8($pop102), $pop26 -; CHECK-NEXT: local.get $push104=, 0 -; CHECK-NEXT: local.get $push103=, 3 -; CHECK-NEXT: i64.load $push27=, 0($pop103) -; CHECK-NEXT: i64.store 0($pop104), $pop27 -; CHECK-NEXT: local.get $push105=, 3 -; CHECK-NEXT: i32.const $push30=, 144 -; CHECK-NEXT: i32.add $push31=, $pop105, $pop30 -; CHECK-NEXT: global.set __stack_pointer, $pop31 +; CHECK-NEXT: global.get $push31=, __stack_pointer +; CHECK-NEXT: i32.const $push32=, 160 +; CHECK-NEXT: i32.sub $push81=, $pop31, $pop32 +; CHECK-NEXT: local.tee $push80=, 3, $pop81 +; CHECK-NEXT: global.set __stack_pointer, $pop80 +; CHECK-NEXT: local.get $push82=, 3 +; CHECK-NEXT: i32.const $push69=, 144 +; CHECK-NEXT: i32.add $push70=, $pop82, $pop69 +; CHECK-NEXT: local.get $push84=, 1 +; CHECK-NEXT: local.get $push83=, 2 +; CHECK-NEXT: call sinl, $pop70, $pop84, $pop83 +; CHECK-NEXT: local.get $push85=, 3 +; CHECK-NEXT: i32.const $push65=, 128 +; CHECK-NEXT: i32.add $push66=, $pop85, $pop65 +; CHECK-NEXT: local.get $push86=, 3 +; CHECK-NEXT: i64.load $push3=, 144($pop86) +; CHECK-NEXT: local.get $push87=, 3 +; CHECK-NEXT: i32.const $push67=, 144 +; CHECK-NEXT: i32.add $push68=, $pop87, $pop67 +; CHECK-NEXT: i32.const $push0=, 8 +; CHECK-NEXT: i32.add $push1=, $pop68, $pop0 +; CHECK-NEXT: i64.load $push2=, 0($pop1) +; CHECK-NEXT: call cosl, $pop66, $pop3, $pop2 +; CHECK-NEXT: local.get $push88=, 3 +; CHECK-NEXT: i32.const $push61=, 112 +; CHECK-NEXT: i32.add $push62=, $pop88, $pop61 +; CHECK-NEXT: local.get $push89=, 3 +; CHECK-NEXT: i64.load $push6=, 128($pop89) +; CHECK-NEXT: local.get $push90=, 3 +; CHECK-NEXT: i32.const $push63=, 128 +; CHECK-NEXT: i32.add $push64=, $pop90, $pop63 +; CHECK-NEXT: i32.const $push79=, 8 +; CHECK-NEXT: i32.add $push4=, $pop64, $pop79 +; CHECK-NEXT: i64.load $push5=, 0($pop4) +; CHECK-NEXT: call tanl, $pop62, $pop6, $pop5 +; CHECK-NEXT: local.get $push91=, 3 +; CHECK-NEXT: i32.const $push57=, 96 +; CHECK-NEXT: i32.add $push58=, $pop91, $pop57 +; CHECK-NEXT: local.get $push92=, 3 +; CHECK-NEXT: i64.load $push9=, 112($pop92) +; CHECK-NEXT: local.get $push93=, 3 +; CHECK-NEXT: i32.const $push59=, 112 +; CHECK-NEXT: i32.add $push60=, $pop93, $pop59 +; CHECK-NEXT: i32.const $push78=, 8 +; CHECK-NEXT: i32.add $push7=, $pop60, $pop78 +; CHECK-NEXT: i64.load $push8=, 0($pop7) +; CHECK-NEXT: call asinl, $pop58, $pop9, $pop8 +; CHECK-NEXT: local.get $push94=, 3 +; CHECK-NEXT: i32.const $push53=, 80 +; CHECK-NEXT: i32.add $push54=, $pop94, $pop53 +; CHECK-NEXT: local.get $push95=, 3 +; CHECK-NEXT: i64.load $push12=, 96($pop95) +; CHECK-NEXT: local.get $push96=, 3 +; CHECK-NEXT: i32.const $push55=, 96 +; CHECK-NEXT: i32.add $push56=, $pop96, $pop55 +; CHECK-NEXT: i32.const $push77=, 8 +; CHECK-NEXT: i32.add $push10=, $pop56, $pop77 +; CHECK-NEXT: i64.load $push11=, 0($pop10) +; CHECK-NEXT: call acosl, $pop54, $pop12, $pop11 +; CHECK-NEXT: local.get $push97=, 3 +; CHECK-NEXT: i32.const $push49=, 64 +; CHECK-NEXT: i32.add $push50=, $pop97, $pop49 +; CHECK-NEXT: local.get $push98=, 3 +; CHECK-NEXT: i64.load $push15=, 80($pop98) +; CHECK-NEXT: local.get $push99=, 3 +; CHECK-NEXT: i32.const $push51=, 80 +; CHECK-NEXT: i32.add $push52=, $pop99, $pop51 +; CHECK-NEXT: i32.const $push76=, 8 +; CHECK-NEXT: i32.add $push13=, $pop52, $pop76 +; CHECK-NEXT: i64.load $push14=, 0($pop13) +; CHECK-NEXT: call atanl, $pop50, $pop15, $pop14 +; CHECK-NEXT: local.get $push100=, 3 +; CHECK-NEXT: i32.const $push45=, 48 +; CHECK-NEXT: i32.add $push46=, $pop100, $pop45 +; CHECK-NEXT: local.get $push101=, 3 +; CHECK-NEXT: i64.load $push18=, 64($pop101) +; CHECK-NEXT: local.get $push102=, 3 +; CHECK-NEXT: i32.const $push47=, 64 +; CHECK-NEXT: i32.add $push48=, $pop102, $pop47 +; CHECK-NEXT: i32.const $push75=, 8 +; CHECK-NEXT: i32.add $push16=, $pop48, $pop75 +; CHECK-NEXT: i64.load $push17=, 0($pop16) +; CHECK-NEXT: call sinhl, $pop46, $pop18, $pop17 +; CHECK-NEXT: local.get $push103=, 3 +; CHECK-NEXT: i32.const $push41=, 32 +; CHECK-NEXT: i32.add $push42=, $pop103, $pop41 +; CHECK-NEXT: local.get $push104=, 3 +; CHECK-NEXT: i64.load $push21=, 48($pop104) +; CHECK-NEXT: local.get $push105=, 3 +; CHECK-NEXT: i32.const $push43=, 48 +; CHECK-NEXT: i32.add $push44=, $pop105, $pop43 +; CHECK-NEXT: i32.const $push74=, 8 +; CHECK-NEXT: i32.add $push19=, $pop44, $pop74 +; CHECK-NEXT: i64.load $push20=, 0($pop19) +; CHECK-NEXT: call coshl, $pop42, $pop21, $pop20 +; CHECK-NEXT: local.get $push106=, 3 +; CHECK-NEXT: i32.const $push37=, 16 +; CHECK-NEXT: i32.add $push38=, $pop106, $pop37 +; CHECK-NEXT: local.get $push107=, 3 +; CHECK-NEXT: i64.load $push24=, 32($pop107) +; CHECK-NEXT: local.get $push108=, 3 +; CHECK-NEXT: i32.const $push39=, 32 +; CHECK-NEXT: i32.add $push40=, $pop108, $pop39 +; CHECK-NEXT: i32.const $push73=, 8 +; CHECK-NEXT: i32.add $push22=, $pop40, $pop73 +; CHECK-NEXT: i64.load $push23=, 0($pop22) +; CHECK-NEXT: call tanhl, $pop38, $pop24, $pop23 +; CHECK-NEXT: local.get $push113=, 3 +; CHECK-NEXT: local.get $push112=, 1 +; CHECK-NEXT: local.get $push111=, 2 +; CHECK-NEXT: local.get $push109=, 3 +; CHECK-NEXT: i64.load $push27=, 16($pop109) +; CHECK-NEXT: local.get $push110=, 3 +; CHECK-NEXT: i32.const $push35=, 16 +; CHECK-NEXT: i32.add $push36=, $pop110, $pop35 +; CHECK-NEXT: i32.const $push72=, 8 +; CHECK-NEXT: i32.add $push25=, $pop36, $pop72 +; CHECK-NEXT: i64.load $push26=, 0($pop25) +; CHECK-NEXT: call atan2l, $pop113, $pop112, $pop111, $pop27, $pop26 +; CHECK-NEXT: local.get $push115=, 0 +; CHECK-NEXT: local.get $push114=, 3 +; CHECK-NEXT: i32.const $push71=, 8 +; CHECK-NEXT: i32.add $push28=, $pop114, $pop71 +; CHECK-NEXT: i64.load $push29=, 0($pop28) +; CHECK-NEXT: i64.store 8($pop115), $pop29 +; CHECK-NEXT: local.get $push117=, 0 +; CHECK-NEXT: local.get $push116=, 3 +; CHECK-NEXT: i64.load $push30=, 0($pop116) +; CHECK-NEXT: i64.store 0($pop117), $pop30 +; CHECK-NEXT: local.get $push118=, 3 +; CHECK-NEXT: i32.const $push33=, 160 +; CHECK-NEXT: i32.add $push34=, $pop118, $pop33 +; CHECK-NEXT: global.set __stack_pointer, $pop34 ; CHECK-NEXT: return ; libm calls %d = call fp128 @llvm.sin.f128(fp128 %x) %e = call fp128 @llvm.cos.f128(fp128 %d) %f = call fp128 @llvm.tan.f128(fp128 %e) - %g = call fp128 @llvm.asin.f128.i32(fp128 %f) + %g = call fp128 @llvm.asin.f128(fp128 %f) %h = call fp128 @llvm.acos.f128(fp128 %g) %i = call fp128 @llvm.atan.f128(fp128 %h) %a = call fp128 @llvm.sinh.f128(fp128 %i) %b = call fp128 @llvm.cosh.f128(fp128 %a) %c = call fp128 @llvm.tanh.f128(fp128 %b) - ret fp128 %c + %j = call fp128 @llvm.atan2.f128(fp128 %x, fp128 %c) + ret fp128 %j } define double @f64libcalls(double %x) { ; CHECK-LABEL: f64libcalls: ; CHECK: .functype f64libcalls (f64) -> (f64) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push9=, 0 -; CHECK-NEXT: call $push0=, sin, $pop9 -; CHECK-NEXT: call $push1=, cos, $pop0 -; CHECK-NEXT: call $push2=, tan, $pop1 -; CHECK-NEXT: call $push3=, asin, $pop2 -; CHECK-NEXT: call $push4=, acos, $pop3 -; CHECK-NEXT: call $push5=, atan, $pop4 -; CHECK-NEXT: call $push6=, sinh, $pop5 -; CHECK-NEXT: call $push7=, cosh, $pop6 -; CHECK-NEXT: call $push8=, tanh, $pop7 -; CHECK-NEXT: return $pop8 +; CHECK-NEXT: local.get $push11=, 0 +; CHECK-NEXT: local.get $push10=, 0 +; CHECK-NEXT: call $push0=, sin, $pop10 +; CHECK-NEXT: call $push1=, cos, $pop0 +; CHECK-NEXT: call $push2=, tan, $pop1 +; CHECK-NEXT: call $push3=, asin, $pop2 +; CHECK-NEXT: call $push4=, acos, $pop3 +; CHECK-NEXT: call $push5=, atan, $pop4 +; CHECK-NEXT: call $push6=, sinh, $pop5 +; CHECK-NEXT: call $push7=, cosh, $pop6 +; CHECK-NEXT: call $push8=, tanh, $pop7 +; CHECK-NEXT: call $push9=, atan2, $pop11, $pop8 +; CHECK-NEXT: return $pop9 %k = call double @llvm.sin.f64(double %x) @@ -201,24 +221,27 @@ define double @f64libcalls(double %x) { %f = call double @llvm.sinh.f64(double %e) %g = call double @llvm.cosh.f64(double %f) %h = call double @llvm.tanh.f64(double %g) - ret double %h + %i = call double @llvm.atan2.f64(double %x, double %h) + ret double %i } define float @f32libcalls(float %x) { ; CHECK-LABEL: f32libcalls: ; CHECK: .functype f32libcalls (f32) -> (f32) ; CHECK-NEXT: # %bb.0: -; CHECK-NEXT: local.get $push9=, 0 -; CHECK-NEXT: call $push0=, sinf, $pop9 -; CHECK-NEXT: call $push1=, cosf, $pop0 -; CHECK-NEXT: call $push2=, tanf, $pop1 -; CHECK-NEXT: call $push3=, asinf, $pop2 -; CHECK-NEXT: call $push4=, acosf, $pop3 -; CHECK-NEXT: call $push5=, atanf, $pop4 -; CHECK-NEXT: call $push6=, sinhf, $pop5 -; CHECK-NEXT: call $push7=, coshf, $pop6 -; CHECK-NEXT: call $push8=, tanhf, $pop7 -; CHECK-NEXT: return $pop8 +; CHECK-NEXT: local.get $push11=, 0 +; CHECK-NEXT: local.get $push10=, 0 +; CHECK-NEXT: call $push0=, sinf, $pop10 +; CHECK-NEXT: call $push1=, cosf, $pop0 +; CHECK-NEXT: call $push2=, tanf, $pop1 +; CHECK-NEXT: call $push3=, asinf, $pop2 +; CHECK-NEXT: call $push4=, acosf, $pop3 +; CHECK-NEXT: call $push5=, atanf, $pop4 +; CHECK-NEXT: call $push6=, sinhf, $pop5 +; CHECK-NEXT: call $push7=, coshf, $pop6 +; CHECK-NEXT: call $push8=, tanhf, $pop7 +; CHECK-NEXT: call $push9=, atan2f, $pop11, $pop8 +; CHECK-NEXT: return $pop9 %k = call float @llvm.sin.f32(float %x) @@ -230,5 +253,6 @@ define float @f32libcalls(float %x) { %f = call float @llvm.sinh.f32(float %e) %g = call float @llvm.cosh.f32(float %f) %h = call float @llvm.tanh.f32(float %g) - ret float %h + %i = call float @llvm.atan2.f32(float %x, float %h) + ret float %i } From 2c93598b32c217c605dc4eeea8e37eae2ba5799a Mon Sep 17 00:00:00 2001 From: Krzysztof Parzyszek Date: Thu, 17 Oct 2024 12:50:10 -0500 Subject: [PATCH 18/24] [flang] Update printing values in dump-parse-tree (#112709) Remove 'if std::string' that is covered by another branch of the if-statement. Add printing of 'bool' and 'int' values, since they have corresponding `GetNodeName` definitions. --- flang/include/flang/Parser/dump-parse-tree.h | 6 ++++-- flang/test/Parser/OpenMP/allocate-tree.f90 | 13 +++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 5d243b4e5d3e9a..ccbe5475d051e0 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -884,8 +884,10 @@ class ParseTreeDumper { } else if constexpr (HasSource::value) { return x.source.ToString(); #endif - } else if constexpr (std::is_same_v) { - return x; + } else if constexpr (std::is_same_v) { + return std::to_string(x); + } else if constexpr (std::is_same_v) { + return x ? "true" : "false"; } else { return ""; } diff --git a/flang/test/Parser/OpenMP/allocate-tree.f90 b/flang/test/Parser/OpenMP/allocate-tree.f90 index 9de257b00dc32f..bf413d591baf23 100644 --- a/flang/test/Parser/OpenMP/allocate-tree.f90 +++ b/flang/test/Parser/OpenMP/allocate-tree.f90 @@ -18,6 +18,19 @@ program allocate_tree allocate(w, xarray(4), zarray(t, z)) end program allocate_tree +!CHECK: | | DeclarationConstruct -> SpecificationConstruct -> TypeDeclarationStmt +!CHECK-NEXT: | | | DeclarationTypeSpec -> IntrinsicTypeSpec -> IntegerTypeSpec -> +!CHECK-NEXT: | | | AttrSpec -> Allocatable +!CHECK-NEXT: | | | EntityDecl +!CHECK-NEXT: | | | | Name = 'w' +!CHECK-NEXT: | | | EntityDecl +!CHECK-NEXT: | | | | Name = 'xarray' +!CHECK-NEXT: | | | | ArraySpec -> DeferredShapeSpecList -> int = '1' +!CHECK-NEXT: | | | EntityDecl +!CHECK-NEXT: | | | | Name = 'zarray' +!CHECK-NEXT: | | | | ArraySpec -> DeferredShapeSpecList -> int = '2' + + !CHECK: | | ExecutionPartConstruct -> ExecutableConstruct -> OpenMPConstruct -> OpenMPExecutableAllocate !CHECK-NEXT: | | | Verbatim !CHECK-NEXT: | | | OmpClauseList -> From ed7868de03c7b93809f87ed1a01103b926564feb Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 17 Oct 2024 11:07:43 -0700 Subject: [PATCH 19/24] [nfc][sanitizer] Replace mmap with InternalMmapVector in test (#112756) --- .../lib/sanitizer_common/tests/sanitizer_posix_test.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/compiler-rt/lib/sanitizer_common/tests/sanitizer_posix_test.cpp b/compiler-rt/lib/sanitizer_common/tests/sanitizer_posix_test.cpp index bed19d15a8ec77..803c8d39362e27 100644 --- a/compiler-rt/lib/sanitizer_common/tests/sanitizer_posix_test.cpp +++ b/compiler-rt/lib/sanitizer_common/tests/sanitizer_posix_test.cpp @@ -65,8 +65,8 @@ TEST(SanitizerCommon, PthreadDestructorIterations) { TEST(SanitizerCommon, IsAccessibleMemoryRange) { const int page_size = GetPageSize(); - uptr mem = (uptr)mmap(0, 3 * page_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANON, -1, 0); + InternalMmapVector buffer(3 * page_size); + uptr mem = reinterpret_cast(buffer.data()); // Protect the middle page. mprotect((void *)(mem + page_size), page_size, PROT_NONE); EXPECT_TRUE(IsAccessibleMemoryRange(mem, page_size - 1)); @@ -78,8 +78,6 @@ TEST(SanitizerCommon, IsAccessibleMemoryRange) { EXPECT_TRUE(IsAccessibleMemoryRange(mem + 2 * page_size, page_size)); EXPECT_FALSE(IsAccessibleMemoryRange(mem, 3 * page_size)); EXPECT_FALSE(IsAccessibleMemoryRange(0x0, 2)); - - munmap((void *)mem, 3 * page_size); } } // namespace __sanitizer From ed3d05178274890fb804f43ae1bcdfd33b5fd8f0 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 17 Oct 2024 20:44:23 +0200 Subject: [PATCH 20/24] [libc][math][c23] Add sinhf16 and coshf16 C23 math functions (#105947) Part of #95250. --- libc/config/gpu/entrypoints.txt | 2 + libc/config/linux/x86_64/entrypoints.txt | 2 + libc/docs/math/index.rst | 4 +- libc/spec/stdc.td | 4 + libc/src/math/CMakeLists.txt | 4 + libc/src/math/coshf16.h | 21 ++++ libc/src/math/generic/CMakeLists.txt | 39 ++++++ libc/src/math/generic/coshf16.cpp | 103 ++++++++++++++++ libc/src/math/generic/expxf16.h | 114 +++++++++++++++++ libc/src/math/generic/sinhf16.cpp | 144 ++++++++++++++++++++++ libc/src/math/sinhf16.h | 21 ++++ libc/test/src/math/CMakeLists.txt | 22 ++++ libc/test/src/math/coshf16_test.cpp | 40 ++++++ libc/test/src/math/sinhf16_test.cpp | 40 ++++++ libc/test/src/math/smoke/CMakeLists.txt | 26 ++++ libc/test/src/math/smoke/coshf16_test.cpp | 90 ++++++++++++++ libc/test/src/math/smoke/sinhf16_test.cpp | 88 +++++++++++++ 17 files changed, 762 insertions(+), 2 deletions(-) create mode 100644 libc/src/math/coshf16.h create mode 100644 libc/src/math/generic/coshf16.cpp create mode 100644 libc/src/math/generic/sinhf16.cpp create mode 100644 libc/src/math/sinhf16.h create mode 100644 libc/test/src/math/coshf16_test.cpp create mode 100644 libc/test/src/math/sinhf16_test.cpp create mode 100644 libc/test/src/math/smoke/coshf16_test.cpp create mode 100644 libc/test/src/math/smoke/sinhf16_test.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 251ad43ece8d05..4bb81f5d3b2de1 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -521,6 +521,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.coshf16 libc.src.math.exp10f16 libc.src.math.exp10m1f16 libc.src.math.exp2f16 @@ -585,6 +586,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.scalbnf16 libc.src.math.setpayloadf16 libc.src.math.setpayloadsigf16 + libc.src.math.sinhf16 libc.src.math.totalorderf16 libc.src.math.totalordermagf16 libc.src.math.truncf16 diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 3ca14ec03de3c7..39f451d6b5fc0e 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -610,6 +610,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.canonicalizef16 libc.src.math.ceilf16 libc.src.math.copysignf16 + libc.src.math.coshf16 libc.src.math.exp10f16 libc.src.math.exp10m1f16 libc.src.math.exp2f16 @@ -678,6 +679,7 @@ if(LIBC_TYPES_HAS_FLOAT16) libc.src.math.scalbnf16 libc.src.math.setpayloadf16 libc.src.math.setpayloadsigf16 + libc.src.math.sinhf16 libc.src.math.sinpif16 libc.src.math.totalorderf16 libc.src.math.totalordermagf16 diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 95ac7f4f12f958..902645c9e00178 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -278,7 +278,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | cos | |check| | |check| | | | | 7.12.4.5 | F.10.1.5 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| cosh | |check| | | | | | 7.12.5.4 | F.10.2.4 | +| cosh | |check| | | | |check| | | 7.12.5.4 | F.10.2.4 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | cospi | |check| | | | | | 7.12.4.12 | F.10.1.12 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ @@ -340,7 +340,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | sincos | |check| | |check| | | | | | | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| sinh | |check| | | | | | 7.12.5.5 | F.10.2.5 | +| sinh | |check| | | | |check| | | 7.12.5.5 | F.10.2.5 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | sinpi | |check| | | | |check| | | 7.12.4.13 | F.10.1.13 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index ea032ba5f66e71..e4e46e7e13a586 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -792,7 +792,11 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"pow", RetValSpec, [ArgSpec, ArgSpec]>, FunctionSpec<"coshf", RetValSpec, [ArgSpec]>, + GuardedFunctionSpec<"coshf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + FunctionSpec<"sinhf", RetValSpec, [ArgSpec]>, + GuardedFunctionSpec<"sinhf16", RetValSpec, [ArgSpec], "LIBC_TYPES_HAS_FLOAT16">, + FunctionSpec<"tanhf", RetValSpec, [ArgSpec]>, FunctionSpec<"acosf", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index ecf63968481458..2f76b57d19e99d 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -89,8 +89,11 @@ add_math_entrypoint_object(copysignf128) add_math_entrypoint_object(cos) add_math_entrypoint_object(cosf) + add_math_entrypoint_object(cosh) add_math_entrypoint_object(coshf) +add_math_entrypoint_object(coshf16) + add_math_entrypoint_object(cospif) add_math_entrypoint_object(daddl) @@ -481,6 +484,7 @@ add_math_entrypoint_object(sinpif16) add_math_entrypoint_object(sinh) add_math_entrypoint_object(sinhf) +add_math_entrypoint_object(sinhf16) add_math_entrypoint_object(sqrt) add_math_entrypoint_object(sqrtf) diff --git a/libc/src/math/coshf16.h b/libc/src/math/coshf16.h new file mode 100644 index 00000000000000..55c9d4941d4ae1 --- /dev/null +++ b/libc/src/math/coshf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for coshf16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_COSHF16_H +#define LLVM_LIBC_SRC_MATH_COSHF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 coshf16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_COSHF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index ffa74970a2ab5d..4a3de8f0400d62 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -4218,6 +4218,25 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + coshf16 + SRCS + coshf16.cpp + HDRS + ../coshf16.h + DEPENDS + .expxf16 + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( sinhf SRCS @@ -4233,6 +4252,25 @@ add_entrypoint_object( -O3 ) +add_entrypoint_object( + sinhf16 + SRCS + sinhf16.cpp + HDRS + ../sinhf16.h + DEPENDS + .expxf16 + libc.hdr.errno_macros + libc.hdr.fenv_macros + libc.src.__support.FPUtil.except_value_utils + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + COMPILE_OPTIONS + -O3 +) + add_entrypoint_object( tanhf SRCS @@ -5297,6 +5335,7 @@ add_header_library( expxf16.h DEPENDS libc.src.__support.CPP.array + libc.src.__support.FPUtil.cast libc.src.__support.FPUtil.fp_bits libc.src.__support.FPUtil.multiply_add libc.src.__support.FPUtil.nearest_integer diff --git a/libc/src/math/generic/coshf16.cpp b/libc/src/math/generic/coshf16.cpp new file mode 100644 index 00000000000000..cca7581c70e0e3 --- /dev/null +++ b/libc/src/math/generic/coshf16.cpp @@ -0,0 +1,103 @@ +//===-- Half-precision cosh(x) function -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/coshf16.h" +#include "expxf16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +static constexpr fputil::ExceptValues COSHF16_EXCEPTS_POS = {{ + // x = 0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ) + {0x29a8U, 0x3c00U, 1U, 0U, 1U}, + // x = 0x1.8c4p+0, coshf16(x) = 0x1.3a8p+1 (RZ) + {0x3e31U, 0x40eaU, 1U, 0U, 0U}, + // x = 0x1.994p+0, coshf16(x) = 0x1.498p+1 (RZ) + {0x3e65U, 0x4126U, 1U, 0U, 0U}, + // x = 0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ) + {0x3ed8U, 0x41b6U, 1U, 0U, 1U}, + // x = 0x1.aap+1, coshf16(x) = 0x1.be8p+3 (RZ) + {0x42a8U, 0x4afaU, 1U, 0U, 1U}, + // x = 0x1.cc4p+1, coshf16(x) = 0x1.23cp+4 (RZ) + {0x4331U, 0x4c8fU, 1U, 0U, 0U}, + // x = 0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ) + {0x44a2U, 0x526dU, 1U, 0U, 0U}, + // x = 0x1.958p+2, coshf16(x) = 0x1.1a4p+8 (RZ) + {0x4656U, 0x5c69U, 1U, 0U, 0U}, + // x = 0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ) + {0x497cU, 0x7715U, 1U, 0U, 1U}, +}}; + +static constexpr fputil::ExceptValues COSHF16_EXCEPTS_NEG = {{ + // x = -0x1.6ap-5, coshf16(x) = 0x1p+0 (RZ) + {0xa9a8U, 0x3c00U, 1U, 0U, 1U}, + // x = -0x1.b6p+0, coshf16(x) = 0x1.6d8p+1 (RZ) + {0xbed8U, 0x41b6U, 1U, 0U, 1U}, + // x = -0x1.288p+2, coshf16(x) = 0x1.9b4p+5 (RZ) + {0xc4a2U, 0x526dU, 1U, 0U, 0U}, + // x = -0x1.5fp+3, coshf16(x) = 0x1.c54p+14 (RZ) + {0xc97cU, 0x7715U, 1U, 0U, 1U}, +}}; + +LLVM_LIBC_FUNCTION(float16, coshf16, (float16 x)) { + using FPBits = fputil::FPBits; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| >= acosh(2^16), or x is NaN. + if (LIBC_UNLIKELY(x_abs >= 0x49e5U)) { + // cosh(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // When |x| >= acosh(2^16). + if (x_abs >= 0x49e5U) { + // cosh(+/-inf) = +inf + if (x_bits.is_inf()) + return FPBits::inf().get_val(); + + switch (fputil::quick_get_round()) { + case FE_TONEAREST: + case FE_UPWARD: + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); + return FPBits::inf().get_val(); + default: + return FPBits::max_normal().get_val(); + } + } + } + + if (x_bits.is_pos()) { + if (auto r = COSHF16_EXCEPTS_POS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + } else { + if (auto r = COSHF16_EXCEPTS_NEG.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + } + + return eval_sinh_or_cosh(x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/expxf16.h b/libc/src/math/generic/expxf16.h index aba99a2914b41d..7202b1b113190e 100644 --- a/libc/src/math/generic/expxf16.h +++ b/libc/src/math/generic/expxf16.h @@ -12,6 +12,7 @@ #include "src/__support/CPP/array.h" #include "src/__support/FPUtil/FPBits.h" #include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/cast.h" #include "src/__support/FPUtil/multiply_add.h" #include "src/__support/FPUtil/nearest_integer.h" #include "src/__support/macros/attributes.h" @@ -174,6 +175,119 @@ LIBC_INLINE ExpRangeReduction exp10_range_reduction(float16 x) { return {exp2_hi_mid, exp10_lo}; } +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(log2(exp(1)), SG, RN); +static constexpr float LOG2F_E = 0x1.715476p+0f; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > round(log(2), SG, RN); +static constexpr float LOGF_2 = 0x1.62e43p-1f; + +// Generated by Sollya with the following commands: +// > display = hexadecimal; +// > for i from 0 to 31 do printsingle(round(2^(i * 2^-5), SG, RN)); +static constexpr cpp::array EXP2_MID_5_BITS = { + 0x3f80'0000U, 0x3f82'cd87U, 0x3f85'aac3U, 0x3f88'980fU, 0x3f8b'95c2U, + 0x3f8e'a43aU, 0x3f91'c3d3U, 0x3f94'f4f0U, 0x3f98'37f0U, 0x3f9b'8d3aU, + 0x3f9e'f532U, 0x3fa2'7043U, 0x3fa5'fed7U, 0x3fa9'a15bU, 0x3fad'583fU, + 0x3fb1'23f6U, 0x3fb5'04f3U, 0x3fb8'fbafU, 0x3fbd'08a4U, 0x3fc1'2c4dU, + 0x3fc5'672aU, 0x3fc9'b9beU, 0x3fce'248cU, 0x3fd2'a81eU, 0x3fd7'44fdU, + 0x3fdb'fbb8U, 0x3fe0'ccdfU, 0x3fe5'b907U, 0x3fea'c0c7U, 0x3fef'e4baU, + 0x3ff5'257dU, 0x3ffa'83b3U, +}; + +// This function correctly calculates sinh(x) and cosh(x) by calculating exp(x) +// and exp(-x) simultaneously. +// To compute e^x, we perform the following range reduction: +// find hi, mid, lo such that: +// x = (hi + mid) * log(2) + lo, in which +// hi is an integer, +// 0 <= mid * 2^5 < 32 is an integer +// -2^(-5) <= lo * log2(e) <= 2^-5. +// In particular, +// hi + mid = round(x * log2(e) * 2^5) * 2^(-5). +// Then, +// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo. +// We store 2^mid in the lookup table EXP2_MID_5_BITS, and compute 2^hi * 2^mid +// by adding hi to the exponent field of 2^mid. +// e^lo is computed using a degree-3 minimax polynomial generated by Sollya: +// e^lo ~ P(lo) +// = 1 + lo + c2 * lo^2 + ... + c5 * lo^5 +// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4) +// = P_even + lo * P_odd +// To compute e^(-x), notice that: +// e^(-x) = 2^(-(hi + mid)) * e^(-lo) +// ~ 2^(-(hi + mid)) * P(-lo) +// = 2^(-(hi + mid)) * (P_even - lo * P_odd) +// So: +// sinh(x) = (e^x - e^(-x)) / 2 +// ~ 0.5 * (2^(hi + mid) * (P_even + lo * P_odd) - +// 2^(-(hi + mid)) * (P_even - lo * P_odd)) +// = 0.5 * (P_even * (2^(hi + mid) - 2^(-(hi + mid))) + +// lo * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) +// And similarly: +// cosh(x) = (e^x + e^(-x)) / 2 +// ~ 0.5 * (P_even * (2^(hi + mid) + 2^(-(hi + mid))) + +// lo * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) +// The main point of these formulas is that the expensive part of calculating +// the polynomials approximating lower parts of e^x and e^(-x) is shared and +// only done once. +template LIBC_INLINE float16 eval_sinh_or_cosh(float16 x) { + float xf = x; + float kf = fputil::nearest_integer(xf * (LOG2F_E * 0x1.0p+5f)); + int x_hi_mid_p = static_cast(kf); + int x_hi_mid_m = -x_hi_mid_p; + + unsigned x_hi_p = static_cast(x_hi_mid_p) >> 5; + unsigned x_hi_m = static_cast(x_hi_mid_m) >> 5; + unsigned x_mid_p = static_cast(x_hi_mid_p) & 0x1f; + unsigned x_mid_m = static_cast(x_hi_mid_m) & 0x1f; + + uint32_t exp2_hi_mid_bits_p = + EXP2_MID_5_BITS[x_mid_p] + + static_cast(x_hi_p << fputil::FPBits::FRACTION_LEN); + uint32_t exp2_hi_mid_bits_m = + EXP2_MID_5_BITS[x_mid_m] + + static_cast(x_hi_m << fputil::FPBits::FRACTION_LEN); + // exp2_hi_mid_p = 2^(hi + mid) + float exp2_hi_mid_p = fputil::FPBits(exp2_hi_mid_bits_p).get_val(); + // exp2_hi_mid_m = 2^(-(hi + mid)) + float exp2_hi_mid_m = fputil::FPBits(exp2_hi_mid_bits_m).get_val(); + + // exp2_hi_mid_sum = 2^(hi + mid) + 2^(-(hi + mid)) + float exp2_hi_mid_sum = exp2_hi_mid_p + exp2_hi_mid_m; + // exp2_hi_mid_diff = 2^(hi + mid) - 2^(-(hi + mid)) + float exp2_hi_mid_diff = exp2_hi_mid_p - exp2_hi_mid_m; + + // lo = x - (hi + mid) = round(x * log2(e) * 2^5) * log(2) * (-2^(-5)) + x + float lo = fputil::multiply_add(kf, LOGF_2 * -0x1.0p-5f, xf); + float lo_sq = lo * lo; + + // Degree-3 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > P = fpminimax(expm1(x)/x, 2, [|SG...|], [-2^-5, 2^-5]); + // > 1 + x * P; + constexpr cpp::array COEFFS = {0x1p+0f, 0x1p+0f, 0x1.0004p-1f, + 0x1.555778p-3f}; + float half_p_odd = + fputil::polyeval(lo_sq, COEFFS[1] * 0.5f, COEFFS[3] * 0.5f); + float half_p_even = + fputil::polyeval(lo_sq, COEFFS[0] * 0.5f, COEFFS[2] * 0.5f); + + // sinh(x) = lo * (0.5 * P_odd * (2^(hi + mid) + 2^(-(hi + mid)))) + + // (0.5 * P_even * (2^(hi + mid) - 2^(-(hi + mid)))) + if constexpr (IsSinh) + return fputil::cast(fputil::multiply_add( + lo, half_p_odd * exp2_hi_mid_sum, half_p_even * exp2_hi_mid_diff)); + // cosh(x) = lo * (0.5 * P_odd * (2^(hi + mid) - 2^(-(hi + mid)))) + + // (0.5 * P_even * (2^(hi + mid) + 2^(-(hi + mid)))) + return fputil::cast(fputil::multiply_add( + lo, half_p_odd * exp2_hi_mid_diff, half_p_even * exp2_hi_mid_sum)); +} + } // namespace LIBC_NAMESPACE_DECL #endif // LLVM_LIBC_SRC_MATH_GENERIC_EXPXF16_H diff --git a/libc/src/math/generic/sinhf16.cpp b/libc/src/math/generic/sinhf16.cpp new file mode 100644 index 00000000000000..e2dd009dc72c6d --- /dev/null +++ b/libc/src/math/generic/sinhf16.cpp @@ -0,0 +1,144 @@ +//===-- Half-precision sinh(x) function -----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/sinhf16.h" +#include "expxf16.h" +#include "hdr/errno_macros.h" +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" + +namespace LIBC_NAMESPACE_DECL { + +static constexpr fputil::ExceptValues SINHF16_EXCEPTS_POS = {{ + // x = 0x1.714p-5, sinhf16(x) = 0x1.714p-5 (RZ) + {0x29c5U, 0x29c5U, 1U, 0U, 1U}, + // x = 0x1.25p-4, sinhf16(x) = 0x1.25p-4 (RZ) + {0x2c94U, 0x2c94U, 1U, 0U, 1U}, + // x = 0x1.f5p-4, sinhf16(x) = 0x1.f64p-4 (RZ) + {0x2fd4U, 0x2fd9U, 1U, 0U, 0U}, + // x = 0x1.b1cp-3, sinhf16(x) = 0x1.b4cp-3 (RZ) + {0x32c7U, 0x32d3U, 1U, 0U, 1U}, + // x = 0x1.6e8p-2, sinhf16(x) = 0x1.764p-2 (RZ) + {0x35baU, 0x35d9U, 1U, 0U, 1U}, + // x = 0x1.6b4p-1, sinhf16(x) = 0x1.8a4p-1 (RZ) + {0x39adU, 0x3a29U, 1U, 0U, 1U}, + // x = 0x1.a58p-1, sinhf16(x) = 0x1.d68p-1 (RZ) + {0x3a96U, 0x3b5aU, 1U, 0U, 1U}, + // x = 0x1.574p+0, sinhf16(x) = 0x1.c78p+0 (RZ) + {0x3d5dU, 0x3f1eU, 1U, 0U, 1U}, + // x = 0x1.648p+1, sinhf16(x) = 0x1.024p+3 (RZ) + {0x4192U, 0x4809U, 1U, 0U, 0U}, + // x = 0x1.cdcp+1, sinhf16(x) = 0x1.26cp+4 (RZ) + {0x4337U, 0x4c9bU, 1U, 0U, 0U}, + // x = 0x1.d0cp+1, sinhf16(x) = 0x1.2d8p+4 (RZ) + {0x4343U, 0x4cb6U, 1U, 0U, 1U}, + // x = 0x1.018p+2, sinhf16(x) = 0x1.bfp+4 (RZ) + {0x4406U, 0x4efcU, 1U, 0U, 0U}, + // x = 0x1.2fcp+2, sinhf16(x) = 0x1.cc4p+5 (RZ) + {0x44bfU, 0x5331U, 1U, 0U, 1U}, + // x = 0x1.4ecp+2, sinhf16(x) = 0x1.75cp+6 (RZ) + {0x453bU, 0x55d7U, 1U, 0U, 0U}, + // x = 0x1.8a4p+2, sinhf16(x) = 0x1.d94p+7 (RZ) + {0x4629U, 0x5b65U, 1U, 0U, 1U}, + // x = 0x1.5fp+3, sinhf16(x) = 0x1.c54p+14 (RZ) + {0x497cU, 0x7715U, 1U, 0U, 1U}, +}}; + +static constexpr fputil::ExceptValues SINHF16_EXCEPTS_NEG = {{ + // x = -0x1.714p-5, sinhf16(x) = -0x1.714p-5 (RZ) + {0xa9c5U, 0xa9c5U, 0U, 1U, 1U}, + // x = -0x1.25p-4, sinhf16(x) = -0x1.25p-4 (RZ) + {0xac94U, 0xac94U, 0U, 1U, 1U}, + // x = -0x1.f5p-4, sinhf16(x) = -0x1.f64p-4 (RZ) + {0xafd4U, 0xafd9U, 0U, 1U, 0U}, + // x = -0x1.6e8p-2, sinhf16(x) = -0x1.764p-2 (RZ) + {0xb5baU, 0xb5d9U, 0U, 1U, 1U}, + // x = -0x1.a58p-1, sinhf16(x) = -0x1.d68p-1 (RZ) + {0xba96U, 0xbb5aU, 0U, 1U, 1U}, + // x = -0x1.cdcp+1, sinhf16(x) = -0x1.26cp+4 (RZ) + {0xc337U, 0xcc9bU, 0U, 1U, 0U}, + // x = -0x1.d0cp+1, sinhf16(x) = -0x1.2d8p+4 (RZ) + {0xc343U, 0xccb6U, 0U, 1U, 1U}, + // x = -0x1.018p+2, sinhf16(x) = -0x1.bfp+4 (RZ) + {0xc406U, 0xcefcU, 0U, 1U, 0U}, + // x = -0x1.2fcp+2, sinhf16(x) = -0x1.cc4p+5 (RZ) + {0xc4bfU, 0xd331U, 0U, 1U, 1U}, + // x = -0x1.4ecp+2, sinhf16(x) = -0x1.75cp+6 (RZ) + {0xc53bU, 0xd5d7U, 0U, 1U, 0U}, + // x = -0x1.8a4p+2, sinhf16(x) = -0x1.d94p+7 (RZ) + {0xc629U, 0xdb65U, 0U, 1U, 1U}, + // x = -0x1.5fp+3, sinhf16(x) = -0x1.c54p+14 (RZ) + {0xc97cU, 0xf715U, 0U, 1U, 1U}, +}}; + +LLVM_LIBC_FUNCTION(float16, sinhf16, (float16 x)) { + using FPBits = fputil::FPBits; + FPBits x_bits(x); + + uint16_t x_u = x_bits.uintval(); + uint16_t x_abs = x_u & 0x7fffU; + + // When |x| = 0, or -2^(-14) <= x <= -2^(-9), or |x| >= asinh(2^16), or x is + // NaN. + if (LIBC_UNLIKELY(x_abs == 0U || (x_u >= 0x8400U && x_u <= 0xa400U) || + x_abs >= 0x49e5U)) { + // sinh(NaN) = NaN + if (x_bits.is_nan()) { + if (x_bits.is_signaling_nan()) { + fputil::raise_except_if_required(FE_INVALID); + return FPBits::quiet_nan().get_val(); + } + + return x; + } + + // sinh(+/-0) = sinh(+/-0) + if (x_abs == 0U) + return FPBits::zero(x_bits.sign()).get_val(); + + // When |x| >= asinh(2^16). + if (x_abs >= 0x49e5U) { + // sinh(+/-inf) = +/-inf + if (x_bits.is_inf()) + return FPBits::inf(x_bits.sign()).get_val(); + + int rounding_mode = fputil::quick_get_round(); + if (rounding_mode == FE_TONEAREST || + (x_bits.is_pos() && rounding_mode == FE_UPWARD) || + (x_bits.is_neg() && rounding_mode == FE_DOWNWARD)) { + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW | FE_INEXACT); + return FPBits::inf(x_bits.sign()).get_val(); + } + return FPBits::max_normal(x_bits.sign()).get_val(); + } + + // When -2^(-14) <= x <= -2^(-9). + if (fputil::fenv_is_round_down()) + return FPBits(static_cast(x_u + 1)).get_val(); + return FPBits(static_cast(x_u)).get_val(); + } + + if (x_bits.is_pos()) { + if (auto r = SINHF16_EXCEPTS_POS.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + } else { + if (auto r = SINHF16_EXCEPTS_NEG.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + } + + return eval_sinh_or_cosh(x); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/sinhf16.h b/libc/src/math/sinhf16.h new file mode 100644 index 00000000000000..8b8c1b64e7ec8d --- /dev/null +++ b/libc/src/math/sinhf16.h @@ -0,0 +1,21 @@ +//===-- Implementation header for sinhf16 -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_SINHF16_H +#define LLVM_LIBC_SRC_MATH_SINHF16_H + +#include "src/__support/macros/config.h" +#include "src/__support/macros/properties/types.h" + +namespace LIBC_NAMESPACE_DECL { + +float16 sinhf16(float16 x); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_MATH_SINHF16_H diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index 5dff0b49125b96..381a3f478f3761 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -1916,6 +1916,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + coshf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + coshf16_test.cpp + DEPENDS + libc.src.math.coshf16 +) + add_fp_unittest( sinhf_test NEED_MPFR @@ -1932,6 +1943,17 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + sinhf16_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + sinhf16_test.cpp + DEPENDS + libc.src.math.sinhf16 +) + add_fp_unittest( tanhf_test NEED_MPFR diff --git a/libc/test/src/math/coshf16_test.cpp b/libc/test/src/math/coshf16_test.cpp new file mode 100644 index 00000000000000..a0d1fd21104788 --- /dev/null +++ b/libc/test/src/math/coshf16_test.cpp @@ -0,0 +1,40 @@ +//===-- Exhaustive test for coshf16 ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/coshf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcCoshf16Test = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +// Range: [0, Inf]; +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7c00U; + +// Range: [-Inf, 0]; +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xfc00U; + +TEST_F(LlvmLibcCoshf16Test, PositiveRange) { + for (uint16_t v = POS_START; v <= POS_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cosh, x, + LIBC_NAMESPACE::coshf16(x), 0.5); + } +} + +TEST_F(LlvmLibcCoshf16Test, NegativeRange) { + for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Cosh, x, + LIBC_NAMESPACE::coshf16(x), 0.5); + } +} diff --git a/libc/test/src/math/sinhf16_test.cpp b/libc/test/src/math/sinhf16_test.cpp new file mode 100644 index 00000000000000..a16ab9279c4576 --- /dev/null +++ b/libc/test/src/math/sinhf16_test.cpp @@ -0,0 +1,40 @@ +//===-- Exhaustive test for sinhf16 ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/sinhf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +using LlvmLibcSinhf16Test = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +// Range: [0, Inf]; +static constexpr uint16_t POS_START = 0x0000U; +static constexpr uint16_t POS_STOP = 0x7c00U; + +// Range: [-Inf, 0]; +static constexpr uint16_t NEG_START = 0x8000U; +static constexpr uint16_t NEG_STOP = 0xfc00U; + +TEST_F(LlvmLibcSinhf16Test, PositiveRange) { + for (uint16_t v = POS_START; v <= POS_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinh, x, + LIBC_NAMESPACE::sinhf16(x), 0.5); + } +} + +TEST_F(LlvmLibcSinhf16Test, NegativeRange) { + for (uint16_t v = NEG_START; v <= NEG_STOP; ++v) { + float16 x = FPBits(v).get_val(); + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Sinh, x, + LIBC_NAMESPACE::sinhf16(x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 6b3623dc0d0dbf..f713430ee27ce8 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -3728,6 +3728,19 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + coshf16_test + SUITE + libc-math-smoke-tests + SRCS + coshf16_test.cpp + DEPENDS + libc.hdr.fenv_macros + libc.src.errno.errno + libc.src.math.coshf16 + libc.src.__support.FPUtil.cast +) + add_fp_unittest( sinhf_test SUITE @@ -3741,6 +3754,19 @@ add_fp_unittest( libc.src.__support.FPUtil.fp_bits ) +add_fp_unittest( + sinhf16_test + SUITE + libc-math-smoke-tests + SRCS + sinhf16_test.cpp + DEPENDS + libc.hdr.fenv_macros + libc.src.errno.errno + libc.src.math.sinhf16 + libc.src.__support.FPUtil.cast +) + add_fp_unittest( tanhf_test SUITE diff --git a/libc/test/src/math/smoke/coshf16_test.cpp b/libc/test/src/math/smoke/coshf16_test.cpp new file mode 100644 index 00000000000000..08d05ecce86baa --- /dev/null +++ b/libc/test/src/math/smoke/coshf16_test.cpp @@ -0,0 +1,90 @@ +//===-- Unittests for coshf16 ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/cast.h" +#include "src/errno/libc_errno.h" +#include "src/math/coshf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcCoshf16Test = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcCoshf16Test, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::coshf16(aNaN)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::coshf16(sNaN), FE_INVALID); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::coshf16(inf)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::coshf16(neg_inf)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(LIBC_NAMESPACE::fputil::cast(1.0), + LIBC_NAMESPACE::coshf16(zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(LIBC_NAMESPACE::fputil::cast(1.0), + LIBC_NAMESPACE::coshf16(neg_zero)); + EXPECT_MATH_ERRNO(0); +} + +TEST_F(LlvmLibcCoshf16Test, Overflow) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::coshf16(max_normal), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::coshf16(neg_max_normal), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + // round(acosh(2^16), HP, RU); + float16 x = LIBC_NAMESPACE::fputil::cast(0x1.794p+3); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST(inf, LIBC_NAMESPACE::coshf16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(inf, LIBC_NAMESPACE::coshf16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + max_normal, LIBC_NAMESPACE::coshf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + max_normal, LIBC_NAMESPACE::coshf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + // round(-acosh(2^16), HP, RD); + x = LIBC_NAMESPACE::fputil::cast(-0x1.794p+3); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST(inf, LIBC_NAMESPACE::coshf16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(inf, LIBC_NAMESPACE::coshf16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + max_normal, LIBC_NAMESPACE::coshf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + max_normal, LIBC_NAMESPACE::coshf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); +} diff --git a/libc/test/src/math/smoke/sinhf16_test.cpp b/libc/test/src/math/smoke/sinhf16_test.cpp new file mode 100644 index 00000000000000..4f21d33ba78e0c --- /dev/null +++ b/libc/test/src/math/smoke/sinhf16_test.cpp @@ -0,0 +1,88 @@ +//===-- Unittests for sinhf16 ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/fenv_macros.h" +#include "src/__support/FPUtil/cast.h" +#include "src/errno/libc_errno.h" +#include "src/math/sinhf16.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcSinhf16Test = LIBC_NAMESPACE::testing::FPTest; + +TEST_F(LlvmLibcSinhf16Test, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::sinhf16(aNaN)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION(aNaN, LIBC_NAMESPACE::sinhf16(sNaN), FE_INVALID); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::sinhf16(inf)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(neg_inf, LIBC_NAMESPACE::sinhf16(neg_inf)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(zero, LIBC_NAMESPACE::sinhf16(zero)); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_ALL_ROUNDING(neg_zero, LIBC_NAMESPACE::sinhf16(neg_zero)); + EXPECT_MATH_ERRNO(0); +} + +TEST_F(LlvmLibcSinhf16Test, Overflow) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::sinhf16(max_normal), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, LIBC_NAMESPACE::sinhf16(neg_max_normal), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + // round(asinh(2^16), HP, RU); + float16 x = LIBC_NAMESPACE::fputil::cast(0x1.794p+3); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST(inf, LIBC_NAMESPACE::sinhf16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD(inf, LIBC_NAMESPACE::sinhf16(x), + FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + max_normal, LIBC_NAMESPACE::sinhf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + max_normal, LIBC_NAMESPACE::sinhf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + // round(asinh(-2^16), HP, RD); + x = LIBC_NAMESPACE::fputil::cast(-0x1.794p+3); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_NEAREST( + neg_inf, LIBC_NAMESPACE::sinhf16(x), FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_UPWARD( + neg_max_normal, LIBC_NAMESPACE::sinhf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_DOWNWARD( + neg_inf, LIBC_NAMESPACE::sinhf16(x), FE_OVERFLOW | FE_INEXACT); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION_ROUNDING_TOWARD_ZERO( + neg_max_normal, LIBC_NAMESPACE::sinhf16(x), FE_INEXACT); + EXPECT_MATH_ERRNO(0); +} From ab7518050183162f09724ef8682a580cc68709bc Mon Sep 17 00:00:00 2001 From: Greg Roth Date: Thu, 17 Oct 2024 12:54:52 -0600 Subject: [PATCH 21/24] [DirectX] Remove trivially dead functions at linkage finalize (#106146) Functions are not removed even when made internal by DXILFinalizeLinkage. The removal code is called from alwaysinliner and globalopt, which are invoked too early to remove functions made internal by this pass. This adds a check similar to that in alwaysinliner that removes trivially dead functions after being marked internal. It refactors that code a bit to make it simpler including reversing what is stored int he work queue. Tests both the pass in isolation and the full i0nlining, linkage finalization and function removal steps. Fixes #106139 --- .../Target/DirectX/DXILFinalizeLinkage.cpp | 18 +- .../DirectX/ShaderFlags/double-extensions.ll | 3 +- .../CodeGen/DirectX/ShaderFlags/doubles.ll | 4 +- .../DirectX/conflicting-bitcast-insert.ll | 10 +- .../finalize-linkage-remove-dead-lib.ll | 222 ++++++++++++++++++ .../DirectX/finalize-linkage-remove-dead.ll | 156 ++++++++++++ llvm/test/CodeGen/DirectX/finalize_linkage.ll | 2 +- llvm/test/CodeGen/DirectX/fneg-conversion.ll | 6 +- .../CodeGen/DirectX/omit-bitcast-insert.ll | 14 +- llvm/test/CodeGen/DirectX/scalar-load.ll | 10 +- llvm/test/CodeGen/DirectX/scalar-store.ll | 6 +- .../CodeGen/DirectX/scalarize-two-calls.ll | 4 +- llvm/test/CodeGen/DirectX/strip-fn-attrs.ll | 2 +- 13 files changed, 425 insertions(+), 32 deletions(-) create mode 100644 llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead-lib.ll create mode 100644 llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead.ll diff --git a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp index d32dda2a67c951..aa1f55c572df29 100644 --- a/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp +++ b/llvm/lib/Target/DirectX/DXILFinalizeLinkage.cpp @@ -19,20 +19,20 @@ using namespace llvm; static bool finalizeLinkage(Module &M) { - SmallPtrSet EntriesAndExports; + SmallPtrSet Funcs; - // Find all entry points and export functions + // Collect non-entry and non-exported functions to set to internal linkage. for (Function &EF : M.functions()) { - if (!EF.hasFnAttribute("hlsl.shader") && !EF.hasFnAttribute("hlsl.export")) + if (EF.hasFnAttribute("hlsl.shader") || EF.hasFnAttribute("hlsl.export")) continue; - EntriesAndExports.insert(&EF); + Funcs.insert(&EF); } - for (Function &F : M.functions()) { - if (F.getLinkage() == GlobalValue::ExternalLinkage && - !EntriesAndExports.contains(&F)) { - F.setLinkage(GlobalValue::InternalLinkage); - } + for (Function *F : Funcs) { + if (F->getLinkage() == GlobalValue::ExternalLinkage) + F->setLinkage(GlobalValue::InternalLinkage); + if (F->isDefTriviallyDead()) + M.getFunctionList().erase(F); } return false; diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll index d027216e4213d5..a8d5f9c78f0b43 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/double-extensions.ll @@ -9,11 +9,12 @@ target triple = "dxil-pc-shadermodel6.7-library" ; CHECK-NEXT: ; Double-precision extensions for 11.1 ; CHECK-NEXT: ; Note: extra DXIL module flags: ; CHECK-NEXT: {{^;$}} -define double @div(double %a, double %b) { +define double @div(double %a, double %b) #0 { %res = fdiv double %a, %b ret double %res } +attributes #0 = { convergent norecurse nounwind "hlsl.export"} ; DXC: - Name: SFI0 ; DXC-NEXT: Size: 8 diff --git a/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll b/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll index c1a4c219a16951..e9b44240e10b9b 100644 --- a/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll +++ b/llvm/test/CodeGen/DirectX/ShaderFlags/doubles.ll @@ -9,11 +9,13 @@ target triple = "dxil-pc-shadermodel6.7-library" ; CHECK-NEXT: ; Note: extra DXIL module flags: ; CHECK-NEXT: {{^;$}} -define double @add(double %a, double %b) { +define double @add(double %a, double %b) #0 { %sum = fadd double %a, %b ret double %sum } +attributes #0 = { convergent norecurse nounwind "hlsl.export"} + ; DXC: - Name: SFI0 ; DXC-NEXT: Size: 8 ; DXC-NEXT: Flags: diff --git a/llvm/test/CodeGen/DirectX/conflicting-bitcast-insert.ll b/llvm/test/CodeGen/DirectX/conflicting-bitcast-insert.ll index 8f5d3ae8641717..39e21daceea81a 100644 --- a/llvm/test/CodeGen/DirectX/conflicting-bitcast-insert.ll +++ b/llvm/test/CodeGen/DirectX/conflicting-bitcast-insert.ll @@ -1,25 +1,27 @@ ; RUN: llc --filetype=asm %s -o - | FileCheck %s target triple = "dxil-unknown-shadermodel6.7-library" -define i64 @test(ptr %p) { +define i64 @test(ptr %p) #0 { store i32 0, ptr %p %v = load i64, ptr %p ret i64 %v } -; CHECK: define internal i64 @test(ptr %p) { +; CHECK: define i64 @test(ptr %p) #0 { ; CHECK-NEXT: %1 = bitcast ptr %p to ptr ; CHECK-NEXT: store i32 0, ptr %1, align 4 ; CHECK-NEXT: %2 = bitcast ptr %p to ptr ; CHECK-NEXT: %3 = load i64, ptr %2, align 8 -define i64 @testGEP(ptr %p) { +define i64 @testGEP(ptr %p) #0 { %ptr = getelementptr i32, ptr %p, i32 4 %val = load i64, ptr %p ret i64 %val } -; CHECK: define internal i64 @testGEP(ptr %p) { +attributes #0 = { convergent norecurse nounwind "hlsl.export"} + +; CHECK: define i64 @testGEP(ptr %p) #0 { ; CHECK-NEXT: %1 = bitcast ptr %p to ptr ; CHECK-NEXT: %ptr = getelementptr i32, ptr %1, i32 4 ; CHECK-NEXT: %2 = bitcast ptr %p to ptr diff --git a/llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead-lib.ll b/llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead-lib.ll new file mode 100644 index 00000000000000..202609c8156a72 --- /dev/null +++ b/llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead-lib.ll @@ -0,0 +1,222 @@ +; RUN: opt -S -dxil-finalize-linkage -mtriple=dxil-unknown-shadermodel6.5-library %s | FileCheck %s +; RUN: llc %s --filetype=asm -o - | FileCheck %s + +target triple = "dxilv1.5-pc-shadermodel6.5-compute" + +; Confirm that DXILFinalizeLinkage will remove functions that have compatible +; linkage and are not called from anywhere. This should be any function that +; is not explicitly marked export and is not an entry point. + +; Has no specified inlining/linking behavior and is uncalled, this should be removed. +; CHECK-NOT: define {{.*}}doNothingUncalled +define void @"?doNothingUncalled@@YAXXZ"() #2 { +entry: + ret void +} + +; Alwaysinline and uncalled, this should be removed. +; CHECK-NOT: define {{.*}}doAlwaysInlineUncalled +define void @"?doAlwaysInlineUncalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline and uncalled, this should be removed. +; CHECK-NOT: define {{.*}}doNoinlineUncalled +define void @"?doNoinlineUncalled@@YAXXZ"() #4 { +entry: + ret void +} + +; No inlining attribute, internal, and uncalled; this should be removed. +; CHECK-NOT: define {{.*}}doInternalUncalled +define internal void @"?doInternalUncalled@@YAXXZ"() #2 { +entry: + ret void +} + +; Alwaysinline, internal, and uncalled; this should be removed. +; CHECK-NOT: define {{.*}}doAlwaysInlineInternalUncalled +define internal void @"?doAlwaysInlineInternalUncalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, internal, and uncalled; this should be removed. +; CHECK-NOT: define {{.*}}doNoinlineInternalUncalled +define internal void @"?doNoinlineInternalUncalled@@YAXXZ"() #4 { +entry: + ret void +} + +; Marked external and uncalled, this should become internal and be removed. +; CHECK-NOT: define {{.*}}doExternalUncalled +define external void @"?doExternalUncalled@@YAXXZ"() #2 { +entry: + ret void +} + +; Alwaysinline, external and uncalled, this should become internal and be removed. +; CHECK-NOT: define {{.*}}doAlwaysInlineExternalUncalled +define external void @"?doAlwaysInlineExternalUncalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, external and uncalled, this should become internal and be removed. +; CHECK-NOT: define {{.*}}doNoinlineExternalUncalled +define external void @"?doNoinlineExternalUncalled@@YAXXZ"() #4 { +entry: + ret void +} + +; No inlining attribute and called, this should stay. +; CHECK: define {{.*}}doNothingCalled +define void @"?doNothingCalled@@YAXXZ"() #2 { +entry: + ret void +} + +; Alwaysinline and called, this should stay. +; CHECK: define {{.*}}doAlwaysInlineCalled +define void @"?doAlwaysInlineCalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline and called, this should stay. +; CHECK: define {{.*}}doNoinlineCalled +define void @"?doNoinlineCalled@@YAXXZ"() #4 { +entry: + ret void +} + +; No inlining attribute, internal, and called; this should stay. +; CHECK: define {{.*}}doInternalCalled +define internal void @"?doInternalCalled@@YAXXZ"() #2 { +entry: + ret void +} + +; Alwaysinline, internal, and called; this should stay. +; CHECK: define {{.*}}doAlwaysInlineInternalCalled +define internal void @"?doAlwaysInlineInternalCalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, internal, and called; this should stay. +; CHECK: define {{.*}}doNoinlineInternalCalled +define internal void @"?doNoinlineInternalCalled@@YAXXZ"() #4 { +entry: + ret void +} + +; Marked external and called, this should become internal and stay. +; CHECK: define {{.*}}doExternalCalled +define external void @"?doExternalCalled@@YAXXZ"() #2 { +entry: + ret void +} + +; Always inlined, external and called, this should become internal and stay. +; CHECK: define {{.*}}doAlwaysInlineExternalCalled +define external void @"?doAlwaysInlineExternalCalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, external and called, this should become internal and stay. +; CHECK: define {{.*}}doNoinlineExternalCalled +define external void @"?doNoinlineExternalCalled@@YAXXZ"() #4 { +entry: + ret void +} + +; No inlining attribute and exported, this should stay. +; CHECK: define {{.*}}doNothingExported +define void @"?doNothingExported@@YAXXZ"() #3 { +entry: + ret void +} + +; Alwaysinline and exported, this should stay. +; CHECK: define {{.*}}doAlwaysInlineExported +define void @"?doAlwaysInlineExported@@YAXXZ"() #1 { +entry: + ret void +} + +; Noinline attribute and exported, this should stay. +; CHECK: define {{.*}}doNoinlineExported +define void @"?doNoinlineExported@@YAXXZ"() #5 { +entry: + ret void +} + +; No inlining attribute, internal, and exported; this should stay. +; CHECK: define {{.*}}doInternalExported +define internal void @"?doInternalExported@@YAXXZ"() #3 { +entry: + ret void +} + +; Alwaysinline, internal, and exported; this should stay. +; CHECK: define {{.*}}doAlwaysInlineInternalExported +define internal void @"?doAlwaysInlineInternalExported@@YAXXZ"() #1 { +entry: + ret void +} + +; Noinline, internal, and exported; this should stay. +; CHECK: define {{.*}}doNoinlineInternalExported +define internal void @"?doNoinlineInternalExported@@YAXXZ"() #5 { +entry: + ret void +} + +; Marked external and exported, this should stay. +; CHECK: define {{.*}}doExternalExported +define external void @"?doExternalExported@@YAXXZ"() #3 { +entry: + ret void +} + +; Alwaysinline, external and exported, this should stay. +; CHECK: define {{.*}}doAlwaysInlineExternalExported +define external void @"?doAlwaysInlineExternalExported@@YAXXZ"() #1 { +entry: + ret void +} + +; Noinline, external and exported, this should stay. +; CHECK: define {{.*}}doNoinlineExternalExported +define external void @"?doNoinlineExternalExported@@YAXXZ"() #5 { +entry: + ret void +} + +; Entry point function, this should stay. +; CHECK: define void @main() +define void @main() #6 { +entry: + call void @"?doNothingCalled@@YAXXZ"() #7 + call void @"?doAlwaysInlineCalled@@YAXXZ"() #7 + call void @"?doNoinlineCalled@@YAXXZ"() #7 + call void @"?doInternalCalled@@YAXXZ"() #7 + call void @"?doAlwaysInlineInternalCalled@@YAXXZ"() #7 + call void @"?doNoinlineInternalCalled@@YAXXZ"() #7 + call void @"?doExternalCalled@@YAXXZ"() #7 + call void @"?doAlwaysInlineExternalCalled@@YAXXZ"() #7 + call void @"?doNoinlineExternalCalled@@YAXXZ"() #7 + ret void +} + +attributes #0 = { alwaysinline convergent norecurse nounwind } +attributes #1 = { alwaysinline convergent norecurse nounwind "hlsl.export"} +attributes #2 = { convergent norecurse nounwind } +attributes #3 = { convergent norecurse nounwind "hlsl.export"} +attributes #4 = { convergent noinline norecurse nounwind } +attributes #5 = { convergent noinline norecurse nounwind "hlsl.export"} +attributes #6 = { convergent noinline norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } +attributes #7 = { convergent } diff --git a/llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead.ll b/llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead.ll new file mode 100644 index 00000000000000..49c3bda621d74e --- /dev/null +++ b/llvm/test/CodeGen/DirectX/finalize-linkage-remove-dead.ll @@ -0,0 +1,156 @@ +; RUN: opt -S -dxil-finalize-linkage -mtriple=dxil-unknown-shadermodel6.5-compute %s | FileCheck %s +; RUN: llc %s --filetype=asm -o - | FileCheck %s + +target triple = "dxilv1.5-pc-shadermodel6.5-compute" + +; Confirm that DXILFinalizeLinkage will remove functions that have compatible +; linkage and are not called from anywhere. This should be any function that +; is not an entry point. + +; Has no specified inlining/linking behavior and is uncalled, this should be removed. +; CHECK-NOT: define {{.*}}doNothingUncalled +define void @"?doNothingUncalled@@YAXXZ"() #1 { +entry: + ret void +} + +; Alwaysinline and uncalled, this should be removed. +; CHECK-NOT: define {{.*}}doAlwaysInlineUncalled +define void @"?doAlwaysInlineUncalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline and uncalled, this should be removed. +; CHECK-NOT: define {{.*}}doNoinlineUncalled +define void @"?doNoinlineUncalled@@YAXXZ"() #3 { +entry: + ret void +} + +; No inlining attribute, internal, and uncalled; this should be removed. +; CHECK-NOT: define {{.*}}doInternalUncalled +define internal void @"?doInternalUncalled@@YAXXZ"() #1 { +entry: + ret void +} + +; Alwaysinline, internal, and uncalled; this should be removed. +; CHECK-NOT: define {{.*}}doAlwaysInlineInternalUncalled +define internal void @"?doAlwaysInlineInternalUncalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, internal, and uncalled; this should be removed. +; CHECK-NOT: define {{.*}}doNoinlineInternalUncalled +define internal void @"?doNoinlineInternalUncalled@@YAXXZ"() #3 { +entry: + ret void +} + +; Marked external and uncalled, this should become internal and be removed. +; CHECK-NOT: define {{.*}}doExternalUncalled +define external void @"?doExternalUncalled@@YAXXZ"() #1 { +entry: + ret void +} + +; Alwaysinline, external and uncalled, this should become internal and be removed. +; CHECK-NOT: define {{.*}}doAlwaysInlineExternalUncalled +define external void @"?doAlwaysInlineExternalUncalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, external and uncalled, this should become internal and be removed. +; CHECK-NOT: define {{.*}}doNoinlineExternalUncalled +define external void @"?doNoinlineExternalUncalled@@YAXXZ"() #3 { +entry: + ret void +} + +; No inlining attribute and called, this should stay. +; CHECK: define {{.*}}doNothingCalled +define void @"?doNothingCalled@@YAXXZ"() #1 { +entry: + ret void +} + +; Alwaysinline and called, this should stay. +; CHECK: define {{.*}}doAlwaysInlineCalled +define void @"?doAlwaysInlineCalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline and called, this should stay. +; CHECK: define {{.*}}doNoinlineCalled +define void @"?doNoinlineCalled@@YAXXZ"() #3 { +entry: + ret void +} + +; No inlining attribute, internal, and called; this should stay. +; CHECK: define {{.*}}doInternalCalled +define internal void @"?doInternalCalled@@YAXXZ"() #1 { +entry: + ret void +} + +; Alwaysinline, internal, and called; this should stay. +; CHECK: define {{.*}}doAlwaysInlineInternalCalled +define internal void @"?doAlwaysInlineInternalCalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, internal, and called; this should stay. +; CHECK: define {{.*}}doNoinlineInternalCalled +define internal void @"?doNoinlineInternalCalled@@YAXXZ"() #3 { +entry: + ret void +} + +; Marked external and called, this should become internal and stay. +; CHECK: define {{.*}}doExternalCalled +define external void @"?doExternalCalled@@YAXXZ"() #1 { +entry: + ret void +} + +; Always inlined, external and called, this should become internal and stay. +; CHECK: define {{.*}}doAlwaysInlineExternalCalled +define external void @"?doAlwaysInlineExternalCalled@@YAXXZ"() #0 { +entry: + ret void +} + +; Noinline, external and called, this should become internal and stay. +; CHECK: define {{.*}}doNoinlineExternalCalled +define external void @"?doNoinlineExternalCalled@@YAXXZ"() #3 { +entry: + ret void +} + +; Entry point function, this should stay. +; CHECK: define void @main() +define void @main() #4 { +entry: + call void @"?doNothingCalled@@YAXXZ"() #5 + call void @"?doAlwaysInlineCalled@@YAXXZ"() #5 + call void @"?doNoinlineCalled@@YAXXZ"() #5 + call void @"?doInternalCalled@@YAXXZ"() #5 + call void @"?doAlwaysInlineInternalCalled@@YAXXZ"() #5 + call void @"?doNoinlineInternalCalled@@YAXXZ"() #5 + call void @"?doExternalCalled@@YAXXZ"() #5 + call void @"?doAlwaysInlineExternalCalled@@YAXXZ"() #5 + call void @"?doNoinlineExternalCalled@@YAXXZ"() #5 + ret void +} + +attributes #0 = { alwaysinline convergent norecurse nounwind } +attributes #1 = { convergent norecurse nounwind } +attributes #3 = { convergent noinline norecurse nounwind } +attributes #4 = { convergent noinline norecurse "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } +attributes #5 = { convergent } diff --git a/llvm/test/CodeGen/DirectX/finalize_linkage.ll b/llvm/test/CodeGen/DirectX/finalize_linkage.ll index b6da9f6cb3926a..c761a79a5c28a5 100644 --- a/llvm/test/CodeGen/DirectX/finalize_linkage.ll +++ b/llvm/test/CodeGen/DirectX/finalize_linkage.ll @@ -6,7 +6,7 @@ target triple = "dxilv1.5-pc-shadermodel6.5-compute" ; DXILFinalizeLinkage changes linkage of all functions that are not ; entry points or exported function to internal. -; CHECK: define internal void @"?f1@@YAXXZ"() +; CHECK-NOT: define internal void @"?f1@@YAXXZ"() define void @"?f1@@YAXXZ"() #0 { entry: ret void diff --git a/llvm/test/CodeGen/DirectX/fneg-conversion.ll b/llvm/test/CodeGen/DirectX/fneg-conversion.ll index a397c18398c5f7..3acf4790de4b10 100644 --- a/llvm/test/CodeGen/DirectX/fneg-conversion.ll +++ b/llvm/test/CodeGen/DirectX/fneg-conversion.ll @@ -1,14 +1,16 @@ ; RUN: llc %s --filetype=asm -o - | FileCheck %s target triple = "dxil-unknown-shadermodel6.7-library" -define float @negateF(float %0) { +define float @negateF(float %0) #0 { ; CHECK: %2 = fsub float -0.000000e+00, %0 %2 = fneg float %0 ret float %2 } -define double @negateD(double %0) { +define double @negateD(double %0) #0 { ; CHECK: %2 = fsub double -0.000000e+00, %0 %2 = fneg double %0 ret double %2 } + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} \ No newline at end of file diff --git a/llvm/test/CodeGen/DirectX/omit-bitcast-insert.ll b/llvm/test/CodeGen/DirectX/omit-bitcast-insert.ll index 6066a0033e45de..734ff1b4dd2d57 100644 --- a/llvm/test/CodeGen/DirectX/omit-bitcast-insert.ll +++ b/llvm/test/CodeGen/DirectX/omit-bitcast-insert.ll @@ -1,32 +1,34 @@ ; RUN: llc --filetype=asm %s -o - | FileCheck %s target triple = "dxil-unknown-shadermodel6.7-library" -define i64 @test(ptr %p) { +define i64 @test(ptr %p) #0 { %v = load i64, ptr %p ret i64 %v } -; CHECK: define internal i64 @test(ptr %p) { +; CHECK: define i64 @test(ptr %p) #0 { ; CHECK-NEXT: %v = load i64, ptr %p, align 8 ; CHECK-NEXT: ret i64 %v -define i64 @test2(ptr %p) { +define i64 @test2(ptr %p) #0 { store i64 0, ptr %p %v = load i64, ptr %p ret i64 %v } -; CHECK: define internal i64 @test2(ptr %p) { +; CHECK: define i64 @test2(ptr %p) #0 { ; CHECK-NEXT: store i64 0, ptr %p ; CHECK-NEXT: %v = load i64, ptr %p, align 8 ; CHECK-NEXT: ret i64 %v -define i32 @test3(ptr %0) { +define i32 @test3(ptr %0) #0 { %2 = getelementptr i32, ptr %0, i32 4 %3 = load i32, ptr %2 ret i32 %3 } -; CHECK: define internal i32 @test3(ptr %0) { +attributes #0 = { convergent norecurse nounwind "hlsl.export"} + +; CHECK: define i32 @test3(ptr %0) #0 { ; CHECK-NEXT: %2 = getelementptr i32, ptr %0, i32 4 ; CHECK-NEXT: %3 = load i32, ptr %2 diff --git a/llvm/test/CodeGen/DirectX/scalar-load.ll b/llvm/test/CodeGen/DirectX/scalar-load.ll index 612e5646f5a081..b911a8f7855bb8 100644 --- a/llvm/test/CodeGen/DirectX/scalar-load.ll +++ b/llvm/test/CodeGen/DirectX/scalar-load.ll @@ -20,7 +20,7 @@ ; CHECK-LABEL: load_array_vec_test -define <4 x i32> @load_array_vec_test() { +define <4 x i32> @load_array_vec_test() #0 { ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align 4 ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([2 x <4 x i32>], [2 x <4 x i32>] addrspace(3)* @"arrayofVecData", i32 0, i32 0), align 4 @@ -30,7 +30,7 @@ define <4 x i32> @load_array_vec_test() { } ; CHECK-LABEL: load_vec_test -define <4 x i32> @load_vec_test() { +define <4 x i32> @load_vec_test() #0 { ; CHECK-COUNT-4: load i32, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align {{.*}} ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 %1 = load <4 x i32>, <4 x i32> addrspace(3)* @"vecData", align 4 @@ -38,7 +38,7 @@ define <4 x i32> @load_vec_test() { } ; CHECK-LABEL: load_static_array_of_vec_test -define <4 x i32> @load_static_array_of_vec_test(i32 %index) { +define <4 x i32> @load_static_array_of_vec_test(i32 %index) #0 { ; CHECK: getelementptr [3 x [4 x i32]], ptr @staticArrayOfVecData.scalarized, i32 0, i32 %index ; CHECK-COUNT-4: load i32, ptr {{.*}}, align 4 ; CHECK-NOT: load i32, ptr {{.*}}, align 4 @@ -48,7 +48,7 @@ define <4 x i32> @load_static_array_of_vec_test(i32 %index) { } ; CHECK-LABEL: multid_load_test -define <4 x i32> @multid_load_test() { +define <4 x i32> @multid_load_test() #0 { ; CHECK-COUNT-8: load i32, ptr addrspace(3) {{(.*@groushared2dArrayofVectors.scalarized.*|%.*)}}, align 4 ; CHECK-NOT: load i32, ptr addrspace(3) {{.*}}, align 4 %1 = load <4 x i32>, <4 x i32> addrspace(3)* getelementptr inbounds ([3 x [3 x <4 x i32>]], [3 x [3 x <4 x i32>]] addrspace(3)* @"groushared2dArrayofVectors", i32 0, i32 0, i32 0), align 4 @@ -56,3 +56,5 @@ define <4 x i32> @multid_load_test() { %3 = add <4 x i32> %1, %2 ret <4 x i32> %3 } + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} diff --git a/llvm/test/CodeGen/DirectX/scalar-store.ll b/llvm/test/CodeGen/DirectX/scalar-store.ll index 7734d32bca58cd..c45481e8cae14f 100644 --- a/llvm/test/CodeGen/DirectX/scalar-store.ll +++ b/llvm/test/CodeGen/DirectX/scalar-store.ll @@ -12,7 +12,7 @@ ; CHECK-NOT: @vecData ; CHECK-LABEL: store_array_vec_test -define void @store_array_vec_test () local_unnamed_addr { +define void @store_array_vec_test () local_unnamed_addr #0 { ; CHECK-COUNT-6: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}} ; CHECK-NOT: store float {{1|2|3|4|6}}.000000e+00, ptr addrspace(3) {{(.*@arrayofVecData.scalarized.*|%.*)}}, align {{4|8|16}} store <3 x float> , ptr addrspace(3) @"arrayofVecData", align 16 @@ -21,9 +21,11 @@ define void @store_array_vec_test () local_unnamed_addr { } ; CHECK-LABEL: store_vec_test -define void @store_vec_test(<4 x i32> %inputVec) { +define void @store_vec_test(<4 x i32> %inputVec) #0 { ; CHECK-COUNT-4: store i32 %inputVec.{{.*}}, ptr addrspace(3) {{(@vecData.scalarized|getelementptr \(i32, ptr addrspace\(3\) @vecData.scalarized, i32 .*\)|%.*)}}, align 4 ; CHECK-NOT: store i32 %inputVec.{{.*}}, ptr addrspace(3) store <4 x i32> %inputVec, <4 x i32> addrspace(3)* @"vecData", align 4 ret void } + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} diff --git a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll index a14c1de5cc4205..0546a5505416f1 100644 --- a/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll +++ b/llvm/test/CodeGen/DirectX/scalarize-two-calls.ll @@ -2,7 +2,7 @@ ; CHECK: target triple = "dxilv1.3-pc-shadermodel6.3-library" ; CHECK-LABEL: cos_sin_float_test -define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { +define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) #0 { ; CHECK: [[ee0:%.*]] = extractelement <4 x float> %a, i64 0 ; CHECK: [[ie0:%.*]] = call float @dx.op.unary.f32(i32 13, float [[ee0]]) ; CHECK: [[ee1:%.*]] = extractelement <4 x float> %a, i64 1 @@ -23,3 +23,5 @@ define noundef <4 x float> @cos_sin_float_test(<4 x float> noundef %a) { %3 = tail call <4 x float> @llvm.cos.v4f32(<4 x float> %2) ret <4 x float> %3 } + +attributes #0 = { convergent norecurse nounwind "hlsl.export"} diff --git a/llvm/test/CodeGen/DirectX/strip-fn-attrs.ll b/llvm/test/CodeGen/DirectX/strip-fn-attrs.ll index b0dd89cf90f2b2..4223c909431945 100644 --- a/llvm/test/CodeGen/DirectX/strip-fn-attrs.ll +++ b/llvm/test/CodeGen/DirectX/strip-fn-attrs.ll @@ -12,4 +12,4 @@ define dso_local float @fma(float %0, float %1, float %2) local_unnamed_addr #0 ; CHECK: attributes #0 = { nounwind memory(none) } ; CHECK-NOT: attributes # -attributes #0 = { norecurse nounwind readnone willreturn } +attributes #0 = { norecurse nounwind readnone willreturn "hlsl.export"} From a9d39ce5d2e93e76598732e2caeaae0dbe155f1c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 17 Oct 2024 11:55:31 -0700 Subject: [PATCH 22/24] [RISCV][GISel] Pass APInt by const reference. NFC --- llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 2 +- llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index d1449f751b40a0..c06ab061ddc338 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -670,7 +670,7 @@ bool RISCVLegalizerInfo::legalizeVAStart(MachineInstr &MI, return true; } -bool RISCVLegalizerInfo::shouldBeInConstantPool(APInt APImm, +bool RISCVLegalizerInfo::shouldBeInConstantPool(const APInt &APImm, bool ShouldOptForSize) const { assert(APImm.getBitWidth() == 32 || APImm.getBitWidth() == 64); int64_t Imm = APImm.getSExtValue(); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index d2afb175ae42bb..ee8b08bc9e3162 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -38,7 +38,7 @@ class RISCVLegalizerInfo : public LegalizerInfo { MachineInstr &MI) const override; private: - bool shouldBeInConstantPool(APInt APImm, bool ShouldOptForSize) const; + bool shouldBeInConstantPool(const APInt &APImm, bool ShouldOptForSize) const; bool legalizeShlAshrLshr(MachineInstr &MI, MachineIRBuilder &MIRBuilder, GISelChangeObserver &Observer) const; From 29097dd2f39cfd7e5f2e389b0f0a7701188d7570 Mon Sep 17 00:00:00 2001 From: Chris Apple Date: Thu, 17 Oct 2024 11:58:07 -0700 Subject: [PATCH 23/24] [rtsan][NFC] Remove rtsan_ prefix from stats variables (#112762) --- compiler-rt/lib/rtsan/rtsan_stats.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/compiler-rt/lib/rtsan/rtsan_stats.cpp b/compiler-rt/lib/rtsan/rtsan_stats.cpp index 1562b73cf94cd8..277182a7abc840 100644 --- a/compiler-rt/lib/rtsan/rtsan_stats.cpp +++ b/compiler-rt/lib/rtsan/rtsan_stats.cpp @@ -19,32 +19,32 @@ using namespace __sanitizer; using namespace __rtsan; -static atomic_uint32_t rtsan_total_error_count{0}; -static atomic_uint32_t rtsan_unique_error_count{0}; -static atomic_uint32_t rtsan_suppressed_count{0}; +static atomic_uint32_t total_error_count{0}; +static atomic_uint32_t unique_error_count{0}; +static atomic_uint32_t suppressed_count{0}; void __rtsan::IncrementTotalErrorCount() { - atomic_fetch_add(&rtsan_total_error_count, 1, memory_order_relaxed); + atomic_fetch_add(&total_error_count, 1, memory_order_relaxed); } void __rtsan::IncrementUniqueErrorCount() { - atomic_fetch_add(&rtsan_unique_error_count, 1, memory_order_relaxed); + atomic_fetch_add(&unique_error_count, 1, memory_order_relaxed); } static u32 GetTotalErrorCount() { - return atomic_load(&rtsan_total_error_count, memory_order_relaxed); + return atomic_load(&total_error_count, memory_order_relaxed); } static u32 GetUniqueErrorCount() { - return atomic_load(&rtsan_unique_error_count, memory_order_relaxed); + return atomic_load(&unique_error_count, memory_order_relaxed); } void __rtsan::IncrementSuppressedCount() { - atomic_fetch_add(&rtsan_suppressed_count, 1, memory_order_relaxed); + atomic_fetch_add(&suppressed_count, 1, memory_order_relaxed); } static u32 GetSuppressedCount() { - return atomic_load(&rtsan_suppressed_count, memory_order_relaxed); + return atomic_load(&suppressed_count, memory_order_relaxed); } void __rtsan::PrintStatisticsSummary() { From 32b55f375feaf6bcc2c870964a0bf087cf3c22bf Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Thu, 17 Oct 2024 21:32:20 +0200 Subject: [PATCH 24/24] Add support of the next Ubuntu (Ubuntu 25.04 - Plucky Puffin) --- clang/include/clang/Driver/Distro.h | 3 ++- clang/lib/Driver/Distro.cpp | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Driver/Distro.h b/clang/include/clang/Driver/Distro.h index 1404e168684821..b4d485dac8a269 100644 --- a/clang/include/clang/Driver/Distro.h +++ b/clang/include/clang/Driver/Distro.h @@ -80,6 +80,7 @@ class Distro { UbuntuMantic, UbuntuNoble, UbuntuOracular, + UbuntuPlucky, UnknownDistro }; @@ -131,7 +132,7 @@ class Distro { } bool IsUbuntu() const { - return DistroVal >= UbuntuHardy && DistroVal <= UbuntuOracular; + return DistroVal >= UbuntuHardy && DistroVal <= UbuntuPlucky; } bool IsAlpineLinux() const { return DistroVal == AlpineLinux; } diff --git a/clang/lib/Driver/Distro.cpp b/clang/lib/Driver/Distro.cpp index 6f49e641104ccd..3d1bce027f664d 100644 --- a/clang/lib/Driver/Distro.cpp +++ b/clang/lib/Driver/Distro.cpp @@ -96,6 +96,7 @@ static Distro::DistroType DetectLsbRelease(llvm::vfs::FileSystem &VFS) { .Case("mantic", Distro::UbuntuMantic) .Case("noble", Distro::UbuntuNoble) .Case("oracular", Distro::UbuntuOracular) + .Case("plucky", Distro::UbuntuPlucky) .Default(Distro::UnknownDistro); return Version; }