AArch64: Use consistent atomicrmw expansion for FP operations #103702

Merged 4 commits on Aug 29, 2024
59 changes: 42 additions & 17 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -27096,21 +27096,37 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
              : AtomicExpansionKind::LLSC;
 }
 
+// Return true if the atomic operation expansion will lower to use a library
+// call, and is thus ineligible to use an LLSC expansion.
+static bool rmwOpMayLowerToLibcall(const AArch64Subtarget &Subtarget,
+                                   const AtomicRMWInst *RMW) {
+  if (!RMW->isFloatingPointOperation())
+    return false;
+  switch (RMW->getType()->getScalarType()->getTypeID()) {
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+  case Type::HalfTyID:
+  case Type::BFloatTyID:
+    // Will use soft float
+    return !Subtarget.hasFPARMv8();
+  default:
+    // fp128 will emit library calls.
+    return true;
+  }
+
+  llvm_unreachable("covered type switch");
+}
+
 // The "default" for integer RMW operations is to expand to an LL/SC loop.
 // However, with the LSE instructions (or outline-atomics mode, which provides
 // library routines in place of the LSE-instructions), we can directly emit many
 // operations instead.
-//
-// Floating-point operations are always emitted to a cmpxchg loop, because they
-// may trigger a trap which aborts an LLSC sequence.
 TargetLowering::AtomicExpansionKind
 AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
-  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
+  Type *Ty = AI->getType();
+  unsigned Size = Ty->getPrimitiveSizeInBits();
   assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");
 
-  if (AI->isFloatingPointOperation())
-    return AtomicExpansionKind::CmpXChg;
-
   bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
                       (AI->getOperation() == AtomicRMWInst::Xchg ||
                        AI->getOperation() == AtomicRMWInst::Or ||
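For context on why rmwOpMayLowerToLibcall matters: an LL/SC expansion places the operation between a load-exclusive and a store-exclusive, and a function call in that window (for example the soft-float __addsf3 routine that a float add becomes without FPARMv8) can clear the exclusive monitor, so the store-exclusive may never succeed. A minimal C sketch of that shape, where __ldaxr_f32 and __stlxr_f32 are hypothetical stand-ins for the real instructions:

    /* Hypothetical helpers standing in for ldaxr/stlxr; real codegen
     * emits the instructions directly rather than calling functions. */
    extern float __ldaxr_f32(float *p);        /* load-exclusive */
    extern int __stlxr_f32(float *p, float v); /* store-exclusive, 0 on success */

    float atomic_fadd_llsc(float *p, float v) {
      float old, sum;
      do {
        old = __ldaxr_f32(p); /* arms the exclusive monitor */
        sum = old + v;        /* if this add lowers to a libcall, the call
                                 can clobber the monitor, and the loop may
                                 spin forever -- hence the CmpXChg fallback */
      } while (__stlxr_f32(p, sum) != 0);
      return old;
    }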
@@ -27120,7 +27136,8 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
   // Nand is not supported in LSE.
   // Leave 128 bits to LLSC or CmpXChg.
-  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128) {
+  if (AI->getOperation() != AtomicRMWInst::Nand && Size < 128 &&
+      !AI->isFloatingPointOperation()) {
     if (Subtarget->hasLSE())
       return AtomicExpansionKind::None;
     if (Subtarget->outlineAtomics()) {
@@ -27146,7 +27163,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   // succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
   // we have a single CAS instruction that can replace the loop.
   if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
-      Subtarget->hasLSE())
+      Subtarget->hasLSE() || rmwOpMayLowerToLibcall(*Subtarget, AI))
     return AtomicExpansionKind::CmpXChg;
 
   return AtomicExpansionKind::LLSC;
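The CmpXChg expansion chosen above avoids the exclusive-monitor hazard: the floating-point operation, and any libcall it lowers to, runs between ordinary atomic accesses rather than inside an exclusive sequence. Roughly, in C11 terms (a sketch of the shape, not the exact IR AtomicExpandPass emits):

    #include <stdatomic.h>

    float atomic_fadd_cmpxchg(_Atomic float *p, float v) {
      float expected = atomic_load(p);
      float desired;
      do {
        /* the FP add (possibly a libcall) happens here, outside
         * any exclusive sequence */
        desired = expected + v;
        /* on failure, compare_exchange reloads *p into expected */
      } while (!atomic_compare_exchange_weak(p, &expected, desired));
      return expected; /* the value before the update, as atomicrmw returns */
    }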
@@ -27193,10 +27210,14 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
 
     Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
     Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
-    Lo = Builder.CreateZExt(Lo, ValueTy, "lo64");
-    Hi = Builder.CreateZExt(Hi, ValueTy, "hi64");
-    return Builder.CreateOr(
-        Lo, Builder.CreateShl(Hi, ConstantInt::get(ValueTy, 64)), "val64");
+
+    auto *Int128Ty = Type::getInt128Ty(Builder.getContext());
+    Lo = Builder.CreateZExt(Lo, Int128Ty, "lo64");
+    Hi = Builder.CreateZExt(Hi, Int128Ty, "hi64");
+
+    Value *Or = Builder.CreateOr(
+        Lo, Builder.CreateShl(Hi, ConstantInt::get(Int128Ty, 64)), "val64");
+    return Builder.CreateBitCast(Or, ValueTy);
   }
 
   Type *Tys[] = { Addr->getType() };
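In emitLoadLinked, the two halves returned by ldxp were previously zero-extended straight to ValueTy, which is only valid when ValueTy is an integer type; they are now combined as i128 and bitcast at the end, so 128-bit FP types work as well. The combine, sketched in C using the unsigned __int128 compiler extension:

    #include <stdint.h>

    /* What the new zext/shl/or sequence computes before the final bitcast:
     * the two 64-bit registers produced by ldxp become one 128-bit value. */
    static inline unsigned __int128 combine_halves(uint64_t lo, uint64_t hi) {
      return (unsigned __int128)lo | ((unsigned __int128)hi << 64);
    }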
@@ -27207,8 +27228,8 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilderBase &Builder,
   const DataLayout &DL = M->getDataLayout();
   IntegerType *IntEltTy = Builder.getIntNTy(DL.getTypeSizeInBits(ValueTy));
   CallInst *CI = Builder.CreateCall(Ldxr, Addr);
-  CI->addParamAttr(
-      0, Attribute::get(Builder.getContext(), Attribute::ElementType, ValueTy));
+  CI->addParamAttr(0, Attribute::get(Builder.getContext(),
+                                     Attribute::ElementType, IntEltTy));
   Value *Trunc = Builder.CreateTrunc(CI, IntEltTy);
 
   return Builder.CreateBitCast(Trunc, ValueTy);
@@ -27234,9 +27255,13 @@ Value *AArch64TargetLowering::emitStoreConditional(IRBuilderBase &Builder,
       IsRelease ? Intrinsic::aarch64_stlxp : Intrinsic::aarch64_stxp;
   Function *Stxr = Intrinsic::getDeclaration(M, Int);
   Type *Int64Ty = Type::getInt64Ty(M->getContext());
+  Type *Int128Ty = Type::getInt128Ty(M->getContext());
 
-  Value *Lo = Builder.CreateTrunc(Val, Int64Ty, "lo");
-  Value *Hi = Builder.CreateTrunc(Builder.CreateLShr(Val, 64), Int64Ty, "hi");
+  Value *CastVal = Builder.CreateBitCast(Val, Int128Ty);
+
+  Value *Lo = Builder.CreateTrunc(CastVal, Int64Ty, "lo");
+  Value *Hi =
+      Builder.CreateTrunc(Builder.CreateLShr(CastVal, 64), Int64Ty, "hi");
   return Builder.CreateCall(Stxr, {Lo, Hi, Addr});
 }
 
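emitStoreConditional gets the mirror-image fix: the value (e.g. an fp128) is first bitcast to i128, then split into the two 64-bit halves stxp expects. In C:

    #include <stdint.h>

    /* Mirror of the load side: reinterpret the value as 128 bits first
     * (CreateBitCast), then split into the lo/hi halves passed to stxp
     * (CreateTrunc and CreateLShr). */
    static inline void split_halves(unsigned __int128 v, uint64_t *lo,
                                    uint64_t *hi) {
      *lo = (uint64_t)v;
      *hi = (uint64_t)(v >> 64);
    }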