Skip to content

Commit

Permalink
Check if libcall
Browse files Browse the repository at this point in the history
  • Loading branch information
arsenm committed Aug 19, 2024
1 parent 4f5d9da commit 5e13bdd
Show file tree
Hide file tree
Showing 5 changed files with 202 additions and 70 deletions.
24 changes: 22 additions & 2 deletions llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27056,13 +27056,33 @@ AArch64TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
: AtomicExpansionKind::LLSC;
}

// Return true if the atomic operation expansion will lower to use a library
// call, and is thus ineligible to use an LLSC expansion.
//
// Only floating-point atomicrmw operations are considered; any other operation
// yields false. For floating-point operations the decision is made on the
// scalar element type of the result: float, double, half and bfloat are
// reported as not needing a library call, and every other floating-point type
// is conservatively reported as needing one.
static bool rmwOpMayLowerToLibcall(const AtomicRMWInst *RMW) {
  // Non-floating-point RMW operations are never treated as libcall candidates.
  if (!RMW->isFloatingPointOperation())
    return false;

  // Classify on the scalar type so a vector of a type is treated the same way
  // as the type itself.
  switch (RMW->getType()->getScalarType()->getTypeID()) {
  case Type::FloatTyID:
  case Type::DoubleTyID:
  case Type::HalfTyID:
  case Type::BFloatTyID:
    return false;
  default:
    // fp128 will emit library calls.
    return true;
  }
  // No trailing llvm_unreachable: the default label above means every path
  // through the switch has already returned.
}

// The "default" for integer RMW operations is to expand to an LL/SC loop.
// However, with the LSE instructions (or outline-atomics mode, which provides
// library routines in place of the LSE instructions), we can directly emit many
// operations instead.
TargetLowering::AtomicExpansionKind
AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
unsigned Size = AI->getType()->getPrimitiveSizeInBits();
Type *Ty = AI->getType();
unsigned Size = Ty->getPrimitiveSizeInBits();
assert(Size <= 128 && "AtomicExpandPass should've handled larger sizes.");

bool CanUseLSE128 = Subtarget->hasLSE128() && Size == 128 &&
Expand Down Expand Up @@ -27101,7 +27121,7 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
// succeed. So at -O0 lower this operation to a CAS loop. Also worthwhile if
// we have a single CAS instruction that can replace the loop.
if (getTargetMachine().getOptLevel() == CodeGenOptLevel::None ||
Subtarget->hasLSE())
Subtarget->hasLSE() || rmwOpMayLowerToLibcall(AI))
return AtomicExpansionKind::CmpXChg;

return AtomicExpansionKind::LLSC;
Expand Down
62 changes: 45 additions & 17 deletions llvm/test/CodeGen/AArch64/atomicrmw-fadd.ll
Original file line number Diff line number Diff line change
Expand Up @@ -273,26 +273,54 @@ define double @test_atomicrmw_fadd_f32_seq_cst_align8(ptr %ptr, double %value) #
define fp128 @test_atomicrmw_fadd_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
; NOLSE: // %bb.0:
; NOLSE-NEXT: sub sp, sp, #80
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; NOLSE-NEXT: sub sp, sp, #96
; NOLSE-NEXT: ldr q1, [x0]
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; NOLSE-NEXT: mov x19, x0
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT: ldaxp x8, x9, [x19]
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: stp x8, x9, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #48]
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
; NOLSE-NEXT: b .LBB6_2
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
; NOLSE-NEXT: stp x12, x13, [sp, #32]
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: ldr q1, [sp, #32]
; NOLSE-NEXT: ccmp x12, x11, #0, eq
; NOLSE-NEXT: b.eq .LBB6_6
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: bl __addtf3
; NOLSE-NEXT: str q0, [sp, #32]
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
; NOLSE-NEXT: cbnz w10, .LBB6_1
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: add sp, sp, #80
; NOLSE-NEXT: str q0, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
; NOLSE-NEXT: str q0, [sp, #64]
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; NOLSE-NEXT: ldaxp x12, x13, [x19]
; NOLSE-NEXT: cmp x12, x11
; NOLSE-NEXT: cset w14, ne
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: cinc w14, w14, ne
; NOLSE-NEXT: cbz w14, .LBB6_5
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: add sp, sp, #96
; NOLSE-NEXT: ret
;
; LSE-LABEL: test_atomicrmw_fadd_fp128_seq_cst_align16:
Expand Down
62 changes: 45 additions & 17 deletions llvm/test/CodeGen/AArch64/atomicrmw-fmax.ll
Original file line number Diff line number Diff line change
Expand Up @@ -273,26 +273,54 @@ define double @test_atomicrmw_fmax_f32_seq_cst_align8(ptr %ptr, double %value) #
define fp128 @test_atomicrmw_fmax_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmax_fp128_seq_cst_align16:
; NOLSE: // %bb.0:
; NOLSE-NEXT: sub sp, sp, #80
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; NOLSE-NEXT: sub sp, sp, #96
; NOLSE-NEXT: ldr q1, [x0]
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; NOLSE-NEXT: mov x19, x0
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT: ldaxp x8, x9, [x19]
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: stp x8, x9, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #48]
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
; NOLSE-NEXT: b .LBB6_2
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
; NOLSE-NEXT: stp x12, x13, [sp, #32]
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: ldr q1, [sp, #32]
; NOLSE-NEXT: ccmp x12, x11, #0, eq
; NOLSE-NEXT: b.eq .LBB6_6
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: bl fmaxl
; NOLSE-NEXT: str q0, [sp, #32]
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
; NOLSE-NEXT: cbnz w10, .LBB6_1
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: add sp, sp, #80
; NOLSE-NEXT: str q0, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
; NOLSE-NEXT: str q0, [sp, #64]
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; NOLSE-NEXT: ldaxp x12, x13, [x19]
; NOLSE-NEXT: cmp x12, x11
; NOLSE-NEXT: cset w14, ne
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: cinc w14, w14, ne
; NOLSE-NEXT: cbz w14, .LBB6_5
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: add sp, sp, #96
; NOLSE-NEXT: ret
;
; LSE-LABEL: test_atomicrmw_fmax_fp128_seq_cst_align16:
Expand Down
62 changes: 45 additions & 17 deletions llvm/test/CodeGen/AArch64/atomicrmw-fmin.ll
Original file line number Diff line number Diff line change
Expand Up @@ -273,26 +273,54 @@ define double @test_atomicrmw_fmin_f32_seq_cst_align8(ptr %ptr, double %value) #
define fp128 @test_atomicrmw_fmin_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fmin_fp128_seq_cst_align16:
; NOLSE: // %bb.0:
; NOLSE-NEXT: sub sp, sp, #80
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; NOLSE-NEXT: sub sp, sp, #96
; NOLSE-NEXT: ldr q1, [x0]
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; NOLSE-NEXT: mov x19, x0
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT: ldaxp x8, x9, [x19]
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: stp x8, x9, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #48]
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
; NOLSE-NEXT: b .LBB6_2
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
; NOLSE-NEXT: stp x12, x13, [sp, #32]
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: ldr q1, [sp, #32]
; NOLSE-NEXT: ccmp x12, x11, #0, eq
; NOLSE-NEXT: b.eq .LBB6_6
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: bl fminl
; NOLSE-NEXT: str q0, [sp, #32]
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
; NOLSE-NEXT: cbnz w10, .LBB6_1
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: add sp, sp, #80
; NOLSE-NEXT: str q0, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
; NOLSE-NEXT: str q0, [sp, #64]
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; NOLSE-NEXT: ldaxp x12, x13, [x19]
; NOLSE-NEXT: cmp x12, x11
; NOLSE-NEXT: cset w14, ne
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: cinc w14, w14, ne
; NOLSE-NEXT: cbz w14, .LBB6_5
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: add sp, sp, #96
; NOLSE-NEXT: ret
;
; LSE-LABEL: test_atomicrmw_fmin_fp128_seq_cst_align16:
Expand Down
62 changes: 45 additions & 17 deletions llvm/test/CodeGen/AArch64/atomicrmw-fsub.ll
Original file line number Diff line number Diff line change
Expand Up @@ -273,26 +273,54 @@ define double @test_atomicrmw_fsub_f32_seq_cst_align8(ptr %ptr, double %value) #
define fp128 @test_atomicrmw_fsub_fp128_seq_cst_align16(ptr %ptr, fp128 %value) #0 {
; NOLSE-LABEL: test_atomicrmw_fsub_fp128_seq_cst_align16:
; NOLSE: // %bb.0:
; NOLSE-NEXT: sub sp, sp, #80
; NOLSE-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
; NOLSE-NEXT: sub sp, sp, #96
; NOLSE-NEXT: ldr q1, [x0]
; NOLSE-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; NOLSE-NEXT: mov x19, x0
; NOLSE-NEXT: str q0, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // =>This Inner Loop Header: Depth=1
; NOLSE-NEXT: ldaxp x8, x9, [x19]
; NOLSE-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: stp x8, x9, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #48]
; NOLSE-NEXT: str q0, [sp] // 16-byte Folded Spill
; NOLSE-NEXT: b .LBB6_2
; NOLSE-NEXT: .LBB6_1: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_2 Depth=1
; NOLSE-NEXT: stp x12, x13, [sp, #32]
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: ldr q1, [sp, #32]
; NOLSE-NEXT: ccmp x12, x11, #0, eq
; NOLSE-NEXT: b.eq .LBB6_6
; NOLSE-NEXT: .LBB6_2: // %atomicrmw.start
; NOLSE-NEXT: // =>This Loop Header: Depth=1
; NOLSE-NEXT: // Child Loop BB6_3 Depth 2
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; NOLSE-NEXT: ldr q1, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: bl __subtf3
; NOLSE-NEXT: str q0, [sp, #32]
; NOLSE-NEXT: ldp x9, x8, [sp, #32]
; NOLSE-NEXT: stlxp w10, x9, x8, [x19]
; NOLSE-NEXT: cbnz w10, .LBB6_1
; NOLSE-NEXT: // %bb.2: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #64] // 16-byte Folded Reload
; NOLSE-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; NOLSE-NEXT: add sp, sp, #80
; NOLSE-NEXT: str q0, [sp, #48]
; NOLSE-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; NOLSE-NEXT: ldp x9, x8, [sp, #48]
; NOLSE-NEXT: str q0, [sp, #64]
; NOLSE-NEXT: ldp x11, x10, [sp, #64]
; NOLSE-NEXT: .LBB6_3: // %atomicrmw.start
; NOLSE-NEXT: // Parent Loop BB6_2 Depth=1
; NOLSE-NEXT: // => This Inner Loop Header: Depth=2
; NOLSE-NEXT: ldaxp x12, x13, [x19]
; NOLSE-NEXT: cmp x12, x11
; NOLSE-NEXT: cset w14, ne
; NOLSE-NEXT: cmp x13, x10
; NOLSE-NEXT: cinc w14, w14, ne
; NOLSE-NEXT: cbz w14, .LBB6_5
; NOLSE-NEXT: // %bb.4: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x12, x13, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_5: // %atomicrmw.start
; NOLSE-NEXT: // in Loop: Header=BB6_3 Depth=2
; NOLSE-NEXT: stlxp w14, x9, x8, [x19]
; NOLSE-NEXT: cbnz w14, .LBB6_3
; NOLSE-NEXT: b .LBB6_1
; NOLSE-NEXT: .LBB6_6: // %atomicrmw.end
; NOLSE-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; NOLSE-NEXT: mov v0.16b, v1.16b
; NOLSE-NEXT: add sp, sp, #96
; NOLSE-NEXT: ret
;
; LSE-LABEL: test_atomicrmw_fsub_fp128_seq_cst_align16:
Expand Down

0 comments on commit 5e13bdd

Please sign in to comment.