Skip to content

Commit

Permalink
[RISCV] Add additional fence for amocas when required by recent ABI c…
Browse files Browse the repository at this point in the history
…hange (llvm#101023)

A recent atomics ABI change / fix requires that for the "A6C" and "A6S"
atomics ABIs (i.e. both of those supported by LLVM currently), an
additional fence is inserted for an atomic_compare_exchange with seq_cst
failure ordering.
<riscv-non-isa/riscv-elf-psabi-doc#445>

This isn't trivial to support through the hooks used by AtomicExpandPass
because that pass assumes that when fences are inserted, the original
atomics ordering information can be removed from the instruction. Rather
than try to change and complicate that API, this patch implements the
needed fence insertion through a small special purpose pass.
  • Loading branch information
asb authored and tmsri committed Sep 19, 2024
1 parent 854d915 commit 1eab9ac
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 0 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ add_llvm_target(RISCVCodeGen
RISCVTargetObjectFile.cpp
RISCVTargetTransformInfo.cpp
RISCVVectorPeephole.cpp
RISCVZacasABIFix.cpp
GISel/RISCVCallLowering.cpp
GISel/RISCVInstructionSelector.cpp
GISel/RISCVLegalizerInfo.cpp
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCV.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ void initializeRISCVMoveMergePass(PassRegistry &);
FunctionPass *createRISCVPushPopOptimizationPass();
void initializeRISCVPushPopOptPass(PassRegistry &);

FunctionPass *createRISCVZacasABIFixPass();
void initializeRISCVZacasABIFixPass(PassRegistry &);

InstructionSelector *
createRISCVInstructionSelector(const RISCVTargetMachine &,
const RISCVSubtarget &,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -684,6 +684,9 @@ class RISCVTargetLowering : public TargetLowering {

bool preferZeroCompareBranch() const override { return true; }

// Reports whether fences should be inserted around the atomic instruction
// \p I: true for loads and stores, false for every other instruction.
//
// Note that one specific case requires fence insertion for an
// AtomicCmpXchgInst but is handled via the RISCVZacasABIFix pass rather
// than this hook due to limitations in the interface here.
bool shouldInsertFencesForAtomic(const Instruction *I) const override {
return isa<LoadInst>(I) || isa<StoreInst>(I);
}
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,7 @@ bool RISCVPassConfig::addRegAssignAndRewriteOptimized() {

void RISCVPassConfig::addIRPasses() {
addPass(createAtomicExpandLegacyPass());
addPass(createRISCVZacasABIFixPass());

if (getOptLevel() != CodeGenOptLevel::None) {
if (EnableLoopDataPrefetch)
Expand Down
96 changes: 96 additions & 0 deletions llvm/lib/Target/RISCV/RISCVZacasABIFix.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
//===----- RISCVZacasABIFix.cpp -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass implements a fence insertion for an atomic cmpxchg in a case that
// isn't easy to do with the current AtomicExpandPass hooks API.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-zacas-abi-fix"
#define PASS_NAME "RISC-V Zacas ABI fix"

namespace {

/// IR-level function pass that walks every instruction of a function and
/// inserts an additional fence in front of selected cmpxchg instructions
/// (see visitAtomicCmpXchgInst). It is both the FunctionPass and the
/// InstVisitor; each visit method returns true when it modified the IR.
class RISCVZacasABIFix : public FunctionPass,
                         public InstVisitor<RISCVZacasABIFix, bool> {
  // Subtarget of the function currently being processed. It is assigned at
  // the start of runOnFunction; it starts out as nullptr so that a visit made
  // before any function has been processed fails deterministically instead
  // of reading an indeterminate pointer.
  const RISCVSubtarget *ST = nullptr;

public:
  static char ID;

  RISCVZacasABIFix() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  // The pass only adds fence instructions, so the CFG is preserved. It needs
  // TargetPassConfig to reach the target machine and, through it, the
  // per-function subtarget.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<TargetPassConfig>();
  }

  // Fallback for every instruction kind without a dedicated visitor: nothing
  // to change.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
};

} // end anonymous namespace

// Places a sequentially consistent fence directly in front of a cmpxchg whose
// failure ordering is SequentiallyConsistent (needed for broadest atomics ABI
// compatibility when Zacas is enabled). Any other cmpxchg is left untouched.
// Returns true iff a fence was inserted.
bool RISCVZacasABIFix::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  assert(ST->hasStdExtZacas() && "only necessary to run in presence of zacas");

  const bool NeedsLeadingFence =
      I.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent;
  if (NeedsLeadingFence) {
    // A builder anchored at the cmpxchg inserts ahead of it.
    IRBuilder<> FenceBuilder(&I);
    FenceBuilder.CreateFence(AtomicOrdering::SequentiallyConsistent);
  }
  return NeedsLeadingFence;
}

// Pass entry point. Looks up the subtarget for \p F and, when the Zacas
// extension is available, visits every instruction so that the cmpxchg
// visitor can insert the fences it needs. Returns true if any instruction
// visit changed the function.
bool RISCVZacasABIFix::runOnFunction(Function &F) {
  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  // Deliberately no skipFunction(F) check here: the fence is required for
  // ABI correctness and is not an optimisation, so it must also be emitted
  // for optnone functions and when opt-bisect disables optional passes.
  if (!ST->hasStdExtZacas())
    return false;

  bool MadeChange = false;
  for (auto &BB : F)
    // Early-inc iteration keeps the walk valid while fences are being
    // inserted into the block that is currently traversed.
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

// Pass registration: exposes the pass under DEBUG_TYPE / PASS_NAME and
// records its dependency on TargetPassConfig, which runOnFunction queries to
// reach the target machine.
INITIALIZE_PASS_BEGIN(RISCVZacasABIFix, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RISCVZacasABIFix, DEBUG_TYPE, PASS_NAME, false, false)

// Definition of the static pass identifier declared in the class; it is the
// value handed to the FunctionPass base-class constructor.
char RISCVZacasABIFix::ID = 0;

// Factory for the pass: builds and returns a fresh RISCVZacasABIFix instance.
FunctionPass *llvm::createRISCVZacasABIFixPass() {
  FunctionPass *const Fix = new RISCVZacasABIFix();
  return Fix;
}
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O0-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
; CHECK-NEXT: Expand large div/rem
; CHECK-NEXT: Expand large fp convert
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Module Verifier
; CHECK-NEXT: Lower Garbage Collection Instructions
; CHECK-NEXT: Shadow Stack GC Lowering
Expand Down
1 change: 1 addition & 0 deletions llvm/test/CodeGen/RISCV/O3-pipeline.ll
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
; CHECK-NEXT: Expand large div/rem
; CHECK-NEXT: Expand large fp convert
; CHECK-NEXT: Expand Atomic instructions
; CHECK-NEXT: RISC-V Zacas ABI fix
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Natural Loop Information
; CHECK-NEXT: Canonicalize natural loops
Expand Down
5 changes: 5 additions & 0 deletions llvm/test/CodeGen/RISCV/atomic-cmpxchg-branch-on-result.ll
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ define void @cmpxchg_and_branch1(ptr %ptr, i32 signext %cmp, i32 signext %val) n
; ZACAS: # %bb.0: # %entry
; ZACAS-NEXT: .LBB0_1: # %do_cmpxchg
; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; ZACAS-NEXT: fence rw, rw
; ZACAS-NEXT: mv a3, a1
; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
; ZACAS-NEXT: bne a3, a1, .LBB0_1
Expand Down Expand Up @@ -76,6 +77,7 @@ define void @cmpxchg_and_branch2(ptr %ptr, i32 signext %cmp, i32 signext %val) n
; ZACAS: # %bb.0: # %entry
; ZACAS-NEXT: .LBB1_1: # %do_cmpxchg
; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; ZACAS-NEXT: fence rw, rw
; ZACAS-NEXT: mv a3, a1
; ZACAS-NEXT: amocas.w.aqrl a3, a2, (a0)
; ZACAS-NEXT: beq a3, a1, .LBB1_1
Expand Down Expand Up @@ -216,6 +218,7 @@ define void @cmpxchg_masked_and_branch1(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV64IA-ZABHA: # %bb.0: # %entry
; RV64IA-ZABHA-NEXT: .LBB2_1: # %do_cmpxchg
; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IA-ZABHA-NEXT: fence rw, rw
; RV64IA-ZABHA-NEXT: mv a3, a1
; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0)
; RV64IA-ZABHA-NEXT: bne a3, a1, .LBB2_1
Expand Down Expand Up @@ -368,6 +371,7 @@ define void @cmpxchg_masked_and_branch2(ptr %ptr, i8 signext %cmp, i8 signext %v
; RV64IA-ZABHA: # %bb.0: # %entry
; RV64IA-ZABHA-NEXT: .LBB3_1: # %do_cmpxchg
; RV64IA-ZABHA-NEXT: # =>This Inner Loop Header: Depth=1
; RV64IA-ZABHA-NEXT: fence rw, rw
; RV64IA-ZABHA-NEXT: mv a3, a1
; RV64IA-ZABHA-NEXT: amocas.b.aqrl a3, a2, (a0)
; RV64IA-ZABHA-NEXT: beq a3, a1, .LBB3_1
Expand Down Expand Up @@ -408,6 +412,7 @@ define void @cmpxchg_and_irrelevant_branch(ptr %ptr, i32 signext %cmp, i32 signe
; ZACAS: # %bb.0: # %entry
; ZACAS-NEXT: .LBB4_1: # %do_cmpxchg
; ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
; ZACAS-NEXT: fence rw, rw
; ZACAS-NEXT: mv a4, a1
; ZACAS-NEXT: amocas.w.aqrl a4, a2, (a0)
; ZACAS-NEXT: beqz a3, .LBB4_1
Expand Down
14 changes: 14 additions & 0 deletions llvm/test/CodeGen/RISCV/atomic-cmpxchg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1857,6 +1857,7 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.b.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
Expand Down Expand Up @@ -1885,6 +1886,7 @@ define void @cmpxchg_i8_seq_cst_seq_cst(ptr %ptr, i8 %cmp, i8 %val) nounwind {
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i8_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.b a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i8 %cmp, i8 %val seq_cst seq_cst
Expand Down Expand Up @@ -3787,6 +3789,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.h.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
Expand Down Expand Up @@ -3816,6 +3819,7 @@ define void @cmpxchg_i16_seq_cst_seq_cst(ptr %ptr, i16 %cmp, i16 %val) nounwind
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i16_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.h a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i16 %cmp, i16 %val seq_cst seq_cst
Expand Down Expand Up @@ -4788,6 +4792,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV32IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV32IA-WMO-ZACAS: # %bb.0:
; RV32IA-WMO-ZACAS-NEXT: fence rw, rw
; RV32IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV32IA-WMO-ZACAS-NEXT: ret
;
Expand All @@ -4804,6 +4809,7 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV32IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV32IA-TSO-ZACAS: # %bb.0:
; RV32IA-TSO-ZACAS-NEXT: fence rw, rw
; RV32IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
; RV32IA-TSO-ZACAS-NEXT: ret
;
Expand Down Expand Up @@ -4834,11 +4840,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZACAS-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV64IA-WMO-ZACAS-NEXT: ret
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.w.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
Expand All @@ -4856,11 +4864,13 @@ define void @cmpxchg_i32_seq_cst_seq_cst(ptr %ptr, i32 %cmp, i32 %val) nounwind
;
; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-TSO-ZACAS: # %bb.0:
; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZACAS-NEXT: amocas.w a1, a2, (a0)
; RV64IA-TSO-ZACAS-NEXT: ret
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i32_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.w a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i32 %cmp, i32 %val seq_cst seq_cst
Expand Down Expand Up @@ -5753,11 +5763,13 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
;
; RV64IA-WMO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: fence rw, rw
; RV64IA-WMO-ZACAS-NEXT: amocas.d.aqrl a1, a2, (a0)
; RV64IA-WMO-ZACAS-NEXT: ret
;
; RV64IA-WMO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: fence rw, rw
; RV64IA-WMO-ZABHA-NEXT: amocas.d.aqrl a1, a2, (a0)
; RV64IA-WMO-ZABHA-NEXT: ret
;
Expand All @@ -5774,11 +5786,13 @@ define void @cmpxchg_i64_seq_cst_seq_cst(ptr %ptr, i64 %cmp, i64 %val) nounwind
;
; RV64IA-TSO-ZACAS-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-TSO-ZACAS: # %bb.0:
; RV64IA-TSO-ZACAS-NEXT: fence rw, rw
; RV64IA-TSO-ZACAS-NEXT: amocas.d a1, a2, (a0)
; RV64IA-TSO-ZACAS-NEXT: ret
;
; RV64IA-TSO-ZABHA-LABEL: cmpxchg_i64_seq_cst_seq_cst:
; RV64IA-TSO-ZABHA: # %bb.0:
; RV64IA-TSO-ZABHA-NEXT: fence rw, rw
; RV64IA-TSO-ZABHA-NEXT: amocas.d a1, a2, (a0)
; RV64IA-TSO-ZABHA-NEXT: ret
%res = cmpxchg ptr %ptr, i64 %cmp, i64 %val seq_cst seq_cst
Expand Down
Loading

0 comments on commit 1eab9ac

Please sign in to comment.