diff --git a/src/llvm-codegen-shared.h b/src/llvm-codegen-shared.h index 329cc567e8c5f..e0edb792d7645 100644 --- a/src/llvm-codegen-shared.h +++ b/src/llvm-codegen-shared.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -233,20 +234,39 @@ static inline void emit_signal_fence(llvm::IRBuilder<> &builder) builder.CreateFence(AtomicOrdering::SequentiallyConsistent, SyncScope::SingleThread); } -static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa) +static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::MDNode *tbaa, bool final = false) { + using namespace llvm; + llvm::Value *signal_page = get_current_signal_page_from_ptls(builder, ptls, tbaa); emit_signal_fence(builder); - builder.CreateLoad(getSizeTy(builder.getContext()), get_current_signal_page_from_ptls(builder, ptls, tbaa), true); + Module *M = builder.GetInsertBlock()->getModule(); + LLVMContext &C = builder.getContext(); + // inline jlsafepoint_func->realize(M) + if (final) { + auto T_size = getSizeTy(builder.getContext()); + builder.CreateLoad(T_size, signal_page, true); + } + else { + Function *F = M->getFunction("julia.safepoint"); + if (!F) { + auto T_size = getSizeTy(builder.getContext()); + auto T_psize = T_size->getPointerTo(); + FunctionType *FT = FunctionType::get(Type::getVoidTy(C), {T_psize}, false); + F = Function::Create(FT, Function::ExternalLinkage, "julia.safepoint", M); + F->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + } + builder.CreateCall(F, {signal_page}); + } emit_signal_fence(builder); } -static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state) +static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final) { using namespace llvm; Type *T_int8 = state->getType(); - ptls = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy()); + llvm::Value *ptls_i8 = emit_bitcast_with_builder(builder, ptls, builder.getInt8PtrTy()); Constant *offset = ConstantInt::getSigned(builder.getInt32Ty(), offsetof(jl_tls_states_t, gc_state)); - Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls, ArrayRef(offset), "gc_state"); + Value *gc_state = builder.CreateInBoundsGEP(T_int8, ptls_i8, ArrayRef(offset), "gc_state"); if (old_state == nullptr) { old_state = builder.CreateLoad(T_int8, gc_state); cast(old_state)->setOrdering(AtomicOrdering::Monotonic); @@ -266,38 +286,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::V passBB, exitBB); builder.SetInsertPoint(passBB); MDNode *tbaa = get_tbaa_const(builder.getContext()); - emit_gc_safepoint(builder, ptls, tbaa); + emit_gc_safepoint(builder, ptls, tbaa, final); builder.CreateBr(exitBB); builder.SetInsertPoint(exitBB); return old_state; } -static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls) +static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final) { using namespace llvm; Value *state = builder.getInt8(0); - return emit_gc_state_set(builder, ptls, state, nullptr); + return emit_gc_state_set(builder, ptls, state, nullptr, final); } -static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state) +static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(0); - return emit_gc_state_set(builder, ptls, state, old_state); + return emit_gc_state_set(builder, ptls, state, old_state, final); } -static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls) +static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Value *ptls, bool final) { using namespace llvm; Value *state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, ptls, state, nullptr); + return emit_gc_state_set(builder, ptls, state, nullptr, final); } -static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state) +static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Value *ptls, llvm::Value *state, bool final) { using namespace llvm; Value *old_state = builder.getInt8(JL_GC_STATE_SAFE); - return emit_gc_state_set(builder, ptls, state, old_state); + return emit_gc_state_set(builder, ptls, state, old_state, final); } // Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14. diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 6b416b05a002e..3b8533c6d0115 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -26,6 +26,7 @@ STATISTIC(PopGCFrameCount, "Number of lowered popGCFrameFunc intrinsics"); STATISTIC(GetGCFrameSlotCount, "Number of lowered getGCFrameSlotFunc intrinsics"); STATISTIC(GCAllocBytesCount, "Number of lowered GCAllocBytesFunc intrinsics"); STATISTIC(QueueGCRootCount, "Number of lowered queueGCRootFunc intrinsics"); +STATISTIC(SafepointCount, "Number of lowered safepoint intrinsics"); using namespace llvm; @@ -66,6 +67,9 @@ struct FinalLowerGC: private JuliaPassContext { // Lowers a `julia.queue_gc_root` intrinsic. Value *lowerQueueGCRoot(CallInst *target, Function &F); + + // Lowers a `julia.safepoint` intrinsic. + Value *lowerSafepoint(CallInst *target, Function &F); }; Value *FinalLowerGC::lowerNewGCFrame(CallInst *target, Function &F) @@ -188,6 +192,18 @@ Value *FinalLowerGC::lowerQueueGCRoot(CallInst *target, Function &F) return target; } +Value *FinalLowerGC::lowerSafepoint(CallInst *target, Function &F) +{ + ++SafepointCount; + assert(target->arg_size() == 1); + IRBuilder<> builder(target->getContext()); + builder.SetInsertPoint(target); + auto T_size = getSizeTy(builder.getContext()); + Value* signal_page = target->getOperand(0); + Value* load = builder.CreateLoad(T_size, signal_page, true); + return load; +} + Value *FinalLowerGC::lowerGCAllocBytes(CallInst *target, Function &F) { ++GCAllocBytesCount; @@ -292,16 +308,20 @@ static void replaceInstruction( bool FinalLowerGC::runOnFunction(Function &F) { - LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); // Check availability of functions again since they might have been deleted. initFunctions(*F.getParent()); - if (!pgcstack_getter && !adoptthread_func) + if (!pgcstack_getter && !adoptthread_func) { + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << "\n"); return false; + } // Look for a call to 'julia.get_pgcstack'. pgcstack = getPGCstack(F); - if (!pgcstack) + if (!pgcstack) { + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Skipping function " << F.getName() << " no pgcstack\n"); return false; + } + LLVM_DEBUG(dbgs() << "FINAL GC LOWERING: Processing function " << F.getName() << "\n"); // Acquire intrinsic functions. auto newGCFrameFunc = getOrNull(jl_intrinsics::newGCFrame); @@ -310,6 +330,7 @@ bool FinalLowerGC::runOnFunction(Function &F) auto getGCFrameSlotFunc = getOrNull(jl_intrinsics::getGCFrameSlot); auto GCAllocBytesFunc = getOrNull(jl_intrinsics::GCAllocBytes); auto queueGCRootFunc = getOrNull(jl_intrinsics::queueGCRoot); + auto safepointFunc = getOrNull(jl_intrinsics::safepoint); // Lower all calls to supported intrinsics. for (BasicBlock &BB : F) { @@ -321,6 +342,7 @@ bool FinalLowerGC::runOnFunction(Function &F) } Value *callee = CI->getCalledOperand(); + assert(callee); if (callee == newGCFrameFunc) { replaceInstruction(CI, lowerNewGCFrame(CI, F), it); @@ -342,6 +364,10 @@ bool FinalLowerGC::runOnFunction(Function &F) else if (callee == queueGCRootFunc) { replaceInstruction(CI, lowerQueueGCRoot(CI, F), it); } + else if (callee == safepointFunc) { + lowerSafepoint(CI, F); + it = CI->eraseFromParent(); + } else { ++it; } diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index e12fa7ad90fed..ea390f01010fd 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -116,6 +116,7 @@ namespace jl_intrinsics { static const char *PUSH_GC_FRAME_NAME = "julia.push_gc_frame"; static const char *POP_GC_FRAME_NAME = "julia.pop_gc_frame"; static const char *QUEUE_GC_ROOT_NAME = "julia.queue_gc_root"; + static const char *SAFEPOINT_NAME = "julia.safepoint"; // Annotates a function with attributes suitable for GC allocation // functions. Specifically, the return value is marked noalias and nonnull. @@ -206,6 +207,22 @@ namespace jl_intrinsics { intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); return intrinsic; }); + + const IntrinsicDescription safepoint( + SAFEPOINT_NAME, + [](const JuliaPassContext &context) { + auto T_size = getSizeTy(context.getLLVMContext()); + auto T_psize = T_size->getPointerTo(); + auto intrinsic = Function::Create( + FunctionType::get( + Type::getVoidTy(context.getLLVMContext()), + {T_psize}, + false), + Function::ExternalLinkage, + SAFEPOINT_NAME); + intrinsic->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); + return intrinsic; + }); } namespace jl_well_known { diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index 3c06440f369d8..2b2bd50cd0e4d 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -126,6 +126,9 @@ namespace jl_intrinsics { // `julia.queue_gc_root`: an intrinsic that queues a GC root. extern const IntrinsicDescription queueGCRoot; + + // `julia.safepoint`: an intrinsic that triggers a GC safepoint. + extern const IntrinsicDescription safepoint; } // A namespace for well-known Julia runtime function descriptions. diff --git a/src/llvm-ptls.cpp b/src/llvm-ptls.cpp index ebd7d10a2a130..ea92e1709c597 100644 --- a/src/llvm-ptls.cpp +++ b/src/llvm-ptls.cpp @@ -207,7 +207,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, IRBuilder<> builder(fastTerm->getParent()); fastTerm->removeFromParent(); MDNode *tbaa = tbaa_gcframe; - Value *prior = emit_gc_unsafe_enter(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa)); + Value *prior = emit_gc_unsafe_enter(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true); builder.Insert(fastTerm); phi->addIncoming(pgcstack, fastTerm->getParent()); // emit pre-return cleanup @@ -219,7 +219,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter, for (auto &BB : *pgcstack->getParent()->getParent()) { if (isa(BB.getTerminator())) { IRBuilder<> builder(BB.getTerminator()); - emit_gc_unsafe_leave(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state); + emit_gc_unsafe_leave(builder, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true); } } }