Skip to content

Commit

Permalink
Atomically order specsigflags, specptr, and invoke (#47832)
Browse files Browse the repository at this point in the history
Co-authored-by: Jameson Nash <vtjnash@gmail.com>
Co-authored-by: Dilum Aluthge <dilum@aluthge.com>
  • Loading branch information
3 people authored Feb 22, 2023
1 parent 81f366d commit 6412a56
Show file tree
Hide file tree
Showing 7 changed files with 146 additions and 71 deletions.
30 changes: 23 additions & 7 deletions src/codegen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4236,7 +4236,7 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
jl_value_t *ci = ctx.params->lookup(mi, ctx.world, ctx.world); // TODO: need to use the right pair world here
jl_code_instance_t *codeinst = (jl_code_instance_t*)ci;
if (ci != jl_nothing) {
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
// check if we know how to handle this specptr
if (invoke == jl_fptr_const_return_addr) {
result = mark_julia_const(ctx, codeinst->rettype_const);
Expand All @@ -4262,10 +4262,13 @@ static jl_cgval_t emit_invoke(jl_codectx_t &ctx, const jl_cgval_t &lival, const
// optimization: emit the correct name immediately, if we know it
// TODO: use `emitted` map here too to try to consolidate names?
// WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (fptr) {
if (specsig ? codeinst->isspecsig : invoke == jl_fptr_args_addr) {
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
jl_cpu_pause();
}
invoke = jl_atomic_load_relaxed(&codeinst->invoke);
if (specsig ? jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1 : invoke == jl_fptr_args_addr) {
protoname = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
need_to_emit = false;
}
Expand Down Expand Up @@ -5783,9 +5786,15 @@ static Function* gen_cfun_wrapper(
if (lam && params.cache) {
// TODO: this isn't ideal to be unconditionally calling type inference (and compile) from here
codeinst = jl_compile_method_internal(lam, world);
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
assert(invoke);
if (fptr) {
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
jl_cpu_pause();
}
invoke = jl_atomic_load_relaxed(&codeinst->invoke);
}
// WARNING: this invoke load is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
if (invoke == jl_fptr_args_addr) {
callptr = fptr;
Expand All @@ -5796,7 +5805,7 @@ static Function* gen_cfun_wrapper(
callptr = (void*)codeinst->rettype_const;
calltype = 2;
}
else if (codeinst->isspecsig) {
else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
callptr = fptr;
calltype = 3;
}
Expand Down Expand Up @@ -8526,18 +8535,25 @@ void jl_compile_workqueue(
"invalid world for code-instance");
StringRef preal_decl = "";
bool preal_specsig = false;
auto invoke = jl_atomic_load_relaxed(&codeinst->invoke);
auto invoke = jl_atomic_load_acquire(&codeinst->invoke);
bool cache_valid = params.cache;
if (params.external_linkage) {
cache_valid = 0 && jl_object_in_image((jl_value_t*)codeinst);
}
// WARNING: isspecsig is protected by the codegen-lock. If that lock is removed, then the isspecsig load needs to be properly atomically sequenced with this.
if (cache_valid && invoke != NULL) {
auto fptr = jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (fptr) {
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
jl_cpu_pause();
}
// in case we are racing with another thread that is emitting this function
invoke = jl_atomic_load_relaxed(&codeinst->invoke);
}
if (invoke == jl_fptr_args_addr) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
}
else if (codeinst->isspecsig) {
else if (jl_atomic_load_relaxed(&codeinst->specsigflags) & 0b1) {
preal_decl = jl_ExecutionEngine->getFunctionAtAddress((uintptr_t)fptr, codeinst);
preal_specsig = true;
}
Expand Down
96 changes: 65 additions & 31 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -414,13 +414,13 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst(
if ((const_flags & 2) == 0)
inferred_const = NULL;
codeinst->rettype_const = inferred_const;
jl_atomic_store_relaxed(&codeinst->invoke, NULL);
jl_atomic_store_relaxed(&codeinst->specptr.fptr, NULL);
jl_atomic_store_relaxed(&codeinst->invoke, NULL);
if ((const_flags & 1) != 0) {
assert(const_flags & 2);
jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_const_return);
}
codeinst->isspecsig = 0;
jl_atomic_store_relaxed(&codeinst->specsigflags, 0);
jl_atomic_store_relaxed(&codeinst->precompile, 0);
jl_atomic_store_relaxed(&codeinst->next, NULL);
codeinst->ipo_purity_bits = ipo_effects;
Expand Down Expand Up @@ -2218,12 +2218,33 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
mi, codeinst2->rettype,
codeinst2->min_world, codeinst2->max_world);
if (jl_atomic_load_relaxed(&codeinst->invoke) == NULL) {
// once set, don't change invoke-ptr, as that leads to race conditions
// with the (not) simultaneous updates to invoke and specptr
codeinst->isspecsig = codeinst2->isspecsig;
codeinst->rettype_const = codeinst2->rettype_const;
jl_atomic_store_release(&codeinst->specptr.fptr, jl_atomic_load_relaxed(&codeinst2->specptr.fptr));
jl_atomic_store_release(&codeinst->invoke, jl_atomic_load_relaxed(&codeinst2->invoke));
uint8_t specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst2->invoke);
void *fptr = jl_atomic_load_relaxed(&codeinst2->specptr.fptr);
if (fptr != NULL) {
while (!(specsigflags & 0b10)) {
jl_cpu_pause();
specsigflags = jl_atomic_load_acquire(&codeinst2->specsigflags);
}
invoke = jl_atomic_load_relaxed(&codeinst2->invoke);
void *prev_fptr = NULL;
// see jitlayers.cpp for the ordering restrictions here
if (jl_atomic_cmpswap_acqrel(&codeinst->specptr.fptr, &prev_fptr, fptr)) {
jl_atomic_store_relaxed(&codeinst->specsigflags, specsigflags & 0b1);
jl_atomic_store_release(&codeinst->invoke, invoke);
jl_atomic_store_release(&codeinst->specsigflags, specsigflags);
} else {
// someone else already compiled it
while (!(jl_atomic_load_acquire(&codeinst->specsigflags) & 0b10)) {
jl_cpu_pause();
}
// codeinst is now set up fully, safe to return
}
} else {
jl_callptr_t prev = NULL;
jl_atomic_cmpswap_acqrel(&codeinst->invoke, &prev, invoke);
}
}
// don't call record_precompile_statement here, since we already compiled it as mi2 which is better
return codeinst;
Expand All @@ -2248,14 +2269,22 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
jl_method_instance_t *unspecmi = jl_atomic_load_relaxed(&def->unspecialized);
if (unspecmi) {
jl_code_instance_t *unspec = jl_atomic_load_relaxed(&unspecmi->cache);
if (unspec && jl_atomic_load_acquire(&unspec->invoke)) {
jl_callptr_t unspec_invoke = NULL;
if (unspec && (unspec_invoke = jl_atomic_load_acquire(&unspec->invoke))) {
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
(jl_value_t*)jl_any_type, NULL, NULL,
0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
codeinst->isspecsig = 0;
codeinst->specptr = unspec->specptr;
void *unspec_fptr = jl_atomic_load_relaxed(&unspec->specptr.fptr);
if (unspec_fptr) {
// wait until invoke and specsigflags are properly set
while (!(jl_atomic_load_acquire(&unspec->specsigflags) & 0b10)) {
jl_cpu_pause();
}
unspec_invoke = jl_atomic_load_relaxed(&unspec->invoke);
}
jl_atomic_store_release(&codeinst->specptr.fptr, unspec_fptr);
codeinst->rettype_const = unspec->rettype_const;
jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&unspec->invoke));
jl_atomic_store_release(&codeinst->invoke, unspec_invoke);
jl_mi_cache_insert(mi, codeinst);
record_precompile_statement(mi);
return codeinst;
Expand All @@ -2272,7 +2301,7 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
jl_code_instance_t *codeinst = jl_new_codeinst(mi,
(jl_value_t*)jl_any_type, NULL, NULL,
0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
jl_atomic_store_relaxed(&codeinst->invoke, jl_fptr_interpret_call);
jl_atomic_store_release(&codeinst->invoke, jl_fptr_interpret_call);
jl_mi_cache_insert(mi, codeinst);
record_precompile_statement(mi);
return codeinst;
Expand All @@ -2289,7 +2318,8 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
jl_method_instance_t *unspec = jl_get_unspecialized_from_mi(mi);
jl_code_instance_t *ucache = jl_get_method_inferred(unspec, (jl_value_t*)jl_any_type, 1, ~(size_t)0);
// ask codegen to make the fptr for unspec
if (jl_atomic_load_acquire(&ucache->invoke) == NULL) {
jl_callptr_t ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
if (ucache_invoke == NULL) {
if (def->source == jl_nothing && (jl_atomic_load_relaxed(&ucache->def->uninferred) == jl_nothing ||
jl_atomic_load_relaxed(&ucache->def->uninferred) == NULL)) {
jl_printf(JL_STDERR, "source not available for ");
Expand All @@ -2298,19 +2328,29 @@ jl_code_instance_t *jl_compile_method_internal(jl_method_instance_t *mi, size_t
jl_error("source missing for method that needs to be compiled");
}
jl_generate_fptr_for_unspecialized(ucache);
ucache_invoke = jl_atomic_load_acquire(&ucache->invoke);
}
assert(jl_atomic_load_relaxed(&ucache->invoke) != NULL);
if (jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_sparam &&
jl_atomic_load_relaxed(&ucache->invoke) != jl_fptr_interpret_call) {
assert(ucache_invoke != NULL);
if (ucache_invoke != jl_fptr_sparam &&
ucache_invoke != jl_fptr_interpret_call) {
// only these care about the exact specTypes, otherwise we can use it directly
return ucache;
}
codeinst = jl_new_codeinst(mi, (jl_value_t*)jl_any_type, NULL, NULL,
0, 1, ~(size_t)0, 0, 0, jl_nothing, 0);
codeinst->isspecsig = 0;
codeinst->specptr = ucache->specptr;
void *unspec_fptr = jl_atomic_load_relaxed(&ucache->specptr.fptr);
if (unspec_fptr) {
// wait until invoke and specsigflags are properly set
while (!(jl_atomic_load_acquire(&ucache->specsigflags) & 0b10)) {
jl_cpu_pause();
}
ucache_invoke = jl_atomic_load_relaxed(&ucache->invoke);
}
// unspec is always not specsig, but might use specptr
jl_atomic_store_relaxed(&codeinst->specsigflags, jl_atomic_load_relaxed(&ucache->specsigflags) & 0b10);
jl_atomic_store_relaxed(&codeinst->specptr.fptr, unspec_fptr);
codeinst->rettype_const = ucache->rettype_const;
jl_atomic_store_relaxed(&codeinst->invoke, jl_atomic_load_relaxed(&ucache->invoke));
jl_atomic_store_release(&codeinst->invoke, ucache_invoke);
jl_mi_cache_insert(mi, codeinst);
}
else {
Expand All @@ -2328,23 +2368,17 @@ jl_value_t *jl_fptr_const_return(jl_value_t *f, jl_value_t **args, uint32_t narg
jl_value_t *jl_fptr_args(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
{
jl_fptr_args_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr1);
while (1) {
if (invoke)
return invoke(f, args, nargs);
invoke = jl_atomic_load_acquire(&m->specptr.fptr1); // require forward progress with acquire annotation
}
assert(invoke && "Forgot to set specptr for jl_fptr_args!");
return invoke(f, args, nargs);
}

jl_value_t *jl_fptr_sparam(jl_value_t *f, jl_value_t **args, uint32_t nargs, jl_code_instance_t *m)
{
jl_svec_t *sparams = m->def->sparam_vals;
assert(sparams != jl_emptysvec);
jl_fptr_sparam_t invoke = jl_atomic_load_relaxed(&m->specptr.fptr3);
while (1) {
if (invoke)
return invoke(f, args, nargs, sparams);
invoke = jl_atomic_load_acquire(&m->specptr.fptr3); // require forward progress with acquire annotation
}
assert(invoke && "Forgot to set specptr for jl_fptr_sparam!");
return invoke(f, args, nargs, sparams);
}

JL_DLLEXPORT jl_callptr_t jl_fptr_args_addr = &jl_fptr_args;
Expand Down Expand Up @@ -2667,7 +2701,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mfunc->cache);
while (codeinst) {
if (codeinst->min_world <= world && world <= codeinst->max_world) {
jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
if (invoke != NULL) {
jl_value_t *res = invoke(F, args, nargs, codeinst);
return verify_type(res);
Expand All @@ -2687,7 +2721,7 @@ STATIC_INLINE jl_value_t *_jl_invoke(jl_value_t *F, jl_value_t **args, uint32_t
errno = last_errno;
if (jl_options.malloc_log)
jl_gc_sync_total_bytes(last_alloc); // discard allocation count from compilation
jl_callptr_t invoke = jl_atomic_load_relaxed(&codeinst->invoke);
jl_callptr_t invoke = jl_atomic_load_acquire(&codeinst->invoke);
jl_value_t *res = invoke(F, args, nargs, codeinst);
return verify_type(res);
}
Expand Down
46 changes: 29 additions & 17 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,18 +262,31 @@ static jl_callptr_t _jl_compile_codeinst(
addr = (jl_callptr_t)getAddressForFunction(decls.functionObject);
isspecsig = true;
}
if (jl_atomic_load_relaxed(&this_code->invoke) == NULL) {
// once set, don't change invoke-ptr, as that leads to race conditions
// with the (not) simultaneous updates to invoke and specptr
if (!decls.specFunctionObject.empty()) {
jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
this_code->isspecsig = isspecsig;
if (!decls.specFunctionObject.empty()) {
void *prev_specptr = NULL;
auto spec = (void*)getAddressForFunction(decls.specFunctionObject);
if (jl_atomic_cmpswap_acqrel(&this_code->specptr.fptr, &prev_specptr, spec)) {
// only set specsig and invoke if we were the first to set specptr
jl_atomic_store_relaxed(&this_code->specsigflags, (uint8_t) isspecsig);
// we might overwrite invokeptr here; that's ok, anybody who relied on the identity of invokeptr
// either assumes that specptr was null, doesn't care about specptr,
// or will wait until specsigflags has 0b10 set before reloading invoke
jl_atomic_store_release(&this_code->invoke, addr);
jl_atomic_store_release(&this_code->specsigflags, (uint8_t) (0b10 | isspecsig));
} else {
//someone else beat us, don't commit any results
while (!(jl_atomic_load_acquire(&this_code->specsigflags) & 0b10)) {
jl_cpu_pause();
}
addr = jl_atomic_load_relaxed(&this_code->invoke);
}
} else {
jl_callptr_t prev_invoke = NULL;
if (!jl_atomic_cmpswap_acqrel(&this_code->invoke, &prev_invoke, addr)) {
addr = prev_invoke;
//TODO do we want to potentially promote invoke anyways? (e.g. invoke is jl_interpret_call or some other
//known lesser function)
}
jl_atomic_store_release(&this_code->invoke, addr);
}
else if (jl_atomic_load_relaxed(&this_code->invoke) == jl_fptr_const_return_addr && !decls.specFunctionObject.empty()) {
// hack to export this pointer value to jl_dump_method_disasm
jl_atomic_store_release(&this_code->specptr.fptr, (void*)getAddressForFunction(decls.specFunctionObject));
}
if (this_code == codeinst)
fptr = addr;
Expand Down Expand Up @@ -497,10 +510,9 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
assert(src && jl_is_code_info(src));
++UnspecFPtrCount;
_jl_compile_codeinst(unspec, src, unspec->min_world, *jl_ExecutionEngine->getContext());
if (jl_atomic_load_relaxed(&unspec->invoke) == NULL) {
// if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
jl_atomic_store_release(&unspec->invoke, jl_fptr_interpret_call_addr);
}
jl_callptr_t null = nullptr;
// if we hit a codegen bug (or ran into a broken generated function or llvmcall), fall back to the interpreter as a last resort
jl_atomic_cmpswap(&unspec->invoke, &null, jl_fptr_interpret_call_addr);
JL_GC_POP();
}
JL_UNLOCK(&jl_codegen_lock); // Might GC
Expand All @@ -519,7 +531,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
// printing via disassembly
jl_code_instance_t *codeinst = jl_generate_fptr(mi, world);
if (codeinst) {
uintptr_t fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
uintptr_t fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
if (getwrapper)
return jl_dump_fptr_asm(fptr, raw_mc, asm_variant, debuginfo, binary);
uintptr_t specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
Expand Down Expand Up @@ -547,7 +559,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
if (src && (jl_value_t*)src != jl_nothing)
src = jl_uncompress_ir(mi->def.method, codeinst, (jl_array_t*)src);
}
fptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->invoke);
fptr = (uintptr_t)jl_atomic_load_acquire(&codeinst->invoke);
specfptr = (uintptr_t)jl_atomic_load_relaxed(&codeinst->specptr.fptr);
if (src && jl_is_code_info(src)) {
if (fptr == (uintptr_t)jl_fptr_const_return_addr && specfptr == 0) {
Expand Down
Loading

0 comments on commit 6412a56

Please sign in to comment.