Skip to content

Commit

Permalink
Don't double-count inference time (#48033)
Browse files Browse the repository at this point in the history
* Use the same timing reentrancy counter for both inference and codegen

* Add some timing tests

* Add macro-based timer test
  • Loading branch information
pchintalapudi committed Dec 29, 2022
1 parent 6ac5159 commit 1fda4bb
Show file tree
Hide file tree
Showing 6 changed files with 90 additions and 29 deletions.
15 changes: 9 additions & 6 deletions src/aotcompile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,7 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
jl_code_info_t *src = NULL;
JL_GC_PUSH1(&src);
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
orc::ThreadSafeContext ctx;
orc::ThreadSafeModule backing;
if (!llvmmod) {
Expand Down Expand Up @@ -471,12 +471,13 @@ void *jl_create_native_impl(jl_array_t *methods, LLVMOrcThreadSafeModuleRef llvm
}

data->M = std::move(clone);
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!ct->reentrant_timing-- && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
ct->reentrant_codegen--;
return (void*)data;
}

Expand Down Expand Up @@ -1138,8 +1139,10 @@ void jl_get_llvmf_defn_impl(jl_llvmf_dump_t* dump, jl_method_instance_t *mi, siz
F = cast<Function>(m.getModuleUnlocked()->getNamedValue(*fname));
}
JL_GC_POP();
if (measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
if (F) {
dump->TSM = wrap(new orc::ThreadSafeModule(std::move(m)));
dump->F = wrap(F);
Expand Down
10 changes: 2 additions & 8 deletions src/gf.c
Original file line number Diff line number Diff line change
Expand Up @@ -3542,15 +3542,15 @@ int jl_has_concrete_subtype(jl_value_t *typ)
JL_DLLEXPORT void jl_typeinf_timing_begin(void)
{
jl_task_t *ct = jl_current_task;
if (ct->reentrant_inference == 1) {
if (!ct->reentrant_timing++) {
ct->inference_start_time = jl_hrtime();
}
}

JL_DLLEXPORT void jl_typeinf_timing_end(void)
{
jl_task_t *ct = jl_current_task;
if (ct->reentrant_inference == 1) {
if (!--ct->reentrant_timing) {
if (jl_atomic_load_relaxed(&jl_measure_compile_time_enabled)) {
uint64_t inftime = jl_hrtime() - ct->inference_start_time;
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, inftime);
Expand All @@ -3562,16 +3562,10 @@ JL_DLLEXPORT void jl_typeinf_timing_end(void)
JL_DLLEXPORT void jl_typeinf_lock_begin(void)
{
JL_LOCK(&jl_codegen_lock);
//Although this is claiming to be a typeinfer lock, it is actually
//affecting the codegen lock count, not type inference's inferencing count
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen++;
}

JL_DLLEXPORT void jl_typeinf_lock_end(void)
{
jl_task_t *ct = jl_current_task;
ct->reentrant_codegen--;
JL_UNLOCK(&jl_codegen_lock);
}

Expand Down
31 changes: 19 additions & 12 deletions src/jitlayers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ extern "C" JL_DLLEXPORT
int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *sysimg, jl_value_t *declrt, jl_value_t *sigt)
{
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand Down Expand Up @@ -340,8 +340,10 @@ int jl_compile_extern_c_impl(LLVMOrcThreadSafeModuleRef llvmmod, void *p, void *
jl_ExecutionEngine->addModule(std::move(*into));
}
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
if (ctx.getContext()) {
jl_ExecutionEngine->releaseContext(std::move(ctx));
}
Expand Down Expand Up @@ -396,7 +398,7 @@ extern "C" JL_DLLEXPORT
jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES_ROOT, size_t world)
{
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
bool is_recompile = false;
Expand Down Expand Up @@ -449,10 +451,11 @@ jl_code_instance_t *jl_generate_fptr_impl(jl_method_instance_t *mi JL_PROPAGATES
codeinst = NULL;
}
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled) {
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
uint64_t t_comp = jl_hrtime() - compiler_start_time;
if (is_recompile)
if (is_recompile) {
jl_atomic_fetch_add_relaxed(&jl_cumulative_recompile_time, t_comp);
}
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, t_comp);
}
JL_GC_POP();
Expand All @@ -466,7 +469,7 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
return;
}
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand Down Expand Up @@ -500,8 +503,10 @@ void jl_generate_fptr_for_unspecialized_impl(jl_code_instance_t *unspec)
JL_GC_POP();
}
JL_UNLOCK(&jl_codegen_lock); // Might GC
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
}


Expand All @@ -522,7 +527,7 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
// (using sentinel value `1` instead)
// so create an exception here so we can print pretty our lies
auto ct = jl_current_task;
ct->reentrant_codegen++;
ct->reentrant_timing++;
uint64_t compiler_start_time = 0;
uint8_t measure_compile_time_enabled = jl_atomic_load_relaxed(&jl_measure_compile_time_enabled);
if (measure_compile_time_enabled)
Expand Down Expand Up @@ -552,8 +557,10 @@ jl_value_t *jl_dump_method_asm_impl(jl_method_instance_t *mi, size_t world,
JL_GC_POP();
}
JL_UNLOCK(&jl_codegen_lock);
if (!--ct->reentrant_codegen && measure_compile_time_enabled)
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, (jl_hrtime() - compiler_start_time));
if (!--ct->reentrant_timing && measure_compile_time_enabled) {
auto end = jl_hrtime();
jl_atomic_fetch_add_relaxed(&jl_cumulative_compile_time, end - compiler_start_time);
}
}
if (specfptr != 0)
return jl_dump_fptr_asm(specfptr, raw_mc, asm_variant, debuginfo, binary);
Expand Down
2 changes: 1 addition & 1 deletion src/julia.h
Original file line number Diff line number Diff line change
Expand Up @@ -1938,7 +1938,7 @@ typedef struct _jl_task_t {
size_t bufsz; // actual sizeof stkbuf
uint64_t inference_start_time; // time when inference started
uint16_t reentrant_inference; // How many times we've reentered inference
uint16_t reentrant_codegen; // How many times we've reentered codegen
uint16_t reentrant_timing; // How many times we've reentered timing
unsigned int copy_stack:31; // sizeof stack for copybuf
unsigned int started:1;
} jl_task_t;
Expand Down
4 changes: 2 additions & 2 deletions src/task.c
Original file line number Diff line number Diff line change
Expand Up @@ -938,7 +938,7 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
t->threadpoolid = ct->threadpoolid;
t->ptls = NULL;
t->world_age = ct->world_age;
t->reentrant_codegen = 0;
t->reentrant_timing = 0;
t->reentrant_inference = 0;
t->inference_start_time = 0;

Expand Down Expand Up @@ -1526,7 +1526,7 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
ct->sticky = 1;
ct->ptls = ptls;
ct->world_age = 1; // OK to run Julia code on this task
ct->reentrant_codegen = 0;
ct->reentrant_timing = 0;
ct->reentrant_inference = 0;
ct->inference_start_time = 0;
ptls->root_task = ct;
Expand Down
57 changes: 57 additions & 0 deletions test/misc.jl
Original file line number Diff line number Diff line change
Expand Up @@ -353,13 +353,49 @@ end

after_comp, after_recomp = Base.cumulative_compile_time_ns() # no need to turn timing off, @time will do that
@test after_comp >= before_comp;
@test after_recomp >= before_recomp;
@test after_recomp - before_recomp <= after_comp - before_comp;

# should be approximately 60,000,000 ns, we definitely shouldn't exceed 100x that value
# failing this probably means an uninitialized variable somewhere
@test after_comp - before_comp < 6_000_000_000;

end # redirect_stdout

# issue #48024, avoid overcounting timers
begin
double(x::Real) = 2x;
calldouble(container) = double(container[1]);
calldouble2(container) = calldouble(container);

Base.Experimental.@force_compile;
local elapsed = Base.time_ns();
Base.cumulative_compile_timing(true);
local compiles = Base.cumulative_compile_time_ns();
@eval calldouble([1.0]);
Base.cumulative_compile_timing(false);
compiles = Base.cumulative_compile_time_ns() .- compiles;
elapsed = Base.time_ns() - elapsed;

# compile time should be at most total time
@test compiles[1] <= elapsed
# recompile time should be at most compile time
@test compiles[2] <= compiles[1]

elapsed = Base.time_ns();
Base.cumulative_compile_timing(true);
compiles = Base.cumulative_compile_time_ns();
@eval calldouble(1.0);
Base.cumulative_compile_timing(false);
compiles = Base.cumulative_compile_time_ns() .- compiles;
elapsed = Base.time_ns() - elapsed;

# compile time should be at most total time
@test compiles[1] <= elapsed
# recompile time should be at most compile time
@test compiles[2] <= compiles[1]
end

macro capture_stdout(ex)
quote
mktemp() do fname, f
Expand All @@ -372,6 +408,27 @@ macro capture_stdout(ex)
end
end

# issue #48024, but with the time macro itself
begin
double(x::Real) = 2x;
calldouble(container) = double(container[1]);
calldouble2(container) = calldouble(container);

local first = @capture_stdout @time @eval calldouble([1.0])
local second = @capture_stdout @time @eval calldouble2(1.0)

# these functions were not recompiled
local matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", first))
@test length(matches) == 1
@test parse(Float64, matches[1][1]) > 0.0
@test parse(Float64, matches[1][1]) <= 100.0

matches = collect(eachmatch(r"(\d+(?:\.\d+)?)%", second))
@test length(matches) == 1
@test parse(Float64, matches[1][1]) > 0.0
@test parse(Float64, matches[1][1]) <= 100.0
end

# compilation reports in @time, @timev
let f = gensym("f"), callf = gensym("callf"), call2f = gensym("call2f")
@eval begin
Expand Down

0 comments on commit 1fda4bb

Please sign in to comment.