Skip to content

Commit

Permalink
[2018-02] Harden JIT job control against cctors deadlocking. (#8235)
Browse files Browse the repository at this point in the history
* [mini] Don't wait for other threads to finish JITing if the current thread is running a cctor.

This eliminates a possible source of deadlocks found during AOT runs of Roslyn.

* [mini] Introduce a 1s timeout to JIT job control to ensure we don't simply deadlock in case of execution dependency bugs.

* [mini] fix spelling5DD
  • Loading branch information
monojenkins authored and luhenry committed Apr 15, 2018
1 parent 34b31f0 commit 4a00c50
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 5 deletions.
3 changes: 3 additions & 0 deletions mono/metadata/threads-types.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,4 +275,7 @@ mono_thread_internal_describe (MonoInternalThread *internal, GString *str);
gboolean
mono_thread_internal_is_current (MonoInternalThread *internal);

gboolean
mono_threads_is_current_thread_in_protected_block (void);

#endif /* _MONO_METADATA_THREADS_TYPES_H_ */
8 changes: 8 additions & 0 deletions mono/metadata/threads.c
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,14 @@ mono_thread_get_abort_prot_block_count (MonoInternalThread *thread)
return (state & ABORT_PROT_BLOCK_MASK) >> ABORT_PROT_BLOCK_SHIFT;
}

gboolean
mono_threads_is_current_thread_in_protected_block (void)
{
MonoInternalThread *thread = mono_thread_internal_current ();

return mono_thread_get_abort_prot_block_count (thread) > 0;
}

void
mono_threads_begin_abort_protected_block (void)
{
Expand Down
32 changes: 27 additions & 5 deletions mono/mini/mini-runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -1832,8 +1832,15 @@ typedef struct {
MonoCoopMutex lock;
} JitCompilationData;

/*
Timeout, in millisecounds, that we wait other threads to finish JITing.
This value can't be too small or we won't see enough methods being reused and it can't be too big to cause massive stalls due to unforseable circunstances.
*/
#define MAX_JIT_TIMEOUT_MS 1000


static JitCompilationData compilation_data;
static int jit_methods_waited, jit_methods_multiple, jit_methods_overload, jit_spurious_wakeups;
static int jit_methods_waited, jit_methods_multiple, jit_methods_overload, jit_spurious_wakeups_or_timeouts;

static void
mini_jit_init_job_control (void)
Expand Down Expand Up @@ -1898,7 +1905,7 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)
mono_counters_register ("JIT compile waited others", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_waited);
mono_counters_register ("JIT compile 1+ jobs", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_multiple);
mono_counters_register ("JIT compile overload wait", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_methods_overload);
mono_counters_register ("JIT compile spurious wakeups", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_spurious_wakeups);
mono_counters_register ("JIT compile spurious wakeups or timeouts", MONO_COUNTER_INT|MONO_COUNTER_JIT, &jit_spurious_wakeups_or_timeouts);
inited = TRUE;
}

Expand All @@ -1915,9 +1922,14 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)

unlock_compilation_data ();
return FALSE;
} else if (jit_tls->active_jit_methods > 0) {
} else if (jit_tls->active_jit_methods > 0 || mono_threads_is_current_thread_in_protected_block ()) {
//We can't suspend the current thread if it's already JITing a method.
//Dependency management is too compilated and we want to get rid of this anyways.

//We can't suspend the current thread if it's running a protected block (such as a cctor)
//We can't rely only on JIT nesting as cctor's can be run from outside the JIT.

//Finally, he hit a timeout or spurious wakeup. We're better off just giving up and keep recompiling
++entry->compilation_count;
++jit_methods_multiple;
++jit_tls->active_jit_methods;
Expand All @@ -1937,15 +1949,25 @@ wait_or_register_method_to_compile (MonoMethod *method, MonoDomain *domain)
++entry->threads_waiting;

g_assert (entry->has_cond);
mono_coop_cond_wait (&entry->cond, &compilation_data.lock);
mono_coop_cond_timedwait (&entry->cond, &compilation_data.lock, MAX_JIT_TIMEOUT_MS);
--entry->threads_waiting;

if (entry->done) {
unref_jit_entry (entry);
unlock_compilation_data ();
return TRUE;
} else {
++jit_spurious_wakeups;
//We hit the timeout or a spurious wakeup, fallback to JITing
g_assert (entry->ref_count > 1);
unref_jit_entry (entry);
++jit_spurious_wakeups_or_timeouts;

++entry->compilation_count;
++jit_methods_multiple;
++jit_tls->active_jit_methods;

unlock_compilation_data ();
return FALSE;
}
}
}
Expand Down

0 comments on commit 4a00c50

Please sign in to comment.