From 5a09e1213543fca0cbc9a0a61643e8d1bd4fccd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Mi=C4=85sko?= Date: Fri, 5 Nov 2021 00:00:00 +0000 Subject: [PATCH] Initialize LLVM time trace profiler on each code generation thread In https://reviews.llvm.org/D71059 (LLVM 11), the time trace profiler was extended to support multiple threads. `timeTraceProfilerInitialize` creates a thread local profiler instance. When a thread finishes `timeTraceProfilerFinishThread` moves a thread local instance into a global collection of instances. Finally when all codegen work is complete `timeTraceProfilerWrite` writes data from the current thread local instance and the instances in global collection of instances. Previously, the profiler was initialized on a single thread only. Since this thread performs no code generation on its own, the resulting profile was empty. Update LLVM codegen to initialize & finish time trace profiler on each code generation thread. --- compiler/rustc_codegen_llvm/src/lib.rs | 49 +++++++++ compiler/rustc_codegen_llvm/src/llvm/ffi.rs | 2 + compiler/rustc_codegen_llvm/src/llvm_util.rs | 5 - compiler/rustc_codegen_ssa/src/back/write.rs | 102 +++++++++--------- .../rustc_codegen_ssa/src/traits/backend.rs | 22 ++++ .../rustc_llvm/llvm-wrapper/PassWrapper.cpp | 4 + 6 files changed, 128 insertions(+), 56 deletions(-) diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 8f4d79e7147d3..64fedb7bc1a5c 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -76,6 +76,27 @@ mod value; #[derive(Clone)] pub struct LlvmCodegenBackend(()); +struct TimeTraceProfiler { + enabled: bool, +} + +impl TimeTraceProfiler { + fn new(enabled: bool) -> Self { + if enabled { + unsafe { llvm::LLVMTimeTraceProfilerInitialize() } + } + TimeTraceProfiler { enabled } + } +} + +impl Drop for TimeTraceProfiler { + fn drop(&mut self) { + if self.enabled { + unsafe { 
llvm::LLVMTimeTraceProfilerFinishThread() } + } + } +} + impl ExtraBackendMethods for LlvmCodegenBackend { fn new_metadata(&self, tcx: TyCtxt<'_>, mod_name: &str) -> ModuleLlvm { ModuleLlvm::new_metadata(tcx, mod_name) @@ -119,6 +140,34 @@ impl ExtraBackendMethods for LlvmCodegenBackend { fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str> { llvm_util::tune_cpu(sess) } + + fn spawn_thread(time_trace: bool, f: F) -> std::thread::JoinHandle + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static, + { + std::thread::spawn(move || { + let _profiler = TimeTraceProfiler::new(time_trace); + f() + }) + } + + fn spawn_named_thread( + time_trace: bool, + name: String, + f: F, + ) -> std::io::Result> + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static, + { + std::thread::Builder::new().name(name).spawn(move || { + let _profiler = TimeTraceProfiler::new(time_trace); + f() + }) + } } impl WriteBackendMethods for LlvmCodegenBackend { diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 21d2388fc3054..749eec459aca1 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1737,6 +1737,8 @@ extern "C" { pub fn LLVMTimeTraceProfilerInitialize(); + pub fn LLVMTimeTraceProfilerFinishThread(); + pub fn LLVMTimeTraceProfilerFinish(FileName: *const c_char); pub fn LLVMAddAnalysisPasses(T: &'a TargetMachine, PM: &PassManager<'a>); diff --git a/compiler/rustc_codegen_llvm/src/llvm_util.rs b/compiler/rustc_codegen_llvm/src/llvm_util.rs index 246bb88885d85..34c28938c85d3 100644 --- a/compiler/rustc_codegen_llvm/src/llvm_util.rs +++ b/compiler/rustc_codegen_llvm/src/llvm_util.rs @@ -113,11 +113,6 @@ unsafe fn configure_llvm(sess: &Session) { } if sess.opts.debugging_opts.llvm_time_trace { - // time-trace is not thread safe and running it in parallel will cause seg faults. 
- if !sess.opts.debugging_opts.no_parallel_llvm { - bug!("`-Z llvm-time-trace` requires `-Z no-parallel-llvm") - } - llvm::LLVMTimeTraceProfilerInitialize(); } diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index da34612ce76ac..b2edc6c0183a0 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -310,6 +310,7 @@ pub struct CodegenContext { pub no_landing_pads: bool, pub save_temps: bool, pub fewer_names: bool, + pub time_trace: bool, pub exported_symbols: Option>, pub opts: Arc, pub crate_types: Vec, @@ -1039,6 +1040,7 @@ fn start_executing_work( no_landing_pads: sess.panic_strategy() == PanicStrategy::Abort, fewer_names: sess.fewer_names(), save_temps: sess.opts.cg.save_temps, + time_trace: sess.opts.debugging_opts.llvm_time_trace, opts: Arc::new(sess.opts.clone()), prof: sess.prof.clone(), exported_symbols, @@ -1198,7 +1200,7 @@ fn start_executing_work( // Each LLVM module is automatically sent back to the coordinator for LTO if // necessary. There's already optimizations in place to avoid sending work // back to the coordinator if LTO isn't requested. - return thread::spawn(move || { + return B::spawn_thread(cgcx.time_trace, move || { let mut worker_id_counter = 0; let mut free_worker_ids = Vec::new(); let mut get_worker_id = |free_worker_ids: &mut Vec| { @@ -1615,59 +1617,57 @@ fn start_executing_work( pub struct WorkerFatalError; fn spawn_work(cgcx: CodegenContext, work: WorkItem) { - let builder = thread::Builder::new().name(work.short_description()); - builder - .spawn(move || { - // Set up a destructor which will fire off a message that we're done as - // we exit. 
- struct Bomb { - coordinator_send: Sender>, - result: Option, FatalError>>, - worker_id: usize, - } - impl Drop for Bomb { - fn drop(&mut self) { - let worker_id = self.worker_id; - let msg = match self.result.take() { - Some(Ok(WorkItemResult::Compiled(m))) => { - Message::Done:: { result: Ok(m), worker_id } - } - Some(Ok(WorkItemResult::NeedsLink(m))) => { - Message::NeedsLink:: { module: m, worker_id } - } - Some(Ok(WorkItemResult::NeedsFatLTO(m))) => { - Message::NeedsFatLTO:: { result: m, worker_id } - } - Some(Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))) => { - Message::NeedsThinLTO:: { name, thin_buffer, worker_id } - } - Some(Err(FatalError)) => { - Message::Done:: { result: Err(Some(WorkerFatalError)), worker_id } - } - None => Message::Done:: { result: Err(None), worker_id }, - }; - drop(self.coordinator_send.send(Box::new(msg))); - } + B::spawn_named_thread(cgcx.time_trace, work.short_description(), move || { + // Set up a destructor which will fire off a message that we're done as + // we exit. 
+ struct Bomb { + coordinator_send: Sender>, + result: Option, FatalError>>, + worker_id: usize, + } + impl Drop for Bomb { + fn drop(&mut self) { + let worker_id = self.worker_id; + let msg = match self.result.take() { + Some(Ok(WorkItemResult::Compiled(m))) => { + Message::Done:: { result: Ok(m), worker_id } + } + Some(Ok(WorkItemResult::NeedsLink(m))) => { + Message::NeedsLink:: { module: m, worker_id } + } + Some(Ok(WorkItemResult::NeedsFatLTO(m))) => { + Message::NeedsFatLTO:: { result: m, worker_id } + } + Some(Ok(WorkItemResult::NeedsThinLTO(name, thin_buffer))) => { + Message::NeedsThinLTO:: { name, thin_buffer, worker_id } + } + Some(Err(FatalError)) => { + Message::Done:: { result: Err(Some(WorkerFatalError)), worker_id } + } + None => Message::Done:: { result: Err(None), worker_id }, + }; + drop(self.coordinator_send.send(Box::new(msg))); } + } - let mut bomb = Bomb:: { - coordinator_send: cgcx.coordinator_send.clone(), - result: None, - worker_id: cgcx.worker, - }; + let mut bomb = Bomb:: { + coordinator_send: cgcx.coordinator_send.clone(), + result: None, + worker_id: cgcx.worker, + }; - // Execute the work itself, and if it finishes successfully then flag - // ourselves as a success as well. - // - // Note that we ignore any `FatalError` coming out of `execute_work_item`, - // as a diagnostic was already sent off to the main thread - just - // surface that there was an error in this worker. - bomb.result = { - let _prof_timer = work.start_profiling(&cgcx); - Some(execute_work_item(&cgcx, work)) - }; - }) - .expect("failed to spawn thread"); + // Execute the work itself, and if it finishes successfully then flag + // ourselves as a success as well. + // + // Note that we ignore any `FatalError` coming out of `execute_work_item`, + // as a diagnostic was already sent off to the main thread - just + // surface that there was an error in this worker. 
+ bomb.result = { + let _prof_timer = work.start_profiling(&cgcx); + Some(execute_work_item(&cgcx, work)) + }; + }) + .expect("failed to spawn thread"); } enum SharedEmitterMessage { diff --git a/compiler/rustc_codegen_ssa/src/traits/backend.rs b/compiler/rustc_codegen_ssa/src/traits/backend.rs index 8fef8314a5ccd..9c8bc3b210988 100644 --- a/compiler/rustc_codegen_ssa/src/traits/backend.rs +++ b/compiler/rustc_codegen_ssa/src/traits/backend.rs @@ -142,4 +142,26 @@ pub trait ExtraBackendMethods: CodegenBackend + WriteBackendMethods + Sized + Se ) -> TargetMachineFactoryFn; fn target_cpu<'b>(&self, sess: &'b Session) -> &'b str; fn tune_cpu<'b>(&self, sess: &'b Session) -> Option<&'b str>; + + fn spawn_thread(_time_trace: bool, f: F) -> std::thread::JoinHandle + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static, + { + std::thread::spawn(f) + } + + fn spawn_named_thread( + _time_trace: bool, + name: String, + f: F, + ) -> std::io::Result> + where + F: FnOnce() -> T, + F: Send + 'static, + T: Send + 'static, + { + std::thread::Builder::new().name(name).spawn(f) + } } diff --git a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp index 32b866e81b131..ddbc3c5912836 100644 --- a/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/PassWrapper.cpp @@ -75,6 +75,10 @@ extern "C" void LLVMTimeTraceProfilerInitialize() { /* ProcName */ "rustc"); } +extern "C" void LLVMTimeTraceProfilerFinishThread() { + timeTraceProfilerFinishThread(); +} + extern "C" void LLVMTimeTraceProfilerFinish(const char* FileName) { StringRef FN(FileName); std::error_code EC;