From 14c6835e03b285f7c73459ca851848fe8513b7a5 Mon Sep 17 00:00:00 2001 From: Alex Crichton Date: Wed, 31 Oct 2018 11:32:27 -0700 Subject: [PATCH] rustc: Wait for all codegen threads to exit This commit updates rustc to wait for all codegen threads to exit before allowing the main thread to exit. This is a stab in the dark to fix the mysterious segfaults appearing on #55238, and hopefully we'll see whether this actually fixes things in practice... --- src/Cargo.lock | 12 ------ src/librustc_codegen_llvm/back/write.rs | 57 ++++++++++++++++++++++--- src/librustc_codegen_llvm/base.rs | 57 +++++++++++++++++++++++-- 3 files changed, 104 insertions(+), 22 deletions(-) diff --git a/src/Cargo.lock b/src/Cargo.lock index dce5d4eb512..2b00cde7f19 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -15,17 +15,6 @@ dependencies = [ "rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "alloc_jemalloc" -version = "0.0.0" -dependencies = [ - "build_helper 0.1.0", - "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", - "compiler_builtins 0.0.0", - "core 0.0.0", - "libc 0.0.0", -] - [[package]] name = "alloc_system" version = "0.0.0" @@ -2696,7 +2685,6 @@ name = "std" version = "0.0.0" dependencies = [ "alloc 0.0.0", - "alloc_jemalloc 0.0.0", "alloc_system 0.0.0", "build_helper 0.1.0", "cc 1.0.25 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/src/librustc_codegen_llvm/back/write.rs b/src/librustc_codegen_llvm/back/write.rs index 81619c21975..b5ed256cef6 100644 --- a/src/librustc_codegen_llvm/back/write.rs +++ b/src/librustc_codegen_llvm/back/write.rs @@ -1508,6 +1508,7 @@ enum Message { }, CodegenComplete, CodegenItem, + CodegenAborted, } struct Diagnostic { @@ -1788,6 +1789,7 @@ fn start_executing_work(tcx: TyCtxt, let mut needs_lto = Vec::new(); let mut lto_import_only_modules = Vec::new(); let mut started_lto = false; + let mut codegen_aborted = false; // This flag tracks whether all items have gone through codegens let mut codegen_done = false; @@ -1805,13 +1807,19 @@ fn start_executing_work(tcx: TyCtxt, let mut llvm_start_time = None; // Run the message loop while there's still anything that needs message - // processing: + // processing. Note that as soon as codegen is aborted we simply want to + // wait for all existing work to finish, so many of the conditions here + // only apply if codegen hasn't been aborted as they represent pending + // work to be done. while !codegen_done || - work_items.len() > 0 || running > 0 || - needs_lto.len() > 0 || - lto_import_only_modules.len() > 0 || - main_thread_worker_state != MainThreadWorkerState::Idle { + (!codegen_aborted && ( + work_items.len() > 0 || + needs_lto.len() > 0 || + lto_import_only_modules.len() > 0 || + main_thread_worker_state != MainThreadWorkerState::Idle + )) + { // While there are still CGUs to be codegened, the coordinator has // to decide how to utilize the compiler processes implicit Token: @@ -1840,6 +1848,9 @@ fn start_executing_work(tcx: TyCtxt, spawn_work(cgcx, item); } } + } else if codegen_aborted { + // don't queue up any more work if codegen was aborted, we're + // just waiting for our existing children to finish } else { // If we've finished everything related to normal codegen // then it must be the case that we've got some LTO work to do. @@ -1904,7 +1915,7 @@ fn start_executing_work(tcx: TyCtxt, // Spin up what work we can, only doing this while we've got available // parallelism slots and work left to spawn. - while work_items.len() > 0 && running < tokens.len() { + while !codegen_aborted && work_items.len() > 0 && running < tokens.len() { let (item, _) = work_items.pop().unwrap(); maybe_start_llvm_timer(cgcx.config(item.module_kind()), @@ -1969,6 +1980,7 @@ fn start_executing_work(tcx: TyCtxt, if !cgcx.opts.debugging_opts.no_parallel_llvm { helper.request_token(); } + assert!(!codegen_aborted); assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning); main_thread_worker_state = MainThreadWorkerState::Idle; @@ -1976,11 +1988,26 @@ fn start_executing_work(tcx: TyCtxt, Message::CodegenComplete => { codegen_done = true; + assert!(!codegen_aborted); assert_eq!(main_thread_worker_state, MainThreadWorkerState::Codegenning); main_thread_worker_state = MainThreadWorkerState::Idle; } + // If codegen is aborted that means translation was aborted due + // to some normal-ish compiler error. In this situation we want + // to exit as soon as possible, but we want to make sure all + // existing work has finished. Flag codegen as being done, and + // then conditions above will ensure no more work is spawned but + // we'll keep executing this loop until `running` hits 0. + Message::CodegenAborted => { + assert!(!codegen_aborted); + codegen_done = true; + codegen_aborted = true; + assert_eq!(main_thread_worker_state, + MainThreadWorkerState::Codegenning); + } + // If a thread exits successfully then we drop a token associated // with that worker and update our `running` count. We may later // re-acquire a token to continue running more work. We may also not @@ -2446,6 +2473,19 @@ pub fn codegen_finished(&self, tcx: TyCtxt) { drop(self.coordinator_send.send(Box::new(Message::CodegenComplete))); } + /// Consume this context indicating that codegen was entirely aborted, and + /// we need to exit as quickly as possible. + /// + /// This method blocks the current thread until all worker threads have + /// finished, and all worker threads should have exited or be real close to + /// exiting at this point. + pub fn codegen_aborted(self) { + // Signal to the coordinator it should spawn no more work and start + // shutdown. + drop(self.coordinator_send.send(Box::new(Message::CodegenAborted))); + drop(self.future.join()); + } + pub fn check_for_errors(&self, sess: &Session) { self.shared_emitter_main.check(sess, false); } @@ -2464,6 +2504,11 @@ pub fn wait_for_signal_to_codegen_item(&self) { } } +// impl Drop for OngoingCodegen { +// fn drop(&mut self) { +// } +// } + pub(crate) fn submit_codegened_module_to_llvm(tcx: TyCtxt, module: ModuleCodegen, cost: u64) { diff --git a/src/librustc_codegen_llvm/base.rs b/src/librustc_codegen_llvm/base.rs index a4c7a7123b9..a55aafe8b57 100644 --- a/src/librustc_codegen_llvm/base.rs +++ b/src/librustc_codegen_llvm/base.rs @@ -76,12 +76,13 @@ use rustc_data_structures::sync::Lrc; use std::any::Any; +use std::cmp; use std::ffi::CString; -use std::sync::Arc; -use std::time::{Instant, Duration}; use std::i32; -use std::cmp; +use std::ops::{Deref, DerefMut}; +use std::sync::Arc; use std::sync::mpsc; +use std::time::{Instant, Duration}; use syntax_pos::Span; use syntax_pos::symbol::InternedString; use syntax::attr; @@ -820,6 +821,7 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, metadata, rx, codegen_units.len()); + let ongoing_codegen = AbortCodegenOnDrop(Some(ongoing_codegen)); // Codegen an allocator shim, if necessary. // @@ -949,7 +951,54 @@ pub fn codegen_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, ongoing_codegen.check_for_errors(tcx.sess); assert_and_save_dep_graph(tcx); - ongoing_codegen + ongoing_codegen.into_inner() +} + +/// A curious wrapper structure whose only purpose is to call `codegen_aborted` +/// when it's dropped abnormally. +/// +/// In the process of working on rust-lang/rust#55238 a mysterious segfault was +/// stumbled upon. The segfault was never reproduced locally, but it was +/// suspected to be releated to the fact that codegen worker threads were +/// sticking around by the time the main thread was exiting, causing issues. +/// +/// This structure is an attempt to fix that issue where the `codegen_aborted` +/// message will block until all workers have finished. This should ensure that +/// even if the main codegen thread panics we'll wait for pending work to +/// complete before returning from the main thread, hopefully avoiding +/// segfaults. +/// +/// If you see this comment in the code, then it means that this workaround +/// worked! We may yet one day track down the mysterious cause of that +/// segfault... +struct AbortCodegenOnDrop(Option); + +impl AbortCodegenOnDrop { + fn into_inner(mut self) -> OngoingCodegen { + self.0.take().unwrap() + } +} + +impl Deref for AbortCodegenOnDrop { + type Target = OngoingCodegen; + + fn deref(&self) -> &OngoingCodegen { + self.0.as_ref().unwrap() + } +} + +impl DerefMut for AbortCodegenOnDrop { + fn deref_mut(&mut self) -> &mut OngoingCodegen { + self.0.as_mut().unwrap() + } +} + +impl Drop for AbortCodegenOnDrop { + fn drop(&mut self) { + if let Some(codegen) = self.0.take() { + codegen.codegen_aborted(); + } + } } fn assert_and_save_dep_graph<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>) { -- 2.44.0