Auto merge of #67900 - nikic:prepare-llvm-10, r=nagisa

diff --git a/src/librustc_codegen_ssa/base.rs b/src/librustc_codegen_ssa/base.rs
index d2823381d887ead97f75593e6d5d1f3bbf611b72..efd560071202cd7aa1ebbc5cf6d3a32553933091 100644
--- a/src/librustc_codegen_ssa/base.rs
+++ b/src/librustc_codegen_ssa/base.rs
@@ -86,7 +86,7 @@ pub fn bin_op_to_icmp_predicate(op: hir::BinOpKind, signed: bool) -> IntPredicate
         }
         op => bug!(
             "comparison_op_to_icmp_predicate: expected comparison operator, \
-                  found {:?}",
+             found {:?}",
             op
         ),
     }
@@ -103,7 +103,7 @@ pub fn bin_op_to_fcmp_predicate(op: hir::BinOpKind) -> RealPredicate {
         op => {
             bug!(
                 "comparison_op_to_fcmp_predicate: expected comparison operator, \
-                  found {:?}",
+                 found {:?}",
                 op
             );
         }
@@ -520,7 +520,7 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
         ongoing_codegen.codegen_finished(tcx);
 
-        assert_and_save_dep_graph(tcx);
+        finalize_tcx(tcx);
 
         ongoing_codegen.check_for_errors(tcx.sess);
 
@@ -609,54 +609,75 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     let total_codegen_time = Lock::new(Duration::new(0, 0));
 
-    let cgu_reuse: Vec<_> = tcx.sess.time("find cgu reuse", || {
-        codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
-    });
-
-    let mut cgus: FxHashMap<usize, _> = if cfg!(parallel_compiler) {
-        tcx.sess.time("compile first CGUs", || {
-            // Try to find one CGU to compile per thread.
-            let cgus: Vec<_> = cgu_reuse
-                .iter()
-                .enumerate()
-                .filter(|&(_, reuse)| reuse == &CguReuse::No)
-                .take(tcx.sess.threads())
-                .collect();
-
-            // Compile the found CGUs in parallel.
-            par_iter(cgus)
-                .map(|(i, _)| {
-                    let start_time = Instant::now();
-                    let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
-                    let mut time = total_codegen_time.lock();
-                    *time += start_time.elapsed();
-                    (i, module)
-                })
-                .collect()
-        })
-    } else {
-        FxHashMap::default()
+    // The non-parallel compiler can only translate codegen units to LLVM IR
+    // on a single thread, leading to a staircase effect where the N LLVM
+    // threads have to wait on the single codegen thread to generate work
+    // for them. The parallel compiler does not have this restriction, so
+    // we can pre-load the LLVM queue in parallel before handing off
+    // coordination to the OngoingCodegen scheduler.
+    //
+    // This is likely a temporary measure. Once we don't have to support the
+    // non-parallel compiler anymore, we can compile CGUs end-to-end in
+    // parallel and get rid of the complicated scheduling logic.
+    let pre_compile_cgus = |cgu_reuse: &[CguReuse]| {
+        if cfg!(parallel_compiler) {
+            tcx.sess.time("compile_first_CGU_batch", || {
+                // Try to find one CGU to compile per thread.
+                let cgus: Vec<_> = cgu_reuse
+                    .iter()
+                    .enumerate()
+                    .filter(|&(_, reuse)| reuse == &CguReuse::No)
+                    .take(tcx.sess.threads())
+                    .collect();
+
+                // Compile the found CGUs in parallel.
+                par_iter(cgus)
+                    .map(|(i, _)| {
+                        let start_time = Instant::now();
+                        let module = backend.compile_codegen_unit(tcx, codegen_units[i].name());
+                        let mut time = total_codegen_time.lock();
+                        *time += start_time.elapsed();
+                        (i, module)
+                    })
+                    .collect()
+            })
+        } else {
+            FxHashMap::default()
+        }
     };
 
-    let mut total_codegen_time = total_codegen_time.into_inner();
+    let mut cgu_reuse = Vec::new();
+    let mut pre_compiled_cgus: Option<FxHashMap<usize, _>> = None;
 
-    for (i, cgu) in codegen_units.into_iter().enumerate() {
+    for (i, cgu) in codegen_units.iter().enumerate() {
         ongoing_codegen.wait_for_signal_to_codegen_item();
         ongoing_codegen.check_for_errors(tcx.sess);
 
+        // Do some setup work in the first iteration
+        if pre_compiled_cgus.is_none() {
+            // Calculate the CGU reuse
+            cgu_reuse = tcx.sess.time("find_cgu_reuse", || {
+                codegen_units.iter().map(|cgu| determine_cgu_reuse(tcx, &cgu)).collect()
+            });
+            // Pre-compile some CGUs
+            pre_compiled_cgus = Some(pre_compile_cgus(&cgu_reuse));
+        }
+
         let cgu_reuse = cgu_reuse[i];
         tcx.sess.cgu_reuse_tracker.set_actual_reuse(&cgu.name().as_str(), cgu_reuse);
 
         match cgu_reuse {
             CguReuse::No => {
-                let (module, cost) = if let Some(cgu) = cgus.remove(&i) {
-                    cgu
-                } else {
-                    let start_time = Instant::now();
-                    let module = backend.compile_codegen_unit(tcx, cgu.name());
-                    total_codegen_time += start_time.elapsed();
-                    module
-                };
+                let (module, cost) =
+                    if let Some(cgu) = pre_compiled_cgus.as_mut().unwrap().remove(&i) {
+                        cgu
+                    } else {
+                        let start_time = Instant::now();
+                        let module = backend.compile_codegen_unit(tcx, cgu.name());
+                        let mut time = total_codegen_time.lock();
+                        *time += start_time.elapsed();
+                        module
+                    };
                 submit_codegened_module_to_llvm(
                     &backend,
                     &ongoing_codegen.coordinator_send,
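
A minimal, standalone sketch of the scheduling pattern introduced above, using only the standard library (std::thread::scope and HashMap stand in for rustc's par_iter and FxHashMap, and compile_unit is a hypothetical stand-in for backend.compile_codegen_unit). It pre-compiles one unit per thread, does that setup lazily on the first loop iteration, and drains the pre-compiled results as the sequential loop reaches them:

use std::collections::HashMap;
use std::thread;

// Hypothetical stand-in for backend.compile_codegen_unit(..).
fn compile_unit(i: usize) -> String {
    format!("module-{}", i)
}

fn main() {
    let units: Vec<usize> = (0..8).collect();
    let threads = 4;

    // Pre-compile one unit per thread so the queue of finished modules is
    // never starved while the main loop walks the units sequentially.
    let pre_compile = |units: &[usize]| -> HashMap<usize, String> {
        thread::scope(|s| {
            units
                .iter()
                .take(threads)
                .map(|&i| s.spawn(move || (i, compile_unit(i))))
                .collect::<Vec<_>>()
                .into_iter()
                .map(|handle| handle.join().unwrap())
                .collect()
        })
    };

    let mut pre_compiled: Option<HashMap<usize, String>> = None;
    for &i in &units {
        // Do the setup work lazily on the first iteration, as in the diff.
        if pre_compiled.is_none() {
            pre_compiled = Some(pre_compile(&units));
        }
        // Reuse a pre-compiled module if one exists, otherwise compile now.
        let module = pre_compiled
            .as_mut()
            .unwrap()
            .remove(&i)
            .unwrap_or_else(|| compile_unit(i));
        println!("submitting {}", module);
    }
}
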
@@ -695,7 +716,11 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     // Since the main thread is sometimes blocked during codegen, we keep track
     // of -Ztime-passes output manually.
-    print_time_passes_entry(tcx.sess.time_passes(), "codegen_to_LLVM_IR", total_codegen_time);
+    print_time_passes_entry(
+        tcx.sess.time_passes(),
+        "codegen_to_LLVM_IR",
+        total_codegen_time.into_inner(),
+    );
 
     ::rustc_incremental::assert_module_sources::assert_module_sources(tcx);
 
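
The timing change above follows the same multi-threaded shape: each worker adds its elapsed time to a shared counter while holding a lock, and the total is only extracted once everything has finished. A minimal sketch with std::sync::Mutex standing in for rustc's Lock (hence the extra unwrap calls):

use std::sync::Mutex;
use std::thread;
use std::time::{Duration, Instant};

fn main() {
    // Shared accumulator for time spent in (hypothetical) codegen work.
    let total_codegen_time = Mutex::new(Duration::new(0, 0));

    thread::scope(|s| {
        for _ in 0..4 {
            s.spawn(|| {
                let start_time = Instant::now();
                // ... compile a codegen unit here ...
                let mut time = total_codegen_time.lock().unwrap();
                *time += start_time.elapsed();
            });
        }
    });

    // All workers are done, so the Mutex can be consumed and the accumulated
    // Duration reported once, mirroring the into_inner() call above.
    let total = total_codegen_time.into_inner().unwrap();
    println!("codegen_to_LLVM_IR: {:?}", total);
}
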
@@ -703,7 +728,8 @@ pub fn codegen_crate<B: ExtraBackendMethods>(
 
     ongoing_codegen.check_for_errors(tcx.sess);
 
-    assert_and_save_dep_graph(tcx);
+    finalize_tcx(tcx);
+
     ongoing_codegen.into_inner()
 }
 
@@ -754,10 +780,16 @@ fn drop(&mut self) {
     }
 }
 
-fn assert_and_save_dep_graph(tcx: TyCtxt<'_>) {
+fn finalize_tcx(tcx: TyCtxt<'_>) {
     tcx.sess.time("assert_dep_graph", || ::rustc_incremental::assert_dep_graph(tcx));
-
     tcx.sess.time("serialize_dep_graph", || ::rustc_incremental::save_dep_graph(tcx));
+
+    // We assume that no queries are run past here. If there are new queries
+    // after this point, they'll show up as "<unknown>" in self-profiling data.
+    {
+        let _prof_timer = tcx.prof.generic_activity("self_profile_alloc_query_strings");
+        tcx.alloc_self_profile_query_strings();
+    }
 }
 
 impl CrateInfo {
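
The _prof_timer in the finalize_tcx hunk above is an RAII guard: it starts timing when it is created and records the activity when it is dropped at the end of the block. A minimal sketch of that shape, with a hypothetical ActivityTimer standing in for whatever tcx.prof.generic_activity actually returns:

use std::time::Instant;

// Hypothetical stand-in for the guard returned by tcx.prof.generic_activity().
struct ActivityTimer {
    label: &'static str,
    start: Instant,
}

fn generic_activity(label: &'static str) -> ActivityTimer {
    ActivityTimer { label, start: Instant::now() }
}

impl Drop for ActivityTimer {
    fn drop(&mut self) {
        // The real profiler records an event; here we just print the duration.
        println!("{}: {:?}", self.label, self.start.elapsed());
    }
}

fn main() {
    {
        let _prof_timer = generic_activity("self_profile_alloc_query_strings");
        // ... tcx.alloc_self_profile_query_strings() would run here ...
    } // _prof_timer is dropped here and the elapsed time is recorded.
}
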
@@ -766,7 +798,6 @@ pub fn new(tcx: TyCtxt<'_>) -> CrateInfo {
             panic_runtime: None,
             compiler_builtins: None,
             profiler_runtime: None,
-            sanitizer_runtime: None,
             is_no_builtins: Default::default(),
             native_libraries: Default::default(),
             used_libraries: tcx.native_libraries(LOCAL_CRATE),
@@ -802,9 +833,6 @@ pub fn new(tcx: TyCtxt<'_>) -> CrateInfo {
             if tcx.is_profiler_runtime(cnum) {
                 info.profiler_runtime = Some(cnum);
             }
-            if tcx.is_sanitizer_runtime(cnum) {
-                info.sanitizer_runtime = Some(cnum);
-            }
             if tcx.is_no_builtins(cnum) {
                 info.is_no_builtins.insert(cnum);
             }