rustc: Prepare to enable ThinLTO by default

author Alex Crichton <alex@alexcrichton.com>

Sat, 25 Nov 2017 19:13:58 +0000 (11:13 -0800)

committer Alex Crichton <alex@alexcrichton.com>

Thu, 30 Nov 2017 15:17:53 +0000 (07:17 -0800)
author Alex Crichton <alex@alexcrichton.com>
Sat, 25 Nov 2017 19:13:58 +0000 (11:13 -0800)
committer Alex Crichton <alex@alexcrichton.com>
Thu, 30 Nov 2017 15:17:53 +0000 (07:17 -0800)
diff --git a/src/librustc/session/config.rs b/src/librustc/session/config.rs

index 630832372704ac0699d941edcc4789feba3913ab..0efd24ae132f0626ca68b74ade7688b4df2345db 100644 (file)
--- a/src/librustc/session/config.rs
+++ b/src/librustc/session/config.rs
@@ -383,8 +383,13 @@ pub struct Options {
          // try to not rely on this too much.
          actually_rustdoc: bool [TRACKED],
  
-        // Number of object files/codegen units to produce on the backend
+        // Codegen unit / ThinLTO settings that are forced as a result of
+        // parsing command line options. These are not necessarily what rustc
+        // was invoked with; they are adjusted to agree with options such as
+        // `--emit llvm-ir`, which are often incompatible with rustc's
+        // defaults for these settings.
          cli_forced_codegen_units: Option<usize> [UNTRACKED],
+        cli_forced_thinlto: Option<bool> [UNTRACKED],
      }
  );
  
@@ -566,6 +571,7 @@ pub fn basic_options() -> Options {
          debug_assertions: true,
          actually_rustdoc: false,
          cli_forced_codegen_units: None,
+        cli_forced_thinlto: None,
      }
  }
  
@@ -1165,7 +1171,7 @@ fn parse_optimization_fuel(slot: &mut Option<(String, u64)>, v: Option<&str>) ->
                   "run the non-lexical lifetimes MIR pass"),
      trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
          "generate a graphical HTML report of time spent in trans and LLVM"),
-    thinlto: bool = (false, parse_bool, [TRACKED],
+    thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
          "enable ThinLTO when possible"),
      inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
          "control whether #[inline] functions are in all cgus"),
@@ -1601,6 +1607,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
  
      let mut cg = build_codegen_options(matches, error_format);
      let mut codegen_units = cg.codegen_units;
+    let mut thinlto = None;
  
      // Issue #30063: if user requests llvm-related output to one
      // particular path, disable codegen-units.
@@ -1622,9 +1629,13 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
                      }
                      early_warn(error_format, "resetting to default -C codegen-units=1");
                      codegen_units = Some(1);
+                    thinlto = Some(false);
                  }
              }
-            _ => codegen_units = Some(1),
+            _ => {
+                codegen_units = Some(1);
+                thinlto = Some(false);
+            }
          }
      }
  
@@ -1834,6 +1845,7 @@ pub fn build_session_options_and_crate_config(matches: &getopts::Matches)
          debug_assertions,
          actually_rustdoc: false,
          cli_forced_codegen_units: codegen_units,
+        cli_forced_thinlto: thinlto,
      },
      cfg)
  }
diff --git a/src/librustc/session/mod.rs b/src/librustc/session/mod.rs

index 227efcf4d6e2198b258dc99082b8f21e2cf3b51b..df5805bacd41ae8a1248c59a0195c4a60a1fbb55 100644 (file)
--- a/src/librustc/session/mod.rs
+++ b/src/librustc/session/mod.rs
@@ -656,30 +656,91 @@ pub fn codegen_units(&self) -> usize {
              return n as usize
          }
  
+        // Why are 16 codegen units the intended default at all optimization levels?
+        //
+        // The main reason for enabling multiple codegen units by default is to
+        // leverage the ability for the trans backend to do translation and
+        // codegen in parallel. This allows us, especially for large crates, to
+        // make good use of all available resources on the machine once we've
+        // hit that stage of compilation. Large crates especially then often
+        // take a long time in trans/codegen and this helps us amortize that
+        // cost.
+        //
+        // Note that a high number here doesn't mean that we'll be spawning a
+        // large number of threads in parallel. The backend of rustc contains
+        // global rate limiting through the `jobserver` crate so we'll never
+        // overload the system with too much work, but rather we'll only be
+        // optimizing when we're otherwise cooperating with other instances of
+        // rustc.
+        //
+        // Rather a high number here means that we should be able to keep a lot
+        // of idle cpus busy. By ensuring that no codegen unit takes *too* long
+        // to build we'll be guaranteed that all cpus will finish pretty closely
+        // to one another and we should make relatively optimal use of system
+        // resources.
+        //
+        // Note that the main cost of codegen units is that it prevents LLVM
+        // from inlining across codegen units. Users in general don't have a lot
+        // of control over how codegen units are split up so it's our job in the
+        // compiler to ensure that undue performance isn't lost when using
+        // codegen units (aka we can't require everyone to slap `#[inline]` on
+        // everything).
+        //
+        // If we're compiling at `-O0` then the number doesn't really matter too
+        // much because performance doesn't matter and inlining is ok to lose.
+        // In debug mode we just want to try to guarantee that no cpu is stuck
+        // doing work that could otherwise be farmed to others.
+        //
+        // In release mode, however (O1 and above) performance does indeed
+        // matter! To recover the loss in performance due to inlining we'll be
+        // enabling ThinLTO by default (the function for which is just below).
+        // This will ensure that we recover any inlining wins we otherwise lost
+        // through codegen unit partitioning.
+        //
+        // ---
+        //
+        // Ok that's a lot of words but the basic tl;dr is that we want a high
+        // number here -- but not too high. Additionally we're "safe" to have it
+        // always at the same number at all optimization levels.
+        //
+        // As a result 16 was chosen here! Mostly because it was a power of 2
+        // and most benchmarks agreed it was roughly a local optimum. Not very
+        // scientific.
          match self.opts.optimize {
-            // If we're compiling at `-O0` then default to 16 codegen units.
-            // The number here shouldn't matter too too much as debug mode
-            // builds don't rely on performance at all, meaning that lost
-            // opportunities for inlining through multiple codegen units is
-            // a non-issue.
-            //
-            // Note that the high number here doesn't mean that we'll be
-            // spawning a large number of threads in parallel. The backend
-            // of rustc contains global rate limiting through the
-            // `jobserver` crate so we'll never overload the system with too
-            // much work, but rather we'll only be optimizing when we're
-            // otherwise cooperating with other instances of rustc.
-            //
-            // Rather the high number here means that we should be able to
-            // keep a lot of idle cpus busy. By ensuring that no codegen
-            // unit takes *too* long to build we'll be guaranteed that all
-            // cpus will finish pretty closely to one another and we should
-            // make relatively optimal use of system resources
              config::OptLevel::No => 16,
+            _ => 1, // FIXME(#46346) this should be 16
+        }
+    }
  
-            // All other optimization levels default use one codegen unit,
-            // the historical default in Rust for a Long Time.
-            _ => 1,
+    /// Returns whether ThinLTO is enabled for this compilation.
+    pub fn thinlto(&self) -> bool {
+        // If processing command line options determined that we're incompatible
+        // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that option.
+        if let Some(enabled) = self.opts.cli_forced_thinlto {
+            return enabled
+        }
+
+        // If the `thinlto` debugging option was explicitly specified, it takes the next highest priority.
+        if let Some(enabled) = self.opts.debugging_opts.thinlto {
+            return enabled
+        }
+
+        // If there's only one codegen unit and LTO isn't enabled then there's
+        // no need for ThinLTO so just return false.
+        if self.codegen_units() == 1 && !self.lto() {
+            return false
+        }
+
+        // Right now ThinLTO isn't compatible with incremental compilation.
+        if self.opts.incremental.is_some() {
+            return false
+        }
+
+        // Now we're in "defaults" territory. By default we enable ThinLTO for
+        // optimized compiles (anything greater than O0).
+        match self.opts.optimize {
+            config::OptLevel::No => false,
+            _ => true,
          }
      }
  }
diff --git a/src/librustc_trans/back/write.rs b/src/librustc_trans/back/write.rs

index da67940abcb776973862a2c1a71896be0c68b5cc..cb883e0349f31cb16c8038f48d906c1d9f775e7e 100644 (file)
--- a/src/librustc_trans/back/write.rs
+++ b/src/librustc_trans/back/write.rs
@@ -1402,8 +1402,9 @@ fn start_executing_work(tcx: TyCtxt,
          // for doesn't require full LTO. Some targets require one LLVM module
          // (they effectively don't have a linker) so it's up to us to use LTO to
          // link everything together.
-        thinlto: sess.opts.debugging_opts.thinlto &&
-            !sess.target.target.options.requires_lto,
+        thinlto: sess.thinlto() &&
+            !sess.target.target.options.requires_lto &&
+            unsafe { llvm::LLVMRustThinLTOAvailable() },
  
          no_landing_pads: sess.no_landing_pads(),
          save_temps: sess.opts.cg.save_temps,
diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs

index b7408681ed0c8a214e24237f94953dc28a4cd499..03c1e4368c9a37a72ac430876098e17516efddbc 100644 (file)
--- a/src/librustc_trans/base.rs
+++ b/src/librustc_trans/base.rs
@@ -706,7 +706,7 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
  
      check_for_rustc_errors_attr(tcx);
  
-    if tcx.sess.opts.debugging_opts.thinlto {
+    if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
          if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
              tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
          }
diff --git a/src/libstd/sys_common/backtrace.rs b/src/libstd/sys_common/backtrace.rs

index 9f0214f5f0510c8cc44280305c06755166cac6a0..b5cf6d7d34fcc61bc78ec3ef3f842ce4c73aed91 100644 (file)
--- a/src/libstd/sys_common/backtrace.rs
+++ b/src/libstd/sys_common/backtrace.rs
@@ -252,8 +252,26 @@ fn output_fileline(w: &mut Write,
  // Note that this demangler isn't quite as fancy as it could be. We have lots
  // of other information in our symbols like hashes, version, type information,
  // etc. Additionally, this doesn't handle glue symbols at all.
-pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
-    // First validate the symbol. If it doesn't look like anything we're
+pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
+    // During ThinLTO LLVM may import and rename internal symbols, so strip out
+    // those endings first as they're one of the last manglings applied to
+    // symbol names.
+    let llvm = ".llvm.";
+    if let Some(i) = s.find(llvm) {
+        let candidate = &s[i + llvm.len()..];
+        let all_hex = candidate.chars().all(|c| {
+            match c {
+                'A' ... 'F' | '0' ... '9' => true,
+                _ => false,
+            }
+        });
+
+        if all_hex {
+            s = &s[..i];
+        }
+    }
+
+    // Validate the symbol. If it doesn't look like anything we're
      // expecting, we just print it literally. Note that we must handle non-rust
      // symbols because we could have any function in the backtrace.
      let mut valid = true;
diff --git a/src/test/run-fail/mir_trans_no_landing_pads.rs b/src/test/run-fail/mir_trans_no_landing_pads.rs

index dacb039d89dc5c0e6694b84a65899ca152ca08a4..bafb78fc213e39192585d2b91f0cd3c4343928a5 100644 (file)
--- a/src/test/run-fail/mir_trans_no_landing_pads.rs
+++ b/src/test/run-fail/mir_trans_no_landing_pads.rs
@@ -8,7 +8,7 @@
  // option. This file may not be copied, modified, or distributed
  // except according to those terms.
  
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
  // error-pattern:converging_fn called
  use std::io::{self, Write};
  
diff --git a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs

index 87037c1efed9e02e3db69003f8e6f72f3189fa13..998ee7470bbe9d53ed0ab5a1075d0ae9e7c629d4 100644 (file)
--- a/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs
+++ b/src/test/run-fail/mir_trans_no_landing_pads_diverging.rs
@@ -8,7 +8,7 @@
  // option. This file may not be copied, modified, or distributed
  // except according to those terms.
  
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
  // error-pattern:diverging_fn called
  use std::io::{self, Write};
  
diff --git a/src/test/run-pass/no-landing-pads.rs b/src/test/run-pass/no-landing-pads.rs

index e718046ebbcd7b53155214ed0996ec3f271ddb0c..73f123045d2494181ae515026a36d53c9fad4185 100644 (file)
--- a/src/test/run-pass/no-landing-pads.rs
+++ b/src/test/run-pass/no-landing-pads.rs
@@ -8,7 +8,7 @@
  // option. This file may not be copied, modified, or distributed
  // except according to those terms.
  
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
  // ignore-emscripten no threads support
  
  use std::thread;
author	Alex Crichton <alex@alexcrichton.com>
	Sat, 25 Nov 2017 19:13:58 +0000 (11:13 -0800)
committer	Alex Crichton <alex@alexcrichton.com>
	Thu, 30 Nov 2017 15:17:53 +0000 (07:17 -0800)
src/librustc/session/config.rs		patch \| blob \| history
src/librustc/session/mod.rs		patch \| blob \| history
src/librustc_trans/back/write.rs		patch \| blob \| history
src/librustc_trans/base.rs		patch \| blob \| history
src/libstd/sys_common/backtrace.rs		patch \| blob \| history
src/test/run-fail/mir_trans_no_landing_pads.rs		patch \| blob \| history
src/test/run-fail/mir_trans_no_landing_pads_diverging.rs		patch \| blob \| history
src/test/run-pass/no-landing-pads.rs		patch \| blob \| history