// try to not rely on this too much.
actually_rustdoc: bool [TRACKED],
- // Number of object files/codegen units to produce on the backend
+ // Specifications of codegen units / ThinLTO which are forced as a
+ // result of parsing command line options. These are not necessarily
+ // what rustc was invoked with, but are massaged a bit to agree with
+ // options like `--emit llvm-ir`, which are often incompatible with
+ // the defaults rustc would otherwise use.
cli_forced_codegen_units: Option<usize> [UNTRACKED],
+ cli_forced_thinlto: Option<bool> [UNTRACKED],
}
);
debug_assertions: true,
actually_rustdoc: false,
cli_forced_codegen_units: None,
+ cli_forced_thinlto: None,
}
}
"run the non-lexical lifetimes MIR pass"),
trans_time_graph: bool = (false, parse_bool, [UNTRACKED],
"generate a graphical HTML report of time spent in trans and LLVM"),
- thinlto: bool = (false, parse_bool, [TRACKED],
+ thinlto: Option<bool> = (None, parse_opt_bool, [TRACKED],
"enable ThinLTO when possible"),
inline_in_all_cgus: Option<bool> = (None, parse_opt_bool, [TRACKED],
"control whether #[inline] functions are in all cgus"),
let mut cg = build_codegen_options(matches, error_format);
let mut codegen_units = cg.codegen_units;
+ let mut thinlto = None;
// Issue #30063: if user requests llvm-related output to one
// particular path, disable codegen-units.
}
early_warn(error_format, "resetting to default -C codegen-units=1");
codegen_units = Some(1);
+ thinlto = Some(false);
}
}
- _ => codegen_units = Some(1),
+ _ => {
+ codegen_units = Some(1);
+ thinlto = Some(false);
+ }
}
}
debug_assertions,
actually_rustdoc: false,
cli_forced_codegen_units: codegen_units,
+ cli_forced_thinlto: thinlto,
},
cfg)
}
return n as usize
}
+ // Why is 16 codegen units the default all the time?
+ //
+ // The main reason for enabling multiple codegen units by default is to
+ // leverage the ability for the trans backend to do translation and
+ // codegen in parallel. This allows us, especially for large crates, to
+ // make good use of all available resources on the machine once we've
+ // hit that stage of compilation. Large crates in particular often
+ // take a long time in trans/codegen and this helps us amortize that
+ // cost.
+ //
+ // Note that a high number here doesn't mean that we'll be spawning a
+ // large number of threads in parallel. The backend of rustc contains
+ // global rate limiting through the `jobserver` crate so we'll never
+ // overload the system with too much work, but rather we'll only be
+ // optimizing when we're otherwise cooperating with other instances of
+ // rustc.
+ //
+ // Rather a high number here means that we should be able to keep a lot
+ // of idle cpus busy. By ensuring that no codegen unit takes *too* long
+ // to build we'll be guaranteed that all cpus will finish pretty closely
+ // to one another and we should make relatively optimal use of system
+ // resources.
+ //
+ // Note that the main cost of codegen units is that they prevent LLVM
+ // from inlining across codegen units. Users in general don't have a lot
+ // of control over how codegen units are split up so it's our job in the
+ // compiler to ensure that undue performance isn't lost when using
+ // codegen units (i.e. we can't require everyone to slap `#[inline]` on
+ // everything).
+ //
+ // If we're compiling at `-O0` then the number doesn't really matter too
+ // much because performance doesn't matter and inlining is ok to lose.
+ // In debug mode we just want to try to guarantee that no cpu is stuck
+ // doing work that could otherwise be farmed to others.
+ //
+ // In release mode, however (O1 and above) performance does indeed
+ // matter! To recover the loss in performance due to inlining we'll be
+ // enabling ThinLTO by default (the function for which is just below).
+ // This will ensure that we recover any inlining wins we otherwise lost
+ // through codegen unit partitioning.
+ //
+ // ---
+ //
+ // Ok that's a lot of words but the basic tl;dr is that we want a high
+ // number here -- but not too high. Additionally we're "safe" to have it
+ // always at the same number at all optimization levels.
+ //
+ // As a result 16 was chosen here! Mostly because it was a power of 2
+ // and most benchmarks agreed it was roughly a local optimum. Not very
+ // scientific.
match self.opts.optimize {
- // If we're compiling at `-O0` then default to 16 codegen units.
- // The number here shouldn't matter too too much as debug mode
- // builds don't rely on performance at all, meaning that lost
- // opportunities for inlining through multiple codegen units is
- // a non-issue.
- //
- // Note that the high number here doesn't mean that we'll be
- // spawning a large number of threads in parallel. The backend
- // of rustc contains global rate limiting through the
- // `jobserver` crate so we'll never overload the system with too
- // much work, but rather we'll only be optimizing when we're
- // otherwise cooperating with other instances of rustc.
- //
- // Rather the high number here means that we should be able to
- // keep a lot of idle cpus busy. By ensuring that no codegen
- // unit takes *too* long to build we'll be guaranteed that all
- // cpus will finish pretty closely to one another and we should
- // make relatively optimal use of system resources
config::OptLevel::No => 16,
+ _ => 1, // FIXME(#46346) this should be 16
+ }
+ }
- // All other optimization levels default use one codegen unit,
- // the historical default in Rust for a Long Time.
- _ => 1,
+ /// Returns whether ThinLTO is enabled: forced CLI value, then explicit option, then defaults.
+ pub fn thinlto(&self) -> bool {
+ // If processing command line options determined that we're incompatible
+ // with ThinLTO (e.g. `-C lto --emit llvm-ir`) then return that forced value.
+ if let Some(enabled) = self.opts.cli_forced_thinlto {
+ return enabled
+ }
+
+ // Next in priority: an explicitly specified `thinlto` debugging option, if any.
+ if let Some(enabled) = self.opts.debugging_opts.thinlto {
+ return enabled
+ }
+
+ // If there's only one codegen unit and LTO isn't enabled then there's
+ // no need for ThinLTO, so just return false.
+ if self.codegen_units() == 1 && !self.lto() {
+ return false
+ }
+
+ // Right now ThinLTO isn't compatible with incremental compilation, so disable it.
+ if self.opts.incremental.is_some() {
+ return false
+ }
+
+ // Now we're in "defaults" territory. By default we enable ThinLTO for
+ // optimized compiles (any optimization level other than `OptLevel::No`).
+ match self.opts.optimize {
+ config::OptLevel::No => false,
+ _ => true,
}
}
}
// for doesn't require full LTO. Some targets require one LLVM module
// (they effectively don't have a linker) so it's up to us to use LTO to
// link everything together.
- thinlto: sess.opts.debugging_opts.thinlto &&
- !sess.target.target.options.requires_lto,
+ thinlto: sess.thinlto() &&
+ !sess.target.target.options.requires_lto &&
+ unsafe { llvm::LLVMRustThinLTOAvailable() },
no_landing_pads: sess.no_landing_pads(),
save_temps: sess.opts.cg.save_temps,
check_for_rustc_errors_attr(tcx);
- if tcx.sess.opts.debugging_opts.thinlto {
+ if let Some(true) = tcx.sess.opts.debugging_opts.thinlto {
if unsafe { !llvm::LLVMRustThinLTOAvailable() } {
tcx.sess.fatal("this compiler's LLVM does not support ThinLTO");
}
// Note that this demangler isn't quite as fancy as it could be. We have lots
// of other information in our symbols like hashes, version, type information,
// etc. Additionally, this doesn't handle glue symbols at all.
-pub fn demangle(writer: &mut Write, s: &str, format: PrintFormat) -> io::Result<()> {
- // First validate the symbol. If it doesn't look like anything we're
+pub fn demangle(writer: &mut Write, mut s: &str, format: PrintFormat) -> io::Result<()> {
+ // During ThinLTO LLVM may import and rename internal symbols, so strip out
+ // those endings first as they're one of the last manglings applied to
+ // symbol names.
+ let llvm = ".llvm.";
+ if let Some(i) = s.find(llvm) {
+ let candidate = &s[i + llvm.len()..];
+ let all_hex = candidate.chars().all(|c| {
+ match c {
+ 'A' ... 'F' | '0' ... '9' => true,
+ _ => false,
+ }
+ });
+
+ if all_hex {
+ s = &s[..i];
+ }
+ }
+
+ // Validate the symbol. If it doesn't look like anything we're
// expecting, we just print it literally. Note that we must handle non-rust
// symbols because we could have any function in the backtrace.
let mut valid = true;
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
// error-pattern:converging_fn called
use std::io::{self, Write};
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
// error-pattern:diverging_fn called
use std::io::{self, Write};
// option. This file may not be copied, modified, or distributed
// except according to those terms.
-// compile-flags: -Z no-landing-pads
+// compile-flags: -Z no-landing-pads -C codegen-units=1
// ignore-emscripten no threads support
use std::thread;