1 // Copyright 2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 use back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
12 use back::symbol_export;
13 use back::write::{ModuleConfig, with_llvm_pmb, CodegenContext};
15 use errors::{FatalError, Handler};
16 use llvm::archive_ro::ArchiveRO;
17 use llvm::{ModuleRef, TargetMachineRef, True, False};
19 use rustc::hir::def_id::LOCAL_CRATE;
20 use rustc::middle::exported_symbols::SymbolExportLevel;
21 use rustc::session::config::{self, Lto};
22 use rustc::util::common::time_ext;
23 use time_graph::Timeline;
24 use {ModuleCodegen, ModuleLlvm, ModuleKind, ModuleSource};
28 use std::ffi::CString;
33 pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
35 config::CrateTypeExecutable |
36 config::CrateTypeStaticlib |
37 config::CrateTypeCdylib => true,
39 config::CrateTypeDylib |
40 config::CrateTypeRlib |
41 config::CrateTypeProcMacro => false,
45 pub(crate) enum LtoModuleCodegen {
47 module: Option<ModuleCodegen>,
48 _serialized_bitcode: Vec<SerializedModule>,
54 impl LtoModuleCodegen {
55 pub fn name(&self) -> &str {
57 LtoModuleCodegen::Fat { .. } => "everything",
58 LtoModuleCodegen::Thin(ref m) => m.name(),
62 /// Optimize this module within the given codegen context.
64 /// This function is unsafe as it'll return a `ModuleCodegen` still
65 /// points to LLVM data structures owned by this `LtoModuleCodegen`.
66 /// It's intended that the module returned is immediately code generated and
67 /// dropped, and then this LTO module is dropped.
68 pub(crate) unsafe fn optimize(&mut self,
69 cgcx: &CodegenContext,
70 timeline: &mut Timeline)
71 -> Result<ModuleCodegen, FatalError>
74 LtoModuleCodegen::Fat { ref mut module, .. } => {
75 let module = module.take().unwrap();
76 let config = cgcx.config(module.kind);
77 let llmod = module.llvm().unwrap().llmod;
78 let tm = module.llvm().unwrap().tm;
79 run_pass_manager(cgcx, tm, llmod, config, false);
80 timeline.record("fat-done");
83 LtoModuleCodegen::Thin(ref mut thin) => thin.optimize(cgcx, timeline),
87 /// A "gauge" of how costly it is to optimize this module, used to sort
88 /// biggest modules first.
89 pub fn cost(&self) -> u64 {
91 // Only one module with fat LTO, so the cost doesn't matter.
92 LtoModuleCodegen::Fat { .. } => 0,
93 LtoModuleCodegen::Thin(ref m) => m.cost(),
98 pub(crate) fn run(cgcx: &CodegenContext,
99 modules: Vec<ModuleCodegen>,
100 timeline: &mut Timeline)
101 -> Result<Vec<LtoModuleCodegen>, FatalError>
103 let diag_handler = cgcx.create_diag_handler();
104 let export_threshold = match cgcx.lto {
105 // We're just doing LTO for our one crate
106 Lto::ThinLocal => SymbolExportLevel::Rust,
108 // We're doing LTO for the entire crate graph
109 Lto::Yes | Lto::Fat | Lto::Thin => {
110 symbol_export::crates_export_threshold(&cgcx.crate_types)
113 Lto::No => panic!("didn't request LTO but we're doing LTO"),
116 let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
117 if level.is_below_threshold(export_threshold) {
118 let mut bytes = Vec::with_capacity(name.len() + 1);
119 bytes.extend(name.bytes());
120 Some(CString::new(bytes).unwrap())
125 let exported_symbols = cgcx.exported_symbols
126 .as_ref().expect("needs exported symbols for LTO");
127 let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
129 .filter_map(symbol_filter)
130 .collect::<Vec<CString>>();
131 timeline.record("whitelist");
132 info!("{} symbols to preserve in this crate", symbol_white_list.len());
134 // If we're performing LTO for the entire crate graph, then for each of our
135 // upstream dependencies, find the corresponding rlib and load the bitcode
138 // We save off all the bytecode and LLVM module ids for later processing
139 // with either fat or thin LTO
140 let mut upstream_modules = Vec::new();
141 if cgcx.lto != Lto::ThinLocal {
142 if cgcx.opts.cg.prefer_dynamic {
143 diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
144 .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
147 return Err(FatalError)
150 // Make sure we actually can run LTO
151 for crate_type in cgcx.crate_types.iter() {
152 if !crate_type_allows_lto(*crate_type) {
153 let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
154 static library outputs");
159 for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
160 let exported_symbols = cgcx.exported_symbols
161 .as_ref().expect("needs exported symbols for LTO");
162 symbol_white_list.extend(
163 exported_symbols[&cnum]
165 .filter_map(symbol_filter));
167 let archive = ArchiveRO::open(&path).expect("wanted an rlib");
168 let bytecodes = archive.iter().filter_map(|child| {
169 child.ok().and_then(|c| c.name().map(|name| (name, c)))
170 }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
171 for (name, data) in bytecodes {
172 info!("adding bytecode {}", name);
173 let bc_encoded = data.data();
175 let (bc, id) = time_ext(cgcx.time_passes, None, &format!("decode {}", name), || {
176 match DecodedBytecode::new(bc_encoded) {
177 Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
178 Err(e) => Err(diag_handler.fatal(&e)),
181 let bc = SerializedModule::FromRlib(bc);
182 upstream_modules.push((bc, CString::new(id).unwrap()));
184 timeline.record(&format!("load: {}", path.display()));
188 let arr = symbol_white_list.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
190 Lto::Yes | // `-C lto` == fat LTO by default
192 fat_lto(cgcx, &diag_handler, modules, upstream_modules, &arr, timeline)
196 thin_lto(&diag_handler, modules, upstream_modules, &arr, timeline)
198 Lto::No => unreachable!(),
202 fn fat_lto(cgcx: &CodegenContext,
203 diag_handler: &Handler,
204 mut modules: Vec<ModuleCodegen>,
205 mut serialized_modules: Vec<(SerializedModule, CString)>,
206 symbol_white_list: &[*const libc::c_char],
207 timeline: &mut Timeline)
208 -> Result<Vec<LtoModuleCodegen>, FatalError>
210 info!("going for a fat lto");
212 // Find the "costliest" module and merge everything into that codegen unit.
213 // All the other modules will be serialized and reparsed into the new
214 // context, so this hopefully avoids serializing and parsing the largest
217 // Additionally use a regular module as the base here to ensure that various
218 // file copy operations in the backend work correctly. The only other kind
219 // of module here should be an allocator one, and if your crate is smaller
220 // than the allocator module then the size doesn't really matter anyway.
221 let (_, costliest_module) = modules.iter()
223 .filter(|&(_, module)| module.kind == ModuleKind::Regular)
226 llvm::LLVMRustModuleCost(module.llvm().unwrap().llmod)
231 .expect("must be codegen'ing at least one module");
232 let module = modules.remove(costliest_module);
233 let llmod = module.llvm().expect("can't lto pre-codegened modules").llmod;
234 info!("using {:?} as a base module", module.llmod_id);
236 // For all other modules we codegened we'll need to link them into our own
237 // bitcode. All modules were codegened in their own LLVM context, however,
238 // and we want to move everything to the same LLVM context. Currently the
239 // way we know of to do that is to serialize them to a string and them parse
240 // them later. Not great but hey, that's why it's "fat" LTO, right?
241 for module in modules {
242 let llvm = module.llvm().expect("can't lto pre-codegened modules");
243 let buffer = ModuleBuffer::new(llvm.llmod);
244 let llmod_id = CString::new(&module.llmod_id[..]).unwrap();
245 serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
248 // For all serialized bitcode files we parse them and link them in as we did
249 // above, this is all mostly handled in C++. Like above, though, we don't
250 // know much about the memory management here so we err on the side of being
251 // save and persist everything with the original module.
252 let mut serialized_bitcode = Vec::new();
253 let mut linker = Linker::new(llmod);
254 for (bc_decoded, name) in serialized_modules {
255 info!("linking {:?}", name);
256 time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
257 let data = bc_decoded.data();
258 linker.add(&data).map_err(|()| {
259 let msg = format!("failed to load bc of {:?}", name);
260 write::llvm_err(&diag_handler, msg)
263 timeline.record(&format!("link {:?}", name));
264 serialized_bitcode.push(bc_decoded);
267 cgcx.save_temp_bitcode(&module, "lto.input");
269 // Internalize everything that *isn't* in our whitelist to help strip out
270 // more modules and such
272 let ptr = symbol_white_list.as_ptr();
273 llvm::LLVMRustRunRestrictionPass(llmod,
274 ptr as *const *const libc::c_char,
275 symbol_white_list.len() as libc::size_t);
276 cgcx.save_temp_bitcode(&module, "lto.after-restriction");
279 if cgcx.no_landing_pads {
281 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
283 cgcx.save_temp_bitcode(&module, "lto.after-nounwind");
285 timeline.record("passes");
287 Ok(vec![LtoModuleCodegen::Fat {
288 module: Some(module),
289 _serialized_bitcode: serialized_bitcode,
293 struct Linker(llvm::LinkerRef);
296 fn new(llmod: ModuleRef) -> Linker {
297 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
300 fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
302 if llvm::LLVMRustLinkerAdd(self.0,
303 bytecode.as_ptr() as *const libc::c_char,
313 impl Drop for Linker {
315 unsafe { llvm::LLVMRustLinkerFree(self.0); }
319 /// Prepare "thin" LTO to get run on these modules.
321 /// The general structure of ThinLTO is quite different from the structure of
322 /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
323 /// one giant LLVM module, and then we run more optimization passes over this
324 /// big module after internalizing most symbols. Thin LTO, on the other hand,
325 /// avoid this large bottleneck through more targeted optimization.
327 /// At a high level Thin LTO looks like:
329 /// 1. Prepare a "summary" of each LLVM module in question which describes
330 /// the values inside, cost of the values, etc.
331 /// 2. Merge the summaries of all modules in question into one "index"
332 /// 3. Perform some global analysis on this index
333 /// 4. For each module, use the index and analysis calculated previously to
334 /// perform local transformations on the module, for example inlining
335 /// small functions from other modules.
336 /// 5. Run thin-specific optimization passes over each module, and then code
337 /// generate everything at the end.
339 /// The summary for each module is intended to be quite cheap, and the global
340 /// index is relatively quite cheap to create as well. As a result, the goal of
341 /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
342 /// situations. For example one cheap optimization is that we can parallelize
343 /// all codegen modules, easily making use of all the cores on a machine.
345 /// With all that in mind, the function here is designed at specifically just
346 /// calculating the *index* for ThinLTO. This index will then be shared amongst
347 /// all of the `LtoModuleCodegen` units returned below and destroyed once
348 /// they all go out of scope.
349 fn thin_lto(diag_handler: &Handler,
350 modules: Vec<ModuleCodegen>,
351 serialized_modules: Vec<(SerializedModule, CString)>,
352 symbol_white_list: &[*const libc::c_char],
353 timeline: &mut Timeline)
354 -> Result<Vec<LtoModuleCodegen>, FatalError>
357 info!("going for that thin, thin LTO");
359 let mut thin_buffers = Vec::new();
360 let mut module_names = Vec::new();
361 let mut thin_modules = Vec::new();
363 // FIXME: right now, like with fat LTO, we serialize all in-memory
364 // modules before working with them and ThinLTO. We really
365 // shouldn't do this, however, and instead figure out how to
366 // extract a summary from an in-memory module and then merge that
367 // into the global index. It turns out that this loop is by far
368 // the most expensive portion of this small bit of global
370 for (i, module) in modules.iter().enumerate() {
371 info!("local module: {} - {}", i, module.llmod_id);
372 let llvm = module.llvm().expect("can't lto precodegened module");
373 let name = CString::new(module.llmod_id.clone()).unwrap();
374 let buffer = ThinBuffer::new(llvm.llmod);
375 thin_modules.push(llvm::ThinLTOModule {
376 identifier: name.as_ptr(),
377 data: buffer.data().as_ptr(),
378 len: buffer.data().len(),
380 thin_buffers.push(buffer);
381 module_names.push(name);
382 timeline.record(&module.llmod_id);
385 // FIXME: All upstream crates are deserialized internally in the
386 // function below to extract their summary and modules. Note that
387 // unlike the loop above we *must* decode and/or read something
388 // here as these are all just serialized files on disk. An
389 // improvement, however, to make here would be to store the
390 // module summary separately from the actual module itself. Right
391 // now this is store in one large bitcode file, and the entire
392 // file is deflate-compressed. We could try to bypass some of the
393 // decompression by storing the index uncompressed and only
394 // lazily decompressing the bytecode if necessary.
396 // Note that truly taking advantage of this optimization will
397 // likely be further down the road. We'd have to implement
398 // incremental ThinLTO first where we could actually avoid
399 // looking at upstream modules entirely sometimes (the contents,
400 // we must always unconditionally look at the index).
401 let mut serialized = Vec::new();
402 for (module, name) in serialized_modules {
403 info!("foreign module {:?}", name);
404 thin_modules.push(llvm::ThinLTOModule {
405 identifier: name.as_ptr(),
406 data: module.data().as_ptr(),
407 len: module.data().len(),
409 serialized.push(module);
410 module_names.push(name);
413 // Delegate to the C++ bindings to create some data here. Once this is a
414 // tried-and-true interface we may wish to try to upstream some of this
415 // to LLVM itself, right now we reimplement a lot of what they do
417 let data = llvm::LLVMRustCreateThinLTOData(
418 thin_modules.as_ptr(),
419 thin_modules.len() as u32,
420 symbol_white_list.as_ptr(),
421 symbol_white_list.len() as u32,
424 let msg = format!("failed to prepare thin LTO context");
425 return Err(write::llvm_err(&diag_handler, msg))
427 let data = ThinData(data);
428 info!("thin LTO data created");
429 timeline.record("data");
431 // Throw our data in an `Arc` as we'll be sharing it across threads. We
432 // also put all memory referenced by the C++ data (buffers, ids, etc)
433 // into the arc as well. After this we'll create a thin module
434 // codegen per module in this data.
435 let shared = Arc::new(ThinShared {
438 serialized_modules: serialized,
441 Ok((0..shared.module_names.len()).map(|i| {
442 LtoModuleCodegen::Thin(ThinModule {
443 shared: shared.clone(),
450 fn run_pass_manager(cgcx: &CodegenContext,
451 tm: TargetMachineRef,
453 config: &ModuleConfig,
455 // Now we have one massive module inside of llmod. Time to run the
456 // LTO-specific optimization passes that LLVM provides.
458 // This code is based off the code found in llvm's LTO code generator:
459 // tools/lto/LTOCodeGenerator.cpp
460 debug!("running the pass manager");
462 let pm = llvm::LLVMCreatePassManager();
463 llvm::LLVMRustAddAnalysisPasses(tm, pm, llmod);
465 if !config.no_verify {
466 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
467 assert!(!pass.is_null());
468 llvm::LLVMRustAddPass(pm, pass);
471 // When optimizing for LTO we don't actually pass in `-O0`, but we force
472 // it to always happen at least with `-O1`.
474 // With ThinLTO we mess around a lot with symbol visibility in a way
475 // that will actually cause linking failures if we optimize at O0 which
476 // notable is lacking in dead code elimination. To ensure we at least
477 // get some optimizations and correctly link we forcibly switch to `-O1`
478 // to get dead code elimination.
480 // Note that in general this shouldn't matter too much as you typically
481 // only turn on ThinLTO when you're compiling with optimizations
483 let opt_level = config.opt_level.unwrap_or(llvm::CodeGenOptLevel::None);
484 let opt_level = match opt_level {
485 llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
488 with_llvm_pmb(llmod, config, opt_level, false, &mut |b| {
490 if !llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm) {
491 panic!("this version of LLVM does not support ThinLTO");
494 llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
495 /* Internalize = */ False,
496 /* RunInliner = */ True);
500 if !config.no_verify {
501 let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
502 assert!(!pass.is_null());
503 llvm::LLVMRustAddPass(pm, pass);
506 time_ext(cgcx.time_passes, None, "LTO passes", ||
507 llvm::LLVMRunPassManager(pm, llmod));
509 llvm::LLVMDisposePassManager(pm);
514 pub enum SerializedModule {
519 impl SerializedModule {
520 fn data(&self) -> &[u8] {
522 SerializedModule::Local(ref m) => m.data(),
523 SerializedModule::FromRlib(ref m) => m,
528 pub struct ModuleBuffer(*mut llvm::ModuleBuffer);
530 unsafe impl Send for ModuleBuffer {}
531 unsafe impl Sync for ModuleBuffer {}
534 pub fn new(m: ModuleRef) -> ModuleBuffer {
535 ModuleBuffer(unsafe {
536 llvm::LLVMRustModuleBufferCreate(m)
540 pub fn data(&self) -> &[u8] {
542 let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
543 let len = llvm::LLVMRustModuleBufferLen(self.0);
544 slice::from_raw_parts(ptr, len)
549 impl Drop for ModuleBuffer {
551 unsafe { llvm::LLVMRustModuleBufferFree(self.0); }
555 pub struct ThinModule {
556 shared: Arc<ThinShared>,
562 thin_buffers: Vec<ThinBuffer>,
563 serialized_modules: Vec<SerializedModule>,
564 module_names: Vec<CString>,
567 struct ThinData(*mut llvm::ThinLTOData);
569 unsafe impl Send for ThinData {}
570 unsafe impl Sync for ThinData {}
572 impl Drop for ThinData {
575 llvm::LLVMRustFreeThinLTOData(self.0);
580 pub struct ThinBuffer(*mut llvm::ThinLTOBuffer);
582 unsafe impl Send for ThinBuffer {}
583 unsafe impl Sync for ThinBuffer {}
586 pub fn new(m: ModuleRef) -> ThinBuffer {
588 let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
593 pub fn data(&self) -> &[u8] {
595 let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
596 let len = llvm::LLVMRustThinLTOBufferLen(self.0);
597 slice::from_raw_parts(ptr, len)
602 impl Drop for ThinBuffer {
605 llvm::LLVMRustThinLTOBufferFree(self.0);
611 fn name(&self) -> &str {
612 self.shared.module_names[self.idx].to_str().unwrap()
615 fn cost(&self) -> u64 {
616 // Yes, that's correct, we're using the size of the bytecode as an
617 // indicator for how costly this codegen unit is.
618 self.data().len() as u64
621 fn data(&self) -> &[u8] {
622 let a = self.shared.thin_buffers.get(self.idx).map(|b| b.data());
623 a.unwrap_or_else(|| {
624 let len = self.shared.thin_buffers.len();
625 self.shared.serialized_modules[self.idx - len].data()
// Optimize this ThinLTO unit: parse its bitcode into a fresh LLVM context,
// run the per-module ThinLTO transformations against the shared index, then
// run the thin pass pipeline. NOTE(review): this excerpt is truncated —
// the tail of the function (presumably returning the optimized module)
// lies beyond the visible lines; confirm against the full file.
629 unsafe fn optimize(&mut self, cgcx: &CodegenContext, timeline: &mut Timeline)
630 -> Result<ModuleCodegen, FatalError>
632 let diag_handler = cgcx.create_diag_handler();
// Any target-machine creation error is surfaced as an LLVM diagnostic.
633 let tm = (cgcx.tm_factory)().map_err(|e| {
634 write::llvm_err(&diag_handler, e)
637 // Right now the implementation we've got only works over serialized
638 // modules, so we create a fresh new LLVM context and parse the module
639 // into that context. One day, however, we may do this for upstream
640 // crates but for locally codegened modules we may be able to reuse
641 // that LLVM Context and Module.
642 let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names)
643 let llmod = llvm::LLVMRustParseBitcodeForThinLTO(
645 self.data().as_ptr(),
647 self.shared.module_names[self.idx].as_ptr(),
// Null module means the bitcode failed to parse — report and bail.
650 let msg = format!("failed to parse bitcode for thin LTO module");
651 return Err(write::llvm_err(&diag_handler, msg));
// Wrap the freshly parsed module so the rest of the backend (and the
// save_temp_bitcode debugging hooks below) can treat it like any other
// codegened module.
653 let module = ModuleCodegen {
654 source: ModuleSource::Codegened(ModuleLlvm {
659 llmod_id: self.name().to_string(),
660 name: self.name().to_string(),
661 kind: ModuleKind::Regular,
663 cgcx.save_temp_bitcode(&module, "thin-lto-input");
665 // Before we do much else find the "main" `DICompileUnit` that we'll be
666 // using below. If we find more than one though then rustc has changed
667 // in a way we're not ready for, so generate an ICE by returning
669 let mut cu1 = ptr::null_mut();
670 let mut cu2 = ptr::null_mut();
671 llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
// A non-null second unit means multiple DICompileUnits were found.
673 let msg = format!("multiple source DICompileUnits found");
674 return Err(write::llvm_err(&diag_handler, msg))
677 // Like with "fat" LTO, get some better optimizations if landing pads
678 // are disabled by removing all landing pads.
679 if cgcx.no_landing_pads {
680 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
681 cgcx.save_temp_bitcode(&module, "thin-lto-after-nounwind");
682 timeline.record("nounwind");
685 // Up next comes the per-module local analyses that we do for Thin LTO.
686 // Each of these functions is basically copied from the LLVM
687 // implementation and then tailored to suit this implementation. Ideally
688 // each of these would be supported by upstream LLVM but that's perhaps
689 // a patch for another day!
691 // You can find some more comments about these functions in the LLVM
692 // bindings we've got (currently `PassWrapper.cpp`)
// The four steps below (rename, resolve-weak, internalize, import) run in
// this fixed order; each snapshots the bitcode afterwards for debugging.
693 if !llvm::LLVMRustPrepareThinLTORename(self.shared.data.0, llmod) {
694 let msg = format!("failed to prepare thin LTO module");
695 return Err(write::llvm_err(&diag_handler, msg))
697 cgcx.save_temp_bitcode(&module, "thin-lto-after-rename");
698 timeline.record("rename");
699 if !llvm::LLVMRustPrepareThinLTOResolveWeak(self.shared.data.0, llmod) {
700 let msg = format!("failed to prepare thin LTO module");
701 return Err(write::llvm_err(&diag_handler, msg))
703 cgcx.save_temp_bitcode(&module, "thin-lto-after-resolve");
704 timeline.record("resolve");
705 if !llvm::LLVMRustPrepareThinLTOInternalize(self.shared.data.0, llmod) {
706 let msg = format!("failed to prepare thin LTO module");
707 return Err(write::llvm_err(&diag_handler, msg))
709 cgcx.save_temp_bitcode(&module, "thin-lto-after-internalize");
710 timeline.record("internalize");
711 if !llvm::LLVMRustPrepareThinLTOImport(self.shared.data.0, llmod) {
712 let msg = format!("failed to prepare thin LTO module");
713 return Err(write::llvm_err(&diag_handler, msg))
715 cgcx.save_temp_bitcode(&module, "thin-lto-after-import");
716 timeline.record("import");
718 // Ok now this is a bit unfortunate. This is also something you won't
719 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
720 // work around bugs in LLVM.
722 // First discovered in #45511 it was found that as part of ThinLTO
723 // importing passes LLVM will import `DICompileUnit` metadata
724 // information across modules. This means that we'll be working with one
725 // LLVM module that has multiple `DICompileUnit` instances in it (a
726 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
727 // bugs in LLVM's backend which generates invalid DWARF in a situation
730 // https://bugs.llvm.org/show_bug.cgi?id=35212
731 // https://bugs.llvm.org/show_bug.cgi?id=35562
733 // While the first bug there is fixed the second ended up causing #46346
734 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
737 // This function below is a huge hack around this problem. The function
738 // below is defined in `PassWrapper.cpp` and will basically "merge"
739 // all `DICompileUnit` instances in a module. Basically it'll take all
740 // the objects, rewrite all pointers of `DISubprogram` to point to the
741 // first `DICompileUnit`, and then delete all the other units.
743 // This is probably mangling to the debug info slightly (but hopefully
744 // not too much) but for now at least gets LLVM to emit valid DWARF (or
745 // so it appears). Hopefully we can remove this once upstream bugs are
747 llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
748 cgcx.save_temp_bitcode(&module, "thin-lto-after-patch");
749 timeline.record("patch");
751 // Alright now that we've done everything related to the ThinLTO
752 // analysis it's time to run some optimizations! Here we use the same
753 // `run_pass_manager` as the "fat" LTO above except that we tell it to
754 // populate a thin-specific pass manager, which presumably LLVM treats a
755 // little differently.
756 info!("running thin lto passes over {}", module.name);
757 let config = cgcx.config(module.kind);
758 run_pass_manager(cgcx, tm, llmod, config, true);
759 cgcx.save_temp_bitcode(&module, "thin-lto-after-pm");
760 timeline.record("thin-done");
762 // FIXME: this is a hack around a bug in LLVM right now. Discovered in
763 // #46910 it was found out that on 32-bit MSVC LLVM will hit a codegen
764 // error if there's an available_externally function in the LLVM module.
765 // Typically we don't actually use these functions but ThinLTO makes
766 // heavy use of them when inlining across modules.
768 // Tracked upstream at https://bugs.llvm.org/show_bug.cgi?id=35736 this
769 // function call (and its definition on the C++ side of things)
770 // shouldn't be necessary eventually and we can safetly delete these few
772 llvm::LLVMRustThinLTORemoveAvailableExternally(llmod);
773 cgcx.save_temp_bitcode(&module, "thin-lto-after-rm-ae");
774 timeline.record("no-ae");