1 use crate::back::write::{self, save_temp_bitcode, DiagnosticHandlers};
2 use crate::llvm::{self, build_string};
3 use crate::{LlvmCodegenBackend, ModuleLlvm};
4 use object::read::archive::ArchiveFile;
5 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
6 use rustc_codegen_ssa::back::symbol_export;
7 use rustc_codegen_ssa::back::write::{CodegenContext, FatLTOInput, TargetMachineFactoryConfig};
8 use rustc_codegen_ssa::traits::*;
9 use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
10 use rustc_data_structures::fx::FxHashMap;
11 use rustc_data_structures::memmap::Mmap;
12 use rustc_errors::{FatalError, Handler};
13 use rustc_hir::def_id::LOCAL_CRATE;
14 use rustc_middle::bug;
15 use rustc_middle::dep_graph::WorkProduct;
16 use rustc_middle::middle::exported_symbols::{SymbolExportInfo, SymbolExportLevel};
17 use rustc_session::cgu_reuse_tracker::CguReuse;
18 use rustc_session::config::{self, CrateType, Lto};
20 use std::ffi::{CStr, CString};
29 /// We keep track of the computed LTO cache keys from the previous
30 /// session to determine which CGUs we can reuse.
///
/// This is the name of the file, joined onto the incremental compilation
/// session directory, that the ThinLTO key map is loaded from and saved to.
31 pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
/// Reports whether LTO may be run for the given crate type: `true` for
/// executables, dylibs, staticlibs and cdylibs, `false` for rlibs and
/// proc-macro crates.
33 pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
35 CrateType::Executable | CrateType::Dylib | CrateType::Staticlib | CrateType::Cdylib => true,
36 CrateType::Rlib | CrateType::ProcMacro => false,
// `cgcx` supplies the LTO mode, the crate types, the exported symbols and the
// list of rlibs to pull bitcode from; `diag_handler` reports fatal errors and
// warnings.
41 cgcx: &CodegenContext<LlvmCodegenBackend>,
42 diag_handler: &Handler,
// On success this yields the names of the symbols to preserve, together with
// the serialized upstream modules (each paired with its name).
43 ) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> {
44 let export_threshold = match cgcx.lto {
45 // We're just doing LTO for our one crate
46 Lto::ThinLocal => SymbolExportLevel::Rust,
48 // We're doing LTO for the entire crate graph
49 Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
51 Lto::No => panic!("didn't request LTO but we're doing LTO"),
// A symbol is kept when its export level is below the threshold or when it is
// marked as `used`. The `unwrap` panics if a symbol name contains a NUL byte.
54 let symbol_filter = &|&(ref name, info): &(String, SymbolExportInfo)| {
55 if info.level.is_below_threshold(export_threshold) || info.used {
56 Some(CString::new(name.as_str()).unwrap())
61 let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
// Start with the symbols of the local crate; upstream crates are appended
// further down.
62 let mut symbols_below_threshold = {
63 let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
64 exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
66 info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
68 // If we're performing LTO for the entire crate graph, then for each of our
69 // upstream dependencies, find the corresponding rlib and load the bitcode
72 // We save off all the bytecode and LLVM module ids for later processing
73 // with either fat or thin LTO
74 let mut upstream_modules = Vec::new();
75 if cgcx.lto != Lto::ThinLocal {
76 // Make sure we actually can run LTO
77 for crate_type in cgcx.crate_types.iter() {
78 if !crate_type_allows_lto(*crate_type) {
79 let e = diag_handler.fatal(
80 "lto can only be run for executables, cdylibs and \
81 static library outputs",
84 } else if *crate_type == CrateType::Dylib {
85 diag_handler.warn("LTO with dylibs may not be as effective");
89 for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
90 let exported_symbols =
91 cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
94 cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
95 symbols_below_threshold
96 .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
// The rlib is memory-mapped; failing to open or map it is a panic, not a
// reported diagnostic.
99 let archive_data = unsafe {
100 Mmap::map(std::fs::File::open(&path).expect("couldn't open rlib"))
101 .expect("couldn't map rlib")
103 let archive = ArchiveFile::parse(&*archive_data).expect("wanted an rlib");
// Only archive members that parse successfully, have a UTF-8 name and look
// like a Rust object file are considered.
104 let obj_files = archive
106 .filter_map(|child| {
107 child.ok().and_then(|c| {
108 std::str::from_utf8(c.name()).ok().map(|name| (name.trim(), c))
111 .filter(|&(name, _)| looks_like_rust_object_file(name));
112 for (name, child) in obj_files {
113 info!("adding bitcode from {}", name);
114 match get_bitcode_slice_from_object_data(
115 child.data(&*archive_data).expect("corrupt rlib"),
// The bitcode is copied out of the mapped archive (`to_vec`), so the resulting
// module owns its bytes.
118 let module = SerializedModule::FromRlib(data.to_vec());
119 upstream_modules.push((module, CString::new(name).unwrap()));
121 Err(msg) => return Err(diag_handler.fatal(&msg)),
127 Ok((symbols_below_threshold, upstream_modules))
/// Locates the bitcode inside the object-file bytes `obj` by calling
/// `LLVMRustGetBitcodeSliceFromObjectData` and returns it as a slice.
///
/// The returned slice is asserted to lie within `obj`. The error variant
/// carries a message that embeds the last LLVM error, or
/// "unknown LLVM error" when LLVM has none recorded.
130 fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], String> {
133 unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };
136 let bc = unsafe { slice::from_raw_parts(data, len) };
138 // `bc` must be a sub-slice of `obj`.
139 assert!(obj.as_ptr() <= bc.as_ptr());
// The empty ranges `[len..len]` yield one-past-the-end pointers, so this
// compares the end of `bc` against the end of `obj`.
140 assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr());
145 let msg = llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
146 Err(format!("failed to get bitcode from object file for LTO ({})", msg))
150 /// Performs fat LTO by merging all modules into a single one and returning it
151 /// for further optimization.
///
/// `modules` are the inputs from this session and `cached_modules` are
/// serialized modules paired with their work products. Errors from
/// `prepare_lto` are propagated to the caller.
152 pub(crate) fn run_fat(
153 cgcx: &CodegenContext<LlvmCodegenBackend>,
154 modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
155 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
156 ) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
157 let diag_handler = cgcx.create_diag_handler();
158 let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
// Collect raw C-string pointers for the FFI layer. The shadowed `Vec<CString>`
// is not dropped by the shadowing, so the pointers stay valid for the rest of
// this function.
159 let symbols_below_threshold =
160 symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
167 &symbols_below_threshold,
171 /// Performs thin LTO by performing necessary global analysis and returning two
172 /// lists, one of the modules that need optimization and another for modules that
173 /// can simply be copied over from the incr. comp. cache.
///
/// `modules` pairs each local module name with its thin buffer, and
/// `cached_modules` are serialized modules paired with their work products.
/// Errors from `prepare_lto` are propagated to the caller.
174 pub(crate) fn run_thin(
175 cgcx: &CodegenContext<LlvmCodegenBackend>,
176 modules: Vec<(String, ThinBuffer)>,
177 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
178 ) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
179 let diag_handler = cgcx.create_diag_handler();
180 let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
// Collect raw C-string pointers for the FFI layer. The shadowed `Vec<CString>`
// is not dropped by the shadowing, so the pointers stay valid for the rest of
// this function.
181 let symbols_below_threshold =
182 symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
// With linker-plugin LTO the LTO step is deferred to the linker, so this
// function is not expected to be called in that configuration.
183 if cgcx.opts.cg.linker_plugin_lto.enabled() {
185 "We should never reach this case if the LTO step \
186 is deferred to the linker"
195 &symbols_below_threshold,
/// Takes a codegened module and produces its name together with a
/// `ThinBuffer` serialized from its LLVM module (created with
/// `is_thin = true`).
199 pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
200 let name = module.name.clone();
201 let buffer = ThinBuffer::new(module.module_llvm.llmod(), true);
// `modules` are this session's inputs (in-memory or serialized),
// `cached_modules` come from earlier incremental builds, `serialized_modules`
// are the already-serialized modules handed in by the caller, and
// `symbols_below_threshold` holds the C-string pointers of the symbols passed
// to the restriction pass at the end.
206 cgcx: &CodegenContext<LlvmCodegenBackend>,
207 diag_handler: &Handler,
208 modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
209 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
210 mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
211 symbols_below_threshold: &[*const libc::c_char],
212 ) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
213 let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
214 info!("going for a fat lto");
216 // Sort out all our lists of incoming modules into two lists.
218 // * `serialized_modules` (also an argument to this function) contains all
219 // modules that are serialized in-memory.
220 // * `in_memory` contains modules which are already parsed and in-memory,
221 // such as from multi-CGU builds.
223 // All of `cached_modules` (cached from previous incremental builds) can
224 // immediately go onto the `serialized_modules` modules list and then we can
225 // split the `modules` array into these two lists.
226 let mut in_memory = Vec::new();
227 serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
228 info!("pushing cached module {:?}", wp.cgu_name);
229 (buffer, CString::new(wp.cgu_name).unwrap())
231 for module in modules {
233 FatLTOInput::InMemory(m) => in_memory.push(m),
234 FatLTOInput::Serialized { name, buffer } => {
235 info!("pushing serialized module {:?}", name);
236 let buffer = SerializedModule::Local(buffer);
237 serialized_modules.push((buffer, CString::new(name).unwrap()));
242 // Find the "costliest" module and merge everything into that codegen unit.
243 // All the other modules will be serialized and reparsed into the new
244 // context, so this hopefully avoids serializing and parsing the largest
247 // Additionally use a regular module as the base here to ensure that various
248 // file copy operations in the backend work correctly. The only other kind
249 // of module here should be an allocator one, and if your crate is smaller
250 // than the allocator module then the size doesn't really matter anyway.
251 let costliest_module = in_memory
254 .filter(|&(_, module)| module.kind == ModuleKind::Regular)
256 let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
261 // If we found a costliest module, we're good to go. Otherwise all our
262 // inputs were serialized which could happen in the case, for example, that
263 // all our inputs were incrementally reread from the cache and we're just
264 // re-executing the LTO passes. If that's the case deserialize the first
265 // module and create a linker with it.
266 let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
267 Some((_cost, i)) => in_memory.remove(i),
269 assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
270 let (buffer, name) = serialized_modules.remove(0);
271 info!("no in-memory regular modules to choose from, parsing {:?}", name);
273 module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
274 name: name.into_string().unwrap(),
275 kind: ModuleKind::Regular,
// Every buffer that gets linked in is collected here and returned alongside
// the module, so it lives as long as the merged module does.
279 let mut serialized_bitcode = Vec::new();
281 let (llcx, llmod) = {
282 let llvm = &module.module_llvm;
283 (&llvm.llcx, llvm.llmod())
285 info!("using {:?} as a base module", module.name);
287 // The linking steps below may produce errors and diagnostics within LLVM
288 // which we'd like to handle and print, so set up our diagnostic handlers
289 // (which get unregistered when they go out of scope below).
290 let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
292 // For all other modules we codegened we'll need to link them into our own
293 // bitcode. All modules were codegened in their own LLVM context, however,
294 // and we want to move everything to the same LLVM context. Currently the
295 // way we know of to do that is to serialize them to a string and then parse
296 // them later. Not great but hey, that's why it's "fat" LTO, right?
297 for module in in_memory {
298 let buffer = ModuleBuffer::new(module.module_llvm.llmod());
299 let llmod_id = CString::new(&module.name[..]).unwrap();
300 serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
302 // Sort the modules to ensure we produce deterministic results.
303 serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));
305 // For all serialized bitcode files we parse them and link them in as we did
306 // above, this is all mostly handled in C++. Like above, though, we don't
307 // know much about the memory management here so we err on the side of being
308 // safe and persist everything with the original module.
309 let mut linker = Linker::new(llmod);
310 for (bc_decoded, name) in serialized_modules {
313 .generic_activity_with_arg_recorder("LLVM_fat_lto_link_module", |recorder| {
314 recorder.record_arg(format!("{:?}", name))
316 info!("linking {:?}", name);
317 let data = bc_decoded.data();
318 linker.add(data).map_err(|()| {
319 let msg = format!("failed to load bitcode of module {:?}", name);
320 write::llvm_err(diag_handler, &msg)
322 serialized_bitcode.push(bc_decoded);
325 save_temp_bitcode(cgcx, &module, "lto.input");
327 // Internalize everything below threshold to help strip out more modules and such.
329 let ptr = symbols_below_threshold.as_ptr();
330 llvm::LLVMRustRunRestrictionPass(
332 ptr as *const *const libc::c_char,
333 symbols_below_threshold.len() as libc::size_t,
335 save_temp_bitcode(cgcx, &module, "lto.after-restriction");
339 Ok(LtoModuleCodegen::Fat { module, _serialized_bitcode: serialized_bitcode })
/// Wrapper around the LLVM linker handle returned by `LLVMRustLinkerNew`.
/// The handle is released with `LLVMRustLinkerFree` when the wrapper is
/// dropped.
342 pub(crate) struct Linker<'a>(&'a mut llvm::Linker<'a>);
344 impl<'a> Linker<'a> {
/// Creates a linker for `llmod`; the linker is tied to the module's
/// lifetime `'a`.
345 pub(crate) fn new(llmod: &'a llvm::Module) -> Self {
346 unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
/// Passes the given bitcode bytes to `LLVMRustLinkerAdd`. The error variant
/// is `()` and carries no detail.
349 pub(crate) fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
351 if llvm::LLVMRustLinkerAdd(
353 bytecode.as_ptr() as *const libc::c_char,
364 impl Drop for Linker<'_> {
// Frees the underlying LLVM linker handle.
367 llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _));
372 /// Prepare "thin" LTO to get run on these modules.
374 /// The general structure of ThinLTO is quite different from the structure of
375 /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
376 /// one giant LLVM module, and then we run more optimization passes over this
377 /// big module after internalizing most symbols. Thin LTO, on the other hand,
378 /// avoids this large bottleneck through more targeted optimization.
380 /// At a high level Thin LTO looks like:
382 /// 1. Prepare a "summary" of each LLVM module in question which describes
383 /// the values inside, cost of the values, etc.
384 /// 2. Merge the summaries of all modules in question into one "index"
385 /// 3. Perform some global analysis on this index
386 /// 4. For each module, use the index and analysis calculated previously to
387 /// perform local transformations on the module, for example inlining
388 /// small functions from other modules.
389 /// 5. Run thin-specific optimization passes over each module, and then code
390 /// generate everything at the end.
392 /// The summary for each module is intended to be quite cheap, and the global
393 /// index is relatively quite cheap to create as well. As a result, the goal of
394 /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
395 /// situations. For example one cheap optimization is that we can parallelize
396 /// all codegen modules, easily making use of all the cores on a machine.
398 /// With all that in mind, the function here is designed specifically for just
399 /// calculating the *index* for ThinLTO. This index will then be shared amongst
400 /// all of the `LtoModuleCodegen` units returned below and destroyed once
401 /// they all go out of scope.
403 cgcx: &CodegenContext<LlvmCodegenBackend>,
404 diag_handler: &Handler,
405 modules: Vec<(String, ThinBuffer)>,
406 serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
407 cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
408 symbols_below_threshold: &[*const libc::c_char],
// On success the first list holds the modules to optimize and the second
// holds the work products that can be reused as-is.
409 ) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
410 let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
412 info!("going for that thin, thin LTO");
// Index the cached modules' work products by CGU name so the reuse check
// further down can look them up by module name.
414 let green_modules: FxHashMap<_, _> =
415 cached_modules.iter().map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone())).collect();
417 let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
418 let mut thin_buffers = Vec::with_capacity(modules.len());
419 let mut module_names = Vec::with_capacity(full_scope_len);
420 let mut thin_modules = Vec::with_capacity(full_scope_len);
422 for (i, (name, buffer)) in modules.into_iter().enumerate() {
423 info!("local module: {} - {}", i, name);
424 let cname = CString::new(name.clone()).unwrap();
// The `ThinLTOModule` only stores raw pointers into `cname` and `buffer`, so
// both are pushed into vectors right after to keep them around.
425 thin_modules.push(llvm::ThinLTOModule {
426 identifier: cname.as_ptr(),
427 data: buffer.data().as_ptr(),
428 len: buffer.data().len(),
430 thin_buffers.push(buffer);
431 module_names.push(cname);
434 // FIXME: All upstream crates are deserialized internally in the
435 // function below to extract their summary and modules. Note that
436 // unlike the loop above we *must* decode and/or read something
437 // here as these are all just serialized files on disk. An
438 // improvement, however, to make here would be to store the
439 // module summary separately from the actual module itself. Right
440 // now this is stored in one large bitcode file, and the entire
441 // file is deflate-compressed. We could try to bypass some of the
442 // decompression by storing the index uncompressed and only
443 // lazily decompressing the bytecode if necessary.
445 // Note that truly taking advantage of this optimization will
446 // likely be further down the road. We'd have to implement
447 // incremental ThinLTO first where we could actually avoid
448 // looking at upstream modules entirely sometimes (the contents,
449 // we must always unconditionally look at the index).
450 let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
// Cached modules are identified by their work product's CGU name.
453 cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));
455 for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
456 info!("upstream or cached module {:?}", name);
457 thin_modules.push(llvm::ThinLTOModule {
458 identifier: name.as_ptr(),
459 data: module.data().as_ptr(),
460 len: module.data().len(),
462 serialized.push(module);
463 module_names.push(name);
// `thin_modules` and `module_names` are filled in lockstep; they are paired
// up by position later on.
467 assert_eq!(thin_modules.len(), module_names.len());
469 // Delegate to the C++ bindings to create some data here. Once this is a
470 // tried-and-true interface we may wish to try to upstream some of this
471 // to LLVM itself, right now we reimplement a lot of what they do
473 let data = llvm::LLVMRustCreateThinLTOData(
474 thin_modules.as_ptr(),
475 thin_modules.len() as u32,
476 symbols_below_threshold.as_ptr(),
477 symbols_below_threshold.len() as u32,
479 .ok_or_else(|| write::llvm_err(diag_handler, "failed to prepare thin LTO context"))?;
// Wrap the raw handle so that it is freed on drop.
481 let data = ThinData(data);
483 info!("thin LTO data created");
485 let (key_map_path, prev_key_map, curr_key_map) = if let Some(ref incr_comp_session_dir) =
486 cgcx.incr_comp_session_dir
488 let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
489 // If the previous file was deleted, or we get an IO error
490 // reading the file, then we'll just use `None` as the
491 // prev_key_map, which will force the code to be recompiled.
493 if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
494 let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
495 (Some(path), prev, curr)
497 // If we don't compile incrementally, we don't need to load the
498 // import data from LLVM.
499 assert!(green_modules.is_empty());
500 let curr = ThinLTOKeysMap::default();
503 info!("thin LTO cache key map loaded");
504 info!("prev_key_map: {:#?}", prev_key_map);
505 info!("curr_key_map: {:#?}", curr_key_map);
507 // Throw our data in an `Arc` as we'll be sharing it across threads. We
508 // also put all memory referenced by the C++ data (buffers, ids, etc)
509 // into the arc as well. After this we'll create a thin module
510 // codegen per module in this data.
511 let shared = Arc::new(ThinShared {
514 serialized_modules: serialized,
518 let mut copy_jobs = vec![];
519 let mut opt_jobs = vec![];
521 info!("checking which modules can be-reused and which have to be re-optimized.");
522 for (module_index, module_name) in shared.module_names.iter().enumerate() {
523 let module_name = module_name_to_str(module_name);
// Reuse is only considered when a previous key map was loaded and the module
// is one of the cached ("green") modules.
524 if let (Some(prev_key_map), true) =
525 (prev_key_map.as_ref(), green_modules.contains_key(module_name))
527 assert!(cgcx.incr_comp_session_dir.is_some());
529 // If a module exists in both the current and the previous session,
530 // and has the same LTO cache key in both sessions, then we can re-use it
531 if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
532 let work_product = green_modules[module_name].clone();
533 copy_jobs.push(work_product);
534 info!(" - {}: re-used", module_name);
535 assert!(cgcx.incr_comp_session_dir.is_some());
536 cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto);
541 info!(" - {}: re-compiled", module_name);
542 opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
543 shared: shared.clone(),
548 // Save the current ThinLTO import information for the next compilation
549 // session, overwriting the previous serialized data (if any).
550 if let Some(path) = key_map_path {
551 if let Err(err) = curr_key_map.save_to_file(&path) {
552 let msg = format!("Error while writing ThinLTO key data: {}", err);
553 return Err(write::llvm_err(diag_handler, &msg));
557 Ok((opt_jobs, copy_jobs))
/// Runs the LTO optimization pipeline over `module`.
///
/// Adds the `LTOPostLink` module flag when the module does not already have
/// it, then calls `write::llvm_optimize` with the thin or fat LTO stage and
/// the configured optimization level. Errors from `llvm_optimize` are
/// propagated.
561 pub(crate) fn run_pass_manager(
562 cgcx: &CodegenContext<LlvmCodegenBackend>,
563 diag_handler: &Handler,
564 module: &mut ModuleCodegen<ModuleLlvm>,
566 ) -> Result<(), FatalError> {
567 let _timer = cgcx.prof.verbose_generic_activity_with_arg("LLVM_lto_optimize", &*module.name);
568 let config = cgcx.config(module.kind);
570 // Now we have one massive module inside of llmod. Time to run the
571 // LTO-specific optimization passes that LLVM provides.
573 // This code is based off the code found in llvm's LTO code generator:
574 // llvm/lib/LTO/LTOCodeGenerator.cpp
575 debug!("running the pass manager");
// NOTE(review): the literal passed to `LLVMRustHasModuleFlag` has no trailing
// NUL, unlike the one passed to `LLVMRustAddModuleFlag` below. Presumably an
// explicit length accompanies it; confirm against the binding's signature.
577 if !llvm::LLVMRustHasModuleFlag(
578 module.module_llvm.llmod(),
579 "LTOPostLink".as_ptr().cast(),
582 llvm::LLVMRustAddModuleFlag(
583 module.module_llvm.llmod(),
584 llvm::LLVMModFlagBehavior::Error,
585 "LTOPostLink\0".as_ptr().cast(),
589 let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
// A missing optimization level falls back to `OptLevel::No`.
590 let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
591 write::llvm_optimize(cgcx, diag_handler, module, config, opt_level, opt_stage)?;
/// Handle to an LLVM module buffer created by `LLVMRustModuleBufferCreate`.
/// The buffer is released with `LLVMRustModuleBufferFree` on drop.
597 pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);
// NOTE(review): `Send` and `Sync` are asserted manually and the justification
// is not stated here; confirm the underlying buffer may be shared across
// threads.
599 unsafe impl Send for ModuleBuffer {}
600 unsafe impl Sync for ModuleBuffer {}
/// Serializes the module `m` into a new buffer.
603 pub fn new(m: &llvm::Module) -> ModuleBuffer {
604 ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
608 impl ModuleBufferMethods for ModuleBuffer {
/// Returns the buffer's bytes, using the pointer and length reported by LLVM.
609 fn data(&self) -> &[u8] {
611 let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
612 let len = llvm::LLVMRustModuleBufferLen(self.0);
613 slice::from_raw_parts(ptr, len)
618 impl Drop for ModuleBuffer {
// Frees the underlying LLVM module buffer.
621 llvm::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _));
/// Handle to LLVM ThinLTO data. It is released with
/// `LLVMRustFreeThinLTOData` on drop.
626 pub struct ThinData(&'static mut llvm::ThinLTOData);
// NOTE(review): `Send` and `Sync` are asserted manually and the justification
// is not stated here; confirm the underlying data may be shared across
// threads.
628 unsafe impl Send for ThinData {}
629 unsafe impl Sync for ThinData {}
631 impl Drop for ThinData {
// Frees the underlying ThinLTO data.
634 llvm::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _));
/// Handle to an LLVM ThinLTO buffer created by
/// `LLVMRustThinLTOBufferCreate`. The buffer is released with
/// `LLVMRustThinLTOBufferFree` on drop.
639 pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer);
// NOTE(review): `Send` and `Sync` are asserted manually and the justification
// is not stated here; confirm the underlying buffer may be shared across
// threads.
641 unsafe impl Send for ThinBuffer {}
642 unsafe impl Sync for ThinBuffer {}
/// Serializes the module `m` into a new buffer; `is_thin` is forwarded
/// unchanged to `LLVMRustThinLTOBufferCreate`.
645 pub fn new(m: &llvm::Module, is_thin: bool) -> ThinBuffer {
647 let buffer = llvm::LLVMRustThinLTOBufferCreate(m, is_thin);
653 impl ThinBufferMethods for ThinBuffer {
/// Returns the buffer's bytes, using the pointer and length reported by LLVM.
654 fn data(&self) -> &[u8] {
656 let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
657 let len = llvm::LLVMRustThinLTOBufferLen(self.0);
658 slice::from_raw_parts(ptr, len)
663 impl Drop for ThinBuffer {
// Frees the underlying ThinLTO buffer.
666 llvm::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _));
/// Optimizes a single ThinLTO module.
///
/// The module's serialized data is parsed into a freshly created LLVM
/// context. The ThinLTO rename, resolve-weak, internalize and import steps
/// are then applied using the shared ThinLTO data, the debuginfo compile
/// units are patched, and finally the thin pass manager is run. Temporary
/// bitcode is saved after each stage.
///
/// # Errors
///
/// Returns a `FatalError` when the target machine cannot be created, the
/// bitcode fails to parse, a ThinLTO preparation step fails, or the pass
/// manager fails.
///
/// # Safety
///
/// NOTE(review): the caller contract is not spelled out here. The body calls
/// raw LLVM bindings directly; confirm what callers must guarantee.
671 pub unsafe fn optimize_thin_module(
672 thin_module: ThinModule<LlvmCodegenBackend>,
673 cgcx: &CodegenContext<LlvmCodegenBackend>,
674 ) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
675 let diag_handler = cgcx.create_diag_handler();
677 let module_name = &thin_module.shared.module_names[thin_module.idx];
678 let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
680 (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&diag_handler, &e))?;
682 // Right now the implementation we've got only works over serialized
683 // modules, so we create a fresh new LLVM context and parse the module
684 // into that context. One day, however, we may do this for upstream
685 // crates but for locally codegened modules we may be able to reuse
686 // that LLVM Context and Module.
687 let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
688 let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &diag_handler)? as *const _;
689 let mut module = ModuleCodegen {
690 module_llvm: ModuleLlvm { llmod_raw, llcx, tm },
691 name: thin_module.name().to_string(),
692 kind: ModuleKind::Regular,
695 let target = &*module.module_llvm.tm;
696 let llmod = module.module_llvm.llmod();
697 save_temp_bitcode(cgcx, &module, "thin-lto-input");
699 // Before we do much else find the "main" `DICompileUnit` that we'll be
700 // using below. If we find more than one though then rustc has changed
701 // in a way we're not ready for, so generate an ICE by returning
703 let mut cu1 = ptr::null_mut();
704 let mut cu2 = ptr::null_mut();
705 llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
707 let msg = "multiple source DICompileUnits found";
708 return Err(write::llvm_err(&diag_handler, msg));
711 // Up next comes the per-module local analyses that we do for Thin LTO.
712 // Each of these functions is basically copied from the LLVM
713 // implementation and then tailored to suit this implementation. Ideally
714 // each of these would be supported by upstream LLVM but that's perhaps
715 // a patch for another day!
717 // You can find some more comments about these functions in the LLVM
718 // bindings we've got (currently `PassWrapper.cpp`)
721 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
722 if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
723 let msg = "failed to prepare thin LTO module";
724 return Err(write::llvm_err(&diag_handler, msg));
726 save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
732 .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
733 if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
734 let msg = "failed to prepare thin LTO module";
735 return Err(write::llvm_err(&diag_handler, msg));
737 save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
743 .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
744 if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
745 let msg = "failed to prepare thin LTO module";
746 return Err(write::llvm_err(&diag_handler, msg));
748 save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
753 cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
754 if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
755 let msg = "failed to prepare thin LTO module";
756 return Err(write::llvm_err(&diag_handler, msg));
758 save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
761 // Ok now this is a bit unfortunate. This is also something you won't
762 // find upstream in LLVM's ThinLTO passes! This is a hack for now to
763 // work around bugs in LLVM.
765 // First discovered in #45511 it was found that as part of ThinLTO
766 // importing passes LLVM will import `DICompileUnit` metadata
767 // information across modules. This means that we'll be working with one
768 // LLVM module that has multiple `DICompileUnit` instances in it (a
769 // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
770 // bugs in LLVM's backend which generates invalid DWARF in a situation
773 // https://bugs.llvm.org/show_bug.cgi?id=35212
774 // https://bugs.llvm.org/show_bug.cgi?id=35562
776 // While the first bug there is fixed the second ended up causing #46346
777 // which was basically a resurgence of #45511 after LLVM's bug 35212 was
780 // This function below is a huge hack around this problem. The function
781 // below is defined in `PassWrapper.cpp` and will basically "merge"
782 // all `DICompileUnit` instances in a module. Basically it'll take all
783 // the objects, rewrite all pointers of `DISubprogram` to point to the
784 // first `DICompileUnit`, and then delete all the other units.
786 // This is probably mangling the debug info slightly (but hopefully
787 // not too much) but for now at least gets LLVM to emit valid DWARF (or
788 // so it appears). Hopefully we can remove this once upstream bugs are
793 .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module.name());
794 llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
795 save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
798 // Alright now that we've done everything related to the ThinLTO
799 // analysis it's time to run some optimizations! Here we use the same
800 // `run_pass_manager` as the "fat" LTO above except that we tell it to
801 // populate a thin-specific pass manager, which presumably LLVM treats a
802 // little differently.
804 info!("running thin lto passes over {}", module.name);
805 run_pass_manager(cgcx, &diag_handler, &mut module, true)?;
806 save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
812 /// Maps LLVM module identifiers to their corresponding LLVM LTO cache keys
813 #[derive(Debug, Default)]
814 pub struct ThinLTOKeysMap {
815 // key = llvm name of importing module, value = LLVM cache key
816 keys: FxHashMap<String, String>,
819 impl ThinLTOKeysMap {
/// Writes the map to `path` through a buffered writer, one entry per line
/// in the form `<module> <key>` with a single space as separator.
820 fn save_to_file(&self, path: &Path) -> io::Result<()> {
822 let file = File::create(path)?;
823 let mut writer = io::BufWriter::new(file);
824 for (module, key) in &self.keys {
825 writeln!(writer, "{} {}", module, key)?;
/// Reads a map from `path`, expecting the line format written by
/// `save_to_file`.
///
/// # Panics
///
/// Panics if a line does not consist of exactly two space-separated values.
830 fn load_from_file(path: &Path) -> io::Result<Self> {
831 use std::io::BufRead;
832 let mut keys = FxHashMap::default();
833 let file = File::open(path)?;
834 for line in io::BufReader::new(file).lines() {
836 let mut split = line.split(' ');
837 let module = split.next().unwrap();
838 let key = split.next().unwrap();
839 assert_eq!(split.next(), None, "Expected two space-separated values, found {:?}", line);
840 keys.insert(module.to_string(), key.to_string());
/// Builds the map for the current session by pairing each module with its
/// name by position and computing its cache key with
/// `LLVMRustComputeLTOCacheKey`.
///
/// # Panics
///
/// Panics if a key cannot be turned into a string or a module name is not
/// valid UTF-8.
845 fn from_thin_lto_modules(
847 modules: &[llvm::ThinLTOModule],
850 let keys = iter::zip(modules, names)
851 .map(|(module, name)| {
852 let key = build_string(|rust_str| unsafe {
853 llvm::LLVMRustComputeLTOCacheKey(rust_str, module.identifier, data.0);
855 .expect("Invalid ThinLTO module key");
856 (name.clone().into_string().unwrap(), key)
/// Converts an LLVM module name to `&str`. A name that is not valid UTF-8
/// is reported as a compiler bug via `bug!`.
863 fn module_name_to_str(c_str: &CStr) -> &str {
864 c_str.to_str().unwrap_or_else(|e| {
865 bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)
/// Parses `data` as LTO bitcode into the LLVM context `cx` using
/// `LLVMRustParseBitcodeForLTO`, passing `name` along as a C string.
///
/// The returned module borrows from `cx`. When the binding yields no module,
/// the error "failed to parse bitcode for LTO module" is built through
/// `write::llvm_err` with `diag_handler`.
869 pub fn parse_module<'a>(
870 cx: &'a llvm::Context,
873 diag_handler: &Handler,
874 ) -> Result<&'a llvm::Module, FatalError> {
876 llvm::LLVMRustParseBitcodeForLTO(cx, data.as_ptr(), data.len(), name.as_ptr()).ok_or_else(
878 let msg = "failed to parse bitcode for LTO module";
879 write::llvm_err(diag_handler, msg)