compiler/rustc_codegen_llvm/src/back/lto.rs

   1 use crate::back::write::{
   2     self, save_temp_bitcode, to_llvm_opt_settings, with_llvm_pmb, DiagnosticHandlers,
   3 };
   4 use crate::llvm::archive_ro::ArchiveRO;
   5 use crate::llvm::{self, build_string, False, True};
   6 use crate::{llvm_util, LlvmCodegenBackend, ModuleLlvm};
   7 use rustc_codegen_ssa::back::lto::{LtoModuleCodegen, SerializedModule, ThinModule, ThinShared};
   8 use rustc_codegen_ssa::back::symbol_export;
   9 use rustc_codegen_ssa::back::write::{
  10     CodegenContext, FatLTOInput, ModuleConfig, TargetMachineFactoryConfig,
  11 };
  12 use rustc_codegen_ssa::traits::*;
  13 use rustc_codegen_ssa::{looks_like_rust_object_file, ModuleCodegen, ModuleKind};
  14 use rustc_data_structures::fx::FxHashMap;
  15 use rustc_errors::{FatalError, Handler};
  16 use rustc_hir::def_id::LOCAL_CRATE;
  17 use rustc_middle::bug;
  18 use rustc_middle::dep_graph::WorkProduct;
  19 use rustc_middle::middle::exported_symbols::SymbolExportLevel;
  20 use rustc_session::cgu_reuse_tracker::CguReuse;
  21 use rustc_session::config::{self, CrateType, Lto};
  22 use tracing::{debug, info};
  23
  24 use std::ffi::{CStr, CString};
  25 use std::fs::File;
  26 use std::io;
  27 use std::iter;
  28 use std::path::Path;
  29 use std::ptr;
  30 use std::slice;
  31 use std::sync::Arc;
  32
/// We keep track of the computed LTO cache keys from the previous
/// session to determine which CGUs we can reuse.
///
/// This is the name of the file, inside the incremental compilation session
/// directory, that `thin_lto` loads the previous session's keys from and
/// saves the current session's keys to.
pub const THIN_LTO_KEYS_INCR_COMP_FILE_NAME: &str = "thin-lto-past-keys.bin";
  36
  37 pub fn crate_type_allows_lto(crate_type: CrateType) -> bool {
  38     match crate_type {
  39         CrateType::Executable | CrateType::Staticlib | CrateType::Cdylib => true,
  40         CrateType::Dylib | CrateType::Rlib | CrateType::ProcMacro => false,
  41     }
  42 }
  43
  44 fn prepare_lto(
  45     cgcx: &CodegenContext<LlvmCodegenBackend>,
  46     diag_handler: &Handler,
  47 ) -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError> {
  48     let export_threshold = match cgcx.lto {
  49         // We're just doing LTO for our one crate
  50         Lto::ThinLocal => SymbolExportLevel::Rust,
  51
  52         // We're doing LTO for the entire crate graph
  53         Lto::Fat | Lto::Thin => symbol_export::crates_export_threshold(&cgcx.crate_types),
  54
  55         Lto::No => panic!("didn't request LTO but we're doing LTO"),
  56     };
  57
  58     let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
  59         if level.is_below_threshold(export_threshold) {
  60             Some(CString::new(name.as_str()).unwrap())
  61         } else {
  62             None
  63         }
  64     };
  65     let exported_symbols = cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
  66     let mut symbols_below_threshold = {
  67         let _timer = cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
  68         exported_symbols[&LOCAL_CRATE].iter().filter_map(symbol_filter).collect::<Vec<CString>>()
  69     };
  70     info!("{} symbols to preserve in this crate", symbols_below_threshold.len());
  71
  72     // If we're performing LTO for the entire crate graph, then for each of our
  73     // upstream dependencies, find the corresponding rlib and load the bitcode
  74     // from the archive.
  75     //
  76     // We save off all the bytecode and LLVM module ids for later processing
  77     // with either fat or thin LTO
  78     let mut upstream_modules = Vec::new();
  79     if cgcx.lto != Lto::ThinLocal {
  80         if cgcx.opts.cg.prefer_dynamic {
  81             diag_handler
  82                 .struct_err("cannot prefer dynamic linking when performing LTO")
  83                 .note(
  84                     "only 'staticlib', 'bin', and 'cdylib' outputs are \
  85                                supported with LTO",
  86                 )
  87                 .emit();
  88             return Err(FatalError);
  89         }
  90
  91         // Make sure we actually can run LTO
  92         for crate_type in cgcx.crate_types.iter() {
  93             if !crate_type_allows_lto(*crate_type) {
  94                 let e = diag_handler.fatal(
  95                     "lto can only be run for executables, cdylibs and \
  96                                             static library outputs",
  97                 );
  98                 return Err(e);
  99             }
 100         }
 101
 102         for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
 103             let exported_symbols =
 104                 cgcx.exported_symbols.as_ref().expect("needs exported symbols for LTO");
 105             {
 106                 let _timer =
 107                     cgcx.prof.generic_activity("LLVM_lto_generate_symbols_below_threshold");
 108                 symbols_below_threshold
 109                     .extend(exported_symbols[&cnum].iter().filter_map(symbol_filter));
 110             }
 111
 112             let archive = ArchiveRO::open(path).expect("wanted an rlib");
 113             let obj_files = archive
 114                 .iter()
 115                 .filter_map(|child| child.ok().and_then(|c| c.name().map(|name| (name, c))))
 116                 .filter(|&(name, _)| looks_like_rust_object_file(name));
 117             for (name, child) in obj_files {
 118                 info!("adding bitcode from {}", name);
 119                 match get_bitcode_slice_from_object_data(child.data()) {
 120                     Ok(data) => {
 121                         let module = SerializedModule::FromRlib(data.to_vec());
 122                         upstream_modules.push((module, CString::new(name).unwrap()));
 123                     }
 124                     Err(msg) => return Err(diag_handler.fatal(&msg)),
 125                 }
 126             }
 127         }
 128     }
 129
 130     Ok((symbols_below_threshold, upstream_modules))
 131 }
 132
 133 fn get_bitcode_slice_from_object_data(obj: &[u8]) -> Result<&[u8], String> {
 134     let mut len = 0;
 135     let data =
 136         unsafe { llvm::LLVMRustGetBitcodeSliceFromObjectData(obj.as_ptr(), obj.len(), &mut len) };
 137     if !data.is_null() {
 138         assert!(len != 0);
 139         let bc = unsafe { slice::from_raw_parts(data, len) };
 140
 141         // `bc` must be a sub-slice of `obj`.
 142         assert!(obj.as_ptr() <= bc.as_ptr());
 143         assert!(bc[bc.len()..bc.len()].as_ptr() <= obj[obj.len()..obj.len()].as_ptr());
 144
 145         Ok(bc)
 146     } else {
 147         assert!(len == 0);
 148         let msg = llvm::last_error().unwrap_or_else(|| "unknown LLVM error".to_string());
 149         Err(format!("failed to get bitcode from object file for LTO ({})", msg))
 150     }
 151 }
 152
 153 /// Performs fat LTO by merging all modules into a single one and returning it
 154 /// for further optimization.
 155 pub(crate) fn run_fat(
 156     cgcx: &CodegenContext<LlvmCodegenBackend>,
 157     modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
 158     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 159 ) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
 160     let diag_handler = cgcx.create_diag_handler();
 161     let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
 162     let symbols_below_threshold =
 163         symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
 164     fat_lto(
 165         cgcx,
 166         &diag_handler,
 167         modules,
 168         cached_modules,
 169         upstream_modules,
 170         &symbols_below_threshold,
 171     )
 172 }
 173
 174 /// Performs thin LTO by performing necessary global analysis and returning two
 175 /// lists, one of the modules that need optimization and another for modules that
 176 /// can simply be copied over from the incr. comp. cache.
 177 pub(crate) fn run_thin(
 178     cgcx: &CodegenContext<LlvmCodegenBackend>,
 179     modules: Vec<(String, ThinBuffer)>,
 180     cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 181 ) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
 182     let diag_handler = cgcx.create_diag_handler();
 183     let (symbols_below_threshold, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
 184     let symbols_below_threshold =
 185         symbols_below_threshold.iter().map(|c| c.as_ptr()).collect::<Vec<_>>();
 186     if cgcx.opts.cg.linker_plugin_lto.enabled() {
 187         unreachable!(
 188             "We should never reach this case if the LTO step \
 189                       is deferred to the linker"
 190         );
 191     }
 192     thin_lto(
 193         cgcx,
 194         &diag_handler,
 195         modules,
 196         upstream_modules,
 197         cached_modules,
 198         &symbols_below_threshold,
 199     )
 200 }
 201
 202 pub(crate) fn prepare_thin(module: ModuleCodegen<ModuleLlvm>) -> (String, ThinBuffer) {
 203     let name = module.name.clone();
 204     let buffer = ThinBuffer::new(module.module_llvm.llmod());
 205     (name, buffer)
 206 }
 207
/// Builds the single merged module that fat LTO goes on to optimize.
///
/// Inputs:
/// * `modules` - modules from the current session, either still in memory or
///   already serialized.
/// * `cached_modules` - serialized modules reloaded from a previous
///   incremental session, together with their work products.
/// * `serialized_modules` - serialized modules supplied by the caller (the
///   upstream rlib bitcode when called from `run_fat`).
/// * `symbols_below_threshold` - C-string pointers handed to
///   `LLVMRustRunRestrictionPass` after linking.
///
/// One module is picked as the base; every other module is linked into it.
/// The result is returned as `LtoModuleCodegen::Fat`, which also carries the
/// serialized buffers that were linked in so that they stay alive alongside
/// the merged module.
///
/// # Errors
///
/// Fails if the base module has to be parsed from bitcode and parsing fails,
/// or if any module cannot be added to the linker.
///
/// # Panics
///
/// Panics if there is no in-memory regular module and no serialized module
/// at all, or if a module name contains an interior NUL byte.
fn fat_lto(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    diag_handler: &Handler,
    modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
    mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
    symbols_below_threshold: &[*const libc::c_char],
) -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError> {
    let _timer = cgcx.prof.generic_activity("LLVM_fat_lto_build_monolithic_module");
    info!("going for a fat lto");

    // Sort out all our lists of incoming modules into two lists.
    //
    // * `serialized_modules` (also an argument to this function) contains all
    //   modules that are serialized in-memory.
    // * `in_memory` contains modules which are already parsed and in-memory,
    //   such as from multi-CGU builds.
    //
    // All of `cached_modules` (cached from previous incremental builds) can
    // immediately go onto the `serialized_modules` modules list and then we can
    // split the `modules` array into these two lists.
    let mut in_memory = Vec::new();
    serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
        info!("pushing cached module {:?}", wp.cgu_name);
        (buffer, CString::new(wp.cgu_name).unwrap())
    }));
    for module in modules {
        match module {
            FatLTOInput::InMemory(m) => in_memory.push(m),
            FatLTOInput::Serialized { name, buffer } => {
                info!("pushing serialized module {:?}", name);
                let buffer = SerializedModule::Local(buffer);
                serialized_modules.push((buffer, CString::new(name).unwrap()));
            }
        }
    }

    // Find the "costliest" module and merge everything into that codegen unit.
    // All the other modules will be serialized and reparsed into the new
    // context, so this hopefully avoids serializing and parsing the largest
    // codegen unit.
    //
    // Additionally use a regular module as the base here to ensure that various
    // file copy operations in the backend work correctly. The only other kind
    // of module here should be an allocator one, and if your crate is smaller
    // than the allocator module then the size doesn't really matter anyway.
    //
    // The `(cost, index)` tuples are compared lexicographically, so among
    // modules of equal cost the one with the highest index wins.
    let costliest_module = in_memory
        .iter()
        .enumerate()
        .filter(|&(_, module)| module.kind == ModuleKind::Regular)
        .map(|(i, module)| {
            let cost = unsafe { llvm::LLVMRustModuleCost(module.module_llvm.llmod()) };
            (cost, i)
        })
        .max();

    // If we found a costliest module, we're good to go. Otherwise all our
    // inputs were serialized which could happen in the case, for example, that
    // all our inputs were incrementally reread from the cache and we're just
    // re-executing the LTO passes. If that's the case deserialize the first
    // module and create a linker with it.
    let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
        Some((_cost, i)) => in_memory.remove(i),
        None => {
            assert!(!serialized_modules.is_empty(), "must have at least one serialized module");
            let (buffer, name) = serialized_modules.remove(0);
            info!("no in-memory regular modules to choose from, parsing {:?}", name);
            ModuleCodegen {
                module_llvm: ModuleLlvm::parse(cgcx, &name, buffer.data(), diag_handler)?,
                name: name.into_string().unwrap(),
                kind: ModuleKind::Regular,
            }
        }
    };
    // Buffers of every module that gets linked in; returned with the result so
    // they are not freed before the merged module.
    let mut serialized_bitcode = Vec::new();
    {
        let (llcx, llmod) = {
            let llvm = &module.module_llvm;
            (&llvm.llcx, llvm.llmod())
        };
        info!("using {:?} as a base module", module.name);

        // The linking steps below may produce errors and diagnostics within LLVM
        // which we'd like to handle and print, so set up our diagnostic handlers
        // (which get unregistered when they go out of scope below).
        let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);

        // For all other modules we codegened we'll need to link them into our own
        // bitcode. All modules were codegened in their own LLVM context, however,
        // and we want to move everything to the same LLVM context. Currently the
        // way we know of to do that is to serialize them to a string and then parse
        // them later. Not great but hey, that's why it's "fat" LTO, right?
        for module in in_memory {
            let buffer = ModuleBuffer::new(module.module_llvm.llmod());
            let llmod_id = CString::new(&module.name[..]).unwrap();
            serialized_modules.push((SerializedModule::Local(buffer), llmod_id));
        }
        // Sort the modules to ensure we produce deterministic results.
        serialized_modules.sort_by(|module1, module2| module1.1.cmp(&module2.1));

        // For all serialized bitcode files we parse them and link them in as we did
        // above, this is all mostly handled in C++. Like above, though, we don't
        // know much about the memory management here so we err on the side of being
        // safe and persist everything with the original module.
        let mut linker = Linker::new(llmod);
        for (bc_decoded, name) in serialized_modules {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_fat_lto_link_module", format!("{:?}", name));
            info!("linking {:?}", name);
            let data = bc_decoded.data();
            linker.add(data).map_err(|()| {
                let msg = format!("failed to load bitcode of module {:?}", name);
                write::llvm_err(diag_handler, &msg)
            })?;
            serialized_bitcode.push(bc_decoded);
        }
        // Free the linker explicitly before the merged module is used further.
        drop(linker);
        save_temp_bitcode(cgcx, &module, "lto.input");

        // Fat LTO also suffers from the invalid DWARF issue similar to Thin LTO.
        // Here we rewrite all `DICompileUnit` pointers if there is only one `DICompileUnit`.
        // This only works around the problem when codegen-units = 1.
        // Refer to the comments in the `optimize_thin_module` function for more details.
        //
        // NOTE(review): the patch below runs only when `cu2` comes back non-null,
        // which reads differently from "only one `DICompileUnit`" above -- confirm
        // against the semantics of `LLVMRustLTOGetDICompileUnit`.
        let mut cu1 = ptr::null_mut();
        let mut cu2 = ptr::null_mut();
        unsafe { llvm::LLVMRustLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2) };
        if !cu2.is_null() {
            let _timer =
                cgcx.prof.generic_activity_with_arg("LLVM_fat_lto_patch_debuginfo", &*module.name);
            unsafe { llvm::LLVMRustLTOPatchDICompileUnit(llmod, cu1) };
            save_temp_bitcode(cgcx, &module, "fat-lto-after-patch");
        }

        // Internalize everything below threshold to help strip out more modules and such.
        unsafe {
            let ptr = symbols_below_threshold.as_ptr();
            llvm::LLVMRustRunRestrictionPass(
                llmod,
                ptr as *const *const libc::c_char,
                symbols_below_threshold.len() as libc::size_t,
            );
            save_temp_bitcode(cgcx, &module, "lto.after-restriction");
        }
    }

    Ok(LtoModuleCodegen::Fat { module: Some(module), _serialized_bitcode: serialized_bitcode })
}
 356
 357 crate struct Linker<'a>(&'a mut llvm::Linker<'a>);
 358
 359 impl<'a> Linker<'a> {
 360     crate fn new(llmod: &'a llvm::Module) -> Self {
 361         unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
 362     }
 363
 364     crate fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
 365         unsafe {
 366             if llvm::LLVMRustLinkerAdd(
 367                 self.0,
 368                 bytecode.as_ptr() as *const libc::c_char,
 369                 bytecode.len(),
 370             ) {
 371                 Ok(())
 372             } else {
 373                 Err(())
 374             }
 375         }
 376     }
 377 }
 378
 379 impl Drop for Linker<'_> {
 380     fn drop(&mut self) {
 381         unsafe {
 382             llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _));
 383         }
 384     }
 385 }
 386
/// Prepare "thin" LTO to get run on these modules.
///
/// The general structure of ThinLTO is quite different from the structure of
/// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
/// one giant LLVM module, and then we run more optimization passes over this
/// big module after internalizing most symbols. Thin LTO, on the other hand,
/// avoids this large bottleneck through more targeted optimization.
///
/// At a high level Thin LTO looks like:
///
///     1. Prepare a "summary" of each LLVM module in question which describes
///        the values inside, cost of the values, etc.
///     2. Merge the summaries of all modules in question into one "index"
///     3. Perform some global analysis on this index
///     4. For each module, use the index and analysis calculated previously to
///        perform local transformations on the module, for example inlining
///        small functions from other modules.
///     5. Run thin-specific optimization passes over each module, and then code
///        generate everything at the end.
///
/// The summary for each module is intended to be quite cheap, and the global
/// index is relatively quite cheap to create as well. As a result, the goal of
/// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
/// situations. For example one cheap optimization is that we can parallelize
/// all codegen modules, easily making use of all the cores on a machine.
///
/// With all that in mind, the function here is designed at specifically just
/// calculating the *index* for ThinLTO. This index will then be shared amongst
/// all of the `LtoModuleCodegen` units returned below and destroyed once
/// they all go out of scope.
///
/// Inputs:
/// * `modules` - `(name, buffer)` pairs for the modules of the current session.
/// * `serialized_modules` - serialized modules supplied by the caller (the
///   upstream rlib bitcode when called from `run_thin`).
/// * `cached_modules` - serialized modules reloaded from a previous
///   incremental session, together with their work products.
/// * `symbols_below_threshold` - C-string pointers passed on to
///   `LLVMRustCreateThinLTOData`.
///
/// Returns the modules that must be (re-)optimized and the work products of
/// the modules that can be reused as-is.
///
/// # Errors
///
/// Fails if the ThinLTO data cannot be created or if the cache key map cannot
/// be written to the incremental session directory.
fn thin_lto(
    cgcx: &CodegenContext<LlvmCodegenBackend>,
    diag_handler: &Handler,
    modules: Vec<(String, ThinBuffer)>,
    serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
    cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
    symbols_below_threshold: &[*const libc::c_char],
) -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError> {
    let _timer = cgcx.prof.generic_activity("LLVM_thin_lto_global_analysis");
    unsafe {
        info!("going for that thin, thin LTO");

        // Work products of the cached modules, keyed by CGU name. Only these
        // modules are candidates for reuse further down.
        let green_modules: FxHashMap<_, _> =
            cached_modules.iter().map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone())).collect();

        let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
        let mut thin_buffers = Vec::with_capacity(modules.len());
        let mut module_names = Vec::with_capacity(full_scope_len);
        let mut thin_modules = Vec::with_capacity(full_scope_len);

        // `thin_modules` stores raw pointers into the names and buffers; the
        // owners are pushed into `module_names` / `thin_buffers` right after so
        // that they outlive those pointers.
        for (i, (name, buffer)) in modules.into_iter().enumerate() {
            info!("local module: {} - {}", i, name);
            let cname = CString::new(name.clone()).unwrap();
            thin_modules.push(llvm::ThinLTOModule {
                identifier: cname.as_ptr(),
                data: buffer.data().as_ptr(),
                len: buffer.data().len(),
            });
            thin_buffers.push(buffer);
            module_names.push(cname);
        }

        // FIXME: All upstream crates are deserialized internally in the
        //        function below to extract their summary and modules. Note that
        //        unlike the loop above we *must* decode and/or read something
        //        here as these are all just serialized files on disk. An
        //        improvement, however, to make here would be to store the
        //        module summary separately from the actual module itself. Right
        //        now this is stored in one large bitcode file, and the entire
        //        file is deflate-compressed. We could try to bypass some of the
        //        decompression by storing the index uncompressed and only
        //        lazily decompressing the bytecode if necessary.
        //
        //        Note that truly taking advantage of this optimization will
        //        likely be further down the road. We'd have to implement
        //        incremental ThinLTO first where we could actually avoid
        //        looking at upstream modules entirely sometimes (the contents,
        //        we must always unconditionally look at the index).
        let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());

        let cached_modules =
            cached_modules.into_iter().map(|(sm, wp)| (sm, CString::new(wp.cgu_name).unwrap()));

        // Same bookkeeping as for the local modules above: record raw pointers,
        // then keep the owning values alive in `serialized` / `module_names`.
        for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
            info!("upstream or cached module {:?}", name);
            thin_modules.push(llvm::ThinLTOModule {
                identifier: name.as_ptr(),
                data: module.data().as_ptr(),
                len: module.data().len(),
            });
            serialized.push(module);
            module_names.push(name);
        }

        // Sanity check
        assert_eq!(thin_modules.len(), module_names.len());

        // Delegate to the C++ bindings to create some data here. Once this is a
        // tried-and-true interface we may wish to try to upstream some of this
        // to LLVM itself, right now we reimplement a lot of what they do
        // upstream...
        let data = llvm::LLVMRustCreateThinLTOData(
            thin_modules.as_ptr(),
            thin_modules.len() as u32,
            symbols_below_threshold.as_ptr(),
            symbols_below_threshold.len() as u32,
        )
        .ok_or_else(|| write::llvm_err(diag_handler, "failed to prepare thin LTO context"))?;

        // Wrap the handle so it is freed when the last owner goes away.
        let data = ThinData(data);

        info!("thin LTO data created");

        let (key_map_path, prev_key_map, curr_key_map) = if let Some(ref incr_comp_session_dir) =
            cgcx.incr_comp_session_dir
        {
            let path = incr_comp_session_dir.join(THIN_LTO_KEYS_INCR_COMP_FILE_NAME);
            // If the previous file was deleted, or we get an IO error
            // reading the file, then we'll just use `None` as the
            // prev_key_map, which will force the code to be recompiled.
            let prev =
                if path.exists() { ThinLTOKeysMap::load_from_file(&path).ok() } else { None };
            let curr = ThinLTOKeysMap::from_thin_lto_modules(&data, &thin_modules, &module_names);
            (Some(path), prev, curr)
        } else {
            // If we don't compile incrementally, we don't need to load the
            // import data from LLVM.
            assert!(green_modules.is_empty());
            let curr = ThinLTOKeysMap::default();
            (None, None, curr)
        };
        info!("thin LTO cache key map loaded");
        info!("prev_key_map: {:#?}", prev_key_map);
        info!("curr_key_map: {:#?}", curr_key_map);

        // Throw our data in an `Arc` as we'll be sharing it across threads. We
        // also put all memory referenced by the C++ data (buffers, ids, etc)
        // into the arc as well. After this we'll create a thin module
        // codegen per module in this data.
        let shared = Arc::new(ThinShared {
            data,
            thin_buffers,
            serialized_modules: serialized,
            module_names,
        });

        // `copy_jobs`: work products reused unchanged; `opt_jobs`: modules that
        // still have to go through ThinLTO optimization.
        let mut copy_jobs = vec![];
        let mut opt_jobs = vec![];

        info!("checking which modules can be-reused and which have to be re-optimized.");
        for (module_index, module_name) in shared.module_names.iter().enumerate() {
            let module_name = module_name_to_str(module_name);
            // Reuse is only considered when a previous key map could be loaded
            // and the module came from the incremental cache.
            if let (Some(prev_key_map), true) =
                (prev_key_map.as_ref(), green_modules.contains_key(module_name))
            {
                assert!(cgcx.incr_comp_session_dir.is_some());

                // If a module exists in both the current and the previous session,
                // and has the same LTO cache key in both sessions, then we can re-use it
                if prev_key_map.keys.get(module_name) == curr_key_map.keys.get(module_name) {
                    let work_product = green_modules[module_name].clone();
                    copy_jobs.push(work_product);
                    info!(" - {}: re-used", module_name);
                    assert!(cgcx.incr_comp_session_dir.is_some());
                    cgcx.cgu_reuse_tracker.set_actual_reuse(module_name, CguReuse::PostLto);
                    continue;
                }
            }

            info!(" - {}: re-compiled", module_name);
            opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
                shared: shared.clone(),
                idx: module_index,
            }));
        }

        // Save the current ThinLTO import information for the next compilation
        // session, overwriting the previous serialized data (if any).
        if let Some(path) = key_map_path {
            if let Err(err) = curr_key_map.save_to_file(&path) {
                let msg = format!("Error while writing ThinLTO key data: {}", err);
                return Err(write::llvm_err(diag_handler, &msg));
            }
        }

        Ok((opt_jobs, copy_jobs))
    }
}
 575
 576 pub(crate) fn run_pass_manager(
 577     cgcx: &CodegenContext<LlvmCodegenBackend>,
 578     diag_handler: &Handler,
 579     module: &ModuleCodegen<ModuleLlvm>,
 580     config: &ModuleConfig,
 581     thin: bool,
 582 ) -> Result<(), FatalError> {
 583     let _timer = cgcx.prof.extra_verbose_generic_activity("LLVM_lto_optimize", &*module.name);
 584
 585     // Now we have one massive module inside of llmod. Time to run the
 586     // LTO-specific optimization passes that LLVM provides.
 587     //
 588     // This code is based off the code found in llvm's LTO code generator:
 589     //      tools/lto/LTOCodeGenerator.cpp
 590     debug!("running the pass manager");
 591     unsafe {
 592         if llvm_util::should_use_new_llvm_pass_manager(
 593             &config.new_llvm_pass_manager,
 594             &cgcx.target_arch,
 595         ) {
 596             let opt_stage = if thin { llvm::OptStage::ThinLTO } else { llvm::OptStage::FatLTO };
 597             let opt_level = config.opt_level.unwrap_or(config::OptLevel::No);
 598             write::optimize_with_new_llvm_pass_manager(
 599                 cgcx,
 600                 diag_handler,
 601                 module,
 602                 config,
 603                 opt_level,
 604                 opt_stage,
 605             )?;
 606             debug!("lto done");
 607             return Ok(());
 608         }
 609
 610         let pm = llvm::LLVMCreatePassManager();
 611         llvm::LLVMAddAnalysisPasses(module.module_llvm.tm, pm);
 612
 613         if config.verify_llvm_ir {
 614             let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
 615             llvm::LLVMRustAddPass(pm, pass.unwrap());
 616         }
 617
 618         let opt_level = config
 619             .opt_level
 620             .map(|x| to_llvm_opt_settings(x).0)
 621             .unwrap_or(llvm::CodeGenOptLevel::None);
 622         with_llvm_pmb(module.module_llvm.llmod(), config, opt_level, false, &mut |b| {
 623             if thin {
 624                 llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm);
 625             } else {
 626                 llvm::LLVMPassManagerBuilderPopulateLTOPassManager(
 627                     b, pm, /* Internalize = */ False, /* RunInliner = */ True,
 628                 );
 629             }
 630         });
 631
 632         // We always generate bitcode through ThinLTOBuffers,
 633         // which do not support anonymous globals
 634         if config.bitcode_needed() {
 635             let pass = llvm::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr().cast());
 636             llvm::LLVMRustAddPass(pm, pass.unwrap());
 637         }
 638
 639         if config.verify_llvm_ir {
 640             let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr().cast());
 641             llvm::LLVMRustAddPass(pm, pass.unwrap());
 642         }
 643
 644         llvm::LLVMRunPassManager(pm, module.module_llvm.llmod());
 645
 646         llvm::LLVMDisposePassManager(pm);
 647     }
 648     debug!("lto done");
 649     Ok(())
 650 }
 651
 652 pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);
 653
 654 unsafe impl Send for ModuleBuffer {}
 655 unsafe impl Sync for ModuleBuffer {}
 656
 657 impl ModuleBuffer {
 658     pub fn new(m: &llvm::Module) -> ModuleBuffer {
 659         ModuleBuffer(unsafe { llvm::LLVMRustModuleBufferCreate(m) })
 660     }
 661 }
 662
 663 impl ModuleBufferMethods for ModuleBuffer {
 664     fn data(&self) -> &[u8] {
 665         unsafe {
 666             let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
 667             let len = llvm::LLVMRustModuleBufferLen(self.0);
 668             slice::from_raw_parts(ptr, len)
 669         }
 670     }
 671 }
 672
 673 impl Drop for ModuleBuffer {
 674     fn drop(&mut self) {
 675         unsafe {
 676             llvm::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _));
 677         }
 678     }
 679 }
 680
/// Owning handle to an LLVM-side `llvm::ThinLTOData`.
///
/// The data is released with `LLVMRustFreeThinLTOData` when the value is
/// dropped.
pub struct ThinData(&'static mut llvm::ThinLTOData);

// NOTE(review): these impls assert that the underlying LLVM data may be moved
// to and shared between threads. Nothing visible here demonstrates why that is
// sound; presumably it is only read after construction -- confirm.
unsafe impl Send for ThinData {}
unsafe impl Sync for ThinData {}
 685
 686 impl Drop for ThinData {
 687     fn drop(&mut self) {
 688         unsafe {
 689             llvm::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _));
 690         }
 691     }
 692 }
 693
/// Owning handle to an LLVM-side `llvm::ThinLTOBuffer`.
///
/// The handle is obtained from `LLVMRustThinLTOBufferCreate` (see
/// `ThinBuffer::new`) and is released with `LLVMRustThinLTOBufferFree` when
/// the value is dropped.
pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer);

// NOTE(review): these impls assert that the underlying LLVM buffer may be moved
// to and shared between threads. Nothing visible here demonstrates why that is
// sound; presumably the buffer is never mutated after creation -- confirm.
unsafe impl Send for ThinBuffer {}
unsafe impl Sync for ThinBuffer {}
 698
 699 impl ThinBuffer {
 700     pub fn new(m: &llvm::Module) -> ThinBuffer {
 701         unsafe {
 702             let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
 703             ThinBuffer(buffer)
 704         }
 705     }
 706 }
 707
 708 impl ThinBufferMethods for ThinBuffer {
 709     fn data(&self) -> &[u8] {
 710         unsafe {
 711             let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
 712             let len = llvm::LLVMRustThinLTOBufferLen(self.0);
 713             slice::from_raw_parts(ptr, len)
 714         }
 715     }
 716 }
 717
 718 impl Drop for ThinBuffer {
 719     fn drop(&mut self) {
 720         unsafe {
 721             llvm::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _));
 722         }
 723     }
 724 }
 725
/// Runs the per-module ThinLTO pipeline for `thin_module` and returns the
/// resulting module.
///
/// The serialized module is parsed into a freshly created LLVM context and
/// then goes through, in order: rename, weak resolution, internalization,
/// importing, `DICompileUnit` patching, and finally the thin LTO pass manager
/// (`run_pass_manager` with `thin = true`). `save_temp_bitcode` is invoked
/// before the first step and after each subsequent one with a step-specific
/// name.
///
/// # Errors
///
/// Returns a `FatalError` (reported through a handler created from `cgcx`) if
/// the target machine cannot be created, the bitcode cannot be parsed, more
/// than one source `DICompileUnit` is found, any of the
/// `LLVMRustPrepareThinLTO*` steps reports failure, or `run_pass_manager`
/// fails.
///
/// # Panics
///
/// Panics if the module name at `thin_module.idx` is not valid UTF-8.
///
/// # Safety
///
/// NOTE(review): the caller's obligations are not spelled out anywhere visible
/// here. The body passes `thin_module.shared.data` and the module's serialized
/// bytes straight to LLVM FFI, so presumably both must come from the same
/// ThinLTO analysis -- confirm and document.
pub unsafe fn optimize_thin_module(
    thin_module: &mut ThinModule<LlvmCodegenBackend>,
    cgcx: &CodegenContext<LlvmCodegenBackend>,
) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
    let diag_handler = cgcx.create_diag_handler();

    let module_name = &thin_module.shared.module_names[thin_module.idx];
    let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, module_name.to_str().unwrap());
    let tm =
        (cgcx.tm_factory)(tm_factory_config).map_err(|e| write::llvm_err(&diag_handler, &e))?;

    // Right now the implementation we've got only works over serialized
    // modules, so we create a fresh new LLVM context and parse the module
    // into that context. One day, however, we may do this for upstream
    // crates but for locally codegened modules we may be able to reuse
    // that LLVM Context and Module.
    //
    // NOTE(review): if `parse_module` fails, `llcx` and `tm` never reach the
    // `ModuleLlvm` below; whether that leaks them depends on ownership
    // conventions not visible here -- confirm.
    let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
    let llmod_raw = parse_module(llcx, module_name, thin_module.data(), &diag_handler)? as *const _;
    let module = ModuleCodegen {
        module_llvm: ModuleLlvm { llmod_raw, llcx, tm },
        name: thin_module.name().to_string(),
        kind: ModuleKind::Regular,
    };
    {
        let target = &*module.module_llvm.tm;
        let llmod = module.module_llvm.llmod();
        save_temp_bitcode(cgcx, &module, "thin-lto-input");

        // Before we do much else find the "main" `DICompileUnit` that we'll be
        // using below. If we find more than one though then rustc has changed
        // in a way we're not ready for, so generate an ICE by returning
        // an error.
        let mut cu1 = ptr::null_mut();
        let mut cu2 = ptr::null_mut();
        llvm::LLVMRustLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
        if !cu2.is_null() {
            let msg = "multiple source DICompileUnits found";
            return Err(write::llvm_err(&diag_handler, msg));
        }

        // Up next comes the per-module local analyses that we do for Thin LTO.
        // Each of these functions is basically copied from the LLVM
        // implementation and then tailored to suit this implementation. Ideally
        // each of these would be supported by upstream LLVM but that's perhaps
        // a patch for another day!
        //
        // You can find some more comments about these functions in the LLVM
        // bindings we've got (currently `PassWrapper.cpp`)
        {
            let _timer =
                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_rename", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod, target) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
        }

        {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_resolve_weak", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
        }

        {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_internalize", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
        }

        {
            let _timer =
                cgcx.prof.generic_activity_with_arg("LLVM_thin_lto_import", thin_module.name());
            if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod, target) {
                let msg = "failed to prepare thin LTO module";
                return Err(write::llvm_err(&diag_handler, msg));
            }
            save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
        }

        // Ok now this is a bit unfortunate. This is also something you won't
        // find upstream in LLVM's ThinLTO passes! This is a hack for now to
        // work around bugs in LLVM.
        //
        // First discovered in #45511 it was found that as part of ThinLTO
        // importing passes LLVM will import `DICompileUnit` metadata
        // information across modules. This means that we'll be working with one
        // LLVM module that has multiple `DICompileUnit` instances in it (a
        // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
        // bugs in LLVM's backend which generates invalid DWARF in a situation
        // like this:
        //
        //  https://bugs.llvm.org/show_bug.cgi?id=35212
        //  https://bugs.llvm.org/show_bug.cgi?id=35562
        //
        // While the first bug there is fixed the second ended up causing #46346
        // which was basically a resurgence of #45511 after LLVM's bug 35212 was
        // fixed.
        //
        // This function below is a huge hack around this problem. The function
        // below is defined in `PassWrapper.cpp` and will basically "merge"
        // all `DICompileUnit` instances in a module. Basically it'll take all
        // the objects, rewrite all pointers of `DISubprogram` to point to the
        // first `DICompileUnit`, and then delete all the other units.
        //
        // This is probably mangling the debug info slightly (but hopefully
        // not too much) but for now at least gets LLVM to emit valid DWARF (or
        // so it appears). Hopefully we can remove this once upstream bugs are
        // fixed in LLVM.
        {
            let _timer = cgcx
                .prof
                .generic_activity_with_arg("LLVM_thin_lto_patch_debuginfo", thin_module.name());
            llvm::LLVMRustLTOPatchDICompileUnit(llmod, cu1);
            save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
        }

        // Alright now that we've done everything related to the ThinLTO
        // analysis it's time to run some optimizations! Here we use the same
        // `run_pass_manager` as the "fat" LTO above except that we tell it to
        // populate a thin-specific pass manager, which presumably LLVM treats a
        // little differently.
        {
            info!("running thin lto passes over {}", module.name);
            let config = cgcx.config(module.kind);
            run_pass_manager(cgcx, &diag_handler, &module, config, true)?;
            save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
        }
    }
    Ok(module)
}
 867
/// Maps LLVM module identifiers to their corresponding LLVM LTO cache keys.
///
/// `save_to_file` writes the map as one `<module> <key>` line per entry and
/// `load_from_file` reads that format back.
#[derive(Debug, Default)]
pub struct ThinLTOKeysMap {
    // key = llvm name of importing module, value = LLVM cache key
    keys: FxHashMap<String, String>,
}
 874
 875 impl ThinLTOKeysMap {
 876     fn save_to_file(&self, path: &Path) -> io::Result<()> {
 877         use std::io::Write;
 878         let file = File::create(path)?;
 879         let mut writer = io::BufWriter::new(file);
 880         for (module, key) in &self.keys {
 881             writeln!(writer, "{} {}", module, key)?;
 882         }
 883         Ok(())
 884     }
 885
 886     fn load_from_file(path: &Path) -> io::Result<Self> {
 887         use std::io::BufRead;
 888         let mut keys = FxHashMap::default();
 889         let file = File::open(path)?;
 890         for line in io::BufReader::new(file).lines() {
 891             let line = line?;
 892             let mut split = line.split(' ');
 893             let module = split.next().unwrap();
 894             let key = split.next().unwrap();
 895             assert_eq!(split.next(), None, "Expected two space-separated values, found {:?}", line);
 896             keys.insert(module.to_string(), key.to_string());
 897         }
 898         Ok(Self { keys })
 899     }
 900
 901     fn from_thin_lto_modules(
 902         data: &ThinData,
 903         modules: &[llvm::ThinLTOModule],
 904         names: &[CString],
 905     ) -> Self {
 906         let keys = iter::zip(modules, names)
 907             .map(|(module, name)| {
 908                 let key = build_string(|rust_str| unsafe {
 909                     llvm::LLVMRustComputeLTOCacheKey(rust_str, module.identifier, data.0);
 910                 })
 911                 .expect("Invalid ThinLTO module key");
 912                 (name.clone().into_string().unwrap(), key)
 913             })
 914             .collect();
 915         Self { keys }
 916     }
 917 }
 918
 919 fn module_name_to_str(c_str: &CStr) -> &str {
 920     c_str.to_str().unwrap_or_else(|e| {
 921         bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e)
 922     })
 923 }
 924
 925 pub fn parse_module<'a>(
 926     cx: &'a llvm::Context,
 927     name: &CStr,
 928     data: &[u8],
 929     diag_handler: &Handler,
 930 ) -> Result<&'a llvm::Module, FatalError> {
 931     unsafe {
 932         llvm::LLVMRustParseBitcodeForLTO(cx, data.as_ptr(), data.len(), name.as_ptr()).ok_or_else(
 933             || {
 934                 let msg = "failed to parse bitcode for LTO module";
 935                 write::llvm_err(diag_handler, msg)
 936             },
 937         )
 938     }
 939 }