src/librustc_codegen_llvm/back/lto.rs

   1 use crate::back::bytecode::{DecodedBytecode, RLIB_BYTECODE_EXTENSION};
   2 use crate::back::write::{self, DiagnosticHandlers, with_llvm_pmb, save_temp_bitcode,
   3     to_llvm_opt_settings};
   4 use crate::llvm::archive_ro::ArchiveRO;
   5 use crate::llvm::{self, True, False};
   6 use crate::{ModuleLlvm, LlvmCodegenBackend};
   7 use rustc_codegen_ssa::back::symbol_export;
   8 use rustc_codegen_ssa::back::write::{ModuleConfig, CodegenContext, FatLTOInput};
   9 use rustc_codegen_ssa::back::lto::{SerializedModule, LtoModuleCodegen, ThinShared, ThinModule};
  10 use rustc_codegen_ssa::traits::*;
  11 use errors::{FatalError, Handler};
  12 use rustc::dep_graph::WorkProduct;
  13 use rustc::dep_graph::cgu_reuse_tracker::CguReuse;
  14 use rustc::hir::def_id::LOCAL_CRATE;
  15 use rustc::middle::exported_symbols::SymbolExportLevel;
  16 use rustc::session::config::{self, Lto};
  17 use rustc::util::common::time_ext;
  18 use rustc::util::profiling::ProfileCategory;
  19 use rustc_data_structures::fx::FxHashMap;
  20 use rustc_codegen_ssa::{ModuleCodegen, ModuleKind};
  21
  22 use std::ffi::{CStr, CString};
  23 use std::ptr;
  24 use std::slice;
  25 use std::sync::Arc;
  26
  27 pub fn crate_type_allows_lto(crate_type: config::CrateType) -> bool {
  28     match crate_type {
  29         config::CrateType::Executable |
  30         config::CrateType::Staticlib  |
  31         config::CrateType::Cdylib     => true,
  32
  33         config::CrateType::Dylib     |
  34         config::CrateType::Rlib      |
  35         config::CrateType::ProcMacro => false,
  36     }
  37 }
  38
  39 fn prepare_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
  40                diag_handler: &Handler)
  41     -> Result<(Vec<CString>, Vec<(SerializedModule<ModuleBuffer>, CString)>), FatalError>
  42 {
  43     let export_threshold = match cgcx.lto {
  44         // We're just doing LTO for our one crate
  45         Lto::ThinLocal => SymbolExportLevel::Rust,
  46
  47         // We're doing LTO for the entire crate graph
  48         Lto::Fat | Lto::Thin => {
  49             symbol_export::crates_export_threshold(&cgcx.crate_types)
  50         }
  51
  52         Lto::No => panic!("didn't request LTO but we're doing LTO"),
  53     };
  54
  55     let symbol_filter = &|&(ref name, level): &(String, SymbolExportLevel)| {
  56         if level.is_below_threshold(export_threshold) {
  57             let mut bytes = Vec::with_capacity(name.len() + 1);
  58             bytes.extend(name.bytes());
  59             Some(CString::new(bytes).unwrap())
  60         } else {
  61             None
  62         }
  63     };
  64     let exported_symbols = cgcx.exported_symbols
  65         .as_ref().expect("needs exported symbols for LTO");
  66     let mut symbol_white_list = exported_symbols[&LOCAL_CRATE]
  67         .iter()
  68         .filter_map(symbol_filter)
  69         .collect::<Vec<CString>>();
  70     let _timer = cgcx.profile_activity(ProfileCategory::Codegen,
  71                                        "generate_symbol_white_list_for_thinlto");
  72     info!("{} symbols to preserve in this crate", symbol_white_list.len());
  73
  74     // If we're performing LTO for the entire crate graph, then for each of our
  75     // upstream dependencies, find the corresponding rlib and load the bitcode
  76     // from the archive.
  77     //
  78     // We save off all the bytecode and LLVM module ids for later processing
  79     // with either fat or thin LTO
  80     let mut upstream_modules = Vec::new();
  81     if cgcx.lto != Lto::ThinLocal {
  82         if cgcx.opts.cg.prefer_dynamic {
  83             diag_handler.struct_err("cannot prefer dynamic linking when performing LTO")
  84                         .note("only 'staticlib', 'bin', and 'cdylib' outputs are \
  85                                supported with LTO")
  86                         .emit();
  87             return Err(FatalError)
  88         }
  89
  90         // Make sure we actually can run LTO
  91         for crate_type in cgcx.crate_types.iter() {
  92             if !crate_type_allows_lto(*crate_type) {
  93                 let e = diag_handler.fatal("lto can only be run for executables, cdylibs and \
  94                                             static library outputs");
  95                 return Err(e)
  96             }
  97         }
  98
  99         for &(cnum, ref path) in cgcx.each_linked_rlib_for_lto.iter() {
 100             let _timer = cgcx.profile_activity(ProfileCategory::Codegen,
 101                                                format!("load: {}", path.display()));
 102             let exported_symbols = cgcx.exported_symbols
 103                 .as_ref().expect("needs exported symbols for LTO");
 104             symbol_white_list.extend(
 105                 exported_symbols[&cnum]
 106                     .iter()
 107                     .filter_map(symbol_filter));
 108
 109             let archive = ArchiveRO::open(&path).expect("wanted an rlib");
 110             let bytecodes = archive.iter().filter_map(|child| {
 111                 child.ok().and_then(|c| c.name().map(|name| (name, c)))
 112             }).filter(|&(name, _)| name.ends_with(RLIB_BYTECODE_EXTENSION));
 113             for (name, data) in bytecodes {
 114                 info!("adding bytecode {}", name);
 115                 let bc_encoded = data.data();
 116
 117                 let (bc, id) = time_ext(cgcx.time_passes, None, &format!("decode {}", name), || {
 118                     match DecodedBytecode::new(bc_encoded) {
 119                         Ok(b) => Ok((b.bytecode(), b.identifier().to_string())),
 120                         Err(e) => Err(diag_handler.fatal(&e)),
 121                     }
 122                 })?;
 123                 let bc = SerializedModule::FromRlib(bc);
 124                 upstream_modules.push((bc, CString::new(id).unwrap()));
 125             }
 126         }
 127     }
 128
 129     Ok((symbol_white_list, upstream_modules))
 130 }
 131
 132 /// Performs fat LTO by merging all modules into a single one and returning it
 133 /// for further optimization.
 134 pub(crate) fn run_fat(cgcx: &CodegenContext<LlvmCodegenBackend>,
 135                       modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
 136                       cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>)
 137     -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
 138 {
 139     let diag_handler = cgcx.create_diag_handler();
 140     let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
 141     let symbol_white_list = symbol_white_list.iter()
 142                                              .map(|c| c.as_ptr())
 143                                              .collect::<Vec<_>>();
 144     fat_lto(
 145         cgcx,
 146         &diag_handler,
 147         modules,
 148         cached_modules,
 149         upstream_modules,
 150         &symbol_white_list,
 151     )
 152 }
 153
 154 /// Performs thin LTO by performing necessary global analysis and returning two
 155 /// lists, one of the modules that need optimization and another for modules that
 156 /// can simply be copied over from the incr. comp. cache.
 157 pub(crate) fn run_thin(cgcx: &CodegenContext<LlvmCodegenBackend>,
 158                        modules: Vec<(String, ThinBuffer)>,
 159                        cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>)
 160     -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
 161 {
 162     let diag_handler = cgcx.create_diag_handler();
 163     let (symbol_white_list, upstream_modules) = prepare_lto(cgcx, &diag_handler)?;
 164     let symbol_white_list = symbol_white_list.iter()
 165                                              .map(|c| c.as_ptr())
 166                                              .collect::<Vec<_>>();
 167     if cgcx.opts.cg.linker_plugin_lto.enabled() {
 168         unreachable!("We should never reach this case if the LTO step \
 169                       is deferred to the linker");
 170     }
 171     thin_lto(cgcx,
 172              &diag_handler,
 173              modules,
 174              upstream_modules,
 175              cached_modules,
 176              &symbol_white_list)
 177 }
 178
 179 pub(crate) fn prepare_thin(
 180     module: ModuleCodegen<ModuleLlvm>
 181 ) -> (String, ThinBuffer) {
 182     let name = module.name.clone();
 183     let buffer = ThinBuffer::new(module.module_llvm.llmod());
 184     (name, buffer)
 185 }
 186
 187 fn fat_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
 188            diag_handler: &Handler,
 189            mut modules: Vec<FatLTOInput<LlvmCodegenBackend>>,
 190            cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 191            mut serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
 192            symbol_white_list: &[*const libc::c_char])
 193     -> Result<LtoModuleCodegen<LlvmCodegenBackend>, FatalError>
 194 {
 195     info!("going for a fat lto");
 196
 197     // Find the "costliest" module and merge everything into that codegen unit.
 198     // All the other modules will be serialized and reparsed into the new
 199     // context, so this hopefully avoids serializing and parsing the largest
 200     // codegen unit.
 201     //
 202     // Additionally use a regular module as the base here to ensure that various
 203     // file copy operations in the backend work correctly. The only other kind
 204     // of module here should be an allocator one, and if your crate is smaller
 205     // than the allocator module then the size doesn't really matter anyway.
 206     let costliest_module = modules.iter()
 207         .enumerate()
 208         .filter_map(|(i, module)| {
 209             match module {
 210                 FatLTOInput::InMemory(m) => Some((i, m)),
 211                 FatLTOInput::Serialized { .. } => None,
 212             }
 213         })
 214         .filter(|&(_, module)| module.kind == ModuleKind::Regular)
 215         .map(|(i, module)| {
 216             let cost = unsafe {
 217                 llvm::LLVMRustModuleCost(module.module_llvm.llmod())
 218             };
 219             (cost, i)
 220         })
 221         .max();
 222
 223     // If we found a costliest module, we're good to go. Otherwise all our
 224     // inputs were serialized which could happen in the case, for example, that
 225     // all our inputs were incrementally reread from the cache and we're just
 226     // re-executing the LTO passes. If that's the case deserialize the first
 227     // module and create a linker with it.
 228     let module: ModuleCodegen<ModuleLlvm> = match costliest_module {
 229         Some((_cost, i)) => {
 230             match modules.remove(i) {
 231                 FatLTOInput::InMemory(m) => m,
 232                 FatLTOInput::Serialized { .. } => unreachable!(),
 233             }
 234         }
 235         None => {
 236             let pos = modules.iter().position(|m| {
 237                 match m {
 238                     FatLTOInput::InMemory(_) => false,
 239                     FatLTOInput::Serialized { .. } => true,
 240                 }
 241             }).expect("must have at least one serialized module");
 242             let (name, buffer) = match modules.remove(pos) {
 243                 FatLTOInput::Serialized { name, buffer } => (name, buffer),
 244                 FatLTOInput::InMemory(_) => unreachable!(),
 245             };
 246             ModuleCodegen {
 247                 module_llvm: ModuleLlvm::parse(cgcx, &name, &buffer, diag_handler)?,
 248                 name,
 249                 kind: ModuleKind::Regular,
 250             }
 251         }
 252     };
 253     let mut serialized_bitcode = Vec::new();
 254     {
 255         let (llcx, llmod) = {
 256             let llvm = &module.module_llvm;
 257             (&llvm.llcx, llvm.llmod())
 258         };
 259         info!("using {:?} as a base module", module.name);
 260
 261         // The linking steps below may produce errors and diagnostics within LLVM
 262         // which we'd like to handle and print, so set up our diagnostic handlers
 263         // (which get unregistered when they go out of scope below).
 264         let _handler = DiagnosticHandlers::new(cgcx, diag_handler, llcx);
 265
 266         // For all other modules we codegened we'll need to link them into our own
 267         // bitcode. All modules were codegened in their own LLVM context, however,
 268         // and we want to move everything to the same LLVM context. Currently the
 269         // way we know of to do that is to serialize them to a string and them parse
 270         // them later. Not great but hey, that's why it's "fat" LTO, right?
 271         serialized_modules.extend(modules.into_iter().map(|module| {
 272             match module {
 273                 FatLTOInput::InMemory(module) => {
 274                     let buffer = ModuleBuffer::new(module.module_llvm.llmod());
 275                     let llmod_id = CString::new(&module.name[..]).unwrap();
 276                     (SerializedModule::Local(buffer), llmod_id)
 277                 }
 278                 FatLTOInput::Serialized { name, buffer } => {
 279                     let llmod_id = CString::new(name).unwrap();
 280                     (SerializedModule::Local(buffer), llmod_id)
 281                 }
 282             }
 283         }));
 284         serialized_modules.extend(cached_modules.into_iter().map(|(buffer, wp)| {
 285             (buffer, CString::new(wp.cgu_name.clone()).unwrap())
 286         }));
 287
 288         // For all serialized bitcode files we parse them and link them in as we did
 289         // above, this is all mostly handled in C++. Like above, though, we don't
 290         // know much about the memory management here so we err on the side of being
 291         // save and persist everything with the original module.
 292         let mut linker = Linker::new(llmod);
 293         for (bc_decoded, name) in serialized_modules {
 294             info!("linking {:?}", name);
 295             time_ext(cgcx.time_passes, None, &format!("ll link {:?}", name), || {
 296                 let data = bc_decoded.data();
 297                 linker.add(&data).map_err(|()| {
 298                     let msg = format!("failed to load bc of {:?}", name);
 299                     write::llvm_err(&diag_handler, &msg)
 300                 })
 301             })?;
 302             serialized_bitcode.push(bc_decoded);
 303         }
 304         drop(linker);
 305         save_temp_bitcode(&cgcx, &module, "lto.input");
 306
 307         // Internalize everything that *isn't* in our whitelist to help strip out
 308         // more modules and such
 309         unsafe {
 310             let ptr = symbol_white_list.as_ptr();
 311             llvm::LLVMRustRunRestrictionPass(llmod,
 312                                              ptr as *const *const libc::c_char,
 313                                              symbol_white_list.len() as libc::size_t);
 314             save_temp_bitcode(&cgcx, &module, "lto.after-restriction");
 315         }
 316
 317         if cgcx.no_landing_pads {
 318             unsafe {
 319                 llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
 320             }
 321             save_temp_bitcode(&cgcx, &module, "lto.after-nounwind");
 322         }
 323     }
 324
 325     Ok(LtoModuleCodegen::Fat {
 326         module: Some(module),
 327         _serialized_bitcode: serialized_bitcode,
 328     })
 329 }
 330
 331 struct Linker<'a>(&'a mut llvm::Linker<'a>);
 332
 333 impl Linker<'a> {
 334     fn new(llmod: &'a llvm::Module) -> Self {
 335         unsafe { Linker(llvm::LLVMRustLinkerNew(llmod)) }
 336     }
 337
 338     fn add(&mut self, bytecode: &[u8]) -> Result<(), ()> {
 339         unsafe {
 340             if llvm::LLVMRustLinkerAdd(self.0,
 341                                        bytecode.as_ptr() as *const libc::c_char,
 342                                        bytecode.len()) {
 343                 Ok(())
 344             } else {
 345                 Err(())
 346             }
 347         }
 348     }
 349 }
 350
 351 impl Drop for Linker<'a> {
 352     fn drop(&mut self) {
 353         unsafe { llvm::LLVMRustLinkerFree(&mut *(self.0 as *mut _)); }
 354     }
 355 }
 356
 357 /// Prepare "thin" LTO to get run on these modules.
 358 ///
 359 /// The general structure of ThinLTO is quite different from the structure of
 360 /// "fat" LTO above. With "fat" LTO all LLVM modules in question are merged into
 361 /// one giant LLVM module, and then we run more optimization passes over this
 362 /// big module after internalizing most symbols. Thin LTO, on the other hand,
 363 /// avoid this large bottleneck through more targeted optimization.
 364 ///
 365 /// At a high level Thin LTO looks like:
 366 ///
 367 ///     1. Prepare a "summary" of each LLVM module in question which describes
 368 ///        the values inside, cost of the values, etc.
 369 ///     2. Merge the summaries of all modules in question into one "index"
 370 ///     3. Perform some global analysis on this index
 371 ///     4. For each module, use the index and analysis calculated previously to
 372 ///        perform local transformations on the module, for example inlining
 373 ///        small functions from other modules.
 374 ///     5. Run thin-specific optimization passes over each module, and then code
 375 ///        generate everything at the end.
 376 ///
 377 /// The summary for each module is intended to be quite cheap, and the global
 378 /// index is relatively quite cheap to create as well. As a result, the goal of
 379 /// ThinLTO is to reduce the bottleneck on LTO and enable LTO to be used in more
 380 /// situations. For example one cheap optimization is that we can parallelize
 381 /// all codegen modules, easily making use of all the cores on a machine.
 382 ///
 383 /// With all that in mind, the function here is designed at specifically just
 384 /// calculating the *index* for ThinLTO. This index will then be shared amongst
 385 /// all of the `LtoModuleCodegen` units returned below and destroyed once
 386 /// they all go out of scope.
 387 fn thin_lto(cgcx: &CodegenContext<LlvmCodegenBackend>,
 388             diag_handler: &Handler,
 389             modules: Vec<(String, ThinBuffer)>,
 390             serialized_modules: Vec<(SerializedModule<ModuleBuffer>, CString)>,
 391             cached_modules: Vec<(SerializedModule<ModuleBuffer>, WorkProduct)>,
 392             symbol_white_list: &[*const libc::c_char])
 393     -> Result<(Vec<LtoModuleCodegen<LlvmCodegenBackend>>, Vec<WorkProduct>), FatalError>
 394 {
 395     unsafe {
 396         info!("going for that thin, thin LTO");
 397
 398         let green_modules: FxHashMap<_, _> = cached_modules
 399             .iter()
 400             .map(|&(_, ref wp)| (wp.cgu_name.clone(), wp.clone()))
 401             .collect();
 402
 403         let full_scope_len = modules.len() + serialized_modules.len() + cached_modules.len();
 404         let mut thin_buffers = Vec::with_capacity(modules.len());
 405         let mut module_names = Vec::with_capacity(full_scope_len);
 406         let mut thin_modules = Vec::with_capacity(full_scope_len);
 407
 408         for (i, (name, buffer)) in modules.into_iter().enumerate() {
 409             info!("local module: {} - {}", i, name);
 410             let cname = CString::new(name.clone()).unwrap();
 411             thin_modules.push(llvm::ThinLTOModule {
 412                 identifier: cname.as_ptr(),
 413                 data: buffer.data().as_ptr(),
 414                 len: buffer.data().len(),
 415             });
 416             thin_buffers.push(buffer);
 417             module_names.push(cname);
 418         }
 419
 420         // FIXME: All upstream crates are deserialized internally in the
 421         //        function below to extract their summary and modules. Note that
 422         //        unlike the loop above we *must* decode and/or read something
 423         //        here as these are all just serialized files on disk. An
 424         //        improvement, however, to make here would be to store the
 425         //        module summary separately from the actual module itself. Right
 426         //        now this is store in one large bitcode file, and the entire
 427         //        file is deflate-compressed. We could try to bypass some of the
 428         //        decompression by storing the index uncompressed and only
 429         //        lazily decompressing the bytecode if necessary.
 430         //
 431         //        Note that truly taking advantage of this optimization will
 432         //        likely be further down the road. We'd have to implement
 433         //        incremental ThinLTO first where we could actually avoid
 434         //        looking at upstream modules entirely sometimes (the contents,
 435         //        we must always unconditionally look at the index).
 436         let mut serialized = Vec::with_capacity(serialized_modules.len() + cached_modules.len());
 437
 438         let cached_modules = cached_modules.into_iter().map(|(sm, wp)| {
 439             (sm, CString::new(wp.cgu_name).unwrap())
 440         });
 441
 442         for (module, name) in serialized_modules.into_iter().chain(cached_modules) {
 443             info!("upstream or cached module {:?}", name);
 444             thin_modules.push(llvm::ThinLTOModule {
 445                 identifier: name.as_ptr(),
 446                 data: module.data().as_ptr(),
 447                 len: module.data().len(),
 448             });
 449             serialized.push(module);
 450             module_names.push(name);
 451         }
 452
 453         // Sanity check
 454         assert_eq!(thin_modules.len(), module_names.len());
 455
 456         // Delegate to the C++ bindings to create some data here. Once this is a
 457         // tried-and-true interface we may wish to try to upstream some of this
 458         // to LLVM itself, right now we reimplement a lot of what they do
 459         // upstream...
 460         let data = llvm::LLVMRustCreateThinLTOData(
 461             thin_modules.as_ptr(),
 462             thin_modules.len() as u32,
 463             symbol_white_list.as_ptr(),
 464             symbol_white_list.len() as u32,
 465         ).ok_or_else(|| {
 466             write::llvm_err(&diag_handler, "failed to prepare thin LTO context")
 467         })?;
 468
 469         info!("thin LTO data created");
 470
 471         let import_map = if cgcx.incr_comp_session_dir.is_some() {
 472             ThinLTOImports::from_thin_lto_data(data)
 473         } else {
 474             // If we don't compile incrementally, we don't need to load the
 475             // import data from LLVM.
 476             assert!(green_modules.is_empty());
 477             ThinLTOImports::default()
 478         };
 479         info!("thin LTO import map loaded");
 480
 481         let data = ThinData(data);
 482
 483         // Throw our data in an `Arc` as we'll be sharing it across threads. We
 484         // also put all memory referenced by the C++ data (buffers, ids, etc)
 485         // into the arc as well. After this we'll create a thin module
 486         // codegen per module in this data.
 487         let shared = Arc::new(ThinShared {
 488             data,
 489             thin_buffers,
 490             serialized_modules: serialized,
 491             module_names,
 492         });
 493
 494         let mut copy_jobs = vec![];
 495         let mut opt_jobs = vec![];
 496
 497         info!("checking which modules can be-reused and which have to be re-optimized.");
 498         for (module_index, module_name) in shared.module_names.iter().enumerate() {
 499             let module_name = module_name_to_str(module_name);
 500
 501             // If the module hasn't changed and none of the modules it imports
 502             // from has changed, we can re-use the post-ThinLTO version of the
 503             // module.
 504             if green_modules.contains_key(module_name) {
 505                 let imports_all_green = import_map.modules_imported_by(module_name)
 506                     .iter()
 507                     .all(|imported_module| green_modules.contains_key(imported_module));
 508
 509                 if imports_all_green {
 510                     let work_product = green_modules[module_name].clone();
 511                     copy_jobs.push(work_product);
 512                     info!(" - {}: re-used", module_name);
 513                     cgcx.cgu_reuse_tracker.set_actual_reuse(module_name,
 514                                                             CguReuse::PostLto);
 515                     continue
 516                 }
 517             }
 518
 519             info!(" - {}: re-compiled", module_name);
 520             opt_jobs.push(LtoModuleCodegen::Thin(ThinModule {
 521                 shared: shared.clone(),
 522                 idx: module_index,
 523             }));
 524         }
 525
 526         Ok((opt_jobs, copy_jobs))
 527     }
 528 }
 529
 530 pub(crate) fn run_pass_manager(cgcx: &CodegenContext<LlvmCodegenBackend>,
 531                     module: &ModuleCodegen<ModuleLlvm>,
 532                     config: &ModuleConfig,
 533                     thin: bool) {
 534     // Now we have one massive module inside of llmod. Time to run the
 535     // LTO-specific optimization passes that LLVM provides.
 536     //
 537     // This code is based off the code found in llvm's LTO code generator:
 538     //      tools/lto/LTOCodeGenerator.cpp
 539     debug!("running the pass manager");
 540     unsafe {
 541         let pm = llvm::LLVMCreatePassManager();
 542         llvm::LLVMRustAddAnalysisPasses(module.module_llvm.tm, pm, module.module_llvm.llmod());
 543
 544         if config.verify_llvm_ir {
 545             let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
 546             llvm::LLVMRustAddPass(pm, pass.unwrap());
 547         }
 548
 549         // When optimizing for LTO we don't actually pass in `-O0`, but we force
 550         // it to always happen at least with `-O1`.
 551         //
 552         // With ThinLTO we mess around a lot with symbol visibility in a way
 553         // that will actually cause linking failures if we optimize at O0 which
 554         // notable is lacking in dead code elimination. To ensure we at least
 555         // get some optimizations and correctly link we forcibly switch to `-O1`
 556         // to get dead code elimination.
 557         //
 558         // Note that in general this shouldn't matter too much as you typically
 559         // only turn on ThinLTO when you're compiling with optimizations
 560         // otherwise.
 561         let opt_level = config.opt_level.map(|x| to_llvm_opt_settings(x).0)
 562             .unwrap_or(llvm::CodeGenOptLevel::None);
 563         let opt_level = match opt_level {
 564             llvm::CodeGenOptLevel::None => llvm::CodeGenOptLevel::Less,
 565             level => level,
 566         };
 567         with_llvm_pmb(module.module_llvm.llmod(), config, opt_level, false, &mut |b| {
 568             if thin {
 569                 llvm::LLVMRustPassManagerBuilderPopulateThinLTOPassManager(b, pm);
 570             } else {
 571                 llvm::LLVMPassManagerBuilderPopulateLTOPassManager(b, pm,
 572                     /* Internalize = */ False,
 573                     /* RunInliner = */ True);
 574             }
 575         });
 576
 577         // We always generate bitcode through ThinLTOBuffers,
 578         // which do not support anonymous globals
 579         if config.bitcode_needed() {
 580             let pass = llvm::LLVMRustFindAndCreatePass("name-anon-globals\0".as_ptr() as *const _);
 581             llvm::LLVMRustAddPass(pm, pass.unwrap());
 582         }
 583
 584         if config.verify_llvm_ir {
 585             let pass = llvm::LLVMRustFindAndCreatePass("verify\0".as_ptr() as *const _);
 586             llvm::LLVMRustAddPass(pm, pass.unwrap());
 587         }
 588
 589         time_ext(cgcx.time_passes, None, "LTO passes", ||
 590              llvm::LLVMRunPassManager(pm, module.module_llvm.llmod()));
 591
 592         llvm::LLVMDisposePassManager(pm);
 593     }
 594     debug!("lto done");
 595 }
 596
 597 pub struct ModuleBuffer(&'static mut llvm::ModuleBuffer);
 598
 599 unsafe impl Send for ModuleBuffer {}
 600 unsafe impl Sync for ModuleBuffer {}
 601
 602 impl ModuleBuffer {
 603     pub fn new(m: &llvm::Module) -> ModuleBuffer {
 604         ModuleBuffer(unsafe {
 605             llvm::LLVMRustModuleBufferCreate(m)
 606         })
 607     }
 608
 609     pub fn parse<'a>(
 610         &self,
 611         name: &str,
 612         cx: &'a llvm::Context,
 613         handler: &Handler,
 614     ) -> Result<&'a llvm::Module, FatalError> {
 615         let name = CString::new(name).unwrap();
 616         parse_module(cx, &name, self.data(), handler)
 617     }
 618 }
 619
 620 impl ModuleBufferMethods for ModuleBuffer {
 621     fn data(&self) -> &[u8] {
 622         unsafe {
 623             let ptr = llvm::LLVMRustModuleBufferPtr(self.0);
 624             let len = llvm::LLVMRustModuleBufferLen(self.0);
 625             slice::from_raw_parts(ptr, len)
 626         }
 627     }
 628 }
 629
 630 impl Drop for ModuleBuffer {
 631     fn drop(&mut self) {
 632         unsafe { llvm::LLVMRustModuleBufferFree(&mut *(self.0 as *mut _)); }
 633     }
 634 }
 635
 636 pub struct ThinData(&'static mut llvm::ThinLTOData);
 637
 638 unsafe impl Send for ThinData {}
 639 unsafe impl Sync for ThinData {}
 640
 641 impl Drop for ThinData {
 642     fn drop(&mut self) {
 643         unsafe {
 644             llvm::LLVMRustFreeThinLTOData(&mut *(self.0 as *mut _));
 645         }
 646     }
 647 }
 648
 649 pub struct ThinBuffer(&'static mut llvm::ThinLTOBuffer);
 650
 651 unsafe impl Send for ThinBuffer {}
 652 unsafe impl Sync for ThinBuffer {}
 653
 654 impl ThinBuffer {
 655     pub fn new(m: &llvm::Module) -> ThinBuffer {
 656         unsafe {
 657             let buffer = llvm::LLVMRustThinLTOBufferCreate(m);
 658             ThinBuffer(buffer)
 659         }
 660     }
 661 }
 662
 663 impl ThinBufferMethods for ThinBuffer {
 664     fn data(&self) -> &[u8] {
 665         unsafe {
 666             let ptr = llvm::LLVMRustThinLTOBufferPtr(self.0) as *const _;
 667             let len = llvm::LLVMRustThinLTOBufferLen(self.0);
 668             slice::from_raw_parts(ptr, len)
 669         }
 670     }
 671 }
 672
 673 impl Drop for ThinBuffer {
 674     fn drop(&mut self) {
 675         unsafe {
 676             llvm::LLVMRustThinLTOBufferFree(&mut *(self.0 as *mut _));
 677         }
 678     }
 679 }
 680
 681 pub unsafe fn optimize_thin_module(
 682     thin_module: &mut ThinModule<LlvmCodegenBackend>,
 683     cgcx: &CodegenContext<LlvmCodegenBackend>,
 684 ) -> Result<ModuleCodegen<ModuleLlvm>, FatalError> {
 685     let diag_handler = cgcx.create_diag_handler();
 686     let tm = (cgcx.tm_factory.0)().map_err(|e| {
 687         write::llvm_err(&diag_handler, &e)
 688     })?;
 689
 690     // Right now the implementation we've got only works over serialized
 691     // modules, so we create a fresh new LLVM context and parse the module
 692     // into that context. One day, however, we may do this for upstream
 693     // crates but for locally codegened modules we may be able to reuse
 694     // that LLVM Context and Module.
 695     let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names);
 696     let llmod_raw = parse_module(
 697         llcx,
 698         &thin_module.shared.module_names[thin_module.idx],
 699         thin_module.data(),
 700         &diag_handler,
 701     )? as *const _;
 702     let module = ModuleCodegen {
 703         module_llvm: ModuleLlvm {
 704             llmod_raw,
 705             llcx,
 706             tm,
 707         },
 708         name: thin_module.name().to_string(),
 709         kind: ModuleKind::Regular,
 710     };
 711     {
 712         let llmod = module.module_llvm.llmod();
 713         save_temp_bitcode(&cgcx, &module, "thin-lto-input");
 714
 715         // Before we do much else find the "main" `DICompileUnit` that we'll be
 716         // using below. If we find more than one though then rustc has changed
 717         // in a way we're not ready for, so generate an ICE by returning
 718         // an error.
 719         let mut cu1 = ptr::null_mut();
 720         let mut cu2 = ptr::null_mut();
 721         llvm::LLVMRustThinLTOGetDICompileUnit(llmod, &mut cu1, &mut cu2);
 722         if !cu2.is_null() {
 723             let msg = "multiple source DICompileUnits found";
 724             return Err(write::llvm_err(&diag_handler, msg))
 725         }
 726
 727         // Like with "fat" LTO, get some better optimizations if landing pads
 728         // are disabled by removing all landing pads.
 729         if cgcx.no_landing_pads {
 730             let _timer = cgcx.profile_activity(ProfileCategory::Codegen,
 731                                                "LLVM_remove_landing_pads");
 732             llvm::LLVMRustMarkAllFunctionsNounwind(llmod);
 733             save_temp_bitcode(&cgcx, &module, "thin-lto-after-nounwind");
 734         }
 735
 736         // Up next comes the per-module local analyses that we do for Thin LTO.
 737         // Each of these functions is basically copied from the LLVM
 738         // implementation and then tailored to suit this implementation. Ideally
 739         // each of these would be supported by upstream LLVM but that's perhaps
 740         // a patch for another day!
 741         //
 742         // You can find some more comments about these functions in the LLVM
 743         // bindings we've got (currently `PassWrapper.cpp`)
 744         if !llvm::LLVMRustPrepareThinLTORename(thin_module.shared.data.0, llmod) {
 745             let msg = "failed to prepare thin LTO module";
 746             return Err(write::llvm_err(&diag_handler, msg))
 747         }
 748         save_temp_bitcode(cgcx, &module, "thin-lto-after-rename");
 749         if !llvm::LLVMRustPrepareThinLTOResolveWeak(thin_module.shared.data.0, llmod) {
 750             let msg = "failed to prepare thin LTO module";
 751             return Err(write::llvm_err(&diag_handler, msg))
 752         }
 753         save_temp_bitcode(cgcx, &module, "thin-lto-after-resolve");
 754         if !llvm::LLVMRustPrepareThinLTOInternalize(thin_module.shared.data.0, llmod) {
 755             let msg = "failed to prepare thin LTO module";
 756             return Err(write::llvm_err(&diag_handler, msg))
 757         }
 758         save_temp_bitcode(cgcx, &module, "thin-lto-after-internalize");
 759         if !llvm::LLVMRustPrepareThinLTOImport(thin_module.shared.data.0, llmod) {
 760             let msg = "failed to prepare thin LTO module";
 761             return Err(write::llvm_err(&diag_handler, msg))
 762         }
 763         save_temp_bitcode(cgcx, &module, "thin-lto-after-import");
 764
 765         // Ok now this is a bit unfortunate. This is also something you won't
 766         // find upstream in LLVM's ThinLTO passes! This is a hack for now to
 767         // work around bugs in LLVM.
 768         //
 769         // First discovered in #45511 it was found that as part of ThinLTO
 770         // importing passes LLVM will import `DICompileUnit` metadata
 771         // information across modules. This means that we'll be working with one
 772         // LLVM module that has multiple `DICompileUnit` instances in it (a
 773         // bunch of `llvm.dbg.cu` members). Unfortunately there's a number of
 774         // bugs in LLVM's backend which generates invalid DWARF in a situation
 775         // like this:
 776         //
 777         //  https://bugs.llvm.org/show_bug.cgi?id=35212
 778         //  https://bugs.llvm.org/show_bug.cgi?id=35562
 779         //
 780         // While the first bug there is fixed the second ended up causing #46346
 781         // which was basically a resurgence of #45511 after LLVM's bug 35212 was
 782         // fixed.
 783         //
 784         // This function below is a huge hack around this problem. The function
 785         // below is defined in `PassWrapper.cpp` and will basically "merge"
 786         // all `DICompileUnit` instances in a module. Basically it'll take all
 787         // the objects, rewrite all pointers of `DISubprogram` to point to the
 788         // first `DICompileUnit`, and then delete all the other units.
 789         //
 790         // This is probably mangling to the debug info slightly (but hopefully
 791         // not too much) but for now at least gets LLVM to emit valid DWARF (or
 792         // so it appears). Hopefully we can remove this once upstream bugs are
 793         // fixed in LLVM.
 794         llvm::LLVMRustThinLTOPatchDICompileUnit(llmod, cu1);
 795         save_temp_bitcode(cgcx, &module, "thin-lto-after-patch");
 796
 797         // Alright now that we've done everything related to the ThinLTO
 798         // analysis it's time to run some optimizations! Here we use the same
 799         // `run_pass_manager` as the "fat" LTO above except that we tell it to
 800         // populate a thin-specific pass manager, which presumably LLVM treats a
 801         // little differently.
 802         info!("running thin lto passes over {}", module.name);
 803         let config = cgcx.config(module.kind);
 804         run_pass_manager(cgcx, &module, config, true);
 805         save_temp_bitcode(cgcx, &module, "thin-lto-after-pm");
 806     }
 807     Ok(module)
 808 }
 809
 810 #[derive(Debug, Default)]
 811 pub struct ThinLTOImports {
 812     // key = llvm name of importing module, value = list of modules it imports from
 813     imports: FxHashMap<String, Vec<String>>,
 814 }
 815
 816 impl ThinLTOImports {
 817     fn modules_imported_by(&self, llvm_module_name: &str) -> &[String] {
 818         self.imports.get(llvm_module_name).map(|v| &v[..]).unwrap_or(&[])
 819     }
 820
 821     /// Loads the ThinLTO import map from ThinLTOData.
 822     unsafe fn from_thin_lto_data(data: *const llvm::ThinLTOData) -> ThinLTOImports {
 823         unsafe extern "C" fn imported_module_callback(payload: *mut libc::c_void,
 824                                                       importing_module_name: *const libc::c_char,
 825                                                       imported_module_name: *const libc::c_char) {
 826             let map = &mut* (payload as *mut ThinLTOImports);
 827             let importing_module_name = CStr::from_ptr(importing_module_name);
 828             let importing_module_name = module_name_to_str(&importing_module_name);
 829             let imported_module_name = CStr::from_ptr(imported_module_name);
 830             let imported_module_name = module_name_to_str(&imported_module_name);
 831
 832             if !map.imports.contains_key(importing_module_name) {
 833                 map.imports.insert(importing_module_name.to_owned(), vec![]);
 834             }
 835
 836             map.imports
 837                .get_mut(importing_module_name)
 838                .unwrap()
 839                .push(imported_module_name.to_owned());
 840         }
 841         let mut map = ThinLTOImports::default();
 842         llvm::LLVMRustGetThinLTOModuleImports(data,
 843                                               imported_module_callback,
 844                                               &mut map as *mut _ as *mut libc::c_void);
 845         map
 846     }
 847 }
 848
 849 fn module_name_to_str(c_str: &CStr) -> &str {
 850     c_str.to_str().unwrap_or_else(|e|
 851         bug!("Encountered non-utf8 LLVM module name `{}`: {}", c_str.to_string_lossy(), e))
 852 }
 853
 854 fn parse_module<'a>(
 855     cx: &'a llvm::Context,
 856     name: &CStr,
 857     data: &[u8],
 858     diag_handler: &Handler,
 859 ) -> Result<&'a llvm::Module, FatalError> {
 860     unsafe {
 861         llvm::LLVMRustParseBitcodeForLTO(
 862             cx,
 863             data.as_ptr(),
 864             data.len(),
 865             name.as_ptr(),
 866         ).ok_or_else(|| {
 867             let msg = "failed to parse bitcode for LTO module";
 868             write::llvm_err(&diag_handler, msg)
 869         })
 870     }
 871 }