1 // Copyright 2016 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Inlining pass for MIR functions
14 use rustc::hir::CodegenFnAttrFlags;
15 use rustc::hir::def_id::DefId;
17 use rustc_data_structures::bit_set::BitSet;
18 use rustc_data_structures::indexed_vec::{Idx, IndexVec};
21 use rustc::mir::visit::*;
22 use rustc::ty::{self, Instance, InstanceDef, ParamEnv, Ty, TyCtxt};
23 use rustc::ty::subst::{Subst,Substs};
25 use std::collections::VecDeque;
27 use transform::{MirPass, MirSource};
28 use super::simplify::{remove_dead_blocks, CfgSimplifier};
31 use rustc_target::spec::abi::Abi;
// Inlining thresholds: the estimated callee cost computed in `should_inline`
// is compared against one of these.
// NOTE(review): presumably HINT_THRESHOLD applies to callees carrying an
// inline hint and DEFAULT_THRESHOLD otherwise — the selecting branches are
// not visible in this view; confirm.
33 const DEFAULT_THRESHOLD: usize = 50;
34 const HINT_THRESHOLD: usize = 100;
// Cost added for every counted statement and for every ordinary terminator.
36 const INSTR_COST: usize = 5;
// Cost added for calls, asserts, and drops of types that need dropping.
37 const CALL_PENALTY: usize = 25;
// Cost added for a local whose size is not known.
39 const UNKNOWN_SIZE_COST: usize = 10;
/// A call in the caller's MIR that is a candidate for inlining.
// NOTE(review): only the `substs` field is visible in this view; the code
// that builds a `CallSite` also sets `callee` and `location`.
43 #[derive(Copy, Clone, Debug)]
44 struct CallSite<'tcx> {
// Substitutions of the resolved callee instance (filled from `instance.substs`).
46 substs: &'tcx Substs<'tcx>,
// Hooks the inliner into the MIR pass machinery.
51 impl MirPass for Inline {
/// Runs the inliner on `mir`, but only when the `mir_opt_level` debugging
/// option is at least 2; otherwise the body is left untouched.
52 fn run_pass<'a, 'tcx>(&self,
53 tcx: TyCtxt<'a, 'tcx, 'tcx>,
55 mir: &mut Mir<'tcx>) {
56 if tcx.sess.opts.debugging_opts.mir_opt_level >= 2 {
// All of the real work is delegated to `Inliner`.
57 Inliner { tcx, source }.run_pass(mir);
/// Context shared by the inlining routines for one caller body.
// NOTE(review): the methods also read `self.source`; that field is not
// visible in this view.
62 struct Inliner<'a, 'tcx: 'a> {
// Type context used for queries (param env, MIR availability, attributes, ...).
63 tcx: TyCtxt<'a, 'tcx, 'tcx>,
67 impl<'a, 'tcx> Inliner<'a, 'tcx> {
/// Drives inlining for one caller body.
///
/// Collects the valid call sites of `caller_mir` into a queue, tries to
/// inline each one, enqueues the call sites found in newly appended blocks,
/// and finally runs CFG simplification and dead-block removal on the caller.
68 fn run_pass(&self, caller_mir: &mut Mir<'tcx>) {
69 // Keep a queue of callsites to try inlining on. We take
70 // advantage of the fact that queries detect cycles here to
71 // allow us to try and fetch the fully optimized MIR of a
72 // call; if it succeeds, we can inline it and we know that
73 // they do not call us. Otherwise, we just don't try to
76 // We use a queue so that we inline "broadly" before we inline
77 // in depth. It is unclear if this is the best heuristic,
78 // really, but that's true of all the heuristics in this
81 let mut callsites = VecDeque::new();
83 let param_env = self.tcx.param_env(self.source.def_id);
85 // Only do inlining into fn bodies.
86 let id = self.tcx.hir().as_local_node_id(self.source.def_id).unwrap();
87 let body_owner_kind = self.tcx.hir().body_owner_kind(id);
// Seed the queue only when the body owner is a `Fn` and this is not a
// promoted body.
89 if let (hir::BodyOwnerKind::Fn, None) = (body_owner_kind, self.source.promoted) {
91 for (bb, bb_data) in caller_mir.basic_blocks().iter_enumerated() {
92 if let Some(callsite) = self.get_valid_function_call(bb,
96 callsites.push_back(callsite);
103 let mut local_change;
104 let mut changed = false;
107 local_change = false;
108 while let Some(callsite) = callsites.pop_front() {
109 debug!("checking whether to inline callsite {:?}", callsite);
// Callees without available MIR cannot be inlined.
110 if !self.tcx.is_mir_available(callsite.callee) {
111 debug!("checking whether to inline callsite {:?} - MIR unavailable", callsite);
// Fetch the callee's optimized MIR; it is only used when
// `consider_optimizing` accepts the call site.
115 let callee_mir = match self.tcx.try_optimized_mir(callsite.location.span,
117 Ok(callee_mir) if self.consider_optimizing(callsite, callee_mir) => {
118 self.tcx.subst_and_normalize_erasing_regions(
127 // FIXME(#43542) shouldn't have to cancel an error
// Remember the block count before inlining so that only the blocks
// appended by the inlined callee are scanned below.
133 let start = caller_mir.basic_blocks().len();
134 debug!("attempting to inline callsite {:?} - mir={:?}", callsite, callee_mir);
135 if !self.inline_call(callsite, caller_mir, callee_mir) {
136 debug!("attempting to inline callsite {:?} - failure", callsite);
139 debug!("attempting to inline callsite {:?} - success", callsite);
141 // Add callsites from inlined function
142 for (bb, bb_data) in caller_mir.basic_blocks().iter_enumerated().skip(start) {
143 if let Some(new_callsite) = self.get_valid_function_call(bb,
147 // Don't inline the same function multiple times.
148 if callsite.callee != new_callsite.callee {
149 callsites.push_back(new_callsite);
163 // Simplify if we inlined anything.
165 debug!("Running simplify cfg on {:?}", self.source);
166 CfgSimplifier::new(caller_mir).simplify();
167 remove_dead_blocks(caller_mir);
/// Inspects the terminator of `bb_data` and returns a `CallSite` when it is
/// a call that may be considered for inlining.
///
/// Blocks marked as cleanup are rejected, and only calls whose function
/// operand has a `FnDef` type are looked at. The callee is resolved through
/// `Instance::resolve`, and the returned call site records the resolved
/// instance's def id and substs together with the terminator's source info.
171 fn get_valid_function_call(&self,
173 bb_data: &BasicBlockData<'tcx>,
174 caller_mir: &Mir<'tcx>,
175 param_env: ParamEnv<'tcx>,
176 ) -> Option<CallSite<'tcx>> {
177 // Don't inline calls that are in cleanup blocks.
178 if bb_data.is_cleanup { return None; }
180 // Only consider direct calls to functions
181 let terminator = bb_data.terminator();
182 if let TerminatorKind::Call { func: ref op, .. } = terminator.kind {
183 if let ty::FnDef(callee_def_id, substs) = op.ty(caller_mir, self.tcx).sty {
184 let instance = Instance::resolve(self.tcx,
// NOTE(review): virtual instances are special-cased here; the branch body is
// not visible in this view (presumably they are rejected) — confirm.
189 if let InstanceDef::Virtual(..) = instance.def {
193 return Some(CallSite {
194 callee: instance.def_id(),
195 substs: instance.substs,
197 location: terminator.source_info
/// Combines the inlining heuristic with the compiler's optimization gate.
///
/// `should_inline` is evaluated first; `tcx.consider_optimizing` is only
/// consulted when the heuristic accepts the call site (short-circuit `&&`).
205 fn consider_optimizing(&self,
206 callsite: CallSite<'tcx>,
207 callee_mir: &Mir<'tcx>)
210 debug!("consider_optimizing({:?})", callsite);
211 self.should_inline(callsite, callee_mir)
// The closure only builds the description string handed to the gate.
212 && self.tcx.consider_optimizing(|| format!("Inline {:?} into {:?}",
/// Heuristic deciding whether `callee_mir` should be inlined at `callsite`.
///
/// Callees with upvar declarations, a yield type, a binop lang item def id,
/// or `#[inline(never)]` are not inlined (see the debug messages below).
/// Otherwise a cost is estimated from the callee's statements, terminators
/// and the sizes of its variables and temporaries, and compared against a
/// threshold. `#[inline(always)]` callees that reach the end are reported
/// as inlined regardless of the cost.
// NOTE(review): the return statements and several branch bodies are not
// visible in this view.
217 fn should_inline(&self,
218 callsite: CallSite<'tcx>,
219 callee_mir: &Mir<'tcx>)
222 debug!("should_inline({:?})", callsite);
225 // Don't inline closures that have captures
226 // FIXME: Handle closures better
227 if callee_mir.upvar_decls.len() > 0 {
228 debug!(" upvar decls present - not inlining");
232 // Cannot inline generators which haven't been transformed yet
233 if callee_mir.yield_ty.is_some() {
234 debug!(" yield ty present - not inlining");
238 // Do not inline {u,i}128 lang items, codegen const eval depends
239 // on detecting calls to these lang items and intercepting them
240 if tcx.is_binop_lang_item(callsite.callee).is_some() {
241 debug!(" not inlining 128bit integer lang item");
245 let codegen_fn_attrs = tcx.codegen_fn_attrs(callsite.callee);
// `hinted` is true for both `#[inline]` and `#[inline(always)]`.
247 let hinted = match codegen_fn_attrs.inline {
248 // Just treat inline(always) as a hint for now,
249 // there are cases that prevent inlining that we
250 // need to check for first.
251 attr::InlineAttr::Always => true,
252 attr::InlineAttr::Never => {
253 debug!("#[inline(never)] present - not inlining");
256 attr::InlineAttr::Hint => true,
257 attr::InlineAttr::None => false,
260 // Only inline local functions if they would be eligible for cross-crate
261 // inlining. This is to ensure that the final crate doesn't have MIR that
262 // reference unexported symbols
263 if callsite.callee.is_local() {
// A local callee with no type substitutions and no inline hint is skipped.
264 if callsite.substs.types().count() == 0 && !hinted {
265 debug!(" callee is an exported function - not inlining");
// NOTE(review): the two threshold values selected here are not visible in
// this view.
270 let mut threshold = if hinted {
276 // Significantly lower the threshold for inlining cold functions
277 if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
281 // Give a bonus to functions with a small number of blocks.
282 // We normally have two or three blocks for even
283 // very small functions.
284 if callee_mir.basic_blocks().len() <= 3 {
285 threshold += threshold / 4;
287 debug!(" final inline threshold = {}", threshold);
289 // FIXME: Give a bonus to functions with only a single caller
291 let param_env = tcx.param_env(self.source.def_id);
293 let mut first_block = true;
296 // Traverse the MIR manually so we can account for the effects of
297 // inlining on the CFG.
298 let mut work_list = vec![START_BLOCK];
299 let mut visited = BitSet::new_empty(callee_mir.basic_blocks().len());
300 while let Some(bb) = work_list.pop() {
// Each block is costed at most once.
301 if !visited.insert(bb.index()) { continue; }
302 let blk = &callee_mir.basic_blocks()[bb];
304 for stmt in &blk.statements {
305 // Don't count StorageLive/StorageDead in the inlining cost.
307 StatementKind::StorageLive(_) |
308 StatementKind::StorageDead(_) |
309 StatementKind::Nop => {}
310 _ => cost += INSTR_COST
313 let term = blk.terminator();
314 let mut is_drop = false;
316 TerminatorKind::Drop { ref location, target, unwind } |
317 TerminatorKind::DropAndReplace { ref location, target, unwind, .. } => {
319 work_list.push(target);
320 // If the location doesn't actually need dropping, treat it like
// The dropped place's type is evaluated with the call site's substitutions.
322 let ty = location.ty(callee_mir, tcx).subst(tcx, callsite.substs);
323 let ty = ty.to_ty(tcx);
324 if ty.needs_drop(tcx, param_env) {
325 cost += CALL_PENALTY;
// The unwind edge is only followed when the drop is actually needed.
326 if let Some(unwind) = unwind {
327 work_list.push(unwind);
334 TerminatorKind::Unreachable |
335 TerminatorKind::Call { destination: None, .. } if first_block => {
336 // If the function always diverges, don't inline
337 // unless the cost is zero
341 TerminatorKind::Call {func: Operand::Constant(ref f), .. } => {
342 if let ty::FnDef(def_id, _) = f.ty.sty {
343 // Don't give intrinsics the extra penalty for calls
344 let f = tcx.fn_sig(def_id);
345 if f.abi() == Abi::RustIntrinsic || f.abi() == Abi::PlatformIntrinsic {
348 cost += CALL_PENALTY;
352 TerminatorKind::Assert { .. } => cost += CALL_PENALTY,
353 _ => cost += INSTR_COST
// NOTE(review): the condition guarding this successor loop is not visible
// here (presumably it is skipped for drops, which queue their own targets).
357 for &succ in term.successors() {
358 work_list.push(succ);
365 // Count up the cost of local variables and temps, if we know the size
366 // use that, otherwise we use a moderately-large dummy cost.
368 let ptr_size = tcx.data_layout.pointer_size.bytes();
370 for v in callee_mir.vars_and_temps_iter() {
371 let v = &callee_mir.local_decls[v];
372 let ty = v.ty.subst(tcx, callsite.substs);
373 // Cost of the var is the size in machine-words, if we know
375 if let Some(size) = type_size_of(tcx, param_env.clone(), ty) {
376 cost += (size / ptr_size) as usize;
378 cost += UNKNOWN_SIZE_COST;
// `#[inline(always)]` is honoured here, after the hard rejections above.
382 if let attr::InlineAttr::Always = codegen_fn_attrs.inline {
383 debug!("INLINING {:?} because inline(always) [cost={}]", callsite, cost);
386 if cost <= threshold {
387 debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold);
390 debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold);
/// Splices `callee_mir` into `caller_mir` at `callsite` and reports whether
/// inlining took place.
///
/// The call terminator is taken out of the calling block. For a call with a
/// destination, the callee's source scopes, variables/temporaries and
/// promoted bodies are copied into the caller, the destination and
/// arguments are prepared, every callee block is rewritten by an
/// `Integrator` and appended to the caller, and the calling block is
/// terminated with a `Goto` to the first appended block. In the other
/// visible path a terminator carrying the original source info is put back.
396 fn inline_call(&self,
397 callsite: CallSite<'tcx>,
398 caller_mir: &mut Mir<'tcx>,
399 mut callee_mir: Mir<'tcx>) -> bool {
// Take ownership of the terminator; the block gets a new one on every path
// visible below.
400 let terminator = caller_mir[callsite.bb].terminator.take().unwrap();
401 match terminator.kind {
402 // FIXME: Handle inlining of diverging calls
403 TerminatorKind::Call { args, destination: Some(destination), cleanup, .. } => {
404 debug!("Inlined {:?} into {:?}", callsite.callee, self.source);
// Index maps from callee-side ids to the ids they receive in the caller.
406 let mut local_map = IndexVec::with_capacity(callee_mir.local_decls.len());
407 let mut scope_map = IndexVec::with_capacity(callee_mir.source_scopes.len());
408 let mut promoted_map = IndexVec::with_capacity(callee_mir.promoted.len());
410 for mut scope in callee_mir.source_scopes.iter().cloned() {
// A callee scope without a parent is attached under the call site's scope.
411 if scope.parent_scope.is_none() {
412 scope.parent_scope = Some(callsite.location.scope);
413 scope.span = callee_mir.span;
416 scope.span = callsite.location.span;
418 let idx = caller_mir.source_scopes.push(scope);
// Copy the callee's variables and temporaries, remapping their scopes and
// pointing their spans at the call site.
422 for loc in callee_mir.vars_and_temps_iter() {
423 let mut local = callee_mir.local_decls[loc].clone();
425 local.source_info.scope =
426 scope_map[local.source_info.scope];
427 local.source_info.span = callsite.location.span;
428 local.visibility_scope = scope_map[local.visibility_scope];
430 let idx = caller_mir.local_decls.push(local);
435 callee_mir.promoted.iter().cloned().map(|p| caller_mir.promoted.push(p))
438 // If the call is something like `a[*i] = f(i)`, where
439 // `i : &mut usize`, then just duplicating the `a[*i]`
440 // Place could result in two different locations if `f`
441 // writes to `i`. To prevent this we need to create a temporary
442 // borrow of the place and pass the destination as `*temp` instead.
443 fn dest_needs_borrow(place: &Place) -> bool {
445 Place::Projection(ref p) => {
447 ProjectionElem::Deref |
448 ProjectionElem::Index(_) => true,
449 _ => dest_needs_borrow(&p.base)
452 // Static variables need a borrow because the callee
453 // might modify the same static.
454 Place::Static(_) => true,
459 let dest = if dest_needs_borrow(&destination.0) {
460 debug!("Creating temp for return destination");
461 let dest = Rvalue::Ref(
462 self.tcx.types.re_erased,
463 BorrowKind::Mut { allow_two_phase_borrow: false },
466 let ty = dest.ty(caller_mir, self.tcx);
468 let temp = LocalDecl::new_temp(ty, callsite.location.span);
470 let tmp = caller_mir.local_decls.push(temp);
471 let tmp = Place::Local(tmp);
// The borrow is assigned to the fresh temporary in the calling block.
473 let stmt = Statement {
474 source_info: callsite.location,
475 kind: StatementKind::Assign(tmp.clone(), box dest)
477 caller_mir[callsite.bb]
478 .statements.push(stmt);
// Block the original call returned to.
484 let return_block = destination.1;
486 // Copy the arguments if needed.
487 let args: Vec<_> = self.make_call_args(args, &callsite, caller_mir);
// Index the first appended callee block will receive in the caller.
489 let bb_len = caller_mir.basic_blocks().len();
490 let mut integrator = Integrator {
499 cleanup_block: cleanup,
500 in_cleanup_block: false
// Move each callee block into the caller after rewriting its contents.
504 for (bb, mut block) in callee_mir.basic_blocks_mut().drain_enumerated(..) {
505 integrator.visit_basic_block_data(bb, &mut block);
506 caller_mir.basic_blocks_mut().push(block);
// Replace the call with a jump to the first appended block.
509 let terminator = Terminator {
510 source_info: callsite.location,
511 kind: TerminatorKind::Goto { target: BasicBlock::new(bb_len) }
514 caller_mir[callsite.bb].terminator = Some(terminator);
// Other terminator shapes: a terminator with the original source info is
// reinstalled (the remaining fields are not visible in this view).
519 caller_mir[callsite.bb].terminator = Some(Terminator {
520 source_info: terminator.source_info,
530 args: Vec<Operand<'tcx>>,
531 callsite: &CallSite<'tcx>,
532 caller_mir: &mut Mir<'tcx>,
536 // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
537 // The caller provides the arguments wrapped up in a tuple:
539 // tuple_tmp = (a, b, c)
540 // Fn::call(closure_ref, tuple_tmp)
542 // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
543 // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
544 // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
547 // [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
549 // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
550 // if we "spill" that into *another* temporary, so that we can map the argument
551 // variable in the callee MIR directly to an argument variable on our side.
552 // So we introduce temporaries like:
554 // tmp0 = tuple_tmp.0
555 // tmp1 = tuple_tmp.1
556 // tmp2 = tuple_tmp.2
558 // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
559 if tcx.is_closure(callsite.callee) {
560 let mut args = args.into_iter();
561 let self_ = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_mir);
562 let tuple = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_mir);
563 assert!(args.next().is_none());
565 let tuple = Place::Local(tuple);
566 let tuple_tys = if let ty::Tuple(s) = tuple.ty(caller_mir, tcx).to_ty(tcx).sty {
569 bug!("Closure arguments are not passed as a tuple");
572 // The `closure_ref` in our example above.
573 let closure_ref_arg = iter::once(self_);
575 // The `tmp0`, `tmp1`, and `tmp2` in our example above.
577 tuple_tys.iter().enumerate().map(|(i, ty)| {
578 // This is e.g. `tuple_tmp.0` in our example above.
579 let tuple_field = Operand::Move(tuple.clone().field(Field::new(i), ty));
581 // Spill to a local to make e.g. `tmp0`.
582 self.create_temp_if_necessary(tuple_field, callsite, caller_mir)
585 closure_ref_arg.chain(tuple_tmp_args).collect()
588 .map(|a| self.create_temp_if_necessary(a, callsite, caller_mir))
593 /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh
594 /// temporary `T` and an instruction `T = arg`, and returns `T`.
///
/// The new temporary and its assignment use the call site's span and source
/// info, and the assignment is appended to the calling block
/// (`callsite.bb`).
595 fn create_temp_if_necessary(
598 callsite: &CallSite<'tcx>,
599 caller_mir: &mut Mir<'tcx>,
601 // FIXME: Analysis of the usage of the arguments to avoid
602 // unnecessary temporaries.
// Only a `Move` of a plain local whose kind is `Temp` qualifies for reuse.
604 if let Operand::Move(Place::Local(local)) = arg {
605 if caller_mir.local_kind(local) == LocalKind::Temp {
606 // Reuse the operand if it's a temporary already
611 debug!("Creating temp for argument {:?}", arg);
612 // Otherwise, create a temporary for the arg
613 let arg = Rvalue::Use(arg);
// The temporary takes the type of the rvalue as computed in the caller.
615 let ty = arg.ty(caller_mir, self.tcx);
617 let arg_tmp = LocalDecl::new_temp(ty, callsite.location.span);
618 let arg_tmp = caller_mir.local_decls.push(arg_tmp);
620 let stmt = Statement {
621 source_info: callsite.location,
622 kind: StatementKind::Assign(Place::Local(arg_tmp), box arg),
624 caller_mir[callsite.bb].statements.push(stmt);
/// Returns the size of `ty` in bytes as computed by `tcx.layout_of` under
/// `param_env`, or `None` when the layout query fails.
629 fn type_size_of<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
630 param_env: ty::ParamEnv<'tcx>,
631 ty: Ty<'tcx>) -> Option<u64> {
// `.ok()` discards the layout error; callers only see "size unknown".
632 tcx.layout_of(param_env.and(ty)).ok().map(|layout| layout.size.bytes())
638 * Integrates blocks from the callee function into the calling function.
639 * Updates block indices, references to locals and other control flow
// MIR visitor state used while rewriting callee blocks for the caller.
// NOTE(review): the visitor methods also read `self.block_idx` and
// `self.args`; those fields are not visible in this view.
642 struct Integrator<'a, 'tcx: 'a> {
// Maps callee locals (other than the return place and the arguments) to
// caller locals.
645 local_map: IndexVec<Local, Local>,
// Maps callee source scopes to the scopes created in the caller.
646 scope_map: IndexVec<SourceScope, SourceScope>,
// Maps callee promoted indices to caller promoted indices.
647 promoted_map: IndexVec<Promoted, Promoted>,
648 _callsite: CallSite<'tcx>,
// Place that stands in for the callee's return place.
649 destination: Place<'tcx>,
// Block that rewritten `Return` terminators jump to.
650 return_block: BasicBlock,
// Cleanup block of the original call, if it had one.
651 cleanup_block: Option<BasicBlock>,
// True while the block being visited is a cleanup block.
652 in_cleanup_block: bool,
655 impl<'a, 'tcx> Integrator<'a, 'tcx> {
/// Translates a callee block index into the caller's numbering by adding
/// `self.block_idx` to it.
// NOTE(review): the line returning `new` is not visible in this view.
656 fn update_target(&self, tgt: BasicBlock) -> BasicBlock {
657 let new = BasicBlock::new(tgt.index() + self.block_idx);
658 debug!("Updating target `{:?}`, new: `{:?}`", tgt, new);
663 impl<'a, 'tcx> MutVisitor<'tcx> for Integrator<'a, 'tcx> {
/// Rewrites a callee local so that it refers to the matching caller local.
///
/// The return place is resolved through `self.destination`, which must be a
/// local at this point (anything else hits `bug!`). Any other local is
/// treated as an argument when its index minus one falls inside
/// `self.args`, and is looked up in `self.local_map` otherwise.
664 fn visit_local(&mut self,
666 _ctxt: PlaceContext<'tcx>,
667 _location: Location) {
668 if *local == RETURN_PLACE {
669 match self.destination {
674 ref place => bug!("Return place is {:?}, not local", place)
// Subtract one so that the first argument gets index 0 (presumably because
// the return place occupies index 0 — confirm).
677 let idx = local.index() - 1;
678 if idx < self.args.len() {
679 *local = self.args[idx];
// Remaining locals are indexed in `local_map` relative to the end of the
// arguments.
682 *local = self.local_map[Local::new(idx - self.args.len())];
/// Rewrites a callee place for the caller.
///
/// The return place is replaced wholesale by a clone of `self.destination`,
/// promoted places are looked up in `self.promoted_map`, and every other
/// place is handled by the default traversal (`super_place`).
685 fn visit_place(&mut self,
686 place: &mut Place<'tcx>,
687 _ctxt: PlaceContext<'tcx>,
688 _location: Location) {
691 Place::Local(RETURN_PLACE) => {
692 // Return pointer; update the place itself
693 *place = self.destination.clone();
695 Place::Promoted(ref mut promoted) => {
// Only promoted indices present in the map are touched.
696 if let Some(p) = self.promoted_map.get(promoted.0).cloned() {
700 _ => self.super_place(place, _ctxt, _location),
/// Visits one callee block, recording for the duration of the visit whether
/// it is a cleanup block so that terminator rewriting can tell.
704 fn visit_basic_block_data(&mut self, block: BasicBlock, data: &mut BasicBlockData<'tcx>) {
705 self.in_cleanup_block = data.is_cleanup;
706 self.super_basic_block_data(block, data);
// Reset the flag once the block has been processed.
707 self.in_cleanup_block = false;
713 two_phase: &mut bool,
714 place: &mut Place<'tcx>,
717 self.super_retag(fn_entry, two_phase, place, loc);
719 // We have to patch all inlined retags to be aware that they are no longer
720 // happening on function entry.
/// Rewrites a callee terminator so that it is valid inside the caller.
///
/// Block targets are shifted with `update_target`. `Return` becomes a
/// `Goto` to `self.return_block`, and `Resume` becomes a `Goto` to the
/// original call's cleanup block when there is one. Drops, calls and
/// asserts that have no unwind edge and are not in a cleanup block get the
/// original call's cleanup block as their unwind edge. Generator
/// terminators and `FalseUnwind` are not expected here and hit `bug!`.
724 fn visit_terminator_kind(&mut self, block: BasicBlock,
725 kind: &mut TerminatorKind<'tcx>, loc: Location) {
// Run the default traversal first; the match below only patches control-flow
// edges.
726 self.super_terminator_kind(block, kind, loc);
729 TerminatorKind::GeneratorDrop |
730 TerminatorKind::Yield { .. } => bug!(),
731 TerminatorKind::Goto { ref mut target} => {
732 *target = self.update_target(*target);
734 TerminatorKind::SwitchInt { ref mut targets, .. } => {
736 *tgt = self.update_target(*tgt);
739 TerminatorKind::Drop { ref mut target, ref mut unwind, .. } |
740 TerminatorKind::DropAndReplace { ref mut target, ref mut unwind, .. } => {
741 *target = self.update_target(*target);
742 if let Some(tgt) = *unwind {
743 *unwind = Some(self.update_target(tgt));
744 } else if !self.in_cleanup_block {
745 // Unless this drop is in a cleanup block, add an unwind edge to
746 // the original call's cleanup block
747 *unwind = self.cleanup_block;
750 TerminatorKind::Call { ref mut destination, ref mut cleanup, .. } => {
// A call without a destination has no return target to remap.
751 if let Some((_, ref mut tgt)) = *destination {
752 *tgt = self.update_target(*tgt);
754 if let Some(tgt) = *cleanup {
755 *cleanup = Some(self.update_target(tgt));
756 } else if !self.in_cleanup_block {
757 // Unless this call is in a cleanup block, add an unwind edge to
758 // the original call's cleanup block
759 *cleanup = self.cleanup_block;
762 TerminatorKind::Assert { ref mut target, ref mut cleanup, .. } => {
763 *target = self.update_target(*target);
764 if let Some(tgt) = *cleanup {
765 *cleanup = Some(self.update_target(tgt));
766 } else if !self.in_cleanup_block {
767 // Unless this assert is in a cleanup block, add an unwind edge to
768 // the original call's cleanup block
769 *cleanup = self.cleanup_block;
// Returning from the callee continues at the block after the original call.
772 TerminatorKind::Return => {
773 *kind = TerminatorKind::Goto { target: self.return_block };
// Resuming unwinding continues in the original call's cleanup block, if any.
775 TerminatorKind::Resume => {
776 if let Some(tgt) = self.cleanup_block {
777 *kind = TerminatorKind::Goto { target: tgt }
// These terminators have no targets to update.
780 TerminatorKind::Abort => { }
781 TerminatorKind::Unreachable => { }
782 TerminatorKind::FalseEdges { ref mut real_target, ref mut imaginary_targets } => {
783 *real_target = self.update_target(*real_target);
784 for target in imaginary_targets {
785 *target = self.update_target(*target);
788 TerminatorKind::FalseUnwind { real_target: _ , unwind: _ } =>
789 // see the ordering of passes in the optimized_mir query.
790 bug!("False unwinds should have been removed before inlining")
/// Replaces a callee source scope with the caller scope recorded for it in
/// `self.scope_map`.
794 fn visit_source_scope(&mut self, scope: &mut SourceScope) {
795 *scope = self.scope_map[*scope];