//! Inlining pass for MIR functions

use rustc::hir::CodegenFnAttrFlags;
use rustc::hir::def_id::DefId;

use rustc_data_structures::bit_set::BitSet;
use rustc_data_structures::indexed_vec::{Idx, IndexVec};

use rustc::mir::*;
use rustc::mir::visit::*;
use rustc::ty::{self, Instance, InstanceDef, ParamEnv, Ty, TyCtxt};
use rustc::ty::subst::{Subst, SubstsRef};

use std::collections::VecDeque;
use std::iter;

use crate::transform::{MirPass, MirSource};
use super::simplify::{remove_dead_blocks, CfgSimplifier};

use syntax::attr;
use rustc_target::spec::abi::Abi;

const DEFAULT_THRESHOLD: usize = 50;
const HINT_THRESHOLD: usize = 100;

const INSTR_COST: usize = 5;
const CALL_PENALTY: usize = 25;

const UNKNOWN_SIZE_COST: usize = 10;
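
// As an illustrative (not normative) example of how these weights combine in
// `should_inline` below: a callee with four ordinary statements, one call, and
// a single 16-byte temporary on a 64-bit target costs roughly
// 4 * INSTR_COST + CALL_PENALTY + 16 / 8 = 47, just under DEFAULT_THRESHOLD.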

#[derive(Copy, Clone, Debug)]
struct CallSite<'tcx> {
    substs: SubstsRef<'tcx>,

impl MirPass for Inline {
    fn run_pass<'a, 'tcx>(&self,
                          tcx: TyCtxt<'a, 'tcx, 'tcx>,
                          source: MirSource<'tcx>,
                          mir: &mut Mir<'tcx>) {
        if tcx.sess.opts.debugging_opts.mir_opt_level >= 2 {
            Inliner { tcx, source }.run_pass(mir);

struct Inliner<'a, 'tcx: 'a> {
    tcx: TyCtxt<'a, 'tcx, 'tcx>,
    source: MirSource<'tcx>,

impl<'a, 'tcx> Inliner<'a, 'tcx> {
    fn run_pass(&self, caller_mir: &mut Mir<'tcx>) {
        // Keep a queue of callsites to try inlining on. We take
        // advantage of the fact that queries detect cycles here to
        // allow us to try and fetch the fully optimized MIR of a
        // call; if it succeeds, we can inline it and we know that
        // they do not call us. Otherwise, we just don't try to
        // inline.
        //
        // We use a queue so that we inline "broadly" before we inline
        // in depth. It is unclear if this is the best heuristic,
        // really, but that's true of all the heuristics in this
        // file.

        let mut callsites = VecDeque::new();

        let param_env = self.tcx.param_env(self.source.def_id());

        // Only do inlining into fn bodies.
        let id = self.tcx.hir().as_local_node_id(self.source.def_id()).unwrap();
        if self.tcx.hir().body_owner_kind(id).is_fn_or_closure() && self.source.promoted.is_none() {
            for (bb, bb_data) in caller_mir.basic_blocks().iter_enumerated() {
                if let Some(callsite) = self.get_valid_function_call(bb,
                    callsites.push_back(callsite);

        let mut changed = false;

            while let Some(callsite) = callsites.pop_front() {
                debug!("checking whether to inline callsite {:?}", callsite);
                if !self.tcx.is_mir_available(callsite.callee) {
                    debug!("checking whether to inline callsite {:?} - MIR unavailable", callsite);

                let self_node_id = self.tcx.hir().as_local_node_id(self.source.def_id()).unwrap();
                let callee_node_id = self.tcx.hir().as_local_node_id(callsite.callee);

                let callee_mir = if let Some(callee_node_id) = callee_node_id {
                    // Avoid a cycle here by only using `optimized_mir` if we have
                    // a lower node id than the callee. This ensures that the callee will
                    // not inline us. This trick only works without incremental compilation.
                    // So don't do it if that is enabled.
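                    // Illustrative example: if two local functions are mutually
                    // recursive and this one has the lower node id, only this side
                    // may request `optimized_mir` of the other here; the other side
                    // skips the query, so the two optimizations never wait on each
                    // other.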
                    if !self.tcx.dep_graph.is_fully_enabled()
                        && self_node_id.as_u32() < callee_node_id.as_u32() {
                        self.tcx.optimized_mir(callsite.callee)

                    // This cannot result in a cycle since the callee MIR is from another crate
                    // and is already optimized.
                    self.tcx.optimized_mir(callsite.callee)

                let callee_mir = if self.consider_optimizing(callsite, callee_mir) {
                    self.tcx.subst_and_normalize_erasing_regions(

                let start = caller_mir.basic_blocks().len();
                debug!("attempting to inline callsite {:?} - mir={:?}", callsite, callee_mir);
                if !self.inline_call(callsite, caller_mir, callee_mir) {
                    debug!("attempting to inline callsite {:?} - failure", callsite);

                debug!("attempting to inline callsite {:?} - success", callsite);

                // Add callsites from inlined function
                for (bb, bb_data) in caller_mir.basic_blocks().iter_enumerated().skip(start) {
                    if let Some(new_callsite) = self.get_valid_function_call(bb,
                        // Don't inline the same function multiple times.
                        if callsite.callee != new_callsite.callee {
                            callsites.push_back(new_callsite);

        // Simplify if we inlined anything.
            debug!("Running simplify cfg on {:?}", self.source);
            CfgSimplifier::new(caller_mir).simplify();
            remove_dead_blocks(caller_mir);

    fn get_valid_function_call(&self,
                               bb_data: &BasicBlockData<'tcx>,
                               caller_mir: &Mir<'tcx>,
                               param_env: ParamEnv<'tcx>,
    ) -> Option<CallSite<'tcx>> {
        // Don't inline calls that are in cleanup blocks.
        if bb_data.is_cleanup { return None; }

        // Only consider direct calls to functions
        let terminator = bb_data.terminator();
        if let TerminatorKind::Call { func: ref op, .. } = terminator.kind {
            if let ty::FnDef(callee_def_id, substs) = op.ty(caller_mir, self.tcx).sty {
                let instance = Instance::resolve(self.tcx,
                if let InstanceDef::Virtual(..) = instance.def {

                return Some(CallSite {
                    callee: instance.def_id(),
                    substs: instance.substs,
                    location: terminator.source_info

    fn consider_optimizing(&self,
                           callsite: CallSite<'tcx>,
                           callee_mir: &Mir<'tcx>)
        debug!("consider_optimizing({:?})", callsite);
        self.should_inline(callsite, callee_mir)
            && self.tcx.consider_optimizing(|| format!("Inline {:?} into {:?}",

    fn should_inline(&self,
                     callsite: CallSite<'tcx>,
                     callee_mir: &Mir<'tcx>)
        debug!("should_inline({:?})", callsite);

        // Don't inline closures that have captures
        // FIXME: Handle closures better
        if callee_mir.upvar_decls.len() > 0 {
            debug!(" upvar decls present - not inlining");

        // Cannot inline generators which haven't been transformed yet
        if callee_mir.yield_ty.is_some() {
            debug!(" yield ty present - not inlining");

        // Do not inline {u,i}128 lang items, codegen const eval depends
        // on detecting calls to these lang items and intercepting them
        if tcx.is_binop_lang_item(callsite.callee).is_some() {
            debug!(" not inlining 128bit integer lang item");

        let codegen_fn_attrs = tcx.codegen_fn_attrs(callsite.callee);

        let hinted = match codegen_fn_attrs.inline {
            // Just treat inline(always) as a hint for now,
            // there are cases that prevent inlining that we
            // need to check for first.
            attr::InlineAttr::Always => true,
            attr::InlineAttr::Never => {
                debug!("#[inline(never)] present - not inlining");
            attr::InlineAttr::Hint => true,
            attr::InlineAttr::None => false,

        // Only inline local functions if they would be eligible for cross-crate
        // inlining. This is to ensure that the final crate doesn't have MIR that
        // references unexported symbols
        if callsite.callee.is_local() {
            if callsite.substs.non_erasable_generics().count() == 0 && !hinted {
                debug!(" callee is an exported function - not inlining");

        let mut threshold = if hinted {

        // Significantly lower the threshold for inlining cold functions
        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {

        // Give a bonus to functions with a small number of blocks;
        // we normally have two or three blocks for even
        // very small functions.
        if callee_mir.basic_blocks().len() <= 3 {
            threshold += threshold / 4;
        debug!(" final inline threshold = {}", threshold);

        // FIXME: Give a bonus to functions with only a single caller

        let param_env = tcx.param_env(self.source.def_id());

        let mut first_block = true;

        // Traverse the MIR manually so we can account for the effects of
        // inlining on the CFG.
        let mut work_list = vec![START_BLOCK];
        let mut visited = BitSet::new_empty(callee_mir.basic_blocks().len());
        while let Some(bb) = work_list.pop() {
            if !visited.insert(bb.index()) { continue; }
            let blk = &callee_mir.basic_blocks()[bb];

            for stmt in &blk.statements {
                // Don't count StorageLive/StorageDead in the inlining cost.
                    StatementKind::StorageLive(_) |
                    StatementKind::StorageDead(_) |
                    StatementKind::Nop => {}
                    _ => cost += INSTR_COST

            let term = blk.terminator();
            let mut is_drop = false;
                TerminatorKind::Drop { ref location, target, unwind } |
                TerminatorKind::DropAndReplace { ref location, target, unwind, .. } => {
                    work_list.push(target);
                    // If the location doesn't actually need dropping, treat it like
                    let ty = location.ty(callee_mir, tcx).subst(tcx, callsite.substs);
                    let ty = ty.to_ty(tcx);
                    if ty.needs_drop(tcx, param_env) {
                        cost += CALL_PENALTY;
                        if let Some(unwind) = unwind {
                            work_list.push(unwind);

                TerminatorKind::Unreachable |
                TerminatorKind::Call { destination: None, .. } if first_block => {
                    // If the function always diverges, don't inline
                    // unless the cost is zero

                TerminatorKind::Call { func: Operand::Constant(ref f), .. } => {
                    if let ty::FnDef(def_id, _) = f.ty.sty {
                        // Don't give intrinsics the extra penalty for calls
                        let f = tcx.fn_sig(def_id);
                        if f.abi() == Abi::RustIntrinsic || f.abi() == Abi::PlatformIntrinsic {
                            cost += CALL_PENALTY;

                TerminatorKind::Assert { .. } => cost += CALL_PENALTY,
                _ => cost += INSTR_COST

            for &succ in term.successors() {
                work_list.push(succ);

        // Count up the cost of local variables and temps, if we know the size
        // use that, otherwise we use a moderately-large dummy cost.

        let ptr_size = tcx.data_layout.pointer_size.bytes();

        for v in callee_mir.vars_and_temps_iter() {
            let v = &callee_mir.local_decls[v];
            let ty = v.ty.subst(tcx, callsite.substs);
            // Cost of the var is the size in machine-words, if we know
            if let Some(size) = type_size_of(tcx, param_env.clone(), ty) {
                cost += (size / ptr_size) as usize;
                cost += UNKNOWN_SIZE_COST;

        if let attr::InlineAttr::Always = codegen_fn_attrs.inline {
            debug!("INLINING {:?} because inline(always) [cost={}]", callsite, cost);
            if cost <= threshold {
                debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold);
            debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold);

    fn inline_call(&self,
                   callsite: CallSite<'tcx>,
                   caller_mir: &mut Mir<'tcx>,
                   mut callee_mir: Mir<'tcx>) -> bool {
        let terminator = caller_mir[callsite.bb].terminator.take().unwrap();
        match terminator.kind {
            // FIXME: Handle inlining of diverging calls
            TerminatorKind::Call { args, destination: Some(destination), cleanup, .. } => {
                debug!("Inlined {:?} into {:?}", callsite.callee, self.source);

                let mut local_map = IndexVec::with_capacity(callee_mir.local_decls.len());
                let mut scope_map = IndexVec::with_capacity(callee_mir.source_scopes.len());
                let mut promoted_map = IndexVec::with_capacity(callee_mir.promoted.len());

                for mut scope in callee_mir.source_scopes.iter().cloned() {
                    if scope.parent_scope.is_none() {
                        scope.parent_scope = Some(callsite.location.scope);
                        scope.span = callee_mir.span;

                    scope.span = callsite.location.span;

                    let idx = caller_mir.source_scopes.push(scope);

                for loc in callee_mir.vars_and_temps_iter() {
                    let mut local = callee_mir.local_decls[loc].clone();

                    local.source_info.scope =
                        scope_map[local.source_info.scope];
                    local.source_info.span = callsite.location.span;
                    local.visibility_scope = scope_map[local.visibility_scope];

                    let idx = caller_mir.local_decls.push(local);

                    callee_mir.promoted.iter().cloned().map(|p| caller_mir.promoted.push(p))

                // If the call is something like `a[*i] = f(i)`, where
                // `i : &mut usize`, then just duplicating the `a[*i]`
                // Place could result in two different locations if `f`
                // writes to `i`. To prevent this we need to create a temporary
                // borrow of the place and pass the destination as `*temp` instead.
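                //
                // As an illustrative sketch: for `a[*i] = f(i)` the rewrite below
                // emits `tmp = &mut a[*i]` ahead of the inlined body and stores the
                // result through `*tmp`, so a write to `i` inside the inlined body
                // can no longer change which element the assignment targets.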
                fn dest_needs_borrow(place: &Place<'_>) -> bool {
                        Place::Projection(ref p) => {
                                ProjectionElem::Deref |
                                ProjectionElem::Index(_) => true,
                                _ => dest_needs_borrow(&p.base)

                        // Static variables need a borrow because the callee
                        // might modify the same static.
                        Place::Base(PlaceBase::Static(_)) => true,

                let dest = if dest_needs_borrow(&destination.0) {
                    debug!("Creating temp for return destination");
                    let dest = Rvalue::Ref(
                        self.tcx.types.re_erased,
                        BorrowKind::Mut { allow_two_phase_borrow: false },
                    let ty = dest.ty(caller_mir, self.tcx);

                    let temp = LocalDecl::new_temp(ty, callsite.location.span);

                    let tmp = caller_mir.local_decls.push(temp);
                    let tmp = Place::Base(PlaceBase::Local(tmp));

                    let stmt = Statement {
                        source_info: callsite.location,
                        kind: StatementKind::Assign(tmp.clone(), box dest)
                    caller_mir[callsite.bb]
                        .statements.push(stmt);

                let return_block = destination.1;

                // Copy the arguments if needed.
                let args: Vec<_> = self.make_call_args(args, &callsite, caller_mir);

                let bb_len = caller_mir.basic_blocks().len();
                let mut integrator = Integrator {
                    cleanup_block: cleanup,
                    in_cleanup_block: false

                for (bb, mut block) in callee_mir.basic_blocks_mut().drain_enumerated(..) {
                    integrator.visit_basic_block_data(bb, &mut block);
                    caller_mir.basic_blocks_mut().push(block);

                let terminator = Terminator {
                    source_info: callsite.location,
                    kind: TerminatorKind::Goto { target: BasicBlock::new(bb_len) }

                caller_mir[callsite.bb].terminator = Some(terminator);

                caller_mir[callsite.bb].terminator = Some(Terminator {
                    source_info: terminator.source_info,

        args: Vec<Operand<'tcx>>,
        callsite: &CallSite<'tcx>,
        caller_mir: &mut Mir<'tcx>,

        // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
        // The caller provides the arguments wrapped up in a tuple:
        //
        //     tuple_tmp = (a, b, c)
        //     Fn::call(closure_ref, tuple_tmp)
        //
        // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
        // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
        // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
        //
        //     [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
        //
        // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
        // if we "spill" that into *another* temporary, so that we can map the argument
        // variable in the callee MIR directly to an argument variable on our side.
        // So we introduce temporaries like:
        //
        //     tmp0 = tuple_tmp.0
        //     tmp1 = tuple_tmp.1
        //     tmp2 = tuple_tmp.2
        //
        // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
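        //
        // As a concrete (illustrative) example, a source-level call such as
        //
        //     let f = |a: u8, b: u8, c: u8| a + b + c;
        //     f(1, 2, 3);
        //
        // reaches this point as a call of the form `Fn::call(&f, (1, 2, 3))`, and
        // the code below rebuilds the flat `[closure_ref, tmp0, tmp1, tmp2]` list
        // that the closure body expects.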
        if tcx.is_closure(callsite.callee) {
            let mut args = args.into_iter();
            let self_ = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_mir);
            let tuple = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_mir);
            assert!(args.next().is_none());

            let tuple = Place::Base(PlaceBase::Local(tuple));
            let tuple_tys = if let ty::Tuple(s) = tuple.ty(caller_mir, tcx).to_ty(tcx).sty {
                bug!("Closure arguments are not passed as a tuple");

            // The `closure_ref` in our example above.
            let closure_ref_arg = iter::once(self_);
            // The `tmp0`, `tmp1`, and `tmp2` in our example above.
                tuple_tys.iter().enumerate().map(|(i, ty)| {
                    // This is e.g., `tuple_tmp.0` in our example above.
                    let tuple_field = Operand::Move(tuple.clone().field(Field::new(i), ty));

                    // Spill to a local to make e.g., `tmp0`.
                    self.create_temp_if_necessary(tuple_field, callsite, caller_mir)

            closure_ref_arg.chain(tuple_tmp_args).collect()

                .map(|a| self.create_temp_if_necessary(a, callsite, caller_mir))

    /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh
    /// temporary `T` and an instruction `T = arg`, and returns `T`.
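    ///
    /// For example (illustrative): an argument that is already an `Operand::Move`
    /// of a caller temporary is reused as-is, while a constant or a named local is
    /// first spilled into a fresh temporary via `T = arg`.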
    fn create_temp_if_necessary(
        callsite: &CallSite<'tcx>,
        caller_mir: &mut Mir<'tcx>,
        // FIXME: Analysis of the usage of the arguments to avoid
        // unnecessary temporaries.

        if let Operand::Move(Place::Base(PlaceBase::Local(local))) = arg {
            if caller_mir.local_kind(local) == LocalKind::Temp {
                // Reuse the operand if it's a temporary already

        debug!("Creating temp for argument {:?}", arg);
        // Otherwise, create a temporary for the arg
        let arg = Rvalue::Use(arg);

        let ty = arg.ty(caller_mir, self.tcx);

        let arg_tmp = LocalDecl::new_temp(ty, callsite.location.span);
        let arg_tmp = caller_mir.local_decls.push(arg_tmp);

        let stmt = Statement {
            source_info: callsite.location,
            kind: StatementKind::Assign(Place::Base(PlaceBase::Local(arg_tmp)), box arg),
        caller_mir[callsite.bb].statements.push(stmt);

fn type_size_of<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>,
                          param_env: ty::ParamEnv<'tcx>,
                          ty: Ty<'tcx>) -> Option<u64> {
    tcx.layout_of(param_env.and(ty)).ok().map(|layout| layout.size.bytes())

 * Integrates blocks from the callee function into the calling function.
 * Updates block indices, references to locals and other control flow

struct Integrator<'a, 'tcx: 'a> {
    local_map: IndexVec<Local, Local>,
    scope_map: IndexVec<SourceScope, SourceScope>,
    promoted_map: IndexVec<Promoted, Promoted>,
    _callsite: CallSite<'tcx>,
    destination: Place<'tcx>,
    return_block: BasicBlock,
    cleanup_block: Option<BasicBlock>,
    in_cleanup_block: bool,

impl<'a, 'tcx> Integrator<'a, 'tcx> {
    fn update_target(&self, tgt: BasicBlock) -> BasicBlock {
        let new = BasicBlock::new(tgt.index() + self.block_idx);
        debug!("Updating target `{:?}`, new: `{:?}`", tgt, new);

impl<'a, 'tcx> MutVisitor<'tcx> for Integrator<'a, 'tcx> {
    fn visit_local(&mut self,
                   _ctxt: PlaceContext<'tcx>,
                   _location: Location) {
        if *local == RETURN_PLACE {
            match self.destination {
                Place::Base(PlaceBase::Local(l)) => {
                ref place => bug!("Return place is {:?}, not local", place)

        let idx = local.index() - 1;
        if idx < self.args.len() {
            *local = self.args[idx];

        *local = self.local_map[Local::new(idx - self.args.len())];

    fn visit_place(&mut self,
                   place: &mut Place<'tcx>,
                   _ctxt: PlaceContext<'tcx>,
                   _location: Location) {
            Place::Base(PlaceBase::Local(RETURN_PLACE)) => {
                // Return pointer; update the place itself
                *place = self.destination.clone();
            Place::Base(PlaceBase::Promoted(ref mut promoted)) => {
                if let Some(p) = self.promoted_map.get(promoted.0).cloned() {
            _ => self.super_place(place, _ctxt, _location),

    fn visit_basic_block_data(&mut self, block: BasicBlock, data: &mut BasicBlockData<'tcx>) {
        self.in_cleanup_block = data.is_cleanup;
        self.super_basic_block_data(block, data);
        self.in_cleanup_block = false;

        kind: &mut RetagKind,
        place: &mut Place<'tcx>,
        self.super_retag(kind, place, loc);

        // We have to patch all inlined retags to be aware that they are no longer
        // happening on function entry.
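        // For example (illustrative): a `RetagKind::FnEntry` retag emitted for one
        // of the callee's own arguments is downgraded below to `RetagKind::Default`,
        // since after inlining it runs in the middle of the caller rather than at a
        // function boundary.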
        if *kind == RetagKind::FnEntry {
            *kind = RetagKind::Default;

    fn visit_terminator_kind(&mut self, block: BasicBlock,
                             kind: &mut TerminatorKind<'tcx>, loc: Location) {
        self.super_terminator_kind(block, kind, loc);

            TerminatorKind::GeneratorDrop |
            TerminatorKind::Yield { .. } => bug!(),
            TerminatorKind::Goto { ref mut target } => {
                *target = self.update_target(*target);
            TerminatorKind::SwitchInt { ref mut targets, .. } => {
                    *tgt = self.update_target(*tgt);
            TerminatorKind::Drop { ref mut target, ref mut unwind, .. } |
            TerminatorKind::DropAndReplace { ref mut target, ref mut unwind, .. } => {
                *target = self.update_target(*target);
                if let Some(tgt) = *unwind {
                    *unwind = Some(self.update_target(tgt));
                } else if !self.in_cleanup_block {
                    // Unless this drop is in a cleanup block, add an unwind edge to
                    // the original call's cleanup block
                    *unwind = self.cleanup_block;
            TerminatorKind::Call { ref mut destination, ref mut cleanup, .. } => {
                if let Some((_, ref mut tgt)) = *destination {
                    *tgt = self.update_target(*tgt);
                if let Some(tgt) = *cleanup {
                    *cleanup = Some(self.update_target(tgt));
                } else if !self.in_cleanup_block {
                    // Unless this call is in a cleanup block, add an unwind edge to
                    // the original call's cleanup block
                    *cleanup = self.cleanup_block;
            TerminatorKind::Assert { ref mut target, ref mut cleanup, .. } => {
                *target = self.update_target(*target);
                if let Some(tgt) = *cleanup {
                    *cleanup = Some(self.update_target(tgt));
                } else if !self.in_cleanup_block {
                    // Unless this assert is in a cleanup block, add an unwind edge to
                    // the original call's cleanup block
                    *cleanup = self.cleanup_block;
            TerminatorKind::Return => {
                *kind = TerminatorKind::Goto { target: self.return_block };
            TerminatorKind::Resume => {
                if let Some(tgt) = self.cleanup_block {
                    *kind = TerminatorKind::Goto { target: tgt }
            TerminatorKind::Abort => { }
            TerminatorKind::Unreachable => { }
            TerminatorKind::FalseEdges { ref mut real_target, ref mut imaginary_targets } => {
                *real_target = self.update_target(*real_target);
                for target in imaginary_targets {
                    *target = self.update_target(*target);
            TerminatorKind::FalseUnwind { real_target: _, unwind: _ } =>
                // see the ordering of passes in the optimized_mir query.
                bug!("False unwinds should have been removed before inlining")

    fn visit_source_scope(&mut self, scope: &mut SourceScope) {
        *scope = self.scope_map[*scope];