//! Inlining pass for MIR functions

use rustc_hir::def_id::DefId;

use rustc_index::bit_set::BitSet;
use rustc_index::vec::{Idx, IndexVec};

use rustc::middle::codegen_fn_attrs::CodegenFnAttrFlags;
use rustc::mir::visit::*;
use rustc::mir::*;
use rustc::ty::subst::{InternalSubsts, Subst, SubstsRef};
use rustc::ty::{self, Instance, InstanceDef, ParamEnv, Ty, TyCtxt, TypeFoldable};

use super::simplify::{remove_dead_blocks, CfgSimplifier};
use crate::transform::{MirPass, MirSource};
use std::collections::VecDeque;
use std::iter;
use syntax::attr;
use rustc_target::spec::abi::Abi;

const DEFAULT_THRESHOLD: usize = 50;
const HINT_THRESHOLD: usize = 100;

const INSTR_COST: usize = 5;
const CALL_PENALTY: usize = 25;

const UNKNOWN_SIZE_COST: usize = 10;
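
// Rough worked example of the cost model below (illustrative only): a callee
// with six ordinary statements and one non-intrinsic call costs
// 6 * INSTR_COST + CALL_PENALTY = 55, which is over DEFAULT_THRESHOLD (50)
// but under HINT_THRESHOLD (100), so it is inlined only when hinted with
// `#[inline]` (ignoring the per-local size costs added in `should_inline`).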

pub struct Inline;

#[derive(Copy, Clone, Debug)]
struct CallSite<'tcx> {
    callee: DefId,
    substs: SubstsRef<'tcx>,
    bb: BasicBlock,
    location: SourceInfo,
}

impl<'tcx> MirPass<'tcx> for Inline {
    fn run_pass(&self, tcx: TyCtxt<'tcx>, source: MirSource<'tcx>, body: &mut BodyAndCache<'tcx>) {
        if tcx.sess.opts.debugging_opts.mir_opt_level >= 2 {
            Inliner { tcx, source }.run_pass(body);
        }
    }
}

struct Inliner<'tcx> {
    tcx: TyCtxt<'tcx>,
    source: MirSource<'tcx>,
}

impl Inliner<'tcx> {
    fn run_pass(&self, caller_body: &mut BodyAndCache<'tcx>) {
        // Keep a queue of callsites to try inlining on. We take
        // advantage of the fact that queries detect cycles here to
        // allow us to try and fetch the fully optimized MIR of a
        // call; if it succeeds, we can inline it and we know that
        // they do not call us. Otherwise, we just don't try to
        // inline.
        //
        // We use a queue so that we inline "broadly" before we inline
        // in depth. It is unclear if this is the best heuristic,
        // really, but that's true of all the heuristics in this
        // file.
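        //
        // For example (illustrative): if the caller invokes `f` and `g`, and
        // `f` itself contains a call to `h`, the queue tries `f` and `g`
        // before the `h` callsite that inlining `f` introduces.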

        let mut callsites = VecDeque::new();

        let mut param_env = self.tcx.param_env(self.source.def_id());

        let substs = &InternalSubsts::identity_for_item(self.tcx, self.source.def_id());

        // For monomorphic functions, we can use `Reveal::All` to resolve specialized instances.
        if !substs.needs_subst() {
            param_env = param_env.with_reveal_all();
        }

        // Only do inlining into fn bodies.
        let id = self.tcx.hir().as_local_hir_id(self.source.def_id()).unwrap();
        if self.tcx.hir().body_owner_kind(id).is_fn_or_closure() && self.source.promoted.is_none() {
            for (bb, bb_data) in caller_body.basic_blocks().iter_enumerated() {
                if let Some(callsite) =
                    self.get_valid_function_call(bb, bb_data, caller_body, param_env)
                {
                    callsites.push_back(callsite);
                }
            }
        } else {
            return;
        }

        let mut local_change;
        let mut changed = false;

        loop {
            local_change = false;
            while let Some(callsite) = callsites.pop_front() {
                debug!("checking whether to inline callsite {:?}", callsite);
                if !self.tcx.is_mir_available(callsite.callee) {
                    debug!("checking whether to inline callsite {:?} - MIR unavailable", callsite);
                    continue;
                }

                let self_node_id = self.tcx.hir().as_local_node_id(self.source.def_id()).unwrap();
                let callee_node_id = self.tcx.hir().as_local_node_id(callsite.callee);

                let callee_body = if let Some(callee_node_id) = callee_node_id {
                    // Avoid a cycle here by using `optimized_mir` only if we have
                    // a lower node id than the callee. This ensures that the callee will
                    // not inline us. This trick only works without incremental compilation.
                    // So don't do it if that is enabled.
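                    //
                    // For instance (illustrative): if `a` has a lower node id
                    // than `b`, then `a` may inline `b` but never the reverse,
                    // so the "may inline" relation cannot form a cycle.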
                    if !self.tcx.dep_graph.is_fully_enabled()
                        && self_node_id.as_u32() < callee_node_id.as_u32()
                    {
                        self.tcx.optimized_mir(callsite.callee)
                    } else {
                        continue;
                    }
                } else {
                    // This cannot result in a cycle since the callee MIR is from another crate
                    // and is already optimized.
                    self.tcx.optimized_mir(callsite.callee)
                };

                let callee_body = if self.consider_optimizing(callsite, callee_body) {
                    self.tcx.subst_and_normalize_erasing_regions(
                        &callsite.substs,
                        param_env,
                        callee_body,
                    )
                } else {
                    continue;
                };

                let start = caller_body.basic_blocks().len();
                debug!("attempting to inline callsite {:?} - body={:?}", callsite, callee_body);
                if !self.inline_call(callsite, caller_body, callee_body) {
                    debug!("attempting to inline callsite {:?} - failure", callsite);
                    continue;
                }
                debug!("attempting to inline callsite {:?} - success", callsite);

                // Add callsites from inlined function
                for (bb, bb_data) in caller_body.basic_blocks().iter_enumerated().skip(start) {
                    if let Some(new_callsite) =
                        self.get_valid_function_call(bb, bb_data, caller_body, param_env)
                    {
                        // Don't inline the same function multiple times.
                        if callsite.callee != new_callsite.callee {
                            callsites.push_back(new_callsite);
                        }
                    }
                }

                local_change = true;
                changed = true;
            }

            if !local_change {
                break;
            }
        }

        // Simplify if we inlined anything.
        if changed {
            debug!("running simplify cfg on {:?}", self.source);
            CfgSimplifier::new(caller_body).simplify();
            remove_dead_blocks(caller_body);
        }
    }

    fn get_valid_function_call(
        &self,
        bb: BasicBlock,
        bb_data: &BasicBlockData<'tcx>,
        caller_body: &Body<'tcx>,
        param_env: ParamEnv<'tcx>,
    ) -> Option<CallSite<'tcx>> {
        // Don't inline calls that are in cleanup blocks.
        if bb_data.is_cleanup {
            return None;
        }

        // Only consider direct calls to functions
        let terminator = bb_data.terminator();
        if let TerminatorKind::Call { func: ref op, .. } = terminator.kind {
            if let ty::FnDef(callee_def_id, substs) = op.ty(caller_body, self.tcx).kind {
                let instance = Instance::resolve(self.tcx, param_env, callee_def_id, substs)?;

                if let InstanceDef::Virtual(..) = instance.def {
                    return None;
                }

                return Some(CallSite {
                    callee: instance.def_id(),
                    substs: instance.substs,
                    bb,
                    location: terminator.source_info,
                });
            }
        }

        None
    }

    fn consider_optimizing(&self, callsite: CallSite<'tcx>, callee_body: &Body<'tcx>) -> bool {
        debug!("consider_optimizing({:?})", callsite);
        self.should_inline(callsite, callee_body)
            && self.tcx.consider_optimizing(|| {
                format!("Inline {:?} into {:?}", callee_body.span, callsite)
            })
    }

    fn should_inline(&self, callsite: CallSite<'tcx>, callee_body: &Body<'tcx>) -> bool {
        debug!("should_inline({:?})", callsite);
        let tcx = self.tcx;

        // Cannot inline generators which haven't been transformed yet
        if callee_body.yield_ty.is_some() {
            debug!("    yield ty present - not inlining");
            return false;
        }

        let codegen_fn_attrs = tcx.codegen_fn_attrs(callsite.callee);

        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::TRACK_CALLER) {
            debug!("`#[track_caller]` present - not inlining");
            return false;
        }

        let hinted = match codegen_fn_attrs.inline {
            // Just treat inline(always) as a hint for now,
            // there are cases that prevent inlining that we
            // need to check for first.
            attr::InlineAttr::Always => true,
            attr::InlineAttr::Never => {
                debug!("`#[inline(never)]` present - not inlining");
                return false;
            }
            attr::InlineAttr::Hint => true,
            attr::InlineAttr::None => false,
        };

        // Only inline local functions if they would be eligible for cross-crate
        // inlining. This is to ensure that the final crate doesn't have MIR that
        // references unexported symbols.
        if callsite.callee.is_local() {
            if callsite.substs.non_erasable_generics().count() == 0 && !hinted {
                debug!("    callee is an exported function - not inlining");
                return false;
            }
        }

        let mut threshold = if hinted { HINT_THRESHOLD } else { DEFAULT_THRESHOLD };

        // Significantly lower the threshold for inlining cold functions
        if codegen_fn_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
            threshold /= 5;
        }

        // Give a bonus to functions with a small number of blocks;
        // we normally have two or three blocks for even
        // very small functions.
        if callee_body.basic_blocks().len() <= 3 {
            threshold += threshold / 4;
        }
        debug!("    final inline threshold = {}", threshold);

        // FIXME: Give a bonus to functions with only a single caller

        let param_env = tcx.param_env(self.source.def_id());

        let mut first_block = true;
        let mut cost = 0;

        // Traverse the MIR manually so we can account for the effects of
        // inlining on the CFG.
        let mut work_list = vec![START_BLOCK];
        let mut visited = BitSet::new_empty(callee_body.basic_blocks().len());
        while let Some(bb) = work_list.pop() {
            if !visited.insert(bb.index()) {
                continue;
            }
            let blk = &callee_body.basic_blocks()[bb];

            for stmt in &blk.statements {
                // Don't count StorageLive/StorageDead in the inlining cost.
                match stmt.kind {
                    StatementKind::StorageLive(_)
                    | StatementKind::StorageDead(_)
                    | StatementKind::Nop => {}
                    _ => cost += INSTR_COST,
                }
            }
            let term = blk.terminator();
            let mut is_drop = false;
            match term.kind {
                TerminatorKind::Drop { ref location, target, unwind }
                | TerminatorKind::DropAndReplace { ref location, target, unwind, .. } => {
                    is_drop = true;
                    work_list.push(target);
                    // If the location doesn't actually need dropping, treat it like
                    // a regular goto.
                    let ty = location.ty(callee_body, tcx).subst(tcx, callsite.substs).ty;
                    if ty.needs_drop(tcx, param_env) {
                        cost += CALL_PENALTY;
                        if let Some(unwind) = unwind {
                            work_list.push(unwind);
                        }
                    } else {
                        cost += INSTR_COST;
                    }
                }

                TerminatorKind::Unreachable | TerminatorKind::Call { destination: None, .. }
                    if first_block =>
                {
                    // If the function always diverges, don't inline
                    // unless the cost is zero
                    threshold = 0;
                }

                TerminatorKind::Call { func: Operand::Constant(ref f), .. } => {
                    if let ty::FnDef(def_id, _) = f.literal.ty.kind {
                        // Don't give intrinsics the extra penalty for calls
                        let f = tcx.fn_sig(def_id);
                        if f.abi() == Abi::RustIntrinsic || f.abi() == Abi::PlatformIntrinsic {
                            cost += INSTR_COST;
                        } else {
                            cost += CALL_PENALTY;
                        }
                    }
                }
                TerminatorKind::Assert { .. } => cost += CALL_PENALTY,
                _ => cost += INSTR_COST,
            }

            if !is_drop {
                for &succ in term.successors() {
                    work_list.push(succ);
                }
            }

            first_block = false;
        }

        // Count up the cost of local variables and temps, if we know the size
        // use that, otherwise we use a moderately-large dummy cost.

        let ptr_size = tcx.data_layout.pointer_size.bytes();

        for v in callee_body.vars_and_temps_iter() {
            let v = &callee_body.local_decls[v];
            let ty = v.ty.subst(tcx, callsite.substs);
            // Cost of the var is the size in machine-words, if we know
            // the size.
            if let Some(size) = type_size_of(tcx, param_env.clone(), ty) {
                cost += (size / ptr_size) as usize;
            } else {
                cost += UNKNOWN_SIZE_COST;
            }
        }
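
        // For example (illustrative): on a 64-bit target, a local of type
        // `[u64; 4]` has a known layout and adds 32 / 8 = 4 to the cost, while
        // a local whose size is not known here adds UNKNOWN_SIZE_COST.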

        if let attr::InlineAttr::Always = codegen_fn_attrs.inline {
            debug!("INLINING {:?} because inline(always) [cost={}]", callsite, cost);
            true
        } else {
            if cost <= threshold {
                debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold);
                true
            } else {
                debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold);
                false
            }
        }
    }

    fn inline_call(
        &self,
        callsite: CallSite<'tcx>,
        caller_body: &mut BodyAndCache<'tcx>,
        mut callee_body: BodyAndCache<'tcx>,
    ) -> bool {
        let terminator = caller_body[callsite.bb].terminator.take().unwrap();
        match terminator.kind {
            // FIXME: Handle inlining of diverging calls
            TerminatorKind::Call { args, destination: Some(destination), cleanup, .. } => {
                debug!("inlined {:?} into {:?}", callsite.callee, self.source);

                let mut local_map = IndexVec::with_capacity(callee_body.local_decls.len());
                let mut scope_map = IndexVec::with_capacity(callee_body.source_scopes.len());

                for mut scope in callee_body.source_scopes.iter().cloned() {
                    if scope.parent_scope.is_none() {
                        scope.parent_scope = Some(callsite.location.scope);
                        // FIXME(eddyb) is this really needed?
                        // (also note that it's always overwritten below)
                        scope.span = callee_body.span;
                    }

                    // FIXME(eddyb) this doesn't seem right at all.
                    // The inlined source scopes should probably be annotated as
                    // such, but also contain all of the original information.
                    scope.span = callsite.location.span;

                    let idx = caller_body.source_scopes.push(scope);
                    scope_map.push(idx);
                }

                for loc in callee_body.vars_and_temps_iter() {
                    let mut local = callee_body.local_decls[loc].clone();

                    local.source_info.scope = scope_map[local.source_info.scope];
                    local.source_info.span = callsite.location.span;

                    let idx = caller_body.local_decls.push(local);
                    local_map.push(idx);
                }

                // If the call is something like `a[*i] = f(i)`, where
                // `i : &mut usize`, then just duplicating the `a[*i]`
                // Place could result in two different locations if `f`
                // writes to `i`. To prevent this we need to create a temporary
                // borrow of the place and pass the destination as `*temp` instead.
                fn dest_needs_borrow(place: &Place<'_>) -> bool {
                    for elem in place.projection.iter() {
                        match elem {
                            ProjectionElem::Deref | ProjectionElem::Index(_) => return true,
                            _ => {}
                        }
                    }

                    false
                }

                let dest = if dest_needs_borrow(&destination.0) {
                    debug!("creating temp for return destination");
                    let dest = Rvalue::Ref(
                        self.tcx.lifetimes.re_erased,
                        BorrowKind::Mut { allow_two_phase_borrow: false },
                        destination.0,
                    );

                    let ty = dest.ty(&**caller_body, self.tcx);

                    let temp = LocalDecl::new_temp(ty, callsite.location.span);

                    let tmp = caller_body.local_decls.push(temp);
                    let tmp = Place::from(tmp);

                    let stmt = Statement {
                        source_info: callsite.location,
                        kind: StatementKind::Assign(box (tmp.clone(), dest)),
                    };
                    caller_body[callsite.bb].statements.push(stmt);
                    self.tcx.mk_place_deref(tmp)
                } else {
                    destination.0
                };
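
                // Illustrative sketch of the rewrite above: a call such as
                // `a[*i] = f(i)` is redirected through a borrow, roughly
                //
                //     tmp = &mut a[*i];
                //     // ... the inlined body then writes through `*tmp` ...
                //
                // so the destination place is computed once, before `f` can
                // modify `i`.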

                let return_block = destination.1;

                // Copy the arguments if needed.
                let args: Vec<_> = self.make_call_args(args, &callsite, caller_body);

                let bb_len = caller_body.basic_blocks().len();
                let mut integrator = Integrator {
                    block_idx: bb_len,
                    args: &args,
                    local_map,
                    scope_map,
                    destination: dest,
                    return_block,
                    cleanup_block: cleanup,
                    in_cleanup_block: false,
                    tcx: self.tcx,
                };

                for mut var_debug_info in callee_body.var_debug_info.drain(..) {
                    integrator.visit_var_debug_info(&mut var_debug_info);
                    caller_body.var_debug_info.push(var_debug_info);
                }

                for (bb, mut block) in callee_body.basic_blocks_mut().drain_enumerated(..) {
                    integrator.visit_basic_block_data(bb, &mut block);
                    caller_body.basic_blocks_mut().push(block);
                }

                let terminator = Terminator {
                    source_info: callsite.location,
                    kind: TerminatorKind::Goto { target: BasicBlock::new(bb_len) },
                };

                caller_body[callsite.bb].terminator = Some(terminator);

                true
            }
            kind => {
                caller_body[callsite.bb].terminator =
                    Some(Terminator { source_info: terminator.source_info, kind });
                false
            }
        }
    }

    fn make_call_args(
        &self,
        args: Vec<Operand<'tcx>>,
        callsite: &CallSite<'tcx>,
        caller_body: &mut BodyAndCache<'tcx>,
    ) -> Vec<Local> {
        let tcx = self.tcx;

        // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
        // The caller provides the arguments wrapped up in a tuple:
        //
        //     tuple_tmp = (a, b, c)
        //     Fn::call(closure_ref, tuple_tmp)
        //
        // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
        // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
        // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
        // a vector like
        //
        //     [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
        //
        // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
        // if we "spill" that into *another* temporary, so that we can map the argument
        // variable in the callee MIR directly to an argument variable on our side.
        // So we introduce temporaries like:
        //
        //     tmp0 = tuple_tmp.0
        //     tmp1 = tuple_tmp.1
        //     tmp2 = tuple_tmp.2
        //
        // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
        if tcx.is_closure(callsite.callee) {
            let mut args = args.into_iter();
            let self_ = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_body);
            let tuple = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_body);
            assert!(args.next().is_none());

            let tuple = Place::from(tuple);
            let tuple_tys = if let ty::Tuple(s) = tuple.ty(&**caller_body, tcx).ty.kind {
                s
            } else {
                bug!("Closure arguments are not passed as a tuple");
            };

            // The `closure_ref` in our example above.
            let closure_ref_arg = iter::once(self_);

            // The `tmp0`, `tmp1`, and `tmp2` in our example above.
            let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| {
                // This is e.g., `tuple_tmp.0` in our example above.
                let tuple_field =
                    Operand::Move(tcx.mk_place_field(tuple.clone(), Field::new(i), ty.expect_ty()));

                // Spill to a local to make e.g., `tmp0`.
                self.create_temp_if_necessary(tuple_field, callsite, caller_body)
            });

            closure_ref_arg.chain(tuple_tmp_args).collect()
        } else {
            args.into_iter()
                .map(|a| self.create_temp_if_necessary(a, callsite, caller_body))
                .collect()
        }
    }

    /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh
    /// temporary `T` and an instruction `T = arg`, and returns `T`.
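    ///
    /// For example (illustrative): an argument operand that moves a user
    /// variable `x` is spilled as `tmp = move x`, while an operand that
    /// already moves a caller temporary is returned unchanged.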
    fn create_temp_if_necessary(
        &self,
        arg: Operand<'tcx>,
        callsite: &CallSite<'tcx>,
        caller_body: &mut BodyAndCache<'tcx>,
    ) -> Local {
        // FIXME: Analysis of the usage of the arguments to avoid
        // unnecessary temporaries.

        if let Operand::Move(place) = &arg {
            if let Some(local) = place.as_local() {
                if caller_body.local_kind(local) == LocalKind::Temp {
                    // Reuse the operand if it's a temporary already
                    return local;
                }
            }
        }

        debug!("creating temp for argument {:?}", arg);
        // Otherwise, create a temporary for the arg
        let arg = Rvalue::Use(arg);

        let ty = arg.ty(&**caller_body, self.tcx);

        let arg_tmp = LocalDecl::new_temp(ty, callsite.location.span);
        let arg_tmp = caller_body.local_decls.push(arg_tmp);

        let stmt = Statement {
            source_info: callsite.location,
            kind: StatementKind::Assign(box (Place::from(arg_tmp), arg)),
        };
        caller_body[callsite.bb].statements.push(stmt);
        arg_tmp
    }
}

fn type_size_of<'tcx>(
    tcx: TyCtxt<'tcx>,
    param_env: ty::ParamEnv<'tcx>,
    ty: Ty<'tcx>,
) -> Option<u64> {
    tcx.layout_of(param_env.and(ty)).ok().map(|layout| layout.size.bytes())
}

/**
 * Integrates blocks from the callee function into the calling function.
 * Updates block indices, references to locals and other control flow
 * stuff.
*/
struct Integrator<'a, 'tcx> {
    block_idx: usize,
    args: &'a [Local],
    local_map: IndexVec<Local, Local>,
    scope_map: IndexVec<SourceScope, SourceScope>,
    destination: Place<'tcx>,
    return_block: BasicBlock,
    cleanup_block: Option<BasicBlock>,
    in_cleanup_block: bool,
    tcx: TyCtxt<'tcx>,
}

impl<'a, 'tcx> Integrator<'a, 'tcx> {
    fn update_target(&self, tgt: BasicBlock) -> BasicBlock {
        let new = BasicBlock::new(tgt.index() + self.block_idx);
        debug!("updating target `{:?}`, new: `{:?}`", tgt, new);
        new
    }

    fn make_integrate_local(&self, local: &Local) -> Local {
        if *local == RETURN_PLACE {
            return self.destination.local;
        }

        let idx = local.index() - 1;
        if idx < self.args.len() {
            return self.args[idx];
        }

        self.local_map[Local::new(idx - self.args.len())]
    }
}
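
// Illustrative note on `make_integrate_local` above: callee locals are laid
// out as `[RETURN_PLACE, argument locals, vars and temps]`. With two
// arguments, the callee's `_1` and `_2` map to the spilled argument locals in
// the caller, while `_3` maps to `local_map[0]`, the first var/temp that was
// copied into the caller.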

impl<'a, 'tcx> MutVisitor<'tcx> for Integrator<'a, 'tcx> {
    fn tcx(&self) -> TyCtxt<'tcx> {
        self.tcx
    }

    fn visit_local(&mut self, local: &mut Local, _ctxt: PlaceContext, _location: Location) {
        *local = self.make_integrate_local(local);
    }

    fn visit_place(&mut self, place: &mut Place<'tcx>, context: PlaceContext, location: Location) {
        // If this is the `RETURN_PLACE`, we need to rebase any projections onto it.
        let dest_proj_len = self.destination.projection.len();
        if place.local == RETURN_PLACE && dest_proj_len > 0 {
            let mut projs = Vec::with_capacity(dest_proj_len + place.projection.len());
            projs.extend(self.destination.projection);
            projs.extend(place.projection);

            place.projection = self.tcx.intern_place_elems(&*projs);
        }
        // Handles integrating any locals that occur in the base
        // or projections.
        self.super_place(place, context, location)
    }

    fn process_projection_elem(&mut self, elem: &PlaceElem<'tcx>) -> Option<PlaceElem<'tcx>> {
        if let PlaceElem::Index(local) = elem {
            let new_local = self.make_integrate_local(local);

            if new_local != *local {
                return Some(PlaceElem::Index(new_local));
            }
        }

        None
    }

    fn visit_basic_block_data(&mut self, block: BasicBlock, data: &mut BasicBlockData<'tcx>) {
        self.in_cleanup_block = data.is_cleanup;
        self.super_basic_block_data(block, data);
        self.in_cleanup_block = false;
    }

    fn visit_retag(&mut self, kind: &mut RetagKind, place: &mut Place<'tcx>, loc: Location) {
        self.super_retag(kind, place, loc);

        // We have to patch all inlined retags to be aware that they are no longer
        // happening on function entry.
        if *kind == RetagKind::FnEntry {
            *kind = RetagKind::Default;
        }
    }

    fn visit_terminator_kind(&mut self, kind: &mut TerminatorKind<'tcx>, loc: Location) {
        self.super_terminator_kind(kind, loc);

        match *kind {
            // Yields and generator drops should have been lowered before inlining.
            TerminatorKind::GeneratorDrop | TerminatorKind::Yield { .. } => bug!(),
            TerminatorKind::Goto { ref mut target } => {
                *target = self.update_target(*target);
            }
            TerminatorKind::SwitchInt { ref mut targets, .. } => {
                for tgt in targets {
                    *tgt = self.update_target(*tgt);
                }
            }
            TerminatorKind::Drop { ref mut target, ref mut unwind, .. }
            | TerminatorKind::DropAndReplace { ref mut target, ref mut unwind, .. } => {
                *target = self.update_target(*target);
                if let Some(tgt) = *unwind {
                    *unwind = Some(self.update_target(tgt));
                } else if !self.in_cleanup_block {
                    // Unless this drop is in a cleanup block, add an unwind edge to
                    // the original call's cleanup block
                    *unwind = self.cleanup_block;
                }
            }
            TerminatorKind::Call { ref mut destination, ref mut cleanup, .. } => {
                if let Some((_, ref mut tgt)) = *destination {
                    *tgt = self.update_target(*tgt);
                }
                if let Some(tgt) = *cleanup {
                    *cleanup = Some(self.update_target(tgt));
                } else if !self.in_cleanup_block {
                    // Unless this call is in a cleanup block, add an unwind edge to
                    // the original call's cleanup block
                    *cleanup = self.cleanup_block;
                }
            }
            TerminatorKind::Assert { ref mut target, ref mut cleanup, .. } => {
                *target = self.update_target(*target);
                if let Some(tgt) = *cleanup {
                    *cleanup = Some(self.update_target(tgt));
                } else if !self.in_cleanup_block {
                    // Unless this assert is in a cleanup block, add an unwind edge to
                    // the original call's cleanup block
                    *cleanup = self.cleanup_block;
                }
            }
            TerminatorKind::Return => {
                *kind = TerminatorKind::Goto { target: self.return_block };
            }
            TerminatorKind::Resume => {
                if let Some(tgt) = self.cleanup_block {
                    *kind = TerminatorKind::Goto { target: tgt }
                }
            }
            TerminatorKind::Abort => {}
            TerminatorKind::Unreachable => {}
            TerminatorKind::FalseEdges { ref mut real_target, ref mut imaginary_target } => {
                *real_target = self.update_target(*real_target);
                *imaginary_target = self.update_target(*imaginary_target);
            }
            TerminatorKind::FalseUnwind { real_target: _, unwind: _ } =>
            // see the ordering of passes in the optimized_mir query.
            {
                bug!("False unwinds should have been removed before inlining")
            }
        }
    }

    fn visit_source_scope(&mut self, scope: &mut SourceScope) {
        *scope = self.scope_map[*scope];
    }
}