]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_mir_transform/src/inline.rs
Rollup merge of #106234 - notriddle:notriddle/button-width, r=GuillaumeGomez
[rust.git] / compiler / rustc_mir_transform / src / inline.rs
1 //! Inlining pass for MIR functions
2 use crate::deref_separator::deref_finder;
3 use rustc_attr::InlineAttr;
4 use rustc_index::bit_set::BitSet;
5 use rustc_index::vec::Idx;
6 use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
7 use rustc_middle::mir::visit::*;
8 use rustc_middle::mir::*;
9 use rustc_middle::ty::{self, Instance, InstanceDef, ParamEnv, Ty, TyCtxt};
10 use rustc_session::config::OptLevel;
11 use rustc_span::{hygiene::ExpnKind, ExpnData, LocalExpnId, Span};
12 use rustc_target::abi::VariantIdx;
13 use rustc_target::spec::abi::Abi;
14
15 use crate::simplify::{remove_dead_blocks, CfgSimplifier};
16 use crate::util;
17 use crate::MirPass;
18 use std::iter;
19 use std::ops::{Range, RangeFrom};
20
21 pub(crate) mod cycle;
22
// Weights and penalties used when estimating the cost of inlining a callee.
// NOTE(review): the code that applies these constants is not in this part of the file
// (presumably the `CostChecker` visitor) — confirm their exact use there before tuning.
23 const INSTR_COST: usize = 5;
24 const CALL_PENALTY: usize = 25;
25 const LANDINGPAD_PENALTY: usize = 50;
26 const RESUME_PENALTY: usize = 45;
27
// NOTE(review): presumably charged for a local whose size cannot be computed — confirm.
28 const UNKNOWN_SIZE_COST: usize = 10;
29
/// The MIR inlining pass; its behavior is provided by its `MirPass` implementation.
30 pub struct Inline;
31
/// A direct call in the caller body whose callee has been resolved to a concrete instance.
32 #[derive(Copy, Clone, Debug)]
33 struct CallSite<'tcx> {
       /// The resolved callee instance.
34     callee: Instance<'tcx>,
       /// Signature of the called function definition with the call's substitutions applied.
35     fn_sig: ty::PolyFnSig<'tcx>,
       /// The caller block whose terminator is the call.
36     block: BasicBlock,
       /// The `target` of the call terminator, i.e. the block control continues in (if any).
37     target: Option<BasicBlock>,
       /// Source info of the call terminator, with its span replaced by the call's `fn_span`.
38     source_info: SourceInfo,
39 }
40
41 impl<'tcx> MirPass<'tcx> for Inline {
42     fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
43         if let Some(enabled) = sess.opts.unstable_opts.inline_mir {
44             return enabled;
45         }
46
47         match sess.mir_opt_level() {
48             0 | 1 => false,
49             2 => {
50                 (sess.opts.optimize == OptLevel::Default
51                     || sess.opts.optimize == OptLevel::Aggressive)
52                     && sess.opts.incremental == None
53             }
54             _ => true,
55         }
56     }
57
58     fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
59         let span = trace_span!("inline", body = %tcx.def_path_str(body.source.def_id()));
60         let _guard = span.enter();
61         if inline(tcx, body) {
62             debug!("running simplify cfg on {:?}", body.source);
63             CfgSimplifier::new(body).simplify();
64             remove_dead_blocks(tcx, body);
65             deref_finder(tcx, body);
66         }
67     }
68 }
69
70 fn inline<'tcx>(tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) -> bool {
71     let def_id = body.source.def_id().expect_local();
72
73     // Only do inlining into fn bodies.
74     if !tcx.hir().body_owner_kind(def_id).is_fn_or_closure() {
75         return false;
76     }
77     if body.source.promoted.is_some() {
78         return false;
79     }
80     // Avoid inlining into generators, since their `optimized_mir` is used for layout computation,
81     // which can create a cycle, even when no attempt is made to inline the function in the other
82     // direction.
83     if body.generator.is_some() {
84         return false;
85     }
86
87     let param_env = tcx.param_env_reveal_all_normalized(def_id);
88
89     let mut this =
90         Inliner { tcx, param_env, codegen_fn_attrs: tcx.codegen_fn_attrs(def_id), changed: false };
91     let blocks = BasicBlock::new(0)..body.basic_blocks.next_index();
92     this.process_blocks(body, blocks);
93     this.changed
94 }
95
/// State shared by all inlining attempts into a single caller body.
96 struct Inliner<'tcx> {
       /// Type context used for all queries.
97     tcx: TyCtxt<'tcx>,
       /// Parameter environment of the caller, used for normalization and instance resolution.
98     param_env: ParamEnv<'tcx>,
99     /// Caller codegen attributes.
100     codegen_fn_attrs: &'tcx CodegenFnAttrs,
101     /// Indicates that the caller body has been modified.
102     changed: bool,
103 }
104
105 impl<'tcx> Inliner<'tcx> {
106     fn process_blocks(&mut self, caller_body: &mut Body<'tcx>, blocks: Range<BasicBlock>) {
107         for bb in blocks {
108             let bb_data = &caller_body[bb];
109             if bb_data.is_cleanup {
110                 continue;
111             }
112
113             let Some(callsite) = self.resolve_callsite(caller_body, bb, bb_data) else {
114                 continue;
115             };
116
117             let span = trace_span!("process_blocks", %callsite.callee, ?bb);
118             let _guard = span.enter();
119
120             match self.try_inlining(caller_body, &callsite) {
121                 Err(reason) => {
122                     debug!("not-inlined {} [{}]", callsite.callee, reason);
123                     continue;
124                 }
125                 Ok(_) => {
126                     debug!("inlined {}", callsite.callee);
127                     self.changed = true;
128                     // We could process the blocks returned by `try_inlining` here. However, that
129                     // leads to exponential compile times due to the top-down nature of this kind
130                     // of inlining.
131                 }
132             }
133         }
134     }
135
136     /// Attempts to inline a callsite into the caller body. When successful returns basic blocks
137     /// containing the inlined body. Otherwise returns an error describing why inlining didn't take
138     /// place.
139     fn try_inlining(
140         &self,
141         caller_body: &mut Body<'tcx>,
142         callsite: &CallSite<'tcx>,
143     ) -> Result<std::ops::Range<BasicBlock>, &'static str> {
144         let callee_attrs = self.tcx.codegen_fn_attrs(callsite.callee.def_id());
145         self.check_codegen_attributes(callsite, callee_attrs)?;
146         self.check_mir_is_available(caller_body, &callsite.callee)?;
147         let callee_body = self.tcx.instance_mir(callsite.callee.def);
148         self.check_mir_body(callsite, callee_body, callee_attrs)?;
149
150         if !self.tcx.consider_optimizing(|| {
151             format!("Inline {:?} into {:?}", callsite.callee, caller_body.source)
152         }) {
153             return Err("optimization fuel exhausted");
154         }
155
156         let Ok(callee_body) = callsite.callee.try_subst_mir_and_normalize_erasing_regions(
157             self.tcx,
158             self.param_env,
159             callee_body.clone(),
160         ) else {
161             return Err("failed to normalize callee body");
162         };
163
164         // Check call signature compatibility.
165         // Normally, this shouldn't be required, but trait normalization failure can create a
166         // validation ICE.
167         let terminator = caller_body[callsite.block].terminator.as_ref().unwrap();
168         let TerminatorKind::Call { args, destination, .. } = &terminator.kind else { bug!() };
169         let destination_ty = destination.ty(&caller_body.local_decls, self.tcx).ty;
170         let output_type = callee_body.return_ty();
171         if !util::is_subtype(self.tcx, self.param_env, output_type, destination_ty) {
172             trace!(?output_type, ?destination_ty);
173             return Err("failed to normalize return type");
174         }
175         if callsite.fn_sig.abi() == Abi::RustCall {
176             let (arg_tuple, skipped_args) = match &args[..] {
177                 [arg_tuple] => (arg_tuple, 0),
178                 [_, arg_tuple] => (arg_tuple, 1),
179                 _ => bug!("Expected `rust-call` to have 1 or 2 args"),
180             };
181
182             let arg_tuple_ty = arg_tuple.ty(&caller_body.local_decls, self.tcx);
183             let ty::Tuple(arg_tuple_tys) = arg_tuple_ty.kind() else {
184                 bug!("Closure arguments are not passed as a tuple");
185             };
186
187             for (arg_ty, input) in
188                 arg_tuple_tys.iter().zip(callee_body.args_iter().skip(skipped_args))
189             {
190                 let input_type = callee_body.local_decls[input].ty;
191                 if !util::is_subtype(self.tcx, self.param_env, input_type, arg_ty) {
192                     trace!(?arg_ty, ?input_type);
193                     return Err("failed to normalize tuple argument type");
194                 }
195             }
196         } else {
197             for (arg, input) in args.iter().zip(callee_body.args_iter()) {
198                 let input_type = callee_body.local_decls[input].ty;
199                 let arg_ty = arg.ty(&caller_body.local_decls, self.tcx);
200                 if !util::is_subtype(self.tcx, self.param_env, input_type, arg_ty) {
201                     trace!(?arg_ty, ?input_type);
202                     return Err("failed to normalize argument type");
203                 }
204             }
205         }
206
207         let old_blocks = caller_body.basic_blocks.next_index();
208         self.inline_call(caller_body, &callsite, callee_body);
209         let new_blocks = old_blocks..caller_body.basic_blocks.next_index();
210
211         Ok(new_blocks)
212     }
213
214     fn check_mir_is_available(
215         &self,
216         caller_body: &Body<'tcx>,
217         callee: &Instance<'tcx>,
218     ) -> Result<(), &'static str> {
219         let caller_def_id = caller_body.source.def_id();
220         let callee_def_id = callee.def_id();
221         if callee_def_id == caller_def_id {
222             return Err("self-recursion");
223         }
224
225         match callee.def {
226             InstanceDef::Item(_) => {
227                 // If there is no MIR available (either because it was not in metadata or
228                 // because it has no MIR because it's an extern function), then the inliner
229                 // won't cause cycles on this.
230                 if !self.tcx.is_mir_available(callee_def_id) {
231                     return Err("item MIR unavailable");
232                 }
233             }
234             // These have no own callable MIR.
235             InstanceDef::Intrinsic(_) | InstanceDef::Virtual(..) => {
236                 return Err("instance without MIR (intrinsic / virtual)");
237             }
238             // This cannot result in an immediate cycle since the callee MIR is a shim, which does
239             // not get any optimizations run on it. Any subsequent inlining may cause cycles, but we
240             // do not need to catch this here, we can wait until the inliner decides to continue
241             // inlining a second time.
242             InstanceDef::VTableShim(_)
243             | InstanceDef::ReifyShim(_)
244             | InstanceDef::FnPtrShim(..)
245             | InstanceDef::ClosureOnceShim { .. }
246             | InstanceDef::DropGlue(..)
247             | InstanceDef::CloneShim(..) => return Ok(()),
248         }
249
250         if self.tcx.is_constructor(callee_def_id) {
251             trace!("constructors always have MIR");
252             // Constructor functions cannot cause a query cycle.
253             return Ok(());
254         }
255
256         if callee_def_id.is_local() {
257             // Avoid a cycle here by only using `instance_mir` only if we have
258             // a lower `DefPathHash` than the callee. This ensures that the callee will
259             // not inline us. This trick even works with incremental compilation,
260             // since `DefPathHash` is stable.
261             if self.tcx.def_path_hash(caller_def_id).local_hash()
262                 < self.tcx.def_path_hash(callee_def_id).local_hash()
263             {
264                 return Ok(());
265             }
266
267             // If we know for sure that the function we're calling will itself try to
268             // call us, then we avoid inlining that function.
269             if self.tcx.mir_callgraph_reachable((*callee, caller_def_id.expect_local())) {
270                 return Err("caller might be reachable from callee (query cycle avoidance)");
271             }
272
273             Ok(())
274         } else {
275             // This cannot result in an immediate cycle since the callee MIR is from another crate
276             // and is already optimized. Any subsequent inlining may cause cycles, but we do
277             // not need to catch this here, we can wait until the inliner decides to continue
278             // inlining a second time.
279             trace!("functions from other crates always have MIR");
280             Ok(())
281         }
282     }
283
284     fn resolve_callsite(
285         &self,
286         caller_body: &Body<'tcx>,
287         bb: BasicBlock,
288         bb_data: &BasicBlockData<'tcx>,
289     ) -> Option<CallSite<'tcx>> {
290         // Only consider direct calls to functions
291         let terminator = bb_data.terminator();
292         if let TerminatorKind::Call { ref func, target, fn_span, .. } = terminator.kind {
293             let func_ty = func.ty(caller_body, self.tcx);
294             if let ty::FnDef(def_id, substs) = *func_ty.kind() {
295                 // To resolve an instance its substs have to be fully normalized.
296                 let substs = self.tcx.try_normalize_erasing_regions(self.param_env, substs).ok()?;
297                 let callee =
298                     Instance::resolve(self.tcx, self.param_env, def_id, substs).ok().flatten()?;
299
300                 if let InstanceDef::Virtual(..) | InstanceDef::Intrinsic(_) = callee.def {
301                     return None;
302                 }
303
304                 let fn_sig = self.tcx.bound_fn_sig(def_id).subst(self.tcx, substs);
305                 let source_info = SourceInfo { span: fn_span, ..terminator.source_info };
306
307                 return Some(CallSite { callee, fn_sig, block: bb, target, source_info });
308             }
309         }
310
311         None
312     }
313
314     /// Returns an error if inlining is not possible based on codegen attributes alone. A success
315     /// indicates that inlining decision should be based on other criteria.
316     fn check_codegen_attributes(
317         &self,
318         callsite: &CallSite<'tcx>,
319         callee_attrs: &CodegenFnAttrs,
320     ) -> Result<(), &'static str> {
321         match callee_attrs.inline {
322             InlineAttr::Never => return Err("never inline hint"),
323             InlineAttr::Always | InlineAttr::Hint => {}
324             InlineAttr::None => {
325                 if self.tcx.sess.mir_opt_level() <= 2 {
326                     return Err("at mir-opt-level=2, only #[inline] is inlined");
327                 }
328             }
329         }
330
331         // Only inline local functions if they would be eligible for cross-crate
332         // inlining. This is to ensure that the final crate doesn't have MIR that
333         // reference unexported symbols
334         if callsite.callee.def_id().is_local() {
335             let is_generic = callsite.callee.substs.non_erasable_generics().next().is_some();
336             if !is_generic && !callee_attrs.requests_inline() {
337                 return Err("not exported");
338             }
339         }
340
341         if callsite.fn_sig.c_variadic() {
342             return Err("C variadic");
343         }
344
345         if callee_attrs.flags.contains(CodegenFnAttrFlags::COLD) {
346             return Err("cold");
347         }
348
349         if callee_attrs.no_sanitize != self.codegen_fn_attrs.no_sanitize {
350             return Err("incompatible sanitizer set");
351         }
352
353         // Two functions are compatible if the callee has no attribute (meaning
354         // that it's codegen agnostic), or sets an attribute that is identical
355         // to this function's attribute.
356         if callee_attrs.instruction_set.is_some()
357             && callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set
358         {
359             return Err("incompatible instruction set");
360         }
361
362         for feature in &callee_attrs.target_features {
363             if !self.codegen_fn_attrs.target_features.contains(feature) {
364                 return Err("incompatible target feature");
365             }
366         }
367
368         Ok(())
369     }
370
371     /// Returns inlining decision that is based on the examination of callee MIR body.
372     /// Assumes that codegen attributes have been checked for compatibility already.
373     #[instrument(level = "debug", skip(self, callee_body))]
374     fn check_mir_body(
375         &self,
376         callsite: &CallSite<'tcx>,
377         callee_body: &Body<'tcx>,
378         callee_attrs: &CodegenFnAttrs,
379     ) -> Result<(), &'static str> {
380         let tcx = self.tcx;
381
382         let mut threshold = if callee_attrs.requests_inline() {
383             self.tcx.sess.opts.unstable_opts.inline_mir_hint_threshold.unwrap_or(100)
384         } else {
385             self.tcx.sess.opts.unstable_opts.inline_mir_threshold.unwrap_or(50)
386         };
387
388         // Give a bonus functions with a small number of blocks,
389         // We normally have two or three blocks for even
390         // very small functions.
391         if callee_body.basic_blocks.len() <= 3 {
392             threshold += threshold / 4;
393         }
394         debug!("    final inline threshold = {}", threshold);
395
396         // FIXME: Give a bonus to functions with only a single caller
397         let diverges = matches!(
398             callee_body.basic_blocks[START_BLOCK].terminator().kind,
399             TerminatorKind::Unreachable | TerminatorKind::Call { target: None, .. }
400         );
401         if diverges && !matches!(callee_attrs.inline, InlineAttr::Always) {
402             return Err("callee diverges unconditionally");
403         }
404
405         let mut checker = CostChecker {
406             tcx: self.tcx,
407             param_env: self.param_env,
408             instance: callsite.callee,
409             callee_body,
410             cost: 0,
411             validation: Ok(()),
412         };
413
414         // Traverse the MIR manually so we can account for the effects of inlining on the CFG.
415         let mut work_list = vec![START_BLOCK];
416         let mut visited = BitSet::new_empty(callee_body.basic_blocks.len());
417         while let Some(bb) = work_list.pop() {
418             if !visited.insert(bb.index()) {
419                 continue;
420             }
421
422             let blk = &callee_body.basic_blocks[bb];
423             checker.visit_basic_block_data(bb, blk);
424
425             let term = blk.terminator();
426             if let TerminatorKind::Drop { ref place, target, unwind }
427             | TerminatorKind::DropAndReplace { ref place, target, unwind, .. } = term.kind
428             {
429                 work_list.push(target);
430
431                 // If the place doesn't actually need dropping, treat it like a regular goto.
432                 let ty = callsite.callee.subst_mir(self.tcx, &place.ty(callee_body, tcx).ty);
433                 if ty.needs_drop(tcx, self.param_env) && let Some(unwind) = unwind {
434                         work_list.push(unwind);
435                     }
436             } else if callee_attrs.instruction_set != self.codegen_fn_attrs.instruction_set
437                 && matches!(term.kind, TerminatorKind::InlineAsm { .. })
438             {
439                 // During the attribute checking stage we allow a callee with no
440                 // instruction_set assigned to count as compatible with a function that does
441                 // assign one. However, during this stage we require an exact match when any
442                 // inline-asm is detected. LLVM will still possibly do an inline later on
443                 // if the no-attribute function ends up with the same instruction set anyway.
444                 return Err("Cannot move inline-asm across instruction sets");
445             } else {
446                 work_list.extend(term.successors())
447             }
448         }
449
450         // Count up the cost of local variables and temps, if we know the size
451         // use that, otherwise we use a moderately-large dummy cost.
452         for v in callee_body.vars_and_temps_iter() {
453             checker.visit_local_decl(v, &callee_body.local_decls[v]);
454         }
455
456         // Abort if type validation found anything fishy.
457         checker.validation?;
458
459         let cost = checker.cost;
460         if let InlineAttr::Always = callee_attrs.inline {
461             debug!("INLINING {:?} because inline(always) [cost={}]", callsite, cost);
462             Ok(())
463         } else if cost <= threshold {
464             debug!("INLINING {:?} [cost={} <= threshold={}]", callsite, cost, threshold);
465             Ok(())
466         } else {
467             debug!("NOT inlining {:?} [cost={} > threshold={}]", callsite, cost, threshold);
468             Err("cost above threshold")
469         }
470     }
471
472     fn inline_call(
473         &self,
474         caller_body: &mut Body<'tcx>,
475         callsite: &CallSite<'tcx>,
476         mut callee_body: Body<'tcx>,
477     ) {
478         let terminator = caller_body[callsite.block].terminator.take().unwrap();
479         match terminator.kind {
480             TerminatorKind::Call { args, destination, cleanup, .. } => {
481                 // If the call is something like `a[*i] = f(i)`, where
482                 // `i : &mut usize`, then just duplicating the `a[*i]`
483                 // Place could result in two different locations if `f`
484                 // writes to `i`. To prevent this we need to create a temporary
485                 // borrow of the place and pass the destination as `*temp` instead.
486                 fn dest_needs_borrow(place: Place<'_>) -> bool {
487                     for elem in place.projection.iter() {
488                         match elem {
489                             ProjectionElem::Deref | ProjectionElem::Index(_) => return true,
490                             _ => {}
491                         }
492                     }
493
494                     false
495                 }
496
497                 let dest = if dest_needs_borrow(destination) {
498                     trace!("creating temp for return destination");
499                     let dest = Rvalue::Ref(
500                         self.tcx.lifetimes.re_erased,
501                         BorrowKind::Mut { allow_two_phase_borrow: false },
502                         destination,
503                     );
504                     let dest_ty = dest.ty(caller_body, self.tcx);
505                     let temp = Place::from(self.new_call_temp(caller_body, &callsite, dest_ty));
506                     caller_body[callsite.block].statements.push(Statement {
507                         source_info: callsite.source_info,
508                         kind: StatementKind::Assign(Box::new((temp, dest))),
509                     });
510                     self.tcx.mk_place_deref(temp)
511                 } else {
512                     destination
513                 };
514
515                 // Copy the arguments if needed.
516                 let args: Vec<_> = self.make_call_args(args, &callsite, caller_body, &callee_body);
517
518                 let mut expn_data = ExpnData::default(
519                     ExpnKind::Inlined,
520                     callsite.source_info.span,
521                     self.tcx.sess.edition(),
522                     None,
523                     None,
524                 );
525                 expn_data.def_site = callee_body.span;
526                 let expn_data =
527                     self.tcx.with_stable_hashing_context(|hcx| LocalExpnId::fresh(expn_data, hcx));
528                 let mut integrator = Integrator {
529                     args: &args,
530                     new_locals: Local::new(caller_body.local_decls.len())..,
531                     new_scopes: SourceScope::new(caller_body.source_scopes.len())..,
532                     new_blocks: BasicBlock::new(caller_body.basic_blocks.len())..,
533                     destination: dest,
534                     callsite_scope: caller_body.source_scopes[callsite.source_info.scope].clone(),
535                     callsite,
536                     cleanup_block: cleanup,
537                     in_cleanup_block: false,
538                     tcx: self.tcx,
539                     expn_data,
540                     always_live_locals: BitSet::new_filled(callee_body.local_decls.len()),
541                 };
542
543                 // Map all `Local`s, `SourceScope`s and `BasicBlock`s to new ones
544                 // (or existing ones, in a few special cases) in the caller.
545                 integrator.visit_body(&mut callee_body);
546
547                 // If there are any locals without storage markers, give them storage only for the
548                 // duration of the call.
549                 for local in callee_body.vars_and_temps_iter() {
550                     if !callee_body.local_decls[local].internal
551                         && integrator.always_live_locals.contains(local)
552                     {
553                         let new_local = integrator.map_local(local);
554                         caller_body[callsite.block].statements.push(Statement {
555                             source_info: callsite.source_info,
556                             kind: StatementKind::StorageLive(new_local),
557                         });
558                     }
559                 }
560                 if let Some(block) = callsite.target {
561                     // To avoid repeated O(n) insert, push any new statements to the end and rotate
562                     // the slice once.
563                     let mut n = 0;
564                     for local in callee_body.vars_and_temps_iter().rev() {
565                         if !callee_body.local_decls[local].internal
566                             && integrator.always_live_locals.contains(local)
567                         {
568                             let new_local = integrator.map_local(local);
569                             caller_body[block].statements.push(Statement {
570                                 source_info: callsite.source_info,
571                                 kind: StatementKind::StorageDead(new_local),
572                             });
573                             n += 1;
574                         }
575                     }
576                     caller_body[block].statements.rotate_right(n);
577                 }
578
579                 // Insert all of the (mapped) parts of the callee body into the caller.
580                 caller_body.local_decls.extend(callee_body.drain_vars_and_temps());
581                 caller_body.source_scopes.extend(&mut callee_body.source_scopes.drain(..));
582                 caller_body.var_debug_info.append(&mut callee_body.var_debug_info);
583                 caller_body.basic_blocks_mut().extend(callee_body.basic_blocks_mut().drain(..));
584
585                 caller_body[callsite.block].terminator = Some(Terminator {
586                     source_info: callsite.source_info,
587                     kind: TerminatorKind::Goto { target: integrator.map_block(START_BLOCK) },
588                 });
589
590                 // Copy only unevaluated constants from the callee_body into the caller_body.
591                 // Although we are only pushing `ConstKind::Unevaluated` consts to
592                 // `required_consts`, here we may not only have `ConstKind::Unevaluated`
593                 // because we are calling `subst_and_normalize_erasing_regions`.
594                 caller_body.required_consts.extend(
595                     callee_body.required_consts.iter().copied().filter(|&ct| match ct.literal {
596                         ConstantKind::Ty(_) => {
597                             bug!("should never encounter ty::UnevaluatedConst in `required_consts`")
598                         }
599                         ConstantKind::Val(..) | ConstantKind::Unevaluated(..) => true,
600                     }),
601                 );
602             }
603             kind => bug!("unexpected terminator kind {:?}", kind),
604         }
605     }
606
607     fn make_call_args(
608         &self,
609         args: Vec<Operand<'tcx>>,
610         callsite: &CallSite<'tcx>,
611         caller_body: &mut Body<'tcx>,
612         callee_body: &Body<'tcx>,
613     ) -> Vec<Local> {
614         let tcx = self.tcx;
615
616         // There is a bit of a mismatch between the *caller* of a closure and the *callee*.
617         // The caller provides the arguments wrapped up in a tuple:
618         //
619         //     tuple_tmp = (a, b, c)
620         //     Fn::call(closure_ref, tuple_tmp)
621         //
622         // meanwhile the closure body expects the arguments (here, `a`, `b`, and `c`)
623         // as distinct arguments. (This is the "rust-call" ABI hack.) Normally, codegen has
624         // the job of unpacking this tuple. But here, we are codegen. =) So we want to create
625         // a vector like
626         //
627         //     [closure_ref, tuple_tmp.0, tuple_tmp.1, tuple_tmp.2]
628         //
629         // Except for one tiny wrinkle: we don't actually want `tuple_tmp.0`. It's more convenient
630         // if we "spill" that into *another* temporary, so that we can map the argument
631         // variable in the callee MIR directly to an argument variable on our side.
632         // So we introduce temporaries like:
633         //
634         //     tmp0 = tuple_tmp.0
635         //     tmp1 = tuple_tmp.1
636         //     tmp2 = tuple_tmp.2
637         //
638         // and the vector is `[closure_ref, tmp0, tmp1, tmp2]`.
639         if callsite.fn_sig.abi() == Abi::RustCall && callee_body.spread_arg.is_none() {
640             let mut args = args.into_iter();
641             let self_ = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_body);
642             let tuple = self.create_temp_if_necessary(args.next().unwrap(), callsite, caller_body);
643             assert!(args.next().is_none());
644
645             let tuple = Place::from(tuple);
646             let ty::Tuple(tuple_tys) = tuple.ty(caller_body, tcx).ty.kind() else {
647                 bug!("Closure arguments are not passed as a tuple");
648             };
649
650             // The `closure_ref` in our example above.
651             let closure_ref_arg = iter::once(self_);
652
653             // The `tmp0`, `tmp1`, and `tmp2` in our example above.
654             let tuple_tmp_args = tuple_tys.iter().enumerate().map(|(i, ty)| {
655                 // This is e.g., `tuple_tmp.0` in our example above.
656                 let tuple_field = Operand::Move(tcx.mk_place_field(tuple, Field::new(i), ty));
657
658                 // Spill to a local to make e.g., `tmp0`.
659                 self.create_temp_if_necessary(tuple_field, callsite, caller_body)
660             });
661
662             closure_ref_arg.chain(tuple_tmp_args).collect()
663         } else {
664             args.into_iter()
665                 .map(|a| self.create_temp_if_necessary(a, callsite, caller_body))
666                 .collect()
667         }
668     }
669
670     /// If `arg` is already a temporary, returns it. Otherwise, introduces a fresh
671     /// temporary `T` and an instruction `T = arg`, and returns `T`.
672     fn create_temp_if_necessary(
673         &self,
674         arg: Operand<'tcx>,
675         callsite: &CallSite<'tcx>,
676         caller_body: &mut Body<'tcx>,
677     ) -> Local {
678         // Reuse the operand if it is a moved temporary.
679         if let Operand::Move(place) = &arg
680             && let Some(local) = place.as_local()
681             && caller_body.local_kind(local) == LocalKind::Temp
682         {
683             return local;
684         }
685
686         // Otherwise, create a temporary for the argument.
687         trace!("creating temp for argument {:?}", arg);
688         let arg_ty = arg.ty(caller_body, self.tcx);
689         let local = self.new_call_temp(caller_body, callsite, arg_ty);
690         caller_body[callsite.block].statements.push(Statement {
691             source_info: callsite.source_info,
692             kind: StatementKind::Assign(Box::new((Place::from(local), Rvalue::Use(arg)))),
693         });
694         local
695     }
696
697     /// Introduces a new temporary into the caller body that is live for the duration of the call.
698     fn new_call_temp(
699         &self,
700         caller_body: &mut Body<'tcx>,
701         callsite: &CallSite<'tcx>,
702         ty: Ty<'tcx>,
703     ) -> Local {
704         let local = caller_body.local_decls.push(LocalDecl::new(ty, callsite.source_info.span));
705
706         caller_body[callsite.block].statements.push(Statement {
707             source_info: callsite.source_info,
708             kind: StatementKind::StorageLive(local),
709         });
710
711         if let Some(block) = callsite.target {
712             caller_body[block].statements.insert(
713                 0,
714                 Statement {
715                     source_info: callsite.source_info,
716                     kind: StatementKind::StorageDead(local),
717                 },
718             );
719         }
720
721         local
722     }
723 }
724
725 fn type_size_of<'tcx>(
726     tcx: TyCtxt<'tcx>,
727     param_env: ty::ParamEnv<'tcx>,
728     ty: Ty<'tcx>,
729 ) -> Option<u64> {
730     tcx.layout_of(param_env.and(ty)).ok().map(|layout| layout.size.bytes())
731 }
732
/// Verify that the callee body is compatible with the caller.
///
/// This visitor mostly computes the inlining cost,
/// but also needs to verify that types match because of normalization failure.
struct CostChecker<'b, 'tcx> {
    tcx: TyCtxt<'tcx>,
    /// Parameter environment used for the `needs_drop`, layout and type-equality queries.
    param_env: ParamEnv<'tcx>,
    /// Running estimate of the inlining cost, increased by the visitor methods.
    cost: usize,
    /// The callee body whose places and local declarations are inspected.
    callee_body: &'b Body<'tcx>,
    /// Instance whose substitutions are applied (via `subst_mir`) to callee types
    /// before they are queried.
    instance: ty::Instance<'tcx>,
    /// Set to `Err(reason)` by `visit_projection_elem` when a field projection
    /// cannot be validated.
    validation: Result<(), &'static str>,
}
745
746 impl<'tcx> Visitor<'tcx> for CostChecker<'_, 'tcx> {
747     fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
748         // Don't count StorageLive/StorageDead in the inlining cost.
749         match statement.kind {
750             StatementKind::StorageLive(_)
751             | StatementKind::StorageDead(_)
752             | StatementKind::Deinit(_)
753             | StatementKind::Nop => {}
754             _ => self.cost += INSTR_COST,
755         }
756
757         self.super_statement(statement, location);
758     }
759
760     fn visit_terminator(&mut self, terminator: &Terminator<'tcx>, location: Location) {
761         let tcx = self.tcx;
762         match terminator.kind {
763             TerminatorKind::Drop { ref place, unwind, .. }
764             | TerminatorKind::DropAndReplace { ref place, unwind, .. } => {
765                 // If the place doesn't actually need dropping, treat it like a regular goto.
766                 let ty = self.instance.subst_mir(tcx, &place.ty(self.callee_body, tcx).ty);
767                 if ty.needs_drop(tcx, self.param_env) {
768                     self.cost += CALL_PENALTY;
769                     if unwind.is_some() {
770                         self.cost += LANDINGPAD_PENALTY;
771                     }
772                 } else {
773                     self.cost += INSTR_COST;
774                 }
775             }
776             TerminatorKind::Call { func: Operand::Constant(ref f), cleanup, .. } => {
777                 let fn_ty = self.instance.subst_mir(tcx, &f.literal.ty());
778                 self.cost += if let ty::FnDef(def_id, _) = *fn_ty.kind() && tcx.is_intrinsic(def_id) {
779                     // Don't give intrinsics the extra penalty for calls
780                     INSTR_COST
781                 } else {
782                     CALL_PENALTY
783                 };
784                 if cleanup.is_some() {
785                     self.cost += LANDINGPAD_PENALTY;
786                 }
787             }
788             TerminatorKind::Assert { cleanup, .. } => {
789                 self.cost += CALL_PENALTY;
790                 if cleanup.is_some() {
791                     self.cost += LANDINGPAD_PENALTY;
792                 }
793             }
794             TerminatorKind::Resume => self.cost += RESUME_PENALTY,
795             TerminatorKind::InlineAsm { cleanup, .. } => {
796                 self.cost += INSTR_COST;
797                 if cleanup.is_some() {
798                     self.cost += LANDINGPAD_PENALTY;
799                 }
800             }
801             _ => self.cost += INSTR_COST,
802         }
803
804         self.super_terminator(terminator, location);
805     }
806
807     /// Count up the cost of local variables and temps, if we know the size
808     /// use that, otherwise we use a moderately-large dummy cost.
809     fn visit_local_decl(&mut self, local: Local, local_decl: &LocalDecl<'tcx>) {
810         let tcx = self.tcx;
811         let ptr_size = tcx.data_layout.pointer_size.bytes();
812
813         let ty = self.instance.subst_mir(tcx, &local_decl.ty);
814         // Cost of the var is the size in machine-words, if we know
815         // it.
816         if let Some(size) = type_size_of(tcx, self.param_env, ty) {
817             self.cost += ((size + ptr_size - 1) / ptr_size) as usize;
818         } else {
819             self.cost += UNKNOWN_SIZE_COST;
820         }
821
822         self.super_local_decl(local, local_decl)
823     }
824
    /// This method duplicates code from MIR validation in an attempt to detect type mismatches due
    /// to normalization failure.
    ///
    /// Only `Field` projections are checked: the type recorded in the projection is
    /// compared against the field type derived from the parent place's type. On a
    /// mismatch, or when the field cannot be found, `self.validation` is set to an
    /// `Err` describing the problem.
    ///
    /// Note that the "field not found" paths return early and therefore do not
    /// recurse via `super_projection_elem`.
    fn visit_projection_elem(
        &mut self,
        local: Local,
        proj_base: &[PlaceElem<'tcx>],
        elem: PlaceElem<'tcx>,
        context: PlaceContext,
        location: Location,
    ) {
        if let ProjectionElem::Field(f, ty) = elem {
            // Rebuild the place the field is projected out of and compute its type.
            let parent = Place { local, projection: self.tcx.intern_place_elems(proj_base) };
            let parent_ty = parent.ty(&self.callee_body.local_decls, self.tcx);
            // Records a validation failure when the projection's recorded type `ty`
            // does not match the expected field type `f_ty`.
            let check_equal = |this: &mut Self, f_ty| {
                if !util::is_equal_up_to_subtyping(this.tcx, this.param_env, ty, f_ty) {
                    trace!(?ty, ?f_ty);
                    this.validation = Err("failed to normalize projection type");
                    return;
                }
            };

            // For an opaque alias, inspect the type it is defined as instead.
            let kind = match parent_ty.ty.kind() {
                &ty::Alias(ty::Opaque, ty::AliasTy { def_id, substs, .. }) => {
                    self.tcx.bound_type_of(def_id).subst(self.tcx, substs).kind()
                }
                kind => kind,
            };

            match kind {
                ty::Tuple(fields) => {
                    let Some(f_ty) = fields.get(f.as_usize()) else {
                        self.validation = Err("malformed MIR");
                        return;
                    };
                    check_equal(self, *f_ty);
                }
                ty::Adt(adt_def, substs) => {
                    // Without a downcast, the field is looked up in variant 0.
                    let var = parent_ty.variant_index.unwrap_or(VariantIdx::from_u32(0));
                    let Some(field) = adt_def.variant(var).fields.get(f.as_usize()) else {
                        self.validation = Err("malformed MIR");
                        return;
                    };
                    check_equal(self, field.ty(self.tcx, substs));
                }
                ty::Closure(_, substs) => {
                    // Fields of a closure are its upvars.
                    let substs = substs.as_closure();
                    let Some(f_ty) = substs.upvar_tys().nth(f.as_usize()) else {
                        self.validation = Err("malformed MIR");
                        return;
                    };
                    check_equal(self, f_ty);
                }
                &ty::Generator(def_id, substs, _) => {
                    let f_ty = if let Some(var) = parent_ty.variant_index {
                        // With a variant index, the field type comes from the generator
                        // layout. The callee body is used directly when it is the
                        // generator's own body; otherwise its optimized MIR is queried.
                        let gen_body = if def_id == self.callee_body.source.def_id() {
                            self.callee_body
                        } else {
                            self.tcx.optimized_mir(def_id)
                        };

                        let Some(layout) = gen_body.generator_layout() else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        let Some(&local) = layout.variant_fields[var].get(f) else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        let Some(&f_ty) = layout.field_tys.get(local) else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        f_ty
                    } else {
                        // Without a variant index, the field is one of the prefix types.
                        let Some(f_ty) = substs.as_generator().prefix_tys().nth(f.index()) else {
                            self.validation = Err("malformed MIR");
                            return;
                        };

                        f_ty
                    };

                    check_equal(self, f_ty);
                }
                // A field projection out of any other kind of type is rejected, but
                // unlike the early returns above this path still falls through to
                // `super_projection_elem` below.
                _ => self.validation = Err("malformed MIR"),
            }
        }

        self.super_projection_elem(local, proj_base, elem, context, location);
    }
918 }
919
/// Integrator.
///
/// Integrates blocks from the callee function into the calling function.
/// Updates block indices, references to locals and other control flow
/// stuff.
struct Integrator<'a, 'tcx> {
    /// Caller locals that replace the callee's argument locals; callee local
    /// `_N` (for `N` in `1..=args.len()`) maps to `args[N - 1]`.
    args: &'a [Local],
    /// Range whose start is the caller index given to the first non-argument callee local.
    new_locals: RangeFrom<Local>,
    /// Range whose start is added to every callee source scope index.
    new_scopes: RangeFrom<SourceScope>,
    /// Range whose start is added to every callee basic block index.
    new_blocks: RangeFrom<BasicBlock>,
    /// Place that stands in for the callee's `RETURN_PLACE`.
    destination: Place<'tcx>,
    /// Scope data consulted when attaching the outermost callee scope
    /// (presumably the data of the callsite's own scope; confirm at the construction site).
    callsite_scope: SourceScopeData<'tcx>,
    callsite: &'a CallSite<'tcx>,
    /// Block used as the unwind target for callee terminators that have none,
    /// and as the jump target replacing `Resume`.
    cleanup_block: Option<BasicBlock>,
    /// True while the block being visited is a cleanup block.
    in_cleanup_block: bool,
    tcx: TyCtxt<'tcx>,
    /// Expansion id applied to every visited span via `fresh_expansion`.
    expn_data: LocalExpnId,
    /// Locals are removed from this set as soon as a `StorageLive` or
    /// `StorageDead` statement mentioning them is visited.
    always_live_locals: BitSet<Local>,
}
941
942 impl Integrator<'_, '_> {
943     fn map_local(&self, local: Local) -> Local {
944         let new = if local == RETURN_PLACE {
945             self.destination.local
946         } else {
947             let idx = local.index() - 1;
948             if idx < self.args.len() {
949                 self.args[idx]
950             } else {
951                 Local::new(self.new_locals.start.index() + (idx - self.args.len()))
952             }
953         };
954         trace!("mapping local `{:?}` to `{:?}`", local, new);
955         new
956     }
957
958     fn map_scope(&self, scope: SourceScope) -> SourceScope {
959         let new = SourceScope::new(self.new_scopes.start.index() + scope.index());
960         trace!("mapping scope `{:?}` to `{:?}`", scope, new);
961         new
962     }
963
964     fn map_block(&self, block: BasicBlock) -> BasicBlock {
965         let new = BasicBlock::new(self.new_blocks.start.index() + block.index());
966         trace!("mapping block `{:?}` to `{:?}`", block, new);
967         new
968     }
969
970     fn map_unwind(&self, unwind: Option<BasicBlock>) -> Option<BasicBlock> {
971         if self.in_cleanup_block {
972             if unwind.is_some() {
973                 bug!("cleanup on cleanup block");
974             }
975             return unwind;
976         }
977
978         match unwind {
979             Some(target) => Some(self.map_block(target)),
980             // Add an unwind edge to the original call's cleanup block
981             None => self.cleanup_block,
982         }
983     }
984 }
985
986 impl<'tcx> MutVisitor<'tcx> for Integrator<'_, 'tcx> {
    /// Returns the type context stored in this integrator.
    fn tcx(&self) -> TyCtxt<'tcx> {
        self.tcx
    }
990
    /// Replaces a callee local with the caller local chosen by `map_local`;
    /// the context and location are not consulted.
    fn visit_local(&mut self, local: &mut Local, _ctxt: PlaceContext, _location: Location) {
        *local = self.map_local(*local);
    }
994
995     fn visit_source_scope_data(&mut self, scope_data: &mut SourceScopeData<'tcx>) {
996         self.super_source_scope_data(scope_data);
997         if scope_data.parent_scope.is_none() {
998             // Attach the outermost callee scope as a child of the callsite
999             // scope, via the `parent_scope` and `inlined_parent_scope` chains.
1000             scope_data.parent_scope = Some(self.callsite.source_info.scope);
1001             assert_eq!(scope_data.inlined_parent_scope, None);
1002             scope_data.inlined_parent_scope = if self.callsite_scope.inlined.is_some() {
1003                 Some(self.callsite.source_info.scope)
1004             } else {
1005                 self.callsite_scope.inlined_parent_scope
1006             };
1007
1008             // Mark the outermost callee scope as an inlined one.
1009             assert_eq!(scope_data.inlined, None);
1010             scope_data.inlined = Some((self.callsite.callee, self.callsite.source_info.span));
1011         } else if scope_data.inlined_parent_scope.is_none() {
1012             // Make it easy to find the scope with `inlined` set above.
1013             scope_data.inlined_parent_scope = Some(self.map_scope(OUTERMOST_SOURCE_SCOPE));
1014         }
1015     }
1016
    /// Replaces a callee source scope with its shifted counterpart from `map_scope`.
    fn visit_source_scope(&mut self, scope: &mut SourceScope) {
        *scope = self.map_scope(*scope);
    }
1020
    /// Re-marks a callee span with this integrator's expansion id (`expn_data`).
    fn visit_span(&mut self, span: &mut Span) {
        // Make sure that all spans track the fact that they were inlined.
        *span = span.fresh_expansion(self.expn_data);
    }
1025
1026     fn visit_place(&mut self, place: &mut Place<'tcx>, context: PlaceContext, location: Location) {
1027         for elem in place.projection {
1028             // FIXME: Make sure that return place is not used in an indexing projection, since it
1029             // won't be rebased as it is supposed to be.
1030             assert_ne!(ProjectionElem::Index(RETURN_PLACE), elem);
1031         }
1032
1033         // If this is the `RETURN_PLACE`, we need to rebase any projections onto it.
1034         let dest_proj_len = self.destination.projection.len();
1035         if place.local == RETURN_PLACE && dest_proj_len > 0 {
1036             let mut projs = Vec::with_capacity(dest_proj_len + place.projection.len());
1037             projs.extend(self.destination.projection);
1038             projs.extend(place.projection);
1039
1040             place.projection = self.tcx.intern_place_elems(&*projs);
1041         }
1042         // Handles integrating any locals that occur in the base
1043         // or projections
1044         self.super_place(place, context, location)
1045     }
1046
    /// Visits one callee block while tracking whether it is a cleanup block.
    ///
    /// `in_cleanup_block` is set from `data.is_cleanup` for the duration of the
    /// visit (it is read by `map_unwind`) and reset to `false` afterwards.
    fn visit_basic_block_data(&mut self, block: BasicBlock, data: &mut BasicBlockData<'tcx>) {
        self.in_cleanup_block = data.is_cleanup;
        self.super_basic_block_data(block, data);
        self.in_cleanup_block = false;
    }
1052
1053     fn visit_retag(&mut self, kind: &mut RetagKind, place: &mut Place<'tcx>, loc: Location) {
1054         self.super_retag(kind, place, loc);
1055
1056         // We have to patch all inlined retags to be aware that they are no longer
1057         // happening on function entry.
1058         if *kind == RetagKind::FnEntry {
1059             *kind = RetagKind::Default;
1060         }
1061     }
1062
1063     fn visit_statement(&mut self, statement: &mut Statement<'tcx>, location: Location) {
1064         if let StatementKind::StorageLive(local) | StatementKind::StorageDead(local) =
1065             statement.kind
1066         {
1067             self.always_live_locals.remove(local);
1068         }
1069         self.super_statement(statement, location);
1070     }
1071
    /// Rewrites a callee terminator so that it is valid inside the caller.
    ///
    /// Successor blocks are shifted with `map_block` and unwind/cleanup edges are
    /// translated with `map_unwind`. `Return` is replaced by a jump to the callsite's
    /// target block (or `Unreachable` when the call has no target), and `Resume` is
    /// replaced by a jump to the caller's cleanup block when there is one.
    ///
    /// Hits `bug!` on `GeneratorDrop`, `Yield` and `FalseUnwind` terminators.
    fn visit_terminator(&mut self, terminator: &mut Terminator<'tcx>, loc: Location) {
        // Don't try to modify the implicit `_0` access on return (`return` terminators are
        // replaced down below anyways).
        if !matches!(terminator.kind, TerminatorKind::Return) {
            self.super_terminator(terminator, loc);
        }

        match terminator.kind {
            TerminatorKind::GeneratorDrop | TerminatorKind::Yield { .. } => bug!(),
            TerminatorKind::Goto { ref mut target } => {
                *target = self.map_block(*target);
            }
            TerminatorKind::SwitchInt { ref mut targets, .. } => {
                for tgt in targets.all_targets_mut() {
                    *tgt = self.map_block(*tgt);
                }
            }
            TerminatorKind::Drop { ref mut target, ref mut unwind, .. }
            | TerminatorKind::DropAndReplace { ref mut target, ref mut unwind, .. } => {
                *target = self.map_block(*target);
                *unwind = self.map_unwind(*unwind);
            }
            TerminatorKind::Call { ref mut target, ref mut cleanup, .. } => {
                if let Some(ref mut tgt) = *target {
                    *tgt = self.map_block(*tgt);
                }
                *cleanup = self.map_unwind(*cleanup);
            }
            TerminatorKind::Assert { ref mut target, ref mut cleanup, .. } => {
                *target = self.map_block(*target);
                *cleanup = self.map_unwind(*cleanup);
            }
            TerminatorKind::Return => {
                // The callsite target is used as-is, without `map_block`
                // (presumably because it already is a caller block).
                terminator.kind = if let Some(tgt) = self.callsite.target {
                    TerminatorKind::Goto { target: tgt }
                } else {
                    TerminatorKind::Unreachable
                }
            }
            TerminatorKind::Resume => {
                // Without a caller cleanup block the `Resume` is kept unchanged.
                if let Some(tgt) = self.cleanup_block {
                    terminator.kind = TerminatorKind::Goto { target: tgt }
                }
            }
            TerminatorKind::Abort => {}
            TerminatorKind::Unreachable => {}
            TerminatorKind::FalseEdge { ref mut real_target, ref mut imaginary_target } => {
                *real_target = self.map_block(*real_target);
                *imaginary_target = self.map_block(*imaginary_target);
            }
            TerminatorKind::FalseUnwind { real_target: _, unwind: _ } =>
            // see the ordering of passes in the optimized_mir query.
            {
                bug!("False unwinds should have been removed before inlining")
            }
            TerminatorKind::InlineAsm { ref mut destination, ref mut cleanup, .. } => {
                if let Some(ref mut tgt) = *destination {
                    *tgt = self.map_block(*tgt);
                }
                *cleanup = self.map_unwind(*cleanup);
            }
        }
    }
1135 }