]> git.lizzy.rs Git - rust.git/blob - compiler/rustc_mir_transform/src/dest_prop.rs
Auto merge of #92740 - cuviper:update-rayons, r=Mark-Simulacrum
[rust.git] / compiler / rustc_mir_transform / src / dest_prop.rs
1 //! Propagates assignment destinations backwards in the CFG to eliminate redundant assignments.
2 //!
3 //! # Motivation
4 //!
5 //! MIR building can insert a lot of redundant copies, and Rust code in general often tends to move
6 //! values around a lot. The result is a lot of assignments of the form `dest = {move} src;` in MIR.
7 //! MIR building for constants in particular tends to create additional locals that are only used
8 //! inside a single block to shuffle a value around unnecessarily.
9 //!
10 //! LLVM by itself is not good enough at eliminating these redundant copies (eg. see
11 //! <https://github.com/rust-lang/rust/issues/32966>), so this leaves some performance on the table
12 //! that we can regain by implementing an optimization for removing these assign statements in rustc
13 //! itself. When this optimization runs fast enough, it can also speed up the constant evaluation
14 //! and code generation phases of rustc due to the reduced number of statements and locals.
15 //!
16 //! # The Optimization
17 //!
18 //! Conceptually, this optimization is "destination propagation". It is similar to the Named Return
19 //! Value Optimization, or NRVO, known from the C++ world, except that it isn't limited to return
20 //! values or the return place `_0`. On a very high level, independent of the actual implementation
21 //! details, it does the following:
22 //!
23 //! 1) Identify `dest = src;` statements that can be soundly eliminated.
24 //! 2) Replace all mentions of `src` with `dest` ("unifying" them and propagating the destination
25 //!    backwards).
26 //! 3) Delete the `dest = src;` statement (by making it a `nop`).
27 //!
28 //! Step 1) is by far the hardest, so it is explained in more detail below.
29 //!
30 //! ## Soundness
31 //!
32 //! Given an `Assign` statement `dest = src;`, where `dest` is a `Place` and `src` is an `Rvalue`,
33 //! there are a few requirements that must hold for the optimization to be sound:
34 //!
35 //! * `dest` must not contain any *indirection* through a pointer. It must access part of the base
36 //!   local. Otherwise it might point to arbitrary memory that is hard to track.
37 //!
38 //!   It must also not contain any indexing projections, since those take an arbitrary `Local` as
39 //!   the index, and that local might only be initialized shortly before `dest` is used.
40 //!
//!   Subtle case: If `dest` is a union, or projects through a union, then we have to make sure that there
42 //!   remains an assignment to it, since that sets the "active field" of the union. But if `src` is
43 //!   a ZST, it might not be initialized, so there might not be any use of it before the assignment,
44 //!   and performing the optimization would simply delete the assignment, leaving `dest`
45 //!   uninitialized.
46 //!
47 //! * `src` must be a bare `Local` without any indirections or field projections (FIXME: Is this a
48 //!   fundamental restriction or just current impl state?). It can be copied or moved by the
49 //!   assignment.
50 //!
51 //! * The `dest` and `src` locals must never be [*live*][liveness] at the same time. If they are, it
52 //!   means that they both hold a (potentially different) value that is needed by a future use of
53 //!   the locals. Unifying them would overwrite one of the values.
54 //!
55 //!   Note that computing liveness of locals that have had their address taken is more difficult:
56 //!   Short of doing full escape analysis on the address/pointer/reference, the pass would need to
57 //!   assume that any operation that can potentially involve opaque user code (such as function
58 //!   calls, destructors, and inline assembly) may access any local that had its address taken
59 //!   before that point.
60 //!
61 //! Here, the first two conditions are simple structural requirements on the `Assign` statements
62 //! that can be trivially checked. The liveness requirement however is more difficult and costly to
63 //! check.
64 //!
65 //! ## Previous Work
66 //!
67 //! A [previous attempt] at implementing an optimization like this turned out to be a significant
68 //! regression in compiler performance. Fixing the regressions introduced a lot of undesirable
69 //! complexity to the implementation.
70 //!
71 //! A [subsequent approach] tried to avoid the costly computation by limiting itself to acyclic
72 //! CFGs, but still turned out to be far too costly to run due to suboptimal performance within
73 //! individual basic blocks, requiring a walk across the entire block for every assignment found
74 //! within the block. For the `tuple-stress` benchmark, which has 458745 statements in a single
75 //! block, this proved to be far too costly.
76 //!
77 //! Since the first attempt at this, the compiler has improved dramatically, and new analysis
78 //! frameworks have been added that should make this approach viable without requiring a limited
79 //! approach that only works for some classes of CFGs:
80 //! - rustc now has a powerful dataflow analysis framework that can handle forwards and backwards
81 //!   analyses efficiently.
82 //! - Layout optimizations for generators have been added to improve code generation for
83 //!   async/await, which are very similar in spirit to what this optimization does. Both walk the
84 //!   MIR and record conflicting uses of locals in a `BitMatrix`.
85 //!
86 //! Also, rustc now has a simple NRVO pass (see `nrvo.rs`), which handles a subset of the cases that
87 //! this destination propagation pass handles, proving that similar optimizations can be performed
88 //! on MIR.
89 //!
90 //! ## Pre/Post Optimization
91 //!
92 //! It is recommended to run `SimplifyCfg` and then `SimplifyLocals` some time after this pass, as
93 //! it replaces the eliminated assign statements with `nop`s and leaves unused locals behind.
94 //!
95 //! [liveness]: https://en.wikipedia.org/wiki/Live_variable_analysis
96 //! [previous attempt]: https://github.com/rust-lang/rust/pull/47954
97 //! [subsequent approach]: https://github.com/rust-lang/rust/pull/71003
98
99 use crate::MirPass;
100 use itertools::Itertools;
101 use rustc_data_structures::unify::{InPlaceUnificationTable, UnifyKey};
102 use rustc_index::{
103     bit_set::{BitMatrix, BitSet},
104     vec::IndexVec,
105 };
106 use rustc_middle::mir::tcx::PlaceTy;
107 use rustc_middle::mir::visit::{MutVisitor, PlaceContext, Visitor};
108 use rustc_middle::mir::{dump_mir, PassWhere};
109 use rustc_middle::mir::{
110     traversal, Body, InlineAsmOperand, Local, LocalKind, Location, Operand, Place, PlaceElem,
111     Rvalue, Statement, StatementKind, Terminator, TerminatorKind,
112 };
113 use rustc_middle::ty::TyCtxt;
114 use rustc_mir_dataflow::impls::{MaybeInitializedLocals, MaybeLiveLocals};
115 use rustc_mir_dataflow::Analysis;
116
// Empirical measurements have resulted in some observations:
// - Running on a body with a single block and 500 locals takes barely any time
// - Running on a body with ~400 blocks and ~300 relevant locals takes "too long"
// ...so we just limit both to somewhat reasonable-ish looking values.
/// Maximum number of "relevant" locals (locals appearing in a candidate assignment) before the
/// pass bails out without optimizing, to keep compile times in check.
const MAX_LOCALS: usize = 500;
/// Maximum number of basic blocks before the pass bails out without optimizing.
const MAX_BLOCKS: usize = 250;
123
/// The destination propagation MIR pass; see the module-level documentation for the motivation,
/// the algorithm, and its soundness requirements.
pub struct DestinationPropagation;
125
impl<'tcx> MirPass<'tcx> for DestinationPropagation {
    fn is_enabled(&self, sess: &rustc_session::Session) -> bool {
        //  FIXME(#79191, #82678): This is unsound.
        //
        // Only run at mir-opt-level=3 or higher for now (we don't fix up debuginfo and remove
        // storage statements at the moment).
        sess.opts.debugging_opts.unsound_mir_opts && sess.mir_opt_level() >= 3
    }

    fn run_pass(&self, tcx: TyCtxt<'tcx>, body: &mut Body<'tcx>) {
        let def_id = body.source.def_id();

        // Find `dest = src` assignments that are structurally eligible for merging (step 1 of
        // the module docs, minus the liveness/conflict check performed below).
        let candidates = find_candidates(tcx, body);
        if candidates.is_empty() {
            debug!("{:?}: no dest prop candidates, done", def_id);
            return;
        }

        // Collect all locals we care about. We only compute conflicts for these to save time.
        let mut relevant_locals = BitSet::new_empty(body.local_decls.len());
        for CandidateAssignment { dest, src, loc: _ } in &candidates {
            relevant_locals.insert(dest.local);
            relevant_locals.insert(*src);
        }

        // This pass unfortunately has `O(l² * s)` performance, where `l` is the number of locals
        // and `s` is the number of statements and terminators in the function.
        // To prevent blowing up compile times too much, we bail out when there are too many locals.
        let relevant = relevant_locals.count();
        debug!(
            "{:?}: {} locals ({} relevant), {} blocks",
            def_id,
            body.local_decls.len(),
            relevant,
            body.basic_blocks().len()
        );
        if relevant > MAX_LOCALS {
            warn!(
                "too many candidate locals in {:?} ({}, max is {}), not optimizing",
                def_id, relevant, MAX_LOCALS
            );
            return;
        }
        if body.basic_blocks().len() > MAX_BLOCKS {
            warn!(
                "too many blocks in {:?} ({}, max is {}), not optimizing",
                def_id,
                body.basic_blocks().len(),
                MAX_BLOCKS
            );
            return;
        }

        // Compute which pairs of relevant locals may never be merged (e.g. because they are
        // live and initialized at the same time).
        let mut conflicts = Conflicts::build(tcx, body, &relevant_locals);

        let mut replacements = Replacements::new(body.local_decls.len());
        for candidate @ CandidateAssignment { dest, src, loc } in candidates {
            // Merge locals that don't conflict.
            if !conflicts.can_unify(dest.local, src) {
                debug!("at assignment {:?}, conflict {:?} vs. {:?}", loc, dest.local, src);
                continue;
            }

            // Each `src` can only be replaced once; later candidates for the same `src` are
            // skipped.
            if replacements.for_src(candidate.src).is_some() {
                debug!("src {:?} already has replacement", candidate.src);
                continue;
            }

            // Gives `-Z`-flag-based machinery a chance to skip this particular optimization
            // (useful when bisecting miscompiles).
            if !tcx.consider_optimizing(|| {
                format!("DestinationPropagation {:?} {:?}", def_id, candidate)
            }) {
                break;
            }

            replacements.push(candidate);
            // Record the merge so conflicts of either local now apply to the merged pair.
            conflicts.unify(candidate.src, candidate.dest.local);
        }

        // Resolve chains of replacements (`a -> b` followed by `b -> c` becomes `a -> c`).
        replacements.flatten(tcx);

        debug!("replacements {:?}", replacements.map);

        // Rewrite all uses of the replaced locals and nop out the now-redundant statements.
        Replacer { tcx, replacements, place_elem_cache: Vec::new() }.visit_body(body);

        // FIXME fix debug info
    }
}
213
214 #[derive(Debug, Eq, PartialEq, Copy, Clone)]
215 struct UnifyLocal(Local);
216
217 impl From<Local> for UnifyLocal {
218     fn from(l: Local) -> Self {
219         Self(l)
220     }
221 }
222
223 impl UnifyKey for UnifyLocal {
224     type Value = ();
225     fn index(&self) -> u32 {
226         self.0.as_u32()
227     }
228     fn from_index(u: u32) -> Self {
229         Self(Local::from_u32(u))
230     }
231     fn tag() -> &'static str {
232         "UnifyLocal"
233     }
234 }
235
/// The replacements the pass has decided on, plus the locals whose storage markers must be
/// removed as a consequence.
struct Replacements<'tcx> {
    /// Maps locals to their replacement.
    map: IndexVec<Local, Option<Place<'tcx>>>,

    /// Locals whose `StorageLive`/`StorageDead` statements must be deleted, since merging extends
    /// their live ranges and the old storage markers would no longer be accurate.
    kill: BitSet<Local>,
}
243
impl<'tcx> Replacements<'tcx> {
    /// Creates an empty replacement map for a body with `locals` locals.
    fn new(locals: usize) -> Self {
        Self { map: IndexVec::from_elem_n(None, locals), kill: BitSet::new_empty(locals) }
    }

    /// Records that `candidate.src` is to be replaced by `candidate.dest`.
    ///
    /// Panics if a replacement for `candidate.src` was already recorded; callers are expected to
    /// check `for_src` first.
    fn push(&mut self, candidate: CandidateAssignment<'tcx>) {
        trace!("Replacements::push({:?})", candidate);
        let entry = &mut self.map[candidate.src];
        assert!(entry.is_none());

        *entry = Some(candidate.dest);
        // Both locals' storage statements must be removed, since the merged local's live range
        // covers both of the originals'.
        self.kill.insert(candidate.src);
        self.kill.insert(candidate.dest.local);
    }

    /// Applies the stored replacements to themselves, until no replacement would result in a
    /// local that itself needs further replacement when applied.
    fn flatten(&mut self, tcx: TyCtxt<'tcx>) {
        // Note: This assumes that there are no cycles in the replacements, which is enforced via
        // `self.unified_locals`. Otherwise this can cause an infinite loop.

        for local in self.map.indices() {
            if let Some(replacement) = self.map[local] {
                // Substitute the base local of `replacement` until fixpoint.
                let mut base = replacement.local;
                let mut reversed_projection_slices = Vec::with_capacity(1);
                while let Some(replacement_for_replacement) = self.map[base] {
                    base = replacement_for_replacement.local;
                    reversed_projection_slices.push(replacement_for_replacement.projection);
                }

                // The projection slices were collected innermost-first while walking the chain
                // above, so reverse them before appending this replacement's own projections.
                let projection: Vec<_> = reversed_projection_slices
                    .iter()
                    .rev()
                    .flat_map(|projs| projs.iter())
                    .chain(replacement.projection.iter())
                    .collect();
                let projection = tcx.intern_place_elems(&projection);

                // Replace with the final `Place`.
                self.map[local] = Some(Place { local: base, projection });
            }
        }
    }

    /// Returns the replacement `Place` recorded for `src`, if any.
    fn for_src(&self, src: Local) -> Option<Place<'tcx>> {
        self.map[src]
    }
}
293
/// MIR visitor that rewrites all uses of replaced locals to their replacement `Place`s and cleans
/// up the statements that thereby become redundant.
struct Replacer<'tcx> {
    tcx: TyCtxt<'tcx>,
    replacements: Replacements<'tcx>,
    /// Scratch buffer for building rebased projections; reused across `visit_place` calls to
    /// avoid a fresh allocation per place.
    place_elem_cache: Vec<PlaceElem<'tcx>>,
}
299
impl<'tcx> MutVisitor<'tcx> for Replacer<'tcx> {
    fn tcx(&self) -> TyCtxt<'tcx> {
        self.tcx
    }

    fn visit_local(&mut self, local: &mut Local, context: PlaceContext, location: Location) {
        // Every use of a replaced local is expected to be rewritten by `visit_place` before we
        // ever see the bare local here; reaching this branch means a use was missed, which would
        // leave the body in an inconsistent state.
        if context.is_use() && self.replacements.for_src(*local).is_some() {
            bug!(
                "use of local {:?} should have been replaced by visit_place; context={:?}, loc={:?}",
                local,
                context,
                location,
            );
        }
    }

    fn visit_place(&mut self, place: &mut Place<'tcx>, context: PlaceContext, location: Location) {
        if let Some(replacement) = self.replacements.for_src(place.local) {
            // Rebase `place`s projections onto `replacement`'s.
            self.place_elem_cache.clear();
            self.place_elem_cache.extend(replacement.projection.iter().chain(place.projection));
            let projection = self.tcx.intern_place_elems(&self.place_elem_cache);
            let new_place = Place { local: replacement.local, projection };

            debug!("Replacer: {:?} -> {:?}", place, new_place);
            *place = new_place;
        }

        // Visit nested places/locals after the rewrite so `visit_local` only sees the result.
        self.super_place(place, context, location);
    }

    fn visit_statement(&mut self, statement: &mut Statement<'tcx>, location: Location) {
        // Rewrite the places in this statement first, then clean up whatever the rewrite made
        // redundant.
        self.super_statement(statement, location);

        match &statement.kind {
            // FIXME: Don't delete storage statements, merge the live ranges instead
            StatementKind::StorageDead(local) | StatementKind::StorageLive(local)
                if self.replacements.kill.contains(*local) =>
            {
                statement.make_nop()
            }

            StatementKind::Assign(box (dest, rvalue)) => {
                match rvalue {
                    Rvalue::Use(Operand::Copy(place) | Operand::Move(place)) => {
                        // These might've been turned into self-assignments by the replacement
                        // (this includes the original statement we wanted to eliminate).
                        if dest == place {
                            debug!("{:?} turned into self-assignment, deleting", location);
                            statement.make_nop();
                        }
                    }
                    _ => {}
                }
            }

            _ => {}
        }
    }
}
360
/// The conflict graph between relevant locals: two locals that conflict must never be merged.
struct Conflicts<'a> {
    /// The locals that appear in any candidate assignment; conflicts are only computed for these.
    relevant_locals: &'a BitSet<Local>,

    /// The conflict matrix. It is always symmetric and the adjacency matrix of the corresponding
    /// conflict graph.
    matrix: BitMatrix<Local, Local>,

    /// Preallocated `BitSet` used by `unify`.
    unify_cache: BitSet<Local>,

    /// Tracks locals that have been merged together to prevent cycles and propagate conflicts.
    unified_locals: InPlaceUnificationTable<UnifyLocal>,
}
374
375 impl<'a> Conflicts<'a> {
    /// Builds the conflict matrix for `relevant_locals` by combining the `MaybeInitializedLocals`
    /// and `MaybeLiveLocals` dataflow results: locals that are maybe-initialized and live at the
    /// same program point conflict, as do locals that a single statement or terminator requires
    /// to not overlap.
    fn build<'tcx>(
        tcx: TyCtxt<'tcx>,
        body: &'_ Body<'tcx>,
        relevant_locals: &'a BitSet<Local>,
    ) -> Self {
        // We don't have to look out for locals that have their address taken, since
        // `find_candidates` already takes care of that.

        let conflicts = BitMatrix::from_row_n(
            &BitSet::new_empty(body.local_decls.len()),
            body.local_decls.len(),
        );

        let mut init = MaybeInitializedLocals
            .into_engine(tcx, body)
            .iterate_to_fixpoint()
            .into_results_cursor(body);
        let mut live =
            MaybeLiveLocals.into_engine(tcx, body).iterate_to_fixpoint().into_results_cursor(body);

        // Dump the dataflow results alongside the MIR for debugging (only produces output when
        // MIR dumping is enabled); this has no effect on the analysis itself.
        let mut reachable = None;
        dump_mir(tcx, None, "DestinationPropagation-dataflow", &"", body, |pass_where, w| {
            let reachable = reachable.get_or_insert_with(|| traversal::reachable_as_bitset(body));

            match pass_where {
                PassWhere::BeforeLocation(loc) if reachable.contains(loc.block) => {
                    init.seek_before_primary_effect(loc);
                    live.seek_after_primary_effect(loc);

                    writeln!(w, "        // init: {:?}", init.get())?;
                    writeln!(w, "        // live: {:?}", live.get())?;
                }
                PassWhere::AfterTerminator(bb) if reachable.contains(bb) => {
                    let loc = body.terminator_loc(bb);
                    init.seek_after_primary_effect(loc);
                    live.seek_before_primary_effect(loc);

                    writeln!(w, "        // init: {:?}", init.get())?;
                    writeln!(w, "        // live: {:?}", live.get())?;
                }

                PassWhere::BeforeBlock(bb) if reachable.contains(bb) => {
                    init.seek_to_block_start(bb);
                    live.seek_to_block_start(bb);

                    writeln!(w, "    // init: {:?}", init.get())?;
                    writeln!(w, "    // live: {:?}", live.get())?;
                }

                PassWhere::BeforeCFG | PassWhere::AfterCFG | PassWhere::AfterLocation(_) => {}

                // Remaining cases are unreachable locations/blocks (the guards above did not
                // match), for which no meaningful dataflow state exists.
                PassWhere::BeforeLocation(_) | PassWhere::AfterTerminator(_) => {
                    writeln!(w, "        // init: <unreachable>")?;
                    writeln!(w, "        // live: <unreachable>")?;
                }

                PassWhere::BeforeBlock(_) => {
                    writeln!(w, "    // init: <unreachable>")?;
                    writeln!(w, "    // live: <unreachable>")?;
                }
            }

            Ok(())
        });

        let mut this = Self {
            relevant_locals,
            matrix: conflicts,
            unify_cache: BitSet::new_empty(body.local_decls.len()),
            unified_locals: {
                let mut table = InPlaceUnificationTable::new();
                // Pre-fill table with all locals (this creates N nodes / "connected" components,
                // "graph"-ically speaking).
                for local in 0..body.local_decls.len() {
                    assert_eq!(table.new_key(()), UnifyLocal(Local::from_usize(local)));
                }
                table
            },
        };

        let mut live_and_init_locals = Vec::new();

        // Visit only reachable basic blocks. The exact order is not important.
        for (block, data) in traversal::preorder(body) {
            // We need to observe the dataflow state *before* all possible locations (statement or
            // terminator) in each basic block, and then observe the state *after* the terminator
            // effect is applied. As long as neither `init` nor `live` has a "before" effect,
            // we will observe all possible dataflow states.

            // Since liveness is a backwards analysis, we need to walk the results backwards. To do
            // that, we first collect in the `MaybeInitializedLocals` results in a forwards
            // traversal.

            // One entry per statement, plus one for the terminator.
            live_and_init_locals.resize_with(data.statements.len() + 1, || {
                BitSet::new_empty(body.local_decls.len())
            });

            // First, go forwards for `MaybeInitializedLocals` and apply intra-statement/terminator
            // conflicts.
            for (i, statement) in data.statements.iter().enumerate() {
                this.record_statement_conflicts(statement);

                let loc = Location { block, statement_index: i };
                init.seek_before_primary_effect(loc);

                live_and_init_locals[i].clone_from(init.get());
            }

            this.record_terminator_conflicts(data.terminator());
            let term_loc = Location { block, statement_index: data.statements.len() };
            init.seek_before_primary_effect(term_loc);
            live_and_init_locals[term_loc.statement_index].clone_from(init.get());

            // Now, go backwards and intersect with the liveness results: the locals remaining
            // in each set are both maybe-initialized and live at that point, i.e. conflicting.
            for statement_index in (0..=data.statements.len()).rev() {
                let loc = Location { block, statement_index };
                live.seek_after_primary_effect(loc);

                live_and_init_locals[statement_index].intersect(live.get());

                trace!("record conflicts at {:?}", loc);

                this.record_dataflow_conflicts(&mut live_and_init_locals[statement_index]);
            }

            // Finally, also record conflicts for the state after the terminator's effect.
            init.seek_to_block_end(block);
            live.seek_to_block_end(block);
            let mut conflicts = init.get().clone();
            conflicts.intersect(live.get());
            trace!("record conflicts at end of {:?}", block);

            this.record_dataflow_conflicts(&mut conflicts);
        }

        this
    }
512
    /// Merges `new_conflicts` into the conflict matrix: every pair of relevant locals in the set
    /// is marked as conflicting. `new_conflicts` is mutated (restricted to relevant locals).
    fn record_dataflow_conflicts(&mut self, new_conflicts: &mut BitSet<Local>) {
        // Remove all locals that are not candidates.
        new_conflicts.intersect(self.relevant_locals);

        // Union the whole set into each member's row, making all members pairwise conflicting
        // (and keeping the matrix symmetric).
        for local in new_conflicts.iter() {
            self.matrix.union_row_with(&new_conflicts, local);
        }
    }
521
    /// Records a conflict between `a` and `b`, inserting both directions to keep the matrix
    /// symmetric. `why` is only used for tracing.
    fn record_local_conflict(&mut self, a: Local, b: Local, why: &str) {
        trace!("conflict {:?} <-> {:?} due to {}", a, b, why);
        self.matrix.insert(a, b);
        self.matrix.insert(b, a);
    }
527
528     /// Records locals that must not overlap during the evaluation of `stmt`. These locals conflict
529     /// and must not be merged.
530     fn record_statement_conflicts(&mut self, stmt: &Statement<'_>) {
531         match &stmt.kind {
532             // While the left and right sides of an assignment must not overlap, we do not mark
533             // conflicts here as that would make this optimization useless. When we optimize, we
534             // eliminate the resulting self-assignments automatically.
535             StatementKind::Assign(_) => {}
536
537             StatementKind::LlvmInlineAsm(asm) => {
538                 // Inputs and outputs must not overlap.
539                 for (_, input) in &*asm.inputs {
540                     if let Some(in_place) = input.place() {
541                         if !in_place.is_indirect() {
542                             for out_place in &*asm.outputs {
543                                 if !out_place.is_indirect() && !in_place.is_indirect() {
544                                     self.record_local_conflict(
545                                         in_place.local,
546                                         out_place.local,
547                                         "aliasing llvm_asm! operands",
548                                     );
549                                 }
550                             }
551                         }
552                     }
553                 }
554             }
555
556             StatementKind::SetDiscriminant { .. }
557             | StatementKind::StorageLive(..)
558             | StatementKind::StorageDead(..)
559             | StatementKind::Retag(..)
560             | StatementKind::FakeRead(..)
561             | StatementKind::AscribeUserType(..)
562             | StatementKind::Coverage(..)
563             | StatementKind::CopyNonOverlapping(..)
564             | StatementKind::Nop => {}
565         }
566     }
567
568     fn record_terminator_conflicts(&mut self, term: &Terminator<'_>) {
569         match &term.kind {
570             TerminatorKind::DropAndReplace {
571                 place: dropped_place,
572                 value,
573                 target: _,
574                 unwind: _,
575             } => {
576                 if let Some(place) = value.place() {
577                     if !place.is_indirect() && !dropped_place.is_indirect() {
578                         self.record_local_conflict(
579                             place.local,
580                             dropped_place.local,
581                             "DropAndReplace operand overlap",
582                         );
583                     }
584                 }
585             }
586             TerminatorKind::Yield { value, resume: _, resume_arg, drop: _ } => {
587                 if let Some(place) = value.place() {
588                     if !place.is_indirect() && !resume_arg.is_indirect() {
589                         self.record_local_conflict(
590                             place.local,
591                             resume_arg.local,
592                             "Yield operand overlap",
593                         );
594                     }
595                 }
596             }
597             TerminatorKind::Call {
598                 func,
599                 args,
600                 destination: Some((dest_place, _)),
601                 cleanup: _,
602                 from_hir_call: _,
603                 fn_span: _,
604             } => {
605                 // No arguments may overlap with the destination.
606                 for arg in args.iter().chain(Some(func)) {
607                     if let Some(place) = arg.place() {
608                         if !place.is_indirect() && !dest_place.is_indirect() {
609                             self.record_local_conflict(
610                                 dest_place.local,
611                                 place.local,
612                                 "call dest/arg overlap",
613                             );
614                         }
615                     }
616                 }
617             }
618             TerminatorKind::InlineAsm {
619                 template: _,
620                 operands,
621                 options: _,
622                 line_spans: _,
623                 destination: _,
624                 cleanup: _,
625             } => {
626                 // The intended semantics here aren't documented, we just assume that nothing that
627                 // could be written to by the assembly may overlap with any other operands.
628                 for op in operands {
629                     match op {
630                         InlineAsmOperand::Out { reg: _, late: _, place: Some(dest_place) }
631                         | InlineAsmOperand::InOut {
632                             reg: _,
633                             late: _,
634                             in_value: _,
635                             out_place: Some(dest_place),
636                         } => {
637                             // For output place `place`, add all places accessed by the inline asm.
638                             for op in operands {
639                                 match op {
640                                     InlineAsmOperand::In { reg: _, value } => {
641                                         if let Some(p) = value.place() {
642                                             if !p.is_indirect() && !dest_place.is_indirect() {
643                                                 self.record_local_conflict(
644                                                     p.local,
645                                                     dest_place.local,
646                                                     "asm! operand overlap",
647                                                 );
648                                             }
649                                         }
650                                     }
651                                     InlineAsmOperand::Out {
652                                         reg: _,
653                                         late: _,
654                                         place: Some(place),
655                                     } => {
656                                         if !place.is_indirect() && !dest_place.is_indirect() {
657                                             self.record_local_conflict(
658                                                 place.local,
659                                                 dest_place.local,
660                                                 "asm! operand overlap",
661                                             );
662                                         }
663                                     }
664                                     InlineAsmOperand::InOut {
665                                         reg: _,
666                                         late: _,
667                                         in_value,
668                                         out_place,
669                                     } => {
670                                         if let Some(place) = in_value.place() {
671                                             if !place.is_indirect() && !dest_place.is_indirect() {
672                                                 self.record_local_conflict(
673                                                     place.local,
674                                                     dest_place.local,
675                                                     "asm! operand overlap",
676                                                 );
677                                             }
678                                         }
679
680                                         if let Some(place) = out_place {
681                                             if !place.is_indirect() && !dest_place.is_indirect() {
682                                                 self.record_local_conflict(
683                                                     place.local,
684                                                     dest_place.local,
685                                                     "asm! operand overlap",
686                                                 );
687                                             }
688                                         }
689                                     }
690                                     InlineAsmOperand::Out { reg: _, late: _, place: None }
691                                     | InlineAsmOperand::Const { value: _ }
692                                     | InlineAsmOperand::SymFn { value: _ }
693                                     | InlineAsmOperand::SymStatic { def_id: _ } => {}
694                                 }
695                             }
696                         }
697                         InlineAsmOperand::InOut {
698                             reg: _,
699                             late: _,
700                             in_value: _,
701                             out_place: None,
702                         }
703                         | InlineAsmOperand::In { reg: _, value: _ }
704                         | InlineAsmOperand::Out { reg: _, late: _, place: None }
705                         | InlineAsmOperand::Const { value: _ }
706                         | InlineAsmOperand::SymFn { value: _ }
707                         | InlineAsmOperand::SymStatic { def_id: _ } => {}
708                     }
709                 }
710             }
711
712             TerminatorKind::Goto { .. }
713             | TerminatorKind::Call { destination: None, .. }
714             | TerminatorKind::SwitchInt { .. }
715             | TerminatorKind::Resume
716             | TerminatorKind::Abort
717             | TerminatorKind::Return
718             | TerminatorKind::Unreachable
719             | TerminatorKind::Drop { .. }
720             | TerminatorKind::Assert { .. }
721             | TerminatorKind::GeneratorDrop
722             | TerminatorKind::FalseEdge { .. }
723             | TerminatorKind::FalseUnwind { .. } => {}
724         }
725     }
726
727     /// Checks whether `a` and `b` may be merged. Returns `false` if there's a conflict.
728     fn can_unify(&mut self, a: Local, b: Local) -> bool {
729         // After some locals have been unified, their conflicts are only tracked in the root key,
730         // so look that up.
731         let a = self.unified_locals.find(a).0;
732         let b = self.unified_locals.find(b).0;
733
734         if a == b {
735             // Already merged (part of the same connected component).
736             return false;
737         }
738
739         if self.matrix.contains(a, b) {
740             // Conflict (derived via dataflow, intra-statement conflicts, or inherited from another
741             // local during unification).
742             return false;
743         }
744
745         true
746     }
747
748     /// Merges the conflicts of `a` and `b`, so that each one inherits all conflicts of the other.
749     ///
750     /// `can_unify` must have returned `true` for the same locals, or this may panic or lead to
751     /// miscompiles.
752     ///
753     /// This is called when the pass makes the decision to unify `a` and `b` (or parts of `a` and
754     /// `b`) and is needed to ensure that future unification decisions take potentially newly
755     /// introduced conflicts into account.
756     ///
757     /// For an example, assume we have locals `_0`, `_1`, `_2`, and `_3`. There are these conflicts:
758     ///
759     /// * `_0` <-> `_1`
760     /// * `_1` <-> `_2`
761     /// * `_3` <-> `_0`
762     ///
763     /// We then decide to merge `_2` with `_3` since they don't conflict. Then we decide to merge
764     /// `_2` with `_0`, which also doesn't have a conflict in the above list. However `_2` is now
765     /// `_3`, which does conflict with `_0`.
766     fn unify(&mut self, a: Local, b: Local) {
767         trace!("unify({:?}, {:?})", a, b);
768
769         // Get the root local of the connected components. The root local stores the conflicts of
770         // all locals in the connected component (and *is stored* as the conflicting local of other
771         // locals).
772         let a = self.unified_locals.find(a).0;
773         let b = self.unified_locals.find(b).0;
774         assert_ne!(a, b);
775
776         trace!("roots: a={:?}, b={:?}", a, b);
777         trace!("{:?} conflicts: {:?}", a, self.matrix.iter(a).format(", "));
778         trace!("{:?} conflicts: {:?}", b, self.matrix.iter(b).format(", "));
779
780         self.unified_locals.union(a, b);
781
782         let root = self.unified_locals.find(a).0;
783         assert!(root == a || root == b);
784
785         // Make all locals that conflict with `a` also conflict with `b`, and vice versa.
786         self.unify_cache.clear();
787         for conflicts_with_a in self.matrix.iter(a) {
788             self.unify_cache.insert(conflicts_with_a);
789         }
790         for conflicts_with_b in self.matrix.iter(b) {
791             self.unify_cache.insert(conflicts_with_b);
792         }
793         for conflicts_with_a_or_b in self.unify_cache.iter() {
794             // Set both `a` and `b` for this local's row.
795             self.matrix.insert(conflicts_with_a_or_b, a);
796             self.matrix.insert(conflicts_with_a_or_b, b);
797         }
798
799         // Write the locals `a` conflicts with to `b`'s row.
800         self.matrix.union_rows(a, b);
801         // Write the locals `b` conflicts with to `a`'s row.
802         self.matrix.union_rows(b, a);
803     }
804 }
805
/// A `dest = {move} src;` statement at `loc`.
///
/// We want to consider merging `dest` and `src` due to this assignment.
#[derive(Debug, Copy, Clone)]
struct CandidateAssignment<'tcx> {
    /// Does not contain indirection or indexing (so the only local it contains is the place base).
    dest: Place<'tcx>,
    /// The source local. `find_candidates` only emits candidates whose source place has no
    /// projections, so a bare `Local` suffices here.
    src: Local,
    /// Location of the `dest = {move} src;` statement within the body.
    loc: Location,
}
816
817 /// Scans the MIR for assignments between locals that we might want to consider merging.
818 ///
819 /// This will filter out assignments that do not match the right form (as described in the top-level
820 /// comment) and also throw out assignments that involve a local that has its address taken or is
821 /// otherwise ineligible (eg. locals used as array indices are ignored because we cannot propagate
822 /// arbitrary places into array indices).
823 fn find_candidates<'tcx>(tcx: TyCtxt<'tcx>, body: &Body<'tcx>) -> Vec<CandidateAssignment<'tcx>> {
824     let mut visitor = FindAssignments {
825         tcx,
826         body,
827         candidates: Vec::new(),
828         ever_borrowed_locals: ever_borrowed_locals(body),
829         locals_used_as_array_index: locals_used_as_array_index(body),
830     };
831     visitor.visit_body(body);
832     visitor.candidates
833 }
834
/// Visitor that accumulates `CandidateAssignment`s while walking a body.
struct FindAssignments<'a, 'tcx> {
    tcx: TyCtxt<'tcx>,
    body: &'a Body<'tcx>,
    /// Candidates collected so far, in visit order.
    candidates: Vec<CandidateAssignment<'tcx>>,
    /// Locals that ever have their address taken (computed by `ever_borrowed_locals`).
    ever_borrowed_locals: BitSet<Local>,
    /// Locals appearing in `PlaceElem::Index` projections (computed by
    /// `locals_used_as_array_index`).
    locals_used_as_array_index: BitSet<Local>,
}
842
843 impl<'tcx> Visitor<'tcx> for FindAssignments<'_, 'tcx> {
844     fn visit_statement(&mut self, statement: &Statement<'tcx>, location: Location) {
845         if let StatementKind::Assign(box (
846             dest,
847             Rvalue::Use(Operand::Copy(src) | Operand::Move(src)),
848         )) = &statement.kind
849         {
850             // `dest` must not have pointer indirection.
851             if dest.is_indirect() {
852                 return;
853             }
854
855             // `src` must be a plain local.
856             if !src.projection.is_empty() {
857                 return;
858             }
859
860             // Since we want to replace `src` with `dest`, `src` must not be required.
861             if is_local_required(src.local, self.body) {
862                 return;
863             }
864
865             // Can't optimize if both locals ever have their address taken (can introduce
866             // aliasing).
867             // FIXME: This can be smarter and take `StorageDead` into account (which
868             // invalidates borrows).
869             if self.ever_borrowed_locals.contains(dest.local)
870                 || self.ever_borrowed_locals.contains(src.local)
871             {
872                 return;
873             }
874
875             assert_ne!(dest.local, src.local, "self-assignments are UB");
876
877             // We can't replace locals occurring in `PlaceElem::Index` for now.
878             if self.locals_used_as_array_index.contains(src.local) {
879                 return;
880             }
881
882             // Handle the "subtle case" described above by rejecting any `dest` that is or
883             // projects through a union.
884             let mut place_ty = PlaceTy::from_ty(self.body.local_decls[dest.local].ty);
885             if place_ty.ty.is_union() {
886                 return;
887             }
888             for elem in dest.projection {
889                 if let PlaceElem::Index(_) = elem {
890                     // `dest` contains an indexing projection.
891                     return;
892                 }
893
894                 place_ty = place_ty.projection_ty(self.tcx, elem);
895                 if place_ty.ty.is_union() {
896                     return;
897                 }
898             }
899
900             self.candidates.push(CandidateAssignment {
901                 dest: *dest,
902                 src: src.local,
903                 loc: location,
904             });
905         }
906     }
907 }
908
909 /// Some locals are part of the function's interface and can not be removed.
910 ///
911 /// Note that these locals *can* still be merged with non-required locals by removing that other
912 /// local.
913 fn is_local_required(local: Local, body: &Body<'_>) -> bool {
914     match body.local_kind(local) {
915         LocalKind::Arg | LocalKind::ReturnPointer => true,
916         LocalKind::Var | LocalKind::Temp => false,
917     }
918 }
919
920 /// Walks MIR to find all locals that have their address taken anywhere.
921 fn ever_borrowed_locals(body: &Body<'_>) -> BitSet<Local> {
922     let mut visitor = BorrowCollector { locals: BitSet::new_empty(body.local_decls.len()) };
923     visitor.visit_body(body);
924     visitor.locals
925 }
926
/// Visitor that collects locals whose address is taken: places borrowed via `Ref`/`AddressOf`
/// rvalues, plus places consumed by `Drop`/`DropAndReplace` terminators.
struct BorrowCollector {
    // Set of locals observed to have their address taken anywhere in the body.
    locals: BitSet<Local>,
}
930
931 impl<'tcx> Visitor<'tcx> for BorrowCollector {
932     fn visit_rvalue(&mut self, rvalue: &Rvalue<'tcx>, location: Location) {
933         self.super_rvalue(rvalue, location);
934
935         match rvalue {
936             Rvalue::AddressOf(_, borrowed_place) | Rvalue::Ref(_, _, borrowed_place) => {
937                 if !borrowed_place.is_indirect() {
938                     self.locals.insert(borrowed_place.local);
939                 }
940             }
941
942             Rvalue::Cast(..)
943             | Rvalue::ShallowInitBox(..)
944             | Rvalue::Use(..)
945             | Rvalue::Repeat(..)
946             | Rvalue::Len(..)
947             | Rvalue::BinaryOp(..)
948             | Rvalue::CheckedBinaryOp(..)
949             | Rvalue::NullaryOp(..)
950             | Rvalue::UnaryOp(..)
951             | Rvalue::Discriminant(..)
952             | Rvalue::Aggregate(..)
953             | Rvalue::ThreadLocalRef(..) => {}
954         }
955     }
956
957     fn visit_terminator(&mut self, terminator: &Terminator<'tcx>, location: Location) {
958         self.super_terminator(terminator, location);
959
960         match terminator.kind {
961             TerminatorKind::Drop { place: dropped_place, .. }
962             | TerminatorKind::DropAndReplace { place: dropped_place, .. } => {
963                 self.locals.insert(dropped_place.local);
964             }
965
966             TerminatorKind::Abort
967             | TerminatorKind::Assert { .. }
968             | TerminatorKind::Call { .. }
969             | TerminatorKind::FalseEdge { .. }
970             | TerminatorKind::FalseUnwind { .. }
971             | TerminatorKind::GeneratorDrop
972             | TerminatorKind::Goto { .. }
973             | TerminatorKind::Resume
974             | TerminatorKind::Return
975             | TerminatorKind::SwitchInt { .. }
976             | TerminatorKind::Unreachable
977             | TerminatorKind::Yield { .. }
978             | TerminatorKind::InlineAsm { .. } => {}
979         }
980     }
981 }
982
983 /// `PlaceElem::Index` only stores a `Local`, so we can't replace that with a full `Place`.
984 ///
985 /// Collect locals used as indices so we don't generate candidates that are impossible to apply
986 /// later.
987 fn locals_used_as_array_index(body: &Body<'_>) -> BitSet<Local> {
988     let mut visitor = IndexCollector { locals: BitSet::new_empty(body.local_decls.len()) };
989     visitor.visit_body(body);
990     visitor.locals
991 }
992
/// Visitor that records every local appearing in a `PlaceElem::Index` projection.
struct IndexCollector {
    // Locals used as array/slice indices anywhere in the body.
    locals: BitSet<Local>,
}
996
997 impl<'tcx> Visitor<'tcx> for IndexCollector {
998     fn visit_projection_elem(
999         &mut self,
1000         local: Local,
1001         proj_base: &[PlaceElem<'tcx>],
1002         elem: PlaceElem<'tcx>,
1003         context: PlaceContext,
1004         location: Location,
1005     ) {
1006         if let PlaceElem::Index(i) = elem {
1007             self.locals.insert(i);
1008         }
1009         self.super_projection_elem(local, proj_base, elem, context, location);
1010     }
1011 }