//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
//!
//! # Undefined behaviour
//!
//! This optimization is based on the assumption that stack slots which don't have their address
//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
//! function which has the stack slots. This optimization also assumes that stack slot accesses
//! are never out of bounds. If either assumption does not hold, this optimization may incorrectly
//! remove `stack_store` instructions, or incorrectly use a previously stored value as the value
//! being loaded by a `stack_load`.
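//!
//! # Example
//!
//! A minimal sketch of the store-to-load forwarding this pass performs; the CLIF below is
//! illustrative, not real compiler output:
//!
//! ```text
//! ; before
//! stack_store v0, ss0
//! v1 = stack_load.i64 ss0
//! v2 = iadd v1, v1
//!
//! ; after: v1 becomes an alias of v0 and the now dead store is removed
//! v2 = iadd v0, v0
//! ```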

use std::collections::BTreeMap;
use std::fmt;
use std::ops::Not;

use rustc_data_structures::fx::{FxHashSet, FxHasher};

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
use cranelift_codegen::ir::immediates::Offset32;

use hashbrown::HashSet;
use std::hash::BuildHasherDefault;

use crate::prelude::*;

/// Workaround for `StackSlot` not implementing `Ord`.
#[derive(Copy, Clone, PartialEq, Eq)]
struct OrdStackSlot(StackSlot);

impl fmt::Debug for OrdStackSlot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.0)
    }
}

impl PartialOrd for OrdStackSlot {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        self.0.as_u32().partial_cmp(&rhs.0.as_u32())
    }
}

impl Ord for OrdStackSlot {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.0.as_u32().cmp(&rhs.0.as_u32())
    }
}

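/// All `stack_addr`, `stack_load` and `stack_store` instructions referencing a single stack slot,
/// collected in a pre-pass over the function.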
#[derive(Debug, Default)]
struct StackSlotUsage {
    stack_addr: HashSet<Inst, BuildHasherDefault<FxHasher>>,
    stack_load: HashSet<Inst, BuildHasherDefault<FxHasher>>,
    stack_store: HashSet<Inst, BuildHasherDefault<FxHasher>>,
}

impl StackSlotUsage {
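    /// Returns the recorded `stack_store`s that could have written the value read by `load`,
    /// judged by spatial overlap of the accessed bytes and by dominance-based temporal order.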
    fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec<Inst> {
        self.stack_store.iter().cloned().filter(|&store| {
            match spatial_overlap(&ctx.func, store, load) {
                SpatialOverlap::No => false, // Can never be the source of the loaded value.
                SpatialOverlap::Partial | SpatialOverlap::Full => true,
            }
        }).filter(|&store| {
            match temporal_order(ctx, store, load) {
                TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value.
                TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
            }
        }).collect::<Vec<Inst>>()
    }

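    /// Returns the recorded `stack_load`s that could read the value written by `store`,
    /// judged by spatial overlap of the accessed bytes and by dominance-based temporal order.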
    fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec<Inst> {
        self.stack_load.iter().cloned().filter(|&load| {
            match spatial_overlap(&ctx.func, store, load) {
                SpatialOverlap::No => false, // Can never read the stored value.
                SpatialOverlap::Partial | SpatialOverlap::Full => true,
            }
        }).filter(|&load| {
            match temporal_order(ctx, store, load) {
                TemporalOrder::NeverBefore => false, // Can never read the stored value.
                TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
            }
        }).collect::<Vec<Inst>>()
    }

    fn remove_unused_stack_addr(func: &mut Function, inst: Inst) {
        func.dfg.detach_results(inst);
        func.dfg.replace(inst).nop();
    }

    fn remove_unused_load(func: &mut Function, load: Inst) {
        func.dfg.detach_results(load);
        func.dfg.replace(load).nop();
    }

    fn remove_dead_store(&mut self, func: &mut Function, store: Inst) {
        func.dfg.replace(store).nop();
        self.stack_store.remove(&store);
    }

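    /// Forwards `value` to the result of `load`: the load is replaced by a nop and its result
    /// becomes an alias of `value` when the types match, or by a `bitcast` of `value` otherwise.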
    fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) {
        let loaded_value = func.dfg.inst_results(load)[0];
        let loaded_type = func.dfg.value_type(loaded_value);

        if func.dfg.value_type(value) == loaded_type {
            func.dfg.detach_results(load);
            func.dfg.replace(load).nop();
            func.dfg.change_to_alias(loaded_value, value);
        } else {
            func.dfg.replace(load).bitcast(loaded_type, value);
        }

        self.stack_load.remove(&load);
    }
}

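/// State shared by the steps of this pass: the Cranelift `Context` plus the per-stack-slot usage
/// information collected by `for_context`.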
struct OptimizeContext<'a> {
    ctx: &'a mut Context,
    stack_slot_usage_map: BTreeMap<OrdStackSlot, StackSlotUsage>,
}

impl<'a> OptimizeContext<'a> {
    fn for_context(ctx: &'a mut Context) -> Self {
        ctx.flowgraph(); // Compute cfg and domtree.

        // Record all stack_addr, stack_load and stack_store instructions.
        let mut stack_slot_usage_map = BTreeMap::<OrdStackSlot, StackSlotUsage>::new();

        let mut cursor = FuncCursor::new(&mut ctx.func);
        while let Some(_block) = cursor.next_block() {
            while let Some(inst) = cursor.next_inst() {
                match cursor.func.dfg[inst] {
                    InstructionData::StackLoad {
                        opcode: Opcode::StackAddr,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_addr.insert(inst);
                    }
                    InstructionData::StackLoad {
                        opcode: Opcode::StackLoad,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_load.insert(inst);
                    }
                    InstructionData::StackStore {
                        opcode: Opcode::StackStore,
                        arg: _,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_store.insert(inst);
                    }
                    _ => {}
                }
            }
        }

        OptimizeContext {
            ctx,
            stack_slot_usage_map,
        }
    }
}

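/// Entry point of the pass: fuses `stack_addr`-based `load`/`store` instructions into
/// `stack_load`/`stack_store`, removes unused `stack_addr`/`stack_load` instructions, forwards
/// stored values to loads with a single definitively-earlier fully-overlapping store, removes
/// stores that are never loaded, and shrinks stack slots that end up unused.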
pub(super) fn optimize_function(
    ctx: &mut Context,
    #[cfg_attr(not(debug_assertions), allow(unused_variables))]
    clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
    combine_stack_addr_with_load_store(&mut ctx.func);

    let mut opt_ctx = OptimizeContext::for_context(ctx);

    // FIXME Repeat the following steps until a fixpoint is reached.

    remove_unused_stack_addr_and_stack_load(&mut opt_ctx);

    #[cfg(debug_assertions)] {
        for (&OrdStackSlot(stack_slot), usage) in &opt_ctx.stack_slot_usage_map {
            clif_comments.add_comment(stack_slot, format!("used by: {:?}", usage));
        }
    }

    for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() {
        if users.stack_addr.is_empty().not() {
            // Stack addr leaked; there may be unknown loads and stores.
            // FIXME use stacked borrows to optimize
            continue;
        }

        for load in users.stack_load.clone().into_iter() {
            let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load);

            #[cfg(debug_assertions)]
            for &store in &potential_stores {
                clif_comments.add_comment(load, format!(
                    "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
                    opt_ctx.ctx.func.dfg.display_inst(store, None),
                    opt_ctx.ctx.func.dfg.display_inst(load, None),
                    spatial_overlap(&opt_ctx.ctx.func, store, load),
                    temporal_order(&opt_ctx.ctx, store, load),
                ));
            }

            match *potential_stores {
                [] => {
                    #[cfg(debug_assertions)]
                    clif_comments.add_comment(load, format!("[BUG?] Reading uninitialized memory"));
                }
                [store] if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&opt_ctx.ctx, store, load) == TemporalOrder::DefinitivelyBefore => {
                    // Only one store could have been the origin of the value.
                    let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0];

                    #[cfg(debug_assertions)]
                    clif_comments.add_comment(load, format!("Store to load forward {} -> {}", store, load));

                    users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value);
                }
                _ => {} // FIXME implement this
            }
        }

        for store in users.stack_store.clone().into_iter() {
            let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store);

            #[cfg(debug_assertions)]
            for &load in &potential_loads {
                clif_comments.add_comment(store, format!(
                    "Potential load from store {} <- {} ({:?}, {:?})",
                    opt_ctx.ctx.func.dfg.display_inst(load, None),
                    opt_ctx.ctx.func.dfg.display_inst(store, None),
                    spatial_overlap(&opt_ctx.ctx.func, store, load),
                    temporal_order(&opt_ctx.ctx, store, load),
                ));
            }

            if potential_loads.is_empty() {
                // Never loaded; can safely remove all stores and the stack slot.
                // FIXME also remove stores when there is always a next store before a load.

                #[cfg(debug_assertions)]
                clif_comments.add_comment(store, format!("Remove dead stack store {} of {}", opt_ctx.ctx.func.dfg.display_inst(store, None), stack_slot.0));

                users.remove_dead_store(&mut opt_ctx.ctx.func, store);
            }
        }

        if users.stack_store.is_empty() && users.stack_load.is_empty() {
            opt_ctx.ctx.func.stack_slots[stack_slot.0].size = 0;
        }
    }
}

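/// Turns `load`/`store` instructions whose address is produced by a `stack_addr` into
/// `stack_load`/`stack_store` with the two offsets folded together, so that the rest of this pass
/// can reason about them. As an illustrative example, `v2 = load.i64 v1+4` with
/// `v1 = stack_addr ss0+8` becomes `v2 = stack_load.i64 ss0+12`.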
fn combine_stack_addr_with_load_store(func: &mut Function) {
    // Turn load and store into stack_load and stack_store when possible.
    let mut cursor = FuncCursor::new(func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            match cursor.func.dfg[inst] {
                InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() {
                        continue; // WORKAROUND: stack_load.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) {
                            let ty = cursor.func.dfg.ctrl_typevar(inst);
                            cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset);
                        }
                    }
                }
                InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() {
                        continue; // WORKAROUND: stack_store.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) {
                            cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset);
                        }
                    }
                }
                _ => {}
            }
        }
    }
}

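/// Replaces `stack_addr` and `stack_load` instructions whose results have no remaining users with
/// nops, so that a stack slot whose address never actually escapes is not treated as leaked.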
fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext<'_>) {
    // FIXME incrementally rebuild on each call?
    let mut stack_addr_load_insts_users = FxHashMap::<Inst, FxHashSet<Inst>>::default();

    let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            for &arg in cursor.func.dfg.inst_args(inst) {
                if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) {
                    match cursor.func.dfg[arg_origin].opcode() {
                        Opcode::StackAddr | Opcode::StackLoad => {
                            stack_addr_load_insts_users.entry(arg_origin).or_insert_with(FxHashSet::default).insert(inst);
                        }
                        _ => {}
                    }
                }
            }
        }
    }

    #[cfg(debug_assertions)]
    for inst in stack_addr_load_insts_users.keys() {
        let mut is_recorded_stack_addr_or_stack_load = false;
        for stack_slot_users in opt_ctx.stack_slot_usage_map.values() {
            is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst) || stack_slot_users.stack_load.contains(inst);
        }
        assert!(is_recorded_stack_addr_or_stack_load);
    }

    // Replace all unused stack_addr and stack_load instructions with nop.
    let mut func = &mut opt_ctx.ctx.func;

    // `drain_filter()` on `hashbrown::HashSet` drains the items that do *not* match the
    // predicate. Once hashbrown is updated to match the behaviour of std's `drain_filter`
    // (0.8.2), the predicate will have to be inverted.
    for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() {
        stack_slot_users
            .stack_addr
            .drain_filter(|inst| !(stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)))
            .for_each(|inst| StackSlotUsage::remove_unused_stack_addr(&mut func, inst));

        stack_slot_users
            .stack_load
            .drain_filter(|inst| !(stack_addr_load_insts_users.get(inst).map(|users| users.is_empty()).unwrap_or(true)))
            .for_each(|inst| StackSlotUsage::remove_unused_load(&mut func, inst));
    }
}

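/// If `addr` is the result of a `stack_addr` instruction, returns the referenced stack slot and
/// offset; otherwise returns `None`.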
fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Option<(StackSlot, Offset32)> {
    if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) {
        if let InstructionData::StackLoad {
            opcode: Opcode::StackAddr,
            stack_slot,
            offset,
        } = func.dfg[addr_inst] {
            return Some((stack_slot, offset));
        }
    }
    None
}

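/// How the byte ranges accessed by two stack slot accesses relate to each other.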
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpatialOverlap {
    No,
    Partial,
    Full,
}

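/// Determines whether the byte ranges accessed by `src` and `dest` (each a `stack_addr`,
/// `stack_load` or `stack_store`) are disjoint, partially overlapping, or exactly the same.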
fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap {
    fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) {
        match func.dfg[inst] {
            InstructionData::StackLoad {
                opcode: Opcode::StackAddr,
                stack_slot,
                offset,
            }
            | InstructionData::StackLoad {
                opcode: Opcode::StackLoad,
                stack_slot,
                offset,
            }
            | InstructionData::StackStore {
                opcode: Opcode::StackStore,
                stack_slot,
                offset,
                arg: _,
            } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()),
            _ => unreachable!("{:?}", func.dfg[inst]),
        }
    }

    debug_assert_ne!(src, dest);

    let (src_ss, src_offset, src_size) = inst_info(func, src);
    let (dest_ss, dest_offset, dest_size) = inst_info(func, dest);

    if src_ss != dest_ss {
        return SpatialOverlap::No;
    }

    if src_offset == dest_offset && src_size == dest_size {
        return SpatialOverlap::Full;
    }

    let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into();
    let dest_end: i64 = dest_offset.try_add_i64(i64::from(dest_size)).unwrap().into();
    if src_end <= dest_offset.into() || dest_end <= src_offset.into() {
        return SpatialOverlap::No;
    }

    SpatialOverlap::Partial
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TemporalOrder {
    /// `src` will never be executed before `dest`.
    NeverBefore,

    /// `src` may be executed before `dest`.
    MaybeBefore,

    /// `src` will always be executed before `dest`.
    /// There may still be other instructions in between.
    DefinitivelyBefore,
}

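/// Uses the dominator tree to approximate the order in which `src` and `dest` can execute
/// relative to each other.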
fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder {
    debug_assert_ne!(src, dest);

    if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        TemporalOrder::DefinitivelyBefore
    } else if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        // FIXME This condition duplicates the one above, so `NeverBefore` is never returned and
        // such pairs fall through to the conservative `MaybeBefore`. Simply checking
        // `dominates(dest, src, ..)` here would not be enough, as a loop can still execute `src`
        // before a later execution of `dest`.
        TemporalOrder::NeverBefore
    } else {
        TemporalOrder::MaybeBefore
    }
}