//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
//!
//! # Undefined behaviour
//!
//! This optimization is based on the assumption that stack slots which don't have their address
//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
//! function which has the stack slots. This optimization also assumes that stack slot accesses
//! are never out of bounds. If these assumptions are not correct, then this optimization may
//! incorrectly remove a `stack_store` instruction, or incorrectly use a previously stored value
//! as the value being loaded by a `stack_load`.
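//!
//! # Example
//!
//! A minimal sketch of the store to load forwarding performed by this pass, in simplified
//! CLIF-like pseudocode (illustrative only, not exact Cranelift IR syntax):
//!
//! ```text
//! ; before
//! stack_store v1, ss0
//! v2 = stack_load.i64 ss0
//!
//! ; after: the load becomes an alias of the stored value and, if nothing else reads ss0,
//! ; the store is removed as dead
//! v2 -> v1
//! ```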

use std::collections::BTreeMap;
use std::fmt;
use std::ops::Not;

use rustc_data_structures::fx::FxHashSet;

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};
use cranelift_codegen::ir::immediates::Offset32;

use crate::prelude::*;

/// Workaround for `StackSlot` not implementing `Ord`.
#[derive(Copy, Clone, PartialEq, Eq)]
struct OrdStackSlot(StackSlot);

impl fmt::Debug for OrdStackSlot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.0)
    }
}

impl PartialOrd for OrdStackSlot {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        self.0.as_u32().partial_cmp(&rhs.0.as_u32())
    }
}

impl Ord for OrdStackSlot {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.0.as_u32().cmp(&rhs.0.as_u32())
    }
}

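/// All recorded uses of a single stack slot: the instructions that take its address and the
/// `stack_load`/`stack_store` instructions that access it directly.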
#[derive(Debug, Default)]
struct StackSlotUsage {
    stack_addr: FxHashSet<Inst>,
    stack_load: FxHashSet<Inst>,
    stack_store: FxHashSet<Inst>,
}

impl StackSlotUsage {
    fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec<Inst> {
        self.stack_store.iter().cloned().filter(|&store| {
            match spatial_overlap(&ctx.func, store, load) {
                SpatialOverlap::No => false, // Can never be the source of the loaded value.
                SpatialOverlap::Partial | SpatialOverlap::Full => true,
            }
        }).filter(|&store| {
            match temporal_order(ctx, store, load) {
                TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value.
                TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
            }
        }).collect::<Vec<Inst>>()
    }

    fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec<Inst> {
        self.stack_load.iter().cloned().filter(|&load| {
            match spatial_overlap(&ctx.func, store, load) {
                SpatialOverlap::No => false, // Can never load the stored value.
                SpatialOverlap::Partial | SpatialOverlap::Full => true,
            }
        }).filter(|&load| {
            match temporal_order(ctx, store, load) {
                TemporalOrder::NeverBefore => false, // Can never load the stored value.
                TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
            }
        }).collect::<Vec<Inst>>()
    }

    fn remove_unused_stack_addr(&mut self, func: &mut Function, inst: Inst) {
        func.dfg.detach_results(inst);
        func.dfg.replace(inst).nop();
        self.stack_addr.remove(&inst);
    }

    fn remove_unused_load(&mut self, func: &mut Function, load: Inst) {
        func.dfg.detach_results(load);
        func.dfg.replace(load).nop();
        self.stack_load.remove(&load);
    }

    fn remove_dead_store(&mut self, func: &mut Function, store: Inst) {
        func.dfg.replace(store).nop();
        self.stack_store.remove(&store);
    }

    fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) {
        let loaded_value = func.dfg.inst_results(load)[0];
        let loaded_type = func.dfg.value_type(loaded_value);

        if func.dfg.value_type(value) == loaded_type {
            func.dfg.detach_results(load);
            func.dfg.replace(load).nop();
            func.dfg.change_to_alias(loaded_value, value);
        } else {
            func.dfg.replace(load).bitcast(loaded_type, value);
        }

        self.stack_load.remove(&load);
    }
}

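/// The codegen `Context` together with the per stack slot usage information collected by
/// `for_context`.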
struct OptimizeContext<'a> {
    ctx: &'a mut Context,
    stack_slot_usage_map: BTreeMap<OrdStackSlot, StackSlotUsage>,
}

impl<'a> OptimizeContext<'a> {
    fn for_context(ctx: &'a mut Context) -> Self {
        ctx.flowgraph(); // Compute cfg and domtree.

        // Record all stack_addr, stack_load and stack_store instructions.
        let mut stack_slot_usage_map = BTreeMap::<OrdStackSlot, StackSlotUsage>::new();

        let mut cursor = FuncCursor::new(&mut ctx.func);
        while let Some(_block) = cursor.next_block() {
            while let Some(inst) = cursor.next_inst() {
                match cursor.func.dfg[inst] {
                    InstructionData::StackLoad {
                        opcode: Opcode::StackAddr,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_addr.insert(inst);
                    }
                    InstructionData::StackLoad {
                        opcode: Opcode::StackLoad,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_load.insert(inst);
                    }
                    InstructionData::StackStore {
                        opcode: Opcode::StackStore,
                        arg: _,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map.entry(OrdStackSlot(stack_slot)).or_insert_with(StackSlotUsage::default).stack_store.insert(inst);
                    }
                    _ => {}
                }
            }
        }

        OptimizeContext {
            ctx,
            stack_slot_usage_map,
        }
    }
}

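/// Entry point of the stack2reg optimization.
///
/// The pass currently works as follows:
/// 1. Combine `stack_addr` + `load`/`store` pairs into direct `stack_load`/`stack_store`s.
/// 2. Record every `stack_addr`, `stack_load` and `stack_store` per stack slot.
/// 3. Replace `stack_addr` and `stack_load` instructions whose results are unused with nops.
/// 4. For slots whose address never leaks, forward a store to a load when it is the only
///    possible source of the loaded value, remove stores that are never loaded, and shrink
///    slots that are no longer accessed to size 0.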
pub(super) fn optimize_function(
    ctx: &mut Context,
    #[cfg_attr(not(debug_assertions), allow(unused_variables))]
    clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
    combine_stack_addr_with_load_store(&mut ctx.func);

    let mut opt_ctx = OptimizeContext::for_context(ctx);

    // FIXME Repeat following instructions until fixpoint.

    remove_unused_stack_addr_and_stack_load(&mut opt_ctx);

    #[cfg(debug_assertions)] {
        for (&OrdStackSlot(stack_slot), usage) in &opt_ctx.stack_slot_usage_map {
            clif_comments.add_comment(stack_slot, format!("used by: {:?}", usage));
        }
    }

    for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() {
        if users.stack_addr.is_empty().not() {
            // Stack addr leaked; there may be unknown loads and stores.
            // FIXME use stacked borrows to optimize
            continue;
        }

        for load in users.stack_load.clone().into_iter() {
            let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load);

            #[cfg(debug_assertions)]
            for &store in &potential_stores {
                clif_comments.add_comment(load, format!(
                    "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
                    opt_ctx.ctx.func.dfg.display_inst(store, None),
                    opt_ctx.ctx.func.dfg.display_inst(load, None),
                    spatial_overlap(&opt_ctx.ctx.func, store, load),
                    temporal_order(&opt_ctx.ctx, store, load),
                ));
            }

            match *potential_stores {
                [] => {
                    #[cfg(debug_assertions)]
                    clif_comments.add_comment(load, format!("[BUG?] Reading uninitialized memory"));
                }
                [store] if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full && temporal_order(&opt_ctx.ctx, store, load) == TemporalOrder::DefinitivelyBefore => {
                    // Only one store could have been the origin of the value.
                    let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0];

                    #[cfg(debug_assertions)]
                    clif_comments.add_comment(load, format!("Store to load forward {} -> {}", store, load));

                    users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value);
                }
                _ => {} // FIXME implement this
            }
        }

        for store in users.stack_store.clone().into_iter() {
            let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store);

            #[cfg(debug_assertions)]
            for &load in &potential_loads {
                clif_comments.add_comment(store, format!(
                    "Potential load from store {} <- {} ({:?}, {:?})",
                    opt_ctx.ctx.func.dfg.display_inst(load, None),
                    opt_ctx.ctx.func.dfg.display_inst(store, None),
                    spatial_overlap(&opt_ctx.ctx.func, store, load),
                    temporal_order(&opt_ctx.ctx, store, load),
                ));
            }

            if potential_loads.is_empty() {
                // Never loaded; can safely remove all stores and the stack slot.
                // FIXME also remove stores when there is always a next store before a load.

                #[cfg(debug_assertions)]
                clif_comments.add_comment(store, format!("Remove dead stack store {} of {}", opt_ctx.ctx.func.dfg.display_inst(store, None), stack_slot.0));

                users.remove_dead_store(&mut opt_ctx.ctx.func, store);
            }
        }

        if users.stack_store.is_empty() && users.stack_load.is_empty() {
            opt_ctx.ctx.func.stack_slots[stack_slot.0].size = 0;
        }
    }
}

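/// Turn `load` and `store` instructions whose address operand is the result of a `stack_addr`
/// into direct `stack_load`/`stack_store` instructions, provided the combined offset fits.
/// i128 and vector accesses are skipped because `stack_load`/`stack_store` are not yet
/// implemented for them.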
fn combine_stack_addr_with_load_store(func: &mut Function) {
    // Turn load and store into stack_load and stack_store when possible.
    let mut cursor = FuncCursor::new(func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            match cursor.func.dfg[inst] {
                InstructionData::Load { opcode: Opcode::Load, arg: addr, flags: _, offset } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() {
                        continue; // WORKAROUND: stack_load.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) {
                            let ty = cursor.func.dfg.ctrl_typevar(inst);
                            cursor.func.dfg.replace(inst).stack_load(ty, stack_slot, combined_offset);
                        }
                    }
                }
                InstructionData::Store { opcode: Opcode::Store, args: [value, addr], flags: _, offset } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128 || cursor.func.dfg.ctrl_typevar(inst).is_vector() {
                        continue; // WORKAROUND: stack_store.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) = try_get_stack_slot_and_offset_for_addr(cursor.func, addr) {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into()) {
                            cursor.func.dfg.replace(inst).stack_store(value, stack_slot, combined_offset);
                        }
                    }
                }
                _ => {}
            }
        }
    }
}

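/// Replace `stack_addr` and `stack_load` instructions whose results have no users with nops
/// and remove them from the usage map.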
fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext<'_>) {
    // FIXME incrementally rebuild on each call?
    let mut stack_addr_load_insts_users = FxHashMap::<Inst, FxHashSet<Inst>>::default();

    let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            for &arg in cursor.func.dfg.inst_args(inst) {
                if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) {
                    match cursor.func.dfg[arg_origin].opcode() {
                        Opcode::StackAddr | Opcode::StackLoad => {
                            stack_addr_load_insts_users.entry(arg_origin).or_insert_with(FxHashSet::default).insert(inst);
                        }
                        _ => {}
                    }
                }
            }
        }
    }

    #[cfg(debug_assertions)]
    for inst in stack_addr_load_insts_users.keys() {
        let mut is_recorded_stack_addr_or_stack_load = false;
        for stack_slot_users in opt_ctx.stack_slot_usage_map.values() {
            is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst) || stack_slot_users.stack_load.contains(inst);
        }
        assert!(is_recorded_stack_addr_or_stack_load);
    }

    // Replace all unused stack_addr and stack_load instructions with nop.
    for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() {
        // FIXME remove clone
        for &inst in stack_slot_users.stack_addr.clone().iter() {
            if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) {
                stack_slot_users.remove_unused_stack_addr(&mut opt_ctx.ctx.func, inst);
            }
        }

        for &inst in stack_slot_users.stack_load.clone().iter() {
            if stack_addr_load_insts_users.get(&inst).map(|users| users.is_empty()).unwrap_or(true) {
                stack_slot_users.remove_unused_load(&mut opt_ctx.ctx.func, inst);
            }
        }
    }
}

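/// If `addr` is the result of a `stack_addr` instruction, return the referenced stack slot and
/// the offset into it.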
fn try_get_stack_slot_and_offset_for_addr(func: &Function, addr: Value) -> Option<(StackSlot, Offset32)> {
    if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) {
        if let InstructionData::StackLoad {
            opcode: Opcode::StackAddr,
            stack_slot,
            offset,
        } = func.dfg[addr_inst] {
            return Some((stack_slot, offset));
        }
    }
    None
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpatialOverlap {
    No,
    Partial,
    Full,
}

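/// Determine whether the memory accessed by two stack accesses overlaps.
///
/// For example, given two accesses to the same stack slot: an 8 byte store at offset 0 fully
/// overlaps an 8 byte load at offset 0, partially overlaps a 4 byte load at offset 4, and does
/// not overlap a 4 byte load at offset 8.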
fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap {
    fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) {
        match func.dfg[inst] {
            InstructionData::StackLoad {
                opcode: Opcode::StackAddr,
                stack_slot,
                offset,
            }
            | InstructionData::StackLoad {
                opcode: Opcode::StackLoad,
                stack_slot,
                offset,
            }
            | InstructionData::StackStore {
                opcode: Opcode::StackStore,
                stack_slot,
                offset,
                arg: _,
            } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()),
            _ => unreachable!("{:?}", func.dfg[inst]),
        }
    }

    debug_assert_ne!(src, dest);

    let (src_ss, src_offset, src_size) = inst_info(func, src);
    let (dest_ss, dest_offset, dest_size) = inst_info(func, dest);

    if src_ss != dest_ss {
        return SpatialOverlap::No;
    }

    if src_offset == dest_offset && src_size == dest_size {
        return SpatialOverlap::Full;
    }

    let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into();
    let dest_end: i64 = dest_offset.try_add_i64(i64::from(dest_size)).unwrap().into();
    if src_end <= dest_offset.into() || dest_end <= src_offset.into() {
        return SpatialOverlap::No;
    }

    SpatialOverlap::Partial
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TemporalOrder {
    /// `src` will never be executed before `dest`.
    NeverBefore,

    /// `src` may be executed before `dest`.
    MaybeBefore,

    /// `src` will always be executed before `dest`.
    /// There may still be other instructions in between.
    DefinitivelyBefore,
}

fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder {
    debug_assert_ne!(src, dest);

    if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        TemporalOrder::DefinitivelyBefore
    } else {
        // FIXME also detect the `NeverBefore` case. For now conservatively assume that `src`
        // may be executed before `dest`.
        TemporalOrder::MaybeBefore
    }
}