//! This optimization replaces stack accesses with SSA variables and removes dead stores when possible.
//!
//! # Undefined behaviour
//!
//! This optimization is based on the assumption that stack slots which don't have their address
//! leaked through `stack_addr` are only accessed using `stack_load` and `stack_store` in the
//! function which has the stack slots. This optimization also assumes that stack slot accesses
//! are never out of bounds. If these assumptions are not correct, then this optimization may
//! incorrectly remove a `stack_store` instruction, or incorrectly use a previously stored value
//! as the value being loaded by a `stack_load`.
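//!
//! # Example
//!
//! As a rough, hand-written illustration (not copied from real compiler output), a slot whose
//! address never escapes and whose single store fully covers its single load is optimized by
//! forwarding the stored value to the load, deleting the now dead store, and shrinking the slot
//! to zero bytes:
//!
//! ```text
//! ss0 = explicit_slot 4
//! block0:
//!     v0 = iconst.i32 42
//!     stack_store v0, ss0
//!     v1 = stack_load.i32 ss0
//!     return v1
//! ```
//!
//! roughly becomes (the replaced store and load are left behind as `nop`s and the load's result
//! turns into an alias of `v0`):
//!
//! ```text
//! ss0 = explicit_slot 0
//! block0:
//!     v0 = iconst.i32 42
//!     nop
//!     nop
//!     v1 -> v0
//!     return v1
//! ```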

use std::collections::BTreeMap;
use std::fmt;
use std::ops::Not;

use rustc_data_structures::fx::FxHashSet;

use cranelift_codegen::cursor::{Cursor, FuncCursor};
use cranelift_codegen::ir::immediates::Offset32;
use cranelift_codegen::ir::{InstructionData, Opcode, ValueDef};

use crate::prelude::*;

/// Workaround for `StackSlot` not implementing `Ord`.
#[derive(Copy, Clone, PartialEq, Eq)]
struct OrdStackSlot(StackSlot);

impl fmt::Debug for OrdStackSlot {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{:?}", self.0)
    }
}

impl PartialOrd for OrdStackSlot {
    fn partial_cmp(&self, rhs: &Self) -> Option<std::cmp::Ordering> {
        self.0.as_u32().partial_cmp(&rhs.0.as_u32())
    }
}

impl Ord for OrdStackSlot {
    fn cmp(&self, rhs: &Self) -> std::cmp::Ordering {
        self.0.as_u32().cmp(&rhs.0.as_u32())
    }
}

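/// All recorded uses of a single stack slot, grouped by the kind of instruction accessing it.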
#[derive(Debug, Default)]
struct StackSlotUsage {
    stack_addr: FxHashSet<Inst>,
    stack_load: FxHashSet<Inst>,
    stack_store: FxHashSet<Inst>,
}

impl StackSlotUsage {
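    /// Returns every recorded `stack_store` that may have written (part of) the value read by
    /// `load`, judged by spatial overlap and dominance.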
    fn potential_stores_for_load(&self, ctx: &Context, load: Inst) -> Vec<Inst> {
        self.stack_store
            .iter()
            .cloned()
            .filter(|&store| {
                match spatial_overlap(&ctx.func, store, load) {
                    SpatialOverlap::No => false, // Can never be the source of the loaded value.
                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
                }
            })
            .filter(|&store| {
                match temporal_order(ctx, store, load) {
                    TemporalOrder::NeverBefore => false, // Can never be the source of the loaded value.
                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
                }
            })
            .collect::<Vec<Inst>>()
    }

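    /// Returns every recorded `stack_load` that may read (part of) the value written by `store`,
    /// judged by spatial overlap and dominance.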
    fn potential_loads_of_store(&self, ctx: &Context, store: Inst) -> Vec<Inst> {
        self.stack_load
            .iter()
            .cloned()
            .filter(|&load| {
                match spatial_overlap(&ctx.func, store, load) {
                    SpatialOverlap::No => false, // Can never read the stored value.
                    SpatialOverlap::Partial | SpatialOverlap::Full => true,
                }
            })
            .filter(|&load| {
                match temporal_order(ctx, store, load) {
                    TemporalOrder::NeverBefore => false, // Can never read the stored value.
                    TemporalOrder::MaybeBefore | TemporalOrder::DefinitivelyBefore => true,
                }
            })
            .collect::<Vec<Inst>>()
    }

    fn remove_unused_stack_addr(func: &mut Function, inst: Inst) {
        func.dfg.detach_results(inst);
        func.dfg.replace(inst).nop();
    }

    fn remove_unused_load(func: &mut Function, load: Inst) {
        func.dfg.detach_results(load);
        func.dfg.replace(load).nop();
    }

    fn remove_dead_store(&mut self, func: &mut Function, store: Inst) {
        func.dfg.replace(store).nop();
        self.stack_store.remove(&store);
    }

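    /// Forwards `value` to the result of `load`: when the types match, the load becomes a `nop`
    /// and its result a plain alias of `value`; otherwise it is replaced by a `bitcast`. Either
    /// way the instruction is no longer tracked as a load.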
    fn change_load_to_alias(&mut self, func: &mut Function, load: Inst, value: Value) {
        let loaded_value = func.dfg.inst_results(load)[0];
        let loaded_type = func.dfg.value_type(loaded_value);

        if func.dfg.value_type(value) == loaded_type {
            func.dfg.detach_results(load);
            func.dfg.replace(load).nop();
            func.dfg.change_to_alias(loaded_value, value);
        } else {
            func.dfg.replace(load).bitcast(loaded_type, value);
        }

        self.stack_load.remove(&load);
    }
}

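/// State threaded through the pass: the Cranelift `Context` plus, for every stack slot, the set
/// of `stack_addr`, `stack_load` and `stack_store` instructions accessing it.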
struct OptimizeContext<'a> {
    ctx: &'a mut Context,
    stack_slot_usage_map: BTreeMap<OrdStackSlot, StackSlotUsage>,
}

impl<'a> OptimizeContext<'a> {
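    /// Computes the cfg and domtree, then records every `stack_addr`, `stack_load` and
    /// `stack_store` instruction, grouped by the stack slot it accesses.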
    fn for_context(ctx: &'a mut Context) -> Self {
        ctx.flowgraph(); // Compute cfg and domtree.

        // Record all stack_addr, stack_load and stack_store instructions.
        let mut stack_slot_usage_map = BTreeMap::<OrdStackSlot, StackSlotUsage>::new();

        let mut cursor = FuncCursor::new(&mut ctx.func);
        while let Some(_block) = cursor.next_block() {
            while let Some(inst) = cursor.next_inst() {
                match cursor.func.dfg[inst] {
                    InstructionData::StackLoad {
                        opcode: Opcode::StackAddr,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_addr
                            .insert(inst);
                    }
                    InstructionData::StackLoad {
                        opcode: Opcode::StackLoad,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_load
                            .insert(inst);
                    }
                    InstructionData::StackStore {
                        opcode: Opcode::StackStore,
                        arg: _,
                        stack_slot,
                        offset: _,
                    } => {
                        stack_slot_usage_map
                            .entry(OrdStackSlot(stack_slot))
                            .or_insert_with(StackSlotUsage::default)
                            .stack_store
                            .insert(inst);
                    }
                    _ => {}
                }
            }
        }

        OptimizeContext {
            ctx,
            stack_slot_usage_map,
        }
    }
}

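/// Entry point of the pass: fold `stack_addr` into loads and stores where possible, drop unused
/// `stack_addr`/`stack_load` instructions, then for every slot whose address does not escape
/// forward stores to loads, remove dead stores and shrink slots that end up unused to zero bytes.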
pub(super) fn optimize_function(
    ctx: &mut Context,
    #[cfg_attr(not(debug_assertions), allow(unused_variables))]
    clif_comments: &mut crate::pretty_clif::CommentWriter,
) {
    combine_stack_addr_with_load_store(&mut ctx.func);

    let mut opt_ctx = OptimizeContext::for_context(ctx);

    // FIXME Repeat following instructions until fixpoint.

    remove_unused_stack_addr_and_stack_load(&mut opt_ctx);

    #[cfg(debug_assertions)]
    {
        for (&OrdStackSlot(stack_slot), usage) in &opt_ctx.stack_slot_usage_map {
            clif_comments.add_comment(stack_slot, format!("used by: {:?}", usage));
        }
    }

    for (stack_slot, users) in opt_ctx.stack_slot_usage_map.iter_mut() {
        if users.stack_addr.is_empty().not() {
            // Stack addr leaked; there may be unknown loads and stores.
            // FIXME use stacked borrows to optimize
            continue;
        }

        for load in users.stack_load.clone().into_iter() {
            let potential_stores = users.potential_stores_for_load(&opt_ctx.ctx, load);

            #[cfg(debug_assertions)]
            for &store in &potential_stores {
                clif_comments.add_comment(
                    load,
                    format!(
                        "Potential store -> load forwarding {} -> {} ({:?}, {:?})",
                        opt_ctx.ctx.func.dfg.display_inst(store, None),
                        opt_ctx.ctx.func.dfg.display_inst(load, None),
                        spatial_overlap(&opt_ctx.ctx.func, store, load),
                        temporal_order(&opt_ctx.ctx, store, load),
                    ),
                );
            }

            match *potential_stores {
                [] => {
                    #[cfg(debug_assertions)]
                    clif_comments
                        .add_comment(load, "[BUG?] Reading uninitialized memory".to_string());
                }
                [store]
                    if spatial_overlap(&opt_ctx.ctx.func, store, load) == SpatialOverlap::Full
                        && temporal_order(&opt_ctx.ctx, store, load)
                            == TemporalOrder::DefinitivelyBefore =>
                {
                    // Only one store could have been the origin of the value.
                    let stored_value = opt_ctx.ctx.func.dfg.inst_args(store)[0];

                    #[cfg(debug_assertions)]
                    clif_comments
                        .add_comment(load, format!("Store to load forward {} -> {}", store, load));

                    users.change_load_to_alias(&mut opt_ctx.ctx.func, load, stored_value);
                }
                _ => {} // FIXME implement this
            }
        }

        for store in users.stack_store.clone().into_iter() {
            let potential_loads = users.potential_loads_of_store(&opt_ctx.ctx, store);

            #[cfg(debug_assertions)]
            for &load in &potential_loads {
                clif_comments.add_comment(
                    store,
                    format!(
                        "Potential load from store {} <- {} ({:?}, {:?})",
                        opt_ctx.ctx.func.dfg.display_inst(load, None),
                        opt_ctx.ctx.func.dfg.display_inst(store, None),
                        spatial_overlap(&opt_ctx.ctx.func, store, load),
                        temporal_order(&opt_ctx.ctx, store, load),
                    ),
                );
            }

            if potential_loads.is_empty() {
                // Never loaded; can safely remove all stores and the stack slot.
                // FIXME also remove stores when there is always a next store before a load.

                #[cfg(debug_assertions)]
                clif_comments.add_comment(
                    store,
                    format!(
                        "Remove dead stack store {} of {}",
                        opt_ctx.ctx.func.dfg.display_inst(store, None),
                        stack_slot.0
                    ),
                );

                users.remove_dead_store(&mut opt_ctx.ctx.func, store);
            }
        }

        if users.stack_store.is_empty() && users.stack_load.is_empty() {
            opt_ctx.ctx.func.stack_slots[stack_slot.0].size = 0;
        }
    }
}

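/// Rewrites a `load`/`store` whose address comes from a `stack_addr` into the equivalent
/// `stack_load`/`stack_store`, folding the two offsets together. Roughly (hand-written sketch,
/// not real compiler output):
///
/// ```text
/// v0 = stack_addr.i64 ss0+8
/// v1 = load.i32 v0+4        ;; becomes: v1 = stack_load.i32 ss0+12
/// ```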
fn combine_stack_addr_with_load_store(func: &mut Function) {
    // Turn load and store into stack_load and stack_store when possible.
    let mut cursor = FuncCursor::new(func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            match cursor.func.dfg[inst] {
                InstructionData::Load {
                    opcode: Opcode::Load,
                    arg: addr,
                    flags: _,
                    offset,
                } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
                    {
                        continue; // WORKAROUND: stack_load.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) =
                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
                    {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
                        {
                            let ty = cursor.func.dfg.ctrl_typevar(inst);
                            cursor.func.dfg.replace(inst).stack_load(
                                ty,
                                stack_slot,
                                combined_offset,
                            );
                        }
                    }
                }
                InstructionData::Store {
                    opcode: Opcode::Store,
                    args: [value, addr],
                    flags: _,
                    offset,
                } => {
                    if cursor.func.dfg.ctrl_typevar(inst) == types::I128
                        || cursor.func.dfg.ctrl_typevar(inst).is_vector()
                    {
                        continue; // WORKAROUND: stack_store.i128 not yet implemented
                    }
                    if let Some((stack_slot, stack_addr_offset)) =
                        try_get_stack_slot_and_offset_for_addr(cursor.func, addr)
                    {
                        if let Some(combined_offset) = offset.try_add_i64(stack_addr_offset.into())
                        {
                            cursor.func.dfg.replace(inst).stack_store(
                                value,
                                stack_slot,
                                combined_offset,
                            );
                        }
                    }
                }
                _ => {}
            }
        }
    }
}

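/// Replaces every recorded `stack_addr` and `stack_load` whose results are never used with a
/// `nop` and drops it from the usage map, so that a dead `stack_addr` no longer counts as leaking
/// the slot's address.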
fn remove_unused_stack_addr_and_stack_load(opt_ctx: &mut OptimizeContext<'_>) {
    // FIXME incrementally rebuild on each call?
    let mut stack_addr_load_insts_users = FxHashMap::<Inst, FxHashSet<Inst>>::default();

    let mut cursor = FuncCursor::new(&mut opt_ctx.ctx.func);
    while let Some(_block) = cursor.next_block() {
        while let Some(inst) = cursor.next_inst() {
            for &arg in cursor.func.dfg.inst_args(inst) {
                if let ValueDef::Result(arg_origin, 0) = cursor.func.dfg.value_def(arg) {
                    match cursor.func.dfg[arg_origin].opcode() {
                        Opcode::StackAddr | Opcode::StackLoad => {
                            stack_addr_load_insts_users
                                .entry(arg_origin)
                                .or_insert_with(FxHashSet::default)
                                .insert(inst);
                        }
                        _ => {}
                    }
                }
            }
        }
    }

    #[cfg(debug_assertions)]
    for inst in stack_addr_load_insts_users.keys() {
        let mut is_recorded_stack_addr_or_stack_load = false;
        for stack_slot_users in opt_ctx.stack_slot_usage_map.values() {
            is_recorded_stack_addr_or_stack_load |= stack_slot_users.stack_addr.contains(inst)
                || stack_slot_users.stack_load.contains(inst);
        }
        assert!(is_recorded_stack_addr_or_stack_load);
    }

    // Replace all unused stack_addr and stack_load instructions with nop.
    let mut func = &mut opt_ctx.ctx.func;

    for stack_slot_users in opt_ctx.stack_slot_usage_map.values_mut() {
        stack_slot_users
            .stack_addr
            .drain_filter(|inst| {
                stack_addr_load_insts_users
                    .get(inst)
                    .map(|users| users.is_empty())
                    .unwrap_or(true)
            })
            .for_each(|inst| StackSlotUsage::remove_unused_stack_addr(&mut func, inst));

        stack_slot_users
            .stack_load
            .drain_filter(|inst| {
                stack_addr_load_insts_users
                    .get(inst)
                    .map(|users| users.is_empty())
                    .unwrap_or(true)
            })
            .for_each(|inst| StackSlotUsage::remove_unused_load(&mut func, inst));
    }
}

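/// If `addr` is the first result of a `stack_addr` instruction, returns that instruction's stack
/// slot and offset; otherwise returns `None`.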
fn try_get_stack_slot_and_offset_for_addr(
    func: &Function,
    addr: Value,
) -> Option<(StackSlot, Offset32)> {
    if let ValueDef::Result(addr_inst, 0) = func.dfg.value_def(addr) {
        if let InstructionData::StackLoad {
            opcode: Opcode::StackAddr,
            stack_slot,
            offset,
        } = func.dfg[addr_inst]
        {
            return Some((stack_slot, offset));
        }
    }
    None
}

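/// Describes whether two stack slot accesses touch disjoint, partially overlapping, or identical
/// byte ranges.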
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum SpatialOverlap {
    No,
    Partial,
    Full,
}

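/// Computes how the byte ranges accessed by `src` and `dest` overlap. For example, two 4-byte
/// accesses to the same slot at offsets 0 and 0 overlap fully, at offsets 0 and 2 partially, and
/// at offsets 0 and 4 not at all.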
fn spatial_overlap(func: &Function, src: Inst, dest: Inst) -> SpatialOverlap {
    fn inst_info(func: &Function, inst: Inst) -> (StackSlot, Offset32, u32) {
        match func.dfg[inst] {
            InstructionData::StackLoad {
                opcode: Opcode::StackAddr,
                stack_slot,
                offset,
            }
            | InstructionData::StackLoad {
                opcode: Opcode::StackLoad,
                stack_slot,
                offset,
            }
            | InstructionData::StackStore {
                opcode: Opcode::StackStore,
                stack_slot,
                offset,
                arg: _,
            } => (stack_slot, offset, func.dfg.ctrl_typevar(inst).bytes()),
            _ => unreachable!("{:?}", func.dfg[inst]),
        }
    }

    debug_assert_ne!(src, dest);

    let (src_ss, src_offset, src_size) = inst_info(func, src);
    let (dest_ss, dest_offset, dest_size) = inst_info(func, dest);

    if src_ss != dest_ss {
        return SpatialOverlap::No;
    }

    if src_offset == dest_offset && src_size == dest_size {
        return SpatialOverlap::Full;
    }

    let src_end: i64 = src_offset.try_add_i64(i64::from(src_size)).unwrap().into();
    let dest_end: i64 = dest_offset
        .try_add_i64(i64::from(dest_size))
        .unwrap()
        .into();
    if src_end <= dest_offset.into() || dest_end <= src_offset.into() {
        return SpatialOverlap::No;
    }

    SpatialOverlap::Partial
}

#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum TemporalOrder {
    /// `src` will never be executed before `dest`.
    NeverBefore,

    /// `src` may be executed before `dest`.
    MaybeBefore,

    /// `src` will always be executed before `dest`.
    /// There may still be other instructions in between.
    DefinitivelyBefore,
}

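/// Approximates the execution order of `src` and `dest` using the dominator tree: if `src`
/// dominates `dest`, it is definitely executed before `dest`; otherwise it may or may not be.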
fn temporal_order(ctx: &Context, src: Inst, dest: Inst) -> TemporalOrder {
    debug_assert_ne!(src, dest);

    if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        TemporalOrder::DefinitivelyBefore
    } else if ctx.domtree.dominates(src, dest, &ctx.func.layout) {
        // FIXME This condition duplicates the one above, so `NeverBefore` is currently never
        // returned. That only loses precision; a correct check would have to prove that no
        // execution can reach `dest` after `src`, which plain dominance doesn't give us.
        TemporalOrder::NeverBefore
    } else {
        TemporalOrder::MaybeBefore
    }
}