1 //! Dataflow analysis with arbitrary transfer functions.
3 //! This module is a work in progress. You should instead use `BitDenotation` in
4 //! `librustc_mir/dataflow/mod.rs` and encode your transfer function as a [gen/kill set][gk]. In
5 //! doing so, your analysis will run faster and you will be able to generate graphviz diagrams for
6 //! debugging with no extra effort. The interface in this module is intended only for dataflow
7 //! problems that cannot be expressed using gen/kill sets.
9 //! FIXME(ecstaticmorse): In the long term, the plan is to preserve the existing `BitDenotation`
10 //! interface, but make `Engine` and `ResultsCursor` the canonical way to perform and inspect a
11 //! dataflow analysis. This requires porting the graphviz debugging logic to this module, deciding
12 //! on a way to handle the `before` methods in `BitDenotation` and creating an adapter so that
13 //! gen-kill problems can still be evaluated efficiently. See the discussion in [#64566] for more
16 //! [gk]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems
17 //! [#64566]: https://github.com/rust-lang/rust/pull/64566
19 use std::borrow::Borrow;
20 use std::cmp::Ordering;
21 use std::ffi::OsString;
22 use std::path::{Path, PathBuf};
23 use std::{fs, io, ops};
25 use rustc::mir::{self, traversal, BasicBlock, Location};
26 use rustc::ty::{self, TyCtxt};
27 use rustc_data_structures::work_queue::WorkQueue;
28 use rustc_hir::def_id::DefId;
29 use rustc_index::bit_set::BitSet;
30 use rustc_index::vec::{Idx, IndexVec};
31 use rustc_span::symbol::sym;
33 use crate::dataflow::BottomValue;
37 /// A specific kind of dataflow analysis.
39 /// To run a dataflow analysis, one must set the initial state of the `START_BLOCK` via
40 /// `initialize_start_block` and define a transfer function for each statement or terminator via
41 /// the various `effect` methods. The entry set for all other basic blocks is initialized to
42 /// `Self::BOTTOM_VALUE`. The dataflow `Engine` then iteratively updates the various entry sets for
43 /// each block with the cumulative effects of the transfer functions of all preceding blocks.
45 /// You should use an `Engine` to actually run an analysis, and a `ResultsCursor` to inspect the
46 /// results of that analysis like so:
48 /// ```ignore(cross-crate-imports)
49 /// fn do_my_analysis(body: &mir::Body<'tcx>, dead_unwinds: &BitSet<BasicBlock>) {
50 /// // `MyAnalysis` implements `Analysis`.
51 /// let analysis = MyAnalysis::new();
53 /// let results = Engine::new(body, dead_unwinds, analysis).iterate_to_fixpoint();
54 /// let mut cursor = ResultsCursor::new(body, results);
56 /// for (_, statement_index) in body.block_data[START_BLOCK].statements.iter_enumerated() {
57 /// cursor.seek_after(Location { block: START_BLOCK, statement_index });
58 /// let state = cursor.get();
59 /// println!("{:?}", state);
63 pub trait Analysis<'tcx>: BottomValue {
64 /// The index type used to access the dataflow state.
67 /// A name, used for debugging, that describes this dataflow analysis.
69 /// The name should be suitable as part of a filename, so avoid whitespace, slashes or periods
70 /// and try to keep it short.
71 const NAME: &'static str;
73 /// How each element of your dataflow state will be displayed during debugging.
75 /// By default, this is the `fmt::Debug` representation of `Self::Idx`.
76 fn pretty_print_idx(&self, w: &mut impl io::Write, idx: Self::Idx) -> io::Result<()> {
77 write!(w, "{:?}", idx)
80 /// The size of each bitvector allocated for each block.
81 fn bits_per_block(&self, body: &mir::Body<'tcx>) -> usize;
83 /// Mutates the entry set of the `START_BLOCK` to contain the initial state for dataflow
85 fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut BitSet<Self::Idx>);
87 /// Updates the current dataflow state with the effect of evaluating a statement.
88 fn apply_statement_effect(
90 state: &mut BitSet<Self::Idx>,
91 statement: &mir::Statement<'tcx>,
95 /// Updates the current dataflow state with the effect of evaluating a terminator.
97 /// Note that the effect of a successful return from a `Call` terminator should **not** be
98 /// acounted for in this function. That should go in `apply_call_return_effect`. For example,
99 /// in the `InitializedPlaces` analyses, the return place is not marked as initialized here.
100 fn apply_terminator_effect(
102 state: &mut BitSet<Self::Idx>,
103 terminator: &mir::Terminator<'tcx>,
107 /// Updates the current dataflow state with the effect of a successful return from a `Call`
110 /// This is separated from `apply_terminator_effect` to properly track state across
111 /// unwind edges for `Call`s.
112 fn apply_call_return_effect(
114 state: &mut BitSet<Self::Idx>,
116 func: &mir::Operand<'tcx>,
117 args: &[mir::Operand<'tcx>],
118 return_place: &mir::Place<'tcx>,
121 /// Applies the cumulative effect of an entire basic block to the dataflow state (except for
122 /// `call_return_effect`, which is handled in the `Engine`).
124 /// The default implementation calls `statement_effect` for every statement in the block before
125 /// finally calling `terminator_effect`. However, some dataflow analyses are able to coalesce
126 /// transfer functions for an entire block and apply them at once. Such analyses should
127 /// override `block_effect`.
128 fn apply_whole_block_effect(
130 state: &mut BitSet<Self::Idx>,
132 block_data: &mir::BasicBlockData<'tcx>,
134 for (statement_index, stmt) in block_data.statements.iter().enumerate() {
135 let location = Location { block, statement_index };
136 self.apply_statement_effect(state, stmt, location);
139 let location = Location { block, statement_index: block_data.statements.len() };
140 self.apply_terminator_effect(state, block_data.terminator(), location);
143 /// Applies the cumulative effect of a sequence of statements (and possibly a terminator)
144 /// within a single basic block.
146 /// When called with `0..block_data.statements.len() + 1` as the statement range, this function
147 /// is equivalent to `apply_whole_block_effect`.
148 fn apply_partial_block_effect(
150 state: &mut BitSet<Self::Idx>,
152 block_data: &mir::BasicBlockData<'tcx>,
153 mut range: ops::Range<usize>,
155 if range.is_empty() {
159 // The final location might be a terminator, so iterate through all statements until the
160 // final one, then check to see whether the final one is a statement or terminator.
162 // This can't cause the range to wrap-around since we check that the range contains at
163 // least one element above.
165 let final_location = Location { block, statement_index: range.end };
167 for statement_index in range {
168 let location = Location { block, statement_index };
169 let stmt = &block_data.statements[statement_index];
170 self.apply_statement_effect(state, stmt, location);
173 if final_location.statement_index == block_data.statements.len() {
174 let terminator = block_data.terminator();
175 self.apply_terminator_effect(state, terminator, final_location);
177 let stmt = &block_data.statements[final_location.statement_index];
178 self.apply_statement_effect(state, stmt, final_location);
183 #[derive(Clone, Copy, Debug)]
184 enum CursorPosition {
185 AtBlockStart(BasicBlock),
189 impl CursorPosition {
190 fn block(&self) -> BasicBlock {
192 Self::AtBlockStart(block) => block,
193 Self::After(Location { block, .. }) => block,
198 type ResultsRefCursor<'a, 'mir, 'tcx, A> = ResultsCursor<'mir, 'tcx, A, &'a Results<'tcx, A>>;
200 /// Inspect the results of dataflow analysis.
202 /// This cursor has linear performance when visiting statements in a block in order. Visiting
203 /// statements within a block in reverse order is `O(n^2)`, where `n` is the number of statements
205 pub struct ResultsCursor<'mir, 'tcx, A, R = Results<'tcx, A>>
209 body: &'mir mir::Body<'tcx>,
211 state: BitSet<A::Idx>,
215 /// Whether the effects of `apply_call_return_effect` are currently stored in `state`.
217 /// This flag ensures that multiple calls to `seek_after_assume_call_returns` with the same
218 /// target only result in one invocation of `apply_call_return_effect`.
219 is_call_return_effect_applied: bool,
222 impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R>
225 R: Borrow<Results<'tcx, A>>,
227 /// Returns a new cursor for `results` that points to the start of the `START_BLOCK`.
228 pub fn new(body: &'mir mir::Body<'tcx>, results: R) -> Self {
231 pos: CursorPosition::AtBlockStart(mir::START_BLOCK),
232 is_call_return_effect_applied: false,
233 state: results.borrow().entry_sets[mir::START_BLOCK].clone(),
238 pub fn analysis(&self) -> &A {
239 &self.results.borrow().analysis
242 /// Resets the cursor to the start of the given `block`.
243 pub fn seek_to_block_start(&mut self, block: BasicBlock) {
244 self.state.overwrite(&self.results.borrow().entry_sets[block]);
245 self.pos = CursorPosition::AtBlockStart(block);
246 self.is_call_return_effect_applied = false;
249 /// Updates the cursor to hold the dataflow state immediately before `target`.
250 pub fn seek_before(&mut self, target: Location) {
251 assert!(target <= self.body.terminator_loc(target.block));
253 if target.statement_index == 0 {
254 self.seek_to_block_start(target.block);
256 self._seek_after(Location {
258 statement_index: target.statement_index - 1,
263 /// Updates the cursor to hold the dataflow state at `target`.
265 /// If `target` is a `Call` terminator, `apply_call_return_effect` will not be called. See
266 /// `seek_after_assume_call_returns` if you wish to observe the dataflow state upon a
267 /// successful return.
268 pub fn seek_after(&mut self, target: Location) {
269 assert!(target <= self.body.terminator_loc(target.block));
271 // This check ensures the correctness of a call to `seek_after_assume_call_returns`
272 // followed by one to `seek_after` with the same target.
273 if self.is_call_return_effect_applied {
274 self.seek_to_block_start(target.block);
277 self._seek_after(target);
280 /// Equivalent to `seek_after`, but also calls `apply_call_return_effect` if `target` is a
281 /// `Call` terminator whose callee is convergent.
282 pub fn seek_after_assume_call_returns(&mut self, target: Location) {
283 assert!(target <= self.body.terminator_loc(target.block));
285 self._seek_after(target);
287 if target != self.body.terminator_loc(target.block) {
291 let term = self.body.basic_blocks()[target.block].terminator();
292 if let mir::TerminatorKind::Call {
293 destination: Some((return_place, _)), func, args, ..
296 if !self.is_call_return_effect_applied {
297 self.is_call_return_effect_applied = true;
298 self.results.borrow().analysis.apply_call_return_effect(
309 fn _seek_after(&mut self, target: Location) {
310 let Location { block: target_block, statement_index: target_index } = target;
312 if self.pos.block() != target_block {
313 self.seek_to_block_start(target_block);
316 // If we're in the same block but after the target statement, we need to reset to the start
318 if let CursorPosition::After(Location { statement_index: curr_index, .. }) = self.pos {
319 match curr_index.cmp(&target_index) {
320 Ordering::Equal => return,
322 Ordering::Greater => self.seek_to_block_start(target_block),
326 // The cursor is now in the same block as the target location pointing at an earlier
328 debug_assert_eq!(self.pos.block(), target_block);
329 if let CursorPosition::After(Location { statement_index, .. }) = self.pos {
330 debug_assert!(statement_index < target_index);
333 let first_unapplied_statement = match self.pos {
334 CursorPosition::AtBlockStart(_) => 0,
335 CursorPosition::After(Location { statement_index, .. }) => statement_index + 1,
338 let block_data = &self.body.basic_blocks()[target_block];
339 self.results.borrow().analysis.apply_partial_block_effect(
343 first_unapplied_statement..target_index + 1,
346 self.pos = CursorPosition::After(target);
347 self.is_call_return_effect_applied = false;
350 /// Gets the dataflow state at the current location.
351 pub fn get(&self) -> &BitSet<A::Idx> {
356 /// A completed dataflow analysis.
357 pub struct Results<'tcx, A>
362 entry_sets: IndexVec<BasicBlock, BitSet<A::Idx>>,
365 /// All information required to iterate a dataflow analysis to fixpoint.
366 pub struct Engine<'a, 'tcx, A>
371 bits_per_block: usize,
373 body: &'a mir::Body<'tcx>,
375 dead_unwinds: &'a BitSet<BasicBlock>,
376 entry_sets: IndexVec<BasicBlock, BitSet<A::Idx>>,
379 impl<A> Engine<'a, 'tcx, A>
385 body: &'a mir::Body<'tcx>,
387 dead_unwinds: &'a BitSet<BasicBlock>,
390 let bits_per_block = analysis.bits_per_block(body);
392 let bottom_value_set = if A::BOTTOM_VALUE == true {
393 BitSet::new_filled(bits_per_block)
395 BitSet::new_empty(bits_per_block)
398 let mut entry_sets = IndexVec::from_elem(bottom_value_set, body.basic_blocks());
399 analysis.initialize_start_block(body, &mut entry_sets[mir::START_BLOCK]);
401 Engine { analysis, bits_per_block, tcx, body, def_id, dead_unwinds, entry_sets }
404 pub fn iterate_to_fixpoint(mut self) -> Results<'tcx, A> {
405 let mut temp_state = BitSet::new_empty(self.bits_per_block);
407 let mut dirty_queue: WorkQueue<BasicBlock> =
408 WorkQueue::with_none(self.body.basic_blocks().len());
410 for (bb, _) in traversal::reverse_postorder(self.body) {
411 dirty_queue.insert(bb);
414 // Add blocks that are not reachable from START_BLOCK to the work queue. These blocks will
415 // be processed after the ones added above.
416 for bb in self.body.basic_blocks().indices() {
417 dirty_queue.insert(bb);
420 while let Some(bb) = dirty_queue.pop() {
421 let bb_data = &self.body[bb];
422 let on_entry = &self.entry_sets[bb];
424 temp_state.overwrite(on_entry);
425 self.analysis.apply_whole_block_effect(&mut temp_state, bb, bb_data);
427 self.propagate_bits_into_graph_successors_of(
434 let Engine { tcx, body, def_id, analysis, entry_sets, .. } = self;
436 let results = Results { analysis, entry_sets };
438 let attrs = tcx.get_attrs(def_id);
439 if let Some(path) = get_dataflow_graphviz_output_path(tcx, attrs, A::NAME) {
440 let result = write_dataflow_graphviz_results(body, def_id, &path, &results);
441 if let Err(e) = result {
442 warn!("Failed to write dataflow results to {}: {}", path.display(), e);
449 fn propagate_bits_into_graph_successors_of(
451 in_out: &mut BitSet<A::Idx>,
452 (bb, bb_data): (BasicBlock, &'a mir::BasicBlockData<'tcx>),
453 dirty_list: &mut WorkQueue<BasicBlock>,
455 match bb_data.terminator().kind {
456 mir::TerminatorKind::Return
457 | mir::TerminatorKind::Resume
458 | mir::TerminatorKind::Abort
459 | mir::TerminatorKind::GeneratorDrop
460 | mir::TerminatorKind::Unreachable => {}
462 mir::TerminatorKind::Goto { target }
463 | mir::TerminatorKind::Assert { target, cleanup: None, .. }
464 | mir::TerminatorKind::Yield { resume: target, drop: None, .. }
465 | mir::TerminatorKind::Drop { target, location: _, unwind: None }
466 | mir::TerminatorKind::DropAndReplace { target, value: _, location: _, unwind: None } =>
468 self.propagate_bits_into_entry_set_for(in_out, target, dirty_list);
471 mir::TerminatorKind::Yield { resume: target, drop: Some(drop), .. } => {
472 self.propagate_bits_into_entry_set_for(in_out, target, dirty_list);
473 self.propagate_bits_into_entry_set_for(in_out, drop, dirty_list);
476 mir::TerminatorKind::Assert { target, cleanup: Some(unwind), .. }
477 | mir::TerminatorKind::Drop { target, location: _, unwind: Some(unwind) }
478 | mir::TerminatorKind::DropAndReplace {
482 unwind: Some(unwind),
484 self.propagate_bits_into_entry_set_for(in_out, target, dirty_list);
485 if !self.dead_unwinds.contains(bb) {
486 self.propagate_bits_into_entry_set_for(in_out, unwind, dirty_list);
490 mir::TerminatorKind::SwitchInt { ref targets, .. } => {
491 for target in targets {
492 self.propagate_bits_into_entry_set_for(in_out, *target, dirty_list);
496 mir::TerminatorKind::Call { cleanup, ref destination, ref func, ref args, .. } => {
497 if let Some(unwind) = cleanup {
498 if !self.dead_unwinds.contains(bb) {
499 self.propagate_bits_into_entry_set_for(in_out, unwind, dirty_list);
503 if let Some((ref dest_place, dest_bb)) = *destination {
504 // N.B.: This must be done *last*, after all other
505 // propagation, as documented in comment above.
506 self.analysis.apply_call_return_effect(in_out, bb, func, args, dest_place);
507 self.propagate_bits_into_entry_set_for(in_out, dest_bb, dirty_list);
511 mir::TerminatorKind::FalseEdges { real_target, imaginary_target } => {
512 self.propagate_bits_into_entry_set_for(in_out, real_target, dirty_list);
513 self.propagate_bits_into_entry_set_for(in_out, imaginary_target, dirty_list);
516 mir::TerminatorKind::FalseUnwind { real_target, unwind } => {
517 self.propagate_bits_into_entry_set_for(in_out, real_target, dirty_list);
518 if let Some(unwind) = unwind {
519 if !self.dead_unwinds.contains(bb) {
520 self.propagate_bits_into_entry_set_for(in_out, unwind, dirty_list);
527 fn propagate_bits_into_entry_set_for(
529 in_out: &BitSet<A::Idx>,
531 dirty_queue: &mut WorkQueue<BasicBlock>,
533 let entry_set = &mut self.entry_sets[bb];
534 let set_changed = self.analysis.join(entry_set, &in_out);
536 dirty_queue.insert(bb);
541 /// Looks for attributes like `#[rustc_mir(borrowck_graphviz_postflow="./path/to/suffix.dot")]` and
542 /// extracts the path with the given analysis name prepended to the suffix.
544 /// Returns `None` if no such attribute exists.
545 fn get_dataflow_graphviz_output_path(
547 attrs: ty::Attributes<'tcx>,
549 ) -> Option<PathBuf> {
550 let mut rustc_mir_attrs = attrs
552 .filter(|attr| attr.check_name(sym::rustc_mir))
553 .flat_map(|attr| attr.meta_item_list().into_iter().flat_map(|v| v.into_iter()));
555 let borrowck_graphviz_postflow =
556 rustc_mir_attrs.find(|attr| attr.check_name(sym::borrowck_graphviz_postflow))?;
558 let path_and_suffix = match borrowck_graphviz_postflow.value_str() {
562 borrowck_graphviz_postflow.span(),
563 "borrowck_graphviz_postflow requires a path",
570 // Change "path/suffix.dot" to "path/analysis_name_suffix.dot"
571 let mut ret = PathBuf::from(path_and_suffix.to_string());
572 let suffix = ret.file_name().unwrap();
574 let mut file_name: OsString = analysis.into();
576 file_name.push(suffix);
577 ret.set_file_name(file_name);
582 fn write_dataflow_graphviz_results<A: Analysis<'tcx>>(
583 body: &mir::Body<'tcx>,
586 results: &Results<'tcx, A>,
587 ) -> io::Result<()> {
588 debug!("printing dataflow results for {:?} to {}", def_id, path.display());
590 let mut buf = Vec::new();
591 let graphviz = graphviz::Formatter::new(body, def_id, results);
593 dot::render(&graphviz, &mut buf)?;