src/librustc_mir/dataflow/generic.rs

   1 //! Dataflow analysis with arbitrary transfer functions.
   2 //!
   3 //! This module is a work in progress. You should instead use `BitDenotation` in
   4 //! `librustc_mir/dataflow/mod.rs` and encode your transfer function as a [gen/kill set][gk]. In
   5 //! doing so, your analysis will run faster and you will be able to generate graphviz diagrams for
   6 //! debugging with no extra effort. The interface in this module is intended only for dataflow
   7 //! problems that cannot be expressed using gen/kill sets.
   8 //!
   9 //! FIXME(ecstaticmorse): In the long term, the plan is to preserve the existing `BitDenotation`
  10 //! interface, but make `Engine` and `ResultsCursor` the canonical way to perform and inspect a
  11 //! dataflow analysis. This requires porting the graphviz debugging logic to this module, deciding
  12 //! on a way to handle the `before` methods in `BitDenotation` and creating an adapter so that
  13 //! gen-kill problems can still be evaluated efficiently. See the discussion in [#64566] for more
  14 //! information.
  15 //!
  16 //! [gk]: https://en.wikipedia.org/wiki/Data-flow_analysis#Bit_vector_problems
  17 //! [#64566]: https://github.com/rust-lang/rust/pull/64566
  18
  19 use std::borrow::Borrow;
  20 use std::cmp::Ordering;
  21 use std::ffi::OsString;
  22 use std::path::{Path, PathBuf};
  23 use std::{fs, io, ops};
  24
  25 use rustc::mir::{self, traversal, BasicBlock, Location};
  26 use rustc::ty::{self, TyCtxt};
  27 use rustc_data_structures::work_queue::WorkQueue;
  28 use rustc_hir::def_id::DefId;
  29 use rustc_index::bit_set::BitSet;
  30 use rustc_index::vec::{Idx, IndexVec};
  31 use rustc_span::symbol::sym;
  32
  33 use crate::dataflow::BottomValue;
  34
  35 mod graphviz;
  36
  37 /// A specific kind of dataflow analysis.
  38 ///
  39 /// To run a dataflow analysis, one must set the initial state of the `START_BLOCK` via
  40 /// `initialize_start_block` and define a transfer function for each statement or terminator via
  41 /// the various `effect` methods. The entry set for all other basic blocks is initialized to
  42 /// `Self::BOTTOM_VALUE`. The dataflow `Engine` then iteratively updates the various entry sets for
  43 /// each block with the cumulative effects of the transfer functions of all preceding blocks.
  44 ///
  45 /// You should use an `Engine` to actually run an analysis, and a `ResultsCursor` to inspect the
  46 /// results of that analysis like so:
  47 ///
  48 /// ```ignore(cross-crate-imports)
  49 /// fn do_my_analysis(body: &mir::Body<'tcx>, dead_unwinds: &BitSet<BasicBlock>) {
  50 ///     // `MyAnalysis` implements `Analysis`.
  51 ///     let analysis = MyAnalysis::new();
  52 ///
  53 ///     let results = Engine::new(body, dead_unwinds, analysis).iterate_to_fixpoint();
  54 ///     let mut cursor = ResultsCursor::new(body, results);
  55 ///
  56 ///     for (_, statement_index) in body.block_data[START_BLOCK].statements.iter_enumerated() {
  57 ///         cursor.seek_after(Location { block: START_BLOCK, statement_index });
  58 ///         let state = cursor.get();
  59 ///         println!("{:?}", state);
  60 ///     }
  61 /// }
  62 /// ```
  63 pub trait Analysis<'tcx>: BottomValue {
  64     /// The index type used to access the dataflow state.
  65     type Idx: Idx;
  66
  67     /// A name, used for debugging, that describes this dataflow analysis.
  68     ///
  69     /// The name should be suitable as part of a filename, so avoid whitespace, slashes or periods
  70     /// and try to keep it short.
  71     const NAME: &'static str;
  72
  73     /// How each element of your dataflow state will be displayed during debugging.
  74     ///
  75     /// By default, this is the `fmt::Debug` representation of `Self::Idx`.
  76     fn pretty_print_idx(&self, w: &mut impl io::Write, idx: Self::Idx) -> io::Result<()> {
  77         write!(w, "{:?}", idx)
  78     }
  79
  80     /// The size of each bitvector allocated for each block.
  81     fn bits_per_block(&self, body: &mir::Body<'tcx>) -> usize;
  82
  83     /// Mutates the entry set of the `START_BLOCK` to contain the initial state for dataflow
  84     /// analysis.
  85     fn initialize_start_block(&self, body: &mir::Body<'tcx>, state: &mut BitSet<Self::Idx>);
  86
  87     /// Updates the current dataflow state with the effect of evaluating a statement.
  88     fn apply_statement_effect(
  89         &self,
  90         state: &mut BitSet<Self::Idx>,
  91         statement: &mir::Statement<'tcx>,
  92         location: Location,
  93     );
  94
  95     /// Updates the current dataflow state with the effect of evaluating a terminator.
  96     ///
  97     /// Note that the effect of a successful return from a `Call` terminator should **not** be
  98     /// acounted for in this function. That should go in `apply_call_return_effect`. For example,
  99     /// in the `InitializedPlaces` analyses, the return place is not marked as initialized here.
 100     fn apply_terminator_effect(
 101         &self,
 102         state: &mut BitSet<Self::Idx>,
 103         terminator: &mir::Terminator<'tcx>,
 104         location: Location,
 105     );
 106
 107     /// Updates the current dataflow state with the effect of a successful return from a `Call`
 108     /// terminator.
 109     ///
 110     /// This is separated from `apply_terminator_effect` to properly track state across
 111     /// unwind edges for `Call`s.
 112     fn apply_call_return_effect(
 113         &self,
 114         state: &mut BitSet<Self::Idx>,
 115         block: BasicBlock,
 116         func: &mir::Operand<'tcx>,
 117         args: &[mir::Operand<'tcx>],
 118         return_place: &mir::Place<'tcx>,
 119     );
 120
 121     /// Applies the cumulative effect of an entire basic block to the dataflow state (except for
 122     /// `call_return_effect`, which is handled in the `Engine`).
 123     ///
 124     /// The default implementation calls `statement_effect` for every statement in the block before
 125     /// finally calling `terminator_effect`. However, some dataflow analyses are able to coalesce
 126     /// transfer functions for an entire block and apply them at once. Such analyses should
 127     /// override `block_effect`.
 128     fn apply_whole_block_effect(
 129         &self,
 130         state: &mut BitSet<Self::Idx>,
 131         block: BasicBlock,
 132         block_data: &mir::BasicBlockData<'tcx>,
 133     ) {
 134         for (statement_index, stmt) in block_data.statements.iter().enumerate() {
 135             let location = Location { block, statement_index };
 136             self.apply_statement_effect(state, stmt, location);
 137         }
 138
 139         let location = Location { block, statement_index: block_data.statements.len() };
 140         self.apply_terminator_effect(state, block_data.terminator(), location);
 141     }
 142
 143     /// Applies the cumulative effect of a sequence of statements (and possibly a terminator)
 144     /// within a single basic block.
 145     ///
 146     /// When called with `0..block_data.statements.len() + 1` as the statement range, this function
 147     /// is equivalent to `apply_whole_block_effect`.
 148     fn apply_partial_block_effect(
 149         &self,
 150         state: &mut BitSet<Self::Idx>,
 151         block: BasicBlock,
 152         block_data: &mir::BasicBlockData<'tcx>,
 153         mut range: ops::Range<usize>,
 154     ) {
 155         if range.is_empty() {
 156             return;
 157         }
 158
 159         // The final location might be a terminator, so iterate through all statements until the
 160         // final one, then check to see whether the final one is a statement or terminator.
 161         //
 162         // This can't cause the range to wrap-around since we check that the range contains at
 163         // least one element above.
 164         range.end -= 1;
 165         let final_location = Location { block, statement_index: range.end };
 166
 167         for statement_index in range {
 168             let location = Location { block, statement_index };
 169             let stmt = &block_data.statements[statement_index];
 170             self.apply_statement_effect(state, stmt, location);
 171         }
 172
 173         if final_location.statement_index == block_data.statements.len() {
 174             let terminator = block_data.terminator();
 175             self.apply_terminator_effect(state, terminator, final_location);
 176         } else {
 177             let stmt = &block_data.statements[final_location.statement_index];
 178             self.apply_statement_effect(state, stmt, final_location);
 179         }
 180     }
 181 }
 182
 183 #[derive(Clone, Copy, Debug)]
 184 enum CursorPosition {
 185     AtBlockStart(BasicBlock),
 186     After(Location),
 187 }
 188
 189 impl CursorPosition {
 190     fn block(&self) -> BasicBlock {
 191         match *self {
 192             Self::AtBlockStart(block) => block,
 193             Self::After(Location { block, .. }) => block,
 194         }
 195     }
 196 }
 197
 198 type ResultsRefCursor<'a, 'mir, 'tcx, A> = ResultsCursor<'mir, 'tcx, A, &'a Results<'tcx, A>>;
 199
 200 /// Inspect the results of dataflow analysis.
 201 ///
 202 /// This cursor has linear performance when visiting statements in a block in order. Visiting
 203 /// statements within a block in reverse order is `O(n^2)`, where `n` is the number of statements
 204 /// in that block.
 205 pub struct ResultsCursor<'mir, 'tcx, A, R = Results<'tcx, A>>
 206 where
 207     A: Analysis<'tcx>,
 208 {
 209     body: &'mir mir::Body<'tcx>,
 210     results: R,
 211     state: BitSet<A::Idx>,
 212
 213     pos: CursorPosition,
 214
 215     /// Whether the effects of `apply_call_return_effect` are currently stored in `state`.
 216     ///
 217     /// This flag ensures that multiple calls to `seek_after_assume_call_returns` with the same
 218     /// target only result in one invocation of `apply_call_return_effect`.
 219     is_call_return_effect_applied: bool,
 220 }
 221
 222 impl<'mir, 'tcx, A, R> ResultsCursor<'mir, 'tcx, A, R>
 223 where
 224     A: Analysis<'tcx>,
 225     R: Borrow<Results<'tcx, A>>,
 226 {
 227     /// Returns a new cursor for `results` that points to the start of the `START_BLOCK`.
 228     pub fn new(body: &'mir mir::Body<'tcx>, results: R) -> Self {
 229         ResultsCursor {
 230             body,
 231             pos: CursorPosition::AtBlockStart(mir::START_BLOCK),
 232             is_call_return_effect_applied: false,
 233             state: results.borrow().entry_sets[mir::START_BLOCK].clone(),
 234             results,
 235         }
 236     }
 237
 238     pub fn analysis(&self) -> &A {
 239         &self.results.borrow().analysis
 240     }
 241
 242     /// Resets the cursor to the start of the given `block`.
 243     pub fn seek_to_block_start(&mut self, block: BasicBlock) {
 244         self.state.overwrite(&self.results.borrow().entry_sets[block]);
 245         self.pos = CursorPosition::AtBlockStart(block);
 246         self.is_call_return_effect_applied = false;
 247     }
 248
 249     /// Updates the cursor to hold the dataflow state immediately before `target`.
 250     pub fn seek_before(&mut self, target: Location) {
 251         assert!(target <= self.body.terminator_loc(target.block));
 252
 253         if target.statement_index == 0 {
 254             self.seek_to_block_start(target.block);
 255         } else {
 256             self._seek_after(Location {
 257                 block: target.block,
 258                 statement_index: target.statement_index - 1,
 259             });
 260         }
 261     }
 262
 263     /// Updates the cursor to hold the dataflow state at `target`.
 264     ///
 265     /// If `target` is a `Call` terminator, `apply_call_return_effect` will not be called. See
 266     /// `seek_after_assume_call_returns` if you wish to observe the dataflow state upon a
 267     /// successful return.
 268     pub fn seek_after(&mut self, target: Location) {
 269         assert!(target <= self.body.terminator_loc(target.block));
 270
 271         // This check ensures the correctness of a call to `seek_after_assume_call_returns`
 272         // followed by one to `seek_after` with the same target.
 273         if self.is_call_return_effect_applied {
 274             self.seek_to_block_start(target.block);
 275         }
 276
 277         self._seek_after(target);
 278     }
 279
 280     /// Equivalent to `seek_after`, but also calls `apply_call_return_effect` if `target` is a
 281     /// `Call` terminator whose callee is convergent.
 282     pub fn seek_after_assume_call_returns(&mut self, target: Location) {
 283         assert!(target <= self.body.terminator_loc(target.block));
 284
 285         self._seek_after(target);
 286
 287         if target != self.body.terminator_loc(target.block) {
 288             return;
 289         }
 290
 291         let term = self.body.basic_blocks()[target.block].terminator();
 292         if let mir::TerminatorKind::Call {
 293             destination: Some((return_place, _)), func, args, ..
 294         } = &term.kind
 295         {
 296             if !self.is_call_return_effect_applied {
 297                 self.is_call_return_effect_applied = true;
 298                 self.results.borrow().analysis.apply_call_return_effect(
 299                     &mut self.state,
 300                     target.block,
 301                     func,
 302                     args,
 303                     return_place,
 304                 );
 305             }
 306         }
 307     }
 308
 309     fn _seek_after(&mut self, target: Location) {
 310         let Location { block: target_block, statement_index: target_index } = target;
 311
 312         if self.pos.block() != target_block {
 313             self.seek_to_block_start(target_block);
 314         }
 315
 316         // If we're in the same block but after the target statement, we need to reset to the start
 317         // of the block.
 318         if let CursorPosition::After(Location { statement_index: curr_index, .. }) = self.pos {
 319             match curr_index.cmp(&target_index) {
 320                 Ordering::Equal => return,
 321                 Ordering::Less => {}
 322                 Ordering::Greater => self.seek_to_block_start(target_block),
 323             }
 324         }
 325
 326         // The cursor is now in the same block as the target location pointing at an earlier
 327         // statement.
 328         debug_assert_eq!(self.pos.block(), target_block);
 329         if let CursorPosition::After(Location { statement_index, .. }) = self.pos {
 330             debug_assert!(statement_index < target_index);
 331         }
 332
 333         let first_unapplied_statement = match self.pos {
 334             CursorPosition::AtBlockStart(_) => 0,
 335             CursorPosition::After(Location { statement_index, .. }) => statement_index + 1,
 336         };
 337
 338         let block_data = &self.body.basic_blocks()[target_block];
 339         self.results.borrow().analysis.apply_partial_block_effect(
 340             &mut self.state,
 341             target_block,
 342             block_data,
 343             first_unapplied_statement..target_index + 1,
 344         );
 345
 346         self.pos = CursorPosition::After(target);
 347         self.is_call_return_effect_applied = false;
 348     }
 349
 350     /// Gets the dataflow state at the current location.
 351     pub fn get(&self) -> &BitSet<A::Idx> {
 352         &self.state
 353     }
 354 }
 355
 356 /// A completed dataflow analysis.
 357 pub struct Results<'tcx, A>
 358 where
 359     A: Analysis<'tcx>,
 360 {
 361     analysis: A,
 362     entry_sets: IndexVec<BasicBlock, BitSet<A::Idx>>,
 363 }
 364
 365 /// All information required to iterate a dataflow analysis to fixpoint.
 366 pub struct Engine<'a, 'tcx, A>
 367 where
 368     A: Analysis<'tcx>,
 369 {
 370     analysis: A,
 371     bits_per_block: usize,
 372     tcx: TyCtxt<'tcx>,
 373     body: &'a mir::Body<'tcx>,
 374     def_id: DefId,
 375     dead_unwinds: &'a BitSet<BasicBlock>,
 376     entry_sets: IndexVec<BasicBlock, BitSet<A::Idx>>,
 377 }
 378
 379 impl<A> Engine<'a, 'tcx, A>
 380 where
 381     A: Analysis<'tcx>,
 382 {
 383     pub fn new(
 384         tcx: TyCtxt<'tcx>,
 385         body: &'a mir::Body<'tcx>,
 386         def_id: DefId,
 387         dead_unwinds: &'a BitSet<BasicBlock>,
 388         analysis: A,
 389     ) -> Self {
 390         let bits_per_block = analysis.bits_per_block(body);
 391
 392         let bottom_value_set = if A::BOTTOM_VALUE == true {
 393             BitSet::new_filled(bits_per_block)
 394         } else {
 395             BitSet::new_empty(bits_per_block)
 396         };
 397
 398         let mut entry_sets = IndexVec::from_elem(bottom_value_set, body.basic_blocks());
 399         analysis.initialize_start_block(body, &mut entry_sets[mir::START_BLOCK]);
 400
 401         Engine { analysis, bits_per_block, tcx, body, def_id, dead_unwinds, entry_sets }
 402     }
 403
 404     pub fn iterate_to_fixpoint(mut self) -> Results<'tcx, A> {
 405         let mut temp_state = BitSet::new_empty(self.bits_per_block);
 406
 407         let mut dirty_queue: WorkQueue<BasicBlock> =
 408             WorkQueue::with_none(self.body.basic_blocks().len());
 409
 410         for (bb, _) in traversal::reverse_postorder(self.body) {
 411             dirty_queue.insert(bb);
 412         }
 413
 414         // Add blocks that are not reachable from START_BLOCK to the work queue. These blocks will
 415         // be processed after the ones added above.
 416         for bb in self.body.basic_blocks().indices() {
 417             dirty_queue.insert(bb);
 418         }
 419
 420         while let Some(bb) = dirty_queue.pop() {
 421             let bb_data = &self.body[bb];
 422             let on_entry = &self.entry_sets[bb];
 423
 424             temp_state.overwrite(on_entry);
 425             self.analysis.apply_whole_block_effect(&mut temp_state, bb, bb_data);
 426
 427             self.propagate_bits_into_graph_successors_of(
 428                 &mut temp_state,
 429                 (bb, bb_data),
 430                 &mut dirty_queue,
 431             );
 432         }
 433
 434         let Engine { tcx, body, def_id, analysis, entry_sets, .. } = self;
 435
 436         let results = Results { analysis, entry_sets };
 437
 438         let attrs = tcx.get_attrs(def_id);
 439         if let Some(path) = get_dataflow_graphviz_output_path(tcx, attrs, A::NAME) {
 440             let result = write_dataflow_graphviz_results(body, def_id, &path, &results);
 441             if let Err(e) = result {
 442                 warn!("Failed to write dataflow results to {}: {}", path.display(), e);
 443             }
 444         }
 445
 446         results
 447     }
 448
 449     fn propagate_bits_into_graph_successors_of(
 450         &mut self,
 451         in_out: &mut BitSet<A::Idx>,
 452         (bb, bb_data): (BasicBlock, &'a mir::BasicBlockData<'tcx>),
 453         dirty_list: &mut WorkQueue<BasicBlock>,
 454     ) {
 455         match bb_data.terminator().kind {
 456             mir::TerminatorKind::Return
 457             | mir::TerminatorKind::Resume
 458             | mir::TerminatorKind::Abort
 459             | mir::TerminatorKind::GeneratorDrop
 460             | mir::TerminatorKind::Unreachable => {}
 461
 462             mir::TerminatorKind::Goto { target }
 463             | mir::TerminatorKind::Assert { target, cleanup: None, .. }
 464             | mir::TerminatorKind::Yield { resume: target, drop: None, .. }
 465             | mir::TerminatorKind::Drop { target, location: _, unwind: None }
 466             | mir::TerminatorKind::DropAndReplace { target, value: _, location: _, unwind: None } =>
 467             {
 468                 self.propagate_bits_into_entry_set_for(in_out, target, dirty_list);
 469             }
 470
 471             mir::TerminatorKind::Yield { resume: target, drop: Some(drop), .. } => {
 472                 self.propagate_bits_into_entry_set_for(in_out, target, dirty_list);
 473                 self.propagate_bits_into_entry_set_for(in_out, drop, dirty_list);
 474             }
 475
 476             mir::TerminatorKind::Assert { target, cleanup: Some(unwind), .. }
 477             | mir::TerminatorKind::Drop { target, location: _, unwind: Some(unwind) }
 478             | mir::TerminatorKind::DropAndReplace {
 479                 target,
 480                 value: _,
 481                 location: _,
 482                 unwind: Some(unwind),
 483             } => {
 484                 self.propagate_bits_into_entry_set_for(in_out, target, dirty_list);
 485                 if !self.dead_unwinds.contains(bb) {
 486                     self.propagate_bits_into_entry_set_for(in_out, unwind, dirty_list);
 487                 }
 488             }
 489
 490             mir::TerminatorKind::SwitchInt { ref targets, .. } => {
 491                 for target in targets {
 492                     self.propagate_bits_into_entry_set_for(in_out, *target, dirty_list);
 493                 }
 494             }
 495
 496             mir::TerminatorKind::Call { cleanup, ref destination, ref func, ref args, .. } => {
 497                 if let Some(unwind) = cleanup {
 498                     if !self.dead_unwinds.contains(bb) {
 499                         self.propagate_bits_into_entry_set_for(in_out, unwind, dirty_list);
 500                     }
 501                 }
 502
 503                 if let Some((ref dest_place, dest_bb)) = *destination {
 504                     // N.B.: This must be done *last*, after all other
 505                     // propagation, as documented in comment above.
 506                     self.analysis.apply_call_return_effect(in_out, bb, func, args, dest_place);
 507                     self.propagate_bits_into_entry_set_for(in_out, dest_bb, dirty_list);
 508                 }
 509             }
 510
 511             mir::TerminatorKind::FalseEdges { real_target, imaginary_target } => {
 512                 self.propagate_bits_into_entry_set_for(in_out, real_target, dirty_list);
 513                 self.propagate_bits_into_entry_set_for(in_out, imaginary_target, dirty_list);
 514             }
 515
 516             mir::TerminatorKind::FalseUnwind { real_target, unwind } => {
 517                 self.propagate_bits_into_entry_set_for(in_out, real_target, dirty_list);
 518                 if let Some(unwind) = unwind {
 519                     if !self.dead_unwinds.contains(bb) {
 520                         self.propagate_bits_into_entry_set_for(in_out, unwind, dirty_list);
 521                     }
 522                 }
 523             }
 524         }
 525     }
 526
 527     fn propagate_bits_into_entry_set_for(
 528         &mut self,
 529         in_out: &BitSet<A::Idx>,
 530         bb: BasicBlock,
 531         dirty_queue: &mut WorkQueue<BasicBlock>,
 532     ) {
 533         let entry_set = &mut self.entry_sets[bb];
 534         let set_changed = self.analysis.join(entry_set, &in_out);
 535         if set_changed {
 536             dirty_queue.insert(bb);
 537         }
 538     }
 539 }
 540
 541 /// Looks for attributes like `#[rustc_mir(borrowck_graphviz_postflow="./path/to/suffix.dot")]` and
 542 /// extracts the path with the given analysis name prepended to the suffix.
 543 ///
 544 /// Returns `None` if no such attribute exists.
 545 fn get_dataflow_graphviz_output_path(
 546     tcx: TyCtxt<'tcx>,
 547     attrs: ty::Attributes<'tcx>,
 548     analysis: &str,
 549 ) -> Option<PathBuf> {
 550     let mut rustc_mir_attrs = attrs
 551         .into_iter()
 552         .filter(|attr| attr.check_name(sym::rustc_mir))
 553         .flat_map(|attr| attr.meta_item_list().into_iter().flat_map(|v| v.into_iter()));
 554
 555     let borrowck_graphviz_postflow =
 556         rustc_mir_attrs.find(|attr| attr.check_name(sym::borrowck_graphviz_postflow))?;
 557
 558     let path_and_suffix = match borrowck_graphviz_postflow.value_str() {
 559         Some(p) => p,
 560         None => {
 561             tcx.sess.span_err(
 562                 borrowck_graphviz_postflow.span(),
 563                 "borrowck_graphviz_postflow requires a path",
 564             );
 565
 566             return None;
 567         }
 568     };
 569
 570     // Change "path/suffix.dot" to "path/analysis_name_suffix.dot"
 571     let mut ret = PathBuf::from(path_and_suffix.to_string());
 572     let suffix = ret.file_name().unwrap();
 573
 574     let mut file_name: OsString = analysis.into();
 575     file_name.push("_");
 576     file_name.push(suffix);
 577     ret.set_file_name(file_name);
 578
 579     Some(ret)
 580 }
 581
 582 fn write_dataflow_graphviz_results<A: Analysis<'tcx>>(
 583     body: &mir::Body<'tcx>,
 584     def_id: DefId,
 585     path: &Path,
 586     results: &Results<'tcx, A>,
 587 ) -> io::Result<()> {
 588     debug!("printing dataflow results for {:?} to {}", def_id, path.display());
 589
 590     let mut buf = Vec::new();
 591     let graphviz = graphviz::Formatter::new(body, def_id, results);
 592
 593     dot::render(&graphviz, &mut buf)?;
 594     fs::write(path, buf)
 595 }