compiler/rustc_data_structures/src/transitive_relation.rs

   1 use crate::frozen::Frozen;
   2 use crate::fx::{FxHashSet, FxIndexSet};
   3 use rustc_index::bit_set::BitMatrix;
   4 use std::fmt::Debug;
   5 use std::hash::Hash;
   6 use std::mem;
   7 use std::ops::Deref;
   8
   9 #[cfg(test)]
  10 mod tests;
  11
  12 #[derive(Clone, Debug)]
  13 pub struct TransitiveRelationBuilder<T> {
  14     // List of elements. This is used to map from a T to a usize.
  15     elements: FxIndexSet<T>,
  16
  17     // List of base edges in the graph. Require to compute transitive
  18     // closure.
  19     edges: FxHashSet<Edge>,
  20 }
  21
  22 #[derive(Debug)]
  23 pub struct TransitiveRelation<T> {
  24     // Frozen transitive relation elements and edges.
  25     builder: Frozen<TransitiveRelationBuilder<T>>,
  26
  27     // Cached transitive closure derived from the edges.
  28     closure: Frozen<BitMatrix<usize, usize>>,
  29 }
  30
  31 impl<T> Deref for TransitiveRelation<T> {
  32     type Target = Frozen<TransitiveRelationBuilder<T>>;
  33
  34     fn deref(&self) -> &Self::Target {
  35         &self.builder
  36     }
  37 }
  38
  39 impl<T: Clone> Clone for TransitiveRelation<T> {
  40     fn clone(&self) -> Self {
  41         TransitiveRelation {
  42             builder: Frozen::freeze(self.builder.deref().clone()),
  43             closure: Frozen::freeze(self.closure.deref().clone()),
  44         }
  45     }
  46 }
  47
  48 // HACK(eddyb) manual impl avoids `Default` bound on `T`.
  49 impl<T: Eq + Hash> Default for TransitiveRelationBuilder<T> {
  50     fn default() -> Self {
  51         TransitiveRelationBuilder { elements: Default::default(), edges: Default::default() }
  52     }
  53 }
  54
  55 #[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug, Hash)]
  56 struct Index(usize);
  57
  58 #[derive(Clone, PartialEq, Eq, Debug, Hash)]
  59 struct Edge {
  60     source: Index,
  61     target: Index,
  62 }
  63
  64 impl<T: Eq + Hash + Copy> TransitiveRelationBuilder<T> {
  65     pub fn is_empty(&self) -> bool {
  66         self.edges.is_empty()
  67     }
  68
  69     pub fn elements(&self) -> impl Iterator<Item = &T> {
  70         self.elements.iter()
  71     }
  72
  73     fn index(&self, a: T) -> Option<Index> {
  74         self.elements.get_index_of(&a).map(Index)
  75     }
  76
  77     fn add_index(&mut self, a: T) -> Index {
  78         let (index, _added) = self.elements.insert_full(a);
  79         Index(index)
  80     }
  81
  82     /// Applies the (partial) function to each edge and returns a new
  83     /// relation builder. If `f` returns `None` for any end-point,
  84     /// returns `None`.
  85     pub fn maybe_map<F, U>(&self, mut f: F) -> Option<TransitiveRelationBuilder<U>>
  86     where
  87         F: FnMut(T) -> Option<U>,
  88         U: Clone + Debug + Eq + Hash + Copy,
  89     {
  90         let mut result = TransitiveRelationBuilder::default();
  91         for edge in &self.edges {
  92             result.add(f(self.elements[edge.source.0])?, f(self.elements[edge.target.0])?);
  93         }
  94         Some(result)
  95     }
  96
  97     /// Indicate that `a < b` (where `<` is this relation)
  98     pub fn add(&mut self, a: T, b: T) {
  99         let a = self.add_index(a);
 100         let b = self.add_index(b);
 101         let edge = Edge { source: a, target: b };
 102         self.edges.insert(edge);
 103     }
 104
 105     /// Compute the transitive closure derived from the edges, and converted to
 106     /// the final result. After this, all elements will be immutable to maintain
 107     /// the correctness of the result.
 108     pub fn freeze(self) -> TransitiveRelation<T> {
 109         let mut matrix = BitMatrix::new(self.elements.len(), self.elements.len());
 110         let mut changed = true;
 111         while changed {
 112             changed = false;
 113             for edge in &self.edges {
 114                 // add an edge from S -> T
 115                 changed |= matrix.insert(edge.source.0, edge.target.0);
 116
 117                 // add all outgoing edges from T into S
 118                 changed |= matrix.union_rows(edge.target.0, edge.source.0);
 119             }
 120         }
 121         TransitiveRelation { builder: Frozen::freeze(self), closure: Frozen::freeze(matrix) }
 122     }
 123 }
 124
 125 impl<T: Eq + Hash + Copy> TransitiveRelation<T> {
 126     /// Applies the (partial) function to each edge and returns a new
 127     /// relation including transitive closures.
 128     pub fn maybe_map<F, U>(&self, f: F) -> Option<TransitiveRelation<U>>
 129     where
 130         F: FnMut(T) -> Option<U>,
 131         U: Clone + Debug + Eq + Hash + Copy,
 132     {
 133         Some(self.builder.maybe_map(f)?.freeze())
 134     }
 135
 136     /// Checks whether `a < target` (transitively)
 137     pub fn contains(&self, a: T, b: T) -> bool {
 138         match (self.index(a), self.index(b)) {
 139             (Some(a), Some(b)) => self.with_closure(|closure| closure.contains(a.0, b.0)),
 140             (None, _) | (_, None) => false,
 141         }
 142     }
 143
 144     /// Thinking of `x R y` as an edge `x -> y` in a graph, this
 145     /// returns all things reachable from `a`.
 146     ///
 147     /// Really this probably ought to be `impl Iterator<Item = &T>`, but
 148     /// I'm too lazy to make that work, and -- given the caching
 149     /// strategy -- it'd be a touch tricky anyhow.
 150     pub fn reachable_from(&self, a: T) -> Vec<T> {
 151         match self.index(a) {
 152             Some(a) => {
 153                 self.with_closure(|closure| closure.iter(a.0).map(|i| self.elements[i]).collect())
 154             }
 155             None => vec![],
 156         }
 157     }
 158
 159     /// Picks what I am referring to as the "postdominating"
 160     /// upper-bound for `a` and `b`. This is usually the least upper
 161     /// bound, but in cases where there is no single least upper
 162     /// bound, it is the "mutual immediate postdominator", if you
 163     /// imagine a graph where `a < b` means `a -> b`.
 164     ///
 165     /// This function is needed because region inference currently
 166     /// requires that we produce a single "UB", and there is no best
 167     /// choice for the LUB. Rather than pick arbitrarily, I pick a
 168     /// less good, but predictable choice. This should help ensure
 169     /// that region inference yields predictable results (though it
 170     /// itself is not fully sufficient).
 171     ///
 172     /// Examples are probably clearer than any prose I could write
 173     /// (there are corresponding tests below, btw). In each case,
 174     /// the query is `postdom_upper_bound(a, b)`:
 175     ///
 176     /// ```text
 177     /// // Returns Some(x), which is also LUB.
 178     /// a -> a1 -> x
 179     ///            ^
 180     ///            |
 181     /// b -> b1 ---+
 182     ///
 183     /// // Returns `Some(x)`, which is not LUB (there is none)
 184     /// // diagonal edges run left-to-right.
 185     /// a -> a1 -> x
 186     ///   \/       ^
 187     ///   /\       |
 188     /// b -> b1 ---+
 189     ///
 190     /// // Returns `None`.
 191     /// a -> a1
 192     /// b -> b1
 193     /// ```
 194     pub fn postdom_upper_bound(&self, a: T, b: T) -> Option<T> {
 195         let mubs = self.minimal_upper_bounds(a, b);
 196         self.mutual_immediate_postdominator(mubs)
 197     }
 198
 199     /// Viewing the relation as a graph, computes the "mutual
 200     /// immediate postdominator" of a set of points (if one
 201     /// exists). See `postdom_upper_bound` for details.
 202     pub fn mutual_immediate_postdominator<'a>(&'a self, mut mubs: Vec<T>) -> Option<T> {
 203         loop {
 204             match mubs.len() {
 205                 0 => return None,
 206                 1 => return Some(mubs[0]),
 207                 _ => {
 208                     let m = mubs.pop().unwrap();
 209                     let n = mubs.pop().unwrap();
 210                     mubs.extend(self.minimal_upper_bounds(n, m));
 211                 }
 212             }
 213         }
 214     }
 215
 216     /// Returns the set of bounds `X` such that:
 217     ///
 218     /// - `a < X` and `b < X`
 219     /// - there is no `Y != X` such that `a < Y` and `Y < X`
 220     ///   - except for the case where `X < a` (i.e., a strongly connected
 221     ///     component in the graph). In that case, the smallest
 222     ///     representative of the SCC is returned (as determined by the
 223     ///     internal indices).
 224     ///
 225     /// Note that this set can, in principle, have any size.
 226     pub fn minimal_upper_bounds(&self, a: T, b: T) -> Vec<T> {
 227         let (Some(mut a), Some(mut b)) = (self.index(a), self.index(b)) else {
 228             return vec![];
 229         };
 230
 231         // in some cases, there are some arbitrary choices to be made;
 232         // it doesn't really matter what we pick, as long as we pick
 233         // the same thing consistently when queried, so ensure that
 234         // (a, b) are in a consistent relative order
 235         if a > b {
 236             mem::swap(&mut a, &mut b);
 237         }
 238
 239         let lub_indices = self.with_closure(|closure| {
 240             // Easy case is when either a < b or b < a:
 241             if closure.contains(a.0, b.0) {
 242                 return vec![b.0];
 243             }
 244             if closure.contains(b.0, a.0) {
 245                 return vec![a.0];
 246             }
 247
 248             // Otherwise, the tricky part is that there may be some c
 249             // where a < c and b < c. In fact, there may be many such
 250             // values. So here is what we do:
 251             //
 252             // 1. Find the vector `[X | a < X && b < X]` of all values
 253             //    `X` where `a < X` and `b < X`.  In terms of the
 254             //    graph, this means all values reachable from both `a`
 255             //    and `b`. Note that this vector is also a set, but we
 256             //    use the term vector because the order matters
 257             //    to the steps below.
 258             //    - This vector contains upper bounds, but they are
 259             //      not minimal upper bounds. So you may have e.g.
 260             //      `[x, y, tcx, z]` where `x < tcx` and `y < tcx` and
 261             //      `z < x` and `z < y`:
 262             //
 263             //           z --+---> x ----+----> tcx
 264             //               |           |
 265             //               |           |
 266             //               +---> y ----+
 267             //
 268             //      In this case, we really want to return just `[z]`.
 269             //      The following steps below achieve this by gradually
 270             //      reducing the list.
 271             // 2. Pare down the vector using `pare_down`. This will
 272             //    remove elements from the vector that can be reached
 273             //    by an earlier element.
 274             //    - In the example above, this would convert `[x, y,
 275             //      tcx, z]` to `[x, y, z]`. Note that `x` and `y` are
 276             //      still in the vector; this is because while `z < x`
 277             //      (and `z < y`) holds, `z` comes after them in the
 278             //      vector.
 279             // 3. Reverse the vector and repeat the pare down process.
 280             //    - In the example above, we would reverse to
 281             //      `[z, y, x]` and then pare down to `[z]`.
 282             // 4. Reverse once more just so that we yield a vector in
 283             //    increasing order of index. Not necessary, but why not.
 284             //
 285             // I believe this algorithm yields a minimal set. The
 286             // argument is that, after step 2, we know that no element
 287             // can reach its successors (in the vector, not the graph).
 288             // After step 3, we know that no element can reach any of
 289             // its predecessors (because of step 2) nor successors
 290             // (because we just called `pare_down`)
 291             //
 292             // This same algorithm is used in `parents` below.
 293
 294             let mut candidates = closure.intersect_rows(a.0, b.0); // (1)
 295             pare_down(&mut candidates, closure); // (2)
 296             candidates.reverse(); // (3a)
 297             pare_down(&mut candidates, closure); // (3b)
 298             candidates
 299         });
 300
 301         lub_indices
 302             .into_iter()
 303             .rev() // (4)
 304             .map(|i| self.elements[i])
 305             .collect()
 306     }
 307
 308     /// Given an element A, returns the maximal set {B} of elements B
 309     /// such that
 310     ///
 311     /// - A != B
 312     /// - A R B is true
 313     /// - for each i, j: `B[i]` R `B[j]` does not hold
 314     ///
 315     /// The intuition is that this moves "one step up" through a lattice
 316     /// (where the relation is encoding the `<=` relation for the lattice).
 317     /// So e.g., if the relation is `->` and we have
 318     ///
 319     /// ```text
 320     /// a -> b -> d -> f
 321     /// |              ^
 322     /// +--> c -> e ---+
 323     /// ```
 324     ///
 325     /// then `parents(a)` returns `[b, c]`. The `postdom_parent` function
 326     /// would further reduce this to just `f`.
 327     pub fn parents(&self, a: T) -> Vec<T> {
 328         let Some(a) = self.index(a) else {
 329             return vec![];
 330         };
 331
 332         // Steal the algorithm for `minimal_upper_bounds` above, but
 333         // with a slight tweak. In the case where `a R a`, we remove
 334         // that from the set of candidates.
 335         let ancestors = self.with_closure(|closure| {
 336             let mut ancestors = closure.intersect_rows(a.0, a.0);
 337
 338             // Remove anything that can reach `a`. If this is a
 339             // reflexive relation, this will include `a` itself.
 340             ancestors.retain(|&e| !closure.contains(e, a.0));
 341
 342             pare_down(&mut ancestors, closure); // (2)
 343             ancestors.reverse(); // (3a)
 344             pare_down(&mut ancestors, closure); // (3b)
 345             ancestors
 346         });
 347
 348         ancestors
 349             .into_iter()
 350             .rev() // (4)
 351             .map(|i| self.elements[i])
 352             .collect()
 353     }
 354
 355     fn with_closure<OP, R>(&self, op: OP) -> R
 356     where
 357         OP: FnOnce(&BitMatrix<usize, usize>) -> R,
 358     {
 359         op(&self.closure)
 360     }
 361
 362     /// Lists all the base edges in the graph: the initial _non-transitive_ set of element
 363     /// relations, which will be later used as the basis for the transitive closure computation.
 364     pub fn base_edges(&self) -> impl Iterator<Item = (T, T)> + '_ {
 365         self.edges
 366             .iter()
 367             .map(move |edge| (self.elements[edge.source.0], self.elements[edge.target.0]))
 368     }
 369 }
 370
 371 /// Pare down is used as a step in the LUB computation. It edits the
 372 /// candidates array in place by removing any element j for which
 373 /// there exists an earlier element i<j such that i -> j. That is,
 374 /// after you run `pare_down`, you know that for all elements that
 375 /// remain in candidates, they cannot reach any of the elements that
 376 /// come after them.
 377 ///
 378 /// Examples follow. Assume that a -> b -> c and x -> y -> z.
 379 ///
 380 /// - Input: `[a, b, x]`. Output: `[a, x]`.
 381 /// - Input: `[b, a, x]`. Output: `[b, a, x]`.
 382 /// - Input: `[a, x, b, y]`. Output: `[a, x]`.
 383 fn pare_down(candidates: &mut Vec<usize>, closure: &BitMatrix<usize, usize>) {
 384     let mut i = 0;
 385     while let Some(&candidate_i) = candidates.get(i) {
 386         i += 1;
 387
 388         let mut j = i;
 389         let mut dead = 0;
 390         while let Some(&candidate_j) = candidates.get(j) {
 391             if closure.contains(candidate_i, candidate_j) {
 392                 // If `i` can reach `j`, then we can remove `j`. So just
 393                 // mark it as dead and move on; subsequent indices will be
 394                 // shifted into its place.
 395                 dead += 1;
 396             } else {
 397                 candidates[j - dead] = candidate_j;
 398             }
 399             j += 1;
 400         }
 401         candidates.truncate(j - dead);
 402     }
 403 }