1 use crate::vec::{Idx, IndexVec};
2 use arrayvec::ArrayVec;
5 use std::marker::PhantomData;
7 use std::ops::{BitAnd, BitAndAssign, BitOrAssign, Bound, Not, Range, RangeBounds, Shl};
10 use rustc_macros::{Decodable, Encodable};
16 pub const WORD_BYTES: usize = mem::size_of::<Word>();
17 pub const WORD_BITS: usize = WORD_BYTES * 8;
19 pub trait BitRelations<Rhs> {
20 fn union(&mut self, other: &Rhs) -> bool;
21 fn subtract(&mut self, other: &Rhs) -> bool;
22 fn intersect(&mut self, other: &Rhs) -> bool;
26 fn inclusive_start_end<T: Idx>(
27 range: impl RangeBounds<T>,
29 ) -> Option<(usize, usize)> {
30 // Both start and end are inclusive.
31 let start = match range.start_bound().cloned() {
32 Bound::Included(start) => start.index(),
33 Bound::Excluded(start) => start.index() + 1,
34 Bound::Unbounded => 0,
36 let end = match range.end_bound().cloned() {
37 Bound::Included(end) => end.index(),
38 Bound::Excluded(end) => end.index().checked_sub(1)?,
39 Bound::Unbounded => domain - 1,
41 assert!(end < domain);
48 macro_rules! bit_relations_inherent_impls {
50 /// Sets `self = self | other` and returns `true` if `self` changed
51 /// (i.e., if new bits were added).
52 pub fn union<Rhs>(&mut self, other: &Rhs) -> bool
54 Self: BitRelations<Rhs>,
56 <Self as BitRelations<Rhs>>::union(self, other)
59 /// Sets `self = self - other` and returns `true` if `self` changed.
60 /// (i.e., if any bits were removed).
61 pub fn subtract<Rhs>(&mut self, other: &Rhs) -> bool
63 Self: BitRelations<Rhs>,
65 <Self as BitRelations<Rhs>>::subtract(self, other)
68 /// Sets `self = self & other` and return `true` if `self` changed.
69 /// (i.e., if any bits were removed).
70 pub fn intersect<Rhs>(&mut self, other: &Rhs) -> bool
72 Self: BitRelations<Rhs>,
74 <Self as BitRelations<Rhs>>::intersect(self, other)
79 /// A fixed-size bitset type with a dense representation.
81 /// NOTE: Use [`GrowableBitSet`] if you need support for resizing after creation.
83 /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
86 /// All operations that involve an element will panic if the element is equal
87 /// to or greater than the domain size. All operations that involve two bitsets
88 /// will panic if the bitsets have differing domain sizes.
90 #[derive(Eq, PartialEq, Hash, Decodable, Encodable)]
91 pub struct BitSet<T> {
94 marker: PhantomData<T>,
98 /// Gets the domain size.
99 pub fn domain_size(&self) -> usize {
104 impl<T: Idx> BitSet<T> {
105 /// Creates a new, empty bitset with a given `domain_size`.
107 pub fn new_empty(domain_size: usize) -> BitSet<T> {
108 let num_words = num_words(domain_size);
109 BitSet { domain_size, words: vec![0; num_words], marker: PhantomData }
112 /// Creates a new, filled bitset with a given `domain_size`.
114 pub fn new_filled(domain_size: usize) -> BitSet<T> {
115 let num_words = num_words(domain_size);
116 let mut result = BitSet { domain_size, words: vec![!0; num_words], marker: PhantomData };
117 result.clear_excess_bits();
121 /// Clear all elements.
123 pub fn clear(&mut self) {
124 for word in &mut self.words {
129 /// Clear excess bits in the final word.
130 fn clear_excess_bits(&mut self) {
131 let num_bits_in_final_word = self.domain_size % WORD_BITS;
132 if num_bits_in_final_word > 0 {
133 let mask = (1 << num_bits_in_final_word) - 1;
134 let final_word_idx = self.words.len() - 1;
135 self.words[final_word_idx] &= mask;
139 /// Count the number of set bits in the set.
140 pub fn count(&self) -> usize {
141 self.words.iter().map(|e| e.count_ones() as usize).sum()
144 /// Returns `true` if `self` contains `elem`.
146 pub fn contains(&self, elem: T) -> bool {
147 assert!(elem.index() < self.domain_size);
148 let (word_index, mask) = word_index_and_mask(elem);
149 (self.words[word_index] & mask) != 0
152 /// Is `self` is a (non-strict) superset of `other`?
154 pub fn superset(&self, other: &BitSet<T>) -> bool {
155 assert_eq!(self.domain_size, other.domain_size);
156 self.words.iter().zip(&other.words).all(|(a, b)| (a & b) == *b)
159 /// Is the set empty?
161 pub fn is_empty(&self) -> bool {
162 self.words.iter().all(|a| *a == 0)
165 /// Insert `elem`. Returns whether the set has changed.
167 pub fn insert(&mut self, elem: T) -> bool {
168 assert!(elem.index() < self.domain_size);
169 let (word_index, mask) = word_index_and_mask(elem);
170 let word_ref = &mut self.words[word_index];
171 let word = *word_ref;
172 let new_word = word | mask;
173 *word_ref = new_word;
178 pub fn insert_range(&mut self, elems: impl RangeBounds<T>) {
179 let Some((start, end)) = inclusive_start_end(elems, self.domain_size) else {
183 let (start_word_index, start_mask) = word_index_and_mask(start);
184 let (end_word_index, end_mask) = word_index_and_mask(end);
186 // Set all words in between start and end (exclusively of both).
187 for word_index in (start_word_index + 1)..end_word_index {
188 self.words[word_index] = !0;
191 if start_word_index != end_word_index {
192 // Start and end are in different words, so we handle each in turn.
194 // We set all leading bits. This includes the start_mask bit.
195 self.words[start_word_index] |= !(start_mask - 1);
196 // And all trailing bits (i.e. from 0..=end) in the end word,
197 // including the end.
198 self.words[end_word_index] |= end_mask | end_mask - 1;
200 self.words[start_word_index] |= end_mask | (end_mask - start_mask);
204 /// Sets all bits to true.
205 pub fn insert_all(&mut self) {
206 for word in &mut self.words {
209 self.clear_excess_bits();
212 /// Returns `true` if the set has changed.
214 pub fn remove(&mut self, elem: T) -> bool {
215 assert!(elem.index() < self.domain_size);
216 let (word_index, mask) = word_index_and_mask(elem);
217 let word_ref = &mut self.words[word_index];
218 let word = *word_ref;
219 let new_word = word & !mask;
220 *word_ref = new_word;
224 /// Gets a slice of the underlying words.
225 pub fn words(&self) -> &[Word] {
229 /// Iterates over the indices of set bits in a sorted order.
231 pub fn iter(&self) -> BitIter<'_, T> {
232 BitIter::new(&self.words)
235 /// Duplicates the set as a hybrid set.
236 pub fn to_hybrid(&self) -> HybridBitSet<T> {
237 // Note: we currently don't bother trying to make a Sparse set.
238 HybridBitSet::Dense(self.to_owned())
241 /// Set `self = self | other`. In contrast to `union` returns `true` if the set contains at
242 /// least one bit that is not in `other` (i.e. `other` is not a superset of `self`).
244 /// This is an optimization for union of a hybrid bitset.
245 fn reverse_union_sparse(&mut self, sparse: &SparseBitSet<T>) -> bool {
246 assert!(sparse.domain_size == self.domain_size);
247 self.clear_excess_bits();
249 let mut not_already = false;
250 // Index of the current word not yet merged.
251 let mut current_index = 0;
252 // Mask of bits that came from the sparse set in the current word.
253 let mut new_bit_mask = 0;
254 for (word_index, mask) in sparse.iter().map(|x| word_index_and_mask(*x)) {
255 // Next bit is in a word not inspected yet.
256 if word_index > current_index {
257 self.words[current_index] |= new_bit_mask;
258 // Were there any bits in the old word that did not occur in the sparse set?
259 not_already |= (self.words[current_index] ^ new_bit_mask) != 0;
260 // Check all words we skipped for any set bit.
261 not_already |= self.words[current_index + 1..word_index].iter().any(|&x| x != 0);
263 current_index = word_index;
264 // Reset bit mask, no bits have been merged yet.
267 // Add bit and mark it as coming from the sparse set.
268 // self.words[word_index] |= mask;
269 new_bit_mask |= mask;
271 self.words[current_index] |= new_bit_mask;
272 // Any bits in the last inspected word that were not in the sparse set?
273 not_already |= (self.words[current_index] ^ new_bit_mask) != 0;
274 // Any bits in the tail? Note `clear_excess_bits` before.
275 not_already |= self.words[current_index + 1..].iter().any(|&x| x != 0);
280 fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> {
281 let (start, end) = inclusive_start_end(range, self.domain_size)?;
282 let (start_word_index, _) = word_index_and_mask(start);
283 let (end_word_index, end_mask) = word_index_and_mask(end);
285 let end_word = self.words[end_word_index] & (end_mask | (end_mask - 1));
287 let pos = max_bit(end_word) + WORD_BITS * end_word_index;
289 return Some(T::new(pos));
293 // We exclude end_word_index from the range here, because we don't want
294 // to limit ourselves to *just* the last word: the bits set it in may be
295 // after `end`, so it may not work out.
296 if let Some(offset) =
297 self.words[start_word_index..end_word_index].iter().rposition(|&w| w != 0)
299 let word_idx = start_word_index + offset;
300 let start_word = self.words[word_idx];
301 let pos = max_bit(start_word) + WORD_BITS * word_idx;
303 return Some(T::new(pos));
310 bit_relations_inherent_impls! {}
314 impl<T: Idx> BitRelations<BitSet<T>> for BitSet<T> {
315 fn union(&mut self, other: &BitSet<T>) -> bool {
316 assert_eq!(self.domain_size, other.domain_size);
317 bitwise(&mut self.words, &other.words, |a, b| a | b)
320 fn subtract(&mut self, other: &BitSet<T>) -> bool {
321 assert_eq!(self.domain_size, other.domain_size);
322 bitwise(&mut self.words, &other.words, |a, b| a & !b)
325 fn intersect(&mut self, other: &BitSet<T>) -> bool {
326 assert_eq!(self.domain_size, other.domain_size);
327 bitwise(&mut self.words, &other.words, |a, b| a & b)
331 // Applies a function to mutate a bitset, and returns true if any
332 // of the applications return true
333 fn sequential_update<T: Idx>(
334 mut self_update: impl FnMut(T) -> bool,
335 it: impl Iterator<Item = T>,
337 let mut changed = false;
339 changed |= self_update(elem);
344 // Optimization of intersection for SparseBitSet that's generic
346 fn sparse_intersect<T: Idx>(
347 set: &mut SparseBitSet<T>,
348 other_contains: impl Fn(&T) -> bool,
350 let size = set.elems.len();
351 set.elems.retain(|elem| other_contains(elem));
352 set.elems.len() != size
355 // Optimization of dense/sparse intersection. The resulting set is
356 // guaranteed to be at most the size of the sparse set, and hence can be
357 // represented as a sparse set. Therefore the sparse set is copied and filtered,
358 // then returned as the new set.
359 fn dense_sparse_intersect<T: Idx>(
361 sparse: &SparseBitSet<T>,
362 ) -> (SparseBitSet<T>, bool) {
363 let mut sparse_copy = sparse.clone();
364 sparse_intersect(&mut sparse_copy, |el| dense.contains(*el));
365 let n = sparse_copy.len();
366 (sparse_copy, n != dense.count())
370 impl<T: Idx> BitRelations<BitSet<T>> for HybridBitSet<T> {
371 fn union(&mut self, other: &BitSet<T>) -> bool {
372 assert_eq!(self.domain_size(), other.domain_size);
374 HybridBitSet::Sparse(sparse) => {
375 // `self` is sparse and `other` is dense. To
376 // merge them, we have two available strategies:
377 // * Densify `self` then merge other
378 // * Clone other then integrate bits from `self`
379 // The second strategy requires dedicated method
380 // since the usual `union` returns the wrong
381 // result. In the dedicated case the computation
382 // is slightly faster if the bits of the sparse
383 // bitset map to only few words of the dense
384 // representation, i.e. indices are near each
387 // Benchmarking seems to suggest that the second
388 // option is worth it.
389 let mut new_dense = other.clone();
390 let changed = new_dense.reverse_union_sparse(sparse);
391 *self = HybridBitSet::Dense(new_dense);
395 HybridBitSet::Dense(dense) => dense.union(other),
399 fn subtract(&mut self, other: &BitSet<T>) -> bool {
400 assert_eq!(self.domain_size(), other.domain_size);
402 HybridBitSet::Sparse(sparse) => {
403 sequential_update(|elem| sparse.remove(elem), other.iter())
405 HybridBitSet::Dense(dense) => dense.subtract(other),
409 fn intersect(&mut self, other: &BitSet<T>) -> bool {
410 assert_eq!(self.domain_size(), other.domain_size);
412 HybridBitSet::Sparse(sparse) => sparse_intersect(sparse, |elem| other.contains(*elem)),
413 HybridBitSet::Dense(dense) => dense.intersect(other),
419 impl<T: Idx> BitRelations<HybridBitSet<T>> for BitSet<T> {
420 fn union(&mut self, other: &HybridBitSet<T>) -> bool {
421 assert_eq!(self.domain_size, other.domain_size());
423 HybridBitSet::Sparse(sparse) => {
424 sequential_update(|elem| self.insert(elem), sparse.iter().cloned())
426 HybridBitSet::Dense(dense) => self.union(dense),
430 fn subtract(&mut self, other: &HybridBitSet<T>) -> bool {
431 assert_eq!(self.domain_size, other.domain_size());
433 HybridBitSet::Sparse(sparse) => {
434 sequential_update(|elem| self.remove(elem), sparse.iter().cloned())
436 HybridBitSet::Dense(dense) => self.subtract(dense),
440 fn intersect(&mut self, other: &HybridBitSet<T>) -> bool {
441 assert_eq!(self.domain_size, other.domain_size());
443 HybridBitSet::Sparse(sparse) => {
444 let (updated, changed) = dense_sparse_intersect(self, sparse);
446 // We can't directly assign the SparseBitSet to the BitSet, and
447 // doing `*self = updated.to_dense()` would cause a drop / reallocation. Instead,
448 // the BitSet is cleared and `updated` is copied into `self`.
450 for elem in updated.iter() {
455 HybridBitSet::Dense(dense) => self.intersect(dense),
461 impl<T: Idx> BitRelations<HybridBitSet<T>> for HybridBitSet<T> {
462 fn union(&mut self, other: &HybridBitSet<T>) -> bool {
463 assert_eq!(self.domain_size(), other.domain_size());
465 HybridBitSet::Sparse(_) => {
467 HybridBitSet::Sparse(other_sparse) => {
468 // Both sets are sparse. Add the elements in
469 // `other_sparse` to `self` one at a time. This
470 // may or may not cause `self` to be densified.
471 let mut changed = false;
472 for elem in other_sparse.iter() {
473 changed |= self.insert(*elem);
478 HybridBitSet::Dense(other_dense) => self.union(other_dense),
482 HybridBitSet::Dense(self_dense) => self_dense.union(other),
486 fn subtract(&mut self, other: &HybridBitSet<T>) -> bool {
487 assert_eq!(self.domain_size(), other.domain_size());
489 HybridBitSet::Sparse(self_sparse) => {
490 sequential_update(|elem| self_sparse.remove(elem), other.iter())
492 HybridBitSet::Dense(self_dense) => self_dense.subtract(other),
496 fn intersect(&mut self, other: &HybridBitSet<T>) -> bool {
497 assert_eq!(self.domain_size(), other.domain_size());
499 HybridBitSet::Sparse(self_sparse) => {
500 sparse_intersect(self_sparse, |elem| other.contains(*elem))
502 HybridBitSet::Dense(self_dense) => match other {
503 HybridBitSet::Sparse(other_sparse) => {
504 let (updated, changed) = dense_sparse_intersect(self_dense, other_sparse);
505 *self = HybridBitSet::Sparse(updated);
508 HybridBitSet::Dense(other_dense) => self_dense.intersect(other_dense),
514 impl<T> Clone for BitSet<T> {
515 fn clone(&self) -> Self {
516 BitSet { domain_size: self.domain_size, words: self.words.clone(), marker: PhantomData }
519 fn clone_from(&mut self, from: &Self) {
520 if self.domain_size != from.domain_size {
521 self.words.resize(from.domain_size, 0);
522 self.domain_size = from.domain_size;
525 self.words.copy_from_slice(&from.words);
529 impl<T: Idx> fmt::Debug for BitSet<T> {
530 fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
531 w.debug_list().entries(self.iter()).finish()
535 impl<T: Idx> ToString for BitSet<T> {
536 fn to_string(&self) -> String {
537 let mut result = String::new();
540 // Note: this is a little endian printout of bytes.
542 // i tracks how many bits we have printed so far.
544 for word in &self.words {
545 let mut word = *word;
546 for _ in 0..WORD_BYTES {
547 // for each byte in `word`:
548 let remain = self.domain_size - i;
549 // If less than a byte remains, then mask just that many bits.
550 let mask = if remain <= 8 { (1 << remain) - 1 } else { 0xFF };
551 assert!(mask <= 0xFF);
552 let byte = word & mask;
554 result.push_str(&format!("{}{:02x}", sep, byte));
571 pub struct BitIter<'a, T: Idx> {
572 /// A copy of the current word, but with any already-visited bits cleared.
573 /// (This lets us use `trailing_zeros()` to find the next set bit.) When it
574 /// is reduced to 0, we move onto the next word.
577 /// The offset (measured in bits) of the current word.
580 /// Underlying iterator over the words.
581 iter: slice::Iter<'a, Word>,
583 marker: PhantomData<T>,
586 impl<'a, T: Idx> BitIter<'a, T> {
588 fn new(words: &'a [Word]) -> BitIter<'a, T> {
589 // We initialize `word` and `offset` to degenerate values. On the first
590 // call to `next()` we will fall through to getting the first word from
591 // `iter`, which sets `word` to the first word (if there is one) and
592 // `offset` to 0. Doing it this way saves us from having to maintain
593 // additional state about whether we have started.
596 offset: usize::MAX - (WORD_BITS - 1),
603 impl<'a, T: Idx> Iterator for BitIter<'a, T> {
605 fn next(&mut self) -> Option<T> {
608 // Get the position of the next set bit in the current word,
609 // then clear the bit.
610 let bit_pos = self.word.trailing_zeros() as usize;
611 let bit = 1 << bit_pos;
613 return Some(T::new(bit_pos + self.offset));
616 // Move onto the next word. `wrapping_add()` is needed to handle
617 // the degenerate initial value given to `offset` in `new()`.
618 let word = self.iter.next()?;
620 self.offset = self.offset.wrapping_add(WORD_BITS);
626 fn bitwise<Op>(out_vec: &mut [Word], in_vec: &[Word], op: Op) -> bool
628 Op: Fn(Word, Word) -> Word,
630 assert_eq!(out_vec.len(), in_vec.len());
632 for (out_elem, in_elem) in iter::zip(out_vec, in_vec) {
633 let old_val = *out_elem;
634 let new_val = op(old_val, *in_elem);
636 // This is essentially equivalent to a != with changed being a bool, but
637 // in practice this code gets auto-vectorized by the compiler for most
638 // operators. Using != here causes us to generate quite poor code as the
639 // compiler tries to go back to a boolean on each loop iteration.
640 changed |= old_val ^ new_val;
645 const SPARSE_MAX: usize = 8;
647 /// A fixed-size bitset type with a sparse representation and a maximum of
648 /// `SPARSE_MAX` elements. The elements are stored as a sorted `ArrayVec` with
651 /// This type is used by `HybridBitSet`; do not use directly.
652 #[derive(Clone, Debug)]
653 pub struct SparseBitSet<T> {
655 elems: ArrayVec<T, SPARSE_MAX>,
658 impl<T: Idx> SparseBitSet<T> {
659 fn new_empty(domain_size: usize) -> Self {
660 SparseBitSet { domain_size, elems: ArrayVec::new() }
663 fn len(&self) -> usize {
667 fn is_empty(&self) -> bool {
668 self.elems.len() == 0
671 fn contains(&self, elem: T) -> bool {
672 assert!(elem.index() < self.domain_size);
673 self.elems.contains(&elem)
676 fn insert(&mut self, elem: T) -> bool {
677 assert!(elem.index() < self.domain_size);
678 let changed = if let Some(i) = self.elems.iter().position(|&e| e.index() >= elem.index()) {
679 if self.elems[i] == elem {
680 // `elem` is already in the set.
683 // `elem` is smaller than one or more existing elements.
684 self.elems.insert(i, elem);
688 // `elem` is larger than all existing elements.
689 self.elems.push(elem);
692 assert!(self.len() <= SPARSE_MAX);
696 fn remove(&mut self, elem: T) -> bool {
697 assert!(elem.index() < self.domain_size);
698 if let Some(i) = self.elems.iter().position(|&e| e == elem) {
699 self.elems.remove(i);
706 fn to_dense(&self) -> BitSet<T> {
707 let mut dense = BitSet::new_empty(self.domain_size);
708 for elem in self.elems.iter() {
714 fn iter(&self) -> slice::Iter<'_, T> {
718 bit_relations_inherent_impls! {}
721 impl<T: Idx + Ord> SparseBitSet<T> {
722 fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T> {
723 let mut last_leq = None;
724 for e in self.iter() {
725 if range.contains(e) {
733 /// A fixed-size bitset type with a hybrid representation: sparse when there
734 /// are up to a `SPARSE_MAX` elements in the set, but dense when there are more
735 /// than `SPARSE_MAX`.
737 /// This type is especially efficient for sets that typically have a small
738 /// number of elements, but a large `domain_size`, and are cleared frequently.
740 /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
743 /// All operations that involve an element will panic if the element is equal
744 /// to or greater than the domain size. All operations that involve two bitsets
745 /// will panic if the bitsets have differing domain sizes.
747 pub enum HybridBitSet<T> {
748 Sparse(SparseBitSet<T>),
752 impl<T: Idx> fmt::Debug for HybridBitSet<T> {
753 fn fmt(&self, w: &mut fmt::Formatter<'_>) -> fmt::Result {
755 Self::Sparse(b) => b.fmt(w),
756 Self::Dense(b) => b.fmt(w),
761 impl<T: Idx> HybridBitSet<T> {
762 pub fn new_empty(domain_size: usize) -> Self {
763 HybridBitSet::Sparse(SparseBitSet::new_empty(domain_size))
766 pub fn domain_size(&self) -> usize {
768 HybridBitSet::Sparse(sparse) => sparse.domain_size,
769 HybridBitSet::Dense(dense) => dense.domain_size,
773 pub fn clear(&mut self) {
774 let domain_size = self.domain_size();
775 *self = HybridBitSet::new_empty(domain_size);
778 pub fn contains(&self, elem: T) -> bool {
780 HybridBitSet::Sparse(sparse) => sparse.contains(elem),
781 HybridBitSet::Dense(dense) => dense.contains(elem),
785 pub fn superset(&self, other: &HybridBitSet<T>) -> bool {
786 match (self, other) {
787 (HybridBitSet::Dense(self_dense), HybridBitSet::Dense(other_dense)) => {
788 self_dense.superset(other_dense)
791 assert!(self.domain_size() == other.domain_size());
792 other.iter().all(|elem| self.contains(elem))
797 pub fn is_empty(&self) -> bool {
799 HybridBitSet::Sparse(sparse) => sparse.is_empty(),
800 HybridBitSet::Dense(dense) => dense.is_empty(),
804 /// Returns the previous element present in the bitset from `elem`,
805 /// inclusively of elem. That is, will return `Some(elem)` if elem is in the
807 pub fn last_set_in(&self, range: impl RangeBounds<T>) -> Option<T>
812 HybridBitSet::Sparse(sparse) => sparse.last_set_in(range),
813 HybridBitSet::Dense(dense) => dense.last_set_in(range),
817 pub fn insert(&mut self, elem: T) -> bool {
818 // No need to check `elem` against `self.domain_size` here because all
819 // the match cases check it, one way or another.
821 HybridBitSet::Sparse(sparse) if sparse.len() < SPARSE_MAX => {
822 // The set is sparse and has space for `elem`.
825 HybridBitSet::Sparse(sparse) if sparse.contains(elem) => {
826 // The set is sparse and does not have space for `elem`, but
827 // that doesn't matter because `elem` is already present.
830 HybridBitSet::Sparse(sparse) => {
831 // The set is sparse and full. Convert to a dense set.
832 let mut dense = sparse.to_dense();
833 let changed = dense.insert(elem);
835 *self = HybridBitSet::Dense(dense);
838 HybridBitSet::Dense(dense) => dense.insert(elem),
842 pub fn insert_range(&mut self, elems: impl RangeBounds<T>) {
843 // No need to check `elem` against `self.domain_size` here because all
844 // the match cases check it, one way or another.
845 let start = match elems.start_bound().cloned() {
846 Bound::Included(start) => start.index(),
847 Bound::Excluded(start) => start.index() + 1,
848 Bound::Unbounded => 0,
850 let end = match elems.end_bound().cloned() {
851 Bound::Included(end) => end.index() + 1,
852 Bound::Excluded(end) => end.index(),
853 Bound::Unbounded => self.domain_size() - 1,
855 let Some(len) = end.checked_sub(start) else { return };
857 HybridBitSet::Sparse(sparse) if sparse.len() + len < SPARSE_MAX => {
858 // The set is sparse and has space for `elems`.
859 for elem in start..end {
860 sparse.insert(T::new(elem));
863 HybridBitSet::Sparse(sparse) => {
864 // The set is sparse and full. Convert to a dense set.
865 let mut dense = sparse.to_dense();
866 dense.insert_range(elems);
867 *self = HybridBitSet::Dense(dense);
869 HybridBitSet::Dense(dense) => dense.insert_range(elems),
873 pub fn insert_all(&mut self) {
874 let domain_size = self.domain_size();
876 HybridBitSet::Sparse(_) => {
877 *self = HybridBitSet::Dense(BitSet::new_filled(domain_size));
879 HybridBitSet::Dense(dense) => dense.insert_all(),
883 pub fn remove(&mut self, elem: T) -> bool {
884 // Note: we currently don't bother going from Dense back to Sparse.
886 HybridBitSet::Sparse(sparse) => sparse.remove(elem),
887 HybridBitSet::Dense(dense) => dense.remove(elem),
891 /// Converts to a dense set, consuming itself in the process.
892 pub fn to_dense(self) -> BitSet<T> {
894 HybridBitSet::Sparse(sparse) => sparse.to_dense(),
895 HybridBitSet::Dense(dense) => dense,
899 pub fn iter(&self) -> HybridIter<'_, T> {
901 HybridBitSet::Sparse(sparse) => HybridIter::Sparse(sparse.iter()),
902 HybridBitSet::Dense(dense) => HybridIter::Dense(dense.iter()),
906 bit_relations_inherent_impls! {}
909 pub enum HybridIter<'a, T: Idx> {
910 Sparse(slice::Iter<'a, T>),
911 Dense(BitIter<'a, T>),
914 impl<'a, T: Idx> Iterator for HybridIter<'a, T> {
917 fn next(&mut self) -> Option<T> {
919 HybridIter::Sparse(sparse) => sparse.next().copied(),
920 HybridIter::Dense(dense) => dense.next(),
925 /// A resizable bitset type with a dense representation.
927 /// `T` is an index type, typically a newtyped `usize` wrapper, but it can also
930 /// All operations that involve an element will panic if the element is equal
931 /// to or greater than the domain size.
932 #[derive(Clone, Debug, PartialEq)]
933 pub struct GrowableBitSet<T: Idx> {
937 impl<T: Idx> Default for GrowableBitSet<T> {
938 fn default() -> Self {
939 GrowableBitSet::new_empty()
943 impl<T: Idx> GrowableBitSet<T> {
944 /// Ensure that the set can hold at least `min_domain_size` elements.
945 pub fn ensure(&mut self, min_domain_size: usize) {
946 if self.bit_set.domain_size < min_domain_size {
947 self.bit_set.domain_size = min_domain_size;
950 let min_num_words = num_words(min_domain_size);
951 if self.bit_set.words.len() < min_num_words {
952 self.bit_set.words.resize(min_num_words, 0)
956 pub fn new_empty() -> GrowableBitSet<T> {
957 GrowableBitSet { bit_set: BitSet::new_empty(0) }
960 pub fn with_capacity(capacity: usize) -> GrowableBitSet<T> {
961 GrowableBitSet { bit_set: BitSet::new_empty(capacity) }
964 /// Returns `true` if the set has changed.
966 pub fn insert(&mut self, elem: T) -> bool {
967 self.ensure(elem.index() + 1);
968 self.bit_set.insert(elem)
971 /// Returns `true` if the set has changed.
973 pub fn remove(&mut self, elem: T) -> bool {
974 self.ensure(elem.index() + 1);
975 self.bit_set.remove(elem)
979 pub fn is_empty(&self) -> bool {
980 self.bit_set.is_empty()
984 pub fn contains(&self, elem: T) -> bool {
985 let (word_index, mask) = word_index_and_mask(elem);
986 self.bit_set.words.get(word_index).map_or(false, |word| (word & mask) != 0)
990 /// A fixed-size 2D bit matrix type with a dense representation.
992 /// `R` and `C` are index types used to identify rows and columns respectively;
993 /// typically newtyped `usize` wrappers, but they can also just be `usize`.
995 /// All operations that involve a row and/or column index will panic if the
996 /// index exceeds the relevant bound.
997 #[derive(Clone, Eq, PartialEq, Hash, Decodable, Encodable)]
998 pub struct BitMatrix<R: Idx, C: Idx> {
1002 marker: PhantomData<(R, C)>,
1005 impl<R: Idx, C: Idx> BitMatrix<R, C> {
1006 /// Creates a new `rows x columns` matrix, initially empty.
1007 pub fn new(num_rows: usize, num_columns: usize) -> BitMatrix<R, C> {
1008 // For every element, we need one bit for every other
1009 // element. Round up to an even number of words.
1010 let words_per_row = num_words(num_columns);
1014 words: vec![0; num_rows * words_per_row],
1015 marker: PhantomData,
1019 /// Creates a new matrix, with `row` used as the value for every row.
1020 pub fn from_row_n(row: &BitSet<C>, num_rows: usize) -> BitMatrix<R, C> {
1021 let num_columns = row.domain_size();
1022 let words_per_row = num_words(num_columns);
1023 assert_eq!(words_per_row, row.words().len());
1027 words: iter::repeat(row.words()).take(num_rows).flatten().cloned().collect(),
1028 marker: PhantomData,
1032 pub fn rows(&self) -> impl Iterator<Item = R> {
1033 (0..self.num_rows).map(R::new)
1036 /// The range of bits for a given row.
1037 fn range(&self, row: R) -> (usize, usize) {
1038 let words_per_row = num_words(self.num_columns);
1039 let start = row.index() * words_per_row;
1040 (start, start + words_per_row)
1043 /// Sets the cell at `(row, column)` to true. Put another way, insert
1044 /// `column` to the bitset for `row`.
1046 /// Returns `true` if this changed the matrix.
1047 pub fn insert(&mut self, row: R, column: C) -> bool {
1048 assert!(row.index() < self.num_rows && column.index() < self.num_columns);
1049 let (start, _) = self.range(row);
1050 let (word_index, mask) = word_index_and_mask(column);
1051 let words = &mut self.words[..];
1052 let word = words[start + word_index];
1053 let new_word = word | mask;
1054 words[start + word_index] = new_word;
1058 /// Do the bits from `row` contain `column`? Put another way, is
1059 /// the matrix cell at `(row, column)` true? Put yet another way,
1060 /// if the matrix represents (transitive) reachability, can
1061 /// `row` reach `column`?
1062 pub fn contains(&self, row: R, column: C) -> bool {
1063 assert!(row.index() < self.num_rows && column.index() < self.num_columns);
1064 let (start, _) = self.range(row);
1065 let (word_index, mask) = word_index_and_mask(column);
1066 (self.words[start + word_index] & mask) != 0
1069 /// Returns those indices that are true in rows `a` and `b`. This
1070 /// is an *O*(*n*) operation where *n* is the number of elements
1071 /// (somewhat independent from the actual size of the
1072 /// intersection, in particular).
1073 pub fn intersect_rows(&self, row1: R, row2: R) -> Vec<C> {
1074 assert!(row1.index() < self.num_rows && row2.index() < self.num_rows);
1075 let (row1_start, row1_end) = self.range(row1);
1076 let (row2_start, row2_end) = self.range(row2);
1077 let mut result = Vec::with_capacity(self.num_columns);
1078 for (base, (i, j)) in (row1_start..row1_end).zip(row2_start..row2_end).enumerate() {
1079 let mut v = self.words[i] & self.words[j];
1080 for bit in 0..WORD_BITS {
1085 result.push(C::new(base * WORD_BITS + bit));
1093 /// Adds the bits from row `read` to the bits from row `write`, and
1094 /// returns `true` if anything changed.
1096 /// This is used when computing transitive reachability because if
1097 /// you have an edge `write -> read`, because in that case
1098 /// `write` can reach everything that `read` can (and
1099 /// potentially more).
1100 pub fn union_rows(&mut self, read: R, write: R) -> bool {
1101 assert!(read.index() < self.num_rows && write.index() < self.num_rows);
1102 let (read_start, read_end) = self.range(read);
1103 let (write_start, write_end) = self.range(write);
1104 let words = &mut self.words[..];
1105 let mut changed = false;
1106 for (read_index, write_index) in iter::zip(read_start..read_end, write_start..write_end) {
1107 let word = words[write_index];
1108 let new_word = word | words[read_index];
1109 words[write_index] = new_word;
1110 changed |= word != new_word;
1115 /// Adds the bits from `with` to the bits from row `write`, and
1116 /// returns `true` if anything changed.
1117 pub fn union_row_with(&mut self, with: &BitSet<C>, write: R) -> bool {
1118 assert!(write.index() < self.num_rows);
1119 assert_eq!(with.domain_size(), self.num_columns);
1120 let (write_start, write_end) = self.range(write);
1121 let mut changed = false;
1122 for (read_index, write_index) in iter::zip(0..with.words().len(), write_start..write_end) {
1123 let word = self.words[write_index];
1124 let new_word = word | with.words()[read_index];
1125 self.words[write_index] = new_word;
1126 changed |= word != new_word;
1131 /// Sets every cell in `row` to true.
1132 pub fn insert_all_into_row(&mut self, row: R) {
1133 assert!(row.index() < self.num_rows);
1134 let (start, end) = self.range(row);
1135 let words = &mut self.words[..];
1136 for index in start..end {
1139 self.clear_excess_bits(row);
1142 /// Clear excess bits in the final word of the row.
1143 fn clear_excess_bits(&mut self, row: R) {
1144 let num_bits_in_final_word = self.num_columns % WORD_BITS;
1145 if num_bits_in_final_word > 0 {
1146 let mask = (1 << num_bits_in_final_word) - 1;
1147 let (_, end) = self.range(row);
1148 let final_word_idx = end - 1;
1149 self.words[final_word_idx] &= mask;
1153 /// Gets a slice of the underlying words.
1154 pub fn words(&self) -> &[Word] {
1158 /// Iterates through all the columns set to true in a given row of
1160 pub fn iter(&self, row: R) -> BitIter<'_, C> {
1161 assert!(row.index() < self.num_rows);
1162 let (start, end) = self.range(row);
1163 BitIter::new(&self.words[start..end])
1166 /// Returns the number of elements in `row`.
1167 pub fn count(&self, row: R) -> usize {
1168 let (start, end) = self.range(row);
1169 self.words[start..end].iter().map(|e| e.count_ones() as usize).sum()
1173 impl<R: Idx, C: Idx> fmt::Debug for BitMatrix<R, C> {
1174 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
1175 /// Forces its contents to print in regular mode instead of alternate mode.
1176 struct OneLinePrinter<T>(T);
1177 impl<T: fmt::Debug> fmt::Debug for OneLinePrinter<T> {
1178 fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
1179 write!(fmt, "{:?}", self.0)
1183 write!(fmt, "BitMatrix({}x{}) ", self.num_rows, self.num_columns)?;
1184 let items = self.rows().flat_map(|r| self.iter(r).map(move |c| (r, c)));
1185 fmt.debug_set().entries(items.map(OneLinePrinter)).finish()
1189 /// A fixed-column-size, variable-row-size 2D bit matrix with a moderately
1190 /// sparse representation.
1192 /// Initially, every row has no explicit representation. If any bit within a
1193 /// row is set, the entire row is instantiated as `Some(<HybridBitSet>)`.
1194 /// Furthermore, any previously uninstantiated rows prior to it will be
1195 /// instantiated as `None`. Those prior rows may themselves become fully
1196 /// instantiated later on if any of their bits are set.
1198 /// `R` and `C` are index types used to identify rows and columns respectively;
1199 /// typically newtyped `usize` wrappers, but they can also just be `usize`.
1200 #[derive(Clone, Debug)]
1201 pub struct SparseBitMatrix<R, C>
1207 rows: IndexVec<R, Option<HybridBitSet<C>>>,
1210 impl<R: Idx, C: Idx> SparseBitMatrix<R, C> {
1211 /// Creates a new empty sparse bit matrix with no rows or columns.
1212 pub fn new(num_columns: usize) -> Self {
1213 Self { num_columns, rows: IndexVec::new() }
1216 fn ensure_row(&mut self, row: R) -> &mut HybridBitSet<C> {
1217 // Instantiate any missing rows up to and including row `row` with an empty HybridBitSet.
1218 // Then replace row `row` with a full HybridBitSet if necessary.
1219 self.rows.get_or_insert_with(row, || HybridBitSet::new_empty(self.num_columns))
1222 /// Sets the cell at `(row, column)` to true. Put another way, insert
1223 /// `column` to the bitset for `row`.
1225 /// Returns `true` if this changed the matrix.
1226 pub fn insert(&mut self, row: R, column: C) -> bool {
1227 self.ensure_row(row).insert(column)
1230 /// Sets the cell at `(row, column)` to false. Put another way, delete
1231 /// `column` from the bitset for `row`. Has no effect if `row` does not
1234 /// Returns `true` if this changed the matrix.
1235 pub fn remove(&mut self, row: R, column: C) -> bool {
1236 match self.rows.get_mut(row) {
1237 Some(Some(row)) => row.remove(column),
1242 /// Sets all columns at `row` to false. Has no effect if `row` does
1244 pub fn clear(&mut self, row: R) {
1245 if let Some(Some(row)) = self.rows.get_mut(row) {
1250 /// Do the bits from `row` contain `column`? Put another way, is
1251 /// the matrix cell at `(row, column)` true? Put yet another way,
1252 /// if the matrix represents (transitive) reachability, can
1253 /// `row` reach `column`?
1254 pub fn contains(&self, row: R, column: C) -> bool {
1255 self.row(row).map_or(false, |r| r.contains(column))
1258 /// Adds the bits from row `read` to the bits from row `write`, and
1259 /// returns `true` if anything changed.
1261 /// This is used when computing transitive reachability because if
1262 /// you have an edge `write -> read`, because in that case
1263 /// `write` can reach everything that `read` can (and
1264 /// potentially more).
1265 pub fn union_rows(&mut self, read: R, write: R) -> bool {
1266 if read == write || self.row(read).is_none() {
1270 self.ensure_row(write);
1271 if let (Some(read_row), Some(write_row)) = self.rows.pick2_mut(read, write) {
1272 write_row.union(read_row)
1278 /// Insert all bits in the given row.
1279 pub fn insert_all_into_row(&mut self, row: R) {
1280 self.ensure_row(row).insert_all();
1283 pub fn rows(&self) -> impl Iterator<Item = R> {
1287 /// Iterates through all the columns set to true in a given row of
1289 pub fn iter<'a>(&'a self, row: R) -> impl Iterator<Item = C> + 'a {
1290 self.row(row).into_iter().flat_map(|r| r.iter())
1293 pub fn row(&self, row: R) -> Option<&HybridBitSet<C>> {
1294 if let Some(Some(row)) = self.rows.get(row) { Some(row) } else { None }
1297 /// Interescts `row` with `set`. `set` can be either `BitSet` or
1298 /// `HybridBitSet`. Has no effect if `row` does not exist.
1300 /// Returns true if the row was changed.
1301 pub fn intersect_row<Set>(&mut self, row: R, set: &Set) -> bool
1303 HybridBitSet<C>: BitRelations<Set>,
1305 match self.rows.get_mut(row) {
1306 Some(Some(row)) => row.intersect(set),
1311 /// Subtracts `set from `row`. `set` can be either `BitSet` or
1312 /// `HybridBitSet`. Has no effect if `row` does not exist.
1314 /// Returns true if the row was changed.
1315 pub fn subtract_row<Set>(&mut self, row: R, set: &Set) -> bool
1317 HybridBitSet<C>: BitRelations<Set>,
1319 match self.rows.get_mut(row) {
1320 Some(Some(row)) => row.subtract(set),
1325 /// Unions `row` with `set`. `set` can be either `BitSet` or
1328 /// Returns true if the row was changed.
1329 pub fn union_row<Set>(&mut self, row: R, set: &Set) -> bool
1331 HybridBitSet<C>: BitRelations<Set>,
1333 self.ensure_row(row).union(set)
1338 fn num_words<T: Idx>(domain_size: T) -> usize {
1339 (domain_size.index() + WORD_BITS - 1) / WORD_BITS
1343 fn word_index_and_mask<T: Idx>(elem: T) -> (usize, Word) {
1344 let elem = elem.index();
1345 let word_index = elem / WORD_BITS;
1346 let mask = 1 << (elem % WORD_BITS);
1351 fn max_bit(word: Word) -> usize {
1352 WORD_BITS - 1 - word.leading_zeros() as usize
1355 /// Integral type used to represent the bit set.
1356 pub trait FiniteBitSetTy:
1357 BitAnd<Output = Self>
1363 + Not<Output = Self>
1367 /// Size of the domain representable by this type, e.g. 64 for `u64`.
1368 const DOMAIN_SIZE: u32;
1370 /// Value which represents the `FiniteBitSet` having every bit set.
1372 /// Value which represents the `FiniteBitSet` having no bits set.
1375 /// Value for one as the integral type.
1377 /// Value for zero as the integral type.
1380 /// Perform a checked left shift on the integral type.
1381 fn checked_shl(self, rhs: u32) -> Option<Self>;
1382 /// Perform a checked right shift on the integral type.
1383 fn checked_shr(self, rhs: u32) -> Option<Self>;
1386 impl FiniteBitSetTy for u32 {
1387 const DOMAIN_SIZE: u32 = 32;
1389 const FILLED: Self = Self::MAX;
1390 const EMPTY: Self = Self::MIN;
1392 const ONE: Self = 1u32;
1393 const ZERO: Self = 0u32;
1395 fn checked_shl(self, rhs: u32) -> Option<Self> {
1396 self.checked_shl(rhs)
1399 fn checked_shr(self, rhs: u32) -> Option<Self> {
1400 self.checked_shr(rhs)
1404 impl std::fmt::Debug for FiniteBitSet<u32> {
1405 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1406 write!(f, "{:032b}", self.0)
1410 impl FiniteBitSetTy for u64 {
1411 const DOMAIN_SIZE: u32 = 64;
1413 const FILLED: Self = Self::MAX;
1414 const EMPTY: Self = Self::MIN;
1416 const ONE: Self = 1u64;
1417 const ZERO: Self = 0u64;
1419 fn checked_shl(self, rhs: u32) -> Option<Self> {
1420 self.checked_shl(rhs)
1423 fn checked_shr(self, rhs: u32) -> Option<Self> {
1424 self.checked_shr(rhs)
1428 impl std::fmt::Debug for FiniteBitSet<u64> {
1429 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1430 write!(f, "{:064b}", self.0)
1434 impl FiniteBitSetTy for u128 {
1435 const DOMAIN_SIZE: u32 = 128;
1437 const FILLED: Self = Self::MAX;
1438 const EMPTY: Self = Self::MIN;
1440 const ONE: Self = 1u128;
1441 const ZERO: Self = 0u128;
1443 fn checked_shl(self, rhs: u32) -> Option<Self> {
1444 self.checked_shl(rhs)
1447 fn checked_shr(self, rhs: u32) -> Option<Self> {
1448 self.checked_shr(rhs)
1452 impl std::fmt::Debug for FiniteBitSet<u128> {
1453 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1454 write!(f, "{:0128b}", self.0)
1458 /// A fixed-sized bitset type represented by an integer type. Indices outwith than the range
1459 /// representable by `T` are considered set.
1460 #[derive(Copy, Clone, Eq, PartialEq, Decodable, Encodable)]
1461 pub struct FiniteBitSet<T: FiniteBitSetTy>(pub T);
1463 impl<T: FiniteBitSetTy> FiniteBitSet<T> {
1464 /// Creates a new, empty bitset.
1465 pub fn new_empty() -> Self {
1469 /// Sets the `index`th bit.
1470 pub fn set(&mut self, index: u32) {
1471 self.0 |= T::ONE.checked_shl(index).unwrap_or(T::ZERO);
1474 /// Unsets the `index`th bit.
1475 pub fn clear(&mut self, index: u32) {
1476 self.0 &= !T::ONE.checked_shl(index).unwrap_or(T::ZERO);
1479 /// Sets the `i`th to `j`th bits.
1480 pub fn set_range(&mut self, range: Range<u32>) {
1481 let bits = T::FILLED
1482 .checked_shl(range.end - range.start)
1485 .checked_shl(range.start)
1486 .unwrap_or(T::ZERO);
1490 /// Is the set empty?
1491 pub fn is_empty(&self) -> bool {
1495 /// Returns the domain size of the bitset.
1496 pub fn within_domain(&self, index: u32) -> bool {
1497 index < T::DOMAIN_SIZE
1500 /// Returns if the `index`th bit is set.
1501 pub fn contains(&self, index: u32) -> Option<bool> {
1502 self.within_domain(index)
1503 .then(|| ((self.0.checked_shr(index).unwrap_or(T::ONE)) & T::ONE) == T::ONE)
1507 impl<T: FiniteBitSetTy> Default for FiniteBitSet<T> {
1508 fn default() -> Self {