}
#[bench]
-fn insert(b: &mut Bencher) {
+fn grow_by_insertion(b: &mut Bencher) {
use super::HashMap;
let mut m = HashMap::new();
use default::Default;
use fmt::Show;
use fmt;
-use RandomSipHasher;
-use hash::{Hash, Hasher};
-use iter::{Iterator, FromIterator, Extendable, range};
+use hash::{Hash, Hasher, RandomSipHasher};
+use iter::{Iterator, FromIterator, Extendable};
use iter;
use mem::replace;
use num;
-use ops::Deref;
+use ops::{Deref, DerefMut};
use option::{Some, None, Option};
use result::{Ok, Err};
use ops::Index;
-use super::table::{BucketWithTable, FullBucketImm, RawTable, FullBucket, FullBucketMut, Bucket};
use super::table;
+use super::table::{
+ Bucket,
+ Empty,
+ Full,
+ FullBucket,
+ FullBucketImm,
+ FullBucketMut,
+ RawTable,
+ SafeHash
+};
static INITIAL_LOG2_CAP: uint = 5;
pub static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
/// The default behavior of HashMap implements a load factor of 90.9%.
/// This behavior is characterized by the following conditions:
///
-/// - if `size * 1.1 < cap < size * 4` then shouldn't resize
-/// - if `cap < minimum_capacity * 2` then shouldn't shrink
+/// - if size > 0.909 * capacity: grow
+/// - if size < 0.25 * capacity: shrink (if this won't bring capacity lower
+/// than the minimum)
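+///
+/// A hedged numeric sketch of these thresholds: at a capacity of 32 the map
+/// grows once size exceeds about 29 (29/32 ≈ 0.91) and may shrink once size
+/// drops below 8 (8/32 = 0.25), capacity permitting.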
#[deriving(Clone)]
struct DefaultResizePolicy {
/// Doubled minimal capacity. The capacity must never drop below
#[inline]
fn capacity_range(&self, new_size: uint) -> (uint, uint) {
- ((new_size * 11) / 10, max(new_size << 3, self.minimum_capacity2))
+ // Here, we are rephrasing the logic by specifying the ranges:
+ //
+ // - if `size * 1.1 < cap < size * 4`: don't resize
+ // - if `cap < minimum_capacity * 2`: don't shrink
+ // - otherwise, resize accordingly
+ ((new_size * 11) / 10, max(new_size << 2, self.minimum_capacity2))
}
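+
+    // A hedged worked instance of `capacity_range`: for `new_size == 100`
+    // (and a small `minimum_capacity2`), it yields
+    // ((100 * 11) / 10, 100 << 2) == (110, 400).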
#[inline]
}
// The main performance trick in this hashmap is called Robin Hood Hashing.
-// It gains its excellent performance from one crucial operation:
+// It gains its excellent performance from one essential operation:
//
-// If an insertion collides with an existing element, and that elements
+// If an insertion collides with an existing element, and that element's
// "probe distance" (how far away the element is from its ideal location)
// is higher than how far we've already probed, swap the elements.
//
// α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT
// colliding after k tries is 1-α^k.
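+//
+// A hedged worked instance of that formula: at the maximum load factor
+// α ≈ 0.909, the odds of colliding three times in a row are about
+// 0.909^3 ≈ 0.75, so the odds of finding a spot within three probes are
+// about 1 - 0.75 = 0.25.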
//
+// The paper from 1986 cited below mentions an implementation which keeps track
+// of the distance-to-initial-bucket histogram. This approach is not suitable
+// for modern architectures because it requires maintaining an internal data
+// structure. This allows very good first guesses, but we are most concerned
+// with guessing entire cache lines, not individual indexes. Furthermore, array
+// accesses would no longer be linear and in one direction, as they are now.
+// The memory and cache pressure such a structure would entail would also be
+// very difficult to observe properly in a microbenchmark.
+//
// Future Improvements (FIXME!)
// ============================
//
// Future Optimizations (FIXME!)
// =============================
//
-// The paper cited below mentions an implementation which keeps track of the
-// distance-to-initial-bucket histogram. I'm suspicious of this approach because
-// it requires maintaining an internal map. If this map were replaced with a
-// hashmap, it would be faster, but now our data structure is self-referential
-// and blows up. Also, this allows very good first guesses, but array accesses
-// are no longer linear and in one direction, as we have now. There is also
-// memory and cache pressure that this map would entail that would be very
-// difficult to properly see in a microbenchmark.
-//
// Another possible design choice that I made without any real reason is
// parameterizing the raw table over keys and values. Technically, all we need
// is the size and alignment of keys and values, and the code should be just as
// This would definitely be an avenue worth exploring if people start complaining
// about the size of rust executables.
//
-// There's also an "optimization" that has been omitted regarding how the
-// hashtable allocates. The vector type has set the expectation that a hashtable
-// which never has an element inserted should not allocate. I'm suspicious of
-// implementing this for hashtables, because supporting it has no performance
-// benefit over using an `Option<HashMap<K, V>>`, and is significantly more
-// complicated.
+// Annotate exceedingly likely branches in `table::make_hash`
+// and `search_hashed_generic` to reduce instruction cache pressure
+// and mispredictions once it becomes possible (blocked on issue #11092).
+//
+// Shrinking the table could simply reallocate in place after moving buckets
+// to the first half.
+//
+// The growth algorithm (fragment of the Proof of Correctness)
+// --------------------
+//
+// The growth algorithm is basically a fast path of the naive reinsertion-
+// during-resize algorithm. Other paths should never be taken.
+//
+// Consider growing a robin hood hashtable of capacity n. Normally, we do this
+// by allocating a new table of capacity `2n`, and then individually reinsert
+// each element in the old table into the new one. This guarantees that the
+// new table is a valid robin hood hashtable with all the desired statistical
+// properties. Remark that the order we reinsert the elements in should not
+// matter. For simplicity and efficiency, we will consider only linear
+// reinsertions, which consist of reinserting all elements in the old table
+// into the new one by increasing order of index. However we will not be
+// starting our reinsertions from index 0 in general. If we start from index
+// i, for the purpose of reinsertion we will consider all elements with real
+// index j < i to have virtual index n + j.
+//
+// Our hash generation scheme consists of generating a 64-bit hash and
+// truncating the most significant bits. When moving to the new table, we
+// simply introduce a new bit to the front of the hash. Therefore, if an
+// element has ideal index i in the old table, it can have one of two ideal
+// locations in the new table. If the new bit is 0, then the new ideal index
+// is i. If the new bit is 1, then the new ideal index is n + i. Intuitively,
+// we are producing two independent tables of size n, and for each element we
+// independently choose which table to insert it into with equal probability.
+// However, rather than wrapping around themselves on overflowing their
+// indexes, the first table overflows into the second, and the second into
+// the first. Visually, our new table will look something like:
+//
+// [yy_xxx_xxxx_xxx|xx_yyy_yyyy_yyy]
+//
+// Where x's are elements inserted into the first table, y's are elements
+// inserted into the second, and _'s are empty sections. We now define a few
+// key concepts that we will use later. Note that this is a very abstract
+// perspective of the table. A real resized table would be at least half
+// empty.
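+//
+// A hedged sketch of the ideal-index derivation above, in this module's
+// masking convention, where `n` is the old power-of-two capacity:
+//
+//     let old_ideal = (hash.inspect() as uint) & (n - 1);
+//     let new_ideal = (hash.inspect() as uint) & (2 * n - 1);
+//     // new_ideal is either old_ideal or old_ideal + n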
+//
+// Theorem: A linear robin hood reinsertion from the first ideal element
+// produces identical results to a linear naive reinsertion from the same
+// element.
+//
+// FIXME(Gankro, pczarn): review the proof and put it all in a separate doc.rs
/// A hash map implementation which uses linear probing with Robin
/// Hood bucket stealing.
// All hashes are keyed on these values, to prevent hash collision attacks.
hasher: H,
- table: table::RawTable<K, V>,
+ table: RawTable<K, V>,
// We keep this at the end since it might as well have tail padding.
resize_policy: DefaultResizePolicy,
}
/// Search for a pre-hashed key.
-fn search_hashed_generic<K, V, M: Deref<RawTable<K, V>>>(table: M, hash: &table::SafeHash, is_match: |&K| -> bool)
- -> Option<FullBucket<K, V, M>> {
+fn search_hashed_generic<K, V, M: Deref<RawTable<K, V>>>(table: M,
+ hash: &SafeHash,
+ is_match: |&K| -> bool)
+ -> SearchResult<K, V, M> {
let size = table.size();
let mut probe = Bucket::new(table, hash);
let ib = probe.index();
while probe.index() != ib + size {
let full = match probe.peek() {
- table::Empty(_) => return None, // hit an empty bucket
- table::Full(b) => b
+ Empty(b) => return TableRef(b.into_table()), // hit an empty bucket
+ Full(b) => b
};
if full.distance() + ib < full.index() {
- return None;
+ // We can finish the search early if we hit any bucket
+ // with a lower distance to initial bucket than we've probed.
+ return TableRef(full.into_table());
}
// If the hash doesn't match, it can't be this one..
// If the key doesn't match, it can't be this one..
if matched {
- return Some(full);
+ return FoundExisting(full);
}
}
probe = full.next();
}
- None
+ TableRef(probe.into_table())
}
-fn search_hashed<K: Eq, V, M: Deref<RawTable<K, V>>>(table: M, hash: &table::SafeHash, k: &K)
- -> Option<table::FullBucket<K, V, M>> {
+fn search_hashed<K: Eq, V, M: Deref<RawTable<K, V>>>(table: M, hash: &SafeHash, k: &K)
+ -> SearchResult<K, V, M> {
search_hashed_generic(table, hash, |k_| *k == *k_)
}
fn pop_internal<K, V>(starting_bucket: FullBucketMut<K, V>) -> V {
- let size = {
- let table = starting_bucket.table();
- table.size()
- };
let (empty, _k, retval) = starting_bucket.take();
let mut gap = match empty.gap_peek() {
Some(b) => b,
None => return retval
};
- // COMPILER error! wrong enum optimization. sets ptr to 0
- for _ in range(0, size) {
- if gap.full().distance() != 0 {
- gap = match gap.shift() {
- Some(b) => b,
- None => return retval
+ while gap.full().distance() != 0 {
+ gap = match gap.shift() {
+ Some(b) => b,
+ None => break
+ };
+ }
+
+ // Now we've done all our shifting. Return the value we grabbed earlier.
+ return retval;
+}
+
+/// Perform robin hood bucket stealing at the given `bucket`. You must
+/// also pass the position of that bucket's initial bucket so we don't have
+/// to recalculate it.
+///
+/// `hash`, `k`, and `v` are the elements to "robin hood" into the hashtable.
+fn robin_hood<'a, K: 'a, V: 'a>(mut bucket: FullBucketMut<'a, K, V>,
+ mut ib: uint,
+ mut hash: SafeHash,
+ mut k: K,
+ mut v: V)
+ -> &'a mut V {
+ let starting_index = bucket.index();
+ let size = {
+ let table = bucket.table(); // FIXME "lifetime too short".
+ table.size()
+ };
+ // There can be at most `size - dib` buckets to displace, because
+ // in the worst case, there are `size` elements and we already are
+ // `distance` buckets away from the initial one.
+ let idx_end = starting_index + size - bucket.distance();
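+    // Hedged worked instance: with `size == 8` and `bucket.distance() == 3`,
+    // at most 8 - 3 == 5 more buckets can need to be displaced.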
+
+ loop {
+ let (old_hash, old_key, old_val) = bucket.replace(hash, k, v);
+ loop {
+ let probe = bucket.next();
+ assert!(probe.index() != idx_end);
+
+ let full_bucket = match probe.peek() {
+ table::Empty(bucket) => {
+ // Found a hole!
+ let b = bucket.put(old_hash, old_key, old_val);
+ // Now that it's stolen, just read the value's pointer
+ // right out of the table!
+ let (_, v) = Bucket::at_index(b.into_table(), starting_index).peek()
+ .expect_full()
+ .into_mut_refs();
+ return v;
+ },
+ table::Full(bucket) => bucket
};
- continue;
+
+ let probe_ib = full_bucket.index() - full_bucket.distance();
+
+ bucket = full_bucket;
+
+ // Robin hood! Steal the spot.
+ if ib < probe_ib {
+ ib = probe_ib;
+ hash = old_hash;
+ k = old_key;
+ v = old_val;
+ break;
+ }
+ }
+ }
+}
+
+/// A result that works like Option<FullBucket<..>> but preserves
+/// the reference that grants us access to the table in any case.
+enum SearchResult<K, V, M> {
+ // This is an entry that holds the given key:
+ FoundExisting(FullBucket<K, V, M>),
+
+ // There was no such entry. The reference is given back:
+ TableRef(M)
+}
+
+impl<K, V, M> SearchResult<K, V, M> {
+ fn into_option(self) -> Option<FullBucket<K, V, M>> {
+ match self {
+ FoundExisting(bucket) => Some(bucket),
+ TableRef(_) => None
}
+ }
+}
- break;
+/// A newtyped mutable reference to the hashmap that allows e.g. Deref to be
+/// implemented without making changes to the visible interface of HashMap.
+/// Used internally because it's accepted by the search functions above.
+struct MapMutRef<'a, K: 'a, V: 'a, H: 'a> {
+ map_ref: &'a mut HashMap<K, V, H>
+}
+
+impl<'a, K, V, H> Deref<RawTable<K, V>> for MapMutRef<'a, K, V, H> {
+ fn deref(&self) -> &RawTable<K, V> {
+ &self.map_ref.table
}
+}
- // Now we're done all our shifting. Return the value we grabbed
- // earlier.
- return retval;
+impl<'a, K, V, H> DerefMut<RawTable<K, V>> for MapMutRef<'a, K, V, H> {
+ fn deref_mut(&mut self) -> &mut RawTable<K, V> {
+ &mut self.map_ref.table
+ }
}
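+
+// A hedged usage sketch of `MapMutRef`: wrapping `&mut self` lets a search
+// function consume the table reference and hand it back via `TableRef` on a
+// miss (this mirrors its use in `find_with_or_insert_with` below):
+//
+//     let this = MapMutRef { map_ref: self };
+//     match search_hashed(this, &hash, &k) {
+//         FoundExisting(bucket) => { /* use the bucket */ }
+//         TableRef(this) => { /* regained access to the map */ }
+//     }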
impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
- fn make_hash<X: Hash<S>>(&self, x: &X) -> table::SafeHash {
+ fn make_hash<X: Hash<S>>(&self, x: &X) -> SafeHash {
table::make_hash(&self.hasher, x)
}
fn search_equiv<'a, Q: Hash<S> + Equiv<K>>(&'a self, q: &Q)
-> Option<FullBucketImm<'a, K, V>> {
let hash = self.make_hash(q);
- search_hashed_generic(&self.table, &hash, |k| q.equiv(k))
+ search_hashed_generic(&self.table, &hash, |k| q.equiv(k)).into_option()
}
fn search_equiv_mut<'a, Q: Hash<S> + Equiv<K>>(&'a mut self, q: &Q)
-> Option<FullBucketMut<'a, K, V>> {
let hash = self.make_hash(q);
- search_hashed_generic(&mut self.table, &hash, |k| q.equiv(k))
+ search_hashed_generic(&mut self.table, &hash, |k| q.equiv(k)).into_option()
}
/// Search for a key, yielding the index if it's found in the hashtable.
/// search_hashed.
fn search<'a>(&'a self, k: &K) -> Option<FullBucketImm<'a, K, V>> {
let hash = self.make_hash(k);
- search_hashed(&self.table, &hash, k)
+ search_hashed(&self.table, &hash, k).into_option()
}
fn search_mut<'a>(&'a mut self, k: &K) -> Option<FullBucketMut<'a, K, V>> {
let hash = self.make_hash(k);
- search_hashed(&mut self.table, &hash, k)
+ search_hashed(&mut self.table, &hash, k).into_option()
}
- fn insert_hashed_ordered(&mut self, hash: table::SafeHash, k: K, v: V) {
+    // The caller should ensure that the invariants required by Robin Hood hashing hold.
+ fn insert_hashed_ordered(&mut self, hash: SafeHash, k: K, v: V) {
let cap = self.table.capacity();
let mut buckets = Bucket::new(&mut self.table, &hash);
let ib = buckets.index();
+
while buckets.index() != ib + cap {
+ // We don't need to compare hashes for value swap.
+ // Not even DIBs for Robin Hood.
buckets = match buckets.peek() {
- table::Empty(empty) => {
+ Empty(empty) => {
empty.put(hash, k, v);
return;
}
- table::Full(b) => b.into_bucket()
+ Full(b) => b.into_bucket()
};
buckets.next();
}
while buckets.index() != cap {
buckets = match buckets.peek() {
- table::Empty(b) => b.next(),
- table::Full(full) => {
+ Empty(b) => b.next(),
+ Full(full) => {
let (b, _, _) = full.take();
b.next()
}
self.make_some_room(potential_new_size);
let mut retval = None;
- self.insert_or_replace_with(hash, k, v, |val_ref, val| {
+ self.insert_or_replace_with(hash, k, v, |_, val_ref, val| {
retval = Some(replace(val_ref, val));
});
retval
HashMap {
hasher: hasher,
resize_policy: DefaultResizePolicy::new(INITIAL_CAPACITY),
- table: table::RawTable::new(0),
+ table: RawTable::new(0),
}
}
HashMap {
hasher: hasher,
resize_policy: DefaultResizePolicy::new(cap),
- table: table::RawTable::new(cap),
+ table: RawTable::new(cap),
}
}
assert!(self.table.size() <= new_capacity);
assert!(num::is_power_of_two(new_capacity));
- let mut old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
+ let mut old_table = replace(&mut self.table, RawTable::new(new_capacity));
let old_size = old_table.size();
- if old_table.capacity() == 0 {
+ if old_table.capacity() == 0 || old_table.size() == 0 {
return;
}
if new_capacity < old_table.capacity() {
+ // Shrink the table. Naive algorithm for resizing:
for (h, k, v) in old_table.move_iter() {
self.insert_hashed_nocheck(h, k, v);
}
} else {
+ // Grow the table.
+ // Specialization of the other branch.
let mut bucket = Bucket::first(&mut old_table);
+ // "So a few of the first shall be last: for many be called,
+ // but few chosen."
+ //
+ // We'll most likely encounter a few buckets at the beginning that
+ // have their initial buckets near the end of the table. They were
+ // placed at the beginning as the probe wrapped around the table
+ // during insertion. We must skip forward to a bucket that won't
+        // get reinserted too early and won't unfairly steal another's spot.
+ // This eliminates the need for robin hood.
loop {
- match bucket.peek() {
- table::Full(full) => {
+ bucket = match bucket.peek() {
+ Full(full) => {
if full.distance() == 0 {
+ // This bucket occupies its ideal spot.
+ // It indicates the start of another "cluster".
bucket = full.into_bucket();
break;
}
- bucket = full.next();
+ // Leaving this bucket in the last cluster for later.
+ full.into_bucket()
}
- table::Empty(b) => {
- bucket = b.next();
- break;
+ Empty(b) => {
+ // Encountered a hole between clusters.
+ b.into_bucket()
}
};
+ bucket.next();
}
+ // This is how the buckets might be laid out in memory:
+ // ($ marks an initialized bucket)
+ // ________________
+ // |$$$_$$$$$$_$$$$$|
+ //
+ // But we've skipped the entire initial cluster of buckets
+ // and will continue iteration in this order:
+ // ________________
+ // |$$$$$$_$$$$$
+ // ^ wrap around once end is reached
+ // ________________
+ // $$$_____________|
+ // ^ exit once table.size == 0
loop {
bucket = match bucket.peek() {
- table::Full(bucket) => {
- {
- let t = bucket.table();
- if t.size() == 0 { break }
- }
+ Full(bucket) => {
let h = bucket.hash();
let (b, k, v) = bucket.take();
self.insert_hashed_ordered(h, k, v);
+ {
+ let t = b.table(); // FIXME "lifetime too short".
+ if t.size() == 0 { break }
+ };
b.into_bucket()
}
- table::Empty(b) => b.into_bucket()
+ Empty(b) => b.into_bucket()
};
bucket.next();
}
///
/// If the key already exists, the hashtable will be returned untouched
/// and a reference to the existing element will be returned.
- fn insert_hashed_nocheck<'a>(
- &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
- self.insert_or_replace_with(hash, k, v, |_, _| ())
+ fn insert_hashed_nocheck(&mut self, hash: SafeHash, k: K, v: V) -> &mut V {
+ self.insert_or_replace_with(hash, k, v, |_, _, _| ())
}
- fn insert_or_replace_with<'a>(
- &'a mut self, hash: table::SafeHash, k: K, v: V,
- found_existing: |&mut V, V|
- ) -> &'a mut V {
-
+ fn insert_or_replace_with<'a>(&'a mut self,
+ hash: SafeHash,
+ k: K,
+ v: V,
+ found_existing: |&mut K, &mut V, V|)
+ -> &'a mut V {
// Worst case, we'll find one empty bucket among `size + 1` buckets.
let size = self.table.size();
- let mut rbucket = Bucket::new(&mut self.table, &hash);
- let ib = rbucket.index();
+ let mut probe = Bucket::new(&mut self.table, &hash);
+ let ib = probe.index();
loop {
- let mut bucket = match rbucket.peek() {
- table::Empty(bucket) => {
+ let mut bucket = match probe.peek() {
+ Empty(bucket) => {
// Found a hole!
let bucket = bucket.put(hash, k, v);
let (_, val) = bucket.into_mut_refs();
return val;
},
- table::Full(bucket) => bucket
+ Full(bucket) => bucket
};
if bucket.hash() == hash {
- let (bucket_k, bucket_v) = bucket.read_mut();
- // FIXME #12147 the conditional return confuses
- // borrowck if we return bucket_v directly
- let bv: *mut V = bucket_v;
- if k == *bucket_k {
+ let found_match = {
+ let (bucket_k, _) = bucket.read_mut();
+ k == *bucket_k
+ };
+ if found_match {
+ let (bucket_k, bucket_v) = bucket.into_mut_refs();
+ debug_assert!(k == *bucket_k);
// Key already exists. Get its reference.
- found_existing(bucket_v, v);
- return unsafe {&mut *bv};
+ found_existing(bucket_k, bucket_v, v);
+ return bucket_v;
}
}
if (ib as int) < robin_ib {
// Found a luckier bucket than me. Better steal his spot.
- let (mut hash, mut k, mut v) = bucket.replace(hash, k, v);
- let robin_index = bucket.index();
- let mut robin_ib = robin_ib as uint;
- let mut rbucket = bucket.next();
- loop {
- let mut bucket = match rbucket.peek() {
- table::Empty(bucket) => {
- // Found a hole!
- let b = bucket.put(hash, k, v);
- // Now that it's stolen, just read the value's pointer
- // right out of the table!
- let (_, v) = match Bucket::at_index(b.into_table(), robin_index).peek() {
- table::Full(b) => b.into_mut_refs(),
- _ => fail!()
- };
- return v;
- },
- table::Full(bucket) => bucket
- };
-
- let probe_ib = bucket.index() - bucket.distance();
-
- // Robin hood! Steal the spot.
- if robin_ib < probe_ib {
- robin_ib = probe_ib;
- let (old_hash, old_key, old_val) = bucket.replace(hash, k, v);
- hash = old_hash;
- k = old_key;
- v = old_val;
- }
- rbucket = bucket.next();
- if rbucket.index() == ib + size + 1 {
- fail!("HashMap fatal error: 100% load factor?")
- }
- }
- }
- rbucket = bucket.next();
- if rbucket.index() == ib + size + 1 {
- fail!("Internal HashMap error: Out of space.")
+ return robin_hood(bucket, robin_ib as uint, hash, k, v);
}
+
+ probe = bucket.next();
+ assert!(probe.index() != ib + size + 1);
}
}
/// Inserts an element which has already been hashed, returning a reference
    /// to that element inside the hashtable. This is more efficient than using
/// `insert`, since the key will not be rehashed.
- fn insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
+ fn insert_hashed(&mut self, hash: SafeHash, k: K, v: V) -> &mut V {
let potential_new_size = self.table.size() + 1;
self.make_some_room(potential_new_size);
self.insert_hashed_nocheck(hash, k, v)
/// // Find the existing key
/// assert_eq!(*map.find_or_insert("a", -2), 1);
/// ```
- pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
+ pub fn find_or_insert(&mut self, k: K, v: V) -> &mut V {
self.find_with_or_insert_with(k, v, |_k, _v, _a| (), |_k, a| a)
}
v: V,
f: |&K, &mut V|)
-> &'a mut V {
- self.find_with_or_insert_with(k, v, |k, v, _a| f(k, v), |_k, a| a)
+ let potential_new_size = self.table.size() + 1;
+ self.make_some_room(potential_new_size);
+
+ let hash = self.make_hash(&k);
+ self.insert_or_replace_with(hash, k, v, |kref, vref, _v| f(kref, vref))
}
/// Modify and return the value corresponding to the key in the map, or
a: A,
found: |&K, &mut V, A|,
not_found: |&K, A| -> V)
- -> &'a mut V {
+ -> &'a mut V
+ {
let hash = self.make_hash(&k);
- {
- match search_hashed(&mut self.table, &hash, &k) {
- Some(bucket) => {
- let (_, v_ref) = bucket.into_mut_refs();
- found(&k, v_ref, a);
- return v_ref;
- }
- _ => {
- }
- };
+ let this = MapMutRef { map_ref: self };
+
+ match search_hashed(this, &hash, &k) {
+ FoundExisting(bucket) => {
+ let (_, v_ref) = bucket.into_mut_refs();
+ found(&k, v_ref, a);
+ v_ref
+ }
+ TableRef(this) => {
+ let v = not_found(&k, a);
+ this.map_ref.insert_hashed(hash, k, v)
+ }
}
- let v = not_found(&k, a);
- self.insert_hashed(hash, k, v)
}
/// Retrieves a value for the given key.
/// println!("{}", key);
/// }
/// ```
- pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
+ pub fn keys(&self) -> Keys<K, V> {
self.iter().map(|(k, _v)| k)
}
/// println!("{}", key);
/// }
/// ```
- pub fn values<'a>(&'a self) -> Values<'a, K, V> {
+ pub fn values(&self) -> Values<K, V> {
self.iter().map(|(_k, v)| v)
}
/// println!("key: {} val: {}", key, val);
/// }
/// ```
- pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
- self.table.iter()
+ pub fn iter(&self) -> Entries<K, V> {
+ Entries { inner: self.table.iter() }
}
/// An iterator visiting all key-value pairs in arbitrary order,
/// println!("key: {} val: {}", key, val);
/// }
/// ```
- pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
- self.table.mut_iter()
+ pub fn mut_iter(&mut self) -> MutEntries<K, V> {
+ MutEntries { inner: self.table.mut_iter() }
}
/// Creates a consuming iterator, that is, one that moves each key-value
/// let vec: Vec<(&str, int)> = map.move_iter().collect();
/// ```
pub fn move_iter(self) -> MoveEntries<K, V> {
- self.table.move_iter().map(|(_, k, v)| (k, v))
+ MoveEntries {
+ inner: self.table.move_iter().map(|(_, k, v)| (k, v))
+ }
}
}
fn eq(&self, other: &HashMap<K, V, H>) -> bool {
if self.len() != other.len() { return false; }
- self.iter()
- .all(|(key, value)| {
- match other.find(key) {
- None => false,
- Some(v) => *value == *v
- }
- })
+ self.iter().all(|(key, value)|
+ other.find(key).map_or(false, |v| *value == *v)
+ )
}
}
}*/
/// HashMap iterator
-pub type Entries<'a, K, V> = table::Entries<'a, K, V>;
+pub struct Entries<'a, K: 'a, V: 'a> {
+ inner: table::Entries<'a, K, V>
+}
/// HashMap mutable values iterator
-pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>;
+pub struct MutEntries<'a, K: 'a, V: 'a> {
+ inner: table::MutEntries<'a, K, V>
+}
/// HashMap move iterator
-pub type MoveEntries<K, V> =
- iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries<K, V>>;
+pub struct MoveEntries<K, V> {
+ inner: iter::Map<'static, (SafeHash, K, V), (K, V), table::MoveEntries<K, V>>
+}
+
+impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
+ #[inline]
+ fn next(&mut self) -> Option<(&'a K, &'a V)> {
+ self.inner.next()
+ }
+ #[inline]
+ fn size_hint(&self) -> (uint, Option<uint>) {
+ self.inner.size_hint()
+ }
+}
+
+impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {
+ #[inline]
+ fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
+ self.inner.next()
+ }
+ #[inline]
+ fn size_hint(&self) -> (uint, Option<uint>) {
+ self.inner.size_hint()
+ }
+}
+
+impl<K, V> Iterator<(K, V)> for MoveEntries<K, V> {
+ #[inline]
+ fn next(&mut self) -> Option<(K, V)> {
+ self.inner.next()
+ }
+ #[inline]
+ fn size_hint(&self) -> (uint, Option<uint>) {
+ self.inner.size_hint()
+ }
+}
/// HashMap keys iterator
pub type Keys<'a, K, V> =
k: uint
}
-
impl Dropable {
fn new(k: uint) -> Dropable {
let v = drop_vector.get().unwrap();
hm
};
+ // By the way, ensure that cloning doesn't screw up the dropping.
drop(hm.clone());
{
assert_eq!(*m.find(&1).unwrap(), 2);
}
+ #[test]
+ fn test_update_with() {
+ let mut m = HashMap::with_capacity(4);
+ assert!(m.insert(1i, 2i));
+
+ for i in range(1i, 1000) {
+ assert_eq!(
+ i + 2,
+ *m.insert_or_update_with(i + 1, i + 2, |_k, _v| {
+ fail!("Key not yet present");
+ })
+ );
+ assert_eq!(
+ i + 1,
+ *m.insert_or_update_with(i, i + 3, |k, v| {
+ assert_eq!(*k, i);
+ assert_eq!(*v, i + 1);
+ })
+ );
+ }
+ }
+
#[test]
fn test_conflict_remove() {
let mut m = HashMap::with_capacity(4);
m.insert(i, i);
i += 1;
}
+ // three quarters full
assert_eq!(m.len(), i);
assert_eq!(m.table.capacity(), cap);
m.insert(i, i);
i += 1;
}
+ // half full
let new_cap = m.table.capacity();
assert_eq!(new_cap, cap * 2);
- for _ in range(0, cap / 2) {
+ for _ in range(0, cap / 2 - 1) {
i -= 1;
m.remove(&i);
assert_eq!(m.table.capacity(), new_cap);
}
-
+ // A little more than one quarter full.
+ // Shrinking starts as we remove more elements:
for _ in range(0, cap / 2 - 1) {
i -= 1;
m.remove(&i);
pub use self::map::HashMap;
pub use self::map::Entries;
+pub use self::map::MutEntries;
pub use self::map::MoveEntries;
pub use self::map::Keys;
pub use self::map::Values;
use default::Default;
use fmt::Show;
use fmt;
-use RandomSipHasher;
-use hash::{Hash, Hasher};
+use hash::{Hash, Hasher, RandomSipHasher};
use iter::{Iterator, FromIterator, FilterMap, Chain, Repeat, Zip, Extendable};
use iter;
use option::{Some, None};
use super::{HashMap, Entries, MoveEntries, INITIAL_CAPACITY};
-/// HashSet iterator
-pub type SetItems<'a, K> =
- iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;
-/// HashSet move iterator
-pub type SetMoveItems<K> =
- iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
+// Future Optimization (FIXME!)
+// =============================
+//
+// Iteration over zero sized values is a noop. There is no need
+// for `bucket.val` in the case of HashSet. I suppose we would need HKT
+// to get rid of it properly.
/// An implementation of a hash set using the underlying representation of a
/// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
}
}
+/// HashSet iterator
+pub type SetItems<'a, K> =
+ iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;
+
+/// HashSet move iterator
+pub type SetMoveItems<K> =
+ iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
+
// `Repeat` is used to feed the filter closure an explicit capture
// of a reference to the other set
/// Set operations iterator
use cmp;
use hash::{Hash, Hasher};
use iter::{Iterator, count};
+use kinds::marker;
use mem::{min_align_of, size_of};
use mem;
-use num::{CheckedMul, is_power_of_two};
+use num::{CheckedAdd, CheckedMul, is_power_of_two};
use ops::{Deref, DerefMut, Drop};
use option::{Some, None, Option};
-use ptr::RawPtr;
-use ptr::set_memory;
-use ptr::write;
+use ptr::{RawPtr, copy_nonoverlapping_memory, zero_memory};
use ptr;
use rt::heap::{allocate, deallocate};
/// `Vec<Option<u64, K, V>>`, because we don't pay for the overhead of an
/// option on every element, and we get a generally more cache-aware design.
///
-/// Key invariants of this structure:
+/// Essential invariants of this structure:
///
-/// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
-/// 'undefined' contents. Don't read from them. This invariant is
-/// enforced outside this module with the `EmptyIndex`, `FullIndex`,
+/// - if t.hashes[i] == EMPTY_BUCKET, then `Bucket::at_index(&t, i).raw`
+/// points to 'undefined' contents. Don't read from it. This invariant is
+/// enforced outside this module with the `EmptyBucket`, `FullBucket`,
/// and `SafeHash` types.
///
-/// - An `EmptyIndex` is only constructed for a bucket at an index with
+/// - An `EmptyBucket` is only constructed at an index with
/// a hash of EMPTY_BUCKET.
///
-/// - A `FullIndex` is only constructed for a bucket at an index with a
+/// - A `FullBucket` is only constructed at an index with a
/// non-EMPTY_BUCKET hash.
///
/// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
/// `capacity`. This is set at creation and never changes. The arrays
/// are unzipped to save space (we don't have to pay for the padding
/// between odd sized elements, such as in a map from u64 to u8), and
-/// be more cache aware (scanning through 8 hashes brings in 2 cache
-/// lines, since they're all right beside each other).
+/// be more cache aware (scanning through 8 hashes brings in at most
+/// 2 cache lines, since they're all right beside each other).
///
/// You can kind of think of this module/data structure as a safe wrapper
/// around just the "table" part of the hashtable. It enforces some
/// invariants at the type level and employs some performance trickery,
/// but in general is just a tricked out `Vec<Option<u64, K, V>>`.
-///
-/// FIXME(cgaebel):
-///
-/// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried,
-/// isn't yet totally safe. There's a "known exploit" that you can create
-/// multiple FullIndexes for a bucket, `take` one, and then still `take`
-/// the other causing undefined behavior. Currently, there's no story
-/// for how to protect against this statically. Therefore, there are asserts
-/// on `take`, `get`, `get_mut`, and `put` which check the bucket state.
-/// With time, and when we're confident this works correctly, they should
-/// be removed. Also, the bounds check in `peek` is especially painful,
-/// as that's called in the innermost loops of the hashtable and has the
-/// potential to be a major performance drain. Remove this too.
-///
-/// Or, better than remove, only enable these checks for debug builds.
-/// There's currently no "debug-only" asserts in rust, so if you're reading
-/// this and going "what? of course there are debug-only asserts!", then
-/// please make this use them!
#[unsafe_no_drop_flag]
pub struct RawTable<K, V> {
capacity: uint,
size: uint,
- hashes: *mut u64
-}
-
-/// A bucket that holds a reference to the table
-pub trait BucketWithTable<M> {
- /// A bucket that holds a reference to the table
- fn table<'a>(&'a self) -> &'a M;
-
- /// Move out the reference to the table.
- fn into_table(self) -> M;
-
- /// Get the raw index.
- fn index(&self) -> uint;
+ hashes: *mut u64,
+ // Because K/V do not appear directly in any of the types in the struct,
+ // inform rustc that in fact instances of K and V are reachable from here.
+ marker: marker::CovariantType<(K,V)>,
}
struct RawBucket<K, V> {
table: M
}
-pub type EmptyBucketImm<'table,K,V> = EmptyBucket<K, V, &'table RawTable<K,V>>;
-pub type FullBucketImm<'table,K,V> = FullBucket<K, V, &'table RawTable<K,V>>;
+pub type EmptyBucketImm<'table, K, V> = EmptyBucket<K, V, &'table RawTable<K, V>>;
+pub type FullBucketImm<'table, K, V> = FullBucket<K, V, &'table RawTable<K, V>>;
-pub type EmptyBucketMut<'table,K,V> = EmptyBucket<K, V, &'table mut RawTable<K,V>>;
-pub type FullBucketMut<'table,K,V> = FullBucket<K, V, &'table mut RawTable<K,V>>;
+pub type EmptyBucketMut<'table, K, V> = EmptyBucket<K, V, &'table mut RawTable<K, V>>;
+pub type FullBucketMut<'table, K, V> = FullBucket<K, V, &'table mut RawTable<K, V>>;
+pub enum BucketState<K, V, M> {
+ Empty(EmptyBucket<K, V, M>),
+ Full(FullBucket<K, V, M>),
+}
+
+// A GapThenFull encapsulates the state of two consecutive buckets at once.
+// The first bucket, called the gap, is known to be empty.
+// The second bucket is full.
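+//
+// A hedged illustration of one `shift` step: the full bucket's entry is
+// moved back into the gap, and the gap advances to the slot it vacated:
+//
+//     [.., gap, full, ..]  ->  [.., full, gap, ..]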
struct GapThenFull<K, V, M> {
gap: EmptyBucket<K, V, ()>,
- full: FullBucket<K, V, M>
+ full: FullBucket<K, V, M>,
}
-impl<K, V, M: Deref<RawTable<K,V>>> GapThenFull<K, V, M> {
- pub fn full<'a>(&'a self) -> &'a FullBucket<K, V, M> {
- &self.full
- }
-
- pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
- unsafe {
- *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
- mem::overwrite(self.gap.raw.key, ptr::read(self.full.raw.key as *const K));
- mem::overwrite(self.gap.raw.val, ptr::read(self.full.raw.val as *const V));
- }
-
- let FullBucket { raw, idx, .. } = self.full;
-
- match self.full.next().peek() {
- Empty(_) => None,
- Full(bucket) => {
- self.gap.raw = raw;
- self.gap.idx = idx;
+/// A hash that is not zero, since we use a hash of zero to represent empty
+/// buckets.
+#[deriving(PartialEq)]
+pub struct SafeHash {
+ hash: u64,
+}
- self.full = bucket;
- self.full.idx &= self.full.table.capacity - 1;
+impl SafeHash {
+ /// Peek at the hash value, which is guaranteed to be non-zero.
+ #[inline(always)]
+ pub fn inspect(&self) -> u64 { self.hash }
+}
- Some(self)
- }
- }
+/// We need to remove hashes of 0. That's reserved for empty buckets.
+/// This function wraps up `hash_keyed` to be the only way outside this
+/// module to generate a SafeHash.
+pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
+ match hasher.hash(t) {
+ // This constant is exceedingly likely to hash to the same
+ // bucket, but it won't be counted as empty! Just so we can maintain
+ // our precious uniform distribution of initial indexes.
+ EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
+ h => SafeHash { hash: h },
}
}
-impl<K, V> RawPtr<u64> for RawBucket<K, V> {
+// `replace` casts a `*u64` to a `*SafeHash`. Since we statically
+// ensure that a `FullBucket` points to an index with a non-zero hash,
+// and a `SafeHash` is just a `u64` with a different name, this is
+// safe.
+//
+// This test ensures that a `SafeHash` really IS the same size as a
+// `u64`. If you need to change the size of `SafeHash` (and
+// consequently made this test fail), `replace` needs to be
+// modified to no longer assume this.
+#[test]
+fn can_alias_safehash_as_u64() {
+ assert_eq!(size_of::<SafeHash>(), size_of::<u64>())
+}
+
+impl<K, V> RawBucket<K, V> {
unsafe fn offset(self, count: int) -> RawBucket<K, V> {
RawBucket {
hash: self.hash.offset(count),
val: self.val.offset(count),
}
}
+}
- fn null() -> RawBucket<K, V> {
- RawBucket {
- hash: RawPtr::null(),
- key: RawPtr::null(),
- val: RawPtr::null()
+// For parameterizing over mutability.
+impl<'t, K, V> Deref<RawTable<K, V>> for &'t RawTable<K, V> {
+ fn deref(&self) -> &RawTable<K, V> {
+ &**self
+ }
+}
+
+impl<'t, K, V> Deref<RawTable<K, V>> for &'t mut RawTable<K, V> {
+ fn deref(&self) -> &RawTable<K,V> {
+ &**self
+ }
+}
+
+impl<'t, K, V> DerefMut<RawTable<K, V>> for &'t mut RawTable<K, V> {
+ fn deref_mut(&mut self) -> &mut RawTable<K,V> {
+ &mut **self
+ }
+}
+
+// Buckets hold references to the table.
+impl<K, V, M> FullBucket<K, V, M> {
+ /// Borrow a reference to the table.
+ pub fn table(&self) -> &M {
+ &self.table
+ }
+ /// Move out the reference to the table.
+ pub fn into_table(self) -> M {
+ self.table
+ }
+ /// Get the raw index.
+ pub fn index(&self) -> uint {
+ self.idx
+ }
+}
+
+impl<K, V, M> EmptyBucket<K, V, M> {
+ /// Borrow a reference to the table.
+ pub fn table(&self) -> &M {
+ &self.table
+ }
+ /// Move out the reference to the table.
+ pub fn into_table(self) -> M {
+ self.table
+ }
+}
+
+impl<K, V, M> Bucket<K, V, M> {
+ /// Move out the reference to the table.
+ pub fn into_table(self) -> M {
+ self.table
+ }
+ /// Get the raw index.
+ pub fn index(&self) -> uint {
+ self.idx
+ }
+}
+
+impl<K, V, M: Deref<RawTable<K, V>>> Bucket<K, V, M> {
+ pub fn new(table: M, hash: &SafeHash) -> Bucket<K, V, M> {
+ Bucket::at_index(table, hash.inspect() as uint)
+ }
+
+ pub fn at_index(table: M, ib_index: uint) -> Bucket<K, V, M> {
+ let ib_index = ib_index & (table.capacity() - 1);
+ Bucket {
+ raw: unsafe {
+ table.first_bucket_raw().offset(ib_index as int)
+ },
+ idx: ib_index,
+ table: table
}
}
- fn is_null(&self) -> bool {
- self.hash.is_null()
+ pub fn first(table: M) -> Bucket<K, V, M> {
+ Bucket {
+ raw: table.first_bucket_raw(),
+ idx: 0,
+ table: table
+ }
}
- fn to_uint(&self) -> uint {
- self.hash.to_uint()
+ /// Reads a bucket at a given index, returning an enum indicating whether
+ /// it's initialized or not. You need to match on this enum to get
+ /// the appropriate types to call most of the other functions in
+ /// this module.
+ pub fn peek(self) -> BucketState<K, V, M> {
+ match unsafe { *self.raw.hash } {
+ EMPTY_BUCKET =>
+ Empty(EmptyBucket {
+ raw: self.raw,
+ idx: self.idx,
+ table: self.table
+ }),
+ _ =>
+ Full(FullBucket {
+ raw: self.raw,
+ idx: self.idx,
+ table: self.table
+ })
+ }
}
- unsafe fn to_option(&self) -> Option<&u64> {
- self.hash.to_option()
+ /// Modifies the bucket pointer in place to make it point to the next slot.
+ pub fn next(&mut self) {
+ // Branchless bucket iteration step.
+ // As we reach the end of the table...
+ // We take the current idx: 0111111b
+ // Xor it by its increment: ^ 1000000b
+ // ------------
+ // 1111111b
+ // Then AND with the capacity: & 1000000b
+ // ------------
+ // to get the backwards offset: 1000000b
+ // ... and it's zero at all other times.
+ let maybe_wraparound_dist = (self.idx ^ (self.idx + 1)) & self.table.capacity();
+ // Finally, we obtain the offset 1 or the offset -cap + 1.
+ let dist = 1i - (maybe_wraparound_dist as int);
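+        // Hedged worked instance: with capacity 8 and `self.idx == 7`,
+        // (7 ^ 8) & 8 == 8, so `dist == 1 - 8 == -7`, wrapping the raw
+        // pointer back to the first bucket.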
+
+ self.idx += 1;
+
+ unsafe {
+ self.raw = self.raw.offset(dist);
+ }
}
}
-impl<K, V, M: Deref<RawTable<K,V>>> EmptyBucket<K, V, M> {
+impl<K, V, M: Deref<RawTable<K, V>>> EmptyBucket<K, V, M> {
+ #[inline]
pub fn next(self) -> Bucket<K, V, M> {
let mut bucket = self.into_bucket();
bucket.next();
bucket
}
+ #[inline]
pub fn into_bucket(self) -> Bucket<K, V, M> {
Bucket {
raw: self.raw,
};
match self.next().peek() {
- Empty(_) => None,
Full(bucket) => {
Some(GapThenFull {
gap: gap,
full: bucket
})
}
+ Empty(..) => None
}
}
}
-impl<K, V, M: DerefMut<RawTable<K,V>>> EmptyBucket<K, V, M> {
+impl<K, V, M: DerefMut<RawTable<K, V>>> EmptyBucket<K, V, M> {
+ /// Puts given key and value pair, along with the key's hash,
+ /// into this bucket in the hashtable. Note how `self` is 'moved' into
+ /// this function, because this slot will no longer be empty when
+ /// we return! A `FullBucket` is returned for later use, pointing to
+ /// the newly-filled slot in the hashtable.
+ ///
+ /// Use `make_hash` to construct a `SafeHash` to pass to this function.
pub fn put(mut self, hash: SafeHash, key: K, value: V)
-> FullBucket<K, V, M> {
unsafe {
*self.raw.hash = hash.inspect();
- write(self.raw.key, key);
- write(self.raw.val, value);
+ ptr::write(self.raw.key, key);
+ ptr::write(self.raw.val, value);
}
self.table.size += 1;
}
}
-impl<K, V, M: Deref<RawTable<K,V>>> FullBucket<K, V, M> {
+impl<K, V, M: Deref<RawTable<K, V>>> FullBucket<K, V, M> {
+ #[inline]
pub fn next(self) -> Bucket<K, V, M> {
let mut bucket = self.into_bucket();
bucket.next();
bucket
}
+ #[inline]
pub fn into_bucket(self) -> Bucket<K, V, M> {
Bucket {
raw: self.raw,
}
}
+ /// Get the distance between this bucket and the 'ideal' location
+ /// as determined by the key's hash stored in it.
+ ///
+ /// In the cited blog posts above, this is called the "distance to
+ /// initial bucket", or DIB. Also known as "probe count".
pub fn distance(&self) -> uint {
+ // Calculates the distance one has to travel when going from
+ // `hash mod capacity` onwards to `idx mod capacity`, wrapping around
+ // if the destination is not reached before the end of the table.
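+        // Hedged example: with capacity 8, ideal index 6 and current index 1,
+        // the distance is (1 - 6) & 7 == 3 (wrapping past the end).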
(self.idx - self.hash().inspect() as uint) & (self.table.capacity() - 1)
}
+ #[inline]
pub fn hash(&self) -> SafeHash {
unsafe {
SafeHash {
}
}
- pub fn read<'a>(&'a self) -> (&'a K, &'a V) {
- unsafe {
- (&*self.raw.key,
- &*self.raw.val)
- }
- }
-
- pub fn into_refs(self) -> (&K, &V) {
+ /// Gets references to the key and value at a given index.
+ pub fn read(&self) -> (&K, &V) {
unsafe {
- // debug_assert!(*self.raw.hash != EMPTY_BUCKET);
(&*self.raw.key,
&*self.raw.val)
}
}
}
-impl<K, V, M: DerefMut<RawTable<K,V>>> FullBucket<K, V, M> {
+impl<K, V, M: DerefMut<RawTable<K, V>>> FullBucket<K, V, M> {
+ /// Removes this bucket's key and value from the hashtable.
+ ///
+ /// This works similarly to `put`, building an `EmptyBucket` out of the
+ /// taken bucket.
pub fn take(mut self) -> (EmptyBucket<K, V, M>, K, V) {
let key = self.raw.key as *const K;
let val = self.raw.val as *const V;
}
}
- pub fn read_mut<'a>(&'a self) -> (&'a mut K, &'a mut V) {
+ /// Gets mutable references to the key and value at a given index.
+ pub fn read_mut(&mut self) -> (&mut K, &mut V) {
unsafe {
- // debug_assert!(*self.raw.hash != EMPTY_BUCKET);
(&mut *self.raw.key,
&mut *self.raw.val)
}
}
+}
- pub fn into_mut_refs(self) -> (&mut K, &mut V) {
+impl<'t, K, V, M: Deref<RawTable<K, V>> + 't> FullBucket<K, V, M> {
+ /// Exchange a bucket state for immutable references into the table.
+ /// Because the underlying reference to the table is also consumed,
+ /// no further changes to the structure of the table are possible;
+ /// in exchange for this, the returned references have a longer lifetime
+ /// than the references returned by `read()`.
+ pub fn into_refs(self) -> (&'t K, &'t V) {
unsafe {
- // debug_assert!(*self.raw.hash != EMPTY_BUCKET);
- (&mut *self.raw.key,
- &mut *self.raw.val)
+ (&*self.raw.key,
+ &*self.raw.val)
}
}
}
-impl<K, V, M: Deref<RawTable<K,V>>> Bucket<K, V, M> {
- pub fn new(table: M, hash: &SafeHash) -> Bucket<K, V, M> {
- let ib_index = (hash.inspect() as uint) & (table.capacity() - 1);
- Bucket {
- raw: unsafe {
- table.as_mut_ptrs().offset(ib_index as int)
- },
- idx: ib_index,
- table: table
- }
- }
-
- pub fn at_index(table: M, ib_index: uint) -> Bucket<K, V, M> {
- let ib_index = ib_index & (table.capacity() - 1);
- Bucket {
- raw: unsafe {
- table.as_mut_ptrs().offset(ib_index as int)
- },
- idx: ib_index,
- table: table
- }
- }
-
- pub fn first(table: M) -> Bucket<K, V, M> {
- Bucket {
- raw: table.as_mut_ptrs(),
- idx: 0,
- table: table
- }
- }
-
- pub fn peek(self) -> BucketState<K, V, M> {
- match unsafe { *self.raw.hash } {
- EMPTY_BUCKET =>
- Empty(EmptyBucket {
- raw: self.raw,
- idx: self.idx,
- table: self.table
- }),
- _ =>
- Full(FullBucket {
- raw: self.raw,
- idx: self.idx,
- table: self.table
- })
- }
- }
-
- pub fn next(&mut self) {
- self.idx += 1;
-
- let dist = if self.idx == self.table.capacity() {
- -(self.table.capacity() as int - 1)
- } else {
- 1i
- };
-
+impl<'t, K, V, M: DerefMut<RawTable<K, V>> + 't> FullBucket<K, V, M> {
+ /// This works similarly to `into_refs`, exchanging a bucket state
+ /// for mutable references into the table.
+ pub fn into_mut_refs(self) -> (&'t mut K, &'t mut V) {
unsafe {
- self.raw = self.raw.offset(dist);
+ (&mut *self.raw.key,
+ &mut *self.raw.val)
}
}
}
-impl<K, V, M> BucketWithTable<M> for FullBucket<K, V, M> {
- fn table<'a>(&'a self) -> &'a M {
- &self.table
- }
-
- fn into_table(self) -> M {
- self.table
- }
-
- fn index(&self) -> uint {
- self.idx
- }
-}
-
-impl<K, V, M> BucketWithTable<M> for EmptyBucket<K, V, M> {
- fn table<'a>(&'a self) -> &'a M {
- &self.table
- }
-
- fn into_table(self) -> M {
- self.table
- }
-
- fn index(&self) -> uint {
- self.idx
+impl<K, V, M> BucketState<K, V, M> {
+ // For convenience.
+ pub fn expect_full(self) -> FullBucket<K, V, M> {
+ match self {
+ Full(full) => full,
+ Empty(..) => fail!("Expected full bucket")
+ }
}
}
-impl<K, V, M> BucketWithTable<M> for Bucket<K, V, M> {
- fn table<'a>(&'a self) -> &'a M {
- &self.table
+impl<K, V, M: Deref<RawTable<K, V>>> GapThenFull<K, V, M> {
+ #[inline]
+ pub fn full(&self) -> &FullBucket<K, V, M> {
+ &self.full
}
- fn into_table(self) -> M {
- self.table
- }
+ pub fn shift(mut self) -> Option<GapThenFull<K, V, M>> {
+ unsafe {
+ *self.gap.raw.hash = mem::replace(&mut *self.full.raw.hash, EMPTY_BUCKET);
+ copy_nonoverlapping_memory(self.gap.raw.key, self.full.raw.key as *const K, 1);
+ copy_nonoverlapping_memory(self.gap.raw.val, self.full.raw.val as *const V, 1);
+ }
- fn index(&self) -> uint {
- self.idx
- }
-}
+ let FullBucket { raw: prev_raw, idx: prev_idx, .. } = self.full;
-impl<'table,K,V> Deref<RawTable<K,V>> for &'table RawTable<K,V> {
- fn deref<'a>(&'a self) -> &'a RawTable<K,V> {
- &**self
- }
-}
+ match self.full.next().peek() {
+ Full(bucket) => {
+ self.gap.raw = prev_raw;
+ self.gap.idx = prev_idx;
-impl<'table,K,V> Deref<RawTable<K,V>> for &'table mut RawTable<K,V> {
- fn deref<'a>(&'a self) -> &'a RawTable<K,V> {
- &**self
- }
-}
+ self.full = bucket;
-impl<'table,K,V> DerefMut<RawTable<K,V>> for &'table mut RawTable<K,V> {
- fn deref_mut<'a>(&'a mut self) -> &'a mut RawTable<K,V> {
- &mut **self
+ Some(self)
+ }
+ Empty(..) => None
+ }
}
}
-pub enum BucketState<K, V, M> {
- Empty(EmptyBucket<K, V, M>),
- Full(FullBucket<K, V, M>),
-}
-
-/// A hash that is not zero, since we use a hash of zero to represent empty
-/// buckets.
-#[deriving(PartialEq)]
-pub struct SafeHash {
- hash: u64,
-}
-
-impl SafeHash {
- /// Peek at the hash value, which is guaranteed to be non-zero.
- #[inline(always)]
- pub fn inspect(&self) -> u64 { self.hash }
-}
-
-/// We need to remove hashes of 0. That's reserved for empty buckets.
-/// This function wraps up `hash_keyed` to be the only way outside this
-/// module to generate a SafeHash.
-pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
- match hasher.hash(t) {
- // This constant is exceedingly likely to hash to the same
- // bucket, but it won't be counted as empty!
- EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
- h => SafeHash { hash: h },
- }
-}
+/// Rounds up to a multiple of a power of two. Returns the closest multiple
+/// of `target_alignment` that is higher or equal to `unrounded`.
+///
+/// # Failure
+///
+/// Fails if `target_alignment` is not a power of two.
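+///
+/// As a hedged example, `round_up_to_next(5, 4)` returns 8, and
+/// `round_up_to_next(8, 4)` returns 8 unchanged.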
fn round_up_to_next(unrounded: uint, target_alignment: uint) -> uint {
assert!(is_power_of_two(target_alignment));
(unrounded + target_alignment - 1) & !(target_alignment - 1)
}
impl<K, V> RawTable<K, V> {
-
/// Does not initialize the buckets. The caller should ensure they,
/// at the very least, set every hash to EMPTY_BUCKET.
unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> {
size: 0,
capacity: 0,
hashes: 0 as *mut u64,
+ marker: marker::CovariantType,
};
}
let hashes_size = capacity.checked_mul(&size_of::<u64>())
capacity: capacity,
size: 0,
hashes: hashes,
+ marker: marker::CovariantType,
}
}
- fn as_mut_ptrs(&self) -> RawBucket<K, V> {
+ fn first_bucket_raw(&self) -> RawBucket<K, V> {
let hashes_size = self.capacity * size_of::<u64>();
let keys_size = self.capacity * size_of::<K>();
- let keys_offset = (hashes_size + min_align_of::< K >() - 1) & !(min_align_of::< K >() - 1);
+ let keys_offset = (hashes_size + min_align_of::<K>() - 1) & !(min_align_of::<K>() - 1);
let end_of_keys = keys_offset + keys_size;
- let vals_offset = (end_of_keys + min_align_of::< V >() - 1) & !(min_align_of::< V >() - 1);
+ let vals_offset = (end_of_keys + min_align_of::<V>() - 1) & !(min_align_of::<V>() - 1);
let buffer = self.hashes as *mut u8;
pub fn new(capacity: uint) -> RawTable<K, V> {
unsafe {
let ret = RawTable::new_uninitialized(capacity);
- set_memory(ret.hashes, 0u8, capacity);
+ zero_memory(ret.hashes, capacity);
ret
}
}
self.size
}
- fn ptrs<'a>(&'a self) -> RawBuckets<'a, K, V> {
+ fn raw_buckets(&self) -> RawBuckets<K, V> {
RawBuckets {
- raw: self.as_mut_ptrs(),
+ raw: self.first_bucket_raw(),
hashes_end: unsafe {
self.hashes.offset(self.capacity as int)
}
}
}
- pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
+ pub fn iter(&self) -> Entries<K, V> {
Entries {
- iter: self.ptrs(),
+ iter: self.raw_buckets(),
elems_left: self.size(),
}
}
- pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
+ pub fn mut_iter(&mut self) -> MutEntries<K, V> {
MutEntries {
- iter: self.ptrs(),
+ iter: self.raw_buckets(),
elems_left: self.size(),
}
}
pub fn move_iter(self) -> MoveEntries<K, V> {
MoveEntries {
- iter: self.ptrs(),
+ iter: self.raw_buckets(),
table: self,
}
}
- pub fn rev_move_buckets<'a>(&'a mut self) -> RevMoveBuckets<'a, K, V> {
- let raw_bucket = self.as_mut_ptrs();
- unsafe {
- RevMoveBuckets {
- raw: raw_bucket.offset(self.capacity as int),
- hashes_end: raw_bucket.hash,
- elems_left: self.size
- }
+ /// Returns an iterator that copies out each entry. Used while the table
+ /// is being dropped.
+ unsafe fn rev_move_buckets(&mut self) -> RevMoveBuckets<K, V> {
+ let raw_bucket = self.first_bucket_raw();
+ RevMoveBuckets {
+ raw: raw_bucket.offset(self.capacity as int),
+ hashes_end: raw_bucket.hash,
+ elems_left: self.size
}
}
}
-pub struct RawBuckets<'a, K, V> {
+/// A raw iterator. The basis for some other iterators in this module. Although
+/// this interface is safe, it's not used outside this module.
+struct RawBuckets<'a, K, V> {
raw: RawBucket<K, V>,
hashes_end: *mut u64
}
fn next(&mut self) -> Option<RawBucket<K, V>> {
while self.raw.hash != self.hashes_end {
unsafe {
+ // We are swapping out the pointer to a bucket and replacing
+ // it with the pointer to the next one.
let prev = ptr::replace(&mut self.raw, self.raw.offset(1));
if *prev.hash != EMPTY_BUCKET {
return Some(prev);
}
}
-pub struct RevMoveBuckets<'a, K, V> {
+/// An iterator that moves out buckets in reverse order. It leaves the table
+/// in an inconsistent state and should only be used for dropping
+/// the table's remaining entries. It's used in the implementation of Drop.
+struct RevMoveBuckets<'a, K, V> {
raw: RawBucket<K, V>,
hashes_end: *mut u64,
elems_left: uint
}
}
-// `read_all_mut` casts a `*u64` to a `*SafeHash`. Since we statically
-// ensure that a `FullIndex` points to an index with a non-zero hash,
-// and a `SafeHash` is just a `u64` with a different name, this is
-// safe.
-//
-// This test ensures that a `SafeHash` really IS the same size as a
-// `u64`. If you need to change the size of `SafeHash` (and
-// consequently made this test fail), `read_all_mut` needs to be
-// modified to no longer assume this.
-#[test]
-fn can_alias_safehash_as_u64() {
- assert_eq!(size_of::<SafeHash>(), size_of::<u64>())
-}
-
-/// Note: stage0-specific version that lacks bound.
-#[cfg(stage0)]
-pub struct Entries<'a, K, V> {
- iter: RawBuckets<'a, K, V>,
- elems_left: uint,
-}
-
/// Iterator over shared references to entries in a table.
-#[cfg(not(stage0))]
pub struct Entries<'a, K: 'a, V: 'a> {
iter: RawBuckets<'a, K, V>,
elems_left: uint,
}
-/// Note: stage0-specific version that lacks bound.
-#[cfg(stage0)]
-pub struct MutEntries<'a, K, V> {
- iter: RawBuckets<'a, K, V>,
- elems_left: uint,
-}
-
/// Iterator over mutable references to entries in a table.
-#[cfg(not(stage0))]
pub struct MutEntries<'a, K: 'a, V: 'a> {
iter: RawBuckets<'a, K, V>,
elems_left: uint,
mem::overwrite(new_buckets.raw.key, k);
mem::overwrite(new_buckets.raw.val, v);
}
- _ => {
+ Empty(..) => {
*new_buckets.raw.hash = EMPTY_BUCKET;
}
}
new_buckets.next();
buckets.next();
}
- }
+ };
new_ht.size = self.size();
if self.hashes.is_null() {
return;
}
- // This is in reverse because we're likely to have partially taken
+ // This is done in reverse because we've likely partially taken
// some elements out with `.move_iter()` from the front.
// Check if the size is 0, so we don't do a useless scan when
// dropping empty tables such as on resize.
- // Avoid double free of elements already moved out.
- for _ in self.rev_move_buckets() {}
+ // Also avoid double drop of elements that have been already moved out.
+ unsafe {
+ for _ in self.rev_move_buckets() {}
+ }
let hashes_size = self.capacity * size_of::<u64>();
let keys_size = self.capacity * size_of::<K>();
// Remember how everything was allocated out of one buffer
// during initialization? We only need one call to free here.
}
-
- self.hashes = RawPtr::null();
}
}