1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types)
13 use std::container::{Container, Mutable, Map, MutableMap, Set, MutableSet};
14 use std::clone::Clone;
15 use std::cmp::{Eq, TotalEq, Equiv, max};
16 use std::default::Default;
19 use std::hash::{Hash, Hasher, sip};
21 use std::iter::{Iterator, FromIterator, Extendable};
22 use std::iter::{FilterMap, Chain, Repeat, Zip};
23 use std::iter::{range, range_inclusive};
24 use std::mem::replace;
26 use std::option::{Option, Some, None};
29 use std::result::{Ok, Err};
30 use std::slice::ImmutableVector;
33 use std::clone::Clone;
35 use std::hash::{Hash, Hasher};
36 use std::kinds::marker;
38 use std::num::CheckedMul;
39 use std::option::{Option, Some, None};
40 use std::prelude::Drop;
43 use std::rt::global_heap;
44 use std::intrinsics::{size_of, transmute, move_val_init};
45 use std::iter::{Iterator, range_step_inclusive};
// Sentinel hash value marking a bucket as empty. Real hashes are never 0:
// `make_hash` below remaps a genuine hash of 0 to a non-zero constant.
47 static EMPTY_BUCKET: u64 = 0u64;
49 /// The raw hashtable, providing safe-ish access to the unzipped and highly
50 /// optimized arrays of hashes, keys, and values.
52 /// This design uses less memory and is a lot faster than the naive
53 /// `~[Option<u64, K, V>]`, because we don't pay for the overhead of an
54 /// option on every element, and we get a generally more cache-aware design.
56 /// Key invariants of this structure:
58 /// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
59 /// 'undefined' contents. Don't read from them. This invariant is
60 /// enforced outside this module with the [EmptyIndex], [FullIndex],
61 /// and [SafeHash] types/concepts.
63 /// - An `EmptyIndex` is only constructed for a bucket at an index with
64 /// a hash of EMPTY_BUCKET.
66 /// - A `FullIndex` is only constructed for a bucket at an index with a
67 /// non-EMPTY_BUCKET hash.
69 /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
70 /// around hashes of zero by changing them to 0x8000_0000_0000_0000, which will
71 /// likely hash to the same bucket, but not be represented as "empty".
73 /// - All three "arrays represented by pointers" are the same length:
74 /// `capacity`. This is set at creation and never changes. The arrays
75 /// are unzipped to save space (we don't have to pay for the padding
76 /// between odd sized elements, such as in a map from u64 to u8), and
77 /// be more cache aware (scanning through 8 hashes brings in 2 cache
78 /// lines, since they're all right beside each other).
80 /// You can kind of think of this module/data structure as a safe wrapper
81 /// around just the "table" part of the hashtable. It enforces some
82 /// invariants at the type level and employs some performance trickery,
83 /// but in general is just a tricked out `~[Option<u64, K, V>]`.
87 /// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried,
88 /// isn't yet totally safe. There's a "known exploit" that you can create
89 /// multiple FullIndexes for a bucket, `take` one, and then still `take`
90 /// the other causing undefined behavior. Currently, there's no story
91 /// for how to protect against this statically. Therefore, there are asserts
92 /// on `take`, `get`, `get_mut`, and `put` which check the bucket state.
93 /// With time, and when we're confident this works correctly, they should
94 /// be removed. Also, the bounds check in `peek` is especially painful,
95 /// as that's called in the innermost loops of the hashtable and has the
96 /// potential to be a major performance drain. Remove this too.
98 /// Or, better than remove, only enable these checks for debug builds.
99 /// There's currently no "debug-only" asserts in rust, so if you're reading
100 /// this and going "what? of course there are debug-only asserts!", then
101 /// please make this use them!
102 pub struct RawTable<K, V> {
// Raw pointer to the unzipped array of bucket hashes (EMPTY_BUCKET == empty).
// NOTE(review): the matching `keys`/`vals` pointer fields and the
// `capacity`/`size` fields are on lines elided from this view.
105 priv hashes: *mut u64,
110 /// Represents an index into a `RawTable` with no key or value in it.
111 pub struct EmptyIndex {
// The NoPod marker makes this token non-copyable, so a given empty slot
// can only be consumed (via `put`) once.
113 priv nopod: marker::NoPod,
116 /// Represents an index into a `RawTable` with a key, value, and hash
118 pub struct FullIndex {
// Non-copyable for the same reason as `EmptyIndex`; `idx` and `hash`
// fields are on elided lines (see `hash()`/`raw_index()` below).
121 priv nopod: marker::NoPod,
125 /// Since we get the hash for free whenever we check the bucket state,
126 /// this function is provided for fast access, letting us avoid making
127 /// redundant trips back to the hashtable.
128 pub fn hash(&self) -> SafeHash { self.hash }
130 /// Same comment as with `hash`.
131 pub fn raw_index(&self) -> uint { self.idx as uint }
134 /// Represents the state of a bucket: it can either have a key/value
135 /// pair (be full) or not (be empty). You cannot `take` empty buckets,
136 /// and you cannot `put` into full buckets.
137 pub enum BucketState {
142 /// A hash that is not zero, since we use that to represent empty buckets.
144 pub struct SafeHash {
149 /// Peek at the hash value, which is guaranteed to be non-zero.
150 pub fn inspect(&self) -> u64 { self.hash }
153 /// We need to remove hashes of 0. That's reserved for empty buckets.
154 /// This function wraps up `hash_keyed` to be the only way outside this
155 /// module to generate a SafeHash.
// Hash `t` with the given hasher, remapping the reserved EMPTY_BUCKET (0)
// sentinel so the result is always a valid, non-empty `SafeHash`.
156 pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
157 match hasher.hash(t) {
158 // This constant is exceedingly likely to hash to the same
159 // bucket, but it won't be counted as empty!
160 EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
// Any non-zero hash is already safe to use as-is.
161 h => SafeHash { hash: h },
165 impl<K, V> RawTable<K, V> {
// Allocates the three parallel arrays (hashes, keys, vals) without writing
// any bucket contents. Unsafe: the caller must set every hash slot to
// EMPTY_BUCKET before the table is used (see `RawTable::new`).
// The checked_mul calls below guard each allocation size against overflow.
167 /// Does not initialize the buckets. The caller should ensure they,
168 /// at the very least, set every hash to EMPTY_BUCKET.
169 unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> {
171 capacity.checked_mul(&size_of::<u64>()).expect("capacity overflow");
173 capacity.checked_mul(&size_of::< K >()).expect("capacity overflow");
175 capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");
178 The following code was my first pass at making RawTable only
179 allocate a single buffer, since that's all it needs. There's
180 no logical reason for this to require three calls to malloc.
182 However, I'm not convinced the code below is correct. If you
183 want to take a stab at it, please do! The alignment is
184 especially tricky to get right, especially if you need more
185 alignment than malloc guarantees.
187 let hashes_offset = 0;
188 let keys_offset = align_size(hashes_offset + hashes_size, keys_align);
190 let vals_offset = align_size(keys_offset + keys_size, vals_align);
190 let end = vals_offset + vals_size;
192 let buffer = global_heap::malloc_raw(end);
194 let hashes = buffer.offset(hashes_offset) as *mut u64;
195 let keys = buffer.offset(keys_offset) as *mut K;
196 let vals = buffer.offset(vals_offset) as *mut V;
200 let hashes = global_heap::malloc_raw(hashes_size) as *mut u64;
201 let keys = global_heap::malloc_raw(keys_size) as *mut u64;
202 let vals = global_heap::malloc_raw(vals_size) as *mut V;
// Safe constructor: allocates via `new_uninitialized`, then upholds its
// contract by marking every bucket empty before returning.
215 /// Creates a new raw table from a given capacity. All buckets are
217 pub fn new(capacity: uint) -> RawTable<K, V> {
219 let ret = RawTable::new_uninitialized(capacity);
221 for i in range(0, ret.capacity() as int) {
222 *ret.hashes.offset(i) = EMPTY_BUCKET;
229 /// Reads a bucket at a given index, returning an enum indicating whether
230 /// there's anything there or not. You need to match on this enum to get
231 /// the appropriate types to pass on to most of the rest of the functions
233 pub fn peek(&self, index: uint) -> BucketState {
// Bounds check only in test builds (see the module header's FIXME about
// debug-only asserts); `peek` is on the hottest lookup path.
235 if cfg!(test) { assert!(index < self.capacity) }
237 let idx = index as int;
238 let hash = unsafe { *self.hashes.offset(idx) };
// Construction of the Empty/Full result happens on elided lines; NoPod
// makes the returned index token non-copyable.
240 let nopod = marker::NoPod;
251 hash: SafeHash { hash: full_hash },
257 /// Gets references to the key and value at a given index.
// Holding a `FullIndex` proves the slot is occupied; the cfg!(test) assert
// re-checks that invariant in test builds only.
258 pub fn read<'a>(&'a self, index: &FullIndex) -> (&'a K, &'a V) {
263 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
264 (&'a *self.keys.offset(idx),
265 &'a *self.vals.offset(idx))
269 /// Gets references to the key and value at a given index, with the
270 /// value's reference being mutable.
// Key stays immutable: mutating it could silently break the bucket's
// hash/position invariant.
271 pub fn read_mut<'a>(&'a mut self, index: &FullIndex) -> (&'a K, &'a mut V) {
276 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
277 (&'a *self.keys.offset(idx),
278 &'a mut *self.vals.offset(idx))
282 /// Read everything, mutably.
// Used by `robin_hood` to swap a displaced entry in place without a
// take/put round trip.
283 pub fn read_all_mut<'a>(&'a mut self, index: &FullIndex)
284 -> (&'a mut SafeHash, &'a mut K, &'a mut V) {
287 // I'm totally abusing the fact that a pointer to any u64 in the
288 // hashtable at a full index is a safe hash. Thanks to `SafeHash`
289 // just being a wrapper around u64, this is true. It's just really
290 // really really really unsafe. However, the exposed API is now
291 // impossible to get wrong. You cannot insert an empty hash into
296 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
// SAFETY relies on SafeHash being a newtype over u64 (same layout), so
// transmuting the *mut u64 into &mut SafeHash is representation-correct.
297 (transmute(self.hashes.offset(idx)),
298 &'a mut *self.keys.offset(idx),
299 &'a mut *self.vals.offset(idx))
303 /// Puts a key and value pair, along with the key's hash, into a given
304 /// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
305 /// function, because that slot will no longer be empty when we return!
306 /// Because we know this, a FullIndex is returned for later use, pointing
307 /// to the newly-filled slot in the hashtable.
309 /// Use `make_hash` to construct a `SafeHash` to pass to this function.
310 pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
315 if cfg!(test) { assert!(*self.hashes.offset(idx) == EMPTY_BUCKET) }
// Mark the bucket full first, then move k/v in without running any
// destructor on the (uninitialized) destination memory.
316 *self.hashes.offset(idx) = hash.inspect();
317 move_val_init(&mut *self.keys.offset(idx), k);
318 move_val_init(&mut *self.vals.offset(idx), v);
// The returned FullIndex is the only capability to read/take this slot.
323 FullIndex { idx: idx, hash: hash, nopod: marker::NoPod }
326 /// Removes a key and value from the hashtable.
328 /// This works similarly to `put`, building an `EmptyIndex` out of the
330 pub fn take(&mut self, index: FullIndex) -> (EmptyIndex, K, V) {
335 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
// Mark the bucket empty before moving the contents out, so the table
// never observes a "full" bucket whose key/value are already moved.
337 let hash_ptr = self.hashes.offset(idx);
339 *hash_ptr = EMPTY_BUCKET;
341 // Drop the mutable constraint.
342 let keys = self.keys as *K;
343 let vals = self.vals as *V;
// ptr::read moves the values out bitwise; the slots are left logically
// uninitialized, which is fine since the hash now says "empty".
345 let k = ptr::read(keys.offset(idx));
346 let v = ptr::read(vals.offset(idx));
350 (EmptyIndex { idx: idx, nopod: marker::NoPod }, k, v)
354 /// The hashtable's capacity, similar to a vector's.
355 pub fn capacity(&self) -> uint {
359 /// The number of elements ever `put` in the hashtable, minus the number
360 /// of elements ever `take`n.
361 pub fn size(&self) -> uint {
// Borrowing iterator over (&K, &V) pairs of the full buckets.
365 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
366 Entries { table: self, idx: 0 }
// Like `iter`, but yields mutable value references.
369 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
370 MutEntries { table: self, idx: 0 }
// Consuming iterator: takes ownership of the table and drains it.
373 pub fn move_iter(self) -> MoveEntries<K, V> {
374 MoveEntries { table: self, idx: 0 }
// Shared-borrow iterator state: the table plus a cursor over raw bucket
// indices (`idx` fields are declared on elided lines).
378 pub struct Entries<'a, K, V> {
379 priv table: &'a RawTable<K, V>,
383 pub struct MutEntries<'a, K, V> {
384 priv table: &'a mut RawTable<K, V>,
388 pub struct MoveEntries<K, V> {
389 priv table: RawTable<K, V>,
// All three iterators share the same shape: linearly scan buckets,
// skipping empties, until a Full bucket is found or capacity is reached.
393 impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
394 fn next(&mut self) -> Option<(&'a K, &'a V)> {
395 while self.idx < self.table.capacity() {
399 match self.table.peek(i) {
401 Full(idx) => return Some(self.table.read(&idx))
// Lower bound uses remaining size; the elided upper bound presumably
// mirrors it — TODO confirm against the full source.
408 fn size_hint(&self) -> (uint, Option<uint>) {
409 let size = self.table.size() - self.idx;
414 impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {
415 fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
416 while self.idx < self.table.capacity() {
420 match self.table.peek(i) {
422 // the transmute here fixes:
423 // error: lifetime of `self` is too short to guarantee its contents
424 // can be safely reborrowed
425 Full(idx) => unsafe {
426 return Some(transmute(self.table.read_mut(&idx)))
434 fn size_hint(&self) -> (uint, Option<uint>) {
435 let size = self.table.size() - self.idx;
// The consuming iterator `take`s each full bucket, also yielding its hash
// so `resize` can reinsert without rehashing.
440 impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> {
441 fn next(&mut self) -> Option<(SafeHash, K, V)> {
442 while self.idx < self.table.capacity() {
446 match self.table.peek(i) {
450 let (_, k, v) = self.table.take(idx);
451 return Some((h, k, v));
459 fn size_hint(&self) -> (uint, Option<uint>) {
460 let size = self.table.size();
465 impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
466 fn clone(&self) -> RawTable<K, V> {
// Allocate uninitialized, then fill every bucket in a single pass:
// empty source buckets write EMPTY_BUCKET, full ones clone key and value
// into the corresponding slot, preserving exact bucket positions.
468 let mut new_ht = RawTable::new_uninitialized(self.capacity());
470 for i in range(0, self.capacity()) {
473 *new_ht.hashes.offset(i as int) = EMPTY_BUCKET;
476 let hash = idx.hash().inspect();
477 let (k, v) = self.read(&idx);
478 *new_ht.hashes.offset(i as int) = hash;
479 move_val_init(&mut *new_ht.keys.offset(i as int), (*k).clone());
480 move_val_init(&mut *new_ht.vals.offset(i as int), (*v).clone());
485 new_ht.size = self.size();
495 impl<K, V> Drop for RawTable<K, V> {
497 // Ideally, this should be in reverse, since we're likely to have
498 // partially taken some elements out with `.move_iter()` from the
// Scan from the last bucket down, `take`-ing (and thereby dropping)
// every remaining full entry before freeing the backing arrays.
500 for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
501 // Check if the size is 0, so we don't do a useless scan when
502 // dropping empty tables such as on resize.
504 if self.size == 0 { break }
506 match self.peek(i as uint) {
508 Full(idx) => { self.take(idx); }
512 assert!(self.size == 0);
// Free in reverse order of the three allocations made in
// `new_uninitialized` (vals, keys, hashes).
515 libc::free(self.vals as *mut libc::c_void);
516 libc::free(self.keys as *mut libc::c_void);
517 libc::free(self.hashes as *mut libc::c_void);
523 // We use this type for the load factor, to avoid floating point operations
524 // which might not be supported efficiently on some hardware.
526 // We use small u16s here to save space in the hashtable. They get upcasted
527 // to u64s when we actually use them.
528 type Fraction = (u16, u16); // (numerator, denominator)
530 // multiplication by a fraction, in a way that won't generally overflow for
531 // array sizes outside a factor of 10 of U64_MAX.
// Computes floor(lhs * num / den), widening to u64 so the intermediate
// product can't overflow for realistic table sizes.
532 fn fraction_mul(lhs: uint, (num, den): Fraction) -> uint {
533 (((lhs as u64) * (num as u64)) / (den as u64)) as uint
536 static INITIAL_LOG2_CAP: uint = 5;
537 static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
538 static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
540 // The main performance trick in this hashmap is called Robin Hood Hashing.
541 // It gains its excellent performance from one key invariant:
543 // If an insertion collides with an existing element, and that element's
544 // "probe distance" (how far away the element is from its ideal location)
545 // is higher than how far we've already probed, swap the elements.
547 // This massively lowers variance in probe distance, and allows us to get very
548 // high load factors with good performance. The 90% load factor I use is rather
551 // > Why a load factor of 90%?
553 // In general, all the distances to initial buckets will converge on the mean.
554 // At a load factor of α, the odds of finding the target bucket after k
555 // probes is approximately 1-α^k. If we set this equal to 50% (since we converge
556 // on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round
557 // this down to 0.90 to make the math easier on the CPU and avoid its FPU.
558 // Since on average we start the probing in the middle of a cache line, this
559 // strategy pulls in two cache lines of hashes on every lookup. I think that's
560 // pretty good, but if you want to trade off some space, it could go down to one
561 // cache line on average with an α of 0.84.
563 // > Wait, what? Where did you get 1-α^k from?
565 // On the first probe, your odds of a collision with an existing element is α.
566 // The odds of doing this twice in a row is approximately α^2. For three times,
567 // α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT
568 // colliding after k tries is 1-α^k.
570 // Future Improvements (FIXME!)
571 // ============================
573 // Allow the load factor to be changed dynamically and/or at initialization.
574 // I'm having trouble figuring out a sane API for this without exporting my
575 // hackish fraction type, while still avoiding floating point.
577 // Also, would it be possible for us to reuse storage when growing the
578 // underlying table? This is exactly the use case for 'realloc', and may
579 // be worth exploring.
581 // Future Optimizations (FIXME!)
582 // =============================
584 // The paper cited below mentions an implementation which keeps track of the
585 // distance-to-initial-bucket histogram. I'm suspicious of this approach because
586 // it requires maintaining an internal map. If this map were replaced with a
587 // hashmap, it would be faster, but now our data structure is self-referential
588 // and blows up. Also, this allows very good first guesses, but array accesses
589 // are no longer linear and in one direction, as we have now. There is also
590 // memory and cache pressure that this map would entail that would be very
591 // difficult to properly see in a microbenchmark.
593 // Another possible design choice that I made without any real reason is
594 // parameterizing the raw table over keys and values. Technically, all we need
595 // is the size and alignment of keys and values, and the code should be just as
596 // efficient (well, we might need one for power-of-two size and one for not...).
597 // This has the potential to reduce code bloat in rust executables, without
598 // really losing anything except 4 words (key size, key alignment, val size,
599 // val alignment) which can be passed in to every call of a `RawTable` function.
600 // This would definitely be an avenue worth exploring if people start complaining
601 // about the size of rust executables.
603 // There's also two optimizations that have been omitted regarding how the
604 // hashtable allocates. The first is that a hashtable which never has an element
605 // inserted should not allocate. I'm suspicious of this one, because supporting
606 // that internally gains no performance over just using an
607 // `Option<HashMap<K, V>>`, and is significantly more complicated.
609 // The second omitted allocation optimization is that right now we allocate three
610 // arrays to back the hashtable. This is wasteful. In theory, we only need one
611 // array, and each of the three original arrays can just be slices of it. This
612 // would reduce the pressure on the allocator, and will play much nicer with the
613 // rest of the system. An initial implementation is commented out in
614 // `table::RawTable::new`, but I'm not confident it works for all sane alignments,
615 // especially if a type needs more alignment than `malloc` provides.
617 /// A hash map implementation which uses linear probing with Robin
618 /// Hood bucket stealing.
620 /// The hashes are all keyed by the task-local random number generator
621 /// on creation by default, this means the ordering of the keys is
622 /// randomized, but makes the tables more resistant to
623 /// denial-of-service attacks (Hash DoS). This behaviour can be
624 /// overridden with one of the constructors.
626 /// It is required that the keys implement the `Eq` and `Hash` traits, although
627 /// this can frequently be achieved by using `#[deriving(Eq, Hash)]`.
629 /// Relevant papers/articles:
631 /// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf)
632 /// 2. Emmanuel Goossaert. ["Robin Hood
633 /// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/)
634 /// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift
635 /// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/)
640 /// use collections::HashMap;
642 /// // type inference lets us omit an explicit type signature (which
643 /// // would be `HashMap<&str, &str>` in this example).
644 /// let mut book_reviews = HashMap::new();
646 /// // review some books.
647 /// book_reviews.insert("Adventures of Hucklebury Fin", "My favorite book.");
648 /// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece.");
649 /// book_reviews.insert("Pride and Prejudice", "Very enjoyable.");
650 /// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
652 /// // check for a specific one.
653 /// if !book_reviews.contains_key(& &"Les Misérables") {
654 /// println!("We've got {} reviews, but Les Misérables ain't one.",
655 /// book_reviews.len());
658 /// // oops, this review has a lot of spelling mistakes, let's delete it.
659 /// book_reviews.remove(& &"The Adventures of Sherlock Holmes");
661 /// // look up the values associated with some keys.
662 /// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
663 /// for book in to_find.iter() {
664 /// match book_reviews.find(book) {
665 /// Some(review) => println!("{}: {}", *book, *review),
666 /// None => println!("{} is unreviewed.", *book)
670 /// // iterate over everything.
671 /// for (book, review) in book_reviews.iter() {
672 /// println!("{}: \"{}\"", *book, *review);
// Robin-Hood open-addressing map over a RawTable; H defaults to SipHash
// for DoS resistance (see module comments above).
676 pub struct HashMap<K, V, H = sip::SipHasher> {
677 // All hashes are keyed on these values, to prevent hash collision attacks.
680 // When size == grow_at, we double the capacity.
683 // The capacity must never drop below this.
684 priv minimum_capacity: uint,
// The hash/key/value storage; all probing operates on its buckets.
686 priv table: table::RawTable<K, V>,
688 // We keep this at the end since it's 4-bytes, unlike everything else
689 // in this struct. Might as well save a word of padding!
690 priv load_factor: Fraction,
693 /// Get the number of elements which will force the capacity to grow.
// Free function (not a method) so constructors can compute the threshold
// before the map value exists: threshold = capacity * load_factor.
694 fn grow_at(capacity: uint, load_factor: Fraction) -> uint {
695 fraction_mul(capacity, load_factor)
698 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
699 /// Get the number of elements which will force the capacity to shrink.
700 /// When size == self.shrink_at(), we halve the capacity.
// Shrink threshold is capacity/4, so a shrink-then-grow cycle can't
// oscillate at a single boundary.
701 fn shrink_at(&self) -> uint {
702 self.table.capacity() >> 2
705 // Probe the `idx`th bucket for a given hash, returning the index of the
708 // This exploits the power-of-two size of the hashtable. As long as this
709 // is always true, we can use a bitmask of cap-1 to do modular arithmetic.
711 // Prefer to use this with increasing values of `idx` rather than repeatedly
712 // calling `probe_next`. This reduces data-dependencies between loops, which
713 // can help the optimizer, and certainly won't hurt it. `probe_next` is
714 // simply for convenience, and is no more efficient than `probe`.
715 fn probe(&self, hash: &table::SafeHash, idx: uint) -> uint {
// capacity is always a power of two (asserted in `resize`), so cap-1 is
// an all-ones mask and `& hash_mask` is `% capacity`.
716 let hash_mask = self.table.capacity() - 1;
718 // So I heard a rumor that unsigned overflow is safe in rust..
719 ((hash.inspect() as uint) + idx) & hash_mask
722 // Generate the next probe in a sequence. Prefer to use 'probe' by itself,
723 // but this can sometimes be useful.
// Linear probing step: advance one bucket, wrapping at capacity.
724 fn probe_next(&self, probe: uint) -> uint {
725 let hash_mask = self.table.capacity() - 1;
726 (probe + 1) & hash_mask
// Hash any compatible key-like value with this map's keyed hasher,
// delegating to the free `table::make_hash` (which avoids EMPTY_BUCKET).
729 fn make_hash<X: Hash<S>>(&self, x: &X) -> table::SafeHash {
730 table::make_hash(&self.hasher, x)
733 /// Get the distance of the bucket at the given index that it lies
734 /// from its 'ideal' location.
736 /// In the cited blog posts above, this is called the "distance to
737 /// initial bucket", or DIB.
738 fn bucket_distance(&self, index_of_elem: &table::FullIndex) -> uint {
739 // where the hash of the element that happens to reside at
740 // `index_of_elem` tried to place itself first.
741 let first_probe_index = self.probe(&index_of_elem.hash(), 0);
743 let raw_index = index_of_elem.raw_index();
745 if first_probe_index <= raw_index {
746 // probe just went forward
747 raw_index - first_probe_index
// else-branch: the element's probe sequence wrapped past the end of the
// table before landing here, so add a full capacity's worth of distance.
749 // probe wrapped around the hashtable
750 raw_index + (self.table.capacity() - first_probe_index)
754 /// Search for a pre-hashed key.
// Core lookup: walk the probe sequence for `hash`, using the Robin-Hood
// invariant to terminate early. `is_match` lets callers compare either
// against a `&K` or an `Equiv` key without duplicating this loop.
755 fn search_hashed_generic(&self, hash: &table::SafeHash, is_match: |&K| -> bool)
756 -> Option<table::FullIndex> {
757 for num_probes in range(0u, self.table.size()) {
758 let probe = self.probe(hash, num_probes);
760 let idx = match self.table.peek(probe) {
761 table::Empty(_) => return None, // hit an empty bucket
762 table::Full(idx) => idx
765 // We can finish the search early if we hit any bucket
766 // with a lower distance to initial bucket than we've probed.
767 if self.bucket_distance(&idx) < num_probes { return None }
769 // If the hash doesn't match, it can't be this one..
770 if hash != &idx.hash() { continue }
// Full key comparison only after the cheap hash comparison passes.
772 let (k, _) = self.table.read(&idx)
774 // If the key doesn't match, it can't be this one..
775 if !is_match(k) { continue }
// Search for an exact key with a precomputed hash.
783 fn search_hashed(&self, hash: &table::SafeHash, k: &K) -> Option<table::FullIndex> {
784 self.search_hashed_generic(hash, |k_| *k == *k_)
// Search using an `Equiv` key (e.g. &str against ~str) without building a K.
787 fn search_equiv<Q: Hash<S> + Equiv<K>>(&self, q: &Q) -> Option<table::FullIndex> {
788 self.search_hashed_generic(&self.make_hash(q), |k| q.equiv(k))
791 /// Search for a key, yielding the index if it's found in the hashtable.
792 /// If you already have the hash for the key lying around, use
794 fn search(&self, k: &K) -> Option<table::FullIndex> {
795 self.search_hashed(&self.make_hash(k), k)
799 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Container for HashMap<K, V, H> {
800 /// Return the number of elements in the map
801 fn len(&self) -> uint { self.table.size() }
804 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
805 /// Clear the map, removing all key-value pairs.
806 fn clear(&mut self) {
// Pin minimum_capacity to the current size so clearing doesn't also
// trigger a shrink of the backing table.
807 self.minimum_capacity = self.table.size();
// Drop every full bucket in place; capacity is left unchanged.
809 for i in range(0, self.table.capacity()) {
810 match self.table.peek(i) {
811 table::Empty(_) => {},
812 table::Full(idx) => { self.table.take(idx); }
819 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Map<K, V> for HashMap<K, V, H> {
// find: locate via Robin-Hood search, then borrow just the value.
820 fn find<'a>(&'a self, k: &K) -> Option<&'a V> {
821 self.search(k).map(|idx| {
822 let (_, v) = self.table.read(&idx);
827 fn contains_key(&self, k: &K) -> bool {
828 self.search(k).is_some()
832 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> MutableMap<K, V> for HashMap<K, V, H> {
// Mutable lookup: same search as `find`, but borrows the value mutably.
833 fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> {
834 match self.search(k) {
837 let (_, v) = self.table.read_mut(&idx);
// Insert k/v, returning the previous value for k if one existed.
// Resizes up-front so the probe loop below always has room.
843 fn swap(&mut self, k: K, v: V) -> Option<V> {
844 let hash = self.make_hash(&k);
845 let potential_new_size = self.table.size() + 1;
846 self.make_some_room(potential_new_size);
// `dib` is our distance-to-initial-bucket as we walk the probe sequence.
848 for dib in range_inclusive(0u, self.table.size()) {
849 let probe = self.probe(&hash, dib);
851 let idx = match self.table.peek(probe) {
// Empty slot: the key is new; claim the slot and we're done.
852 table::Empty(idx) => {
854 self.table.put(idx, hash, k, v);
857 table::Full(idx) => idx
// Same hash: may be the same key — replace the value in place if so.
860 if idx.hash() == hash {
861 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
863 // Found an existing value.
864 return Some(replace(bucket_v, v));
868 let probe_dib = self.bucket_distance(&idx);
871 // Found a luckier bucket. This implies that the key does not
872 // already exist in the hashtable. Just do a robin hood
874 self.robin_hood(idx, probe_dib, hash, k, v);
879 // We really shouldn't be here.
// Unreachable given the load factor < 100%: make_some_room guarantees
// at least one empty bucket exists on the probe path.
880 fail!("Internal HashMap error: Out of space.");
// Remove k, returning its value if present. Uses the "backward shift
// deletion" variant of Robin-Hood hashing: instead of tombstones, every
// displaced element after the hole is shifted back one slot.
883 fn pop(&mut self, k: &K) -> Option<V> {
884 if self.table.size() == 0 {
// Shrink check happens before removal (size can only decrease here).
888 let potential_new_size = self.table.size() - 1;
889 self.make_some_room(potential_new_size);
891 let starting_index = match self.search(k) {
896 let starting_probe = starting_index.raw_index();
// Phase 1: find `ending_probe` — the first bucket after the victim that
// is either empty or sitting in its ideal slot (DIB == 0). Everything
// strictly between victim and there must shift back.
899 let mut probe = self.probe_next(starting_probe);
900 for _ in range(0u, self.table.size()) {
901 match self.table.peek(probe) {
902 table::Empty(_) => {}, // empty bucket. this is the end of our shifting.
903 table::Full(idx) => {
904 // Bucket that isn't us, which has a non-zero probe distance.
905 // This isn't the ending index, so keep searching.
906 if self.bucket_distance(&idx) != 0 {
907 probe = self.probe_next(probe);
911 // if we do have a bucket_distance of zero, we're at the end
912 // of what we need to shift.
// Phase 2: remove the victim, keeping its value for the return.
921 let (_, _, retval) = self.table.take(starting_index);
923 let mut probe = starting_probe;
924 let mut next_probe = self.probe_next(probe);
926 // backwards-shift all the elements after our newly-deleted one.
927 while next_probe != ending_probe {
928 match self.table.peek(next_probe) {
930 // nothing to shift in. just empty it out.
931 match self.table.peek(probe) {
932 table::Empty(_) => {},
933 table::Full(idx) => { self.table.take(idx); }
936 table::Full(next_idx) => {
937 // something to shift. move it over!
938 let next_hash = next_idx.hash();
939 let (_, next_key, next_val) = self.table.take(next_idx);
940 match self.table.peek(probe) {
941 table::Empty(idx) => {
942 self.table.put(idx, next_hash, next_key, next_val);
944 table::Full(idx) => {
945 let (emptyidx, _, _) = self.table.take(idx);
946 self.table.put(emptyidx, next_hash, next_key, next_val);
// Advance the (probe, next_probe) window one slot per iteration.
953 next_probe = self.probe_next(next_probe);
956 // Done the backwards shift, but there's still an element left!
958 match self.table.peek(probe) {
959 table::Empty(_) => {},
960 table::Full(idx) => { self.table.take(idx); }
963 // Now we're done all our shifting. Return the value we grabbed
969 impl<K: Hash + TotalEq, V> HashMap<K, V, sip::SipHasher> {
970 /// Create an empty HashMap.
971 pub fn new() -> HashMap<K, V, sip::SipHasher> {
972 HashMap::with_capacity(INITIAL_CAPACITY)
// Default constructor path: key the SipHasher with two random u64s from
// the task-local RNG, making hash ordering unpredictable (Hash-DoS defense).
975 pub fn with_capacity(capacity: uint) -> HashMap<K, V, sip::SipHasher> {
976 let mut r = rand::task_rng();
979 let hasher = sip::SipHasher::new_with_keys(r0, r1);
980 HashMap::with_capacity_and_hasher(capacity, hasher)
984 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
// Construct with an explicit hasher and the default initial capacity.
985 pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
986 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
989 /// Create an empty HashMap with space for at least `capacity`
990 /// elements, using `hasher` to hash the keys.
992 /// Warning: `hasher` is normally randomly generated, and
993 /// is designed to allow HashMaps to be resistant to attacks that
994 /// cause many collisions and very poor performance. Setting it
995 /// manually using this function can expose a DoS attack vector.
996 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
// Round up to a power of two (>= INITIAL_CAPACITY) so the mask-based
// modular arithmetic in `probe` stays valid.
997 let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
1000 load_factor: INITIAL_LOAD_FACTOR,
1001 grow_at: grow_at(cap, INITIAL_LOAD_FACTOR),
1002 minimum_capacity: cap,
1003 table: table::RawTable::new(cap),
1007 /// The hashtable will never try to shrink below this size. You can use
1008 /// this function to reduce reallocations if your hashtable frequently
1009 /// grows and shrinks by large amounts.
1011 /// This function has no effect on the operational semantics of the
1012 /// hashtable, only on performance.
1013 pub fn reserve(&mut self, new_minimum_capacity: uint) {
// Same power-of-two rounding as the constructors.
1014 let cap = num::next_power_of_two(
1015 max(INITIAL_CAPACITY, new_minimum_capacity));
1017 self.minimum_capacity = cap;
// Grow immediately if the current table is already below the new floor
// (the resize call itself is on an elided line).
1019 if self.table.capacity() < cap {
1024 /// Resizes the internal vectors to a new capacity. It's your responsibility to:
1025 /// 1) Make sure the new capacity is enough for all the elements, accounting
1026 /// for the load factor.
1027 /// 2) Ensure new_capacity is a power of two.
1028 fn resize(&mut self, new_capacity: uint) {
1029 assert!(self.table.size() <= new_capacity);
// Power-of-two check: n & (n-1) == 0 exactly when n is a power of two.
1030 assert!((new_capacity - 1) & new_capacity == 0);
1032 self.grow_at = grow_at(new_capacity, self.load_factor);
// Swap in a fresh table, then drain the old one. Each entry carries its
// cached SafeHash, so reinsertion does not rehash any key.
1034 let old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
1035 let old_size = old_table.size();
1037 for (h, k, v) in old_table.move_iter() {
1038 self.manual_insert_hashed_nocheck(h, k, v);
1041 assert_eq!(self.table.size(), old_size);
1044 /// Performs any necessary resize operations, such that there's space for
1045 /// new_size elements.
1046 fn make_some_room(&mut self, new_size: uint) {
1047 let should_shrink = new_size <= self.shrink_at();
1048 let should_grow = self.grow_at <= new_size;
// NOTE(review): the `if should_grow {` line (original lines 1049-1050)
// appears to be missing from this extract.
// Double on growth, halve on shrink — capacity stays a power of two.
1051 let new_capacity = self.table.capacity() << 1;
1052 self.resize(new_capacity);
1053 } else if should_shrink {
1054 let new_capacity = self.table.capacity() >> 1;
1056 // Never shrink below the minimum capacity
1057 if self.minimum_capacity <= new_capacity {
1058 self.resize(new_capacity);
1063 /// Perform robin hood bucket stealing at the given 'index'. You must
1064 /// also pass that probe's "distance to initial bucket" so we don't have
1065 /// to recalculate it, as well as the total number of probes already done
1066 /// so we have some sort of upper bound on the number of probes to do.
1068 /// 'hash', 'k', and 'v' are the elements to robin hood into the hashtable.
1069 fn robin_hood(&mut self, mut index: table::FullIndex, mut dib_param: uint,
1070 mut hash: table::SafeHash, mut k: K, mut v: V) {
// Swap the incoming (hash, k, v) into the richer bucket, taking its
// previous occupant, which must now be re-seated further along.
1072 let (old_hash, old_key, old_val) = {
1073 let (old_hash_ref, old_key_ref, old_val_ref) =
1074 self.table.read_all_mut(&index);
1076 let old_hash = replace(old_hash_ref, hash);
1077 let old_key = replace(old_key_ref, k);
1078 let old_val = replace(old_val_ref, v);
1080 (old_hash, old_key, old_val)
// Keep probing past the stolen bucket, carrying the displaced element.
// `dib` tracks its distance from its ideal bucket at each step.
1083 let mut probe = self.probe_next(index.raw_index());
1085 for dib in range(dib_param + 1, self.table.size()) {
1086 let full_index = match self.table.peek(probe) {
1087 table::Empty(idx) => {
// Found a hole: the displaced element comes to rest here.
// NOTE(review): the statement(s) following this put (original
// lines 1090-1091, presumably an early return) are missing
// from this extract.
1089 self.table.put(idx, old_hash, old_key, old_val);
1092 table::Full(idx) => idx
// How far is the current occupant from its own ideal bucket?
1095 let probe_dib = self.bucket_distance(&full_index);
1097 // Robin hood! Steal the spot.
1098 if probe_dib < dib {
// NOTE(review): the rest of this steal branch (original lines
// 1099-1106) is missing from this extract.
1100 dib_param = probe_dib;
1107 probe = self.probe_next(probe);
// Exhausting the loop means we probed every bucket without finding a
// slot — the "table is never 100% full" invariant was violated.
1110 fail!("HashMap fatal error: 100% load factor?");
1114 /// Manually insert a pre-hashed key-value pair, without first checking
1115 /// that there's enough room in the buckets. Returns a reference to the
1116 /// newly insert value.
1118 /// If the key already exists, the hashtable will be returned untouched
1119 /// and a reference to the existing element will be returned.
1120 fn manual_insert_hashed_nocheck<'a>(
1121 &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
// Linear probing bounded by the table size; `dib` is the inserted
// element's distance-to-initial-bucket at each step.
1123 for dib in range_inclusive(0u, self.table.size()) {
1124 let probe = self.probe(&hash, dib);
1126 let idx = match self.table.peek(probe) {
1127 table::Empty(idx) => {
// Empty slot: claim it and hand back a reference to the value.
// NOTE(review): the tail of this arm (original lines 1131-1132,
// presumably `return val;`) is missing from this extract.
1129 let fullidx = self.table.put(idx, hash, k, v);
1130 let (_, val) = self.table.read_mut(&fullidx);
1133 table::Full(idx) => idx
// Matching hash: check for a true duplicate key (vs. a collision).
// NOTE(review): the key-equality comparison that should accompany
// this hash test (original line ~1136, using `bucket_k`) appears
// truncated in this extract.
1136 if idx.hash() == hash {
1137 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
1138 // FIXME #12147 the conditional return confuses
1139 // borrowck if we return bucket_v directly
1140 let bv: *mut V = bucket_v;
1142 // Key already exists. Get its reference.
1143 return unsafe {&mut *bv};
1147 let probe_dib = self.bucket_distance(&idx);
1149 if probe_dib < dib {
1150 // Found a luckier bucket than me. Better steal his spot.
1151 self.robin_hood(idx, probe_dib, hash, k, v);
1153 // Now that it's stolen, just read the value's pointer
1154 // right out of the table!
1155 match self.table.peek(probe) {
1156 table::Empty(_) => fail!("Just stole a spot, but now that spot's empty."),
1157 table::Full(idx) => {
// NOTE(review): the `return v;` tail of this arm (original
// lines 1159+) is missing from this extract.
1158 let (_, v) = self.table.read_mut(&idx);
1165 // We really shouldn't be here.
1166 fail!("Internal HashMap error: Out of space.");
// Insert a pre-hashed pair, resizing first if needed so the no-check
// insertion below is guaranteed to find room.
1169 fn manual_insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
1170 let potential_new_size = self.table.size() + 1;
1171 self.make_some_room(potential_new_size);
1172 self.manual_insert_hashed_nocheck(hash, k, v)
1175 /// Inserts an element, returning a reference to that element inside the
// NOTE(review): the continuation of this doc comment (original line
// 1176, presumably "hashtable.") is missing from this extract.
1177 fn manual_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
// Hash with this map's hasher, then delegate to the hashed variant.
1178 let hash = self.make_hash(&k);
1179 self.manual_insert_hashed(hash, k, v)
1182 /// Return the value corresponding to the key in the map, or insert
1183 /// and return the value if it doesn't exist.
1184 pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1185 match self.search(&k) {
// Key present: return a mutable reference to the existing value.
// NOTE(review): the `Some(idx) =>` pattern wrapping this arm
// (original line 1186) is missing from this extract.
1187 let (_, v_ref) = self.table.read_mut(&idx);
// Key absent: insert `v` and return a reference to it.
1190 None => self.manual_insert(k, v)
1194 /// Return the value corresponding to the key in the map, or create,
1195 /// insert, and return a new value if it doesn't exist.
1196 pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
1198 match self.search(&k) {
// Key present: hand back the existing value without calling `f`.
1200 let (_, v_ref) = self.table.read_mut(&idx);
// Key absent: `f` was invoked (on the `None` path, in the lines
// missing from this extract) to produce `v`, which is inserted.
1205 self.manual_insert(k, v)
1210 /// Insert a key-value pair into the map if the key is not already present.
1211 /// Otherwise, modify the existing value for the key.
1212 /// Returns the new or modified value for the key.
1213 pub fn insert_or_update_with<'a>(
// NOTE(review): the parameter list of this function (original lines
// 1214-1218, including the update closure) is missing from this extract.
1219 match self.search(&k) {
// Key absent: plain insertion.
1220 None => self.manual_insert(k, v),
// Key present: the update closure is applied to the existing value
// (in the lines missing from this extract).
1222 let (_, v_ref) = self.table.read_mut(&idx);
1229 /// Retrieves a value for the given key, failing if the key is not present.
1230 pub fn get<'a>(&'a self, k: &K) -> &'a V {
1231 match self.find(k) {
// NOTE(review): the `Some(v) => v,` arm (original line 1232) is
// missing from this extract.
1233 None => fail!("No entry found for key: {:?}", k)
1237 /// Retrieves a (mutable) value for the given key, failing if the key is not present.
1238 pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V {
1239 match self.find_mut(k) {
// NOTE(review): the `Some(v) => v,` arm (original line 1240) is
// missing from this extract.
1241 None => fail!("No entry found for key: {:?}", k)
1245 /// Return true if the map contains a value for the specified key,
1246 /// using equivalence.
// `Equiv` allows lookup with a borrowed form of the key (e.g. &str for
// ~str keys) as long as it hashes identically.
1247 pub fn contains_key_equiv<Q: Hash<S> + Equiv<K>>(&self, key: &Q) -> bool {
1248 self.search_equiv(key).is_some()
1251 /// Return the value corresponding to the key in the map, using
// NOTE(review): the continuation of this doc comment (original line
// 1252, presumably "equivalence.") is missing from this extract.
1253 pub fn find_equiv<'a, Q: Hash<S> + Equiv<K>>(&'a self, k: &Q) -> Option<&'a V> {
1254 match self.search_equiv(k) {
// Found: read the value out of the bucket immutably.
1257 let (_, v_ref) = self.table.read(&idx);
1263 /// An iterator visiting all keys in arbitrary order.
1264 /// Iterator element type is &'a K.
1265 pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
// Adapter over iter(): project out the key of each pair.
1266 self.iter().map(|(k, _v)| k)
1269 /// An iterator visiting all values in arbitrary order.
1270 /// Iterator element type is &'a V.
1271 pub fn values<'a>(&'a self) -> Values<'a, K, V> {
// Adapter over iter(): project out the value of each pair.
1272 self.iter().map(|(_k, v)| v)
1275 /// An iterator visiting all key-value pairs in arbitrary order.
1276 /// Iterator element type is (&'a K, &'a V).
// NOTE(review): the body of iter() (original lines 1278-1279) is
// missing from this extract.
1277 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
1281 /// An iterator visiting all key-value pairs in arbitrary order,
1282 /// with mutable references to the values.
1283 /// Iterator element type is (&'a K, &'a mut V).
1284 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
1285 self.table.mut_iter()
1288 /// Creates a consuming iterator, that is, one that moves each key-value
1289 /// pair out of the map in arbitrary order. The map cannot be used after
// NOTE(review): the continuation of this doc comment (original line
// 1290, presumably "calling this.") is missing from this extract.
1291 pub fn move_iter(self) -> MoveEntries<K, V> {
// Drop the internal SafeHash from each triple, yielding (K, V).
1292 self.table.move_iter().map(|(_, k, v)| (k, v))
// Convenience methods available only when values are cloneable.
1296 impl<K: TotalEq + Hash<S>, V: Clone, S, H: Hasher<S>> HashMap<K, V, H> {
1297 /// Like `find`, but returns a copy of the value.
1298 pub fn find_copy(&self, k: &K) -> Option<V> {
1299 self.find(k).map(|v| (*v).clone())
1302 /// Like `get`, but returns a copy of the value.
// Fails (like `get`) if the key is not present.
1303 pub fn get_copy(&self, k: &K) -> V {
1304 (*self.get(k)).clone()
// Two maps are equal iff they have the same length and every key maps
// to an equal value in the other map.
1308 impl<K: TotalEq + Hash<S>, V: Eq, S, H: Hasher<S>> Eq for HashMap<K, V, H> {
1309 fn eq(&self, other: &HashMap<K, V, H>) -> bool {
// Cheap length check first avoids iterating unequal-sized maps.
1310 if self.len() != other.len() { return false; }
1312 self.iter().all(|(key, value)| {
1313 match other.find(key) {
// NOTE(review): the `None => false,` arm (original line 1314)
// is missing from this extract.
1315 Some(v) => *value == *v
// Formats the map as `{k1: v1, k2: v2, ...}` (iteration order is
// arbitrary). `r"\{"` is this era's escape for a literal `{`.
1321 impl<K: TotalEq + Hash<S> + Show, V: Show, S, H: Hasher<S>> Show for HashMap<K, V, H> {
1322 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1323 try!(write!(f.buf, r"\{"));
1325 for (i, (k, v)) in self.iter().enumerate() {
// Comma-separate all entries after the first.
1326 if i != 0 { try!(write!(f.buf, ", ")); }
1327 try!(write!(f.buf, "{}: {}", *k, *v));
1330 write!(f.buf, r"\}")
// Default map: initial capacity with a default-constructed hasher.
1334 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
1335 fn default() -> HashMap<K, V, H> {
1336 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, Default::default())
1340 /// HashMap iterator
// Borrowed (key, value) pair iterator — thin alias over the raw table's.
1341 pub type Entries<'a, K, V> = table::Entries<'a, K, V>;
1343 /// HashMap mutable values iterator
1344 pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>;
1346 /// HashMap move iterator
// Adapts the table's (hash, key, value) triples down to (key, value).
1347 pub type MoveEntries<K, V> =
1348 iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries<K, V>>;
1350 /// HashMap keys iterator
1351 pub type Keys<'a, K, V> =
1352 iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>;
1354 /// HashMap values iterator
1355 pub type Values<'a, K, V> =
1356 iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>;
// Build a map from a (K, V) iterator, pre-sizing from its size_hint.
1358 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> FromIterator<(K, V)> for HashMap<K, V, H> {
1359 fn from_iterator<T: Iterator<(K, V)>>(iter: &mut T) -> HashMap<K, V, H> {
1360 let (lower, _) = iter.size_hint();
1361 let mut map = HashMap::with_capacity_and_hasher(lower, Default::default());
// NOTE(review): the extend/return tail of this fn (original lines
// 1362-1364) is missing from this extract.
// Feed every (k, v) pair into the map via insert.
1367 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Extendable<(K, V)> for HashMap<K, V, H> {
1368 fn extend<T: Iterator<(K, V)>>(&mut self, iter: &mut T) {
1369 for (k, v) in *iter {
1375 /// HashSet iterator
// Projects just the key out of the underlying HashMap<K, ()> entries.
1376 pub type SetItems<'a, K> =
1377 iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;
1379 /// HashSet move iterator
1380 pub type SetMoveItems<K> =
1381 iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
1383 /// An implementation of a hash set using the underlying representation of a
1384 /// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
1385 /// requires that the elements implement the `Eq` and `Hash` traits.
// Defaults to SipHash for DoS resistance, like HashMap.
1387 pub struct HashSet<T, H = sip::SipHasher> {
// Zero-sized () values mean the map stores effectively only keys.
1388 priv map: HashMap<T, (), H>
// Set equality: same size and every element of self is in other.
1391 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
1392 // FIXME #11998: Since the value is a (), and `find` returns a Some(&()),
1393 // we trigger #11998 when matching on it. I've fallen back to manual
1394 // iteration until this is fixed.
1395 fn eq(&self, other: &HashSet<T, H>) -> bool {
1396 if self.len() != other.len() { return false; }
1398 self.iter().all(|key| other.map.contains_key(key))
// Both impls delegate straight to the backing HashMap<T, ()>.
1402 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Container for HashSet<T, H> {
1403 /// Return the number of elements in the set
1404 fn len(&self) -> uint { self.map.len() }
1407 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> {
1408 /// Clear the set, removing all values.
1409 fn clear(&mut self) { self.map.clear() }
1412 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
1413 /// Return true if the set contains a value
// Uses the map's search directly to avoid constructing a () value.
1414 fn contains(&self, value: &T) -> bool { self.map.search(value).is_some() }
1416 /// Return true if the set has no elements in common with `other`.
1417 /// This is equivalent to checking for an empty intersection.
1418 fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
1419 self.iter().all(|v| !other.contains(v))
1422 /// Return true if the set is a subset of another
1423 fn is_subset(&self, other: &HashSet<T, H>) -> bool {
1424 self.iter().all(|v| other.contains(v))
1427 /// Return true if the set is a superset of another
// Superset is just subset with the operands flipped.
1428 fn is_superset(&self, other: &HashSet<T, H>) -> bool {
1429 other.is_subset(self)
1433 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> {
1434 /// Add a value to the set. Return true if the value was not already
1435 /// present in the set.
// Stored as a map entry with the unit value.
1436 fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) }
1438 /// Remove a value from the set. Return true if the value was
1439 /// present in the set.
1440 fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
// Constructors for the default (SipHash-keyed) hasher.
1443 impl<T: Hash + TotalEq> HashSet<T, sip::SipHasher> {
1444 /// Create an empty HashSet
1445 pub fn new() -> HashSet<T, sip::SipHasher> {
1446 HashSet::with_capacity(INITIAL_CAPACITY)
1449 /// Create an empty HashSet with space for at least `n` elements in
// NOTE(review): the continuation of this doc comment (original line
// 1450, presumably "the hash table.") is missing from this extract.
1451 pub fn with_capacity(capacity: uint) -> HashSet<T, sip::SipHasher> {
1452 HashSet { map: HashMap::with_capacity(capacity) }
// Methods available for any hasher choice; mostly thin delegations to
// the backing HashMap.
1456 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
1457 pub fn with_hasher(hasher: H) -> HashSet<T, H> {
1458 HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
1461 /// Create an empty HashSet with space for at least `capacity`
1462 /// elements in the hash table, using `hasher` to hash the keys.
1464 /// Warning: `hasher` is normally randomly generated, and
1465 /// is designed to allow `HashSet`s to be resistant to attacks that
1466 /// cause many collisions and very poor performance. Setting it
1467 /// manually using this function can expose a DoS attack vector.
1468 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> {
1469 HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
1472 /// Reserve space for at least `n` elements in the hash table.
// NOTE(review): the body of reserve (original line ~1474, presumably
// `self.map.reserve(n)`) is missing from this extract.
1473 pub fn reserve(&mut self, n: uint) {
1477 /// Returns true if the hash set contains a value equivalent to the
1478 /// given query value.
1479 pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool {
1480 self.map.contains_key_equiv(value)
1483 /// An iterator visiting all elements in arbitrary order.
1484 /// Iterator element type is &'a T.
// NOTE(review): the body of iter (original lines 1486-1487) is missing
// from this extract.
1485 pub fn iter<'a>(&'a self) -> SetItems<'a, T> {
1489 /// Creates a consuming iterator, that is, one that moves each value out
1490 /// of the set in arbitrary order. The set cannot be used after calling
// NOTE(review): the continuation of this doc comment (original line
// 1491, presumably "this.") is missing from this extract.
1492 pub fn move_iter(self) -> SetMoveItems<T> {
// Drop the unit values, yielding only keys.
1493 self.map.move_iter().map(|(k, _)| k)
1496 /// Visit the values representing the difference
// Pattern (shared with intersection): zip each element of self with a
// repeated reference to `other`, then filter by membership in `other`.
// NOTE(review): the Repeat/zip setup lines (original lines 1498-1499)
// are missing from this extract.
1497 pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
1500 .filter_map(|(other, elt)| {
// Keep elements of self that are NOT in other.
1501 if !other.contains(elt) { Some(elt) } else { None }
1505 /// Visit the values representing the symmetric difference
// (self - other) followed by (other - self).
1506 pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>)
1507 -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> {
1508 self.difference(other).chain(other.difference(self))
1511 /// Visit the values representing the intersection
1512 pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
1513 -> SetAlgebraItems<'a, T, H> {
// NOTE(review): the Repeat/zip setup lines (original lines 1514-1515)
// are missing from this extract.
1516 .filter_map(|(other, elt)| {
// Keep elements of self that ARE in other.
1517 if other.contains(elt) { Some(elt) } else { None }
1521 /// Visit the values representing the union
// All of self, plus whatever is in other but not in self.
1522 pub fn union<'a>(&'a self, other: &'a HashSet<T, H>)
1523 -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
1524 self.iter().chain(other.difference(self))
// Formats the set as `{x1, x2, ...}` (arbitrary order), mirroring the
// HashMap Show impl above.
1529 impl<T: TotalEq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
1530 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1531 try!(write!(f.buf, r"\{"));
1533 for (i, x) in self.iter().enumerate() {
1534 if i != 0 { try!(write!(f.buf, ", ")); }
1535 try!(write!(f.buf, "{}", *x));
1538 write!(f.buf, r"\}")
// Build a set from an iterator, pre-sizing from its size_hint.
1542 impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> {
1543 fn from_iterator<I: Iterator<T>>(iter: &mut I) -> HashSet<T, H> {
1544 let (lower, _) = iter.size_hint();
1545 let mut set = HashSet::with_capacity_and_hasher(lower, Default::default());
// NOTE(review): the extend/return tail (original lines 1546-1548) is
// missing from this extract.
1551 impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> {
// NOTE(review): the body of extend (original lines 1553-1556) is
// missing from this extract.
1552 fn extend<I: Iterator<T>>(&mut self, iter: &mut I) {
1559 impl<T: TotalEq + Hash> Default for HashSet<T, sip::SipHasher> {
1560 fn default() -> HashSet<T> { HashSet::new() }
1563 // `Repeat` is used to feed the filter closure an explicit capture
1564 // of a reference to the other set
1565 /// Set operations iterator
// Concrete type of difference()/intersection(): a filter_map over
// (other-set-ref, element) pairs produced by Zip<Repeat, SetItems>.
1566 pub type SetAlgebraItems<'a, T, H> =
1567 FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T,
1568 Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>;
// --- HashMap test module, part 1 ---
// NOTE(review): the `mod test_map {` header, #[test] attributes, and
// many closing braces are missing from this extract; the code below is
// kept byte-identical.
1573 use std::iter::{Iterator,range_inclusive,range_step_inclusive};
1574 use std::local_data;
// Capacity 0 must still produce a usable map (rounds up internally).
1578 fn test_create_capacity_zero() {
1579 let mut m = HashMap::with_capacity(0);
1581 assert!(m.insert(1, 1));
1583 assert!(m.contains_key(&1));
1584 assert!(!m.contains_key(&0));
// Basic insert/len/find behavior.
1589 let mut m = HashMap::new();
1590 assert_eq!(m.len(), 0);
1591 assert!(m.insert(1, 2));
1592 assert_eq!(m.len(), 1);
1593 assert!(m.insert(2, 4));
1594 assert_eq!(m.len(), 2);
1595 assert_eq!(*m.find(&1).unwrap(), 2);
1596 assert_eq!(*m.find(&2).unwrap(), 4);
// Task-local counter vector used to track constructions/drops of
// `Dropable` instances: +1 on new, -1 on drop, so every slot must be
// back to 0 once all instances are dropped.
1599 local_data_key!(drop_vector: vec::Vec<int>)
1601 #[deriving(Hash, Eq, TotalEq)]
1608 fn new(k: int) -> Dropable {
1609 local_data::get_mut(drop_vector,
1610 |v| { v.unwrap().as_mut_slice()[k] += 1; });
1616 impl Drop for Dropable {
1617 fn drop(&mut self) {
1618 local_data::get_mut(drop_vector, |v|
1619 { v.unwrap().as_mut_slice()[self.k] -= 1; });
// test_drops: verify the map drops exactly the keys/values it owns.
1625 local_data::set(drop_vector, vec::Vec::from_elem(200, 0));
1628 let mut m = HashMap::new();
// All counters start at zero.
1630 local_data::get(drop_vector, |v| {
1631 for i in range(0, 200) {
1632 assert_eq!(v.unwrap().as_slice()[i], 0);
// Insert 100 (key, value) pairs: keys use slots 0..99, values 100..199.
1636 for i in range(0, 100) {
1637 let d1 = Dropable::new(i);
1638 let d2 = Dropable::new(i+100);
// Each instance is live exactly once while held by the map.
1642 local_data::get(drop_vector, |v| {
1643 for i in range(0, 200) {
1644 assert_eq!(v.unwrap().as_slice()[i], 1);
// Remove the first 50 entries...
1648 for i in range(0, 50) {
1649 let k = Dropable::new(i);
1652 assert!(v.is_some());
1654 local_data::get(drop_vector, |v| {
1655 assert_eq!(v.unwrap().as_slice()[i], 1);
1656 assert_eq!(v.unwrap().as_slice()[i+100], 1);
// ...and check removed entries dropped while the rest remain live.
1660 local_data::get(drop_vector, |v| {
1661 for i in range(0, 50) {
1662 assert_eq!(v.unwrap().as_slice()[i], 0);
1663 assert_eq!(v.unwrap().as_slice()[i+100], 0);
1666 for i in range(50, 100) {
1667 assert_eq!(v.unwrap().as_slice()[i], 1);
1668 assert_eq!(v.unwrap().as_slice()[i+100], 1);
// After the map itself is dropped, everything must be released.
1673 local_data::get(drop_vector, |v| {
1674 for i in range(0, 200) {
1675 assert_eq!(v.unwrap().as_slice()[i], 0);
// pop on an empty map returns None rather than failing.
1681 fn test_empty_pop() {
1682 let mut m: HashMap<int, bool> = HashMap::new();
1683 assert_eq!(m.pop(&0), None);
// --- HashMap test module, part 2 ---
// NOTE(review): #[test] attributes and closing braces are missing from
// this extract; code kept byte-identical.
// Stress test: repeated grow/shrink cycles must never corrupt lookups.
1687 fn test_lots_of_insertions() {
1688 let mut m = HashMap::new();
1690 // Try this a few times to make sure we never screw up the hashmap's
1692 for _ in range(0, 10) {
1693 assert!(m.is_empty());
1695 for i in range_inclusive(1, 1000) {
1696 assert!(m.insert(i, i));
// After inserting 1..i, every inserted key must be findable...
1698 for j in range_inclusive(1, i) {
1700 assert_eq!(r, Some(&j));
// ...and every not-yet-inserted key absent.
1703 for j in range_inclusive(i+1, 1000) {
1705 assert_eq!(r, None);
1709 for i in range_inclusive(1001, 2000) {
1710 assert!(!m.contains_key(&i));
// Remove in ascending order, checking both halves at each step.
1714 for i in range_inclusive(1, 1000) {
1715 assert!(m.remove(&i));
1717 for j in range_inclusive(1, i) {
1718 assert!(!m.contains_key(&j));
1721 for j in range_inclusive(i+1, 1000) {
1722 assert!(m.contains_key(&j));
1726 for i in range_inclusive(1, 1000) {
1727 assert!(!m.contains_key(&i));
1730 for i in range_inclusive(1, 1000) {
1731 assert!(m.insert(i, i));
// Remove in descending order to exercise the shrink path differently.
1735 for i in range_step_inclusive(1000, 1, -1) {
1736 assert!(m.remove(&i));
1738 for j in range_inclusive(i, 1000) {
1739 assert!(!m.contains_key(&j));
1742 for j in range_inclusive(1, i-1) {
1743 assert!(m.contains_key(&j));
// find_mut returns a mutable reference usable to update in place.
1750 fn test_find_mut() {
1751 let mut m = HashMap::new();
1752 assert!(m.insert(1, 12));
1753 assert!(m.insert(2, 8));
1754 assert!(m.insert(5, 14));
1756 match m.find_mut(&5) {
1757 None => fail!(), Some(x) => *x = new
1759 assert_eq!(m.find(&5), Some(&new));
// insert on an existing key overwrites and returns false.
1763 fn test_insert_overwrite() {
1764 let mut m = HashMap::new();
1765 assert!(m.insert(1, 2));
1766 assert_eq!(*m.find(&1).unwrap(), 2);
1767 assert!(!m.insert(1, 3));
1768 assert_eq!(*m.find(&1).unwrap(), 3);
// Keys 1, 5, 9 collide modulo a capacity-4 table.
1772 fn test_insert_conflicts() {
1773 let mut m = HashMap::with_capacity(4);
1774 assert!(m.insert(1, 2));
1775 assert!(m.insert(5, 3));
1776 assert!(m.insert(9, 4));
1777 assert_eq!(*m.find(&9).unwrap(), 4);
1778 assert_eq!(*m.find(&5).unwrap(), 3);
1779 assert_eq!(*m.find(&1).unwrap(), 2);
// Removing one member of a collision chain must not lose the others.
1783 fn test_conflict_remove() {
1784 let mut m = HashMap::with_capacity(4);
1785 assert!(m.insert(1, 2));
1786 assert_eq!(*m.find(&1).unwrap(), 2);
1787 assert!(m.insert(5, 3));
1788 assert_eq!(*m.find(&1).unwrap(), 2);
1789 assert_eq!(*m.find(&5).unwrap(), 3);
1790 assert!(m.insert(9, 4));
1791 assert_eq!(*m.find(&1).unwrap(), 2);
1792 assert_eq!(*m.find(&5).unwrap(), 3);
1793 assert_eq!(*m.find(&9).unwrap(), 4);
1794 assert!(m.remove(&1));
1795 assert_eq!(*m.find(&9).unwrap(), 4);
1796 assert_eq!(*m.find(&5).unwrap(), 3);
1800 fn test_is_empty() {
1801 let mut m = HashMap::with_capacity(4);
1802 assert!(m.insert(1, 2));
1803 assert!(!m.is_empty());
1804 assert!(m.remove(&1));
1805 assert!(m.is_empty());
// pop removes and returns the value; second pop yields None.
1810 let mut m = HashMap::new();
1812 assert_eq!(m.pop(&1), Some(2));
1813 assert_eq!(m.pop(&1), None);
// swap returns the previous value (None on first insertion).
1818 let mut m = HashMap::new();
1819 assert_eq!(m.swap(1, 2), None);
1820 assert_eq!(m.swap(1, 3), Some(2));
1821 assert_eq!(m.swap(1, 4), Some(3));
// move_iter yields owned pairs in arbitrary order.
1825 fn test_move_iter() {
1827 let mut hm = HashMap::new();
1835 let v = hm.move_iter().collect::<~[(char, int)]>();
1836 assert!([('a', 1), ('b', 2)] == v || [('b', 2), ('a', 1)] == v);
// Iteration visits every entry exactly once (checked via a bitmask).
1841 let mut m = HashMap::with_capacity(4);
1842 for i in range(0u, 32) {
1843 assert!(m.insert(i, i*2));
1845 assert_eq!(m.len(), 32);
1847 let mut observed = 0;
1849 for (k, v) in m.iter() {
1850 assert_eq!(*v, *k * 2);
1851 observed |= 1 << *k;
1853 assert_eq!(observed, 0xFFFF_FFFF);
// --- HashMap test module, part 3 ---
// NOTE(review): #[test] attributes, fn headers for some tests, and
// closing braces are missing from this extract; code kept byte-identical.
// keys() yields every key exactly once (order unspecified).
1858 let vec = ~[(1, 'a'), (2, 'b'), (3, 'c')];
1859 let map = vec.move_iter().collect::<HashMap<int, char>>();
1860 let keys = map.keys().map(|&k| k).collect::<~[int]>();
1861 assert_eq!(keys.len(), 3);
1862 assert!(keys.contains(&1));
1863 assert!(keys.contains(&2));
1864 assert!(keys.contains(&3));
// values() yields every value exactly once (order unspecified).
1869 let vec = ~[(1, 'a'), (2, 'b'), (3, 'c')];
1870 let map = vec.move_iter().collect::<HashMap<int, char>>();
1871 let values = map.values().map(|&v| v).collect::<~[char]>();
1872 assert_eq!(values.len(), 3);
1873 assert!(values.contains(&'a'));
1874 assert!(values.contains(&'b'));
1875 assert!(values.contains(&'c'));
// find() on a missing then present key.
1880 let mut m = HashMap::new();
1881 assert!(m.find(&1).is_none());
1885 Some(v) => assert!(*v == 2)
// Map equality (test_eq): two maps compared for Eq.
1891 let mut m1 = HashMap::new();
1896 let mut m2 = HashMap::new();
// test_expand: inserting past grow_at must trigger a resize while
// len/is_empty stay consistent.
1909 let mut m = HashMap::new();
1911 assert_eq!(m.len(), 0);
1912 assert!(m.is_empty());
// Loop until grow_at changes, i.e. until a resize has happened.
1915 let old_resize_at = m.grow_at;
1916 while old_resize_at == m.grow_at {
1921 assert_eq!(m.len(), i);
1922 assert!(!m.is_empty());
// Equiv-based lookup: &str queries against ~str keys.
1926 fn test_find_equiv() {
1927 let mut m = HashMap::new();
1929 let (foo, bar, baz) = (1,2,3);
1930 m.insert(~"foo", foo);
1931 m.insert(~"bar", bar);
1932 m.insert(~"baz", baz);
1935 assert_eq!(m.find_equiv(&("foo")), Some(&foo));
1936 assert_eq!(m.find_equiv(&("bar")), Some(&bar));
1937 assert_eq!(m.find_equiv(&("baz")), Some(&baz));
1939 assert_eq!(m.find_equiv(&("qux")), None);
// collect() on a pair iterator builds the map via FromIterator.
1943 fn test_from_iter() {
1944 let xs = ~[(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];
1946 let map: HashMap<int, int> = xs.iter().map(|&x| x).collect();
1948 for &(k, v) in xs.iter() {
1949 assert_eq!(map.find(&k), Some(&v));
// --- HashSet test module, part 1 ---
// NOTE(review): the `mod test_set {` header, #[test] attributes, and
// closing braces are missing from this extract; code kept byte-identical.
1957 use std::container::Container;
1958 use std::slice::ImmutableEqVector;
// is_disjoint must be symmetric and flip once an element is shared.
1961 fn test_disjoint() {
1962 let mut xs = HashSet::new();
1963 let mut ys = HashSet::new();
1964 assert!(xs.is_disjoint(&ys));
1965 assert!(ys.is_disjoint(&xs));
1966 assert!(xs.insert(5));
1967 assert!(ys.insert(11));
1968 assert!(xs.is_disjoint(&ys));
1969 assert!(ys.is_disjoint(&xs));
1970 assert!(xs.insert(7));
1971 assert!(xs.insert(19));
1972 assert!(xs.insert(4));
1973 assert!(ys.insert(2));
1974 assert!(ys.insert(-11));
1975 assert!(xs.is_disjoint(&ys));
1976 assert!(ys.is_disjoint(&xs));
// 7 is now in both sets — no longer disjoint.
1977 assert!(ys.insert(7));
1978 assert!(!xs.is_disjoint(&ys));
1979 assert!(!ys.is_disjoint(&xs));
// subset/superset before and after making a a subset of b.
1983 fn test_subset_and_superset() {
1984 let mut a = HashSet::new();
1985 assert!(a.insert(0));
1986 assert!(a.insert(5));
1987 assert!(a.insert(11));
1988 assert!(a.insert(7));
1990 let mut b = HashSet::new();
1991 assert!(b.insert(0));
1992 assert!(b.insert(7));
1993 assert!(b.insert(19));
1994 assert!(b.insert(250));
1995 assert!(b.insert(11));
1996 assert!(b.insert(200));
1998 assert!(!a.is_subset(&b));
1999 assert!(!a.is_superset(&b));
2000 assert!(!b.is_subset(&a));
2001 assert!(!b.is_superset(&a));
// Adding a's last missing element (5) to b makes a ⊆ b.
2003 assert!(b.insert(5));
2005 assert!(a.is_subset(&b));
2006 assert!(!a.is_superset(&b));
2007 assert!(!b.is_subset(&a));
2008 assert!(b.is_superset(&a));
// Iteration visits every element exactly once (bitmask check).
2013 let mut a = HashSet::new();
2014 for i in range(0u, 32) {
2015 assert!(a.insert(i));
2017 let mut observed = 0;
2019 observed |= 1 << *k;
2021 assert_eq!(observed, 0xFFFF_FFFF);
// intersection() yields exactly the shared elements.
2025 fn test_intersection() {
2026 let mut a = HashSet::new();
2027 let mut b = HashSet::new();
2029 assert!(a.insert(11));
2030 assert!(a.insert(1));
2031 assert!(a.insert(3));
2032 assert!(a.insert(77));
2033 assert!(a.insert(103));
2034 assert!(a.insert(5));
2035 assert!(a.insert(-5));
2037 assert!(b.insert(2));
2038 assert!(b.insert(11));
2039 assert!(b.insert(77));
2040 assert!(b.insert(-9));
2041 assert!(b.insert(-42));
2042 assert!(b.insert(5));
2043 assert!(b.insert(3));
2046 let expected = [3, 5, 11, 77];
2047 for x in a.intersection(&b) {
2048 assert!(expected.contains(x));
// `i` (the loop counter, incremented in lines missing from this
// extract) must equal the expected element count.
2051 assert_eq!(i, expected.len());
// --- HashSet test module, part 2 ---
// NOTE(review): #[test] attributes, counter increments, and closing
// braces are missing from this extract; code kept byte-identical.
// difference() yields elements of a not in b.
2055 fn test_difference() {
2056 let mut a = HashSet::new();
2057 let mut b = HashSet::new();
2059 assert!(a.insert(1));
2060 assert!(a.insert(3));
2061 assert!(a.insert(5));
2062 assert!(a.insert(9));
2063 assert!(a.insert(11));
2065 assert!(b.insert(3));
2066 assert!(b.insert(9));
2069 let expected = [1, 5, 11];
2070 for x in a.difference(&b) {
2071 assert!(expected.contains(x));
2074 assert_eq!(i, expected.len());
// symmetric_difference() = (a - b) ∪ (b - a).
2078 fn test_symmetric_difference() {
2079 let mut a = HashSet::new();
2080 let mut b = HashSet::new();
2082 assert!(a.insert(1));
2083 assert!(a.insert(3));
2084 assert!(a.insert(5));
2085 assert!(a.insert(9));
2086 assert!(a.insert(11));
2088 assert!(b.insert(-2));
2089 assert!(b.insert(3));
2090 assert!(b.insert(9));
2091 assert!(b.insert(14));
2092 assert!(b.insert(22));
2095 let expected = [-2, 1, 5, 11, 14, 22];
2096 for x in a.symmetric_difference(&b) {
2097 assert!(expected.contains(x));
2100 assert_eq!(i, expected.len());
// union() yields each element of either set exactly once.
2105 let mut a = HashSet::new();
2106 let mut b = HashSet::new();
2108 assert!(a.insert(1));
2109 assert!(a.insert(3));
2110 assert!(a.insert(5));
2111 assert!(a.insert(9));
2112 assert!(a.insert(11));
2113 assert!(a.insert(16));
2114 assert!(a.insert(19));
2115 assert!(a.insert(24));
2117 assert!(b.insert(-2));
2118 assert!(b.insert(1));
2119 assert!(b.insert(5));
2120 assert!(b.insert(9));
2121 assert!(b.insert(13));
2122 assert!(b.insert(19));
2125 let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24];
2126 for x in a.union(&b) {
2127 assert!(expected.contains(x));
2130 assert_eq!(i, expected.len());
// collect() builds a set via FromIterator.
2134 fn test_from_iter() {
2135 let xs = ~[1, 2, 3, 4, 5, 6, 7, 8, 9];
2137 let set: HashSet<int> = xs.iter().map(|&x| x).collect();
2139 for x in xs.iter() {
2140 assert!(set.contains(x));
// move_iter yields owned elements in arbitrary order.
2145 fn test_move_iter() {
2147 let mut hs = HashSet::new();
2155 let v = hs.move_iter().collect::<~[char]>();
2156 assert!(['a', 'b'] == v || ['b', 'a'] == v);
// Regression guard: constants that once exposed an insert() bug.
2161 // These constants once happened to expose a bug in insert().
2162 // I'm keeping them around to prevent a regression.
2163 let mut s1 = HashSet::new();
2169 let mut s2 = HashSet::new();
// Show formatting: braces with comma-separated elements, any order.
2183 let mut set: HashSet<int> = HashSet::new();
2184 let empty: HashSet<int> = HashSet::new();
2189 let set_str = format!("{}", set);
2191 assert!(set_str == ~"{1, 2}" || set_str == ~"{2, 1}");
2192 assert_eq!(format!("{}", empty), ~"{}");
// --- Benchmark module ---
// NOTE(review): the `mod bench {` header, #[bench] attributes, setup
// loops, and the `b.iter(...)` closures are partially missing from this
// extract; code kept byte-identical.
2199 use self::test::BenchHarness;
2200 use std::iter::{range_inclusive};
// Raw insertion throughput.
2203 fn insert(b: &mut BenchHarness) {
2206 let mut m = HashMap::new();
2208 for i in range_inclusive(1, 1000) {
// Lookup of a key known to be present.
2221 fn find_existing(b: &mut BenchHarness) {
2224 let mut m = HashMap::new();
2226 for i in range_inclusive(1, 1000) {
2231 m.contains_key(&412);
// Lookup of a key known to be absent (outside 1..1000).
2236 fn find_nonexisting(b: &mut BenchHarness) {
2239 let mut m = HashMap::new();
2241 for i in range_inclusive(1, 1000) {
2246 m.contains_key(&2048);
// FIFO-style churn: remove oldest, insert newest on each iteration.
2251 fn hashmap_as_queue(b: &mut BenchHarness) {
2254 let mut m = HashMap::new();
2256 for i in range_inclusive(1, 1000) {
2264 m.insert(k + 1000, k + 1000);
// Mixed workload: find a missing key, pop, then insert per iteration.
2270 fn find_pop_insert(b: &mut BenchHarness) {
2273 let mut m = HashMap::new();
2275 for i in range_inclusive(1, 1000) {
2283 m.find(&(k + 2000));
2285 m.insert(k + 1000, k + 1000);