1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types)
13 use std::container::{Container, Mutable, Map, MutableMap, Set, MutableSet};
14 use std::clone::Clone;
15 use std::cmp::{Eq, TotalEq, Equiv, max};
16 use std::default::Default;
19 use std::hash::{Hash, Hasher, sip};
21 use std::iter::{Iterator, FromIterator, Extendable};
22 use std::iter::{FilterMap, Chain, Repeat, Zip};
23 use std::iter::{range, range_inclusive};
24 use std::mem::replace;
26 use std::option::{Option, Some, None};
29 use std::result::{Ok, Err};
30 use std::slice::ImmutableVector;
35 use std::clone::Clone;
37 use std::hash::{Hash, Hasher};
38 use std::kinds::marker;
39 use std::num::CheckedMul;
40 use std::option::{Option, Some, None};
41 use std::prelude::Drop;
44 use std::rt::global_heap;
45 use std::intrinsics::{size_of, transmute, move_val_init};
46 use std::iter::{Iterator, range_step_inclusive};
48 static EMPTY_BUCKET: u64 = 0u64;
50 /// The raw hashtable, providing safe-ish access to the unzipped and highly
51 /// optimized arrays of hashes, keys, and values.
53 /// This design uses less memory and is a lot faster than the naive
54 /// `~[Option<u64, K, V>]`, because we don't pay for the overhead of an
55 /// option on every element, and we get a generally more cache-aware design.
57 /// Key invariants of this structure:
59 /// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
60 /// 'undefined' contents. Don't read from them. This invariant is
61 /// enforced outside this module with the [EmptyIndex], [FullIndex],
62 /// and [SafeHash] types/concepts.
64 /// - An `EmptyIndex` is only constructed for a bucket at an index with
65 /// a hash of EMPTY_BUCKET.
67 /// - A `FullIndex` is only constructed for a bucket at an index with a
68 /// non-EMPTY_BUCKET hash.
70 /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
71 /// around hashes of zero by changing them to 0x8000_0000_0000_0000, which will
72 /// likely hash to the same bucket, but not be represented as "empty".
74 /// - All three "arrays represented by pointers" are the same length:
75 /// `capacity`. This is set at creation and never changes. The arrays
76 /// are unzipped to save space (we don't have to pay for the padding
77 /// between odd sized elements, such as in a map from u64 to u8), and
78 /// be more cache aware (scanning through 8 hashes brings in 2 cache
79 /// lines, since they're all right beside each other).
81 /// You can kind of think of this module/data structure as a safe wrapper
82 /// around just the "table" part of the hashtable. It enforces some
83 /// invariants at the type level and employs some performance trickery,
84 /// but in general is just a tricked out `Vec<Option<u64, K, V>>`.
88 /// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried,
89 /// isn't yet totally safe. There's a "known exploit" that you can create
90 /// multiple FullIndexes for a bucket, `take` one, and then still `take`
91 /// the other causing undefined behavior. Currently, there's no story
92 /// for how to protect against this statically. Therefore, there are asserts
93 /// on `take`, `get`, `get_mut`, and `put` which check the bucket state.
94 /// With time, and when we're confident this works correctly, they should
95 /// be removed. Also, the bounds check in `peek` is especially painful,
96 /// as that's called in the innermost loops of the hashtable and has the
97 /// potential to be a major performance drain. Remove this too.
99 /// Or, better than remove, only enable these checks for debug builds.
100 /// There's currently no "debug-only" asserts in rust, so if you're reading
101 /// this and going "what? of course there are debug-only asserts!", then
102 /// please make this use them!
103 pub struct RawTable<K, V> {
111 /// Represents an index into a `RawTable` with no key or value in it.
112 pub struct EmptyIndex {
114 nocopy: marker::NoCopy,
117 /// Represents an index into a `RawTable` with a key, value, and hash
119 pub struct FullIndex {
122 nocopy: marker::NoCopy,
126 /// Since we get the hash for free whenever we check the bucket state,
127 /// this function is provided for fast access, letting us avoid making
128 /// redundant trips back to the hashtable.
129 pub fn hash(&self) -> SafeHash { self.hash }
131 /// Same comment as with `hash`.
132 pub fn raw_index(&self) -> uint { self.idx as uint }
135 /// Represents the state of a bucket: it can either have a key/value
136 /// pair (be full) or not (be empty). You cannot `take` empty buckets,
137 /// and you cannot `put` into full buckets.
138 pub enum BucketState {
143 /// A hash that is not zero, since we use that to represent empty buckets.
145 pub struct SafeHash {
150 /// Peek at the hash value, which is guaranteed to be non-zero.
151 pub fn inspect(&self) -> u64 { self.hash }
154 /// We need to remove hashes of 0. That's reserved for empty buckets.
155 /// This function wraps up `hash_keyed` to be the only way outside this
156 /// module to generate a SafeHash.
157 pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
158 match hasher.hash(t) {
159 // This constant is exceedingly likely to hash to the same
160 // bucket, but it won't be counted as empty!
161 EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
162 h => SafeHash { hash: h },
166 impl<K, V> RawTable<K, V> {
168 /// Does not initialize the buckets. The caller should ensure they,
169 /// at the very least, set every hash to EMPTY_BUCKET.
170 unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> {
172 capacity.checked_mul(&size_of::<u64>()).expect("capacity overflow");
174 capacity.checked_mul(&size_of::< K >()).expect("capacity overflow");
176 capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");
179 The following code was my first pass at making RawTable only
180 allocate a single buffer, since that's all it needs. There's
181 no logical reason for this to require three calls to malloc.
183 However, I'm not convinced the code below is correct. If you
184 want to take a stab at it, please do! The alignment is
185 especially tricky to get right, especially if you need more
186 alignment than malloc guarantees.
188 let hashes_offset = 0;
189 let keys_offset = align_size(hashes_offset + hashes_size, keys_align);
190 let vals_offset = align_size(keys_offset + keys_size, vals_align);
191 let end = vals_offset + vals_size;
193 let buffer = global_heap::malloc_raw(end);
195 let hashes = buffer.offset(hashes_offset) as *mut u64;
196 let keys = buffer.offset(keys_offset) as *mut K;
197 let vals = buffer.offset(vals_offset) as *mut V;
201 let hashes = global_heap::malloc_raw(hashes_size) as *mut u64;
202 let keys = global_heap::malloc_raw(keys_size) as *mut K;
203 let vals = global_heap::malloc_raw(vals_size) as *mut V;
216 /// Creates a new raw table from a given capacity. All buckets are
218 pub fn new(capacity: uint) -> RawTable<K, V> {
220 let ret = RawTable::new_uninitialized(capacity);
222 for i in range(0, ret.capacity() as int) {
223 *ret.hashes.offset(i) = EMPTY_BUCKET;
230 /// Reads a bucket at a given index, returning an enum indicating whether
231 /// there's anything there or not. You need to match on this enum to get
232 /// the appropriate types to pass on to most of the rest of the functions
234 pub fn peek(&self, index: uint) -> BucketState {
236 if cfg!(test) { assert!(index < self.capacity) }
238 let idx = index as int;
239 let hash = unsafe { *self.hashes.offset(idx) };
241 let nocopy = marker::NoCopy;
252 hash: SafeHash { hash: full_hash },
258 /// Gets references to the key and value at a given index.
259 pub fn read<'a>(&'a self, index: &FullIndex) -> (&'a K, &'a V) {
264 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
265 (&'a *self.keys.offset(idx),
266 &'a *self.vals.offset(idx))
270 /// Gets references to the key and value at a given index, with the
271 /// value's reference being mutable.
272 pub fn read_mut<'a>(&'a mut self, index: &FullIndex) -> (&'a K, &'a mut V) {
277 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
278 (&'a *self.keys.offset(idx),
279 &'a mut *self.vals.offset(idx))
283 /// Read everything, mutably.
284 pub fn read_all_mut<'a>(&'a mut self, index: &FullIndex)
285 -> (&'a mut SafeHash, &'a mut K, &'a mut V) {
288 // I'm totally abusing the fact that a pointer to any u64 in the
289 // hashtable at a full index is a safe hash. Thanks to `SafeHash`
290 // just being a wrapper around u64, this is true. It's just really
291 // really really really unsafe. However, the exposed API is now
292 // impossible to get wrong. You cannot insert an empty hash into
297 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
298 (transmute(self.hashes.offset(idx)),
299 &'a mut *self.keys.offset(idx),
300 &'a mut *self.vals.offset(idx))
304 /// Puts a key and value pair, along with the key's hash, into a given
305 /// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
306 /// function, because that slot will no longer be empty when we return!
307 /// Because we know this, a FullIndex is returned for later use, pointing
308 /// to the newly-filled slot in the hashtable.
310 /// Use `make_hash` to construct a `SafeHash` to pass to this function.
311 pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
316 if cfg!(test) { assert!(*self.hashes.offset(idx) == EMPTY_BUCKET) }
317 *self.hashes.offset(idx) = hash.inspect();
318 move_val_init(&mut *self.keys.offset(idx), k);
319 move_val_init(&mut *self.vals.offset(idx), v);
324 FullIndex { idx: idx, hash: hash, nocopy: marker::NoCopy }
327 /// Removes a key and value from the hashtable.
329 /// This works similarly to `put`, building an `EmptyIndex` out of the
331 pub fn take(&mut self, index: FullIndex) -> (EmptyIndex, K, V) {
336 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
338 let hash_ptr = self.hashes.offset(idx);
340 *hash_ptr = EMPTY_BUCKET;
342 // Drop the mutable constraint.
343 let keys = self.keys as *K;
344 let vals = self.vals as *V;
346 let k = ptr::read(keys.offset(idx));
347 let v = ptr::read(vals.offset(idx));
351 (EmptyIndex { idx: idx, nocopy: marker::NoCopy }, k, v)
355 /// The hashtable's capacity, similar to a vector's.
356 pub fn capacity(&self) -> uint {
360 /// The number of elements ever `put` in the hashtable, minus the number
361 /// of elements ever `take`n.
362 pub fn size(&self) -> uint {
366 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
367 Entries { table: self, idx: 0 }
370 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
371 MutEntries { table: self, idx: 0 }
374 pub fn move_iter(self) -> MoveEntries<K, V> {
375 MoveEntries { table: self, idx: 0 }
379 pub struct Entries<'a, K, V> {
380 table: &'a RawTable<K, V>,
384 pub struct MutEntries<'a, K, V> {
385 table: &'a mut RawTable<K, V>,
389 pub struct MoveEntries<K, V> {
390 table: RawTable<K, V>,
394 impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
395 fn next(&mut self) -> Option<(&'a K, &'a V)> {
396 while self.idx < self.table.capacity() {
400 match self.table.peek(i) {
402 Full(idx) => return Some(self.table.read(&idx))
409 fn size_hint(&self) -> (uint, Option<uint>) {
410 let size = self.table.size() - self.idx;
415 impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {
416 fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
417 while self.idx < self.table.capacity() {
421 match self.table.peek(i) {
423 // the transmute here fixes:
424 // error: lifetime of `self` is too short to guarantee its contents
425 // can be safely reborrowed
426 Full(idx) => unsafe {
427 return Some(transmute(self.table.read_mut(&idx)))
435 fn size_hint(&self) -> (uint, Option<uint>) {
436 let size = self.table.size() - self.idx;
441 impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> {
442 fn next(&mut self) -> Option<(SafeHash, K, V)> {
443 while self.idx < self.table.capacity() {
447 match self.table.peek(i) {
451 let (_, k, v) = self.table.take(idx);
452 return Some((h, k, v));
460 fn size_hint(&self) -> (uint, Option<uint>) {
461 let size = self.table.size();
466 impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
467 fn clone(&self) -> RawTable<K, V> {
469 let mut new_ht = RawTable::new_uninitialized(self.capacity());
471 for i in range(0, self.capacity()) {
474 *new_ht.hashes.offset(i as int) = EMPTY_BUCKET;
477 let hash = idx.hash().inspect();
478 let (k, v) = self.read(&idx);
479 *new_ht.hashes.offset(i as int) = hash;
480 move_val_init(&mut *new_ht.keys.offset(i as int), (*k).clone());
481 move_val_init(&mut *new_ht.vals.offset(i as int), (*v).clone());
486 new_ht.size = self.size();
496 impl<K, V> Drop for RawTable<K, V> {
498 // Ideally, this should be in reverse, since we're likely to have
499 // partially taken some elements out with `.move_iter()` from the
501 for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
502 // Check if the size is 0, so we don't do a useless scan when
503 // dropping empty tables such as on resize.
505 if self.size == 0 { break }
507 match self.peek(i as uint) {
509 Full(idx) => { self.take(idx); }
513 assert!(self.size == 0);
516 libc::free(self.vals as *mut libc::c_void);
517 libc::free(self.keys as *mut libc::c_void);
518 libc::free(self.hashes as *mut libc::c_void);
524 // We use this type for the load factor, to avoid floating point operations
525 // which might not be supported efficiently on some hardware.
527 // We use small u16s here to save space in the hashtable. They get upcasted
528 // to u64s when we actually use them.
529 type Fraction = (u16, u16); // (numerator, denominator)
531 // multiplication by a fraction, in a way that won't generally overflow for
532 // array sizes outside a factor of 10 of U64_MAX.
533 fn fraction_mul(lhs: uint, (num, den): Fraction) -> uint {
534 (((lhs as u64) * (num as u64)) / (den as u64)) as uint
537 static INITIAL_LOG2_CAP: uint = 5;
538 static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
539 static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
541 // The main performance trick in this hashmap is called Robin Hood Hashing.
542 // It gains its excellent performance from one key invariant:
544 // If an insertion collides with an existing element, and that element's
545 // "probe distance" (how far away the element is from its ideal location)
546 // is higher than how far we've already probed, swap the elements.
548 // This massively lowers variance in probe distance, and allows us to get very
549 // high load factors with good performance. The 90% load factor I use is rather
552 // > Why a load factor of 90%?
554 // In general, all the distances to initial buckets will converge on the mean.
555 // At a load factor of α, the odds of finding the target bucket after k
556 // probes is approximately 1-α^k. If we set this equal to 50% (since we converge
557 // on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round
558 // this down to 0.90 to make the math easier on the CPU and avoid its FPU.
559 // Since on average we start the probing in the middle of a cache line, this
560 // strategy pulls in two cache lines of hashes on every lookup. I think that's
561 // pretty good, but if you want to trade off some space, it could go down to one
562 // cache line on average with an α of 0.84.
564 // > Wait, what? Where did you get 1-α^k from?
566 // On the first probe, your odds of a collision with an existing element is α.
567 // The odds of doing this twice in a row is approximately α^2. For three times,
568 // α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT
569 // colliding after k tries is 1-α^k.
571 // Future Improvements (FIXME!)
572 // ============================
574 // Allow the load factor to be changed dynamically and/or at initialization.
575 // I'm having trouble figuring out a sane API for this without exporting my
576 // hackish fraction type, while still avoiding floating point.
578 // Also, would it be possible for us to reuse storage when growing the
579 // underlying table? This is exactly the use case for 'realloc', and may
580 // be worth exploring.
582 // Future Optimizations (FIXME!)
583 // =============================
585 // The paper cited below mentions an implementation which keeps track of the
586 // distance-to-initial-bucket histogram. I'm suspicious of this approach because
587 // it requires maintaining an internal map. If this map were replaced with a
588 // hashmap, it would be faster, but now our data structure is self-referential
589 // and blows up. Also, this allows very good first guesses, but array accesses
590 // are no longer linear and in one direction, as we have now. There is also
591 // memory and cache pressure that this map would entail that would be very
592 // difficult to properly see in a microbenchmark.
594 // Another possible design choice that I made without any real reason is
595 // parameterizing the raw table over keys and values. Technically, all we need
596 // is the size and alignment of keys and values, and the code should be just as
597 // efficient (well, we might need one for power-of-two size and one for not...).
598 // This has the potential to reduce code bloat in rust executables, without
599 // really losing anything except 4 words (key size, key alignment, val size,
600 // val alignment) which can be passed in to every call of a `RawTable` function.
601 // This would definitely be an avenue worth exploring if people start complaining
602 // about the size of rust executables.
604 // There's also two optimizations that have been omitted regarding how the
605 // hashtable allocates. The first is that a hashtable which never has an element
606 // inserted should not allocate. I'm suspicious of this one, because supporting
607 // that internally gains no performance over just using an
608 // `Option<HashMap<K, V>>`, and is significantly more complicated.
610 // The second omitted allocation optimization is that right now we allocate three
611 // arrays to back the hashtable. This is wasteful. In theory, we only need one
612 // array, and each of the three original arrays can just be slices of it. This
613 // would reduce the pressure on the allocator, and will play much nicer with the
614 // rest of the system. An initial implementation is commented out in
615 // `table::RawTable::new`, but I'm not confident it works for all sane alignments,
616 // especially if a type needs more alignment than `malloc` provides.
618 /// A hash map implementation which uses linear probing with Robin
619 /// Hood bucket stealing.
621 /// The hashes are all keyed by the task-local random number generator
622 /// on creation by default, this means the ordering of the keys is
623 /// randomized, but makes the tables more resistant to
624 /// denial-of-service attacks (Hash DoS). This behaviour can be
625 /// overridden with one of the constructors.
627 /// It is required that the keys implement the `Eq` and `Hash` traits, although
628 /// this can frequently be achieved by using `#[deriving(Eq, Hash)]`.
630 /// Relevant papers/articles:
632 /// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf)
633 /// 2. Emmanuel Goossaert. ["Robin Hood
634 /// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/)
635 /// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift
636 /// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/)
641 /// use collections::HashMap;
643 /// // type inference lets us omit an explicit type signature (which
644 /// // would be `HashMap<&str, &str>` in this example).
645 /// let mut book_reviews = HashMap::new();
647 /// // review some books.
648 /// book_reviews.insert("Adventures of Hucklebury Fin", "My favorite book.");
649 /// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece.");
650 /// book_reviews.insert("Pride and Prejudice", "Very enjoyable.");
651 /// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
653 /// // check for a specific one.
654 /// if !book_reviews.contains_key(& &"Les Misérables") {
655 /// println!("We've got {} reviews, but Les Misérables ain't one.",
656 /// book_reviews.len());
659 /// // oops, this review has a lot of spelling mistakes, let's delete it.
660 /// book_reviews.remove(& &"The Adventures of Sherlock Holmes");
662 /// // look up the values associated with some keys.
663 /// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
664 /// for book in to_find.iter() {
665 /// match book_reviews.find(book) {
666 /// Some(review) => println!("{}: {}", *book, *review),
667 /// None => println!("{} is unreviewed.", *book)
671 /// // iterate over everything.
672 /// for (book, review) in book_reviews.iter() {
673 /// println!("{}: \"{}\"", *book, *review);
677 pub struct HashMap<K, V, H = sip::SipHasher> {
678 // All hashes are keyed on these values, to prevent hash collision attacks.
681 // When size == grow_at, we double the capacity.
684 // The capacity must never drop below this.
685 minimum_capacity: uint,
687 table: table::RawTable<K, V>,
689 // We keep this at the end since it's 4-bytes, unlike everything else
690 // in this struct. Might as well save a word of padding!
691 load_factor: Fraction,
694 /// Get the number of elements which will force the capacity to grow.
695 fn grow_at(capacity: uint, load_factor: Fraction) -> uint {
696 fraction_mul(capacity, load_factor)
699 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
700 /// Get the number of elements which will force the capacity to shrink.
701 /// When size == self.shrink_at(), we halve the capacity.
702 fn shrink_at(&self) -> uint {
703 self.table.capacity() >> 2
706 // Probe the `idx`th bucket for a given hash, returning the index of the
709 // This exploits the power-of-two size of the hashtable. As long as this
710 // is always true, we can use a bitmask of cap-1 to do modular arithmetic.
712 // Prefer to use this with increasing values of `idx` rather than repeatedly
713 // calling `probe_next`. This reduces data-dependencies between loops, which
714 // can help the optimizer, and certainly won't hurt it. `probe_next` is
715 // simply for convenience, and is no more efficient than `probe`.
716 fn probe(&self, hash: &table::SafeHash, idx: uint) -> uint {
717 let hash_mask = self.table.capacity() - 1;
719 // So I heard a rumor that unsigned overflow is safe in rust..
720 ((hash.inspect() as uint) + idx) & hash_mask
723 // Generate the next probe in a sequence. Prefer to use 'probe' by itself,
724 // but this can sometimes be useful.
725 fn probe_next(&self, probe: uint) -> uint {
726 let hash_mask = self.table.capacity() - 1;
727 (probe + 1) & hash_mask
730 fn make_hash<X: Hash<S>>(&self, x: &X) -> table::SafeHash {
731 table::make_hash(&self.hasher, x)
734 /// Get the distance of the bucket at the given index that it lies
735 /// from its 'ideal' location.
737 /// In the cited blog posts above, this is called the "distance to
738 /// initial bucket", or DIB.
739 fn bucket_distance(&self, index_of_elem: &table::FullIndex) -> uint {
740 // where the hash of the element that happens to reside at
741 // `index_of_elem` tried to place itself first.
742 let first_probe_index = self.probe(&index_of_elem.hash(), 0);
744 let raw_index = index_of_elem.raw_index();
746 if first_probe_index <= raw_index {
747 // probe just went forward
748 raw_index - first_probe_index
750 // probe wrapped around the hashtable
751 raw_index + (self.table.capacity() - first_probe_index)
755 /// Search for a pre-hashed key.
756 fn search_hashed_generic(&self, hash: &table::SafeHash, is_match: |&K| -> bool)
757 -> Option<table::FullIndex> {
758 for num_probes in range(0u, self.table.size()) {
759 let probe = self.probe(hash, num_probes);
761 let idx = match self.table.peek(probe) {
762 table::Empty(_) => return None, // hit an empty bucket
763 table::Full(idx) => idx
766 // We can finish the search early if we hit any bucket
767 // with a lower distance to initial bucket than we've probed.
768 if self.bucket_distance(&idx) < num_probes { return None }
770 // If the hash doesn't match, it can't be this one..
771 if hash != &idx.hash() { continue }
773 let (k, _) = self.table.read(&idx);
775 // If the key doesn't match, it can't be this one..
776 if !is_match(k) { continue }
784 fn search_hashed(&self, hash: &table::SafeHash, k: &K) -> Option<table::FullIndex> {
785 self.search_hashed_generic(hash, |k_| *k == *k_)
788 fn search_equiv<Q: Hash<S> + Equiv<K>>(&self, q: &Q) -> Option<table::FullIndex> {
789 self.search_hashed_generic(&self.make_hash(q), |k| q.equiv(k))
792 /// Search for a key, yielding the index if it's found in the hashtable.
793 /// If you already have the hash for the key lying around, use
795 fn search(&self, k: &K) -> Option<table::FullIndex> {
796 self.search_hashed(&self.make_hash(k), k)
799 fn pop_internal(&mut self, starting_index: table::FullIndex) -> Option<V> {
800 let starting_probe = starting_index.raw_index();
803 let mut probe = self.probe_next(starting_probe);
804 for _ in range(0u, self.table.size()) {
805 match self.table.peek(probe) {
806 table::Empty(_) => {}, // empty bucket. this is the end of our shifting.
807 table::Full(idx) => {
808 // Bucket that isn't us, which has a non-zero probe distance.
809 // This isn't the ending index, so keep searching.
810 if self.bucket_distance(&idx) != 0 {
811 probe = self.probe_next(probe);
815 // if we do have a bucket_distance of zero, we're at the end
816 // of what we need to shift.
825 let (_, _, retval) = self.table.take(starting_index);
827 let mut probe = starting_probe;
828 let mut next_probe = self.probe_next(probe);
830 // backwards-shift all the elements after our newly-deleted one.
831 while next_probe != ending_probe {
832 match self.table.peek(next_probe) {
834 // nothing to shift in. just empty it out.
835 match self.table.peek(probe) {
836 table::Empty(_) => {},
837 table::Full(idx) => { self.table.take(idx); }
840 table::Full(next_idx) => {
841 // something to shift. move it over!
842 let next_hash = next_idx.hash();
843 let (_, next_key, next_val) = self.table.take(next_idx);
844 match self.table.peek(probe) {
845 table::Empty(idx) => {
846 self.table.put(idx, next_hash, next_key, next_val);
848 table::Full(idx) => {
849 let (emptyidx, _, _) = self.table.take(idx);
850 self.table.put(emptyidx, next_hash, next_key, next_val);
857 next_probe = self.probe_next(next_probe);
860 // Done the backwards shift, but there's still an element left!
862 match self.table.peek(probe) {
863 table::Empty(_) => {},
864 table::Full(idx) => { self.table.take(idx); }
867 // Now we're done all our shifting. Return the value we grabbed
872 /// Like `pop`, but can operate on any type that is equivalent to a key.
874 pub fn pop_equiv<Q:Hash<S> + Equiv<K>>(&mut self, k: &Q) -> Option<V> {
875 if self.table.size() == 0 {
879 let potential_new_size = self.table.size() - 1;
880 self.make_some_room(potential_new_size);
882 let starting_index = match self.search_equiv(k) {
887 self.pop_internal(starting_index)
891 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Container for HashMap<K, V, H> {
892 /// Return the number of elements in the map
893 fn len(&self) -> uint { self.table.size() }
896 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
897 /// Clear the map, removing all key-value pairs.
898 fn clear(&mut self) {
899 self.minimum_capacity = self.table.size();
901 for i in range(0, self.table.capacity()) {
902 match self.table.peek(i) {
903 table::Empty(_) => {},
904 table::Full(idx) => { self.table.take(idx); }
911 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Map<K, V> for HashMap<K, V, H> {
912 fn find<'a>(&'a self, k: &K) -> Option<&'a V> {
913 self.search(k).map(|idx| {
914 let (_, v) = self.table.read(&idx);
919 fn contains_key(&self, k: &K) -> bool {
920 self.search(k).is_some()
924 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> MutableMap<K, V> for HashMap<K, V, H> {
925 fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> {
926 match self.search(k) {
929 let (_, v) = self.table.read_mut(&idx);
935 fn swap(&mut self, k: K, v: V) -> Option<V> {
936 let hash = self.make_hash(&k);
937 let potential_new_size = self.table.size() + 1;
938 self.make_some_room(potential_new_size);
940 for dib in range_inclusive(0u, self.table.size()) {
941 let probe = self.probe(&hash, dib);
943 let idx = match self.table.peek(probe) {
944 table::Empty(idx) => {
946 self.table.put(idx, hash, k, v);
949 table::Full(idx) => idx
952 if idx.hash() == hash {
953 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
955 // Found an existing value.
956 return Some(replace(bucket_v, v));
960 let probe_dib = self.bucket_distance(&idx);
963 // Found a luckier bucket. This implies that the key does not
964 // already exist in the hashtable. Just do a robin hood
966 self.robin_hood(idx, probe_dib, hash, k, v);
971 // We really shouldn't be here.
972 fail!("Internal HashMap error: Out of space.");
975 fn pop(&mut self, k: &K) -> Option<V> {
976 if self.table.size() == 0 {
980 let potential_new_size = self.table.size() - 1;
981 self.make_some_room(potential_new_size);
983 let starting_index = match self.search(k) {
988 self.pop_internal(starting_index)
993 impl<K: Hash + TotalEq, V> HashMap<K, V, sip::SipHasher> {
994 /// Create an empty HashMap.
995 pub fn new() -> HashMap<K, V, sip::SipHasher> {
996 HashMap::with_capacity(INITIAL_CAPACITY)
999 pub fn with_capacity(capacity: uint) -> HashMap<K, V, sip::SipHasher> {
1000 let mut r = rand::task_rng();
1003 let hasher = sip::SipHasher::new_with_keys(r0, r1);
1004 HashMap::with_capacity_and_hasher(capacity, hasher)
1008 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
1009 pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
1010 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
1013 /// Create an empty HashMap with space for at least `capacity`
1014 /// elements, using `hasher` to hash the keys.
1016 /// Warning: `hasher` is normally randomly generated, and
1017 /// is designed to allow HashMaps to be resistant to attacks that
1018 /// cause many collisions and very poor performance. Setting it
1019 /// manually using this function can expose a DoS attack vector.
1020 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
1021 let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
1024 load_factor: INITIAL_LOAD_FACTOR,
1025 grow_at: grow_at(cap, INITIAL_LOAD_FACTOR),
1026 minimum_capacity: cap,
1027 table: table::RawTable::new(cap),
1031 /// The hashtable will never try to shrink below this size. You can use
1032 /// this function to reduce reallocations if your hashtable frequently
1033 /// grows and shrinks by large amounts.
1035 /// This function has no effect on the operational semantics of the
1036 /// hashtable, only on performance.
1037 pub fn reserve(&mut self, new_minimum_capacity: uint) {
1038 let cap = num::next_power_of_two(
1039 max(INITIAL_CAPACITY, new_minimum_capacity));
1041 self.minimum_capacity = cap;
1043 if self.table.capacity() < cap {
1048 /// Resizes the internal vectors to a new capacity. It's your responsibility to:
1049 /// 1) Make sure the new capacity is enough for all the elements, accounting
1050 /// for the load factor.
1051 /// 2) Ensure new_capacity is a power of two.
1052 fn resize(&mut self, new_capacity: uint) {
1053 assert!(self.table.size() <= new_capacity);
// Power-of-two check: n & (n-1) == 0 only for powers of two (and 0).
1054 assert!((new_capacity - 1) & new_capacity == 0);
1056 self.grow_at = grow_at(new_capacity, self.load_factor);
// Swap in a fresh table, then re-insert every (hash, key, value) triple
// from the old one; hashes are reused, so keys are not rehashed.
1058 let old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
1059 let old_size = old_table.size();
1061 for (h, k, v) in old_table.move_iter() {
1062 self.manual_insert_hashed_nocheck(h, k, v);
// Sanity check: a resize must neither lose nor duplicate entries.
1065 assert_eq!(self.table.size(), old_size);
1068 /// Performs any necessary resize operations, such that there's space for
1069 /// new_size elements.
1070 fn make_some_room(&mut self, new_size: uint) {
1071 let should_shrink = new_size <= self.shrink_at();
1072 let should_grow = self.grow_at <= new_size;
// Grow/shrink by doubling/halving, which preserves the power-of-two
// capacity invariant required by `resize`. The `if should_grow` line
// itself is elided from this excerpt.
1075 let new_capacity = self.table.capacity() << 1;
1076 self.resize(new_capacity);
1077 } else if should_shrink {
1078 let new_capacity = self.table.capacity() >> 1;
1080 // Never shrink below the minimum capacity
1081 if self.minimum_capacity <= new_capacity {
1082 self.resize(new_capacity);
1087 /// Perform robin hood bucket stealing at the given 'index'. You must
1088 /// also pass that probe's "distance to initial bucket" so we don't have
1089 /// to recalculate it, as well as the total number of probes already done
1090 /// so we have some sort of upper bound on the number of probes to do.
1092 /// 'hash', 'k', and 'v' are the elements to robin hood into the hashtable.
1093 fn robin_hood(&mut self, mut index: table::FullIndex, mut dib_param: uint,
1094 mut hash: table::SafeHash, mut k: K, mut v: V) {
// Swap the incoming entry into the victim bucket, taking ownership of
// the displaced (hash, key, value), which must now be re-homed.
1096 let (old_hash, old_key, old_val) = {
1097 let (old_hash_ref, old_key_ref, old_val_ref) =
1098 self.table.read_all_mut(&index);
1100 let old_hash = replace(old_hash_ref, hash);
1101 let old_key = replace(old_key_ref, k);
1102 let old_val = replace(old_val_ref, v);
1104 (old_hash, old_key, old_val)
// Linear probe forward from the stolen bucket, carrying the evicted entry.
1107 let mut probe = self.probe_next(index.raw_index());
// `dib` counts the evicted entry's distance-to-initial-bucket as we walk;
// bounded by table size so a full table cannot loop forever.
1109 for dib in range(dib_param + 1, self.table.size()) {
1110 let full_index = match self.table.peek(probe) {
1111 table::Empty(idx) => {
// Empty slot: drop the evicted entry here and we're done.
1113 self.table.put(idx, old_hash, old_key, old_val);
1116 table::Full(idx) => idx
1119 let probe_dib = self.bucket_distance(&full_index);
1121 // Robin hood! Steal the spot.
// Core invariant: evict the "richer" occupant (smaller DIB) so probe
// lengths stay balanced across the table.
1122 if probe_dib < dib {
1124 dib_param = probe_dib;
// Elided lines presumably update index/hash/k/v via recursion or swap —
// confirm in the full source.
1131 probe = self.probe_next(probe);
// Reaching here means we probed every bucket without finding a home,
// which the load factor should make impossible.
1134 fail!("HashMap fatal error: 100% load factor?");
1138 /// Manually insert a pre-hashed key-value pair, without first checking
1139 /// that there's enough room in the buckets. Returns a reference to the
1140 /// newly insert value.
1142 /// If the key already exists, the hashtable will be returned untouched
1143 /// and a reference to the existing element will be returned.
1144 fn manual_insert_hashed_nocheck<'a>(
1145 &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
// Probe outward from the hash's home bucket; `dib` is this entry's
// distance-to-initial-bucket at each step.
1147 for dib in range_inclusive(0u, self.table.size()) {
1148 let probe = self.probe(&hash, dib);
1150 let idx = match self.table.peek(probe) {
1151 table::Empty(idx) => {
// Empty bucket: claim it and hand back a reference to the new value.
1153 let fullidx = self.table.put(idx, hash, k, v);
1154 let (_, val) = self.table.read_mut(&fullidx);
1157 table::Full(idx) => idx
// Matching hashes: the elided lines (~1160 region) presumably also compare
// keys before treating this as "key already exists" — confirm in full source.
1160 if idx.hash() == hash {
1161 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
1162 // FIXME #12147 the conditional return confuses
1163 // borrowck if we return bucket_v directly
1164 let bv: *mut V = bucket_v;
1166 // Key already exists. Get its reference.
1167 return unsafe {&mut *bv};
1171 let probe_dib = self.bucket_distance(&idx);
1173 if probe_dib < dib {
1174 // Found a luckier bucket than me. Better steal his spot.
1175 self.robin_hood(idx, probe_dib, hash, k, v);
1177 // Now that it's stolen, just read the value's pointer
1178 // right out of the table!
1179 match self.table.peek(probe) {
1180 table::Empty(_) => fail!("Just stole a spot, but now that spot's empty."),
1181 table::Full(idx) => {
1182 let (_, v) = self.table.read_mut(&idx);
1189 // We really shouldn't be here.
// `make_some_room` guarantees spare capacity before this is called, so the
// probe loop must terminate via one of the returns above.
1190 fail!("Internal HashMap error: Out of space.");
// Insert entry points: ensure capacity first, then delegate to the
// nocheck routine above.
1193 fn manual_insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
1194 let potential_new_size = self.table.size() + 1;
1195 self.make_some_room(potential_new_size);
1196 self.manual_insert_hashed_nocheck(hash, k, v)
1199 /// Inserts an element, returning a reference to that element inside the
1201 fn manual_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1202 let hash = self.make_hash(&k);
1203 self.manual_insert_hashed(hash, k, v)
1206 /// Return the value corresponding to the key in the map, or insert
1207 /// and return the value if it doesn't exist.
1208 pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1209 match self.search(&k) {
// Found: return the existing value; `v` is dropped unused in that arm.
1211 let (_, v_ref) = self.table.read_mut(&idx);
1214 None => self.manual_insert(k, v)
1218 /// Return the value corresponding to the key in the map, or create,
1219 /// insert, and return a new value if it doesn't exist.
1220 pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
// Lazily builds the value via closure `f` only on a miss.
1222 match self.search(&k) {
1224 let (_, v_ref) = self.table.read_mut(&idx);
// Elided: the None arm presumably computes `let v = f(&k)` before inserting.
1229 self.manual_insert(k, v)
1234 /// Insert a key-value pair into the map if the key is not already present.
1235 /// Otherwise, modify the existing value for the key.
1236 /// Returns the new or modified value for the key.
1237 pub fn insert_or_update_with<'a>(
1243 match self.search(&k) {
1244 None => self.manual_insert(k, v),
1246 let (_, v_ref) = self.table.read_mut(&idx);
1253 /// Retrieves a value for the given key, failing if the key is not present.
1254 pub fn get<'a>(&'a self, k: &K) -> &'a V {
1255 match self.find(k) {
1257 None => fail!("No entry found for key: {:?}", k)
1261 /// Retrieves a (mutable) value for the given key, failing if the key is not present.
1262 pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V {
1263 match self.find_mut(k) {
1265 None => fail!("No entry found for key: {:?}", k)
1269 /// Return true if the map contains a value for the specified key,
1270 /// using equivalence.
// `Equiv`-based lookups let callers probe with a borrowed/alternate key
// type (e.g. &str against ~str keys) without constructing a K.
1271 pub fn contains_key_equiv<Q: Hash<S> + Equiv<K>>(&self, key: &Q) -> bool {
1272 self.search_equiv(key).is_some()
1275 /// Return the value corresponding to the key in the map, using
1277 pub fn find_equiv<'a, Q: Hash<S> + Equiv<K>>(&'a self, k: &Q) -> Option<&'a V> {
1278 match self.search_equiv(k) {
1281 let (_, v_ref) = self.table.read(&idx);
1287 /// An iterator visiting all keys in arbitrary order.
1288 /// Iterator element type is &'a K.
1289 pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
1290 self.iter().map(|(k, _v)| k)
1293 /// An iterator visiting all values in arbitrary order.
1294 /// Iterator element type is &'a V.
1295 pub fn values<'a>(&'a self) -> Values<'a, K, V> {
1296 self.iter().map(|(_k, v)| v)
1299 /// An iterator visiting all key-value pairs in arbitrary order.
1300 /// Iterator element type is (&'a K, &'a V).
1301 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
// Body elided; presumably delegates to self.table.iter().
1305 /// An iterator visiting all key-value pairs in arbitrary order,
1306 /// with mutable references to the values.
1307 /// Iterator element type is (&'a K, &'a mut V).
1308 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
1309 self.table.mut_iter()
1312 /// Creates a consuming iterator, that is, one that moves each key-value
1313 /// pair out of the map in arbitrary order. The map cannot be used after
1315 pub fn move_iter(self) -> MoveEntries<K, V> {
// Drop the stored SafeHash from each triple, yielding plain (K, V) pairs.
1316 self.table.move_iter().map(|(_, k, v)| (k, v))
// Convenience copies, available only when V: Clone.
1320 impl<K: TotalEq + Hash<S>, V: Clone, S, H: Hasher<S>> HashMap<K, V, H> {
1321 /// Like `find`, but returns a copy of the value.
1322 pub fn find_copy(&self, k: &K) -> Option<V> {
1323 self.find(k).map(|v| (*v).clone())
1326 /// Like `get`, but returns a copy of the value.
1327 pub fn get_copy(&self, k: &K) -> V {
1328 (*self.get(k)).clone()
// Equality: same length and every (key, value) of self found, equal, in other.
1332 impl<K: TotalEq + Hash<S>, V: Eq, S, H: Hasher<S>> Eq for HashMap<K, V, H> {
1333 fn eq(&self, other: &HashMap<K, V, H>) -> bool {
1334 if self.len() != other.len() { return false; }
1336 self.iter().all(|(key, value)| {
1337 match other.find(key) {
1339 Some(v) => *value == *v
// Formatting: renders as {k1: v1, k2: v2} (braces escaped in format strings
// of this Rust era as r"\{").
1345 impl<K: TotalEq + Hash<S> + Show, V: Show, S, H: Hasher<S>> Show for HashMap<K, V, H> {
1346 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1347 try!(write!(f.buf, r"\{"));
1349 for (i, (k, v)) in self.iter().enumerate() {
1350 if i != 0 { try!(write!(f.buf, ", ")); }
1351 try!(write!(f.buf, "{}: {}", *k, *v));
1354 write!(f.buf, r"\}")
// Default requires a Default hasher; SipHasher with random keys satisfies
// this via a separate impl (not shown in this excerpt).
1358 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
1359 fn default() -> HashMap<K, V, H> {
1360 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, Default::default())
// Iterator type aliases: the map's iterators are thin adapters over the
// raw table's iterators, named here for the public API.
1364 /// HashMap iterator
1365 pub type Entries<'a, K, V> = table::Entries<'a, K, V>;
1367 /// HashMap mutable values iterator
1368 pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>;
1370 /// HashMap move iterator
1371 pub type MoveEntries<K, V> =
1372 iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries<K, V>>;
1374 /// HashMap keys iterator
1375 pub type Keys<'a, K, V> =
1376 iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>;
1378 /// HashMap values iterator
1379 pub type Values<'a, K, V> =
1380 iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>;
// Build a map from an iterator of pairs, pre-sizing from the lower
// size_hint to reduce resizes.
1382 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> FromIterator<(K, V)> for HashMap<K, V, H> {
1383 fn from_iter<T: Iterator<(K, V)>>(iter: T) -> HashMap<K, V, H> {
1384 let (lower, _) = iter.size_hint();
1385 let mut map = HashMap::with_capacity_and_hasher(lower, Default::default());
// Elided: presumably map.extend(iter) then returns map.
1391 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Extendable<(K, V)> for HashMap<K, V, H> {
1392 fn extend<T: Iterator<(K, V)>>(&mut self, mut iter: T) {
1393 for (k, v) in iter {
// Elided: presumably self.insert(k, v).
1399 /// HashSet iterator
1400 pub type SetItems<'a, K> =
1401 iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;
1403 /// HashSet move iterator
1404 pub type SetMoveItems<K> =
1405 iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
1407 /// An implementation of a hash set using the underlying representation of a
1408 /// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
1409 /// requires that the elements implement the `Eq` and `Hash` traits.
1411 pub struct HashSet<T, H = sip::SipHasher> {
// Single field: every set operation forwards to the unit-valued map.
1412 map: HashMap<T, (), H>
1415 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
1416 // FIXME #11998: Since the value is a (), and `find` returns a Some(&()),
1417 // we trigger #11998 when matching on it. I've fallen back to manual
1418 // iteration until this is fixed.
1419 fn eq(&self, other: &HashSet<T, H>) -> bool {
1420 if self.len() != other.len() { return false; }
1422 self.iter().all(|key| other.map.contains_key(key))
// Container/Mutable/Set/MutableSet: straight delegation to the inner map.
1426 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Container for HashSet<T, H> {
1427 fn len(&self) -> uint { self.map.len() }
1430 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> {
1431 fn clear(&mut self) { self.map.clear() }
1434 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
1435 fn contains(&self, value: &T) -> bool { self.map.search(value).is_some() }
1437 fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
1438 self.iter().all(|v| !other.contains(v))
1441 fn is_subset(&self, other: &HashSet<T, H>) -> bool {
1442 self.iter().all(|v| other.contains(v))
1446 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> {
1447 fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) }
1449 fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
// Default (SipHasher) constructors, mirroring HashMap::new/with_capacity.
1452 impl<T: Hash + TotalEq> HashSet<T, sip::SipHasher> {
1453 /// Create an empty HashSet
1454 pub fn new() -> HashSet<T, sip::SipHasher> {
1455 HashSet::with_capacity(INITIAL_CAPACITY)
1458 /// Create an empty HashSet with space for at least `n` elements in
1460 pub fn with_capacity(capacity: uint) -> HashSet<T, sip::SipHasher> {
1461 HashSet { map: HashMap::with_capacity(capacity) }
1465 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
1466 pub fn with_hasher(hasher: H) -> HashSet<T, H> {
1467 HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
1470 /// Create an empty HashSet with space for at least `capacity`
1471 /// elements in the hash table, using `hasher` to hash the keys.
1473 /// Warning: `hasher` is normally randomly generated, and
1474 /// is designed to allow `HashSet`s to be resistant to attacks that
1475 /// cause many collisions and very poor performance. Setting it
1476 /// manually using this function can expose a DoS attack vector.
1477 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> {
1478 HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
1481 /// Reserve space for at least `n` elements in the hash table.
1482 pub fn reserve(&mut self, n: uint) {
// Body elided; presumably self.map.reserve(n).
1486 /// Returns true if the hash set contains a value equivalent to the
1487 /// given query value.
1488 pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool {
1489 self.map.contains_key_equiv(value)
1492 /// An iterator visiting all elements in arbitrary order.
1493 /// Iterator element type is &'a T.
1494 pub fn iter<'a>(&'a self) -> SetItems<'a, T> {
// Body elided; presumably self.map.keys().
1498 /// Creates a consuming iterator, that is, one that moves each value out
1499 /// of the set in arbitrary order. The set cannot be used after calling
1501 pub fn move_iter(self) -> SetMoveItems<T> {
// Discard the unit values, yielding just the elements.
1502 self.map.move_iter().map(|(k, _)| k)
// Set-algebra iterators. Each pairs the other set with every element via
// Repeat+Zip (the closure capture workaround noted near SetAlgebraItems)
// and filters lazily — no intermediate set is built.
1505 /// Visit the values representing the difference
1506 pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
// Elided: the Repeat(other).zip(self.iter()) prelude feeding this filter_map.
1509 .filter_map(|(other, elt)| {
1510 if !other.contains(elt) { Some(elt) } else { None }
1514 /// Visit the values representing the symmetric difference
1515 pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>)
1516 -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> {
// (self - other) followed by (other - self).
1517 self.difference(other).chain(other.difference(self))
1520 /// Visit the values representing the intersection
1521 pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
1522 -> SetAlgebraItems<'a, T, H> {
1525 .filter_map(|(other, elt)| {
1526 if other.contains(elt) { Some(elt) } else { None }
1530 /// Visit the values representing the union
1531 pub fn union<'a>(&'a self, other: &'a HashSet<T, H>)
1532 -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
// All of self, then whatever of other is not already in self.
1533 self.iter().chain(other.difference(self))
// Formatting: renders as {e1, e2, ...}, matching the map's Show impl.
1538 impl<T: TotalEq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
1539 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1540 try!(write!(f.buf, r"\{"));
1542 for (i, x) in self.iter().enumerate() {
1543 if i != 0 { try!(write!(f.buf, ", ")); }
1544 try!(write!(f.buf, "{}", *x));
1547 write!(f.buf, r"\}")
// Collection/extension impls mirroring the HashMap versions above.
1551 impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> {
1552 fn from_iter<I: Iterator<T>>(iter: I) -> HashSet<T, H> {
1553 let (lower, _) = iter.size_hint();
// Pre-size from the lower size_hint to reduce resizes during fill.
1554 let mut set = HashSet::with_capacity_and_hasher(lower, Default::default());
1560 impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> {
1561 fn extend<I: Iterator<T>>(&mut self, mut iter: I) {
// Body elided; presumably inserts each yielded element.
1568 impl<T: TotalEq + Hash> Default for HashSet<T, sip::SipHasher> {
1569 fn default() -> HashSet<T> { HashSet::new() }
1572 // `Repeat` is used to feed the filter closure an explicit capture
1573 // of a reference to the other set
1574 /// Set operations iterator
1575 pub type SetAlgebraItems<'a, T, H> =
1576 FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T,
1577 Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>;
1582 use std::cmp::Equiv;
1583 use std::hash::Hash;
1584 use std::iter::{Iterator,range_inclusive,range_step_inclusive};
1585 use std::local_data;
1588 struct KindaIntLike(int);
1590 impl Equiv<int> for KindaIntLike {
1591 fn equiv(&self, other: &int) -> bool {
1592 let KindaIntLike(this) = *self;
1596 impl<S: Writer> Hash<S> for KindaIntLike {
1597 fn hash(&self, state: &mut S) {
1598 let KindaIntLike(this) = *self;
1604 fn test_create_capacity_zero() {
1605 let mut m = HashMap::with_capacity(0);
1607 assert!(m.insert(1, 1));
1609 assert!(m.contains_key(&1));
1610 assert!(!m.contains_key(&0));
1615 let mut m = HashMap::new();
1616 assert_eq!(m.len(), 0);
1617 assert!(m.insert(1, 2));
1618 assert_eq!(m.len(), 1);
1619 assert!(m.insert(2, 4));
1620 assert_eq!(m.len(), 2);
1621 assert_eq!(*m.find(&1).unwrap(), 2);
1622 assert_eq!(*m.find(&2).unwrap(), 4);
1625 local_data_key!(drop_vector: vec::Vec<int>)
1627 #[deriving(Hash, Eq, TotalEq)]
1634 fn new(k: uint) -> Dropable {
1635 local_data::get_mut(drop_vector,
1636 |v| { v.unwrap().as_mut_slice()[k] += 1; });
1642 impl Drop for Dropable {
1643 fn drop(&mut self) {
1644 local_data::get_mut(drop_vector, |v|
1645 { v.unwrap().as_mut_slice()[self.k] -= 1; });
1651 local_data::set(drop_vector, vec::Vec::from_elem(200, 0));
1654 let mut m = HashMap::new();
1656 local_data::get(drop_vector, |v| {
1657 for i in range(0u, 200) {
1658 assert_eq!(v.unwrap().as_slice()[i], 0);
1662 for i in range(0u, 100) {
1663 let d1 = Dropable::new(i);
1664 let d2 = Dropable::new(i+100);
1668 local_data::get(drop_vector, |v| {
1669 for i in range(0u, 200) {
1670 assert_eq!(v.unwrap().as_slice()[i], 1);
1674 for i in range(0u, 50) {
1675 let k = Dropable::new(i);
1678 assert!(v.is_some());
1680 local_data::get(drop_vector, |v| {
1681 assert_eq!(v.unwrap().as_slice()[i], 1);
1682 assert_eq!(v.unwrap().as_slice()[i+100], 1);
1686 local_data::get(drop_vector, |v| {
1687 for i in range(0u, 50) {
1688 assert_eq!(v.unwrap().as_slice()[i], 0);
1689 assert_eq!(v.unwrap().as_slice()[i+100], 0);
1692 for i in range(50u, 100) {
1693 assert_eq!(v.unwrap().as_slice()[i], 1);
1694 assert_eq!(v.unwrap().as_slice()[i+100], 1);
1699 local_data::get(drop_vector, |v| {
1700 for i in range(0u, 200) {
1701 assert_eq!(v.unwrap().as_slice()[i], 0);
1707 fn test_empty_pop() {
1708 let mut m: HashMap<int, bool> = HashMap::new();
1709 assert_eq!(m.pop(&0), None);
1713 fn test_lots_of_insertions() {
1714 let mut m = HashMap::new();
1716 // Try this a few times to make sure we never screw up the hashmap's
1718 for _ in range(0, 10) {
1719 assert!(m.is_empty());
1721 for i in range_inclusive(1, 1000) {
1722 assert!(m.insert(i, i));
1724 for j in range_inclusive(1, i) {
1726 assert_eq!(r, Some(&j));
1729 for j in range_inclusive(i+1, 1000) {
1731 assert_eq!(r, None);
1735 for i in range_inclusive(1001, 2000) {
1736 assert!(!m.contains_key(&i));
1740 for i in range_inclusive(1, 1000) {
1741 assert!(m.remove(&i));
1743 for j in range_inclusive(1, i) {
1744 assert!(!m.contains_key(&j));
1747 for j in range_inclusive(i+1, 1000) {
1748 assert!(m.contains_key(&j));
1752 for i in range_inclusive(1, 1000) {
1753 assert!(!m.contains_key(&i));
1756 for i in range_inclusive(1, 1000) {
1757 assert!(m.insert(i, i));
1761 for i in range_step_inclusive(1000, 1, -1) {
1762 assert!(m.remove(&i));
1764 for j in range_inclusive(i, 1000) {
1765 assert!(!m.contains_key(&j));
1768 for j in range_inclusive(1, i-1) {
1769 assert!(m.contains_key(&j));
1776 fn test_find_mut() {
1777 let mut m = HashMap::new();
1778 assert!(m.insert(1, 12));
1779 assert!(m.insert(2, 8));
1780 assert!(m.insert(5, 14));
1782 match m.find_mut(&5) {
1783 None => fail!(), Some(x) => *x = new
1785 assert_eq!(m.find(&5), Some(&new));
1789 fn test_insert_overwrite() {
1790 let mut m = HashMap::new();
1791 assert!(m.insert(1, 2));
1792 assert_eq!(*m.find(&1).unwrap(), 2);
1793 assert!(!m.insert(1, 3));
1794 assert_eq!(*m.find(&1).unwrap(), 3);
1798 fn test_insert_conflicts() {
1799 let mut m = HashMap::with_capacity(4);
1800 assert!(m.insert(1, 2));
1801 assert!(m.insert(5, 3));
1802 assert!(m.insert(9, 4));
1803 assert_eq!(*m.find(&9).unwrap(), 4);
1804 assert_eq!(*m.find(&5).unwrap(), 3);
1805 assert_eq!(*m.find(&1).unwrap(), 2);
1809 fn test_conflict_remove() {
1810 let mut m = HashMap::with_capacity(4);
1811 assert!(m.insert(1, 2));
1812 assert_eq!(*m.find(&1).unwrap(), 2);
1813 assert!(m.insert(5, 3));
1814 assert_eq!(*m.find(&1).unwrap(), 2);
1815 assert_eq!(*m.find(&5).unwrap(), 3);
1816 assert!(m.insert(9, 4));
1817 assert_eq!(*m.find(&1).unwrap(), 2);
1818 assert_eq!(*m.find(&5).unwrap(), 3);
1819 assert_eq!(*m.find(&9).unwrap(), 4);
1820 assert!(m.remove(&1));
1821 assert_eq!(*m.find(&9).unwrap(), 4);
1822 assert_eq!(*m.find(&5).unwrap(), 3);
1826 fn test_is_empty() {
1827 let mut m = HashMap::with_capacity(4);
1828 assert!(m.insert(1, 2));
1829 assert!(!m.is_empty());
1830 assert!(m.remove(&1));
1831 assert!(m.is_empty());
1836 let mut m = HashMap::new();
1838 assert_eq!(m.pop(&1), Some(2));
1839 assert_eq!(m.pop(&1), None);
1843 #[allow(experimental)]
1844 fn test_pop_equiv() {
1845 let mut m = HashMap::new();
1847 assert_eq!(m.pop_equiv(&KindaIntLike(1)), Some(2));
1848 assert_eq!(m.pop_equiv(&KindaIntLike(1)), None);
1853 let mut m = HashMap::new();
1854 assert_eq!(m.swap(1, 2), None);
1855 assert_eq!(m.swap(1, 3), Some(2));
1856 assert_eq!(m.swap(1, 4), Some(3));
1860 fn test_move_iter() {
1862 let mut hm = HashMap::new();
1870 let v = hm.move_iter().collect::<Vec<(char, int)>>();
1871 assert!([('a', 1), ('b', 2)] == v.as_slice() || [('b', 2), ('a', 1)] == v.as_slice());
1876 let mut m = HashMap::with_capacity(4);
1877 for i in range(0u, 32) {
1878 assert!(m.insert(i, i*2));
1880 assert_eq!(m.len(), 32);
1882 let mut observed = 0;
1884 for (k, v) in m.iter() {
1885 assert_eq!(*v, *k * 2);
1886 observed |= 1 << *k;
1888 assert_eq!(observed, 0xFFFF_FFFF);
1893 let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')];
1894 let map = vec.move_iter().collect::<HashMap<int, char>>();
1895 let keys = map.keys().map(|&k| k).collect::<Vec<int>>();
1896 assert_eq!(keys.len(), 3);
1897 assert!(keys.contains(&1));
1898 assert!(keys.contains(&2));
1899 assert!(keys.contains(&3));
1904 let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')];
1905 let map = vec.move_iter().collect::<HashMap<int, char>>();
1906 let values = map.values().map(|&v| v).collect::<Vec<char>>();
1907 assert_eq!(values.len(), 3);
1908 assert!(values.contains(&'a'));
1909 assert!(values.contains(&'b'));
1910 assert!(values.contains(&'c'));
1915 let mut m = HashMap::new();
1916 assert!(m.find(&1).is_none());
1920 Some(v) => assert!(*v == 2)
1926 let mut m1 = HashMap::new();
1931 let mut m2 = HashMap::new();
1944 let mut m = HashMap::new();
1946 assert_eq!(m.len(), 0);
1947 assert!(m.is_empty());
1950 let old_resize_at = m.grow_at;
1951 while old_resize_at == m.grow_at {
1956 assert_eq!(m.len(), i);
1957 assert!(!m.is_empty());
1961 fn test_find_equiv() {
1962 let mut m = HashMap::new();
1964 let (foo, bar, baz) = (1,2,3);
1965 m.insert(~"foo", foo);
1966 m.insert(~"bar", bar);
1967 m.insert(~"baz", baz);
1970 assert_eq!(m.find_equiv(&("foo")), Some(&foo));
1971 assert_eq!(m.find_equiv(&("bar")), Some(&bar));
1972 assert_eq!(m.find_equiv(&("baz")), Some(&baz));
1974 assert_eq!(m.find_equiv(&("qux")), None);
1978 fn test_from_iter() {
1979 let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];
1981 let map: HashMap<int, int> = xs.iter().map(|&x| x).collect();
1983 for &(k, v) in xs.iter() {
1984 assert_eq!(map.find(&k), Some(&v));
1992 use std::container::Container;
1993 use std::slice::ImmutableEqVector;
1996 fn test_disjoint() {
1997 let mut xs = HashSet::new();
1998 let mut ys = HashSet::new();
1999 assert!(xs.is_disjoint(&ys));
2000 assert!(ys.is_disjoint(&xs));
2001 assert!(xs.insert(5));
2002 assert!(ys.insert(11));
2003 assert!(xs.is_disjoint(&ys));
2004 assert!(ys.is_disjoint(&xs));
2005 assert!(xs.insert(7));
2006 assert!(xs.insert(19));
2007 assert!(xs.insert(4));
2008 assert!(ys.insert(2));
2009 assert!(ys.insert(-11));
2010 assert!(xs.is_disjoint(&ys));
2011 assert!(ys.is_disjoint(&xs));
2012 assert!(ys.insert(7));
2013 assert!(!xs.is_disjoint(&ys));
2014 assert!(!ys.is_disjoint(&xs));
2018 fn test_subset_and_superset() {
2019 let mut a = HashSet::new();
2020 assert!(a.insert(0));
2021 assert!(a.insert(5));
2022 assert!(a.insert(11));
2023 assert!(a.insert(7));
2025 let mut b = HashSet::new();
2026 assert!(b.insert(0));
2027 assert!(b.insert(7));
2028 assert!(b.insert(19));
2029 assert!(b.insert(250));
2030 assert!(b.insert(11));
2031 assert!(b.insert(200));
2033 assert!(!a.is_subset(&b));
2034 assert!(!a.is_superset(&b));
2035 assert!(!b.is_subset(&a));
2036 assert!(!b.is_superset(&a));
2038 assert!(b.insert(5));
2040 assert!(a.is_subset(&b));
2041 assert!(!a.is_superset(&b));
2042 assert!(!b.is_subset(&a));
2043 assert!(b.is_superset(&a));
2048 let mut a = HashSet::new();
2049 for i in range(0u, 32) {
2050 assert!(a.insert(i));
2052 let mut observed = 0;
2054 observed |= 1 << *k;
2056 assert_eq!(observed, 0xFFFF_FFFF);
2060 fn test_intersection() {
2061 let mut a = HashSet::new();
2062 let mut b = HashSet::new();
2064 assert!(a.insert(11));
2065 assert!(a.insert(1));
2066 assert!(a.insert(3));
2067 assert!(a.insert(77));
2068 assert!(a.insert(103));
2069 assert!(a.insert(5));
2070 assert!(a.insert(-5));
2072 assert!(b.insert(2));
2073 assert!(b.insert(11));
2074 assert!(b.insert(77));
2075 assert!(b.insert(-9));
2076 assert!(b.insert(-42));
2077 assert!(b.insert(5));
2078 assert!(b.insert(3));
2081 let expected = [3, 5, 11, 77];
2082 for x in a.intersection(&b) {
2083 assert!(expected.contains(x));
2086 assert_eq!(i, expected.len());
2090 fn test_difference() {
2091 let mut a = HashSet::new();
2092 let mut b = HashSet::new();
2094 assert!(a.insert(1));
2095 assert!(a.insert(3));
2096 assert!(a.insert(5));
2097 assert!(a.insert(9));
2098 assert!(a.insert(11));
2100 assert!(b.insert(3));
2101 assert!(b.insert(9));
2104 let expected = [1, 5, 11];
2105 for x in a.difference(&b) {
2106 assert!(expected.contains(x));
2109 assert_eq!(i, expected.len());
2113 fn test_symmetric_difference() {
2114 let mut a = HashSet::new();
2115 let mut b = HashSet::new();
2117 assert!(a.insert(1));
2118 assert!(a.insert(3));
2119 assert!(a.insert(5));
2120 assert!(a.insert(9));
2121 assert!(a.insert(11));
2123 assert!(b.insert(-2));
2124 assert!(b.insert(3));
2125 assert!(b.insert(9));
2126 assert!(b.insert(14));
2127 assert!(b.insert(22));
2130 let expected = [-2, 1, 5, 11, 14, 22];
2131 for x in a.symmetric_difference(&b) {
2132 assert!(expected.contains(x));
2135 assert_eq!(i, expected.len());
2140 let mut a = HashSet::new();
2141 let mut b = HashSet::new();
2143 assert!(a.insert(1));
2144 assert!(a.insert(3));
2145 assert!(a.insert(5));
2146 assert!(a.insert(9));
2147 assert!(a.insert(11));
2148 assert!(a.insert(16));
2149 assert!(a.insert(19));
2150 assert!(a.insert(24));
2152 assert!(b.insert(-2));
2153 assert!(b.insert(1));
2154 assert!(b.insert(5));
2155 assert!(b.insert(9));
2156 assert!(b.insert(13));
2157 assert!(b.insert(19));
2160 let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24];
2161 for x in a.union(&b) {
2162 assert!(expected.contains(x));
2165 assert_eq!(i, expected.len());
2169 fn test_from_iter() {
2170 let xs = [1, 2, 3, 4, 5, 6, 7, 8, 9];
2172 let set: HashSet<int> = xs.iter().map(|&x| x).collect();
2174 for x in xs.iter() {
2175 assert!(set.contains(x));
2180 fn test_move_iter() {
2182 let mut hs = HashSet::new();
2190 let v = hs.move_iter().collect::<Vec<char>>();
2191 assert!(['a', 'b'] == v.as_slice() || ['b', 'a'] == v.as_slice());
2196 // These constants once happened to expose a bug in insert().
2197 // I'm keeping them around to prevent a regression.
2198 let mut s1 = HashSet::new();
2204 let mut s2 = HashSet::new();
2218 let mut set: HashSet<int> = HashSet::new();
2219 let empty: HashSet<int> = HashSet::new();
2224 let set_str = format!("{}", set);
2226 assert!(set_str == ~"{1, 2}" || set_str == ~"{2, 1}");
2227 assert_eq!(format!("{}", empty), ~"{}");
2234 use self::test::Bencher;
2235 use std::iter::{range_inclusive};
2238 fn insert(b: &mut Bencher) {
2241 let mut m = HashMap::new();
2243 for i in range_inclusive(1, 1000) {
2256 fn find_existing(b: &mut Bencher) {
2259 let mut m = HashMap::new();
2261 for i in range_inclusive(1, 1000) {
2266 m.contains_key(&412);
2271 fn find_nonexisting(b: &mut Bencher) {
2274 let mut m = HashMap::new();
2276 for i in range_inclusive(1, 1000) {
2281 m.contains_key(&2048);
2286 fn hashmap_as_queue(b: &mut Bencher) {
2289 let mut m = HashMap::new();
2291 for i in range_inclusive(1, 1000) {
2299 m.insert(k + 1000, k + 1000);
2305 fn find_pop_insert(b: &mut Bencher) {
2308 let mut m = HashMap::new();
2310 for i in range_inclusive(1, 1000) {
2318 m.find(&(k + 2000));
2320 m.insert(k + 1000, k + 1000);