1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types)
13 use std::container::{Container, Mutable, Map, MutableMap, Set, MutableSet};
14 use std::clone::Clone;
15 use std::cmp::{Eq, Equiv, max};
16 use std::default::Default;
19 use std::hash::{Hash, Hasher, sip};
21 use std::iter::{Iterator, FromIterator, Extendable};
22 use std::iter::{FilterMap, Chain, Repeat, Zip};
23 use std::iter::{range, range_inclusive};
24 use std::mem::replace;
26 use std::option::{Option, Some, None};
29 use std::result::{Ok, Err};
30 use std::slice::ImmutableVector;
33 use std::clone::Clone;
35 use std::hash::{Hash, Hasher};
36 use std::kinds::marker;
38 use std::num::CheckedMul;
39 use std::option::{Option, Some, None};
40 use std::prelude::Drop;
43 use std::rt::global_heap;
44 use std::intrinsics::{size_of, transmute, move_val_init};
45 use std::iter::{Iterator, range_step_inclusive};
47 static EMPTY_BUCKET: u64 = 0u64;
49 /// The raw hashtable, providing safe-ish access to the unzipped and highly
50 /// optimized arrays of hashes, keys, and values.
52 /// This design uses less memory and is a lot faster than the naive
53 /// `~[Option<u64, K, V>]`, because we don't pay for the overhead of an
54 /// option on every element, and we get a generally more cache-aware design.
56 /// Key invariants of this structure:
58 /// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
59 /// 'undefined' contents. Don't read from them. This invariant is
60 /// enforced outside this module with the [EmptyIndex], [FullIndex],
61 /// and [SafeHash] types/concepts.
63 /// - An `EmptyIndex` is only constructed for a bucket at an index with
64 /// a hash of EMPTY_BUCKET.
66 /// - A `FullIndex` is only constructed for a bucket at an index with a
67 /// non-EMPTY_BUCKET hash.
69 /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
70 /// around hashes of zero by changing them to 0x800_0000, which will
71 /// likely hash to the same bucket, but not be represented as "empty".
73 /// - All three "arrays represented by pointers" are the same length:
74 /// `capacity`. This is set at creation and never changes. The arrays
75 /// are unzipped to save space (we don't have to pay for the padding
76 /// between odd sized elements, such as in a map from u64 to u8), and
77 /// be more cache aware (scanning through 8 hashes brings in 2 cache
78 /// lines, since they're all right beside each other).
80 /// You can kind of think of this module/data structure as a safe wrapper
81 /// around just the "table" part of the hashtable. It enforces some
82 /// invariants at the type level and employs some performance trickery,
83 /// but in general is just a tricked out `~[Option<u64, K, V>]`.
87 /// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried,
88 /// isn't yet totally safe. There's a "known exploit" that you can create
89 /// multiple FullIndexes for a bucket, `take` one, and then still `take`
90 /// the other causing undefined behavior. Currently, there's no story
91 /// for how to protect against this statically. Therefore, there are asserts
92 /// on `take`, `get`, `get_mut`, and `put` which check the bucket state.
93 /// With time, and when we're confident this works correctly, they should
94 /// be removed. Also, the bounds check in `peek` is especially painful,
95 /// as that's called in the innermost loops of the hashtable and has the
96 /// potential to be a major performance drain. Remove this too.
98 /// Or, better than remove, only enable these checks for debug builds.
99 /// There's currently no "debug-only" asserts in rust, so if you're reading
100 /// this and going "what? of course there are debug-only asserts!", then
101 /// please make this use them!
102 pub struct RawTable<K, V> {
105 priv hashes: *mut u64,
110 /// Represents an index into a `RawTable` with no key or value in it.
111 pub struct EmptyIndex {
113 priv nopod: marker::NoPod,
116 /// Represents an index into a `RawTable` with a key, value, and hash
118 pub struct FullIndex {
121 priv nopod: marker::NoPod,
125 /// Since we get the hash for free whenever we check the bucket state,
126 /// this function is provided for fast access, letting us avoid making
127 /// redundant trips back to the hashtable.
128 pub fn hash(&self) -> SafeHash { self.hash }
130 /// Same comment as with `hash`.
131 pub fn raw_index(&self) -> uint { self.idx as uint }
134 /// Represents the state of a bucket: it can either have a key/value
135 /// pair (be full) or not (be empty). You cannot `take` empty buckets,
136 /// and you cannot `put` into full buckets.
137 pub enum BucketState {
142 /// A hash that is not zero, since we use that to represent empty buckets.
143 pub struct SafeHash {
148 /// Peek at the hash value, which is guaranteed to be non-zero.
149 pub fn inspect(&self) -> u64 { self.hash }
152 impl Eq for SafeHash {
153 fn eq(&self, other: &SafeHash) -> bool { self.hash == other.hash }
156 /// We need to remove hashes of 0. That's reserved for empty buckets.
157 /// This function wraps up `hash_keyed` to be the only way outside this
158 /// module to generate a SafeHash.
159 pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
160 match hasher.hash(t) {
161 // This constant is exceedingly likely to hash to the same
162 // bucket, but it won't be counted as empty!
163 EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
164 h => SafeHash { hash: h },
168 impl<K, V> RawTable<K, V> {
170 /// Does not initialize the buckets. The caller should ensure they,
171 /// at the very least, set every hash to EMPTY_BUCKET.
172 unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> {
174 capacity.checked_mul(&size_of::<u64>()).expect("capacity overflow");
176 capacity.checked_mul(&size_of::< K >()).expect("capacity overflow");
178 capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");
181 The following code was my first pass at making RawTable only
182 allocate a single buffer, since that's all it needs. There's
183 no logical reason for this to require three calls to malloc.
185 However, I'm not convinced the code below is correct. If you
186 want to take a stab at it, please do! The alignment is
187 especially tricky to get right, especially if you need more
188 alignment than malloc guarantees.
190 let hashes_offset = 0;
191 let keys_offset = align_size(hashes_offset + hashes_size, keys_align);
192 let vals_offset = align_size(keys_offset + keys_size, vals_align);
193 let end = vals_offset + vals_size;
195 let buffer = global_heap::malloc_raw(end);
197 let hashes = buffer.offset(hashes_offset) as *mut u64;
198 let keys = buffer.offset(keys_offset) as *mut K;
199 let vals = buffer.offset(vals_offset) as *mut V;
203 let hashes = global_heap::malloc_raw(hashes_size) as *mut u64;
204 let keys = global_heap::malloc_raw(keys_size) as *mut K;
205 let vals = global_heap::malloc_raw(vals_size) as *mut V;
218 /// Creates a new raw table from a given capacity. All buckets are
220 pub fn new(capacity: uint) -> RawTable<K, V> {
222 let ret = RawTable::new_uninitialized(capacity);
224 for i in range(0, ret.capacity() as int) {
225 *ret.hashes.offset(i) = EMPTY_BUCKET;
232 /// Reads a bucket at a given index, returning an enum indicating whether
233 /// there's anything there or not. You need to match on this enum to get
234 /// the appropriate types to pass on to most of the rest of the functions
236 pub fn peek(&self, index: uint) -> BucketState {
238 if cfg!(test) { assert!(index < self.capacity) }
240 let idx = index as int;
241 let hash = unsafe { *self.hashes.offset(idx) };
243 let nopod = marker::NoPod;
254 hash: SafeHash { hash: full_hash },
260 /// Gets references to the key and value at a given index.
261 pub fn read<'a>(&'a self, index: &FullIndex) -> (&'a K, &'a V) {
266 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
267 (&'a *self.keys.offset(idx),
268 &'a *self.vals.offset(idx))
272 /// Gets references to the key and value at a given index, with the
273 /// value's reference being mutable.
274 pub fn read_mut<'a>(&'a mut self, index: &FullIndex) -> (&'a K, &'a mut V) {
279 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
280 (&'a *self.keys.offset(idx),
281 &'a mut *self.vals.offset(idx))
285 /// Read everything, mutably.
286 pub fn read_all_mut<'a>(&'a mut self, index: &FullIndex)
287 -> (&'a mut SafeHash, &'a mut K, &'a mut V) {
290 // I'm totally abusing the fact that a pointer to any u64 in the
291 // hashtable at a full index is a safe hash. Thanks to `SafeHash`
292 // just being a wrapper around u64, this is true. It's just really
293 // really really really unsafe. However, the exposed API is now
294 // impossible to get wrong. You cannot insert an empty hash into
299 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
300 (transmute(self.hashes.offset(idx)),
301 &'a mut *self.keys.offset(idx),
302 &'a mut *self.vals.offset(idx))
306 /// Puts a key and value pair, along with the key's hash, into a given
307 /// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
308 /// function, because that slot will no longer be empty when we return!
309 /// Because we know this, a FullIndex is returned for later use, pointing
310 /// to the newly-filled slot in the hashtable.
312 /// Use `make_hash` to construct a `SafeHash` to pass to this function.
313 pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
318 if cfg!(test) { assert!(*self.hashes.offset(idx) == EMPTY_BUCKET) }
319 *self.hashes.offset(idx) = hash.inspect();
320 move_val_init(&mut *self.keys.offset(idx), k);
321 move_val_init(&mut *self.vals.offset(idx), v);
326 FullIndex { idx: idx, hash: hash, nopod: marker::NoPod }
329 /// Removes a key and value from the hashtable.
331 /// This works similarly to `put`, building an `EmptyIndex` out of the
333 pub fn take(&mut self, index: FullIndex) -> (EmptyIndex, K, V) {
338 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
340 let hash_ptr = self.hashes.offset(idx);
342 *hash_ptr = EMPTY_BUCKET;
344 // Drop the mutable constraint.
345 let keys = self.keys as *K;
346 let vals = self.vals as *V;
348 let k = ptr::read(keys.offset(idx));
349 let v = ptr::read(vals.offset(idx));
353 (EmptyIndex { idx: idx, nopod: marker::NoPod }, k, v)
357 /// The hashtable's capacity, similar to a vector's.
358 pub fn capacity(&self) -> uint {
362 /// The number of elements ever `put` in the hashtable, minus the number
363 /// of elements ever `take`n.
364 pub fn size(&self) -> uint {
368 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
369 Entries { table: self, idx: 0 }
372 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
373 MutEntries { table: self, idx: 0 }
376 pub fn move_iter(self) -> MoveEntries<K, V> {
377 MoveEntries { table: self, idx: 0 }
381 pub struct Entries<'a, K, V> {
382 priv table: &'a RawTable<K, V>,
386 pub struct MutEntries<'a, K, V> {
387 priv table: &'a mut RawTable<K, V>,
391 pub struct MoveEntries<K, V> {
392 priv table: RawTable<K, V>,
396 impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
397 fn next(&mut self) -> Option<(&'a K, &'a V)> {
398 while self.idx < self.table.capacity() {
402 match self.table.peek(i) {
404 Full(idx) => return Some(self.table.read(&idx))
411 fn size_hint(&self) -> (uint, Option<uint>) {
412 let size = self.table.size() - self.idx;
417 impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {
418 fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
419 while self.idx < self.table.capacity() {
423 match self.table.peek(i) {
425 // the transmute here fixes:
426 // error: lifetime of `self` is too short to guarantee its contents
427 // can be safely reborrowed
428 Full(idx) => unsafe {
429 return Some(transmute(self.table.read_mut(&idx)))
437 fn size_hint(&self) -> (uint, Option<uint>) {
438 let size = self.table.size() - self.idx;
443 impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> {
444 fn next(&mut self) -> Option<(SafeHash, K, V)> {
445 while self.idx < self.table.capacity() {
449 match self.table.peek(i) {
453 let (_, k, v) = self.table.take(idx);
454 return Some((h, k, v));
462 fn size_hint(&self) -> (uint, Option<uint>) {
463 let size = self.table.size();
468 impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
469 fn clone(&self) -> RawTable<K, V> {
471 let mut new_ht = RawTable::new_uninitialized(self.capacity());
473 for i in range(0, self.capacity()) {
476 *new_ht.hashes.offset(i as int) = EMPTY_BUCKET;
479 let hash = idx.hash().inspect();
480 let (k, v) = self.read(&idx);
481 *new_ht.hashes.offset(i as int) = hash;
482 move_val_init(&mut *new_ht.keys.offset(i as int), (*k).clone());
483 move_val_init(&mut *new_ht.vals.offset(i as int), (*v).clone());
488 new_ht.size = self.size();
498 impl<K, V> Drop for RawTable<K, V> {
500 // Ideally, this should be in reverse, since we're likely to have
501 // partially taken some elements out with `.move_iter()` from the
503 for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
504 // Check if the size is 0, so we don't do a useless scan when
505 // dropping empty tables such as on resize.
507 if self.size == 0 { break }
509 match self.peek(i as uint) {
511 Full(idx) => { self.take(idx); }
515 assert!(self.size == 0);
518 libc::free(self.vals as *mut libc::c_void);
519 libc::free(self.keys as *mut libc::c_void);
520 libc::free(self.hashes as *mut libc::c_void);
526 // We use this type for the load factor, to avoid floating point operations
527 // which might not be supported efficiently on some hardware.
529 // We use small u16s here to save space in the hashtable. They get upcasted
530 // to u64s when we actually use them.
531 type Fraction = (u16, u16); // (numerator, denominator)
533 // multiplication by a fraction, in a way that won't generally overflow for
534 // array sizes outside a factor of 10 of U64_MAX.
535 fn fraction_mul(lhs: uint, (num, den): Fraction) -> uint {
536 (((lhs as u64) * (num as u64)) / (den as u64)) as uint
539 static INITIAL_LOG2_CAP: uint = 5;
540 static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
541 static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
543 // The main performance trick in this hashmap is called Robin Hood Hashing.
544 // It gains its excellent performance from one key invariant:
546 // If an insertion collides with an existing element, and that elements
547 // "probe distance" (how far away the element is from its ideal location)
548 // is higher than how far we've already probed, swap the elements.
550 // This massively lowers variance in probe distance, and allows us to get very
551 // high load factors with good performance. The 90% load factor I use is rather
554 // > Why a load factor of 90%?
556 // In general, all the distances to inital buckets will converge on the mean.
557 // At a load factor of α, the odds of finding the target bucket after k
558 // probes is approximately 1-α^k. If we set this equal to 50% (since we converge
559 // on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round
560 // this down to 0.90 to make the math easier on the CPU and avoid its FPU.
561 // Since on average we start the probing in the middle of a cache line, this
562 // strategy pulls in two cache lines of hashes on every lookup. I think that's
563 // pretty good, but if you want to trade off some space, it could go down to one
564 // cache line on average with an α of 0.84.
566 // > Wait, what? Where did you get 1-α^k from?
568 // On the first probe, your odds of a collision with an existing element is α.
569 // The odds of doing this twice in a row is approximatelly α^2. For three times,
570 // α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT
571 // colliding after k tries is 1-α^k.
573 // Future Improvements (FIXME!)
574 // ============================
576 // Allow the load factor to be changed dynamically and/or at initialization.
577 // I'm having trouble figuring out a sane API for this without exporting my
578 // hackish fraction type, while still avoiding floating point.
580 // Also, would it be possible for us to reuse storage when growing the
581 // underlying table? This is exactly the use case for 'realloc', and may
582 // be worth exploring.
584 // Future Optimizations (FIXME!)
585 // =============================
587 // The paper cited below mentions an implementation which keeps track of the
588 // distance-to-initial-bucket histogram. I'm suspicious of this approach because
589 // it requires maintaining an internal map. If this map were replaced with a
590 // hashmap, it would be faster, but now our data structure is self-referential
591 // and blows up. Also, this allows very good first guesses, but array accesses
592 // are no longer linear and in one direction, as we have now. There is also
593 // memory and cache pressure that this map would entail that would be very
594 // difficult to properly see in a microbenchmark.
596 // Another possible design choice that I made without any real reason is
597 // parameterizing the raw table over keys and values. Technically, all we need
598 // is the size and alignment of keys and values, and the code should be just as
599 // efficient (well, we might need one for power-of-two size and one for not...).
600 // This has the potential to reduce code bloat in rust executables, without
601 // really losing anything except 4 words (key size, key alignment, val size,
602 // val alignment) which can be passed in to every call of a `RawTable` function.
603 // This would definitely be an avenue worth exploring if people start complaining
604 // about the size of rust executables.
606 // There's also two optimizations that have been omitted regarding how the
607 // hashtable allocates. The first is that a hashtable which never has an element
608 // inserted should not allocate. I'm suspicious of this one, because supporting
609 // that internally gains no performance over just using an
610 // `Option<HashMap<K, V>>`, and is significantly more complicated.
612 // The second omitted allocation optimization is that right now we allocate three
613 // arrays to back the hashtable. This is wasteful. In theory, we only need one
614 // array, and each of the three original arrays can just be slices of it. This
615 // would reduce the pressure on the allocator, and will play much nicer with the
616 // rest of the system. An initial implementation is commented out in
617 // `table::RawTable::new`, but I'm not confident it works for all sane alignments,
618 // especially if a type needs more alignment than `malloc` provides.
620 /// A hash map implementation which uses linear probing with Robin
621 /// Hood bucket stealing.
623 /// The hashes are all keyed by the task-local random number generator
624 /// on creation by default, this means the ordering of the keys is
625 /// randomized, but makes the tables more resistant to
626 /// denial-of-service attacks (Hash DoS). This behaviour can be
627 /// overriden with one of the constructors.
629 /// It is required that the keys implement the `Eq` and `Hash` traits, although
630 /// this can frequently be achieved by using `#[deriving(Eq, Hash)]`.
632 /// Relevant papers/articles:
634 /// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf)
635 /// 2. Emmanuel Goossaert. ["Robin Hood
636 /// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/)
637 /// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift
638 /// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/)
643 /// use collections::HashMap;
645 /// // type inference lets us omit an explicit type signature (which
646 /// // would be `HashMap<&str, &str>` in this example).
647 /// let mut book_reviews = HashMap::new();
649 /// // review some books.
650 /// book_reviews.insert("Adventures of Hucklebury Fin", "My favorite book.");
651 /// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece.");
652 /// book_reviews.insert("Pride and Prejudice", "Very enjoyable.");
653 /// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
655 /// // check for a specific one.
656 /// if !book_reviews.contains_key(& &"Les Misérables") {
657 /// println!("We've got {} reviews, but Les Misérables ain't one.",
658 /// book_reviews.len());
661 /// // oops, this review has a lot of spelling mistakes, let's delete it.
662 /// book_reviews.remove(& &"The Adventures of Sherlock Holmes");
664 /// // look up the values associated with some keys.
665 /// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
666 /// for book in to_find.iter() {
667 /// match book_reviews.find(book) {
668 /// Some(review) => println!("{}: {}", *book, *review),
669 /// None => println!("{} is unreviewed.", *book)
673 /// // iterate over everything.
674 /// for (book, review) in book_reviews.iter() {
675 /// println!("{}: \"{}\"", *book, *review);
679 pub struct HashMap<K, V, H = sip::SipHasher> {
680 // All hashes are keyed on these values, to prevent hash collision attacks.
683 // When size == grow_at, we double the capacity.
686 // The capacity must never drop below this.
687 priv minimum_capacity: uint,
689 priv table: table::RawTable<K, V>,
691 // We keep this at the end since it's 4-bytes, unlike everything else
692 // in this struct. Might as well save a word of padding!
693 priv load_factor: Fraction,
696 /// Get the number of elements which will force the capacity to grow.
697 fn grow_at(capacity: uint, load_factor: Fraction) -> uint {
698 fraction_mul(capacity, load_factor)
701 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
702 /// Get the number of elements which will force the capacity to shrink.
703 /// When size == self.shrink_at(), we halve the capacity.
704 fn shrink_at(&self) -> uint {
705 self.table.capacity() >> 2
708 // Probe the `idx`th bucket for a given hash, returning the index of the
711 // This exploits the power-of-two size of the hashtable. As long as this
712 // is always true, we can use a bitmask of cap-1 to do modular arithmetic.
714 // Prefer to use this with increasing values of `idx` rather than repeatedly
715 // calling `probe_next`. This reduces data-dependencies between loops, which
716 // can help the optimizer, and certainly won't hurt it. `probe_next` is
717 // simply for convenience, and is no more efficient than `probe`.
718 fn probe(&self, hash: &table::SafeHash, idx: uint) -> uint {
719 let hash_mask = self.table.capacity() - 1;
721 // So I heard a rumor that unsigned overflow is safe in rust..
722 ((hash.inspect() as uint) + idx) & hash_mask
725 // Generate the next probe in a sequence. Prefer to use 'probe' by itself,
726 // but this can sometimes be useful.
727 fn probe_next(&self, probe: uint) -> uint {
728 let hash_mask = self.table.capacity() - 1;
729 (probe + 1) & hash_mask
732 fn make_hash<X: Hash<S>>(&self, x: &X) -> table::SafeHash {
733 table::make_hash(&self.hasher, x)
736 /// Get the distance of the bucket at the given index that it lies
737 /// from its 'ideal' location.
739 /// In the cited blog posts above, this is called the "distance to
740 /// inital bucket", or DIB.
741 fn bucket_distance(&self, index_of_elem: &table::FullIndex) -> uint {
742 // where the hash of the element that happens to reside at
743 // `index_of_elem` tried to place itself first.
744 let first_probe_index = self.probe(&index_of_elem.hash(), 0);
746 let raw_index = index_of_elem.raw_index();
748 if first_probe_index <= raw_index {
749 // probe just went forward
750 raw_index - first_probe_index
752 // probe wrapped around the hashtable
753 raw_index + (self.table.capacity() - first_probe_index)
757 /// Search for a pre-hashed key.
758 fn search_hashed_generic(&self, hash: &table::SafeHash, is_match: |&K| -> bool)
759 -> Option<table::FullIndex> {
760 for num_probes in range(0u, self.table.size()) {
761 let probe = self.probe(hash, num_probes);
763 let idx = match self.table.peek(probe) {
764 table::Empty(_) => return None, // hit an empty bucket
765 table::Full(idx) => idx
768 // We can finish the search early if we hit any bucket
769 // with a lower distance to initial bucket than we've probed.
770 if self.bucket_distance(&idx) < num_probes { return None }
772 // If the hash doesn't match, it can't be this one..
773 if hash != &idx.hash() { continue }
775 let (k, _) = self.table.read(&idx);
777 // If the key doesn't match, it can't be this one..
778 if !is_match(k) { continue }
786 fn search_hashed(&self, hash: &table::SafeHash, k: &K) -> Option<table::FullIndex> {
787 self.search_hashed_generic(hash, |k_| *k == *k_)
790 fn search_equiv<Q: Hash<S> + Equiv<K>>(&self, q: &Q) -> Option<table::FullIndex> {
791 self.search_hashed_generic(&self.make_hash(q), |k| q.equiv(k))
794 /// Search for a key, yielding the index if it's found in the hashtable.
795 /// If you already have the hash for the key lying around, use
797 fn search(&self, k: &K) -> Option<table::FullIndex> {
798 self.search_hashed(&self.make_hash(k), k)
802 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Container for HashMap<K, V, H> {
803 /// Return the number of elements in the map
804 fn len(&self) -> uint { self.table.size() }
807 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
808 /// Clear the map, removing all key-value pairs.
809 fn clear(&mut self) {
810 self.minimum_capacity = self.table.size();
812 for i in range(0, self.table.capacity()) {
813 match self.table.peek(i) {
814 table::Empty(_) => {},
815 table::Full(idx) => { self.table.take(idx); }
822 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> Map<K, V> for HashMap<K, V, H> {
823 fn find<'a>(&'a self, k: &K) -> Option<&'a V> {
824 self.search(k).map(|idx| {
825 let (_, v) = self.table.read(&idx);
830 fn contains_key(&self, k: &K) -> bool {
831 self.search(k).is_some()
835 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> MutableMap<K, V> for HashMap<K, V, H> {
836 fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> {
837 match self.search(k) {
840 let (_, v) = self.table.read_mut(&idx);
846 fn swap(&mut self, k: K, v: V) -> Option<V> {
847 let hash = self.make_hash(&k);
848 let potential_new_size = self.table.size() + 1;
849 self.make_some_room(potential_new_size);
851 for dib in range_inclusive(0u, self.table.size()) {
852 let probe = self.probe(&hash, dib);
854 let idx = match self.table.peek(probe) {
855 table::Empty(idx) => {
857 self.table.put(idx, hash, k, v);
860 table::Full(idx) => idx
863 if idx.hash() == hash {
864 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
866 // Found an existing value.
867 return Some(replace(bucket_v, v));
871 let probe_dib = self.bucket_distance(&idx);
874 // Found a luckier bucket. This implies that the key does not
875 // already exist in the hashtable. Just do a robin hood
877 self.robin_hood(idx, probe_dib, hash, k, v);
882 // We really shouldn't be here.
883 fail!("Internal HashMap error: Out of space.");
886 fn pop(&mut self, k: &K) -> Option<V> {
887 if self.table.size() == 0 {
891 let potential_new_size = self.table.size() - 1;
892 self.make_some_room(potential_new_size);
894 let starting_index = match self.search(k) {
899 let starting_probe = starting_index.raw_index();
902 let mut probe = self.probe_next(starting_probe);
903 for _ in range(0u, self.table.size()) {
904 match self.table.peek(probe) {
905 table::Empty(_) => {}, // empty bucket. this is the end of our shifting.
906 table::Full(idx) => {
907 // Bucket that isn't us, which has a non-zero probe distance.
908 // This isn't the ending index, so keep searching.
909 if self.bucket_distance(&idx) != 0 {
910 probe = self.probe_next(probe);
914 // if we do have a bucket_distance of zero, we're at the end
915 // of what we need to shift.
924 let (_, _, retval) = self.table.take(starting_index);
926 let mut probe = starting_probe;
927 let mut next_probe = self.probe_next(probe);
929 // backwards-shift all the elements after our newly-deleted one.
930 while next_probe != ending_probe {
931 match self.table.peek(next_probe) {
933 // nothing to shift in. just empty it out.
934 match self.table.peek(probe) {
935 table::Empty(_) => {},
936 table::Full(idx) => { self.table.take(idx); }
939 table::Full(next_idx) => {
940 // something to shift. move it over!
941 let next_hash = next_idx.hash();
942 let (_, next_key, next_val) = self.table.take(next_idx);
943 match self.table.peek(probe) {
944 table::Empty(idx) => {
945 self.table.put(idx, next_hash, next_key, next_val);
947 table::Full(idx) => {
948 let (emptyidx, _, _) = self.table.take(idx);
949 self.table.put(emptyidx, next_hash, next_key, next_val);
956 next_probe = self.probe_next(next_probe);
959 // Done the backwards shift, but there's still an element left!
961 match self.table.peek(probe) {
962 table::Empty(_) => {},
963 table::Full(idx) => { self.table.take(idx); }
966 // Now we're done all our shifting. Return the value we grabbed
972 impl<K: Hash + Eq, V> HashMap<K, V, sip::SipHasher> {
973 /// Create an empty HashMap.
974 pub fn new() -> HashMap<K, V, sip::SipHasher> {
975 HashMap::with_capacity(INITIAL_CAPACITY)
978 pub fn with_capacity(capacity: uint) -> HashMap<K, V, sip::SipHasher> {
979 let mut r = rand::task_rng();
982 let hasher = sip::SipHasher::new_with_keys(r0, r1);
983 HashMap::with_capacity_and_hasher(capacity, hasher)
987 impl<K: Eq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
988 pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
989 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
992 /// Create an empty HashMap with space for at least `capacity`
993 /// elements, using `hasher` to hash the keys.
995 /// Warning: `hasher` is normally randomly generated, and
996 /// is designed to allow HashMaps to be resistant to attacks that
997 /// cause many collisions and very poor performance. Setting it
998 /// manually using this function can expose a DoS attack vector.
999 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
1000 let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
1003 load_factor: INITIAL_LOAD_FACTOR,
1004 grow_at: grow_at(cap, INITIAL_LOAD_FACTOR),
1005 minimum_capacity: cap,
1006 table: table::RawTable::new(cap),
1010 /// The hashtable will never try to shrink below this size. You can use
1011 /// this function to reduce reallocations if your hashtable frequently
1012 /// grows and shrinks by large amounts.
1014 /// This function has no effect on the operational semantics of the
1015 /// hashtable, only on performance.
1016 pub fn reserve(&mut self, new_minimum_capacity: uint) {
1017 let cap = num::next_power_of_two(
1018 max(INITIAL_CAPACITY, new_minimum_capacity));
1020 self.minimum_capacity = cap;
1022 if self.table.capacity() < cap {
1027 /// Resizes the internal vectors to a new capacity. It's your responsibility to:
1028 /// 1) Make sure the new capacity is enough for all the elements, accounting
1029 /// for the load factor.
1030 /// 2) Ensure new_capacity is a power of two.
1031 fn resize(&mut self, new_capacity: uint) {
1032 assert!(self.table.size() <= new_capacity);
1033 assert!((new_capacity - 1) & new_capacity == 0);
1035 self.grow_at = grow_at(new_capacity, self.load_factor);
1037 let old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
1038 let old_size = old_table.size();
1040 for (h, k, v) in old_table.move_iter() {
1041 self.manual_insert_hashed_nocheck(h, k, v);
1044 assert_eq!(self.table.size(), old_size);
1047 /// Performs any necessary resize operations, such that there's space for
1048 /// new_size elements.
1049 fn make_some_room(&mut self, new_size: uint) {
1050 let should_shrink = new_size <= self.shrink_at();
1051 let should_grow = self.grow_at <= new_size;
1054 let new_capacity = self.table.capacity() << 1;
1055 self.resize(new_capacity);
1056 } else if should_shrink {
1057 let new_capacity = self.table.capacity() >> 1;
1059 // Never shrink below the minimum capacity
1060 if self.minimum_capacity <= new_capacity {
1061 self.resize(new_capacity);
1066 /// Perform robin hood bucket stealing at the given 'index'. You must
1067 /// also pass that probe's "distance to initial bucket" so we don't have
1068 /// to recalculate it, as well as the total number of probes already done
1069 /// so we have some sort of upper bound on the number of probes to do.
1071 /// 'hash', 'k', and 'v' are the elements to robin hood into the hashtable.
1072 fn robin_hood(&mut self, mut index: table::FullIndex, mut dib_param: uint,
1073 mut hash: table::SafeHash, mut k: K, mut v: V) {
1075 let (old_hash, old_key, old_val) = {
1076 let (old_hash_ref, old_key_ref, old_val_ref) =
1077 self.table.read_all_mut(&index);
1079 let old_hash = replace(old_hash_ref, hash);
1080 let old_key = replace(old_key_ref, k);
1081 let old_val = replace(old_val_ref, v);
1083 (old_hash, old_key, old_val)
1086 let mut probe = self.probe_next(index.raw_index());
1088 for dib in range(dib_param + 1, self.table.size()) {
1089 let full_index = match self.table.peek(probe) {
1090 table::Empty(idx) => {
1092 self.table.put(idx, old_hash, old_key, old_val);
1095 table::Full(idx) => idx
1098 let probe_dib = self.bucket_distance(&full_index);
1100 // Robin hood! Steal the spot.
1101 if probe_dib < dib {
1103 dib_param = probe_dib;
1110 probe = self.probe_next(probe);
1113 fail!("HashMap fatal error: 100% load factor?");
1117 /// Manually insert a pre-hashed key-value pair, without first checking
1118 /// that there's enough room in the buckets. Returns a reference to the
1119 /// newly insert value.
1121 /// If the key already exists, the hashtable will be returned untouched
1122 /// and a reference to the existing element will be returned.
1123 fn manual_insert_hashed_nocheck<'a>(
1124 &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
1126 for dib in range_inclusive(0u, self.table.size()) {
1127 let probe = self.probe(&hash, dib);
1129 let idx = match self.table.peek(probe) {
1130 table::Empty(idx) => {
1132 let fullidx = self.table.put(idx, hash, k, v);
1133 let (_, val) = self.table.read_mut(&fullidx);
1136 table::Full(idx) => idx
1139 if idx.hash() == hash {
1140 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
1141 // FIXME #12147 the conditional return confuses
1142 // borrowck if we return bucket_v directly
1143 let bv: *mut V = bucket_v;
1145 // Key already exists. Get its reference.
1146 return unsafe {&mut *bv};
1150 let probe_dib = self.bucket_distance(&idx);
1152 if probe_dib < dib {
1153 // Found a luckier bucket than me. Better steal his spot.
1154 self.robin_hood(idx, probe_dib, hash, k, v);
1156 // Now that it's stolen, just read the value's pointer
1157 // right out of the table!
1158 match self.table.peek(probe) {
1159 table::Empty(_) => fail!("Just stole a spot, but now that spot's empty."),
1160 table::Full(idx) => {
1161 let (_, v) = self.table.read_mut(&idx);
1168 // We really shouldn't be here.
1169 fail!("Internal HashMap error: Out of space.");
1172 fn manual_insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
1173 let potential_new_size = self.table.size() + 1;
1174 self.make_some_room(potential_new_size);
1175 self.manual_insert_hashed_nocheck(hash, k, v)
1178 /// Inserts an element, returning a reference to that element inside the
1180 fn manual_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1181 let hash = self.make_hash(&k);
1182 self.manual_insert_hashed(hash, k, v)
1185 /// Return the value corresponding to the key in the map, or insert
1186 /// and return the value if it doesn't exist.
1187 pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1188 match self.search(&k) {
1190 let (_, v_ref) = self.table.read_mut(&idx);
1193 None => self.manual_insert(k, v)
1197 /// Return the value corresponding to the key in the map, or create,
1198 /// insert, and return a new value if it doesn't exist.
1199 pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
1201 match self.search(&k) {
1203 let (_, v_ref) = self.table.read_mut(&idx);
1208 self.manual_insert(k, v)
1213 /// Insert a key-value pair into the map if the key is not already present.
1214 /// Otherwise, modify the existing value for the key.
1215 /// Returns the new or modified value for the key.
1216 pub fn insert_or_update_with<'a>(
1222 match self.search(&k) {
1223 None => self.manual_insert(k, v),
1225 let (_, v_ref) = self.table.read_mut(&idx);
1232 /// Retrieves a value for the given key, failing if the key is not present.
1233 pub fn get<'a>(&'a self, k: &K) -> &'a V {
1234 match self.find(k) {
1236 None => fail!("No entry found for key: {:?}", k)
1240 /// Retrieves a (mutable) value for the given key, failing if the key is not present.
1241 pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V {
1242 match self.find_mut(k) {
1244 None => fail!("No entry found for key: {:?}", k)
1248 /// Return true if the map contains a value for the specified key,
1249 /// using equivalence.
1250 pub fn contains_key_equiv<Q: Hash<S> + Equiv<K>>(&self, key: &Q) -> bool {
1251 self.search_equiv(key).is_some()
1254 /// Return the value corresponding to the key in the map, using
1256 pub fn find_equiv<'a, Q: Hash<S> + Equiv<K>>(&'a self, k: &Q) -> Option<&'a V> {
1257 match self.search_equiv(k) {
1260 let (_, v_ref) = self.table.read(&idx);
1266 /// An iterator visiting all keys in arbitrary order.
1267 /// Iterator element type is &'a K.
1268 pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
1269 self.iter().map(|(k, _v)| k)
1272 /// An iterator visiting all values in arbitrary order.
1273 /// Iterator element type is &'a V.
1274 pub fn values<'a>(&'a self) -> Values<'a, K, V> {
1275 self.iter().map(|(_k, v)| v)
1278 /// An iterator visiting all key-value pairs in arbitrary order.
1279 /// Iterator element type is (&'a K, &'a V).
1280 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
1284 /// An iterator visiting all key-value pairs in arbitrary order,
1285 /// with mutable references to the values.
1286 /// Iterator element type is (&'a K, &'a mut V).
1287 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
1288 self.table.mut_iter()
1291 /// Creates a consuming iterator, that is, one that moves each key-value
1292 /// pair out of the map in arbitrary order. The map cannot be used after
1294 pub fn move_iter(self) -> MoveEntries<K, V> {
1295 self.table.move_iter().map(|(_, k, v)| (k, v))
1299 impl<K: Eq + Hash<S>, V: Clone, S, H: Hasher<S>> HashMap<K, V, H> {
1300 /// Like `find`, but returns a copy of the value.
1301 pub fn find_copy(&self, k: &K) -> Option<V> {
1302 self.find(k).map(|v| (*v).clone())
1305 /// Like `get`, but returns a copy of the value.
1306 pub fn get_copy(&self, k: &K) -> V {
1307 (*self.get(k)).clone()
1311 impl<K: Eq + Hash<S>, V: Eq, S, H: Hasher<S>> Eq for HashMap<K, V, H> {
1312 fn eq(&self, other: &HashMap<K, V, H>) -> bool {
1313 if self.len() != other.len() { return false; }
1315 self.iter().all(|(key, value)| {
1316 match other.find(key) {
1318 Some(v) => *value == *v
1324 impl<K: Eq + Hash<S> + Show, V: Show, S, H: Hasher<S>> Show for HashMap<K, V, H> {
1325 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1326 try!(write!(f.buf, r"\{"));
1328 for (i, (k, v)) in self.iter().enumerate() {
1329 if i != 0 { try!(write!(f.buf, ", ")); }
1330 try!(write!(f.buf, "{}: {}", *k, *v));
1333 write!(f.buf, r"\}")
1337 impl<K: Eq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
1338 fn default() -> HashMap<K, V, H> {
1339 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, Default::default())
1343 /// HashMap iterator
1344 pub type Entries<'a, K, V> = table::Entries<'a, K, V>;
1346 /// HashMap mutable values iterator
1347 pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>;
1349 /// HashMap move iterator
1350 pub type MoveEntries<K, V> =
1351 iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries<K, V>>;
1353 /// HashMap keys iterator
1354 pub type Keys<'a, K, V> =
1355 iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>;
1357 /// HashMap values iterator
1358 pub type Values<'a, K, V> =
1359 iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>;
1361 impl<K: Eq + Hash<S>, V, S, H: Hasher<S> + Default> FromIterator<(K, V)> for HashMap<K, V, H> {
1362 fn from_iterator<T: Iterator<(K, V)>>(iter: &mut T) -> HashMap<K, V, H> {
1363 let (lower, _) = iter.size_hint();
1364 let mut map = HashMap::with_capacity_and_hasher(lower, Default::default());
1370 impl<K: Eq + Hash<S>, V, S, H: Hasher<S> + Default> Extendable<(K, V)> for HashMap<K, V, H> {
1371 fn extend<T: Iterator<(K, V)>>(&mut self, iter: &mut T) {
1372 for (k, v) in *iter {
1378 /// HashSet iterator
1379 pub type SetItems<'a, K> =
1380 iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;
1382 /// HashSet move iterator
1383 pub type SetMoveItems<K> =
1384 iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
1386 /// An implementation of a hash set using the underlying representation of a
1387 /// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
1388 /// requires that the elements implement the `Eq` and `Hash` traits.
1390 pub struct HashSet<T, H = sip::SipHasher> {
1391 priv map: HashMap<T, (), H>
1394 impl<T: Eq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
1395 // FIXME #11998: Since the value is a (), and `find` returns a Some(&()),
1396 // we trigger #11998 when matching on it. I've fallen back to manual
1397 // iteration until this is fixed.
1398 fn eq(&self, other: &HashSet<T, H>) -> bool {
1399 if self.len() != other.len() { return false; }
1401 self.iter().all(|key| other.map.contains_key(key))
1405 impl<T: Eq + Hash<S>, S, H: Hasher<S>> Container for HashSet<T, H> {
1406 /// Return the number of elements in the set
1407 fn len(&self) -> uint { self.map.len() }
1410 impl<T: Eq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> {
1411 /// Clear the set, removing all values.
1412 fn clear(&mut self) { self.map.clear() }
1415 impl<T: Eq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
1416 /// Return true if the set contains a value
1417 fn contains(&self, value: &T) -> bool { self.map.search(value).is_some() }
1419 /// Return true if the set has no elements in common with `other`.
1420 /// This is equivalent to checking for an empty intersection.
1421 fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
1422 self.iter().all(|v| !other.contains(v))
1425 /// Return true if the set is a subset of another
1426 fn is_subset(&self, other: &HashSet<T, H>) -> bool {
1427 self.iter().all(|v| other.contains(v))
1430 /// Return true if the set is a superset of another
1431 fn is_superset(&self, other: &HashSet<T, H>) -> bool {
1432 other.is_subset(self)
1436 impl<T: Eq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> {
1437 /// Add a value to the set. Return true if the value was not already
1438 /// present in the set.
1439 fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) }
1441 /// Remove a value from the set. Return true if the value was
1442 /// present in the set.
1443 fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
1446 impl<T: Hash + Eq> HashSet<T, sip::SipHasher> {
1447 /// Create an empty HashSet
1448 pub fn new() -> HashSet<T, sip::SipHasher> {
1449 HashSet::with_capacity(INITIAL_CAPACITY)
1452 /// Create an empty HashSet with space for at least `n` elements in
1454 pub fn with_capacity(capacity: uint) -> HashSet<T, sip::SipHasher> {
1455 HashSet { map: HashMap::with_capacity(capacity) }
1459 impl<T: Eq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
1460 pub fn with_hasher(hasher: H) -> HashSet<T, H> {
1461 HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
1464 /// Create an empty HashSet with space for at least `capacity`
1465 /// elements in the hash table, using `hasher` to hash the keys.
1467 /// Warning: `hasher` is normally randomly generated, and
1468 /// is designed to allow `HashSet`s to be resistant to attacks that
1469 /// cause many collisions and very poor performance. Setting it
1470 /// manually using this function can expose a DoS attack vector.
1471 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> {
1472 HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
1475 /// Reserve space for at least `n` elements in the hash table.
1476 pub fn reserve(&mut self, n: uint) {
1480 /// Returns true if the hash set contains a value equivalent to the
1481 /// given query value.
1482 pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool {
1483 self.map.contains_key_equiv(value)
1486 /// An iterator visiting all elements in arbitrary order.
1487 /// Iterator element type is &'a T.
1488 pub fn iter<'a>(&'a self) -> SetItems<'a, T> {
1492 /// Creates a consuming iterator, that is, one that moves each value out
1493 /// of the set in arbitrary order. The set cannot be used after calling
1495 pub fn move_iter(self) -> SetMoveItems<T> {
1496 self.map.move_iter().map(|(k, _)| k)
1499 /// Visit the values representing the difference
1500 pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
1503 .filter_map(|(other, elt)| {
1504 if !other.contains(elt) { Some(elt) } else { None }
1508 /// Visit the values representing the symmetric difference
1509 pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>)
1510 -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> {
1511 self.difference(other).chain(other.difference(self))
1514 /// Visit the values representing the intersection
1515 pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
1516 -> SetAlgebraItems<'a, T, H> {
1519 .filter_map(|(other, elt)| {
1520 if other.contains(elt) { Some(elt) } else { None }
1524 /// Visit the values representing the union
1525 pub fn union<'a>(&'a self, other: &'a HashSet<T, H>)
1526 -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
1527 self.iter().chain(other.difference(self))
1532 impl<T: Eq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
1533 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1534 try!(write!(f.buf, r"\{"));
1536 for (i, x) in self.iter().enumerate() {
1537 if i != 0 { try!(write!(f.buf, ", ")); }
1538 try!(write!(f.buf, "{}", *x));
1541 write!(f.buf, r"\}")
1545 impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> {
1546 fn from_iterator<I: Iterator<T>>(iter: &mut I) -> HashSet<T, H> {
1547 let (lower, _) = iter.size_hint();
1548 let mut set = HashSet::with_capacity_and_hasher(lower, Default::default());
1554 impl<T: Eq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> {
1555 fn extend<I: Iterator<T>>(&mut self, iter: &mut I) {
1562 impl<T: Eq + Hash> Default for HashSet<T, sip::SipHasher> {
1563 fn default() -> HashSet<T> { HashSet::new() }
1566 // `Repeat` is used to feed the filter closure an explicit capture
1567 // of a reference to the other set
1568 /// Set operations iterator
1569 pub type SetAlgebraItems<'a, T, H> =
1570 FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T,
1571 Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>;
1576 use std::iter::{Iterator,range_inclusive,range_step_inclusive};
1577 use std::local_data;
1581 fn test_create_capacity_zero() {
1582 let mut m = HashMap::with_capacity(0);
1584 assert!(m.insert(1, 1));
1586 assert!(m.contains_key(&1));
1587 assert!(!m.contains_key(&0));
1592 let mut m = HashMap::new();
1593 assert_eq!(m.len(), 0);
1594 assert!(m.insert(1, 2));
1595 assert_eq!(m.len(), 1);
1596 assert!(m.insert(2, 4));
1597 assert_eq!(m.len(), 2);
1598 assert_eq!(*m.find(&1).unwrap(), 2);
1599 assert_eq!(*m.find(&2).unwrap(), 4);
1602 local_data_key!(drop_vector: vec::Vec<int>)
1604 #[deriving(Hash, Eq)]
1611 fn new(k: int) -> Dropable {
1612 local_data::get_mut(drop_vector,
1613 |v| { v.unwrap().as_mut_slice()[k] += 1; });
1619 impl Drop for Dropable {
1620 fn drop(&mut self) {
1621 local_data::get_mut(drop_vector, |v|
1622 { v.unwrap().as_mut_slice()[self.k] -= 1; });
1628 local_data::set(drop_vector, vec::Vec::from_elem(200, 0));
1631 let mut m = HashMap::new();
1633 local_data::get(drop_vector, |v| {
1634 for i in range(0, 200) {
1635 assert_eq!(v.unwrap().as_slice()[i], 0);
1639 for i in range(0, 100) {
1640 let d1 = Dropable::new(i);
1641 let d2 = Dropable::new(i+100);
1645 local_data::get(drop_vector, |v| {
1646 for i in range(0, 200) {
1647 assert_eq!(v.unwrap().as_slice()[i], 1);
1651 for i in range(0, 50) {
1652 let k = Dropable::new(i);
1655 assert!(v.is_some());
1657 local_data::get(drop_vector, |v| {
1658 assert_eq!(v.unwrap().as_slice()[i], 1);
1659 assert_eq!(v.unwrap().as_slice()[i+100], 1);
1663 local_data::get(drop_vector, |v| {
1664 for i in range(0, 50) {
1665 assert_eq!(v.unwrap().as_slice()[i], 0);
1666 assert_eq!(v.unwrap().as_slice()[i+100], 0);
1669 for i in range(50, 100) {
1670 assert_eq!(v.unwrap().as_slice()[i], 1);
1671 assert_eq!(v.unwrap().as_slice()[i+100], 1);
1676 local_data::get(drop_vector, |v| {
1677 for i in range(0, 200) {
1678 assert_eq!(v.unwrap().as_slice()[i], 0);
1684 fn test_empty_pop() {
1685 let mut m: HashMap<int, bool> = HashMap::new();
1686 assert_eq!(m.pop(&0), None);
1690 fn test_lots_of_insertions() {
1691 let mut m = HashMap::new();
1693 // Try this a few times to make sure we never screw up the hashmap's
1695 for _ in range(0, 10) {
1696 assert!(m.is_empty());
1698 for i in range_inclusive(1, 1000) {
1699 assert!(m.insert(i, i));
1701 for j in range_inclusive(1, i) {
1703 assert_eq!(r, Some(&j));
1706 for j in range_inclusive(i+1, 1000) {
1708 assert_eq!(r, None);
1712 for i in range_inclusive(1001, 2000) {
1713 assert!(!m.contains_key(&i));
1717 for i in range_inclusive(1, 1000) {
1718 assert!(m.remove(&i));
1720 for j in range_inclusive(1, i) {
1721 assert!(!m.contains_key(&j));
1724 for j in range_inclusive(i+1, 1000) {
1725 assert!(m.contains_key(&j));
1729 for i in range_inclusive(1, 1000) {
1730 assert!(!m.contains_key(&i));
1733 for i in range_inclusive(1, 1000) {
1734 assert!(m.insert(i, i));
1738 for i in range_step_inclusive(1000, 1, -1) {
1739 assert!(m.remove(&i));
1741 for j in range_inclusive(i, 1000) {
1742 assert!(!m.contains_key(&j));
1745 for j in range_inclusive(1, i-1) {
1746 assert!(m.contains_key(&j));
1753 fn test_find_mut() {
1754 let mut m = HashMap::new();
1755 assert!(m.insert(1, 12));
1756 assert!(m.insert(2, 8));
1757 assert!(m.insert(5, 14));
1759 match m.find_mut(&5) {
1760 None => fail!(), Some(x) => *x = new
1762 assert_eq!(m.find(&5), Some(&new));
1766 fn test_insert_overwrite() {
1767 let mut m = HashMap::new();
1768 assert!(m.insert(1, 2));
1769 assert_eq!(*m.find(&1).unwrap(), 2);
1770 assert!(!m.insert(1, 3));
1771 assert_eq!(*m.find(&1).unwrap(), 3);
1775 fn test_insert_conflicts() {
1776 let mut m = HashMap::with_capacity(4);
1777 assert!(m.insert(1, 2));
1778 assert!(m.insert(5, 3));
1779 assert!(m.insert(9, 4));
1780 assert_eq!(*m.find(&9).unwrap(), 4);
1781 assert_eq!(*m.find(&5).unwrap(), 3);
1782 assert_eq!(*m.find(&1).unwrap(), 2);
1786 fn test_conflict_remove() {
1787 let mut m = HashMap::with_capacity(4);
1788 assert!(m.insert(1, 2));
1789 assert_eq!(*m.find(&1).unwrap(), 2);
1790 assert!(m.insert(5, 3));
1791 assert_eq!(*m.find(&1).unwrap(), 2);
1792 assert_eq!(*m.find(&5).unwrap(), 3);
1793 assert!(m.insert(9, 4));
1794 assert_eq!(*m.find(&1).unwrap(), 2);
1795 assert_eq!(*m.find(&5).unwrap(), 3);
1796 assert_eq!(*m.find(&9).unwrap(), 4);
1797 assert!(m.remove(&1));
1798 assert_eq!(*m.find(&9).unwrap(), 4);
1799 assert_eq!(*m.find(&5).unwrap(), 3);
1803 fn test_is_empty() {
1804 let mut m = HashMap::with_capacity(4);
1805 assert!(m.insert(1, 2));
1806 assert!(!m.is_empty());
1807 assert!(m.remove(&1));
1808 assert!(m.is_empty());
1813 let mut m = HashMap::new();
1815 assert_eq!(m.pop(&1), Some(2));
1816 assert_eq!(m.pop(&1), None);
1821 let mut m = HashMap::new();
1822 assert_eq!(m.swap(1, 2), None);
1823 assert_eq!(m.swap(1, 3), Some(2));
1824 assert_eq!(m.swap(1, 4), Some(3));
1828 fn test_move_iter() {
1830 let mut hm = HashMap::new();
1838 let v = hm.move_iter().collect::<~[(char, int)]>();
1839 assert!([('a', 1), ('b', 2)] == v || [('b', 2), ('a', 1)] == v);
1844 let mut m = HashMap::with_capacity(4);
1845 for i in range(0u, 32) {
1846 assert!(m.insert(i, i*2));
1848 assert_eq!(m.len(), 32);
1850 let mut observed = 0;
1852 for (k, v) in m.iter() {
1853 assert_eq!(*v, *k * 2);
1854 observed |= 1 << *k;
1856 assert_eq!(observed, 0xFFFF_FFFF);
1861 let vec = ~[(1, 'a'), (2, 'b'), (3, 'c')];
1862 let map = vec.move_iter().collect::<HashMap<int, char>>();
1863 let keys = map.keys().map(|&k| k).collect::<~[int]>();
1864 assert_eq!(keys.len(), 3);
1865 assert!(keys.contains(&1));
1866 assert!(keys.contains(&2));
1867 assert!(keys.contains(&3));
1872 let vec = ~[(1, 'a'), (2, 'b'), (3, 'c')];
1873 let map = vec.move_iter().collect::<HashMap<int, char>>();
1874 let values = map.values().map(|&v| v).collect::<~[char]>();
1875 assert_eq!(values.len(), 3);
1876 assert!(values.contains(&'a'));
1877 assert!(values.contains(&'b'));
1878 assert!(values.contains(&'c'));
1883 let mut m = HashMap::new();
1884 assert!(m.find(&1).is_none());
1888 Some(v) => assert!(*v == 2)
1894 let mut m1 = HashMap::new();
1899 let mut m2 = HashMap::new();
1912 let mut m = HashMap::new();
1914 assert_eq!(m.len(), 0);
1915 assert!(m.is_empty());
1918 let old_resize_at = m.grow_at;
1919 while old_resize_at == m.grow_at {
1924 assert_eq!(m.len(), i);
1925 assert!(!m.is_empty());
1929 fn test_find_equiv() {
1930 let mut m = HashMap::new();
1932 let (foo, bar, baz) = (1,2,3);
1933 m.insert(~"foo", foo);
1934 m.insert(~"bar", bar);
1935 m.insert(~"baz", baz);
1938 assert_eq!(m.find_equiv(&("foo")), Some(&foo));
1939 assert_eq!(m.find_equiv(&("bar")), Some(&bar));
1940 assert_eq!(m.find_equiv(&("baz")), Some(&baz));
1942 assert_eq!(m.find_equiv(&("qux")), None);
1946 fn test_from_iter() {
1947 let xs = ~[(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];
1949 let map: HashMap<int, int> = xs.iter().map(|&x| x).collect();
1951 for &(k, v) in xs.iter() {
1952 assert_eq!(map.find(&k), Some(&v));
1960 use std::container::Container;
1961 use std::slice::ImmutableEqVector;
1964 fn test_disjoint() {
1965 let mut xs = HashSet::new();
1966 let mut ys = HashSet::new();
1967 assert!(xs.is_disjoint(&ys));
1968 assert!(ys.is_disjoint(&xs));
1969 assert!(xs.insert(5));
1970 assert!(ys.insert(11));
1971 assert!(xs.is_disjoint(&ys));
1972 assert!(ys.is_disjoint(&xs));
1973 assert!(xs.insert(7));
1974 assert!(xs.insert(19));
1975 assert!(xs.insert(4));
1976 assert!(ys.insert(2));
1977 assert!(ys.insert(-11));
1978 assert!(xs.is_disjoint(&ys));
1979 assert!(ys.is_disjoint(&xs));
1980 assert!(ys.insert(7));
1981 assert!(!xs.is_disjoint(&ys));
1982 assert!(!ys.is_disjoint(&xs));
1986 fn test_subset_and_superset() {
1987 let mut a = HashSet::new();
1988 assert!(a.insert(0));
1989 assert!(a.insert(5));
1990 assert!(a.insert(11));
1991 assert!(a.insert(7));
1993 let mut b = HashSet::new();
1994 assert!(b.insert(0));
1995 assert!(b.insert(7));
1996 assert!(b.insert(19));
1997 assert!(b.insert(250));
1998 assert!(b.insert(11));
1999 assert!(b.insert(200));
2001 assert!(!a.is_subset(&b));
2002 assert!(!a.is_superset(&b));
2003 assert!(!b.is_subset(&a));
2004 assert!(!b.is_superset(&a));
2006 assert!(b.insert(5));
2008 assert!(a.is_subset(&b));
2009 assert!(!a.is_superset(&b));
2010 assert!(!b.is_subset(&a));
2011 assert!(b.is_superset(&a));
2016 let mut a = HashSet::new();
2017 for i in range(0u, 32) {
2018 assert!(a.insert(i));
2020 let mut observed = 0;
2022 observed |= 1 << *k;
2024 assert_eq!(observed, 0xFFFF_FFFF);
2028 fn test_intersection() {
2029 let mut a = HashSet::new();
2030 let mut b = HashSet::new();
2032 assert!(a.insert(11));
2033 assert!(a.insert(1));
2034 assert!(a.insert(3));
2035 assert!(a.insert(77));
2036 assert!(a.insert(103));
2037 assert!(a.insert(5));
2038 assert!(a.insert(-5));
2040 assert!(b.insert(2));
2041 assert!(b.insert(11));
2042 assert!(b.insert(77));
2043 assert!(b.insert(-9));
2044 assert!(b.insert(-42));
2045 assert!(b.insert(5));
2046 assert!(b.insert(3));
2049 let expected = [3, 5, 11, 77];
2050 for x in a.intersection(&b) {
2051 assert!(expected.contains(x));
2054 assert_eq!(i, expected.len());
2058 fn test_difference() {
2059 let mut a = HashSet::new();
2060 let mut b = HashSet::new();
2062 assert!(a.insert(1));
2063 assert!(a.insert(3));
2064 assert!(a.insert(5));
2065 assert!(a.insert(9));
2066 assert!(a.insert(11));
2068 assert!(b.insert(3));
2069 assert!(b.insert(9));
2072 let expected = [1, 5, 11];
2073 for x in a.difference(&b) {
2074 assert!(expected.contains(x));
2077 assert_eq!(i, expected.len());
2081 fn test_symmetric_difference() {
2082 let mut a = HashSet::new();
2083 let mut b = HashSet::new();
2085 assert!(a.insert(1));
2086 assert!(a.insert(3));
2087 assert!(a.insert(5));
2088 assert!(a.insert(9));
2089 assert!(a.insert(11));
2091 assert!(b.insert(-2));
2092 assert!(b.insert(3));
2093 assert!(b.insert(9));
2094 assert!(b.insert(14));
2095 assert!(b.insert(22));
2098 let expected = [-2, 1, 5, 11, 14, 22];
2099 for x in a.symmetric_difference(&b) {
2100 assert!(expected.contains(x));
2103 assert_eq!(i, expected.len());
2108 let mut a = HashSet::new();
2109 let mut b = HashSet::new();
2111 assert!(a.insert(1));
2112 assert!(a.insert(3));
2113 assert!(a.insert(5));
2114 assert!(a.insert(9));
2115 assert!(a.insert(11));
2116 assert!(a.insert(16));
2117 assert!(a.insert(19));
2118 assert!(a.insert(24));
2120 assert!(b.insert(-2));
2121 assert!(b.insert(1));
2122 assert!(b.insert(5));
2123 assert!(b.insert(9));
2124 assert!(b.insert(13));
2125 assert!(b.insert(19));
2128 let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24];
2129 for x in a.union(&b) {
2130 assert!(expected.contains(x));
2133 assert_eq!(i, expected.len());
2137 fn test_from_iter() {
2138 let xs = ~[1, 2, 3, 4, 5, 6, 7, 8, 9];
2140 let set: HashSet<int> = xs.iter().map(|&x| x).collect();
2142 for x in xs.iter() {
2143 assert!(set.contains(x));
2148 fn test_move_iter() {
2150 let mut hs = HashSet::new();
2158 let v = hs.move_iter().collect::<~[char]>();
2159 assert!(['a', 'b'] == v || ['b', 'a'] == v);
2164 // These constants once happened to expose a bug in insert().
2165 // I'm keeping them around to prevent a regression.
2166 let mut s1 = HashSet::new();
2172 let mut s2 = HashSet::new();
2186 let mut set: HashSet<int> = HashSet::new();
2187 let empty: HashSet<int> = HashSet::new();
2192 let set_str = format!("{}", set);
2194 assert!(set_str == ~"{1, 2}" || set_str == ~"{2, 1}");
2195 assert_eq!(format!("{}", empty), ~"{}");
2202 use self::test::BenchHarness;
2203 use std::iter::{range_inclusive};
2206 fn insert(b: &mut BenchHarness) {
2209 let mut m = HashMap::new();
2211 for i in range_inclusive(1, 1000) {
2224 fn find_existing(b: &mut BenchHarness) {
2227 let mut m = HashMap::new();
2229 for i in range_inclusive(1, 1000) {
2234 m.contains_key(&412);
2239 fn find_nonexisting(b: &mut BenchHarness) {
2242 let mut m = HashMap::new();
2244 for i in range_inclusive(1, 1000) {
2249 m.contains_key(&2048);
2254 fn hashmap_as_queue(b: &mut BenchHarness) {
2257 let mut m = HashMap::new();
2259 for i in range_inclusive(1, 1000) {
2267 m.insert(k + 1000, k + 1000);
2273 fn find_pop_insert(b: &mut BenchHarness) {
2276 let mut m = HashMap::new();
2278 for i in range_inclusive(1, 1000) {
2286 m.find(&(k + 2000));
2288 m.insert(k + 1000, k + 1000);