1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 //! Unordered containers, implemented as hash-tables (`HashSet` and `HashMap` types)
13 use std::container::{Container, Mutable, Map, MutableMap, Set, MutableSet};
14 use std::clone::Clone;
15 use std::cmp::{Eq, TotalEq, Equiv, max};
16 use std::default::Default;
19 use std::hash::{Hash, Hasher, sip};
21 use std::iter::{Iterator, FromIterator, Extendable};
22 use std::iter::{FilterMap, Chain, Repeat, Zip};
23 use std::iter::{range, range_inclusive};
24 use std::mem::replace;
26 use std::option::{Option, Some, None};
29 use std::result::{Ok, Err};
30 use std::slice::ImmutableVector;
35 use std::clone::Clone;
37 use std::hash::{Hash, Hasher};
38 use std::kinds::marker;
39 use std::num::CheckedMul;
40 use std::option::{Option, Some, None};
41 use std::prelude::Drop;
44 use std::rt::global_heap;
45 use std::intrinsics::{size_of, transmute, move_val_init};
46 use std::iter::{Iterator, range_step_inclusive};
48 static EMPTY_BUCKET: u64 = 0u64;
50 /// The raw hashtable, providing safe-ish access to the unzipped and highly
51 /// optimized arrays of hashes, keys, and values.
53 /// This design uses less memory and is a lot faster than the naive
54 /// `~[Option<u64, K, V>]`, because we don't pay for the overhead of an
55 /// option on every element, and we get a generally more cache-aware design.
57 /// Key invariants of this structure:
59 /// - if hashes[i] == EMPTY_BUCKET, then keys[i] and vals[i] have
60 /// 'undefined' contents. Don't read from them. This invariant is
61 /// enforced outside this module with the [EmptyIndex], [FullIndex],
62 /// and [SafeHash] types/concepts.
64 /// - An `EmptyIndex` is only constructed for a bucket at an index with
65 /// a hash of EMPTY_BUCKET.
67 /// - A `FullIndex` is only constructed for a bucket at an index with a
68 /// non-EMPTY_BUCKET hash.
70 /// - A `SafeHash` is only constructed for non-`EMPTY_BUCKET` hash. We get
71 /// around hashes of zero by changing them to 0x8000_0000_0000_0000, which will
72 /// likely hash to the same bucket, but not be represented as "empty".
74 /// - All three "arrays represented by pointers" are the same length:
75 /// `capacity`. This is set at creation and never changes. The arrays
76 /// are unzipped to save space (we don't have to pay for the padding
77 /// between odd sized elements, such as in a map from u64 to u8), and
78 /// be more cache aware (scanning through 8 hashes brings in 2 cache
79 /// lines, since they're all right beside each other).
81 /// You can kind of think of this module/data structure as a safe wrapper
82 /// around just the "table" part of the hashtable. It enforces some
83 /// invariants at the type level and employs some performance trickery,
84 /// but in general is just a tricked out `Vec<Option<u64, K, V>>`.
88 /// Feb 11, 2014: This hashtable was just implemented, and, hard as I tried,
89 /// isn't yet totally safe. There's a "known exploit" that you can create
90 /// multiple FullIndexes for a bucket, `take` one, and then still `take`
91 /// the other causing undefined behavior. Currently, there's no story
92 /// for how to protect against this statically. Therefore, there are asserts
93 /// on `take`, `get`, `get_mut`, and `put` which check the bucket state.
94 /// With time, and when we're confident this works correctly, they should
95 /// be removed. Also, the bounds check in `peek` is especially painful,
96 /// as that's called in the innermost loops of the hashtable and has the
97 /// potential to be a major performance drain. Remove this too.
99 /// Or, better than remove, only enable these checks for debug builds.
100 /// There's currently no "debug-only" asserts in rust, so if you're reading
101 /// this and going "what? of course there are debug-only asserts!", then
102 /// please make this use them!
103 pub struct RawTable<K, V> {
111 /// Represents an index into a `RawTable` with no key or value in it.
112 pub struct EmptyIndex {
114 nocopy: marker::NoCopy,
117 /// Represents an index into a `RawTable` with a key, value, and hash
119 pub struct FullIndex {
122 nocopy: marker::NoCopy,
126 /// Since we get the hash for free whenever we check the bucket state,
127 /// this function is provided for fast access, letting us avoid making
128 /// redundant trips back to the hashtable.
129 pub fn hash(&self) -> SafeHash { self.hash }
131 /// Same comment as with `hash`.
132 pub fn raw_index(&self) -> uint { self.idx as uint }
135 /// Represents the state of a bucket: it can either have a key/value
136 /// pair (be full) or not (be empty). You cannot `take` empty buckets,
137 /// and you cannot `put` into full buckets.
138 pub enum BucketState {
143 /// A hash that is not zero, since we use that to represent empty buckets.
145 pub struct SafeHash {
150 /// Peek at the hash value, which is guaranteed to be non-zero.
151 pub fn inspect(&self) -> u64 { self.hash }
154 /// We need to remove hashes of 0. That's reserved for empty buckets.
155 /// This function wraps up `hash_keyed` to be the only way outside this
156 /// module to generate a SafeHash.
157 pub fn make_hash<T: Hash<S>, S, H: Hasher<S>>(hasher: &H, t: &T) -> SafeHash {
158 match hasher.hash(t) {
159 // This constant is exceedingly likely to hash to the same
160 // bucket, but it won't be counted as empty!
161 EMPTY_BUCKET => SafeHash { hash: 0x8000_0000_0000_0000 },
162 h => SafeHash { hash: h },
166 impl<K, V> RawTable<K, V> {
168 /// Does not initialize the buckets. The caller should ensure they,
169 /// at the very least, set every hash to EMPTY_BUCKET.
170 unsafe fn new_uninitialized(capacity: uint) -> RawTable<K, V> {
172 capacity.checked_mul(&size_of::<u64>()).expect("capacity overflow");
174 capacity.checked_mul(&size_of::< K >()).expect("capacity overflow");
176 capacity.checked_mul(&size_of::< V >()).expect("capacity overflow");
179 The following code was my first pass at making RawTable only
180 allocate a single buffer, since that's all it needs. There's
181 no logical reason for this to require three calls to malloc.
183 However, I'm not convinced the code below is correct. If you
184 want to take a stab at it, please do! The alignment is
185 especially tricky to get right, especially if you need more
186 alignment than malloc guarantees.
188 let hashes_offset = 0;
189 let keys_offset = align_size(hashes_offset + hashes_size, keys_align);
190 let vals_offset = align_size(keys_offset + keys_size, vals_align);
191 let end = vals_offset + vals_size;
193 let buffer = global_heap::malloc_raw(end);
195 let hashes = buffer.offset(hashes_offset) as *mut u64;
196 let keys = buffer.offset(keys_offset) as *mut K;
197 let vals = buffer.offset(vals_offset) as *mut V;
201 let hashes = global_heap::malloc_raw(hashes_size) as *mut u64;
202 let keys = global_heap::malloc_raw(keys_size) as *mut K;
203 let vals = global_heap::malloc_raw(vals_size) as *mut V;
216 /// Creates a new raw table from a given capacity. All buckets are
218 pub fn new(capacity: uint) -> RawTable<K, V> {
220 let ret = RawTable::new_uninitialized(capacity);
222 for i in range(0, ret.capacity() as int) {
223 *ret.hashes.offset(i) = EMPTY_BUCKET;
230 /// Reads a bucket at a given index, returning an enum indicating whether
231 /// there's anything there or not. You need to match on this enum to get
232 /// the appropriate types to pass on to most of the rest of the functions
234 pub fn peek(&self, index: uint) -> BucketState {
236 if cfg!(test) { assert!(index < self.capacity) }
238 let idx = index as int;
239 let hash = unsafe { *self.hashes.offset(idx) };
241 let nocopy = marker::NoCopy;
252 hash: SafeHash { hash: full_hash },
258 /// Gets references to the key and value at a given index.
259 pub fn read<'a>(&'a self, index: &FullIndex) -> (&'a K, &'a V) {
264 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
265 (&'a *self.keys.offset(idx),
266 &'a *self.vals.offset(idx))
270 /// Gets references to the key and value at a given index, with the
271 /// value's reference being mutable.
272 pub fn read_mut<'a>(&'a mut self, index: &FullIndex) -> (&'a K, &'a mut V) {
277 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
278 (&'a *self.keys.offset(idx),
279 &'a mut *self.vals.offset(idx))
283 /// Read everything, mutably.
284 pub fn read_all_mut<'a>(&'a mut self, index: &FullIndex)
285 -> (&'a mut SafeHash, &'a mut K, &'a mut V) {
288 // I'm totally abusing the fact that a pointer to any u64 in the
289 // hashtable at a full index is a safe hash. Thanks to `SafeHash`
290 // just being a wrapper around u64, this is true. It's just really
291 // really really really unsafe. However, the exposed API is now
292 // impossible to get wrong. You cannot insert an empty hash into
297 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
298 (transmute(self.hashes.offset(idx)),
299 &'a mut *self.keys.offset(idx),
300 &'a mut *self.vals.offset(idx))
304 /// Puts a key and value pair, along with the key's hash, into a given
305 /// index in the hashtable. Note how the `EmptyIndex` is 'moved' into this
306 /// function, because that slot will no longer be empty when we return!
307 /// Because we know this, a FullIndex is returned for later use, pointing
308 /// to the newly-filled slot in the hashtable.
310 /// Use `make_hash` to construct a `SafeHash` to pass to this function.
311 pub fn put(&mut self, index: EmptyIndex, hash: SafeHash, k: K, v: V) -> FullIndex {
316 if cfg!(test) { assert!(*self.hashes.offset(idx) == EMPTY_BUCKET) }
317 *self.hashes.offset(idx) = hash.inspect();
318 move_val_init(&mut *self.keys.offset(idx), k);
319 move_val_init(&mut *self.vals.offset(idx), v);
324 FullIndex { idx: idx, hash: hash, nocopy: marker::NoCopy }
327 /// Removes a key and value from the hashtable.
329 /// This works similarly to `put`, building an `EmptyIndex` out of the
331 pub fn take(&mut self, index: FullIndex) -> (EmptyIndex, K, V) {
336 if cfg!(test) { assert!(*self.hashes.offset(idx) != EMPTY_BUCKET) }
338 let hash_ptr = self.hashes.offset(idx);
340 *hash_ptr = EMPTY_BUCKET;
342 // Drop the mutable constraint.
343 let keys = self.keys as *K;
344 let vals = self.vals as *V;
346 let k = ptr::read(keys.offset(idx));
347 let v = ptr::read(vals.offset(idx));
351 (EmptyIndex { idx: idx, nocopy: marker::NoCopy }, k, v)
355 /// The hashtable's capacity, similar to a vector's.
356 pub fn capacity(&self) -> uint {
360 /// The number of elements ever `put` in the hashtable, minus the number
361 /// of elements ever `take`n.
362 pub fn size(&self) -> uint {
366 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
367 Entries { table: self, idx: 0 }
370 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
371 MutEntries { table: self, idx: 0 }
374 pub fn move_iter(self) -> MoveEntries<K, V> {
375 MoveEntries { table: self, idx: 0 }
379 pub struct Entries<'a, K, V> {
380 table: &'a RawTable<K, V>,
384 pub struct MutEntries<'a, K, V> {
385 table: &'a mut RawTable<K, V>,
389 pub struct MoveEntries<K, V> {
390 table: RawTable<K, V>,
394 impl<'a, K, V> Iterator<(&'a K, &'a V)> for Entries<'a, K, V> {
395 fn next(&mut self) -> Option<(&'a K, &'a V)> {
396 while self.idx < self.table.capacity() {
400 match self.table.peek(i) {
402 Full(idx) => return Some(self.table.read(&idx))
409 fn size_hint(&self) -> (uint, Option<uint>) {
410 let size = self.table.size() - self.idx;
415 impl<'a, K, V> Iterator<(&'a K, &'a mut V)> for MutEntries<'a, K, V> {
416 fn next(&mut self) -> Option<(&'a K, &'a mut V)> {
417 while self.idx < self.table.capacity() {
421 match self.table.peek(i) {
423 // the transmute here fixes:
424 // error: lifetime of `self` is too short to guarantee its contents
425 // can be safely reborrowed
426 Full(idx) => unsafe {
427 return Some(transmute(self.table.read_mut(&idx)))
435 fn size_hint(&self) -> (uint, Option<uint>) {
436 let size = self.table.size() - self.idx;
441 impl<K, V> Iterator<(SafeHash, K, V)> for MoveEntries<K, V> {
442 fn next(&mut self) -> Option<(SafeHash, K, V)> {
443 while self.idx < self.table.capacity() {
447 match self.table.peek(i) {
451 let (_, k, v) = self.table.take(idx);
452 return Some((h, k, v));
460 fn size_hint(&self) -> (uint, Option<uint>) {
461 let size = self.table.size();
466 impl<K: Clone, V: Clone> Clone for RawTable<K, V> {
467 fn clone(&self) -> RawTable<K, V> {
469 let mut new_ht = RawTable::new_uninitialized(self.capacity());
471 for i in range(0, self.capacity()) {
474 *new_ht.hashes.offset(i as int) = EMPTY_BUCKET;
477 let hash = idx.hash().inspect();
478 let (k, v) = self.read(&idx);
479 *new_ht.hashes.offset(i as int) = hash;
480 move_val_init(&mut *new_ht.keys.offset(i as int), (*k).clone());
481 move_val_init(&mut *new_ht.vals.offset(i as int), (*v).clone());
486 new_ht.size = self.size();
496 impl<K, V> Drop for RawTable<K, V> {
498 // Ideally, this should be in reverse, since we're likely to have
499 // partially taken some elements out with `.move_iter()` from the
501 for i in range_step_inclusive(self.capacity as int - 1, 0, -1) {
502 // Check if the size is 0, so we don't do a useless scan when
503 // dropping empty tables such as on resize.
505 if self.size == 0 { break }
507 match self.peek(i as uint) {
509 Full(idx) => { self.take(idx); }
513 assert!(self.size == 0);
516 libc::free(self.vals as *mut libc::c_void);
517 libc::free(self.keys as *mut libc::c_void);
518 libc::free(self.hashes as *mut libc::c_void);
524 // We use this type for the load factor, to avoid floating point operations
525 // which might not be supported efficiently on some hardware.
527 // We use small u16s here to save space in the hashtable. They get upcasted
528 // to u64s when we actually use them.
529 type Fraction = (u16, u16); // (numerator, denominator)
531 // multiplication by a fraction, in a way that won't generally overflow for
532 // array sizes outside a factor of 10 of U64_MAX.
533 fn fraction_mul(lhs: uint, (num, den): Fraction) -> uint {
534 (((lhs as u64) * (num as u64)) / (den as u64)) as uint
537 static INITIAL_LOG2_CAP: uint = 5;
538 static INITIAL_CAPACITY: uint = 1 << INITIAL_LOG2_CAP; // 2^5
539 static INITIAL_LOAD_FACTOR: Fraction = (9, 10);
541 // The main performance trick in this hashmap is called Robin Hood Hashing.
542 // It gains its excellent performance from one key invariant:
544 // If an insertion collides with an existing element, and that element's
545 // "probe distance" (how far away the element is from its ideal location)
546 // is higher than how far we've already probed, swap the elements.
548 // This massively lowers variance in probe distance, and allows us to get very
549 // high load factors with good performance. The 90% load factor I use is rather
552 // > Why a load factor of 90%?
554 // In general, all the distances to initial buckets will converge on the mean.
555 // At a load factor of α, the odds of finding the target bucket after k
556 // probes is approximately 1-α^k. If we set this equal to 50% (since we converge
557 // on the mean) and set k=8 (64-byte cache line / 8-byte hash), α=0.92. I round
558 // this down to 0.90 to make the math easier on the CPU and avoid its FPU.
559 // Since on average we start the probing in the middle of a cache line, this
560 // strategy pulls in two cache lines of hashes on every lookup. I think that's
561 // pretty good, but if you want to trade off some space, it could go down to one
562 // cache line on average with an α of 0.84.
564 // > Wait, what? Where did you get 1-α^k from?
566 // On the first probe, your odds of a collision with an existing element is α.
567 // The odds of doing this twice in a row is approximately α^2. For three times,
568 // α^3, etc. Therefore, the odds of colliding k times is α^k. The odds of NOT
569 // colliding after k tries is 1-α^k.
571 // Future Improvements (FIXME!)
572 // ============================
574 // Allow the load factor to be changed dynamically and/or at initialization.
575 // I'm having trouble figuring out a sane API for this without exporting my
576 // hackish fraction type, while still avoiding floating point.
578 // Also, would it be possible for us to reuse storage when growing the
579 // underlying table? This is exactly the use case for 'realloc', and may
580 // be worth exploring.
582 // Future Optimizations (FIXME!)
583 // =============================
585 // The paper cited below mentions an implementation which keeps track of the
586 // distance-to-initial-bucket histogram. I'm suspicious of this approach because
587 // it requires maintaining an internal map. If this map were replaced with a
588 // hashmap, it would be faster, but now our data structure is self-referential
589 // and blows up. Also, this allows very good first guesses, but array accesses
590 // are no longer linear and in one direction, as we have now. There is also
591 // memory and cache pressure that this map would entail that would be very
592 // difficult to properly see in a microbenchmark.
594 // Another possible design choice that I made without any real reason is
595 // parameterizing the raw table over keys and values. Technically, all we need
596 // is the size and alignment of keys and values, and the code should be just as
597 // efficient (well, we might need one for power-of-two size and one for not...).
598 // This has the potential to reduce code bloat in rust executables, without
599 // really losing anything except 4 words (key size, key alignment, val size,
600 // val alignment) which can be passed in to every call of a `RawTable` function.
601 // This would definitely be an avenue worth exploring if people start complaining
602 // about the size of rust executables.
604 // There's also two optimizations that have been omitted regarding how the
605 // hashtable allocates. The first is that a hashtable which never has an element
606 // inserted should not allocate. I'm suspicious of this one, because supporting
607 // that internally gains no performance over just using an
608 // `Option<HashMap<K, V>>`, and is significantly more complicated.
610 // The second omitted allocation optimization is that right now we allocate three
611 // arrays to back the hashtable. This is wasteful. In theory, we only need one
612 // array, and each of the three original arrays can just be slices of it. This
613 // would reduce the pressure on the allocator, and will play much nicer with the
614 // rest of the system. An initial implementation is commented out in
615 // `table::RawTable::new`, but I'm not confident it works for all sane alignments,
616 // especially if a type needs more alignment than `malloc` provides.
618 /// A hash map implementation which uses linear probing with Robin
619 /// Hood bucket stealing.
621 /// The hashes are all keyed by the task-local random number generator
622 /// on creation by default, this means the ordering of the keys is
623 /// randomized, but makes the tables more resistant to
624 /// denial-of-service attacks (Hash DoS). This behaviour can be
625 /// overridden with one of the constructors.
627 /// It is required that the keys implement the `Eq` and `Hash` traits, although
628 /// this can frequently be achieved by using `#[deriving(Eq, Hash)]`.
630 /// Relevant papers/articles:
632 /// 1. Pedro Celis. ["Robin Hood Hashing"](https://cs.uwaterloo.ca/research/tr/1986/CS-86-14.pdf)
633 /// 2. Emmanuel Goossaert. ["Robin Hood
634 /// hashing"](http://codecapsule.com/2013/11/11/robin-hood-hashing/)
635 /// 3. Emmanuel Goossaert. ["Robin Hood hashing: backward shift
636 /// deletion"](http://codecapsule.com/2013/11/17/robin-hood-hashing-backward-shift-deletion/)
641 /// use collections::HashMap;
643 /// // type inference lets us omit an explicit type signature (which
644 /// // would be `HashMap<&str, &str>` in this example).
645 /// let mut book_reviews = HashMap::new();
647 /// // review some books.
648 /// book_reviews.insert("Adventures of Hucklebury Fin", "My favorite book.");
649 /// book_reviews.insert("Grimms' Fairy Tales", "Masterpiece.");
650 /// book_reviews.insert("Pride and Prejudice", "Very enjoyable.");
651 /// book_reviews.insert("The Adventures of Sherlock Holmes", "Eye lyked it alot.");
653 /// // check for a specific one.
654 /// if !book_reviews.contains_key(& &"Les Misérables") {
655 /// println!("We've got {} reviews, but Les Misérables ain't one.",
656 /// book_reviews.len());
659 /// // oops, this review has a lot of spelling mistakes, let's delete it.
660 /// book_reviews.remove(& &"The Adventures of Sherlock Holmes");
662 /// // look up the values associated with some keys.
663 /// let to_find = ["Pride and Prejudice", "Alice's Adventure in Wonderland"];
664 /// for book in to_find.iter() {
665 /// match book_reviews.find(book) {
666 /// Some(review) => println!("{}: {}", *book, *review),
667 /// None => println!("{} is unreviewed.", *book)
671 /// // iterate over everything.
672 /// for (book, review) in book_reviews.iter() {
673 /// println!("{}: \"{}\"", *book, *review);
677 pub struct HashMap<K, V, H = sip::SipHasher> {
678 // All hashes are keyed on these values, to prevent hash collision attacks.
681 // When size == grow_at, we double the capacity.
684 // The capacity must never drop below this.
685 minimum_capacity: uint,
687 table: table::RawTable<K, V>,
689 // We keep this at the end since it's 4-bytes, unlike everything else
690 // in this struct. Might as well save a word of padding!
691 load_factor: Fraction,
694 /// Get the number of elements which will force the capacity to grow.
695 fn grow_at(capacity: uint, load_factor: Fraction) -> uint {
696 fraction_mul(capacity, load_factor)
699 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
700 /// Get the number of elements which will force the capacity to shrink.
701 /// When size == self.shrink_at(), we halve the capacity.
702 fn shrink_at(&self) -> uint {
703 self.table.capacity() >> 2
706 // Probe the `idx`th bucket for a given hash, returning the index of the
709 // This exploits the power-of-two size of the hashtable. As long as this
710 // is always true, we can use a bitmask of cap-1 to do modular arithmetic.
712 // Prefer to use this with increasing values of `idx` rather than repeatedly
713 // calling `probe_next`. This reduces data-dependencies between loops, which
714 // can help the optimizer, and certainly won't hurt it. `probe_next` is
715 // simply for convenience, and is no more efficient than `probe`.
716 fn probe(&self, hash: &table::SafeHash, idx: uint) -> uint {
717 let hash_mask = self.table.capacity() - 1;
719 // So I heard a rumor that unsigned overflow is safe in rust..
720 ((hash.inspect() as uint) + idx) & hash_mask
723 // Generate the next probe in a sequence. Prefer to use 'probe' by itself,
724 // but this can sometimes be useful.
725 fn probe_next(&self, probe: uint) -> uint {
726 let hash_mask = self.table.capacity() - 1;
727 (probe + 1) & hash_mask
730 fn make_hash<X: Hash<S>>(&self, x: &X) -> table::SafeHash {
731 table::make_hash(&self.hasher, x)
734 /// Get the distance of the bucket at the given index that it lies
735 /// from its 'ideal' location.
737 /// In the cited blog posts above, this is called the "distance to
738 /// initial bucket", or DIB.
739 fn bucket_distance(&self, index_of_elem: &table::FullIndex) -> uint {
740 // where the hash of the element that happens to reside at
741 // `index_of_elem` tried to place itself first.
742 let first_probe_index = self.probe(&index_of_elem.hash(), 0);
744 let raw_index = index_of_elem.raw_index();
746 if first_probe_index <= raw_index {
747 // probe just went forward
748 raw_index - first_probe_index
750 // probe wrapped around the hashtable
751 raw_index + (self.table.capacity() - first_probe_index)
755 /// Search for a pre-hashed key.
756 fn search_hashed_generic(&self, hash: &table::SafeHash, is_match: |&K| -> bool)
757 -> Option<table::FullIndex> {
758 for num_probes in range(0u, self.table.size()) {
759 let probe = self.probe(hash, num_probes);
761 let idx = match self.table.peek(probe) {
762 table::Empty(_) => return None, // hit an empty bucket
763 table::Full(idx) => idx
766 // We can finish the search early if we hit any bucket
767 // with a lower distance to initial bucket than we've probed.
768 if self.bucket_distance(&idx) < num_probes { return None }
770 // If the hash doesn't match, it can't be this one..
771 if hash != &idx.hash() { continue }
773 let (k, _) = self.table.read(&idx);
775 // If the key doesn't match, it can't be this one..
776 if !is_match(k) { continue }
784 fn search_hashed(&self, hash: &table::SafeHash, k: &K) -> Option<table::FullIndex> {
785 self.search_hashed_generic(hash, |k_| *k == *k_)
788 fn search_equiv<Q: Hash<S> + Equiv<K>>(&self, q: &Q) -> Option<table::FullIndex> {
789 self.search_hashed_generic(&self.make_hash(q), |k| q.equiv(k))
792 /// Search for a key, yielding the index if it's found in the hashtable.
793 /// If you already have the hash for the key lying around, use
795 fn search(&self, k: &K) -> Option<table::FullIndex> {
796 self.search_hashed(&self.make_hash(k), k)
799 fn pop_internal(&mut self, starting_index: table::FullIndex) -> Option<V> {
800 let starting_probe = starting_index.raw_index();
803 let mut probe = self.probe_next(starting_probe);
804 for _ in range(0u, self.table.size()) {
805 match self.table.peek(probe) {
806 table::Empty(_) => {}, // empty bucket. this is the end of our shifting.
807 table::Full(idx) => {
808 // Bucket that isn't us, which has a non-zero probe distance.
809 // This isn't the ending index, so keep searching.
810 if self.bucket_distance(&idx) != 0 {
811 probe = self.probe_next(probe);
815 // if we do have a bucket_distance of zero, we're at the end
816 // of what we need to shift.
825 let (_, _, retval) = self.table.take(starting_index);
827 let mut probe = starting_probe;
828 let mut next_probe = self.probe_next(probe);
830 // backwards-shift all the elements after our newly-deleted one.
831 while next_probe != ending_probe {
832 match self.table.peek(next_probe) {
834 // nothing to shift in. just empty it out.
835 match self.table.peek(probe) {
836 table::Empty(_) => {},
837 table::Full(idx) => { self.table.take(idx); }
840 table::Full(next_idx) => {
841 // something to shift. move it over!
842 let next_hash = next_idx.hash();
843 let (_, next_key, next_val) = self.table.take(next_idx);
844 match self.table.peek(probe) {
845 table::Empty(idx) => {
846 self.table.put(idx, next_hash, next_key, next_val);
848 table::Full(idx) => {
849 let (emptyidx, _, _) = self.table.take(idx);
850 self.table.put(emptyidx, next_hash, next_key, next_val);
857 next_probe = self.probe_next(next_probe);
860 // Done the backwards shift, but there's still an element left!
862 match self.table.peek(probe) {
863 table::Empty(_) => {},
864 table::Full(idx) => { self.table.take(idx); }
867 // Now we're done all our shifting. Return the value we grabbed
872 /// Like `pop`, but can operate on any type that is equivalent to a key.
874 pub fn pop_equiv<Q:Hash<S> + Equiv<K>>(&mut self, k: &Q) -> Option<V> {
875 if self.table.size() == 0 {
879 let potential_new_size = self.table.size() - 1;
880 self.make_some_room(potential_new_size);
882 let starting_index = match self.search_equiv(k) {
887 self.pop_internal(starting_index)
891 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Container for HashMap<K, V, H> {
892 /// Return the number of elements in the map
893 fn len(&self) -> uint { self.table.size() }
896 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Mutable for HashMap<K, V, H> {
897 /// Clear the map, removing all key-value pairs.
898 fn clear(&mut self) {
899 self.minimum_capacity = self.table.size();
901 for i in range(0, self.table.capacity()) {
902 match self.table.peek(i) {
903 table::Empty(_) => {},
904 table::Full(idx) => { self.table.take(idx); }
911 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> Map<K, V> for HashMap<K, V, H> {
912 fn find<'a>(&'a self, k: &K) -> Option<&'a V> {
913 self.search(k).map(|idx| {
914 let (_, v) = self.table.read(&idx);
919 fn contains_key(&self, k: &K) -> bool {
920 self.search(k).is_some()
924 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> MutableMap<K, V> for HashMap<K, V, H> {
925 fn find_mut<'a>(&'a mut self, k: &K) -> Option<&'a mut V> {
926 match self.search(k) {
929 let (_, v) = self.table.read_mut(&idx);
935 fn swap(&mut self, k: K, v: V) -> Option<V> {
936 let hash = self.make_hash(&k);
937 let potential_new_size = self.table.size() + 1;
938 self.make_some_room(potential_new_size);
940 for dib in range_inclusive(0u, self.table.size()) {
941 let probe = self.probe(&hash, dib);
943 let idx = match self.table.peek(probe) {
944 table::Empty(idx) => {
946 self.table.put(idx, hash, k, v);
949 table::Full(idx) => idx
952 if idx.hash() == hash {
953 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
955 // Found an existing value.
956 return Some(replace(bucket_v, v));
960 let probe_dib = self.bucket_distance(&idx);
963 // Found a luckier bucket. This implies that the key does not
964 // already exist in the hashtable. Just do a robin hood
966 self.robin_hood(idx, probe_dib, hash, k, v);
971 // We really shouldn't be here.
972 fail!("Internal HashMap error: Out of space.");
975 fn pop(&mut self, k: &K) -> Option<V> {
976 if self.table.size() == 0 {
980 let potential_new_size = self.table.size() - 1;
981 self.make_some_room(potential_new_size);
983 let starting_index = match self.search(k) {
988 self.pop_internal(starting_index)
993 impl<K: Hash + TotalEq, V> HashMap<K, V, sip::SipHasher> {
994 /// Create an empty HashMap.
995 pub fn new() -> HashMap<K, V, sip::SipHasher> {
996 HashMap::with_capacity(INITIAL_CAPACITY)
999 pub fn with_capacity(capacity: uint) -> HashMap<K, V, sip::SipHasher> {
1000 let mut r = rand::task_rng();
1003 let hasher = sip::SipHasher::new_with_keys(r0, r1);
1004 HashMap::with_capacity_and_hasher(capacity, hasher)
1008 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S>> HashMap<K, V, H> {
1009 pub fn with_hasher(hasher: H) -> HashMap<K, V, H> {
1010 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
1013 /// Create an empty HashMap with space for at least `capacity`
1014 /// elements, using `hasher` to hash the keys.
1016 /// Warning: `hasher` is normally randomly generated, and
1017 /// is designed to allow HashMaps to be resistant to attacks that
1018 /// cause many collisions and very poor performance. Setting it
1019 /// manually using this function can expose a DoS attack vector.
1020 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashMap<K, V, H> {
1021 let cap = num::next_power_of_two(max(INITIAL_CAPACITY, capacity));
1024 load_factor: INITIAL_LOAD_FACTOR,
1025 grow_at: grow_at(cap, INITIAL_LOAD_FACTOR),
1026 minimum_capacity: cap,
1027 table: table::RawTable::new(cap),
1031 /// The hashtable will never try to shrink below this size. You can use
1032 /// this function to reduce reallocations if your hashtable frequently
1033 /// grows and shrinks by large amounts.
1035 /// This function has no effect on the operational semantics of the
1036 /// hashtable, only on performance.
1037 pub fn reserve(&mut self, new_minimum_capacity: uint) {
1038 let cap = num::next_power_of_two(
1039 max(INITIAL_CAPACITY, new_minimum_capacity));
1041 self.minimum_capacity = cap;
1043 if self.table.capacity() < cap {
1048 /// Resizes the internal vectors to a new capacity. It's your responsibility to:
1049 /// 1) Make sure the new capacity is enough for all the elements, accounting
1050 /// for the load factor.
1051 /// 2) Ensure new_capacity is a power of two.
1052 fn resize(&mut self, new_capacity: uint) {
1053 assert!(self.table.size() <= new_capacity);
// Power-of-two check: n & (n-1) == 0 only for powers of two (and 0).
1054 assert!((new_capacity - 1) & new_capacity == 0);
1056 self.grow_at = grow_at(new_capacity, self.load_factor);
// Swap in a fresh table, then re-insert every (hash, key, value) triple
// from the old one; hashes are reused, so keys are not rehashed.
1058 let old_table = replace(&mut self.table, table::RawTable::new(new_capacity));
1059 let old_size = old_table.size();
1061 for (h, k, v) in old_table.move_iter() {
1062 self.manual_insert_hashed_nocheck(h, k, v);
// Sanity check: a resize must neither lose nor duplicate entries.
1065 assert_eq!(self.table.size(), old_size);
1068 /// Performs any necessary resize operations, such that there's space for
1069 /// new_size elements.
1070 fn make_some_room(&mut self, new_size: uint) {
1071 let should_shrink = new_size <= self.shrink_at();
1072 let should_grow = self.grow_at <= new_size;
// Grow/shrink by doubling/halving, which preserves the power-of-two
// capacity invariant required by `resize`. The `if should_grow` line
// itself is elided from this excerpt.
1075 let new_capacity = self.table.capacity() << 1;
1076 self.resize(new_capacity);
1077 } else if should_shrink {
1078 let new_capacity = self.table.capacity() >> 1;
1080 // Never shrink below the minimum capacity
1081 if self.minimum_capacity <= new_capacity {
1082 self.resize(new_capacity);
1087 /// Perform robin hood bucket stealing at the given 'index'. You must
1088 /// also pass that probe's "distance to initial bucket" so we don't have
1089 /// to recalculate it, as well as the total number of probes already done
1090 /// so we have some sort of upper bound on the number of probes to do.
1092 /// 'hash', 'k', and 'v' are the elements to robin hood into the hashtable.
1093 fn robin_hood(&mut self, mut index: table::FullIndex, mut dib_param: uint,
1094 mut hash: table::SafeHash, mut k: K, mut v: V) {
// Swap the incoming entry into the victim bucket, taking ownership of
// the displaced (hash, key, value), which must now be re-homed.
1096 let (old_hash, old_key, old_val) = {
1097 let (old_hash_ref, old_key_ref, old_val_ref) =
1098 self.table.read_all_mut(&index);
1100 let old_hash = replace(old_hash_ref, hash);
1101 let old_key = replace(old_key_ref, k);
1102 let old_val = replace(old_val_ref, v);
1104 (old_hash, old_key, old_val)
// Linear probe forward from the stolen bucket, carrying the evicted entry.
1107 let mut probe = self.probe_next(index.raw_index());
// `dib` counts the evicted entry's distance-to-initial-bucket as we walk;
// bounded by table size so a full table cannot loop forever.
1109 for dib in range(dib_param + 1, self.table.size()) {
1110 let full_index = match self.table.peek(probe) {
1111 table::Empty(idx) => {
// Empty slot: drop the evicted entry here and we're done.
1113 self.table.put(idx, old_hash, old_key, old_val);
1116 table::Full(idx) => idx
1119 let probe_dib = self.bucket_distance(&full_index);
1121 // Robin hood! Steal the spot.
// Core invariant: evict the "richer" occupant (smaller DIB) so probe
// lengths stay balanced across the table.
1122 if probe_dib < dib {
1124 dib_param = probe_dib;
// Elided lines presumably update index/hash/k/v via recursion or swap —
// confirm in the full source.
1131 probe = self.probe_next(probe);
// Reaching here means we probed every bucket without finding a home,
// which the load factor should make impossible.
1134 fail!("HashMap fatal error: 100% load factor?");
1138 /// Manually insert a pre-hashed key-value pair, without first checking
1139 /// that there's enough room in the buckets. Returns a reference to the
1140 /// newly insert value.
1142 /// If the key already exists, the hashtable will be returned untouched
1143 /// and a reference to the existing element will be returned.
1144 fn manual_insert_hashed_nocheck<'a>(
1145 &'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
// Probe outward from the hash's home bucket; `dib` is this entry's
// distance-to-initial-bucket at each step.
1147 for dib in range_inclusive(0u, self.table.size()) {
1148 let probe = self.probe(&hash, dib);
1150 let idx = match self.table.peek(probe) {
1151 table::Empty(idx) => {
// Empty bucket: claim it and hand back a reference to the new value.
1153 let fullidx = self.table.put(idx, hash, k, v);
1154 let (_, val) = self.table.read_mut(&fullidx);
1157 table::Full(idx) => idx
// Matching hashes: the elided lines (~1160 region) presumably also compare
// keys before treating this as "key already exists" — confirm in full source.
1160 if idx.hash() == hash {
1161 let (bucket_k, bucket_v) = self.table.read_mut(&idx);
1162 // FIXME #12147 the conditional return confuses
1163 // borrowck if we return bucket_v directly
1164 let bv: *mut V = bucket_v;
1166 // Key already exists. Get its reference.
1167 return unsafe {&mut *bv};
1171 let probe_dib = self.bucket_distance(&idx);
1173 if probe_dib < dib {
1174 // Found a luckier bucket than me. Better steal his spot.
1175 self.robin_hood(idx, probe_dib, hash, k, v);
1177 // Now that it's stolen, just read the value's pointer
1178 // right out of the table!
1179 match self.table.peek(probe) {
1180 table::Empty(_) => fail!("Just stole a spot, but now that spot's empty."),
1181 table::Full(idx) => {
1182 let (_, v) = self.table.read_mut(&idx);
1189 // We really shouldn't be here.
// `make_some_room` guarantees spare capacity before this is called, so the
// probe loop must terminate via one of the returns above.
1190 fail!("Internal HashMap error: Out of space.");
// Insert entry points: ensure capacity first, then delegate to the
// nocheck routine above.
1193 fn manual_insert_hashed<'a>(&'a mut self, hash: table::SafeHash, k: K, v: V) -> &'a mut V {
1194 let potential_new_size = self.table.size() + 1;
1195 self.make_some_room(potential_new_size);
1196 self.manual_insert_hashed_nocheck(hash, k, v)
1199 /// Inserts an element, returning a reference to that element inside the
1201 fn manual_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1202 let hash = self.make_hash(&k);
1203 self.manual_insert_hashed(hash, k, v)
1206 /// Return the value corresponding to the key in the map, or insert
1207 /// and return the value if it doesn't exist.
1208 pub fn find_or_insert<'a>(&'a mut self, k: K, v: V) -> &'a mut V {
1209 match self.search(&k) {
// Found: return the existing value; `v` is dropped unused in that arm.
1211 let (_, v_ref) = self.table.read_mut(&idx);
1214 None => self.manual_insert(k, v)
1218 /// Return the value corresponding to the key in the map, or create,
1219 /// insert, and return a new value if it doesn't exist.
1220 pub fn find_or_insert_with<'a>(&'a mut self, k: K, f: |&K| -> V)
// Lazily builds the value via closure `f` only on a miss.
1222 match self.search(&k) {
1224 let (_, v_ref) = self.table.read_mut(&idx);
// Elided: the None arm presumably computes `let v = f(&k)` before inserting.
1229 self.manual_insert(k, v)
1234 /// Insert a key-value pair into the map if the key is not already present.
1235 /// Otherwise, modify the existing value for the key.
1236 /// Returns the new or modified value for the key.
1237 pub fn insert_or_update_with<'a>(
1243 match self.search(&k) {
1244 None => self.manual_insert(k, v),
1246 let (_, v_ref) = self.table.read_mut(&idx);
1253 /// Retrieves a value for the given key, failing if the key is not present.
1254 pub fn get<'a>(&'a self, k: &K) -> &'a V {
1255 match self.find(k) {
1257 None => fail!("No entry found for key: {:?}", k)
1261 /// Retrieves a (mutable) value for the given key, failing if the key is not present.
1262 pub fn get_mut<'a>(&'a mut self, k: &K) -> &'a mut V {
1263 match self.find_mut(k) {
1265 None => fail!("No entry found for key: {:?}", k)
1269 /// Return true if the map contains a value for the specified key,
1270 /// using equivalence.
// `Equiv`-based lookups let callers probe with a borrowed/alternate key
// type (e.g. &str against ~str keys) without constructing a K.
1271 pub fn contains_key_equiv<Q: Hash<S> + Equiv<K>>(&self, key: &Q) -> bool {
1272 self.search_equiv(key).is_some()
1275 /// Return the value corresponding to the key in the map, using
1277 pub fn find_equiv<'a, Q: Hash<S> + Equiv<K>>(&'a self, k: &Q) -> Option<&'a V> {
1278 match self.search_equiv(k) {
1281 let (_, v_ref) = self.table.read(&idx);
1287 /// An iterator visiting all keys in arbitrary order.
1288 /// Iterator element type is &'a K.
1289 pub fn keys<'a>(&'a self) -> Keys<'a, K, V> {
1290 self.iter().map(|(k, _v)| k)
1293 /// An iterator visiting all values in arbitrary order.
1294 /// Iterator element type is &'a V.
1295 pub fn values<'a>(&'a self) -> Values<'a, K, V> {
1296 self.iter().map(|(_k, v)| v)
1299 /// An iterator visiting all key-value pairs in arbitrary order.
1300 /// Iterator element type is (&'a K, &'a V).
1301 pub fn iter<'a>(&'a self) -> Entries<'a, K, V> {
// Body elided; presumably delegates to self.table.iter().
1305 /// An iterator visiting all key-value pairs in arbitrary order,
1306 /// with mutable references to the values.
1307 /// Iterator element type is (&'a K, &'a mut V).
1308 pub fn mut_iter<'a>(&'a mut self) -> MutEntries<'a, K, V> {
1309 self.table.mut_iter()
1312 /// Creates a consuming iterator, that is, one that moves each key-value
1313 /// pair out of the map in arbitrary order. The map cannot be used after
1315 pub fn move_iter(self) -> MoveEntries<K, V> {
// Drop the stored SafeHash from each triple, yielding plain (K, V) pairs.
1316 self.table.move_iter().map(|(_, k, v)| (k, v))
// Convenience copies, available only when V: Clone.
1320 impl<K: TotalEq + Hash<S>, V: Clone, S, H: Hasher<S>> HashMap<K, V, H> {
1321 /// Like `find`, but returns a copy of the value.
1322 pub fn find_copy(&self, k: &K) -> Option<V> {
1323 self.find(k).map(|v| (*v).clone())
1326 /// Like `get`, but returns a copy of the value.
1327 pub fn get_copy(&self, k: &K) -> V {
1328 (*self.get(k)).clone()
// Equality: same length and every (key, value) of self found, equal, in other.
1332 impl<K: TotalEq + Hash<S>, V: Eq, S, H: Hasher<S>> Eq for HashMap<K, V, H> {
1333 fn eq(&self, other: &HashMap<K, V, H>) -> bool {
1334 if self.len() != other.len() { return false; }
1336 self.iter().all(|(key, value)| {
1337 match other.find(key) {
1339 Some(v) => *value == *v
// Formatting: renders as {k1: v1, k2: v2} (braces escaped in format strings
// of this Rust era as r"\{").
1345 impl<K: TotalEq + Hash<S> + Show, V: Show, S, H: Hasher<S>> Show for HashMap<K, V, H> {
1346 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1347 try!(write!(f.buf, r"\{"));
1349 for (i, (k, v)) in self.iter().enumerate() {
1350 if i != 0 { try!(write!(f.buf, ", ")); }
1351 try!(write!(f.buf, "{}: {}", *k, *v));
1354 write!(f.buf, r"\}")
// Default requires a Default hasher; SipHasher with random keys satisfies
// this via a separate impl (not shown in this excerpt).
1358 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Default for HashMap<K, V, H> {
1359 fn default() -> HashMap<K, V, H> {
1360 HashMap::with_capacity_and_hasher(INITIAL_CAPACITY, Default::default())
// Iterator type aliases: the map's iterators are thin adapters over the
// raw table's iterators, named here for the public API.
1364 /// HashMap iterator
1365 pub type Entries<'a, K, V> = table::Entries<'a, K, V>;
1367 /// HashMap mutable values iterator
1368 pub type MutEntries<'a, K, V> = table::MutEntries<'a, K, V>;
1370 /// HashMap move iterator
1371 pub type MoveEntries<K, V> =
1372 iter::Map<'static, (table::SafeHash, K, V), (K, V), table::MoveEntries<K, V>>;
1374 /// HashMap keys iterator
1375 pub type Keys<'a, K, V> =
1376 iter::Map<'static, (&'a K, &'a V), &'a K, Entries<'a, K, V>>;
1378 /// HashMap values iterator
1379 pub type Values<'a, K, V> =
1380 iter::Map<'static, (&'a K, &'a V), &'a V, Entries<'a, K, V>>;
// Build a map from an iterator of pairs, pre-sizing from the lower
// size_hint to reduce resizes.
1382 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> FromIterator<(K, V)> for HashMap<K, V, H> {
1383 fn from_iter<T: Iterator<(K, V)>>(iter: T) -> HashMap<K, V, H> {
1384 let (lower, _) = iter.size_hint();
1385 let mut map = HashMap::with_capacity_and_hasher(lower, Default::default());
// Elided: presumably map.extend(iter) then returns map.
1391 impl<K: TotalEq + Hash<S>, V, S, H: Hasher<S> + Default> Extendable<(K, V)> for HashMap<K, V, H> {
1392 fn extend<T: Iterator<(K, V)>>(&mut self, mut iter: T) {
1393 for (k, v) in iter {
// Elided: presumably self.insert(k, v).
1399 /// HashSet iterator
1400 pub type SetItems<'a, K> =
1401 iter::Map<'static, (&'a K, &'a ()), &'a K, Entries<'a, K, ()>>;
1403 /// HashSet move iterator
1404 pub type SetMoveItems<K> =
1405 iter::Map<'static, (K, ()), K, MoveEntries<K, ()>>;
1407 /// An implementation of a hash set using the underlying representation of a
1408 /// HashMap where the value is (). As with the `HashMap` type, a `HashSet`
1409 /// requires that the elements implement the `Eq` and `Hash` traits.
1411 pub struct HashSet<T, H = sip::SipHasher> {
// Single field: every set operation forwards to the unit-valued map.
1412 map: HashMap<T, (), H>
1415 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Eq for HashSet<T, H> {
1416 // FIXME #11998: Since the value is a (), and `find` returns a Some(&()),
1417 // we trigger #11998 when matching on it. I've fallen back to manual
1418 // iteration until this is fixed.
1419 fn eq(&self, other: &HashSet<T, H>) -> bool {
1420 if self.len() != other.len() { return false; }
1422 self.iter().all(|key| other.map.contains_key(key))
// Container/Mutable/Set/MutableSet: straight delegation to the inner map.
1426 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Container for HashSet<T, H> {
1427 fn len(&self) -> uint { self.map.len() }
1430 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Mutable for HashSet<T, H> {
1431 fn clear(&mut self) { self.map.clear() }
1434 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> Set<T> for HashSet<T, H> {
1435 fn contains(&self, value: &T) -> bool { self.map.search(value).is_some() }
1437 fn is_disjoint(&self, other: &HashSet<T, H>) -> bool {
1438 self.iter().all(|v| !other.contains(v))
1441 fn is_subset(&self, other: &HashSet<T, H>) -> bool {
1442 self.iter().all(|v| other.contains(v))
1446 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> MutableSet<T> for HashSet<T, H> {
1447 fn insert(&mut self, value: T) -> bool { self.map.insert(value, ()) }
1449 fn remove(&mut self, value: &T) -> bool { self.map.remove(value) }
// Default (SipHasher) constructors, mirroring HashMap::new/with_capacity.
1452 impl<T: Hash + TotalEq> HashSet<T, sip::SipHasher> {
1453 /// Create an empty HashSet
1454 pub fn new() -> HashSet<T, sip::SipHasher> {
1455 HashSet::with_capacity(INITIAL_CAPACITY)
1458 /// Create an empty HashSet with space for at least `n` elements in
1460 pub fn with_capacity(capacity: uint) -> HashSet<T, sip::SipHasher> {
1461 HashSet { map: HashMap::with_capacity(capacity) }
1465 impl<T: TotalEq + Hash<S>, S, H: Hasher<S>> HashSet<T, H> {
1466 pub fn with_hasher(hasher: H) -> HashSet<T, H> {
1467 HashSet::with_capacity_and_hasher(INITIAL_CAPACITY, hasher)
1470 /// Create an empty HashSet with space for at least `capacity`
1471 /// elements in the hash table, using `hasher` to hash the keys.
1473 /// Warning: `hasher` is normally randomly generated, and
1474 /// is designed to allow `HashSet`s to be resistant to attacks that
1475 /// cause many collisions and very poor performance. Setting it
1476 /// manually using this function can expose a DoS attack vector.
1477 pub fn with_capacity_and_hasher(capacity: uint, hasher: H) -> HashSet<T, H> {
1478 HashSet { map: HashMap::with_capacity_and_hasher(capacity, hasher) }
1481 /// Reserve space for at least `n` elements in the hash table.
1482 pub fn reserve(&mut self, n: uint) {
// Body elided; presumably self.map.reserve(n).
1486 /// Returns true if the hash set contains a value equivalent to the
1487 /// given query value.
1488 pub fn contains_equiv<Q: Hash<S> + Equiv<T>>(&self, value: &Q) -> bool {
1489 self.map.contains_key_equiv(value)
1492 /// An iterator visiting all elements in arbitrary order.
1493 /// Iterator element type is &'a T.
1494 pub fn iter<'a>(&'a self) -> SetItems<'a, T> {
// Body elided; presumably self.map.keys().
1498 /// Creates a consuming iterator, that is, one that moves each value out
1499 /// of the set in arbitrary order. The set cannot be used after calling
1501 pub fn move_iter(self) -> SetMoveItems<T> {
// Discard the unit values, yielding just the elements.
1502 self.map.move_iter().map(|(k, _)| k)
// Set-algebra iterators. Each pairs the other set with every element via
// Repeat+Zip (the closure capture workaround noted near SetAlgebraItems)
// and filters lazily — no intermediate set is built.
1505 /// Visit the values representing the difference
1506 pub fn difference<'a>(&'a self, other: &'a HashSet<T, H>) -> SetAlgebraItems<'a, T, H> {
// Elided: the Repeat(other).zip(self.iter()) prelude feeding this filter_map.
1509 .filter_map(|(other, elt)| {
1510 if !other.contains(elt) { Some(elt) } else { None }
1514 /// Visit the values representing the symmetric difference
1515 pub fn symmetric_difference<'a>(&'a self, other: &'a HashSet<T, H>)
1516 -> Chain<SetAlgebraItems<'a, T, H>, SetAlgebraItems<'a, T, H>> {
// (self - other) followed by (other - self).
1517 self.difference(other).chain(other.difference(self))
1520 /// Visit the values representing the intersection
1521 pub fn intersection<'a>(&'a self, other: &'a HashSet<T, H>)
1522 -> SetAlgebraItems<'a, T, H> {
1525 .filter_map(|(other, elt)| {
1526 if other.contains(elt) { Some(elt) } else { None }
1530 /// Visit the values representing the union
1531 pub fn union<'a>(&'a self, other: &'a HashSet<T, H>)
1532 -> Chain<SetItems<'a, T>, SetAlgebraItems<'a, T, H>> {
// All of self, then whatever of other is not already in self.
1533 self.iter().chain(other.difference(self))
// Formatting: renders as {e1, e2, ...}, matching the map's Show impl.
1538 impl<T: TotalEq + Hash<S> + fmt::Show, S, H: Hasher<S>> fmt::Show for HashSet<T, H> {
1539 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1540 try!(write!(f.buf, r"\{"));
1542 for (i, x) in self.iter().enumerate() {
1543 if i != 0 { try!(write!(f.buf, ", ")); }
1544 try!(write!(f.buf, "{}", *x));
1547 write!(f.buf, r"\}")
// Collection/extension impls mirroring the HashMap versions above.
1551 impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> FromIterator<T> for HashSet<T, H> {
1552 fn from_iter<I: Iterator<T>>(iter: I) -> HashSet<T, H> {
1553 let (lower, _) = iter.size_hint();
// Pre-size from the lower size_hint to reduce resizes during fill.
1554 let mut set = HashSet::with_capacity_and_hasher(lower, Default::default());
1560 impl<T: TotalEq + Hash<S>, S, H: Hasher<S> + Default> Extendable<T> for HashSet<T, H> {
1561 fn extend<I: Iterator<T>>(&mut self, mut iter: I) {
// Body elided; presumably inserts each yielded element.
1568 impl<T: TotalEq + Hash> Default for HashSet<T, sip::SipHasher> {
1569 fn default() -> HashSet<T> { HashSet::new() }
1572 // `Repeat` is used to feed the filter closure an explicit capture
1573 // of a reference to the other set
1574 /// Set operations iterator
1575 pub type SetAlgebraItems<'a, T, H> =
1576 FilterMap<'static, (&'a HashSet<T, H>, &'a T), &'a T,
1577 Zip<Repeat<&'a HashSet<T, H>>, SetItems<'a, T>>>;
1582 use std::cmp::Equiv;
1583 use std::hash::Hash;
1584 use std::iter::{Iterator,range_inclusive,range_step_inclusive};
1585 use std::local_data;
1588 struct KindaIntLike(int);
1590 impl Equiv<int> for KindaIntLike {
1591 fn equiv(&self, other: &int) -> bool {
1592 let KindaIntLike(this) = *self;
1596 impl<S: Writer> Hash<S> for KindaIntLike {
1597 fn hash(&self, state: &mut S) {
1598 let KindaIntLike(this) = *self;
1604 fn test_create_capacity_zero() {
1605 let mut m = HashMap::with_capacity(0);
1607 assert!(m.insert(1, 1));
1609 assert!(m.contains_key(&1));
1610 assert!(!m.contains_key(&0));
1615 let mut m = HashMap::new();
1616 assert_eq!(m.len(), 0);
1617 assert!(m.insert(1, 2));
1618 assert_eq!(m.len(), 1);
1619 assert!(m.insert(2, 4));
1620 assert_eq!(m.len(), 2);
1621 assert_eq!(*m.find(&1).unwrap(), 2);
1622 assert_eq!(*m.find(&2).unwrap(), 4);
1625 local_data_key!(drop_vector: vec::Vec<int>)
1627 #[deriving(Hash, Eq, TotalEq)]
1634 fn new(k: uint) -> Dropable {
1635 local_data::get_mut(drop_vector,
1636 |v| { v.unwrap().as_mut_slice()[k] += 1; });
1642 impl Drop for Dropable {
1643 fn drop(&mut self) {
1644 local_data::get_mut(drop_vector, |v|
1645 { v.unwrap().as_mut_slice()[self.k] -= 1; });
1651 local_data::set(drop_vector, vec::Vec::from_elem(200, 0));
1654 let mut m = HashMap::new();
1656 local_data::get(drop_vector, |v| {
1657 for i in range(0u, 200) {
1658 assert_eq!(v.unwrap().as_slice()[i], 0);
1662 for i in range(0u, 100) {
1663 let d1 = Dropable::new(i);
1664 let d2 = Dropable::new(i+100);
1668 local_data::get(drop_vector, |v| {
1669 for i in range(0u, 200) {
1670 assert_eq!(v.unwrap().as_slice()[i], 1);
1674 for i in range(0u, 50) {
1675 let k = Dropable::new(i);
1678 assert!(v.is_some());
1680 local_data::get(drop_vector, |v| {
1681 assert_eq!(v.unwrap().as_slice()[i], 1);
1682 assert_eq!(v.unwrap().as_slice()[i+100], 1);
1686 local_data::get(drop_vector, |v| {
1687 for i in range(0u, 50) {
1688 assert_eq!(v.unwrap().as_slice()[i], 0);
1689 assert_eq!(v.unwrap().as_slice()[i+100], 0);
1692 for i in range(50u, 100) {
1693 assert_eq!(v.unwrap().as_slice()[i], 1);
1694 assert_eq!(v.unwrap().as_slice()[i+100], 1);
1699 local_data::get(drop_vector, |v| {
1700 for i in range(0u, 200) {
1701 assert_eq!(v.unwrap().as_slice()[i], 0);
1707 fn test_empty_pop() {
1708 let mut m: HashMap<int, bool> = HashMap::new();
1709 assert_eq!(m.pop(&0), None);
1713 fn test_lots_of_insertions() {
1714 let mut m = HashMap::new();
1716 // Try this a few times to make sure we never screw up the hashmap's
1718 for _ in range(0, 10) {
1719 assert!(m.is_empty());
1721 for i in range_inclusive(1, 1000) {
1722 assert!(m.insert(i, i));
1724 for j in range_inclusive(1, i) {
1726 assert_eq!(r, Some(&j));
1729 for j in range_inclusive(i+1, 1000) {
1731 assert_eq!(r, None);
1735 for i in range_inclusive(1001, 2000) {
1736 assert!(!m.contains_key(&i));
1740 for i in range_inclusive(1, 1000) {
1741 assert!(m.remove(&i));
1743 for j in range_inclusive(1, i) {
1744 assert!(!m.contains_key(&j));
1747 for j in range_inclusive(i+1, 1000) {
1748 assert!(m.contains_key(&j));
1752 for i in range_inclusive(1, 1000) {
1753 assert!(!m.contains_key(&i));
1756 for i in range_inclusive(1, 1000) {
1757 assert!(m.insert(i, i));
1761 for i in range_step_inclusive(1000, 1, -1) {
1762 assert!(m.remove(&i));
1764 for j in range_inclusive(i, 1000) {
1765 assert!(!m.contains_key(&j));
1768 for j in range_inclusive(1, i-1) {
1769 assert!(m.contains_key(&j));
1776 fn test_find_mut() {
1777 let mut m = HashMap::new();
1778 assert!(m.insert(1, 12));
1779 assert!(m.insert(2, 8));
1780 assert!(m.insert(5, 14));
1782 match m.find_mut(&5) {
1783 None => fail!(), Some(x) => *x = new
1785 assert_eq!(m.find(&5), Some(&new));
1789 fn test_insert_overwrite() {
1790 let mut m = HashMap::new();
1791 assert!(m.insert(1, 2));
1792 assert_eq!(*m.find(&1).unwrap(), 2);
1793 assert!(!m.insert(1, 3));
1794 assert_eq!(*m.find(&1).unwrap(), 3);
1798 fn test_insert_conflicts() {
1799 let mut m = HashMap::with_capacity(4);
1800 assert!(m.insert(1, 2));
1801 assert!(m.insert(5, 3));
1802 assert!(m.insert(9, 4));
1803 assert_eq!(*m.find(&9).unwrap(), 4);
1804 assert_eq!(*m.find(&5).unwrap(), 3);
1805 assert_eq!(*m.find(&1).unwrap(), 2);
1809 fn test_conflict_remove() {
1810 let mut m = HashMap::with_capacity(4);
1811 assert!(m.insert(1, 2));
1812 assert_eq!(*m.find(&1).unwrap(), 2);
1813 assert!(m.insert(5, 3));
1814 assert_eq!(*m.find(&1).unwrap(), 2);
1815 assert_eq!(*m.find(&5).unwrap(), 3);
1816 assert!(m.insert(9, 4));
1817 assert_eq!(*m.find(&1).unwrap(), 2);
1818 assert_eq!(*m.find(&5).unwrap(), 3);
1819 assert_eq!(*m.find(&9).unwrap(), 4);
1820 assert!(m.remove(&1));
1821 assert_eq!(*m.find(&9).unwrap(), 4);
1822 assert_eq!(*m.find(&5).unwrap(), 3);
1826 fn test_is_empty() {
1827 let mut m = HashMap::with_capacity(4);
1828 assert!(m.insert(1, 2));
1829 assert!(!m.is_empty());
1830 assert!(m.remove(&1));
1831 assert!(m.is_empty());
1836 let mut m = HashMap::new();
1838 assert_eq!(m.pop(&1), Some(2));
1839 assert_eq!(m.pop(&1), None);
1843 #[allow(experimental)]
1844 fn test_pop_equiv() {
1845 let mut m = HashMap::new();
1847 assert_eq!(m.pop_equiv(&KindaIntLike(1)), Some(2));
1848 assert_eq!(m.pop_equiv(&KindaIntLike(1)), None);
1853 let mut m = HashMap::new();
1854 assert_eq!(m.swap(1, 2), None);
1855 assert_eq!(m.swap(1, 3), Some(2));
1856 assert_eq!(m.swap(1, 4), Some(3));
1860 fn test_move_iter() {
1862 let mut hm = HashMap::new();
1870 let v = hm.move_iter().collect::<Vec<(char, int)>>();
1871 assert!([('a', 1), ('b', 2)] == v.as_slice() || [('b', 2), ('a', 1)] == v.as_slice());
1876 let mut m = HashMap::with_capacity(4);
1877 for i in range(0u, 32) {
1878 assert!(m.insert(i, i*2));
1880 assert_eq!(m.len(), 32);
1882 let mut observed = 0;
1884 for (k, v) in m.iter() {
1885 assert_eq!(*v, *k * 2);
1886 observed |= 1 << *k;
1888 assert_eq!(observed, 0xFFFF_FFFF);
1893 let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')];
1894 let map = vec.move_iter().collect::<HashMap<int, char>>();
1895 let keys = map.keys().map(|&k| k).collect::<Vec<int>>();
1896 assert_eq!(keys.len(), 3);
1897 assert!(keys.contains(&1));
1898 assert!(keys.contains(&2));
1899 assert!(keys.contains(&3));
1904 let vec = vec![(1, 'a'), (2, 'b'), (3, 'c')];
1905 let map = vec.move_iter().collect::<HashMap<int, char>>();
1906 let values = map.values().map(|&v| v).collect::<Vec<char>>();
1907 assert_eq!(values.len(), 3);
1908 assert!(values.contains(&'a'));
1909 assert!(values.contains(&'b'));
1910 assert!(values.contains(&'c'));
1915 let mut m = HashMap::new();
1916 assert!(m.find(&1).is_none());
1920 Some(v) => assert!(*v == 2)
1926 let mut m1 = HashMap::new();
1931 let mut m2 = HashMap::new();
1944 let mut m = HashMap::new();
1946 assert_eq!(m.len(), 0);
1947 assert!(m.is_empty());
1950 let old_resize_at = m.grow_at;
1951 while old_resize_at == m.grow_at {
1956 assert_eq!(m.len(), i);
1957 assert!(!m.is_empty());
1961 fn test_find_equiv() {
1962 let mut m = HashMap::new();
1964 let (foo, bar, baz) = (1,2,3);
1965 m.insert(~"foo", foo);
1966 m.insert(~"bar", bar);
1967 m.insert(~"baz", baz);
1970 assert_eq!(m.find_equiv(&("foo")), Some(&foo));
1971 assert_eq!(m.find_equiv(&("bar")), Some(&bar));
1972 assert_eq!(m.find_equiv(&("baz")), Some(&baz));
1974 assert_eq!(m.find_equiv(&("qux")), None);
1978 fn test_from_iter() {
1979 let xs = [(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6)];
1981 let map: HashMap<int, int> = xs.iter().map(|&x| x).collect();
1983 for &(k, v) in xs.iter() {
1984 assert_eq!(map.find(&k), Some(&v));
1992 use std::container::Container;
1993 use std::slice::ImmutableEqVector;
1996 fn test_disjoint() {
1997 let mut xs = HashSet::new();
1998 let mut ys = HashSet::new();
1999 assert!(xs.is_disjoint(&ys));
2000 assert!(ys.is_disjoint(&xs));
2001 assert!(xs.insert(5));
2002 assert!(ys.insert(11));
2003 assert!(xs.is_disjoint(&ys));
2004 assert!(ys.is_disjoint(&xs));
2005 assert!(xs.insert(7));
2006 assert!(xs.insert(19));
2007 assert!(xs.insert(4));
2008 assert!(ys.insert(2));
2009 assert!(ys.insert(-11));
2010 assert!(xs.is_disjoint(&ys));
2011 assert!(ys.is_disjoint(&xs));
2012 assert!(ys.insert(7));
2013 assert!(!xs.is_disjoint(&ys));
2014 assert!(!ys.is_disjoint(&xs));
2018 fn test_subset_and_superset() {
2019 let mut a = HashSet::new();
2020 assert!(a.insert(0));
2021 assert!(a.insert(5));
2022 assert!(a.insert(11));
2023 assert!(a.insert(7));
2025 let mut b = HashSet::new();
2026 assert!(b.insert(0));
2027 assert!(b.insert(7));
2028 assert!(b.insert(19));
2029 assert!(b.insert(250));
2030 assert!(b.insert(11));
2031 assert!(b.insert(200));
2033 assert!(!a.is_subset(&b));
2034 assert!(!a.is_superset(&b));
2035 assert!(!b.is_subset(&a));
2036 assert!(!b.is_superset(&a));
2038 assert!(b.insert(5));
2040 assert!(a.is_subset(&b));
2041 assert!(!a.is_superset(&b));
2042 assert!(!b.is_subset(&a));
2043 assert!(b.is_superset(&a));
2048 let mut a = HashSet::new();
2049 for i in range(0u, 32) {
2050 assert!(a.insert(i));
2052 let mut observed = 0;
2054 observed |= 1 << *k;
2056 assert_eq!(observed, 0xFFFF_FFFF);
2060 fn test_intersection() {
2061 let mut a = HashSet::new();
2062 let mut b = HashSet::new();
2064 assert!(a.insert(11));
2065 assert!(a.insert(1));
2066 assert!(a.insert(3));
2067 assert!(a.insert(77));
2068 assert!(a.insert(103));
2069 assert!(a.insert(5));
2070 assert!(a.insert(-5));
2072 assert!(b.insert(2));
2073 assert!(b.insert(11));
2074 assert!(b.insert(77));
2075 assert!(b.insert(-9));
2076 assert!(b.insert(-42));
2077 assert!(b.insert(5));
2078 assert!(b.insert(3));
2081 let expected = [3, 5, 11, 77];
2082 for x in a.intersection(&b) {
2083 assert!(expected.contains(x));
2086 assert_eq!(i, expected.len());
2090 fn test_difference() {
2091 let mut a = HashSet::new();
2092 let mut b = HashSet::new();
2094 assert!(a.insert(1));
2095 assert!(a.insert(3));
2096 assert!(a.insert(5));
2097 assert!(a.insert(9));
2098 assert!(a.insert(11));
2100 assert!(b.insert(3));
2101 assert!(b.insert(9));
2104 let expected = [1, 5, 11];
2105 for x in a.difference(&b) {
2106 assert!(expected.contains(x));
2109 assert_eq!(i, expected.len());
2113 fn test_symmetric_difference() {
2114 let mut a = HashSet::new();
2115 let mut b = HashSet::new();
2117 assert!(a.insert(1));
2118 assert!(a.insert(3));
2119 assert!(a.insert(5));
2120 assert!(a.insert(9));
2121 assert!(a.insert(11));
2123 assert!(b.insert(-2));
2124 assert!(b.insert(3));
2125 assert!(b.insert(9));
2126 assert!(b.insert(14));
2127 assert!(b.insert(22));
2130 let expected = [-2, 1, 5, 11, 14, 22];
2131 for x in a.symmetric_difference(&b) {
2132 assert!(expected.contains(x));
2135 assert_eq!(i, expected.len());
2140 let mut a = HashSet::new();
2141 let mut b = HashSet::new();
2143 assert!(a.insert(1));
2144 assert!(a.insert(3));
2145 assert!(a.insert(5));
2146 assert!(a.insert(9));
2147 assert!(a.insert(11));
2148 assert!(a.insert(16));
2149 assert!(a.insert(19));
2150 assert!(a.insert(24));
2152 assert!(b.insert(-2));
2153 assert!(b.insert(1));
2154 assert!(b.insert(5));
2155 assert!(b.insert(9));
2156 assert!(b.insert(13));
2157 assert!(b.insert(19));
2160 let expected = [-2, 1, 3, 5, 9, 11, 13, 16, 19, 24];
2161 for x in a.union(&b) {
2162 assert!(expected.contains(x));
2165 assert_eq!(i, expected.len());
2169 fn test_from_iter() {
2170 let xs = [1, 2, 3, 4, 5, 6, 7, 8, 9];
2172 let set: HashSet<int> = xs.iter().map(|&x| x).collect();
2174 for x in xs.iter() {
2175 assert!(set.contains(x));
2180 fn test_move_iter() {
2182 let mut hs = HashSet::new();
2190 let v = hs.move_iter().collect::<Vec<char>>();
2191 assert!(['a', 'b'] == v.as_slice() || ['b', 'a'] == v.as_slice());
2196 // These constants once happened to expose a bug in insert().
2197 // I'm keeping them around to prevent a regression.
2198 let mut s1 = HashSet::new();
2204 let mut s2 = HashSet::new();
2218 let mut set: HashSet<int> = HashSet::new();
2219 let empty: HashSet<int> = HashSet::new();
2224 let set_str = format!("{}", set);
2226 assert!(set_str == ~"{1, 2}" || set_str == ~"{2, 1}");
2227 assert_eq!(format!("{}", empty), ~"{}");
2234 use self::test::Bencher;
2235 use std::iter::{range_inclusive};
2238 fn insert(b: &mut Bencher) {
2241 let mut m = HashMap::new();
2243 for i in range_inclusive(1, 1000) {
2256 fn find_existing(b: &mut Bencher) {
2259 let mut m = HashMap::new();
2261 for i in range_inclusive(1, 1000) {
2266 m.contains_key(&412);
2271 fn find_nonexisting(b: &mut Bencher) {
2274 let mut m = HashMap::new();
2276 for i in range_inclusive(1, 1000) {
2281 m.contains_key(&2048);
2286 fn hashmap_as_queue(b: &mut Bencher) {
2289 let mut m = HashMap::new();
2291 for i in range_inclusive(1, 1000) {
2299 m.insert(k + 1000, k + 1000);
2305 fn find_pop_insert(b: &mut Bencher) {
2308 let mut m = HashMap::new();
2310 for i in range_inclusive(1, 1000) {
2318 m.find(&(k + 2000));
2320 m.insert(k + 1000, k + 1000);