1 // Copyright 2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // This module represents all the internal representation and logic for a B-Tree's node
12 // with a safe interface, so that BTreeMap itself does not depend on any of these details.
14 pub use self::InsertionResult::*;
15 pub use self::SearchResult::*;
16 pub use self::TraversalItem::*;
20 use core::{slice, mem, ptr};
22 use core::borrow::BorrowFrom;
27 /// Represents the result of an Insertion: either the item fit, or the node had to split
28 pub enum InsertionResult<K, V> {
29 /// The inserted element fit
// NOTE(review): the unit variant (`Fit`) declaration line appears to be elided from this
// view (line-number gap 29 -> 31); `insert_as_leaf` below returns `Fit`, so this doc
// comment presumably documents that variant.
31 /// The inserted element did not fit, so the node was split
// Carries the median key/value that must move up to the parent, plus the new right sibling.
32 Split(K, V, Node<K, V>),
35 /// Represents the result of a search for a key in a single node
36 pub enum SearchResult {
37 /// The element was found at the given index
// NOTE(review): the variant declarations appear elided here (line-number gaps). Based on
// `search_linear` below, which returns `Found(i)` and `GoDown(i)` with `i` an index,
// they are presumably `Found(uint)` and `GoDown(uint)` — confirm against the full file.
39 /// The element wasn't found, but if it's anywhere, it must be beyond this edge
43 /// A B-Tree Node. We keep keys/edges/values separate to optimize searching for keys.
45 pub struct Node<K, V> {
46 // FIXME(Gankro): This representation is super safe and easy to reason about, but painfully
47 // inefficient. As three Vecs, each node consists of *9* words: (ptr, cap, size) * 3. In
48 // theory, if we take full control of allocation like HashMap's RawTable does,
49 // and restrict leaves to max size 256 (not unreasonable for a btree node) we can cut
50 // this down to just (ptr, cap: u8, size: u8, is_leaf: bool). With generic
51 // integer arguments, cap can even move into the type, reducing this just to
52 // (ptr, size, is_leaf). This could also have cache benefits for very small nodes, as keys
53 // could bleed into edges and vals.
55 // However doing this would require a fair amount of code to reimplement all
56 // the Vec logic and iterators. It would also use *way* more unsafe code, which sucks and is
57 // hard. For now, we accept this cost in the name of correctness and simplicity.
59 // As a compromise, keys and vals could be merged into one Vec<(K, V)>, which would shave
60 // off 3 words, but possibly hurt our cache efficiency during search, which only cares about
61 // keys. This would also avoid the Zip we use in our iterator implementations. This is
62 // probably worth investigating.
64 // Note that this space waste is especially tragic since we store the Nodes by value in their
65 // parent's edges Vec, so unoccupied spaces in the edges Vec are quite large, and we have
66 // to shift around a lot more bits during insertion/removal.
// NOTE(review): the `keys: Vec<K>` and `vals: Vec<V>` field declarations appear elided
// from this view (line-number gap 66 -> 69); methods throughout this file access
// `self.keys` and `self.vals` alongside `edges`.
// Child pointers. Empty for a leaf (see `is_leaf`); for an internal node the code below
// maintains the invariant edges.len() == keys.len() + 1.
69 edges: Vec<Node<K, V>>,
73 impl<K: Ord, V> Node<K, V> {
74 /// Searches for the given key in the node. If it finds an exact match,
75 /// `Found` will be yielded with the matching index. If it doesn't find an exact match,
76 /// `GoDown` will be yielded with the index of the subtree the key must lie in.
77 pub fn search<Sized? Q>(&self, key: &Q) -> SearchResult where Q: BorrowFrom<K> + Ord {
78 // FIXME(Gankro): Tune when to search linear or binary based on B (and maybe K/V).
79 // For the B configured as of this writing (B = 6), binary search was *significantly*
// (comment continues on an elided line — presumably "...slower for linear-friendly sizes")
81 self.search_linear(key)
// Linear scan over the node's keys; returns on the first key that is >= the target.
84 fn search_linear<Sized? Q>(&self, key: &Q) -> SearchResult where Q: BorrowFrom<K> + Ord {
85 for (i, k) in self.keys.iter().enumerate() {
86 match key.cmp(BorrowFrom::borrow_from(k)) {
// Exact match: the key lives at index i in this node.
88 Equal => return Found(i),
// Target sorts before keys[i], so it can only be in the subtree behind edge i.
89 Less => return GoDown(i),
// NOTE(review): the `Greater` arm, the fall-through `GoDown(len)` return, and the
// closing braces appear elided from this view (line numbers jump past 90).
97 impl <K, V> Node<K, V> {
98 /// Make a new internal node
// NOTE(review): the `Node { ... }` struct-literal header and closing braces of these
// constructors appear elided (line-number gaps around 100/104-106).
99 pub fn new_internal(capacity: uint) -> Node<K, V> {
101 keys: Vec::with_capacity(capacity),
102 vals: Vec::with_capacity(capacity),
// Internal nodes hold one more edge than keys, hence capacity + 1.
103 edges: Vec::with_capacity(capacity + 1),
107 /// Make a new leaf node
108 pub fn new_leaf(capacity: uint) -> Node<K, V> {
110 keys: Vec::with_capacity(capacity),
111 vals: Vec::with_capacity(capacity),
// (A leaf's `edges` Vec is presumably created empty on an elided line — `is_leaf`
// below relies on `edges.is_empty()`.)
116 /// Make a leaf root from scratch
117 pub fn make_leaf_root(b: uint) -> Node<K, V> {
118 Node::new_leaf(capacity_from_b(b))
121 /// Make an internal root and swap it with an old root
// `left_and_out` is both the old root (input) and the new root (output): the old root is
// swapped out into `node`, then pushed back in as the new root's left child.
122 pub fn make_internal_root(left_and_out: &mut Node<K,V>, b: uint, key: K, value: V,
// (the trailing `right: Node<K, V>)` part of the signature appears elided — `right`
// is pushed as the second edge below)
124 let mut node = Node::new_internal(capacity_from_b(b));
125 mem::swap(left_and_out, &mut node);
126 left_and_out.keys.push(key);
127 left_and_out.vals.push(value);
// Old root becomes the left child, `right` the right child, separated by (key, value).
128 left_and_out.edges.push(node);
129 left_and_out.edges.push(right);
133 /// How many key-value pairs the node contains
// NOTE(review): the one-line bodies of `len` and `capacity` appear elided (presumably
// `self.keys.len()` and `self.keys.capacity()` respectively — confirm against full file).
134 pub fn len(&self) -> uint {
138 /// How many key-value pairs the node can fit
139 pub fn capacity(&self) -> uint {
143 /// If the node has any children
// A node is a leaf exactly when it has no child edges.
144 pub fn is_leaf(&self) -> bool {
145 self.edges.is_empty()
148 /// if the node has too few elements
// Underfull nodes trigger rebalancing (steal or merge) in handle_underflow below.
149 pub fn is_underfull(&self) -> bool {
150 self.len() < min_load_from_capacity(self.capacity())
153 /// if the node cannot fit any more elements
// Full nodes must be split before another pair can be inserted (see insert_as_leaf).
154 pub fn is_full(&self) -> bool {
155 self.len() == self.capacity()
158 /// Swap the given key-value pair with the key-value pair stored in the node's index,
159 /// without checking bounds.
// SAFETY contract (caller's responsibility): `index` must be < self.len().
160 pub unsafe fn unsafe_swap(&mut self, index: uint, key: &mut K, val: &mut V) {
161 mem::swap(self.keys.unsafe_mut(index), key);
162 mem::swap(self.vals.unsafe_mut(index), val);
165 /// Get the node's key mutably without any bounds checks.
// SAFETY contract: `index` must be in bounds for `keys`.
166 pub unsafe fn unsafe_key_mut(&mut self, index: uint) -> &mut K {
167 self.keys.unsafe_mut(index)
170 /// Get the node's value at the given index
// Checked access: returns None when `index` is out of bounds.
// (The body of `val` appears elided — presumably `self.vals.get(index)` to mirror
// `val_mut` below; confirm against the full file.)
171 pub fn val(&self, index: uint) -> Option<&V> {
175 /// Get the node's value at the given index
176 pub fn val_mut(&mut self, index: uint) -> Option<&mut V> {
177 self.vals.get_mut(index)
180 /// Get the node's value mutably without any bounds checks.
// SAFETY contract: `index` must be in bounds for `vals`.
181 pub unsafe fn unsafe_val_mut(&mut self, index: uint) -> &mut V {
182 self.vals.unsafe_mut(index)
185 /// Get the node's edge at the given index
186 pub fn edge(&self, index: uint) -> Option<&Node<K,V>> {
187 self.edges.get(index)
190 /// Get the node's edge mutably at the given index
191 pub fn edge_mut(&mut self, index: uint) -> Option<&mut Node<K,V>> {
192 self.edges.get_mut(index)
195 /// Get the node's edge mutably without any bounds checks.
// SAFETY contract: `index` must be in bounds for `edges`.
196 pub unsafe fn unsafe_edge_mut(&mut self, index: uint) -> &mut Node<K,V> {
197 self.edges.unsafe_mut(index)
200 /// Pop an edge off the end of the node
// (Body elided — presumably `self.edges.pop()`; confirm against the full file.)
201 pub fn pop_edge(&mut self) -> Option<Node<K,V>> {
205 /// Try to insert this key-value pair at the given index in this internal node
206 /// If the node is full, we have to split it.
208 /// Returns a *mut V to the inserted value, because the caller may want this when
209 /// they're done mutating the tree, but we don't want to borrow anything for now.
210 pub fn insert_as_leaf(&mut self, index: uint, key: K, value: V) ->
211 (InsertionResult<K, V>, *mut V) {
// NOTE(review): the `if !self.is_full() { ... } else { ... }` scaffolding that selects
// between these two branches appears elided from this view (line-number gaps at
// 212/216/220/224/227-228).
213 // The element can fit, just insert it
214 self.insert_fit_as_leaf(index, key, value);
// Raw pointer to the just-inserted value; valid until the node's vals Vec reallocates.
215 (Fit, unsafe { self.unsafe_val_mut(index) as *mut _ })
217 // The element can't fit, this node is full. Split it into two nodes.
218 let (new_key, new_val, mut new_right) = self.split();
219 let left_len = self.len();
// Decide which half receives the new pair: left keeps indices 0..=left_len, right
// takes the rest (shifted past the median that moved up to the parent).
221 let ptr = if index <= left_len {
222 self.insert_fit_as_leaf(index, key, value);
223 unsafe { self.unsafe_val_mut(index) as *mut _ }
// Re-base the index into the new right node: subtract left's length and the median.
225 new_right.insert_fit_as_leaf(index - left_len - 1, key, value);
226 unsafe { new_right.unsafe_val_mut(index - left_len - 1) as *mut _ }
// Hand the median pair and new right sibling up to the caller for parent insertion.
229 (Split(new_key, new_val, new_right), ptr)
233 /// Try to insert this key-value pair at the given index in this internal node
234 /// If the node is full, we have to split it.
// Unlike insert_as_leaf, also inserts `right` as the edge following the new key, and
// does not return a value pointer.
235 pub fn insert_as_internal(&mut self, index: uint, key: K, value: V, right: Node<K, V>)
236 -> InsertionResult<K, V> {
// NOTE(review): the `if !self.is_full() { ... } else { ... }` scaffolding and the `Fit`
// return for the first branch appear elided (line-number gaps at 237/240-241/245/248/250-251).
238 // The element can fit, just insert it
239 self.insert_fit_as_internal(index, key, value, right);
242 // The element can't fit, this node is full. Split it into two nodes.
243 let (new_key, new_val, mut new_right) = self.split();
244 let left_len = self.len();
// Route the new pair (and its right edge) into whichever half it belongs to, re-basing
// the index for the right half past the promoted median.
246 if index <= left_len {
247 self.insert_fit_as_internal(index, key, value, right);
249 new_right.insert_fit_as_internal(index - left_len - 1, key, value, right);
// Median pair plus the new right sibling go up to the parent.
252 Split(new_key, new_val, new_right)
256 /// Remove the key-value pair at the given index
// Removes from `keys` and `vals` in lockstep; edges are untouched (leaf removal).
257 pub fn remove_as_leaf(&mut self, index: uint) -> (K, V) {
258 match (self.keys.remove(index), self.vals.remove(index)) {
259 (Some(k), Some(v)) => (k, v),
// NOTE(review): the fall-through arm (presumably a panic/unreachable for an
// out-of-bounds index or desynced keys/vals) appears elided from this view.
264 /// Handle an underflow in this node's child. We favour handling "to the left" because we know
265 /// we're empty, but our neighbour can be full. Handling to the left means when we choose to
266 /// steal, we pop off the end of our neighbour (always fast) and "unshift" ourselves
267 /// (always slow, but at least faster since we know we're half-empty).
268 /// Handling "to the right" reverses these roles. Of course, we merge whenever possible
269 /// because we want dense nodes, and merging is about equal work regardless of direction.
270 pub fn handle_underflow(&mut self, underflowed_child_index: uint) {
// A node with len() keys has len()+1 edges, so a child index of len() is still valid.
271 assert!(underflowed_child_index <= self.len());
// Prefer the left sibling when one exists (index > 0); only the leftmost child has to
// borrow from / merge with its right sibling.
// NOTE(review): the `unsafe { ... }` wrapper and `} else {` scaffolding around these
// calls appear elided (the callees below are `unsafe fn`s).
273 if underflowed_child_index > 0 {
274 self.handle_underflow_to_left(underflowed_child_index);
276 self.handle_underflow_to_right(underflowed_child_index);
// An iterator over the node's (key, value) pairs and child edges, interleaved so a full
// traversal visits edge 0, elem 0, edge 1, elem 1, ... (see AbsTraversal::next below).
// NOTE(review): the `AbsTraversal { ... }` struct-literal headers and the remaining
// fields (head_is_edge / tail_is_edge / has_edges, per the Iterator impls below) appear
// elided from all three constructors in this view.
281 pub fn iter<'a>(&'a self) -> Traversal<'a, K, V> {
282 let is_leaf = self.is_leaf();
284 elems: self.keys.iter().zip(self.vals.iter()),
285 edges: self.edges.iter(),
// Same traversal but yielding mutable value references (keys stay shared).
292 pub fn iter_mut<'a>(&'a mut self) -> MutTraversal<'a, K, V> {
293 let is_leaf = self.is_leaf();
295 elems: self.keys.iter().zip(self.vals.iter_mut()),
296 edges: self.edges.iter_mut(),
// Owning traversal: consumes the node, yielding keys, values, and child nodes by value.
303 pub fn into_iter(self) -> MoveTraversal<K, V> {
304 let is_leaf = self.is_leaf();
306 elems: self.keys.into_iter().zip(self.vals.into_iter()),
307 edges: self.edges.into_iter(),
315 // Private implementation details
316 impl<K, V> Node<K, V> {
317 /// Make a node from its raw components
// (Body elided — presumably a plain `Node { keys: ..., vals: ..., edges: ... }`
// struct literal; confirm against the full file.)
318 fn from_vecs(keys: Vec<K>, vals: Vec<V>, edges: Vec<Node<K, V>>) -> Node<K, V> {
326 /// We have somehow verified that this key-value pair will fit in this internal node,
327 /// so insert under that assumption.
// Unchecked-fit leaf insertion: shifts keys/vals right at `index`. Caller must have
// ensured !self.is_full().
328 fn insert_fit_as_leaf(&mut self, index: uint, key: K, val: V) {
329 self.keys.insert(index, key);
330 self.vals.insert(index, val);
333 /// We have somehow verified that this key-value pair will fit in this internal node,
334 /// so insert under that assumption
// Like insert_fit_as_leaf, but also splices in `right` as the edge AFTER the new key
// (index + 1), preserving the keys/edges interleaving invariant.
335 fn insert_fit_as_internal(&mut self, index: uint, key: K, val: V, right: Node<K, V>) {
336 self.keys.insert(index, key);
337 self.vals.insert(index, val);
338 self.edges.insert(index + 1, right);
341 /// Node is full, so split it into two nodes, and yield the middle-most key-value pair
342 /// because we have one too many, and our parent now has one too few
343 fn split(&mut self) -> (K, V, Node<K, V>) {
// Move the back halves of keys/vals into the new right node (see the free `split` below).
344 let r_keys = split(&mut self.keys);
345 let r_vals = split(&mut self.vals);
// Leaves have no edges to split; internal nodes split their edges too.
// NOTE(review): the leaf arm's expression (presumably an empty Vec) and the
// `} else {` scaffolding appear elided here (line-number gaps 346 -> 349).
346 let r_edges = if self.edges.is_empty() {
349 split(&mut self.edges)
352 let right = Node::from_vecs(r_keys, r_vals, r_edges);
// Pop the median pair off the left half to hand up to the parent.
354 let key = self.keys.pop().unwrap();
355 let val = self.vals.pop().unwrap();
// (The `(key, val, right)` return expression appears elided from this view.)
360 /// Right is underflowed. Try to steal from left,
361 /// but merge left and right if left is low too.
// SAFETY contract: 0 < underflowed_child_index <= self.len(), so a left sibling exists.
362 unsafe fn handle_underflow_to_left(&mut self, underflowed_child_index: uint) {
363 let left_len = self.edges[underflowed_child_index - 1].len();
// Left sibling has spare capacity above the minimum load: rotate one pair over.
// NOTE(review): the `} else {` between the steal and merge calls appears elided.
364 if left_len > min_load_from_capacity(self.capacity()) {
365 self.steal_to_left(underflowed_child_index);
// Otherwise both are at/below minimum: merging cannot overflow the combined node.
367 self.merge_children(underflowed_child_index - 1);
371 /// Left is underflowed. Try to steal from the right,
372 /// but merge left and right if right is low too.
// SAFETY contract: underflowed_child_index < self.len(), so a right sibling exists.
373 unsafe fn handle_underflow_to_right(&mut self, underflowed_child_index: uint) {
374 let right_len = self.edges[underflowed_child_index + 1].len();
375 if right_len > min_load_from_capacity(self.capacity()) {
376 self.steal_to_right(underflowed_child_index);
378 self.merge_children(underflowed_child_index);
382 /// Steal! Stealing is roughly analogous to a binary tree rotation.
383 /// In this case, we're "rotating" right.
// SAFETY contract: underflowed_child_index > 0 and both that child and its left sibling
// exist; the left sibling must be non-empty.
384 unsafe fn steal_to_left(&mut self, underflowed_child_index: uint) {
385 // Take the biggest stuff off left
386 let (mut key, mut val, edge) = {
387 let left = self.unsafe_edge_mut(underflowed_child_index - 1);
// `e` is Some for internal nodes, None for leaves (which have no edges to pop).
388 match (left.keys.pop(), left.vals.pop(), left.edges.pop()) {
389 (Some(k), Some(v), e) => (k, v, e),
// NOTE(review): the fall-through arm (presumably unreachable!/panic for an empty
// left sibling) appears elided from this view.
394 // Swap the parent's separating key-value pair with left's
395 self.unsafe_swap(underflowed_child_index - 1, &mut key, &mut val);
397 // Put them at the start of right
398 let right = self.unsafe_edge_mut(underflowed_child_index);
400 right.keys.insert(0, key);
401 right.vals.insert(0, val);
// The stolen edge (if any) becomes right's leftmost child.
// NOTE(review): the `match edge {` header and the `None` arm appear elided here.
404 Some(e) => right.edges.insert(0, e)
409 /// Steal! Stealing is roughly analogous to a binary tree rotation.
410 /// In this case, we're "rotating" left.
// SAFETY contract: the child at underflowed_child_index and its right sibling exist;
// the right sibling must be non-empty.
411 unsafe fn steal_to_right(&mut self, underflowed_child_index: uint) {
412 // Take the smallest stuff off right
413 let (mut key, mut val, edge) = {
414 let right = self.unsafe_edge_mut(underflowed_child_index + 1);
// Front removals shift the sibling's contents left; `e` is Some only for internal nodes.
415 match (right.keys.remove(0), right.vals.remove(0), right.edges.remove(0)) {
416 (Some(k), Some(v), e) => (k, v, e),
// NOTE(review): the fall-through arm (presumably unreachable!/panic for an empty
// right sibling) appears elided from this view.
421 // Swap the parent's separating key-value pair with right's
422 self.unsafe_swap(underflowed_child_index, &mut key, &mut val);
424 // Put them at the end of left
425 let left = self.unsafe_edge_mut(underflowed_child_index);
// NOTE(review): the pushes of `key`/`val` onto left's keys/vals, and the `match edge {`
// header with its `None` arm, appear elided here (line-number gaps 426 -> 431).
431 Some(e) => left.edges.push(e)
436 /// Merge! Left and right will be smooshed into one node, along with the key-value
437 /// pair that separated them in their parent.
// SAFETY contract: left_index and left_index + 1 are both valid child indices. Only
// called when the combined lengths fit in one node (both children underloaded).
438 unsafe fn merge_children(&mut self, left_index: uint) {
439 // Permanently remove right's index, and the key-value pair that separates
// (comment continues on an elided line — presumably "...left and right")
441 let (key, val, right) = {
442 match (self.keys.remove(left_index),
443 self.vals.remove(left_index),
444 self.edges.remove(left_index + 1)) {
445 (Some(k), Some(v), Some(e)) => (k, v, e),
// NOTE(review): the fall-through arm (presumably unreachable!/panic) appears elided.
450 // Give left right's stuff.
451 let left = self.unsafe_edge_mut(left_index);
452 left.absorb(key, val, right);
455 /// Take all the values from right, separated by the given key and value
456 fn absorb(&mut self, key: K, val: V, right: Node<K, V>) {
457 // Just as a sanity check, make sure we can fit this guy in
458 debug_assert!(self.len() + right.len() <= self.capacity())
// NOTE(review): the pushes of `key`/`val` onto self's keys/vals appear elided here
// (line-number gap 458 -> 462) — without them keys/vals/edges would desynchronize.
// Append right's contents wholesale; ordering is preserved because everything in
// `right` sorts after the separating key.
462 self.keys.extend(right.keys.into_iter());
463 self.vals.extend(right.vals.into_iter());
464 self.edges.extend(right.edges.into_iter());
468 /// Takes a Vec, and splits half the elements into a new one.
469 fn split<T>(left: &mut Vec<T>) -> Vec<T> {
470 // This function is intended to be called on a full Vec of size 2B - 1 (keys, values),
471 // or 2B (edges). In the former case, left should get B elements, and right should get
472 // B - 1. In the latter case, both should get B. Therefore, we can just always take the last
473 // size / 2 elements from left, and put them on right. This also ensures this method is
474 // safe, even if the Vec isn't full. Just uninteresting for our purposes.
475 let len = left.len();
476 let right_len = len / 2;
477 let left_len = len - right_len;
// Right gets the same capacity as left so both halves can grow back to full.
478 let mut right = Vec::with_capacity(left.capacity());
// NOTE(review): the `unsafe {` opener appears elided here (line-number gap 478 -> 480);
// the raw copy + set_len below are unsafe operations.
// Bitwise-move the back half of left into right's uninitialized buffer, then fix up
// both lengths so ownership of those elements transfers exactly once (no drop, no copy
// of ownership).
480 let left_ptr = left.unsafe_get(left_len) as *const _;
481 let right_ptr = right.as_mut_ptr();
482 ptr::copy_nonoverlapping_memory(right_ptr, left_ptr, right_len);
// Shrinking left's len first "forgets" the moved-out elements; raising right's len
// claims them. Source and destination cannot overlap: right is a fresh allocation.
483 left.set_len(left_len);
484 right.set_len(right_len);
// (The `right` return expression and closing braces appear elided from this view.)
489 /// Get the capacity of a node from the order of the parent B-Tree
// (Body elided — for a B-Tree of order b this is conventionally 2 * b - 1, matching the
// "full Vec of size 2B - 1" comment in `split` above; confirm against the full file.)
490 fn capacity_from_b(b: uint) -> uint {
494 /// Get the minimum load of a node from its capacity
// (Body elided — used by is_underfull and the steal/merge decision above; the exact
// formula is not visible in this view.)
495 fn min_load_from_capacity(cap: uint) -> uint {
500 /// An abstraction over all the different kinds of traversals a node supports
// Generic over the element iterator (`Elems`, yielding (K, V) pairs) and the edge
// iterator (`Edges`), so shared/mutable/owning traversals reuse one implementation.
// NOTE(review): the field declarations appear elided from this view; the Iterator impls
// below access `elems`, `edges`, `head_is_edge`, `tail_is_edge`, and `has_edges`.
501 struct AbsTraversal<Elems, Edges> {
509 /// A single atomic step in a traversal. Either an element is visited, or an edge is followed
// NOTE(review): the variant declarations appear elided; the impls below construct
// `Elem(k, v)` and `Edge(node)`, so presumably `Elem(K, V)` and `Edge(E)`.
510 pub enum TraversalItem<K, V, E> {
515 /// A traversal over a node's entries and edges
516 pub type Traversal<'a, K, V> = AbsTraversal<Zip<slice::Items<'a, K>, slice::Items<'a, V>>,
517 slice::Items<'a, Node<K, V>>>;
519 /// A mutable traversal over a node's entries and edges
520 pub type MutTraversal<'a, K, V> = AbsTraversal<Zip<slice::Items<'a, K>, slice::MutItems<'a, V>>,
521 slice::MutItems<'a, Node<K, V>>>;
523 /// An owning traversal over a node's entries and edges
524 pub type MoveTraversal<K, V> = AbsTraversal<Zip<vec::MoveItems<K>, vec::MoveItems<V>>,
525 vec::MoveItems<Node<K, V>>>;
// Forward traversal: alternates edge / element / edge / element ... starting with an
// edge when the node has edges (internal node), so output is in tree order.
528 impl<K, V, E, Elems: Iterator<(K, V)>, Edges: Iterator<E>>
529 Iterator<TraversalItem<K, V, E>> for AbsTraversal<Elems, Edges> {
531 fn next(&mut self) -> Option<TraversalItem<K, V, E>> {
// Flip the edge/element phase each call.
532 let head_is_edge = self.head_is_edge;
533 self.head_is_edge = !head_is_edge;
// For leaves (has_edges false) every step yields an element; the phase flag still
// toggles but the edge branch is never taken.
// NOTE(review): the `} else {` between the two branches appears elided.
535 if head_is_edge && self.has_edges {
536 self.edges.next().map(|node| Edge(node))
538 self.elems.next().map(|(k, v)| Elem(k, v))
// Backward traversal: mirrors `next` from the tail end, maintaining its own phase flag
// so forward and backward iteration can interleave without interfering.
543 impl<K, V, E, Elems: DoubleEndedIterator<(K, V)>, Edges: DoubleEndedIterator<E>>
544 DoubleEndedIterator<TraversalItem<K, V, E>> for AbsTraversal<Elems, Edges> {
546 fn next_back(&mut self) -> Option<TraversalItem<K, V, E>> {
547 let tail_is_edge = self.tail_is_edge;
548 self.tail_is_edge = !tail_is_edge;
// NOTE(review): the `} else {` branch scaffolding and the impl's closing braces run
// past / are elided from this view.
550 if tail_is_edge && self.has_edges {
551 self.edges.next_back().map(|node| Edge(node))
553 self.elems.next_back().map(|(k, v)| Elem(k, v))