1 // Copyright 2017 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 //! This module contains an sort algorithm based on Orson Peters' pattern-defeating quicksort,
14 //! published at: https://github.com/orlp/pdqsort
16 //! Unstable sorting is compatible with libcore because it doesn't allocate memory, unlike our
17 //! stable sorting implementation.
23 /// Holds a value, but never drops it.
24 #[allow(unions_with_drop_fields)]
29 /// When dropped, copies from `src` into `dest`.
30 struct CopyOnDrop<T> {
35 impl<T> Drop for CopyOnDrop<T> {
37 unsafe { ptr::copy_nonoverlapping(self.src, self.dest, 1); }
41 /// Shifts the first element to the right until it encounters a greater or equal element.
42 fn shift_head<T, F>(v: &mut [T], is_less: &mut F)
43 where F: FnMut(&T, &T) -> bool
47 // If the first two elements are out-of-order...
48 if len >= 2 && is_less(v.get_unchecked(1), v.get_unchecked(0)) {
49 // Read the first element into a stack-allocated variable. If a following comparison
50 // operation panics, `hole` will get dropped and automatically write the element back
52 let mut tmp = NoDrop { value: ptr::read(v.get_unchecked(0)) };
53 let mut hole = CopyOnDrop {
55 dest: v.get_unchecked_mut(1),
57 ptr::copy_nonoverlapping(v.get_unchecked(1), v.get_unchecked_mut(0), 1);
60 if !is_less(v.get_unchecked(i), &tmp.value) {
64 // Move `i`-th element one place to the left, thus shifting the hole to the right.
65 ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i - 1), 1);
66 hole.dest = v.get_unchecked_mut(i);
68 // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
73 /// Shifts the last element to the left until it encounters a smaller or equal element.
74 fn shift_tail<T, F>(v: &mut [T], is_less: &mut F)
75 where F: FnMut(&T, &T) -> bool
79 // If the last two elements are out-of-order...
80 if len >= 2 && is_less(v.get_unchecked(len - 1), v.get_unchecked(len - 2)) {
81 // Read the last element into a stack-allocated variable. If a following comparison
82 // operation panics, `hole` will get dropped and automatically write the element back
84 let mut tmp = NoDrop { value: ptr::read(v.get_unchecked(len - 1)) };
85 let mut hole = CopyOnDrop {
87 dest: v.get_unchecked_mut(len - 2),
89 ptr::copy_nonoverlapping(v.get_unchecked(len - 2), v.get_unchecked_mut(len - 1), 1);
91 for i in (0..len-2).rev() {
92 if !is_less(&tmp.value, v.get_unchecked(i)) {
96 // Move `i`-th element one place to the right, thus shifting the hole to the left.
97 ptr::copy_nonoverlapping(v.get_unchecked(i), v.get_unchecked_mut(i + 1), 1);
98 hole.dest = v.get_unchecked_mut(i);
100 // `hole` gets dropped and thus copies `tmp` into the remaining hole in `v`.
105 /// Partially sorts a slice by shifting several out-of-order elements around.
107 /// Returns `true` if the slice is sorted at the end. This function is `O(n)` worst-case.
109 fn partial_insertion_sort<T, F>(v: &mut [T], is_less: &mut F) -> bool
110 where F: FnMut(&T, &T) -> bool
112 // Maximum number of adjacent out-of-order pairs that will get shifted.
113 const MAX_STEPS: usize = 5;
114 // If the slice is shorter than this, don't shift any elements.
115 const SHORTEST_SHIFTING: usize = 50;
120 for _ in 0..MAX_STEPS {
122 // Find the next pair of adjacent out-of-order elements.
123 while i < len && !is_less(v.get_unchecked(i), v.get_unchecked(i - 1)) {
133 // Don't shift elements on short arrays, that has a performance cost.
134 if len < SHORTEST_SHIFTING {
138 // Swap the found pair of elements. This puts them in correct order.
141 // Shift the smaller element to the left.
142 shift_tail(&mut v[..i], is_less);
143 // Shift the greater element to the right.
144 shift_head(&mut v[i..], is_less);
147 // Didn't manage to sort the slice in the limited number of steps.
151 /// Sorts a slice using insertion sort, which is `O(n^2)` worst-case.
152 fn insertion_sort<T, F>(v: &mut [T], is_less: &mut F)
153 where F: FnMut(&T, &T) -> bool
155 for i in 2..v.len()+1 {
156 shift_tail(&mut v[..i], is_less);
160 /// Sorts `v` using heapsort, which guarantees `O(n log n)` worst-case.
162 pub fn heapsort<T, F>(v: &mut [T], is_less: &mut F)
163 where F: FnMut(&T, &T) -> bool
165 // This binary heap respects the invariant `parent >= child`.
166 let mut sift_down = |v: &mut [T], mut node| {
168 // Children of `node`:
169 let left = 2 * node + 1;
170 let right = 2 * node + 2;
172 // Choose the greater child.
173 let greater = if right < v.len() && is_less(&v[left], &v[right]) {
179 // Stop if the invariant holds at `node`.
180 if greater >= v.len() || !is_less(&v[node], &v[greater]) {
184 // Swap `node` with the greater child, move one step down, and continue sifting.
185 v.swap(node, greater);
190 // Build the heap in linear time.
191 for i in (0 .. v.len() / 2).rev() {
195 // Pop maximal elements from the heap.
196 for i in (1 .. v.len()).rev() {
198 sift_down(&mut v[..i], 0);
202 /// Partitions `v` into elements smaller than `pivot`, followed by elements greater than or equal
205 /// Returns the number of elements smaller than `pivot`.
207 /// Partitioning is performed block-by-block in order to minimize the cost of branching operations.
208 /// This idea is presented in the [BlockQuicksort][pdf] paper.
210 /// [pdf]: http://drops.dagstuhl.de/opus/volltexte/2016/6389/pdf/LIPIcs-ESA-2016-38.pdf
211 fn partition_in_blocks<T, F>(v: &mut [T], pivot: &T, is_less: &mut F) -> usize
212 where F: FnMut(&T, &T) -> bool
214 // Number of elements in a typical block.
215 const BLOCK: usize = 128;
217 // The partitioning algorithm repeats the following steps until completion:
219 // 1. Trace a block from the left side to identify elements greater than or equal to the pivot.
220 // 2. Trace a block from the right side to identify elements smaller than the pivot.
221 // 3. Exchange the identified elements between the left and right side.
223 // We keep the following variables for a block of elements:
225 // 1. `block` - Number of elements in the block.
226 // 2. `start` - Start pointer into the `offsets` array.
227 // 3. `end` - End pointer into the `offsets` array.
228 // 4. `offsets - Indices of out-of-order elements within the block.
230 // The current block on the left side (from `l` to `l.offset(block_l)`).
231 let mut l = v.as_mut_ptr();
232 let mut block_l = BLOCK;
233 let mut start_l = ptr::null_mut();
234 let mut end_l = ptr::null_mut();
235 let mut offsets_l: [u8; BLOCK] = unsafe { mem::uninitialized() };
237 // The current block on the right side (from `r.offset(-block_r)` to `r`).
238 let mut r = unsafe { l.offset(v.len() as isize) };
239 let mut block_r = BLOCK;
240 let mut start_r = ptr::null_mut();
241 let mut end_r = ptr::null_mut();
242 let mut offsets_r: [u8; BLOCK] = unsafe { mem::uninitialized() };
244 // FIXME: When we get VLAs, try creating one array of length `min(v.len(), 2 * BLOCK)` rather
245 // than two fixed-size arrays of length `BLOCK`. VLAs might be more cache-efficient.
247 // Returns the number of elements between pointers `l` (inclusive) and `r` (exclusive).
248 fn width<T>(l: *mut T, r: *mut T) -> usize {
249 assert!(mem::size_of::<T>() > 0);
250 (r as usize - l as usize) / mem::size_of::<T>()
254 // We are done with partitioning block-by-block when `l` and `r` get very close. Then we do
255 // some patch-up work in order to partition the remaining elements in between.
256 let is_done = width(l, r) <= 2 * BLOCK;
259 // Number of remaining elements (still not compared to the pivot).
260 let mut rem = width(l, r);
261 if start_l < end_l || start_r < end_r {
265 // Adjust block sizes so that the left and right block don't overlap, but get perfectly
266 // aligned to cover the whole remaining gap.
269 } else if start_r < end_r {
273 block_r = rem - block_l;
275 debug_assert!(block_l <= BLOCK && block_r <= BLOCK);
276 debug_assert!(width(l, r) == block_l + block_r);
279 if start_l == end_l {
280 // Trace `block_l` elements from the left side.
281 start_l = offsets_l.as_mut_ptr();
282 end_l = offsets_l.as_mut_ptr();
285 for i in 0..block_l {
287 // Branchless comparison.
289 end_l = end_l.offset(!is_less(&*elem, pivot) as isize);
290 elem = elem.offset(1);
295 if start_r == end_r {
296 // Trace `block_r` elements from the right side.
297 start_r = offsets_r.as_mut_ptr();
298 end_r = offsets_r.as_mut_ptr();
301 for i in 0..block_r {
303 // Branchless comparison.
304 elem = elem.offset(-1);
306 end_r = end_r.offset(is_less(&*elem, pivot) as isize);
311 // Number of out-of-order elements to swap between the left and right side.
312 let count = cmp::min(width(start_l, end_l), width(start_r, end_r));
315 macro_rules! left { () => { l.offset(*start_l as isize) } }
316 macro_rules! right { () => { r.offset(-(*start_r as isize) - 1) } }
318 // Instead of swapping one pair at the time, it is more efficient to perform a cyclic
319 // permutation. This is not strictly equivalent to swapping, but produces a similar
320 // result using fewer memory operations.
322 let tmp = ptr::read(left!());
323 ptr::copy_nonoverlapping(right!(), left!(), 1);
326 start_l = start_l.offset(1);
327 ptr::copy_nonoverlapping(left!(), right!(), 1);
328 start_r = start_r.offset(1);
329 ptr::copy_nonoverlapping(right!(), left!(), 1);
332 ptr::copy_nonoverlapping(&tmp, right!(), 1);
334 start_l = start_l.offset(1);
335 start_r = start_r.offset(1);
339 if start_l == end_l {
340 // All out-of-order elements in the left block were moved. Move to the next block.
341 l = unsafe { l.offset(block_l as isize) };
344 if start_r == end_r {
345 // All out-of-order elements in the right block were moved. Move to the previous block.
346 r = unsafe { r.offset(-(block_r as isize)) };
354 // All that remains now is at most one block (either the left or the right) with out-of-order
355 // elements that need to be moved. Such remaining elements can be simply shifted to the end
356 // within their block.
359 // The left block remains.
360 // Move it's remaining out-of-order elements to the far right.
361 debug_assert_eq!(width(l, r), block_l);
362 while start_l < end_l {
364 end_l = end_l.offset(-1);
365 ptr::swap(l.offset(*end_l as isize), r.offset(-1));
369 width(v.as_mut_ptr(), r)
370 } else if start_r < end_r {
371 // The right block remains.
372 // Move it's remaining out-of-order elements to the far left.
373 debug_assert_eq!(width(l, r), block_r);
374 while start_r < end_r {
376 end_r = end_r.offset(-1);
377 ptr::swap(l, r.offset(-(*end_r as isize) - 1));
381 width(v.as_mut_ptr(), l)
383 // Nothing else to do, we're done.
384 width(v.as_mut_ptr(), l)
388 /// Partitions `v` into elements smaller than `v[pivot]`, followed by elements greater than or
389 /// equal to `v[pivot]`.
391 /// Returns a tuple of:
393 /// 1. Number of elements smaller than `v[pivot]`.
394 /// 2. True if `v` was already partitioned.
395 fn partition<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> (usize, bool)
396 where F: FnMut(&T, &T) -> bool
398 let (mid, was_partitioned) = {
399 // Place the pivot at the beginning of slice.
401 let (pivot, v) = v.split_at_mut(1);
402 let pivot = &mut pivot[0];
404 // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
405 // operation panics, the pivot will be automatically written back into the slice.
406 let mut tmp = NoDrop { value: unsafe { ptr::read(pivot) } };
407 let _pivot_guard = CopyOnDrop {
408 src: unsafe { &mut tmp.value },
411 let pivot = unsafe { &tmp.value };
413 // Find the first pair of out-of-order elements.
417 // Find the first element greater then or equal to the pivot.
418 while l < r && is_less(v.get_unchecked(l), pivot) {
422 // Find the last element smaller that the pivot.
423 while l < r && !is_less(v.get_unchecked(r - 1), pivot) {
428 (l + partition_in_blocks(&mut v[l..r], pivot, is_less), l >= r)
430 // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated
431 // variable) back into the slice where it originally was. This step is critical in ensuring
435 // Place the pivot between the two partitions.
438 (mid, was_partitioned)
441 /// Partitions `v` into elements equal to `v[pivot]` followed by elements greater than `v[pivot]`.
443 /// Returns the number of elements equal to the pivot. It is assumed that `v` does not contain
444 /// elements smaller than the pivot.
445 fn partition_equal<T, F>(v: &mut [T], pivot: usize, is_less: &mut F) -> usize
446 where F: FnMut(&T, &T) -> bool
448 // Place the pivot at the beginning of slice.
450 let (pivot, v) = v.split_at_mut(1);
451 let pivot = &mut pivot[0];
453 // Read the pivot into a stack-allocated variable for efficiency. If a following comparison
454 // operation panics, the pivot will be automatically written back into the slice.
455 let mut tmp = NoDrop { value: unsafe { ptr::read(pivot) } };
456 let _pivot_guard = CopyOnDrop {
457 src: unsafe { &mut tmp.value },
460 let pivot = unsafe { &tmp.value };
462 // Now partition the slice.
467 // Find the first element greater that the pivot.
468 while l < r && !is_less(pivot, v.get_unchecked(l)) {
472 // Find the last element equal to the pivot.
473 while l < r && is_less(pivot, v.get_unchecked(r - 1)) {
482 // Swap the found pair of out-of-order elements.
484 ptr::swap(v.get_unchecked_mut(l), v.get_unchecked_mut(r));
489 // We found `l` elements equal to the pivot. Add 1 to account for the pivot itself.
492 // `_pivot_guard` goes out of scope and writes the pivot (which is a stack-allocated variable)
493 // back into the slice where it originally was. This step is critical in ensuring safety!
496 /// Scatters some elements around in an attempt to break patterns that might cause imbalanced
497 /// partitions in quicksort.
499 fn break_patterns<T>(v: &mut [T]) {
503 // A random number will be taken modulo this one. The modulus is a power of two so that we
504 // can simply take bitwise "and", thus avoiding costly CPU operations.
505 let modulus = (len / 4).next_power_of_two();
506 debug_assert!(modulus >= 1 && modulus <= len / 2);
508 // Pseudorandom number generation from the "Xorshift RNGs" paper by George Marsaglia.
509 let mut random = len;
510 random ^= random << 13;
511 random ^= random >> 17;
512 random ^= random << 5;
513 random &= modulus - 1;
514 debug_assert!(random < len / 2);
518 debug_assert!(a >= 1 && a < len - 2);
521 let b = len / 4 + random;
522 debug_assert!(b >= 1 && b < len - 2);
524 // Swap neighbourhoods of `a` and `b`.
526 v.swap(a - 1 + i, b - 1 + i);
531 /// Chooses a pivot in `v` and returns the index and `true` if the slice is likely already sorted.
533 /// Elements in `v` might be reordered in the process.
534 fn choose_pivot<T, F>(v: &mut [T], is_less: &mut F) -> (usize, bool)
535 where F: FnMut(&T, &T) -> bool
537 // Minimum length to choose the median-of-medians method.
538 // Shorter slices use the simple median-of-three method.
539 const SHORTEST_MEDIAN_OF_MEDIANS: usize = 50;
540 // Maximum number of swaps that can be performed in this function.
541 const MAX_SWAPS: usize = 4 * 3;
545 // Three indices near which we are going to choose a pivot.
546 let mut a = len / 4 * 1;
547 let mut b = len / 4 * 2;
548 let mut c = len / 4 * 3;
550 // Counts the total number of swaps we are about to perform while sorting indices.
554 // Swaps indices so that `v[a] <= v[b]`.
555 let mut sort2 = |a: &mut usize, b: &mut usize| unsafe {
556 if is_less(v.get_unchecked(*b), v.get_unchecked(*a)) {
562 // Swaps indices so that `v[a] <= v[b] <= v[c]`.
563 let mut sort3 = |a: &mut usize, b: &mut usize, c: &mut usize| {
569 if len >= SHORTEST_MEDIAN_OF_MEDIANS {
570 // Finds the median of `v[a - 1], v[a], v[a + 1]` and stores the index into `a`.
571 let mut sort_adjacent = |a: &mut usize| {
573 sort3(&mut (tmp - 1), a, &mut (tmp + 1));
576 // Find medians in the neighborhoods of `a`, `b`, and `c`.
577 sort_adjacent(&mut a);
578 sort_adjacent(&mut b);
579 sort_adjacent(&mut c);
582 // Find the median among `a`, `b`, and `c`.
583 sort3(&mut a, &mut b, &mut c);
586 if swaps < MAX_SWAPS {
589 // The maximum number of swaps was performed. Chances are the slice is descending or mostly
590 // descending, so reversing will probably help sort it faster.
596 /// Sorts `v` recursively.
598 /// If the slice had a predecessor in the original array, it is specified as `pred`.
600 /// `limit` is the number of allowed imbalanced partitions before switching to `heapsort`. If zero,
601 /// this function will immediately switch to heapsort.
602 fn recurse<'a, T, F>(mut v: &'a mut [T], is_less: &mut F, mut pred: Option<&'a T>, mut limit: usize)
603 where F: FnMut(&T, &T) -> bool
605 // Slices of up to this length get sorted using insertion sort.
606 const MAX_INSERTION: usize = 20;
608 // True if the last partitioning was reasonably balanced.
609 let mut was_balanced = true;
610 // True if the last partitioning didn't shuffle elements (the slice was already partitioned).
611 let mut was_partitioned = true;
616 // Very short slices get sorted using insertion sort.
617 if len <= MAX_INSERTION {
618 insertion_sort(v, is_less);
622 // If too many bad pivot choices were made, simply fall back to heapsort in order to
623 // guarantee `O(n log n)` worst-case.
625 heapsort(v, is_less);
629 // If the last partitioning was imbalanced, try breaking patterns in the slice by shuffling
630 // some elements around. Hopefully we'll choose a better pivot this time.
636 // Choose a pivot and try guessing whether the slice is already sorted.
637 let (pivot, likely_sorted) = choose_pivot(v, is_less);
639 // If the last partitioning was decently balanced and didn't shuffle elements, and if pivot
640 // selection predicts the slice is likely already sorted...
641 if was_balanced && was_partitioned && likely_sorted {
642 // Try identifying several out-of-order elements and shifting them to correct
643 // positions. If the slice ends up being completely sorted, we're done.
644 if partial_insertion_sort(v, is_less) {
649 // If the chosen pivot is equal to the predecessor, then it's the smallest element in the
650 // slice. Partition the slice into elements equal to and elements greater than the pivot.
651 // This case is usually hit when the slice contains many duplicate elements.
652 if let Some(p) = pred {
653 if !is_less(p, &v[pivot]) {
654 let mid = partition_equal(v, pivot, is_less);
656 // Continue sorting elements greater than the pivot.
662 // Partition the slice.
663 let (mid, was_p) = partition(v, pivot, is_less);
664 was_balanced = cmp::min(mid, len - mid) >= len / 8;
665 was_partitioned = was_p;
667 // Split the slice into `left`, `pivot`, and `right`.
668 let (left, right) = {v}.split_at_mut(mid);
669 let (pivot, right) = right.split_at_mut(1);
670 let pivot = &pivot[0];
672 // Recurse into the shorter side only in order to minimize the total number of recursive
673 // calls and consume less stack space. Then just continue with the longer side (this is
674 // akin to tail recursion).
675 if left.len() < right.len() {
676 recurse(left, is_less, pred, limit);
680 recurse(right, is_less, Some(pivot), limit);
686 /// Sorts `v` using pattern-defeating quicksort, which is `O(n log n)` worst-case.
687 pub fn quicksort<T, F>(v: &mut [T], mut is_less: F)
688 where F: FnMut(&T, &T) -> bool
690 // Sorting has no meaningful behavior on zero-sized types.
691 if mem::size_of::<T>() == 0 {
695 // Limit the number of imbalanced partitions to `floor(log2(len)) + 1`.
696 let limit = mem::size_of::<usize>() * 8 - v.len().leading_zeros() as usize;
698 recurse(v, &mut is_less, None, limit);