+ // For very small types, all the individual reads in the normal
+ // path perform poorly. We can do better, given efficient unaligned
+ // load/store, by loading a larger chunk and reversing a register.
+
+ // Ideally LLVM would do this for us, as it knows better than we do
+ // whether unaligned reads are efficient (since that changes between
+ // different ARM versions, for example) and what the best chunk size
+ // would be. Unfortunately, as of LLVM 4.0 (2017-05) it only unrolls
+ // the loop, so we need to do this ourselves. (Hypothesis: reverse
+ // is troublesome because the sides can be aligned differently --
+ // will be, when the length is odd -- so there's no way of emitting
+ // pre- and postludes to use fully-aligned SIMD in the middle.)
+
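+ The comment above describes the technique only in prose, so here is a minimal sketch of what it means for byte slices. This is not the actual libcore implementation: `reverse_bytes` is a hypothetical name, the chunk size `W` is an assumed choice, and `read_unaligned`/`write_unaligned` stand in for the "efficient unaligned load/store" assumption. It loads one word from each end, byte-swaps each in a register with `swap_bytes`, stores the two words exchanged, and falls back to element-wise reversal for the short middle remainder.

```rust
use std::mem::size_of;

/// Sketch of chunked reversal for `&mut [u8]` (illustrative, not the
/// real libcore code). Assumes unaligned loads/stores are cheap.
fn reverse_bytes(slice: &mut [u8]) {
    const W: usize = size_of::<u64>(); // assumed chunk size
    let len = slice.len();
    let ptr = slice.as_mut_ptr();
    let mut i = 0;
    // While two whole words remain, exchange a byte-reversed word
    // from each end. The ends may be differently aligned (they will
    // be, when the length is odd), hence the unaligned accesses.
    while len >= 2 * (i + W) {
        // SAFETY: the loop condition gives i + W <= len - i - W, so
        // both W-byte ranges are in bounds and do not overlap.
        unsafe {
            let j = len - i - W;
            let a = (ptr.add(i) as *const u64).read_unaligned().swap_bytes();
            let b = (ptr.add(j) as *const u64).read_unaligned().swap_bytes();
            (ptr.add(i) as *mut u64).write_unaligned(b);
            (ptr.add(j) as *mut u64).write_unaligned(a);
        }
        i += W;
    }
    // Fewer than 2*W bytes remain in the middle; std's `reverse`
    // stands in for the plain element-wise swap loop here.
    slice[i..len - i].reverse();
}

fn main() {
    let mut v: Vec<u8> = (0..=20).collect();
    reverse_bytes(&mut v);
    let mut expected: Vec<u8> = (0..=20).collect();
    expected.reverse();
    assert_eq!(v, expected);
}
```

+ The real code generalizes this to other small element types (e.g. `u16` via `rotate_left`), but the shape is the same: one wide load, one register-level reverse, one wide store per chunk, instead of `W` individual byte accesses.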