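UTF-8 validation: Add missing `if` conditional for short input (guards the word-wise ASCII fast path so that `len - 2 * usize::BYTES` cannot underflow when the input is shorter than two words)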

author Ulrik Sverdrup <bluss@users.noreply.github.com>
Tue, 12 Jan 2016 22:04:46 +0000 (23:04 +0100)
committer Ulrik Sverdrup <bluss@users.noreply.github.com>
Thu, 14 Jan 2016 13:59:55 +0000 (14:59 +0100)
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs

index 64c21836b000322756bd068a196939c631f0cd8a..d85212d25e792ee285149c6a79d250dedf1d9830 100644 (file)
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -1158,24 +1158,27 @@ macro_rules! next { () => {{
              offset += 1;
          } else {
              // Ascii case, try to skip forward quickly.
+            // When the pointer is aligned, read 2 words of data per iteration
+            // until we find a word containing a non-ascii byte.
+            const BYTES_PER_ITERATION: usize = 2 * usize::BYTES;
              let ptr = v.as_ptr();
              let align = (ptr as usize + offset) & (usize::BYTES - 1);
              if align == 0 {
-                // When the pointer is aligned, read 2 words of data per iteration
-                // until we find a word containing a non-ascii byte.
-                while offset <= len - 2 * usize::BYTES {
-                    unsafe {
-                        let u = *(ptr.offset(offset as isize) as *const usize);
-                        let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
-
-                        // break if there is a nonascii byte
-                        let zu = contains_nonascii(u);
-                        let zv = contains_nonascii(v);
-                        if zu || zv {
-                            break;
+                if len >= BYTES_PER_ITERATION {
+                    while offset <= len - BYTES_PER_ITERATION {
+                        unsafe {
+                            let u = *(ptr.offset(offset as isize) as *const usize);
+                            let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
+
+                            // break if there is a nonascii byte
+                            let zu = contains_nonascii(u);
+                            let zv = contains_nonascii(v);
+                            if zu || zv {
+                                break;
+                            }
                          }
+                        offset += BYTES_PER_ITERATION;
                      }
-                    offset += usize::BYTES * 2;
                  }
                  // step from the point where the wordwise loop stopped
                  while offset < len && v[offset] < 128 {
author	Ulrik Sverdrup <bluss@users.noreply.github.com>
	Tue, 12 Jan 2016 22:04:46 +0000 (23:04 +0100)
committer	Ulrik Sverdrup <bluss@users.noreply.github.com>
	Thu, 14 Jan 2016 13:59:55 +0000 (14:59 +0100)