git.lizzy.rs Git - rust.git/commitdiff
UTF-8 validation: Add missing if conditional for short input
author Ulrik Sverdrup <bluss@users.noreply.github.com>
Tue, 12 Jan 2016 22:04:46 +0000 (23:04 +0100)
committer Ulrik Sverdrup <bluss@users.noreply.github.com>
Thu, 14 Jan 2016 13:59:55 +0000 (14:59 +0100)
We need to check that `len` is large enough before entering the fast skip loop: for input shorter than two words, the loop bound `len - 2 * usize::BYTES` underflows.

src/libcore/str/mod.rs

index 64c21836b000322756bd068a196939c631f0cd8a..d85212d25e792ee285149c6a79d250dedf1d9830 100644 (file)
@@ -1158,24 +1158,27 @@ macro_rules! next { () => {{
             offset += 1;
         } else {
             // Ascii case, try to skip forward quickly.
+            // When the pointer is aligned, read 2 words of data per iteration
+            // until we find a word containing a non-ascii byte.
+            const BYTES_PER_ITERATION: usize = 2 * usize::BYTES;
             let ptr = v.as_ptr();
             let align = (ptr as usize + offset) & (usize::BYTES - 1);
             if align == 0 {
-                // When the pointer is aligned, read 2 words of data per iteration
-                // until we find a word containing a non-ascii byte.
-                while offset <= len - 2 * usize::BYTES {
-                    unsafe {
-                        let u = *(ptr.offset(offset as isize) as *const usize);
-                        let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
-
-                        // break if there is a nonascii byte
-                        let zu = contains_nonascii(u);
-                        let zv = contains_nonascii(v);
-                        if zu || zv {
-                            break;
+                if len >= BYTES_PER_ITERATION {
+                    while offset <= len - BYTES_PER_ITERATION {
+                        unsafe {
+                            let u = *(ptr.offset(offset as isize) as *const usize);
+                            let v = *(ptr.offset((offset + usize::BYTES) as isize) as *const usize);
+
+                            // break if there is a nonascii byte
+                            let zu = contains_nonascii(u);
+                            let zv = contains_nonascii(v);
+                            if zu || zv {
+                                break;
+                            }
                         }
+                        offset += BYTES_PER_ITERATION;
                     }
-                    offset += usize::BYTES * 2;
                 }
                 // step from the point where the wordwise loop stopped
                 while offset < len && v[offset] < 128 {