git.lizzy.rs Git - rust.git/blobdiff - library/core/src/str/validations.rs
Optimize continuation byte validation of strings containing multibyte chars
[rust.git] / library / core / src / str / validations.rs
index 373a8212425acb3fe203b6acf19b54d5553f7622..a078bf92a2796703dd0fc7352bce8815feefb6a6 100644 (file)
@@ -22,7 +22,7 @@ fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
 /// bits `10`).
 #[inline]
 pub(super) fn utf8_is_cont_byte(byte: u8) -> bool {
-    (byte & !CONT_MASK) == TAG_CONT_U8
+    (byte as i8) < -64
 }
 
 #[inline]
@@ -163,7 +163,7 @@ macro_rules! next {
             //               %xF4 %x80-8F 2( UTF8-tail )
             match w {
                 2 => {
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(1))
                     }
                 }
@@ -175,7 +175,7 @@ macro_rules! next {
                         | (0xEE..=0xEF, 0x80..=0xBF) => {}
                         _ => err!(Some(1)),
                     }
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(2))
                     }
                 }
@@ -184,10 +184,10 @@ macro_rules! next {
                         (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
                         _ => err!(Some(1)),
                     }
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(2))
                     }
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(3))
                     }
                 }
@@ -258,8 +258,6 @@ pub fn utf8_char_width(b: u8) -> usize {
 
 /// Mask of the value bits of a continuation byte.
 const CONT_MASK: u8 = 0b0011_1111;
-/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
-const TAG_CONT_U8: u8 = 0b1000_0000;
 
 // truncate `&str` to length at most equal to `max`
 // return `true` if it were truncated, and the new str.