git.lizzy.rs Git - rust.git/commitdiff
optimize continuation byte validation of strings containing multibyte chars
author: The8472 <git@infinite-source.de>
Fri, 10 Sep 2021 22:09:49 +0000 (00:09 +0200)
committer: The8472 <git@infinite-source.de>
Fri, 10 Sep 2021 22:25:41 +0000 (00:25 +0200)
```
old, -O2, x86-64
test str::str_validate_emoji                                    ... bench:       4,606 ns/iter (+/- 64)

new, -O2, x86-64
test str::str_validate_emoji                                    ... bench:       3,837 ns/iter (+/- 60)
```

library/core/src/str/validations.rs

index fc8f47dced46e0f8415aae89a9598c3b58f7a685..a078bf92a2796703dd0fc7352bce8815feefb6a6 100644 (file)
@@ -163,7 +163,7 @@ macro_rules! next {
             //               %xF4 %x80-8F 2( UTF8-tail )
             match w {
                 2 => {
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(1))
                     }
                 }
@@ -175,7 +175,7 @@ macro_rules! next {
                         | (0xEE..=0xEF, 0x80..=0xBF) => {}
                         _ => err!(Some(1)),
                     }
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(2))
                     }
                 }
@@ -184,10 +184,10 @@ macro_rules! next {
                         (0xF0, 0x90..=0xBF) | (0xF1..=0xF3, 0x80..=0xBF) | (0xF4, 0x80..=0x8F) => {}
                         _ => err!(Some(1)),
                     }
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(2))
                     }
-                    if next!() & !CONT_MASK != TAG_CONT_U8 {
+                    if !utf8_is_cont_byte(next!()) {
                         err!(Some(3))
                     }
                 }
@@ -258,8 +258,6 @@ pub fn utf8_char_width(b: u8) -> usize {
 
 /// Mask of the value bits of a continuation byte.
 const CONT_MASK: u8 = 0b0011_1111;
-/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
-const TAG_CONT_U8: u8 = 0b1000_0000;
 
 // truncate `&str` to length at most equal to `max`
 // return `true` if it were truncated, and the new str.