Remove unneeded macro witchery

author Tobias Bucher <tobiasbucher5991@gmail.com>

Tue, 10 Mar 2015 11:06:44 +0000 (12:06 +0100)

committer Tobias Bucher <tobiasbucher5991@gmail.com>

Tue, 10 Mar 2015 11:57:05 +0000 (12:57 +0100)
author Tobias Bucher <tobiasbucher5991@gmail.com>
Tue, 10 Mar 2015 11:06:44 +0000 (12:06 +0100)
committer Tobias Bucher <tobiasbucher5991@gmail.com>
Tue, 10 Mar 2015 11:57:05 +0000 (12:57 +0100)
diff --git a/src/libcore/str/mod.rs b/src/libcore/str/mod.rs

index b354116993c23a1e750be1fa8c900dc95261ab85..c516bd54129c867cc0de85d69c8ab5d6db6ad39f 100644 (file)
--- a/src/libcore/str/mod.rs
+++ b/src/libcore/str/mod.rs
@@ -324,21 +324,20 @@ pub struct Chars<'a> {
      iter: slice::Iter<'a, u8>
  }
  
-// Return the initial codepoint accumulator for the first byte.
-// The first byte is special, only want bottom 5 bits for width 2, 4 bits
-// for width 3, and 3 bits for width 4
-macro_rules! utf8_first_byte {
-    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
-}
+/// Return the initial codepoint accumulator for the first byte.
+/// The first byte is special, only want bottom 5 bits for width 2, 4 bits
+/// for width 3, and 3 bits for width 4.
+#[inline]
+fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
  
-// return the value of $ch updated with continuation byte $byte
-macro_rules! utf8_acc_cont_byte {
-    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & CONT_MASK) as u32)
-}
+/// Return the value of `ch` updated with continuation byte `byte`.
+#[inline]
+fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
  
-macro_rules! utf8_is_cont_byte {
-    ($byte:expr) => (($byte & !CONT_MASK) == TAG_CONT_U8)
-}
+/// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
+/// bits `10`).
+#[inline]
+fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
  
  #[inline]
  fn unwrap_or_0(opt: Option<&u8>) -> u8 {
@@ -363,20 +362,20 @@ pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
      // Multibyte case follows
      // Decode from a byte combination out of: [[[x y] z] w]
      // NOTE: Performance is sensitive to the exact formulation here
-    let init = utf8_first_byte!(x, 2);
+    let init = utf8_first_byte(x, 2);
      let y = unwrap_or_0(bytes.next());
-    let mut ch = utf8_acc_cont_byte!(init, y);
+    let mut ch = utf8_acc_cont_byte(init, y);
      if x >= 0xE0 {
          // [[x y z] w] case
          // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
          let z = unwrap_or_0(bytes.next());
-        let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
+        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
          ch = init << 12 | y_z;
          if x >= 0xF0 {
              // [x y z w] case
              // use only the lower 3 bits of `init`
              let w = unwrap_or_0(bytes.next());
-            ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
+            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
          }
      }
  
@@ -399,18 +398,18 @@ pub fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
      // Decode from a byte combination out of: [x [y [z w]]]
      let mut ch;
      let z = unwrap_or_0(bytes.next_back());
-    ch = utf8_first_byte!(z, 2);
-    if utf8_is_cont_byte!(z) {
+    ch = utf8_first_byte(z, 2);
+    if utf8_is_cont_byte(z) {
          let y = unwrap_or_0(bytes.next_back());
-        ch = utf8_first_byte!(y, 3);
-        if utf8_is_cont_byte!(y) {
+        ch = utf8_first_byte(y, 3);
+        if utf8_is_cont_byte(y) {
              let x = unwrap_or_0(bytes.next_back());
-            ch = utf8_first_byte!(x, 4);
-            ch = utf8_acc_cont_byte!(ch, y);
+            ch = utf8_first_byte(x, 4);
+            ch = utf8_acc_cont_byte(ch, y);
          }
-        ch = utf8_acc_cont_byte!(ch, z);
+        ch = utf8_acc_cont_byte(ch, z);
      }
-    ch = utf8_acc_cont_byte!(ch, w);
+    ch = utf8_acc_cont_byte(ch, w);
  
      Some(ch)
  }
@@ -1027,7 +1026,7 @@ macro_rules! next { () => {
          // ASCII characters are always valid, so only large
          // bytes need more examination.
          if first >= 128 {
-            let w = UTF8_CHAR_WIDTH[first as usize] as usize;
+            let w = UTF8_CHAR_WIDTH[first as usize];
              let second = next!();
              // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
              //        first  C2 80        last DF BF
@@ -1580,14 +1579,14 @@ fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
                  i -= 1;
              }
  
-            let mut val = s.as_bytes()[i] as u32;
-            let w = UTF8_CHAR_WIDTH[val as usize] as usize;
-            assert!((w != 0));
+            let first = s.as_bytes()[i];
+            let w = UTF8_CHAR_WIDTH[first as usize];
+            assert!(w != 0);
  
-            val = utf8_first_byte!(val, w);
-            val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
-            if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
-            if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
+            let mut val = utf8_first_byte(first, w as u32);
+            val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
+            if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
+            if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
  
              return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
          }
@@ -1672,16 +1671,16 @@ pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
  
      // Multibyte case is a fn to allow char_range_at to inline cleanly
      fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
-        let mut val = bytes[i] as u32;
-        let w = UTF8_CHAR_WIDTH[val as usize] as usize;
-        assert!((w != 0));
+        let first = bytes[i];
+        let w = UTF8_CHAR_WIDTH[first as usize];
+        assert!(w != 0);
  
-        val = utf8_first_byte!(val, w);
-        val = utf8_acc_cont_byte!(val, bytes[i + 1]);
-        if w > 2 { val = utf8_acc_cont_byte!(val, bytes[i + 2]); }
-        if w > 3 { val = utf8_acc_cont_byte!(val, bytes[i + 3]); }
+        let mut val = utf8_first_byte(first, w as u32);
+        val = utf8_acc_cont_byte(val, bytes[i + 1]);
+        if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
+        if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
  
-        return (val, i + w);
+        return (val, i + w as usize);
      }
  
      multibyte_char_range_at(bytes, i)
author	Tobias Bucher <tobiasbucher5991@gmail.com>
	Tue, 10 Mar 2015 11:06:44 +0000 (12:06 +0100)
committer	Tobias Bucher <tobiasbucher5991@gmail.com>
	Tue, 10 Mar 2015 11:57:05 +0000 (12:57 +0100)