From ff5fdffc135f55e751dfcff0df1f52b9f4faad17 Mon Sep 17 00:00:00 2001 From: Steven Fackler Date: Sun, 4 Aug 2013 23:51:26 -0400 Subject: [PATCH] ToBase64 and ToHex perf improvements The overhead of str::push_char is high enough to cripple the performance of these two functions. I've switched them to build the output in a ~[u8] and then convert to a string later. Since we know exactly the bytes going into the vector, we can use the unsafe version to avoid the is_utf8 check. I could have micro-optimized it further with vec::raw::get, but it only added ~10MB/s so I didn't think it was worth it. ToHex is still ~30% slower than FromHex, which is puzzling. Before: ``` test base64::test::from_base64 ... bench: 1000 ns/iter (+/- 349) = 204 MB/s test base64::test::to_base64 ... bench: 2390 ns/iter (+/- 1130) = 63 MB/s ... test hex::tests::bench_from_hex ... bench: 884 ns/iter (+/- 220) = 341 MB/s test hex::tests::bench_to_hex ... bench: 2453 ns/iter (+/- 919) = 61 MB/s ``` After: ``` test base64::test::from_base64 ... bench: 1271 ns/iter (+/- 600) = 160 MB/s test base64::test::to_base64 ... bench: 759 ns/iter (+/- 286) = 198 MB/s ... test hex::tests::bench_from_hex ... bench: 875 ns/iter (+/- 377) = 345 MB/s test hex::tests::bench_to_hex ... bench: 593 ns/iter (+/- 240) = 254 MB/s ``` --- src/libextra/base64.rs | 61 +++++++++++++++++++++--------------------- src/libextra/hex.rs | 14 +++++----- 2 files changed, 38 insertions(+), 37 deletions(-) diff --git a/src/libextra/base64.rs b/src/libextra/base64.rs index 550b891a4db..d6ab36ca721 100644 --- a/src/libextra/base64.rs +++ b/src/libextra/base64.rs @@ -9,6 +9,7 @@ // except according to those terms. //! 
Base64 binary-to-text encoding +use std::str; /// Available encoding character sets pub enum CharacterSet { @@ -40,21 +41,13 @@ pub struct Config { pub static MIME: Config = Config {char_set: Standard, pad: true, line_length: Some(76)}; -static STANDARD_CHARS: [char, ..64] = [ - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' -]; - -static URLSAFE_CHARS: [char, ..64] = [ - 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', - 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', - 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', - '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-', '_' -]; +static STANDARD_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "abcdefghijklmnopqrstuvwxyz", + "0123456789+/"); + +static URLSAFE_CHARS: &'static[u8] = bytes!("ABCDEFGHIJKLMNOPQRSTUVWXYZ", + "abcdefghijklmnopqrstuvwxyz", + "0123456789-_"); /// A trait for converting a value to base64 encoding. 
pub trait ToBase64 { @@ -80,12 +73,12 @@ impl<'self> ToBase64 for &'self [u8] { * ~~~ */ fn to_base64(&self, config: Config) -> ~str { - let chars = match config.char_set { + let bytes = match config.char_set { Standard => STANDARD_CHARS, UrlSafe => URLSAFE_CHARS }; - let mut s = ~""; + let mut v: ~[u8] = ~[]; let mut i = 0; let mut cur_length = 0; let len = self.len(); @@ -93,7 +86,8 @@ fn to_base64(&self, config: Config) -> ~str { match config.line_length { Some(line_length) => if cur_length >= line_length { - s.push_str("\r\n"); + v.push('\r' as u8); + v.push('\n' as u8); cur_length = 0; }, None => () @@ -104,10 +98,10 @@ fn to_base64(&self, config: Config) -> ~str { (self[i + 2] as u32); // This 24-bit number gets separated into four 6-bit numbers. - s.push_char(chars[(n >> 18) & 63]); - s.push_char(chars[(n >> 12) & 63]); - s.push_char(chars[(n >> 6 ) & 63]); - s.push_char(chars[n & 63]); + v.push(bytes[(n >> 18) & 63]); + v.push(bytes[(n >> 12) & 63]); + v.push(bytes[(n >> 6 ) & 63]); + v.push(bytes[n & 63]); cur_length += 4; i += 3; @@ -117,7 +111,8 @@ fn to_base64(&self, config: Config) -> ~str { match config.line_length { Some(line_length) => if cur_length >= line_length { - s.push_str("\r\n"); + v.push('\r' as u8); + v.push('\n' as u8); }, None => () } @@ -129,25 +124,29 @@ fn to_base64(&self, config: Config) -> ~str { 0 => (), 1 => { let n = (self[i] as u32) << 16; - s.push_char(chars[(n >> 18) & 63]); - s.push_char(chars[(n >> 12) & 63]); + v.push(bytes[(n >> 18) & 63]); + v.push(bytes[(n >> 12) & 63]); if config.pad { - s.push_str("=="); + v.push('=' as u8); + v.push('=' as u8); } } 2 => { let n = (self[i] as u32) << 16 | (self[i + 1u] as u32) << 8; - s.push_char(chars[(n >> 18) & 63]); - s.push_char(chars[(n >> 12) & 63]); - s.push_char(chars[(n >> 6 ) & 63]); + v.push(bytes[(n >> 18) & 63]); + v.push(bytes[(n >> 12) & 63]); + v.push(bytes[(n >> 6 ) & 63]); if config.pad { - s.push_char('='); + v.push('=' as u8); } } _ => fail!("Algebra is broken, 
please alert the math police") } - s + + unsafe { + str::raw::from_bytes_owned(v) + } } } diff --git a/src/libextra/hex.rs b/src/libextra/hex.rs index 5609c566d92..748d0391588 100644 --- a/src/libextra/hex.rs +++ b/src/libextra/hex.rs @@ -19,8 +19,7 @@ pub trait ToHex { fn to_hex(&self) -> ~str; } -static CHARS: [char, ..16] = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', - 'a', 'b', 'c', 'd', 'e', 'f']; +static CHARS: &'static[u8] = bytes!("0123456789abcdef"); impl<'self> ToHex for &'self [u8] { /** @@ -39,13 +38,16 @@ impl<'self> ToHex for &'self [u8] { * ~~~ */ fn to_hex(&self) -> ~str { - let mut s = str::with_capacity(self.len() * 2); + // +1 for NULL terminator + let mut v = vec::with_capacity(self.len() * 2 + 1); for &byte in self.iter() { - s.push_char(CHARS[byte >> 4]); - s.push_char(CHARS[byte & 0xf]); + v.push(CHARS[byte >> 4]); + v.push(CHARS[byte & 0xf]); } - s + unsafe { + str::raw::from_bytes_owned(v) + } } } -- 2.44.0