/// Parse a `bool` from a string.
///
- /// Yields an `Option<bool>`, because `s` may or may not actually be
- /// parseable.
+ /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
+ /// actually be parseable.
///
/// # Examples
///
/// ```rust
+ /// use std::str::FromStr;
+ ///
+ /// assert_eq!(FromStr::from_str("true"), Ok(true));
+ /// assert_eq!(FromStr::from_str("false"), Ok(false));
+ /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
+ /// ```
+ ///
+ /// Note: in many cases the `parse()` method on `str`, which is built on
+ /// top of `FromStr::from_str()`, is the more appropriate choice.
+ ///
+ /// ```rust
/// assert_eq!("true".parse(), Ok(true));
/// assert_eq!("false".parse(), Ok(false));
/// assert!("not even a boolean".parse::<bool>().is_err());
iter: slice::Iter<'a, u8>
}
-// Return the initial codepoint accumulator for the first byte.
-// The first byte is special, only want bottom 5 bits for width 2, 4 bits
-// for width 3, and 3 bits for width 4
-macro_rules! utf8_first_byte {
- ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
-}
+/// Returns the initial code point accumulator for the first byte of a
+/// multibyte sequence.
+///
+/// The first byte is special: only its low value bits are kept, selected by
+/// masking with `0x7F >> width`. That leaves the bottom 5 bits for width 2,
+/// 4 bits for width 3, and 3 bits for width 4.
+#[inline]
+fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
-// return the value of $ch updated with continuation byte $byte
-macro_rules! utf8_acc_cont_byte {
- ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & CONT_MASK) as u32)
-}
+/// Returns the value of `ch` updated with continuation byte `byte`.
+///
+/// The accumulator is shifted left by six bits, and the bits of `byte`
+/// selected by `CONT_MASK` are OR-ed into the low end of the result.
+#[inline]
+fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
-macro_rules! utf8_is_cont_byte {
- ($byte:expr) => (($byte & !CONT_MASK) == TAG_CONT_U8)
-}
+/// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
+/// bits `10`).
+///
+/// The test compares the tag bits of `byte` (those outside `CONT_MASK`)
+/// against `TAG_CONT_U8`.
+#[inline]
+fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
#[inline]
fn unwrap_or_0(opt: Option<&u8>) -> u8 {
// Multibyte case follows
// Decode from a byte combination out of: [[[x y] z] w]
// NOTE: Performance is sensitive to the exact formulation here
- let init = utf8_first_byte!(x, 2);
+ let init = utf8_first_byte(x, 2);
let y = unwrap_or_0(bytes.next());
- let mut ch = utf8_acc_cont_byte!(init, y);
+ let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
// [[x y z] w] case
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
let z = unwrap_or_0(bytes.next());
- let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
+ let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
ch = init << 12 | y_z;
if x >= 0xF0 {
// [x y z w] case
// use only the lower 3 bits of `init`
let w = unwrap_or_0(bytes.next());
- ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
+ ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
}
// Decode from a byte combination out of: [x [y [z w]]]
let mut ch;
let z = unwrap_or_0(bytes.next_back());
- ch = utf8_first_byte!(z, 2);
- if utf8_is_cont_byte!(z) {
+ ch = utf8_first_byte(z, 2);
+ if utf8_is_cont_byte(z) {
let y = unwrap_or_0(bytes.next_back());
- ch = utf8_first_byte!(y, 3);
- if utf8_is_cont_byte!(y) {
+ ch = utf8_first_byte(y, 3);
+ if utf8_is_cont_byte(y) {
let x = unwrap_or_0(bytes.next_back());
- ch = utf8_first_byte!(x, 4);
- ch = utf8_acc_cont_byte!(ch, y);
+ ch = utf8_first_byte(x, 4);
+ ch = utf8_acc_cont_byte(ch, y);
}
- ch = utf8_acc_cont_byte!(ch, z);
+ ch = utf8_acc_cont_byte(ch, z);
}
- ch = utf8_acc_cont_byte!(ch, w);
+ ch = utf8_acc_cont_byte(ch, w);
Some(ch)
}
#[inline]
fn size_hint(&self) -> (usize, Option<usize>) {
let (len, _) = self.iter.size_hint();
- (len.saturating_add(3) / 4, Some(len))
+ // `(len + 3)` can't overflow, because we know that the `slice::Iter`
+ // belongs to a slice in memory which has a maximum length of
+ // `isize::MAX` (that's well below `usize::MAX`).
+ // The lower bound is `len / 4` rounded up and the upper bound is `len`;
+ // presumably because each item consumes between one and four bytes.
+ ((len + 3) / 4, Some(len))
}
}
#[inline]
#[allow(dead_code)]
fn maximal_suffix(arr: &[u8], reversed: bool) -> (usize, usize) {
+ use num::wrapping::WrappingOps;
let mut left = -1; // Corresponds to i in the paper
let mut right = 0; // Corresponds to j in the paper
let mut offset = 1; // Corresponds to k in the paper
let a;
let b;
if reversed {
- a = arr[left + offset];
+ a = arr[left.wrapping_add(offset)];
b = arr[right + offset];
} else {
a = arr[right + offset];
- b = arr[left + offset];
+ b = arr[left.wrapping_add(offset)];
}
if a < b {
// Suffix is smaller, period is entire prefix so far.
right += offset;
offset = 1;
- period = right - left;
+ period = right.wrapping_sub(left);
} else if a == b {
// Advance through repetition of the current period.
if offset == period {
period = 1;
}
}
- (left + 1, period)
+ (left.wrapping_add(1), period)
}
}
#[unstable(feature = "core")]
#[deprecated(since = "1.0.0", reason = "use `Split` with a `&str`")]
pub struct SplitStr<'a, P: Pattern<'a>>(Split<'a, P>);
+#[allow(deprecated)]
impl<'a, P: Pattern<'a>> Iterator for SplitStr<'a, P> {
type Item = &'a str;
// ASCII characters are always valid, so only large
// bytes need more examination.
if first >= 128 {
- let w = UTF8_CHAR_WIDTH[first as usize] as usize;
+ let w = UTF8_CHAR_WIDTH[first as usize];
let second = next!();
// 2-byte encoding is for codepoints \u{0080} to \u{07ff}
// first C2 80 last DF BF
}
/// Mask of the value bits of a continuation byte
-const CONT_MASK: u8 = 0b0011_1111u8;
+const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
-const TAG_CONT_U8: u8 = 0b1000_0000u8;
+const TAG_CONT_U8: u8 = 0b1000_0000;
/*
Section: Trait implementations
fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>;
fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
+ #[allow(deprecated) /* for SplitStr */]
fn split_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitStr<'a, P>;
fn lines<'a>(&'a self) -> Lines<'a>;
fn lines_any<'a>(&'a self) -> LinesAny<'a>;
if index == self.len() { return true; }
match self.as_bytes().get(index) {
None => false,
- Some(&b) => b < 128u8 || b >= 192u8,
+ Some(&b) => b < 128 || b >= 192,
}
}
i -= 1;
}
- let mut val = s.as_bytes()[i] as u32;
- let w = UTF8_CHAR_WIDTH[val as usize] as usize;
- assert!((w != 0));
+ let first= s.as_bytes()[i];
+ let w = UTF8_CHAR_WIDTH[first as usize];
+ assert!(w != 0);
- val = utf8_first_byte!(val, w);
- val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 1]);
- if w > 2 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 2]); }
- if w > 3 { val = utf8_acc_cont_byte!(val, s.as_bytes()[i + 3]); }
+ let mut val = utf8_first_byte(first, w as u32);
+ val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
+ if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
+ if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
}
#[inline]
#[unstable(feature = "core")]
pub fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
- if bytes[i] < 128u8 {
+ if bytes[i] < 128 {
return (bytes[i] as u32, i + 1);
}
// Multibyte case is a fn to allow char_range_at to inline cleanly
fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
- let mut val = bytes[i] as u32;
- let w = UTF8_CHAR_WIDTH[val as usize] as usize;
- assert!((w != 0));
+ let first = bytes[i];
+ let w = UTF8_CHAR_WIDTH[first as usize];
+ assert!(w != 0);
- val = utf8_first_byte!(val, w);
- val = utf8_acc_cont_byte!(val, bytes[i + 1]);
- if w > 2 { val = utf8_acc_cont_byte!(val, bytes[i + 2]); }
- if w > 3 { val = utf8_acc_cont_byte!(val, bytes[i + 3]); }
+ let mut val = utf8_first_byte(first, w as u32);
+ val = utf8_acc_cont_byte(val, bytes[i + 1]);
+ if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
+ if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
- return (val, i + w);
+ return (val, i + w as usize);
}
multibyte_char_range_at(bytes, i)