#[inline]
fn next(&mut self) -> Option<char> {
- next_code_point(&mut self.iter).map(|ch| {
- // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
- unsafe { char::from_u32_unchecked(ch) }
- })
+ // SAFETY: `str` invariant says `self.iter` is a valid UTF-8 string and
+ // the resulting `ch` is a valid Unicode Scalar Value.
+ unsafe { next_code_point(&mut self.iter).map(|ch| char::from_u32_unchecked(ch)) }
}
#[inline]
impl<'a> DoubleEndedIterator for Chars<'a> {
#[inline]
fn next_back(&mut self) -> Option<char> {
- next_code_point_reverse(&mut self.iter).map(|ch| {
- // SAFETY: `str` invariant says `ch` is a valid Unicode Scalar Value.
- unsafe { char::from_u32_unchecked(ch) }
- })
+ // SAFETY: `str` invariant says `self.iter` is a valid UTF-8 string and
+ // the resulting `ch` is a valid Unicode Scalar Value.
+ unsafe { next_code_point_reverse(&mut self.iter).map(|ch| char::from_u32_unchecked(ch)) }
}
}
(byte as i8) < -64
}
-#[inline]
-const fn unwrap_or_0(opt: Option<&u8>) -> u8 {
- match opt {
- Some(&byte) => byte,
- None => 0,
- }
-}
-
/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
+///
+/// # Safety
+///
+/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string
#[unstable(feature = "str_internals", issue = "none")]
#[inline]
-pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
+pub unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
// Decode UTF-8
let x = *bytes.next()?;
if x < 128 {
// Decode from a byte combination out of: [[[x y] z] w]
// NOTE: Performance is sensitive to the exact formulation here
let init = utf8_first_byte(x, 2);
- let y = unwrap_or_0(bytes.next());
+ // SAFETY: `bytes` produces an UTF-8-like string,
+ // so the iterator must produce a value here.
+ let y = unsafe { *bytes.next().unwrap_unchecked() };
let mut ch = utf8_acc_cont_byte(init, y);
if x >= 0xE0 {
// [[x y z] w] case
// 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
- let z = unwrap_or_0(bytes.next());
+ // SAFETY: `bytes` produces an UTF-8-like string,
+ // so the iterator must produce a value here.
+ let z = unsafe { *bytes.next().unwrap_unchecked() };
let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
ch = init << 12 | y_z;
if x >= 0xF0 {
// [x y z w] case
// use only the lower 3 bits of `init`
- let w = unwrap_or_0(bytes.next());
+ // SAFETY: `bytes` produces an UTF-8-like string,
+ // so the iterator must produce a value here.
+ let w = unsafe { *bytes.next().unwrap_unchecked() };
ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
}
}
/// Reads the last code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
+///
+/// # Safety
+///
+/// `bytes` must produce a valid UTF-8-like (UTF-8 or WTF-8) string
#[inline]
-pub(super) fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
+pub(super) unsafe fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
where
I: DoubleEndedIterator<Item = &'a u8>,
{
// Multibyte case follows
// Decode from a byte combination out of: [x [y [z w]]]
let mut ch;
- let z = unwrap_or_0(bytes.next_back());
+ // SAFETY: `bytes` produces an UTF-8-like string,
+ // so the iterator must produce a value here.
+ let z = unsafe { *bytes.next_back().unwrap_unchecked() };
ch = utf8_first_byte(z, 2);
if utf8_is_cont_byte(z) {
- let y = unwrap_or_0(bytes.next_back());
+ // SAFETY: `bytes` produces an UTF-8-like string,
+ // so the iterator must produce a value here.
+ let y = unsafe { *bytes.next_back().unwrap_unchecked() };
ch = utf8_first_byte(y, 3);
if utf8_is_cont_byte(y) {
- let x = unwrap_or_0(bytes.next_back());
+ // SAFETY: `bytes` produces an UTF-8-like string,
+ // so the iterator must produce a value here.
+ let x = unsafe { *bytes.next_back().unwrap_unchecked() };
ch = utf8_first_byte(x, 4);
ch = utf8_acc_cont_byte(ch, y);
}