/// invalid sequence.
#[inline(always)]
fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
// Position in `v` of the byte currently being examined.
- let mut offset = 0;
+ let mut index = 0;
let len = v.len();
// The ASCII fast path below reads two machine words at a time.
let usize_bytes = mem::size_of::<usize>();
let ascii_block_size = 2 * usize_bytes;
// Exclusive upper bound on positions from which a full two-word block still
// fits inside `v`; 0 when the slice is shorter than one block, so the
// wordwise loop is skipped entirely.
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
// `old_offset` remembers where this iteration started; it is the value
// reported as `valid_up_to` if an error is raised during this iteration.
- while offset < len {
- let old_offset = offset;
+ while index < len {
+ let old_offset = index;
// Return early from the enclosing function with an error whose
// `valid_up_to` is the position at which this iteration started.
macro_rules! err { () => {{
return Err(Utf8Error {
valid_up_to: old_offset
}}}
// Advance to the following byte and evaluate to it; raises the error above
// if the input ends first.
macro_rules! next { () => {{
- offset += 1;
+ index += 1;
// we needed data, but there was none: error!
- if offset >= len {
+ if index >= len {
err!()
}
- v[offset]
+ v[index]
}}}
- let first = v[offset];
+ let first = v[index];
// Bytes >= 128 are outside the ASCII range and take the multi-byte path.
if first >= 128 {
// NOTE(review): `UTF8_CHAR_WIDTH` is defined elsewhere; presumably it maps a
// leading byte to the byte length of its sequence — confirm.
let w = UTF8_CHAR_WIDTH[first as usize];
let second = next!();
}
_ => err!()
}
// Step past the last byte consumed on this path.
- offset += 1;
+ index += 1;
} else {
// Ascii case, try to skip forward quickly.
// When the pointer is aligned, read 2 words of data per iteration
// until we find a word containing a non-ascii byte.
let ptr = v.as_ptr();
// Address of the current byte modulo the word size; zero means the byte
// sits on a `usize`-sized boundary.
- let align = (ptr as usize + offset) & (usize_bytes - 1);
+ let align = (ptr as usize + index) & (usize_bytes - 1);
if align == 0 {
// The loop bound keeps both word reads inside `v`: a position below
// `blocks_end` satisfies position + ascii_block_size <= len.
- while offset < blocks_end {
+ while index < blocks_end {
// SAFETY (as far as the visible code shows): the address was checked to be
// word-aligned above and each step advances by a multiple of the word size,
// and the loop bound keeps both dereferenced words within the slice.
unsafe {
- let block = ptr.offset(offset as isize) as *const usize;
+ let block = ptr.offset(index as isize) as *const usize;
// break if there is a nonascii byte
// NOTE(review): `contains_nonascii` is defined elsewhere — confirm its
// exact return value before relying on `zu`/`zv`.
let zu = contains_nonascii(*block);
let zv = contains_nonascii(*block.offset(1));
break;
}
}
// Skip over the two words just inspected.
- offset += ascii_block_size;
+ index += ascii_block_size;
}
// step from the point where the wordwise loop stopped
- while offset < len && v[offset] < 128 {
- offset += 1;
+ while index < len && v[index] < 128 {
+ index += 1;
}
} else {
// Not word-aligned: consume this single ASCII byte and let the outer loop
// re-check alignment at the next position.
- offset += 1;
+ index += 1;
}
}
}