src/libstd/sys_common/memchr.rs

   1 // Copyright 2015 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // Original implementation taken from rust-memchr
  12 // Copyright 2015 Andrew Gallant, bluss and Nicolas Koch
  13
  14 #[allow(dead_code)]
  15 pub mod fallback {
  16     use cmp;
  17     use mem;
  18
  19     const LO_U64: u64 = 0x0101010101010101;
  20     const HI_U64: u64 = 0x8080808080808080;
  21
  22     // use truncation
  23     const LO_USIZE: usize = LO_U64 as usize;
  24     const HI_USIZE: usize = HI_U64 as usize;
  25
  26     /// Return `true` if `x` contains any zero byte.
  27     ///
  28     /// From *Matters Computational*, J. Arndt
  29     ///
  30     /// "The idea is to subtract one from each of the bytes and then look for
  31     /// bytes where the borrow propagated all the way to the most significant
  32     /// bit."
  33     #[inline]
  34     fn contains_zero_byte(x: usize) -> bool {
  35         x.wrapping_sub(LO_USIZE) & !x & HI_USIZE != 0
  36     }
  37
  38     #[cfg(target_pointer_width = "32")]
  39     #[inline]
  40     fn repeat_byte(b: u8) -> usize {
  41         let mut rep = (b as usize) << 8 | b as usize;
  42         rep = rep << 16 | rep;
  43         rep
  44     }
  45
  46     #[cfg(target_pointer_width = "64")]
  47     #[inline]
  48     fn repeat_byte(b: u8) -> usize {
  49         let mut rep = (b as usize) << 8 | b as usize;
  50         rep = rep << 16 | rep;
  51         rep = rep << 32 | rep;
  52         rep
  53     }
  54
  55     /// Return the first index matching the byte `a` in `text`.
  56     pub fn memchr(x: u8, text: &[u8]) -> Option<usize> {
  57         // Scan for a single byte value by reading two `usize` words at a time.
  58         //
  59         // Split `text` in three parts
  60         // - unaligned initial part, before the first word aligned address in text
  61         // - body, scan by 2 words at a time
  62         // - the last remaining part, < 2 word size
  63         let len = text.len();
  64         let ptr = text.as_ptr();
  65         let usize_bytes = mem::size_of::<usize>();
  66
  67         // search up to an aligned boundary
  68         let mut offset = ptr.align_offset(usize_bytes);
  69         if offset > 0 {
  70             offset = cmp::min(offset, len);
  71             if let Some(index) = text[..offset].iter().position(|elt| *elt == x) {
  72                 return Some(index);
  73             }
  74         }
  75
  76         // search the body of the text
  77         let repeated_x = repeat_byte(x);
  78
  79         if len >= 2 * usize_bytes {
  80             while offset <= len - 2 * usize_bytes {
  81                 unsafe {
  82                     let u = *(ptr.offset(offset as isize) as *const usize);
  83                     let v = *(ptr.offset((offset + usize_bytes) as isize) as *const usize);
  84
  85                     // break if there is a matching byte
  86                     let zu = contains_zero_byte(u ^ repeated_x);
  87                     let zv = contains_zero_byte(v ^ repeated_x);
  88                     if zu || zv {
  89                         break;
  90                     }
  91                 }
  92                 offset += usize_bytes * 2;
  93             }
  94         }
  95
  96         // find the byte after the point the body loop stopped
  97         text[offset..].iter().position(|elt| *elt == x).map(|i| offset + i)
  98     }
  99
 100     /// Return the last index matching the byte `a` in `text`.
 101     pub fn memrchr(x: u8, text: &[u8]) -> Option<usize> {
 102         // Scan for a single byte value by reading two `usize` words at a time.
 103         //
 104         // Split `text` in three parts
 105         // - unaligned tail, after the last word aligned address in text
 106         // - body, scan by 2 words at a time
 107         // - the first remaining bytes, < 2 word size
 108         let len = text.len();
 109         let ptr = text.as_ptr();
 110         let usize_bytes = mem::size_of::<usize>();
 111
 112         // search to an aligned boundary
 113         let end_align = (ptr as usize + len) & (usize_bytes - 1);
 114         let mut offset;
 115         if end_align > 0 {
 116             offset = if end_align >= len { 0 } else { len - end_align };
 117             if let Some(index) = text[offset..].iter().rposition(|elt| *elt == x) {
 118                 return Some(offset + index);
 119             }
 120         } else {
 121             offset = len;
 122         }
 123
 124         // search the body of the text
 125         let repeated_x = repeat_byte(x);
 126
 127         while offset >= 2 * usize_bytes {
 128             unsafe {
 129                 let u = *(ptr.offset(offset as isize - 2 * usize_bytes as isize) as *const usize);
 130                 let v = *(ptr.offset(offset as isize - usize_bytes as isize) as *const usize);
 131
 132                 // break if there is a matching byte
 133                 let zu = contains_zero_byte(u ^ repeated_x);
 134                 let zv = contains_zero_byte(v ^ repeated_x);
 135                 if zu || zv {
 136                     break;
 137                 }
 138             }
 139             offset -= 2 * usize_bytes;
 140         }
 141
 142         // find the byte before the point the body loop stopped
 143         text[..offset].iter().rposition(|elt| *elt == x)
 144     }
 145
 146     // test fallback implementations on all platforms
 147     #[test]
 148     fn matches_one() {
 149         assert_eq!(Some(0), memchr(b'a', b"a"));
 150     }
 151
 152     #[test]
 153     fn matches_begin() {
 154         assert_eq!(Some(0), memchr(b'a', b"aaaa"));
 155     }
 156
 157     #[test]
 158     fn matches_end() {
 159         assert_eq!(Some(4), memchr(b'z', b"aaaaz"));
 160     }
 161
 162     #[test]
 163     fn matches_nul() {
 164         assert_eq!(Some(4), memchr(b'\x00', b"aaaa\x00"));
 165     }
 166
 167     #[test]
 168     fn matches_past_nul() {
 169         assert_eq!(Some(5), memchr(b'z', b"aaaa\x00z"));
 170     }
 171
 172     #[test]
 173     fn no_match_empty() {
 174         assert_eq!(None, memchr(b'a', b""));
 175     }
 176
 177     #[test]
 178     fn no_match() {
 179         assert_eq!(None, memchr(b'a', b"xyz"));
 180     }
 181
 182     #[test]
 183     fn matches_one_reversed() {
 184         assert_eq!(Some(0), memrchr(b'a', b"a"));
 185     }
 186
 187     #[test]
 188     fn matches_begin_reversed() {
 189         assert_eq!(Some(3), memrchr(b'a', b"aaaa"));
 190     }
 191
 192     #[test]
 193     fn matches_end_reversed() {
 194         assert_eq!(Some(0), memrchr(b'z', b"zaaaa"));
 195     }
 196
 197     #[test]
 198     fn matches_nul_reversed() {
 199         assert_eq!(Some(4), memrchr(b'\x00', b"aaaa\x00"));
 200     }
 201
 202     #[test]
 203     fn matches_past_nul_reversed() {
 204         assert_eq!(Some(0), memrchr(b'z', b"z\x00aaaa"));
 205     }
 206
 207     #[test]
 208     fn no_match_empty_reversed() {
 209         assert_eq!(None, memrchr(b'a', b""));
 210     }
 211
 212     #[test]
 213     fn no_match_reversed() {
 214         assert_eq!(None, memrchr(b'a', b"xyz"));
 215     }
 216
 217     #[test]
 218     fn each_alignment_reversed() {
 219         let mut data = [1u8; 64];
 220         let needle = 2;
 221         let pos = 40;
 222         data[pos] = needle;
 223         for start in 0..16 {
 224             assert_eq!(Some(pos - start), memrchr(needle, &data[start..]));
 225         }
 226     }
 227 }