src/libcore/str/mod.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! String manipulation
  12 //!
  13 //! For more details, see std::str
  14
  15 #![stable(feature = "rust1", since = "1.0.0")]
  16
  17 use self::pattern::Pattern;
  18 use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
  19
  20 use char;
  21 use fmt;
  22 use iter::{Map, Cloned, FusedIterator};
  23 use mem;
  24 use slice;
  25
  26 pub mod pattern;
  27
/// A trait to abstract the idea of creating a new instance of a type from a
/// string.
///
/// `FromStr`'s [`from_str()`] method is often used implicitly, through
/// [`str`]'s [`parse()`] method. See [`parse()`]'s documentation for examples.
///
/// [`from_str()`]: #tymethod.from_str
/// [`str`]: ../../std/primitive.str.html
/// [`parse()`]: ../../std/primitive.str.html#method.parse
#[stable(feature = "rust1", since = "1.0.0")]
pub trait FromStr: Sized {
    /// The associated error which can be returned from parsing.
    #[stable(feature = "rust1", since = "1.0.0")]
    type Err;

    /// Parses a string `s` to return a value of this type.
    ///
    /// If parsing succeeds, returns the value inside `Ok`; otherwise, when
    /// the string is ill-formatted, returns an error inside `Err`. The error
    /// type is specific to the implementation of the trait.
    ///
    /// # Examples
    ///
    /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
    ///
    /// [ithirtytwo]: ../../std/primitive.i32.html
    ///
    /// ```
    /// use std::str::FromStr;
    ///
    /// let s = "5";
    /// let x = i32::from_str(s).unwrap();
    ///
    /// assert_eq!(5, x);
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    fn from_str(s: &str) -> Result<Self, Self::Err>;
}
  66
  67 #[stable(feature = "rust1", since = "1.0.0")]
  68 impl FromStr for bool {
  69     type Err = ParseBoolError;
  70
  71     /// Parse a `bool` from a string.
  72     ///
  73     /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
  74     /// actually be parseable.
  75     ///
  76     /// # Examples
  77     ///
  78     /// ```
  79     /// use std::str::FromStr;
  80     ///
  81     /// assert_eq!(FromStr::from_str("true"), Ok(true));
  82     /// assert_eq!(FromStr::from_str("false"), Ok(false));
  83     /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
  84     /// ```
  85     ///
  86     /// Note, in many cases, the `.parse()` method on `str` is more proper.
  87     ///
  88     /// ```
  89     /// assert_eq!("true".parse(), Ok(true));
  90     /// assert_eq!("false".parse(), Ok(false));
  91     /// assert!("not even a boolean".parse::<bool>().is_err());
  92     /// ```
  93     #[inline]
  94     fn from_str(s: &str) -> Result<bool, ParseBoolError> {
  95         match s {
  96             "true"  => Ok(true),
  97             "false" => Ok(false),
  98             _       => Err(ParseBoolError { _priv: () }),
  99         }
 100     }
 101 }
 102
/// An error returned when parsing a `bool` from a string fails.
///
/// The only field is a private unit value, so the error carries no data and
/// cannot be constructed outside of this module.
#[derive(Debug, Clone, PartialEq, Eq)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct ParseBoolError { _priv: () }
 107
 108 #[stable(feature = "rust1", since = "1.0.0")]
 109 impl fmt::Display for ParseBoolError {
 110     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 111         "provided string was not `true` or `false`".fmt(f)
 112     }
 113 }
 114
 115 /*
 116 Section: Creating a string
 117 */
 118
/// Errors which can occur when attempting to interpret a sequence of `u8`
/// as a string.
///
/// As such, the `from_utf8` family of functions and methods for both `String`s
/// and `&str`s make use of this error, for example.
#[derive(Copy, Eq, PartialEq, Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Utf8Error {
    // Index up to which the input was verified as valid UTF-8; exposed
    // through `Utf8Error::valid_up_to()`.
    valid_up_to: usize,
}
 129
impl Utf8Error {
    /// Returns the index in the given string up to which valid UTF-8 was
    /// verified.
    ///
    /// It is the maximum index such that `from_utf8(&input[..index])`
    /// would return `Ok(_)`.
    ///
    /// # Examples
    ///
    /// Basic usage:
    ///
    /// ```
    /// use std::str;
    ///
    /// // some invalid bytes, in a vector
    /// let sparkle_heart = vec![0, 159, 146, 150];
    ///
    /// // std::str::from_utf8 returns a Utf8Error
    /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
    ///
    /// // the second byte is invalid here
    /// assert_eq!(1, error.valid_up_to());
    /// ```
    #[stable(feature = "utf8_error", since = "1.5.0")]
    pub fn valid_up_to(&self) -> usize { self.valid_up_to }
}
 156
 157 /// Converts a slice of bytes to a string slice.
 158 ///
 159 /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
 160 /// is made of bytes, so this function converts between the two. Not all byte
 161 /// slices are valid string slices, however: `&str` requires that it is valid
 162 /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
 163 /// then does the conversion.
 164 ///
 165 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
 166 /// incur the overhead of the validity check, there is an unsafe version of
 167 /// this function, [`from_utf8_unchecked()`][fromutf8u], which has the same
 168 /// behavior but skips the check.
 169 ///
 170 /// [fromutf8u]: fn.from_utf8_unchecked.html
 171 ///
 172 /// If you need a `String` instead of a `&str`, consider
 173 /// [`String::from_utf8()`][string].
 174 ///
 175 /// [string]: ../../std/string/struct.String.html#method.from_utf8
 176 ///
 177 /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
 178 /// it, this function is one way to have a stack-allocated string. There is
 179 /// an example of this in the examples section below.
 180 ///
 181 /// # Errors
 182 ///
 183 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
 184 /// provided slice is not UTF-8.
 185 ///
 186 /// # Examples
 187 ///
 188 /// Basic usage:
 189 ///
 190 /// ```
 191 /// use std::str;
 192 ///
 193 /// // some bytes, in a vector
 194 /// let sparkle_heart = vec![240, 159, 146, 150];
 195 ///
 196 /// // We know these bytes are valid, so just use `unwrap()`.
 197 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 198 ///
 199 /// assert_eq!("💖", sparkle_heart);
 200 /// ```
 201 ///
 202 /// Incorrect bytes:
 203 ///
 204 /// ```
 205 /// use std::str;
 206 ///
 207 /// // some invalid bytes, in a vector
 208 /// let sparkle_heart = vec![0, 159, 146, 150];
 209 ///
 210 /// assert!(str::from_utf8(&sparkle_heart).is_err());
 211 /// ```
 212 ///
 213 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
 214 /// errors that can be returned.
 215 ///
 216 /// [error]: struct.Utf8Error.html
 217 ///
 218 /// A "stack allocated string":
 219 ///
 220 /// ```
 221 /// use std::str;
 222 ///
 223 /// // some bytes, in a stack-allocated array
 224 /// let sparkle_heart = [240, 159, 146, 150];
 225 ///
 226 /// // We know these bytes are valid, so just use `unwrap()`.
 227 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 228 ///
 229 /// assert_eq!("💖", sparkle_heart);
 230 /// ```
 231 #[stable(feature = "rust1", since = "1.0.0")]
 232 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
 233     run_utf8_validation(v)?;
 234     Ok(unsafe { from_utf8_unchecked(v) })
 235 }
 236
 237 /// Forms a str from a pointer and a length.
 238 ///
 239 /// The `len` argument is the number of bytes in the string.
 240 ///
 241 /// # Safety
 242 ///
 243 /// This function is unsafe as there is no guarantee that the given pointer is
 244 /// valid for `len` bytes, nor whether the lifetime inferred is a suitable
 245 /// lifetime for the returned str.
 246 ///
 247 /// The data must be valid UTF-8
 248 ///
 249 /// `p` must be non-null, even for zero-length str.
 250 ///
 251 /// # Caveat
 252 ///
 253 /// The lifetime for the returned str is inferred from its usage. To
 254 /// prevent accidental misuse, it's suggested to tie the lifetime to whichever
 255 /// source lifetime is safe in the context, such as by providing a helper
 256 /// function taking the lifetime of a host value for the str, or by explicit
 257 /// annotation.
 258 /// Performs the same functionality as `from_raw_parts`, except that a mutable
 259 /// str is returned.
 260 ///
 261 unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
 262     mem::transmute::<&mut [u8], &mut str>(slice::from_raw_parts_mut(p, len))
 263 }
 264
 265 /// Converts a slice of bytes to a string slice without checking
 266 /// that the string contains valid UTF-8.
 267 ///
 268 /// See the safe version, [`from_utf8()`][fromutf8], for more information.
 269 ///
 270 /// [fromutf8]: fn.from_utf8.html
 271 ///
 272 /// # Safety
 273 ///
 274 /// This function is unsafe because it does not check that the bytes passed to
 275 /// it are valid UTF-8. If this constraint is violated, undefined behavior
 276 /// results, as the rest of Rust assumes that `&str`s are valid UTF-8.
 277 ///
 278 /// # Examples
 279 ///
 280 /// Basic usage:
 281 ///
 282 /// ```
 283 /// use std::str;
 284 ///
 285 /// // some bytes, in a vector
 286 /// let sparkle_heart = vec![240, 159, 146, 150];
 287 ///
 288 /// let sparkle_heart = unsafe {
 289 ///     str::from_utf8_unchecked(&sparkle_heart)
 290 /// };
 291 ///
 292 /// assert_eq!("💖", sparkle_heart);
 293 /// ```
 294 #[inline(always)]
 295 #[stable(feature = "rust1", since = "1.0.0")]
 296 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
 297     mem::transmute(v)
 298 }
 299
 300 #[stable(feature = "rust1", since = "1.0.0")]
 301 impl fmt::Display for Utf8Error {
 302     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 303         write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
 304     }
 305 }
 306
 307 /*
 308 Section: Iterators
 309 */
 310
/// Iterator for the char (representing *Unicode Scalar Values*) of a string
///
/// Created with the method [`chars()`].
///
/// [`chars()`]: ../../std/primitive.str.html#method.chars
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct Chars<'a> {
    // Iterator over the bytes not yet decoded; `as_str()` exposes the same
    // remaining bytes as a `&str`.
    iter: slice::Iter<'a, u8>
}
 321
 322 /// Return the initial codepoint accumulator for the first byte.
 323 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
 324 /// for width 3, and 3 bits for width 4.
 325 #[inline]
 326 fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
 327
 328 /// Return the value of `ch` updated with continuation byte `byte`.
 329 #[inline]
 330 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
 331
 332 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
 333 /// bits `10`).
 334 #[inline]
 335 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
 336
 337 #[inline]
 338 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
 339     match opt {
 340         Some(&byte) => byte,
 341         None => 0,
 342     }
 343 }
 344
/// Reads the next code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` only when the iterator yields no first byte. A first byte
/// below 128 is returned as-is. Otherwise one, two or three further bytes are
/// consumed depending on the first byte; a missing continuation byte is
/// treated as `0`. No validation of the input is performed here.
#[unstable(feature = "str_internals", issue = "0")]
#[inline]
pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
    // Decode UTF-8
    let x = match bytes.next() {
        None => return None,
        // Single-byte (ASCII) fast path.
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&next_byte) => next_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [[[x y] z] w]
    // NOTE: Performance is sensitive to the exact formulation here
    let init = utf8_first_byte(x, 2);
    let y = unwrap_or_0(bytes.next());
    // Two-byte result; overwritten below for longer sequences.
    let mut ch = utf8_acc_cont_byte(init, y);
    if x >= 0xE0 {
        // [[x y z] w] case
        // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
        let z = unwrap_or_0(bytes.next());
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        ch = init << 12 | y_z;
        if x >= 0xF0 {
            // [x y z w] case
            // use only the lower 3 bits of `init`
            let w = unwrap_or_0(bytes.next());
            ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
        }
    }

    Some(ch)
}
 379
/// Reads the last code point out of a byte iterator (assuming a
/// UTF-8-like encoding).
///
/// Returns `None` only when the iterator yields no last byte. A last byte
/// below 128 is returned as-is. Otherwise bytes are taken from the back for
/// as long as the byte just read is a continuation byte (at most three more);
/// a missing byte is treated as `0`. No validation of the input is performed
/// here.
#[inline]
fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
    where I: DoubleEndedIterator<Item = &'a u8>,
{
    // Decode UTF-8
    let w = match bytes.next_back() {
        None => return None,
        // Single-byte (ASCII) fast path.
        Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
        Some(&back_byte) => back_byte,
    };

    // Multibyte case follows
    // Decode from a byte combination out of: [x [y [z w]]]
    let mut ch;
    let z = unwrap_or_0(bytes.next_back());
    // Assume `z` is the leading byte of a 2-byte sequence until proven
    // otherwise below.
    ch = utf8_first_byte(z, 2);
    if utf8_is_cont_byte(z) {
        // `z` was a continuation byte, so the sequence has at least 3 bytes.
        let y = unwrap_or_0(bytes.next_back());
        ch = utf8_first_byte(y, 3);
        if utf8_is_cont_byte(y) {
            // `y` was a continuation byte too: 4-byte sequence led by `x`.
            let x = unwrap_or_0(bytes.next_back());
            ch = utf8_first_byte(x, 4);
            ch = utf8_acc_cont_byte(ch, y);
        }
        ch = utf8_acc_cont_byte(ch, z);
    }
    // The byte read first is always the final continuation byte.
    ch = utf8_acc_cont_byte(ch, w);

    Some(ch)
}
 412
 413 #[stable(feature = "rust1", since = "1.0.0")]
 414 impl<'a> Iterator for Chars<'a> {
 415     type Item = char;
 416
 417     #[inline]
 418     fn next(&mut self) -> Option<char> {
 419         next_code_point(&mut self.iter).map(|ch| {
 420             // str invariant says `ch` is a valid Unicode Scalar Value
 421             unsafe {
 422                 char::from_u32_unchecked(ch)
 423             }
 424         })
 425     }
 426
 427     #[inline]
 428     fn count(self) -> usize {
 429         // length in `char` is equal to the number of non-continuation bytes
 430         let bytes_len = self.iter.len();
 431         let mut cont_bytes = 0;
 432         for &byte in self.iter {
 433             cont_bytes += utf8_is_cont_byte(byte) as usize;
 434         }
 435         bytes_len - cont_bytes
 436     }
 437
 438     #[inline]
 439     fn size_hint(&self) -> (usize, Option<usize>) {
 440         let len = self.iter.len();
 441         // `(len + 3)` can't overflow, because we know that the `slice::Iter`
 442         // belongs to a slice in memory which has a maximum length of
 443         // `isize::MAX` (that's well below `usize::MAX`).
 444         ((len + 3) / 4, Some(len))
 445     }
 446
 447     #[inline]
 448     fn last(mut self) -> Option<char> {
 449         // No need to go through the entire string.
 450         self.next_back()
 451     }
 452 }
 453
 454 #[stable(feature = "rust1", since = "1.0.0")]
 455 impl<'a> DoubleEndedIterator for Chars<'a> {
 456     #[inline]
 457     fn next_back(&mut self) -> Option<char> {
 458         next_code_point_reverse(&mut self.iter).map(|ch| {
 459             // str invariant says `ch` is a valid Unicode Scalar Value
 460             unsafe {
 461                 char::from_u32_unchecked(ch)
 462             }
 463         })
 464     }
 465 }
 466
// Marker impl only: declares that `Chars` keeps returning `None` once it has
// returned `None`.
#[unstable(feature = "fused", issue = "35602")]
impl<'a> FusedIterator for Chars<'a> {}
 469
 470 impl<'a> Chars<'a> {
 471     /// View the underlying data as a subslice of the original data.
 472     ///
 473     /// This has the same lifetime as the original slice, and so the
 474     /// iterator can continue to be used while this exists.
 475     ///
 476     /// # Examples
 477     ///
 478     /// ```
 479     /// let mut chars = "abc".chars();
 480     ///
 481     /// assert_eq!(chars.as_str(), "abc");
 482     /// chars.next();
 483     /// assert_eq!(chars.as_str(), "bc");
 484     /// chars.next();
 485     /// chars.next();
 486     /// assert_eq!(chars.as_str(), "");
 487     /// ```
 488     #[stable(feature = "iter_to_slice", since = "1.4.0")]
 489     #[inline]
 490     pub fn as_str(&self) -> &'a str {
 491         unsafe { from_utf8_unchecked(self.iter.as_slice()) }
 492     }
 493 }
 494
/// Iterator for a string's characters and their byte offsets.
#[derive(Clone, Debug)]
#[stable(feature = "rust1", since = "1.0.0")]
pub struct CharIndices<'a> {
    // Number of bytes consumed from the front so far; this is the byte
    // offset reported for the next `char` yielded from the front.
    front_offset: usize,
    // The underlying `char` iterator over the remaining bytes.
    iter: Chars<'a>,
}
 502
 503 #[stable(feature = "rust1", since = "1.0.0")]
 504 impl<'a> Iterator for CharIndices<'a> {
 505     type Item = (usize, char);
 506
 507     #[inline]
 508     fn next(&mut self) -> Option<(usize, char)> {
 509         let pre_len = self.iter.iter.len();
 510         match self.iter.next() {
 511             None => None,
 512             Some(ch) => {
 513                 let index = self.front_offset;
 514                 let len = self.iter.iter.len();
 515                 self.front_offset += pre_len - len;
 516                 Some((index, ch))
 517             }
 518         }
 519     }
 520
 521     #[inline]
 522     fn count(self) -> usize {
 523         self.iter.count()
 524     }
 525
 526     #[inline]
 527     fn size_hint(&self) -> (usize, Option<usize>) {
 528         self.iter.size_hint()
 529     }
 530
 531     #[inline]
 532     fn last(mut self) -> Option<(usize, char)> {
 533         // No need to go through the entire string.
 534         self.next_back()
 535     }
 536 }
 537
 538 #[stable(feature = "rust1", since = "1.0.0")]
 539 impl<'a> DoubleEndedIterator for CharIndices<'a> {
 540     #[inline]
 541     fn next_back(&mut self) -> Option<(usize, char)> {
 542         match self.iter.next_back() {
 543             None => None,
 544             Some(ch) => {
 545                 let index = self.front_offset + self.iter.iter.len();
 546                 Some((index, ch))
 547             }
 548         }
 549     }
 550 }
 551
// Marker impl only: declares that `CharIndices` keeps returning `None` once
// it has returned `None`.
#[unstable(feature = "fused", issue = "35602")]
impl<'a> FusedIterator for CharIndices<'a> {}
 554
impl<'a> CharIndices<'a> {
    /// View the underlying data as a subslice of the original data.
    ///
    /// This has the same lifetime as the original slice, and so the
    /// iterator can continue to be used while this exists.
    ///
    /// The result is whatever the inner `Chars` iterator reports as its
    /// remaining string.
    #[stable(feature = "iter_to_slice", since = "1.4.0")]
    #[inline]
    pub fn as_str(&self) -> &'a str {
        self.iter.as_str()
    }
}
 566
/// External iterator for a string's bytes.
/// Use with the `std::iter` module.
///
/// Created with the method [`bytes()`].
///
/// [`bytes()`]: ../../std/primitive.str.html#method.bytes
///
/// This is a thin wrapper around a cloning slice iterator, so it yields
/// `u8` values rather than references.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
 576
// Every method forwards to the wrapped iterator so that its own
// implementations of `count`, `last` and `nth` are used.
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Bytes<'a> {
    type Item = u8;

    /// Returns the next byte of the string, if any.
    #[inline]
    fn next(&mut self) -> Option<u8> {
        self.0.next()
    }

    /// Forwards the size hint of the wrapped iterator.
    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }

    /// Consumes the iterator and returns the number of remaining bytes.
    #[inline]
    fn count(self) -> usize {
        self.0.count()
    }

    /// Consumes the iterator and returns the last byte, if any.
    #[inline]
    fn last(self) -> Option<Self::Item> {
        self.0.last()
    }

    /// Returns the `n`th remaining byte (zero-based), if any.
    #[inline]
    fn nth(&mut self, n: usize) -> Option<Self::Item> {
        self.0.nth(n)
    }
}
 606
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for Bytes<'a> {
    /// Returns the last remaining byte of the string, if any.
    #[inline]
    fn next_back(&mut self) -> Option<u8> {
        self.0.next_back()
    }
}
 614
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> ExactSizeIterator for Bytes<'a> {
    /// Returns the number of bytes left, as reported by the wrapped iterator.
    #[inline]
    fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns whether the wrapped iterator reports itself as empty.
    #[inline]
    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }
}
 627
// Marker impl only: declares that `Bytes` keeps returning `None` once it has
// returned `None`.
#[unstable(feature = "fused", issue = "35602")]
impl<'a> FusedIterator for Bytes<'a> {}
 630
/// This macro generates a Clone impl for string pattern API
/// wrapper types of the form X<'a, P>
///
/// Invoked as `clone $t with |$s| $e`: inside the generated `clone()`, `$s`
/// is bound to `self` and `$e` is the expression producing the clone. The
/// impl is only available when `P::Searcher: Clone`.
macro_rules! derive_pattern_clone {
    (clone $t:ident with |$s:ident| $e:expr) => {
        impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                let $s = self;
                $e
            }
        }
    }
}
 645
/// This macro generates two public iterator structs
/// wrapping a private internal one that makes use of the `Pattern` API.
///
/// For all patterns `P: Pattern<'a>` the following items will be
/// generated (generics omitted):
///
/// struct $forward_iterator($internal_iterator);
/// struct $reverse_iterator($internal_iterator);
///
/// impl Iterator for $forward_iterator
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// impl DoubleEndedIterator for $forward_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl Iterator for $reverse_iterator
///       where P::Searcher: ReverseSearcher
/// { /* internal ends up calling Searcher::next_match_back() */ }
///
/// impl DoubleEndedIterator for $reverse_iterator
///       where P::Searcher: DoubleEndedSearcher
/// { /* internal ends up calling Searcher::next_match() */ }
///
/// The internal one is defined outside the macro, and has almost the same
/// semantics as a DoubleEndedIterator by delegating to `pattern::Searcher` and
/// `pattern::ReverseSearcher` for both forward and reverse iteration.
///
/// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
/// `Pattern` might not return the same elements, so actually implementing
/// `DoubleEndedIterator` for it would be incorrect.
/// (See the docs in `str::pattern` for more details)
///
/// However, the internal struct still represents a single ended iterator from
/// either end, and depending on pattern is also a valid double ended iterator,
/// so the two wrapper structs implement `Iterator`
/// and `DoubleEndedIterator` depending on the concrete pattern type, leading
/// to the complex impls seen above.
///
/// Besides the impls listed above, `fmt::Debug`, `Clone` and `FusedIterator`
/// impls are generated for both wrapper structs.
macro_rules! generate_pattern_iterators {
    // Main entry point: emits both wrapper structs and their common impls,
    // then re-invokes the macro with the `delegate` tokens to pick one of
    // the two arms below.
    {
        // Forward iterator
        forward:
            $(#[$forward_iterator_attribute:meta])*
            struct $forward_iterator:ident;

        // Reverse iterator
        reverse:
            $(#[$reverse_iterator_attribute:meta])*
            struct $reverse_iterator:ident;

        // Stability of all generated items
        stability:
            $(#[$common_stability_attribute:meta])*

        // Internal almost-iterator that is being delegated to
        internal:
            $internal_iterator:ident yielding ($iterty:ty);

        // Kind of delegation - either single ended or double ended
        delegate $($t:tt)*
    } => {
        $(#[$forward_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($forward_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
            type Item = $iterty;

            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $forward_iterator(self.0.clone())
            }
        }

        $(#[$reverse_iterator_attribute])*
        $(#[$common_stability_attribute])*
        pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
            where P::Searcher: fmt::Debug
        {
            fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
                f.debug_tuple(stringify!($reverse_iterator))
                    .field(&self.0)
                    .finish()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
            where P::Searcher: ReverseSearcher<'a>
        {
            type Item = $iterty;

            // The reverse wrapper walks the internal iterator from the back.
            #[inline]
            fn next(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
            where P::Searcher: Clone
        {
            fn clone(&self) -> Self {
                $reverse_iterator(self.0.clone())
            }
        }

        #[unstable(feature = "fused", issue = "35602")]
        impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}

        #[unstable(feature = "fused", issue = "35602")]
        impl<'a, P: Pattern<'a>> FusedIterator for $reverse_iterator<'a, P>
            where P::Searcher: ReverseSearcher<'a> {}

        generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
                                                $forward_iterator,
                                                $reverse_iterator, $iterty);
    };
    // `delegate double ended;`: additionally implement `DoubleEndedIterator`
    // for both wrappers when the searcher is a `DoubleEndedSearcher`.
    {
        double ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {
        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next_back()
            }
        }

        $(#[$common_stability_attribute])*
        impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
            where P::Searcher: DoubleEndedSearcher<'a>
        {
            // The back of the reverse wrapper is the front of the internal
            // iterator.
            #[inline]
            fn next_back(&mut self) -> Option<$iterty> {
                self.0.next()
            }
        }
    };
    // `delegate single ended;`: nothing further is generated.
    {
        single ended; with $(#[$common_stability_attribute:meta])*,
                           $forward_iterator:ident,
                           $reverse_iterator:ident, $iterty:ty
    } => {}
}
 819
// `Clone` for `SplitInternal`: the searcher is cloned explicitly and every
// other field is copied over with struct-update syntax.
derive_pattern_clone!{
    clone SplitInternal
    with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
}
 824
/// Shared state behind the pattern-based split iterators generated with
/// `generate_pattern_iterators!`.
struct SplitInternal<'a, P: Pattern<'a>> {
    // Byte offset in the haystack where the next piece yielded from the
    // front begins.
    start: usize,
    // Byte offset in the haystack where the next piece yielded from the
    // back ends.
    end: usize,
    // Searcher that locates occurrences of the pattern and owns the haystack.
    matcher: P::Searcher,
    // Whether an empty final piece may be yielded.
    allow_trailing_empty: bool,
    // Set once the final piece has been produced; no items follow.
    finished: bool,
}
 832
 833 impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
 834     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 835         f.debug_struct("SplitInternal")
 836             .field("start", &self.start)
 837             .field("end", &self.end)
 838             .field("matcher", &self.matcher)
 839             .field("allow_trailing_empty", &self.allow_trailing_empty)
 840             .field("finished", &self.finished)
 841             .finish()
 842     }
 843 }
 844
impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
    /// Yields the final piece `haystack[start..end]` and marks the iterator
    /// as finished.
    ///
    /// Returns `None`, leaving the state untouched, if the iterator is
    /// already finished or if the remaining piece is empty while
    /// `allow_trailing_empty` is `false`.
    #[inline]
    fn get_end(&mut self) -> Option<&'a str> {
        if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
            self.finished = true;
            // NOTE(review): presumably `start` and `end` always sit on char
            // boundaries of the haystack, which the unchecked slice relies
            // on - confirm against the `Searcher` contract.
            unsafe {
                let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
                Some(string)
            }
        } else {
            None
        }
    }

    /// Yields the next piece from the front.
    ///
    /// For a match `(a, b)` the piece is `haystack[start..a]` and `start`
    /// moves to `b`. When the searcher has no further match, the final piece
    /// is produced through `get_end()`.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        if self.finished { return None }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(self.start, a);
                self.start = b;
                Some(elt)
            },
            None => self.get_end(),
        }
    }

    /// Yields the next piece from the back.
    ///
    /// For a match `(a, b)` found from the back the piece is
    /// `haystack[b..end]` and `end` moves to `a`. When the searcher has no
    /// further match, `haystack[start..end]` is yielded and the iterator is
    /// marked as finished.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str>
        where P::Searcher: ReverseSearcher<'a>
    {
        if self.finished { return None }

        // First call with trailing empties disallowed: flip the flag so the
        // recursive call takes the normal path below, then drop the piece it
        // returns if that piece is empty. A non-empty piece is returned
        // directly; otherwise iteration continues unless the recursive call
        // already finished the iterator.
        if !self.allow_trailing_empty {
            self.allow_trailing_empty = true;
            match self.next_back() {
                Some(elt) if !elt.is_empty() => return Some(elt),
                _ => if self.finished { return None }
            }
        }

        let haystack = self.matcher.haystack();
        match self.matcher.next_match_back() {
            Some((a, b)) => unsafe {
                let elt = haystack.slice_unchecked(b, self.end);
                self.end = a;
                Some(elt)
            },
            None => unsafe {
                self.finished = true;
                Some(haystack.slice_unchecked(self.start, self.end))
            },
        }
    }
}
 902
// Generates the public `Split` and `RSplit` iterators as wrappers around
// `SplitInternal`, yielding `&'a str` and with double-ended delegation.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split()`].
        ///
        /// [`split()`]: ../../std/primitive.str.html#method.split
        struct Split;
    reverse:
        /// Created with the method [`rsplit()`].
        ///
        /// [`rsplit()`]: ../../std/primitive.str.html#method.rsplit
        struct RSplit;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
 920
// Declares the forward/reverse iterator pair `SplitTerminator` /
// `RSplitTerminator`; like `Split`, both are backed by `SplitInternal`,
// yield `&'a str`, and delegate in double-ended mode.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`split_terminator()`].
        ///
        /// [`split_terminator()`]: ../../std/primitive.str.html#method.split_terminator
        struct SplitTerminator;
    reverse:
        /// Created with the method [`rsplit_terminator()`].
        ///
        /// [`rsplit_terminator()`]: ../../std/primitive.str.html#method.rsplit_terminator
        struct RSplitTerminator;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitInternal yielding (&'a str);
    delegate double ended;
}
 938
// Clone for `SplitNInternal`: the inner `iter` is cloned explicitly and the
// remaining fields are taken from `*s` through struct-update syntax.
derive_pattern_clone!{
    clone SplitNInternal
    with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
}
 943
/// State behind the `SplitN` / `RSplitN` iterators: a `SplitInternal` plus
/// a counter limiting how many more items may be produced.
struct SplitNInternal<'a, P: Pattern<'a>> {
    /// The underlying splitter that does the actual searching.
    iter: SplitInternal<'a, P>,
    /// The number of splits remaining
    count: usize,
}
 949
 950 impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
 951     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 952         f.debug_struct("SplitNInternal")
 953             .field("iter", &self.iter)
 954             .field("count", &self.count)
 955             .finish()
 956     }
 957 }
 958
 959 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
 960     #[inline]
 961     fn next(&mut self) -> Option<&'a str> {
 962         match self.count {
 963             0 => None,
 964             1 => { self.count = 0; self.iter.get_end() }
 965             _ => { self.count -= 1; self.iter.next() }
 966         }
 967     }
 968
 969     #[inline]
 970     fn next_back(&mut self) -> Option<&'a str>
 971         where P::Searcher: ReverseSearcher<'a>
 972     {
 973         match self.count {
 974             0 => None,
 975             1 => { self.count = 0; self.iter.get_end() }
 976             _ => { self.count -= 1; self.iter.next_back() }
 977         }
 978     }
 979 }
 980
// Declares the forward/reverse iterator pair `SplitN` / `RSplitN` on top of
// `SplitNInternal`, yielding `&'a str`. Unlike the other pairs in this file,
// delegation is requested in single-ended mode.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`splitn()`].
        ///
        /// [`splitn()`]: ../../std/primitive.str.html#method.splitn
        struct SplitN;
    reverse:
        /// Created with the method [`rsplitn()`].
        ///
        /// [`rsplitn()`]: ../../std/primitive.str.html#method.rsplitn
        struct RSplitN;
    stability:
        #[stable(feature = "rust1", since = "1.0.0")]
    internal:
        SplitNInternal yielding (&'a str);
    delegate single ended;
}
 998
// Clone for `MatchIndicesInternal`: clones the wrapped searcher (field 0).
derive_pattern_clone!{
    clone MatchIndicesInternal
    with |s| MatchIndicesInternal(s.0.clone())
}
1003
/// State behind the `MatchIndices` / `RMatchIndices` iterators: just the
/// pattern's searcher.
struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1005
1006 impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
1007     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1008         f.debug_tuple("MatchIndicesInternal")
1009             .field(&self.0)
1010             .finish()
1011     }
1012 }
1013
1014 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1015     #[inline]
1016     fn next(&mut self) -> Option<(usize, &'a str)> {
1017         self.0.next_match().map(|(start, end)| unsafe {
1018             (start, self.0.haystack().slice_unchecked(start, end))
1019         })
1020     }
1021
1022     #[inline]
1023     fn next_back(&mut self) -> Option<(usize, &'a str)>
1024         where P::Searcher: ReverseSearcher<'a>
1025     {
1026         self.0.next_match_back().map(|(start, end)| unsafe {
1027             (start, self.0.haystack().slice_unchecked(start, end))
1028         })
1029     }
1030 }
1031
// Declares the forward/reverse iterator pair `MatchIndices` /
// `RMatchIndices` on top of `MatchIndicesInternal`, yielding
// `(usize, &'a str)` and delegating in double-ended mode.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`match_indices()`].
        ///
        /// [`match_indices()`]: ../../std/primitive.str.html#method.match_indices
        struct MatchIndices;
    reverse:
        /// Created with the method [`rmatch_indices()`].
        ///
        /// [`rmatch_indices()`]: ../../std/primitive.str.html#method.rmatch_indices
        struct RMatchIndices;
    stability:
        #[stable(feature = "str_match_indices", since = "1.5.0")]
    internal:
        MatchIndicesInternal yielding ((usize, &'a str));
    delegate double ended;
}
1049
// Clone for `MatchesInternal`: clones the wrapped searcher (field 0).
derive_pattern_clone!{
    clone MatchesInternal
    with |s| MatchesInternal(s.0.clone())
}
1054
/// State behind the `Matches` / `RMatches` iterators: just the pattern's
/// searcher.
struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1056
1057 impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1058     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1059         f.debug_tuple("MatchesInternal")
1060             .field(&self.0)
1061             .finish()
1062     }
1063 }
1064
1065 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1066     #[inline]
1067     fn next(&mut self) -> Option<&'a str> {
1068         self.0.next_match().map(|(a, b)| unsafe {
1069             // Indices are known to be on utf8 boundaries
1070             self.0.haystack().slice_unchecked(a, b)
1071         })
1072     }
1073
1074     #[inline]
1075     fn next_back(&mut self) -> Option<&'a str>
1076         where P::Searcher: ReverseSearcher<'a>
1077     {
1078         self.0.next_match_back().map(|(a, b)| unsafe {
1079             // Indices are known to be on utf8 boundaries
1080             self.0.haystack().slice_unchecked(a, b)
1081         })
1082     }
1083 }
1084
// Declares the forward/reverse iterator pair `Matches` / `RMatches` on top
// of `MatchesInternal`, yielding `&'a str` and delegating in double-ended
// mode.
generate_pattern_iterators! {
    forward:
        /// Created with the method [`matches()`].
        ///
        /// [`matches()`]: ../../std/primitive.str.html#method.matches
        struct Matches;
    reverse:
        /// Created with the method [`rmatches()`].
        ///
        /// [`rmatches()`]: ../../std/primitive.str.html#method.rmatches
        struct RMatches;
    stability:
        #[stable(feature = "str_matches", since = "1.2.0")]
    internal:
        MatchesInternal yielding (&'a str);
    delegate double ended;
}
1102
/// Created with the method [`lines()`].
///
/// [`lines()`]: ../../std/primitive.str.html#method.lines
// Wraps a `SplitTerminator` over a `char` pattern whose items are passed
// through `LinesAnyMap`, which drops one trailing `\r` from each item.
#[stable(feature = "rust1", since = "1.0.0")]
#[derive(Clone, Debug)]
pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1109
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Lines<'a> {
    type Item = &'a str;

    // Both methods forward directly to the wrapped `Map` iterator.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        self.0.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
1124
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> DoubleEndedIterator for Lines<'a> {
    // Forwards to the wrapped `Map` iterator's `next_back`.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        self.0.next_back()
    }
}
1132
// Marker impl only; `FusedIterator` requires no methods here.
#[unstable(feature = "fused", issue = "35602")]
impl<'a> FusedIterator for Lines<'a> {}
1135
/// Created with the method [`lines_any()`].
///
/// [`lines_any()`]: ../../std/primitive.str.html#method.lines_any
// Deprecated newtype around `Lines`; its iterator impls forward to the
// wrapped value.
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
#[derive(Clone, Debug)]
#[allow(deprecated)]
pub struct LinesAny<'a>(Lines<'a>);
1144
/// A nameable, cloneable fn type
///
/// Used as the mapping function inside `Lines`; calling it strips a single
/// trailing `\r` from the given line.
#[derive(Clone)]
struct LinesAnyMap;
1148
1149 impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1150     #[inline]
1151     extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1152         let l = line.len();
1153         if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1154         else { line }
1155     }
1156 }
1157
impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
    // Forwards to the `Fn` implementation through a shared reborrow.
    #[inline]
    extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
        Fn::call(&*self, (line,))
    }
}
1164
impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
    type Output = &'a str;

    // Forwards to the `Fn` implementation by borrowing the consumed value.
    #[inline]
    extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
        Fn::call(&self, (line,))
    }
}
1173
#[stable(feature = "rust1", since = "1.0.0")]
#[allow(deprecated)]
impl<'a> Iterator for LinesAny<'a> {
    type Item = &'a str;

    // Both methods forward directly to the wrapped `Lines` iterator.
    #[inline]
    fn next(&mut self) -> Option<&'a str> {
        self.0.next()
    }

    #[inline]
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}
1189
#[stable(feature = "rust1", since = "1.0.0")]
#[allow(deprecated)]
impl<'a> DoubleEndedIterator for LinesAny<'a> {
    // Forwards to the wrapped `Lines` iterator's `next_back`.
    #[inline]
    fn next_back(&mut self) -> Option<&'a str> {
        self.0.next_back()
    }
}
1198
// Marker impl only; `FusedIterator` requires no methods here.
#[unstable(feature = "fused", issue = "35602")]
#[allow(deprecated)]
impl<'a> FusedIterator for LinesAny<'a> {}
1202
1203 /*
1204 Section: Comparing strings
1205 */
1206
1207 /// Bytewise slice equality
1208 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
1209 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
1210 #[lang = "str_eq"]
1211 #[inline]
1212 fn eq_slice(a: &str, b: &str) -> bool {
1213     a.as_bytes() == b.as_bytes()
1214 }
1215
1216 /*
1217 Section: UTF-8 validation
1218 */
1219
// The mask has the top bit of every byte set. It is written as a u64 and
// truncated so that it also fits a narrower usize.
const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;

/// Reports whether at least one byte of the word `x` is nonascii, i.e. has
/// its top bit set (>= 128).
#[inline]
fn contains_nonascii(x: usize) -> bool {
    let high_bits = x & NONASCII_MASK;
    high_bits != 0
}
1228
/// Walk through `v` checking that it's a valid UTF-8 sequence,
/// returning `Ok(())` in that case, or, if it is invalid, a `Utf8Error`
/// whose `valid_up_to` is the index of the first byte of the
/// invalid sequence.
#[inline(always)]
fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
    let mut index = 0;
    let len = v.len();

    let usize_bytes = mem::size_of::<usize>();
    let ascii_block_size = 2 * usize_bytes;
    // Exclusive upper bound for `index` such that a whole two-word block
    // starting at `index` still lies inside `v` (0 if `v` is too short).
    let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };

    while index < len {
        // Start of the sequence currently being checked; reported on error.
        let old_offset = index;
        macro_rules! err { () => {{
            return Err(Utf8Error {
                valid_up_to: old_offset
            })
        }}}

        // Advance to the next byte of the current sequence and read it.
        macro_rules! next { () => {{
            index += 1;
            // we needed data, but there was none: error!
            if index >= len {
                err!()
            }
            v[index]
        }}}

        let first = v[index];
        if first >= 128 {
            let w = UTF8_CHAR_WIDTH[first as usize];
            let second = next!();
            // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
            //        first  C2 80        last DF BF
            // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
            //        first  E0 A0 80     last EF BF BF
            //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
            //               ED A0 80 to       ED BF BF
            // 4-byte encoding is for codepoints \u{10000} to \u{10ffff}
            //        first  F0 90 80 80  last F4 8F BF BF
            //
            // Use the UTF-8 syntax from the RFC
            //
            // https://tools.ietf.org/html/rfc3629
            // UTF8-1      = %x00-7F
            // UTF8-2      = %xC2-DF UTF8-tail
            // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
            //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
            // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
            //               %xF4 %x80-8F 2( UTF8-tail )
            match w {
                2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
                3 => {
                    match (first, second, next!() & !CONT_MASK) {
                        (0xE0         , 0xA0 ... 0xBF, TAG_CONT_U8) |
                        (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
                        (0xED         , 0x80 ... 0x9F, TAG_CONT_U8) |
                        (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                4 => {
                    match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                        (0xF0         , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                        (0xF4         , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
                        _ => err!()
                    }
                }
                // Width 0 in the table: this byte cannot start a sequence.
                _ => err!()
            }
            // Step past the last byte of the sequence just validated.
            index += 1;
        } else {
            // Ascii case, try to skip forward quickly.
            // When the pointer is aligned, read 2 words of data per iteration
            // until we find a word containing a non-ascii byte.
            let ptr = v.as_ptr();
            let align = (ptr as usize + index) & (usize_bytes - 1);
            if align == 0 {
                while index < blocks_end {
                    unsafe {
                        let block = ptr.offset(index as isize) as *const usize;
                        // break if there is a nonascii byte
                        let zu = contains_nonascii(*block);
                        let zv = contains_nonascii(*block.offset(1));
                        if zu | zv {
                            break;
                        }
                    }
                    index += ascii_block_size;
                }
                // step from the point where the wordwise loop stopped
                while index < len && v[index] < 128 {
                    index += 1;
                }
            } else {
                // Not word-aligned yet: consume this single ascii byte.
                index += 1;
            }
        }
    }

    Ok(())
}
1334
// https://tools.ietf.org/html/rfc3629
// Indexed by the first byte of a sequence; each entry is the number of bytes
// in the UTF-8 character that byte starts. An entry of 0 marks a byte that
// cannot start a character: 0x80-0xBF, 0xC0-0xC1 and 0xF5-0xFF.
static UTF8_CHAR_WIDTH: [u8; 256] = [
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
];
1354
1355 /// Given a first byte, determine how many bytes are in this UTF-8 character
1356 #[unstable(feature = "str_internals", issue = "0")]
1357 #[inline]
1358 pub fn utf8_char_width(b: u8) -> usize {
1359     return UTF8_CHAR_WIDTH[b as usize] as usize;
1360 }
1361
/// Mask of the value bits of a continuation byte (its low six bits)
const CONT_MASK: u8 = 0b0011_1111;
/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte,
/// i.e. the top two bits are `10`
const TAG_CONT_U8: u8 = 0b1000_0000;
1366
1367 /*
1368 Section: Trait implementations
1369 */
1370
1371 mod traits {
1372     use cmp::Ordering;
1373     use ops;
1374     use str::eq_slice;
1375
    /// Implements ordering of strings.
    ///
    /// Strings are ordered lexicographically by their byte values.  This orders Unicode code
    /// points based on their positions in the code charts.  This is not necessarily the same as
    /// "alphabetical" order, which varies by language and locale.  Sorting strings according to
    /// culturally-accepted standards requires locale-specific data that is outside the scope of
    /// the `str` type.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl Ord for str {
        // Delegates to the ordering of the underlying byte slices.
        #[inline]
        fn cmp(&self, other: &str) -> Ordering {
            self.as_bytes().cmp(other.as_bytes())
        }
    }
1390
1391     #[stable(feature = "rust1", since = "1.0.0")]
1392     impl PartialEq for str {
1393         #[inline]
1394         fn eq(&self, other: &str) -> bool {
1395             eq_slice(self, other)
1396         }
1397         #[inline]
1398         fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1399     }
1400
    // Marker impl only: `Eq` adds no methods on top of the `PartialEq` impl
    // for `str`.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl Eq for str {}
1403
    /// Implements comparison operations on strings.
    ///
    /// Strings are compared lexicographically by their byte values.  This compares Unicode code
    /// points based on their positions in the code charts.  This is not necessarily the same as
    /// "alphabetical" order, which varies by language and locale.  Comparing strings according to
    /// culturally-accepted standards requires locale-specific data that is outside the scope of
    /// the `str` type.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl PartialOrd for str {
        // Always `Some`: the result is taken from the total order in `Ord::cmp`.
        #[inline]
        fn partial_cmp(&self, other: &str) -> Option<Ordering> {
            Some(self.cmp(other))
        }
    }
1418
1419     /// Implements substring slicing with syntax `&self[begin .. end]`.
1420     ///
1421     /// Returns a slice of the given string from the byte range
1422     /// [`begin`..`end`).
1423     ///
1424     /// This operation is `O(1)`.
1425     ///
1426     /// # Panics
1427     ///
1428     /// Panics if `begin` or `end` does not point to the starting
1429     /// byte offset of a character (as defined by `is_char_boundary`).
1430     /// Requires that `begin <= end` and `end <= len` where `len` is the
1431     /// length of the string.
1432     ///
1433     /// # Examples
1434     ///
1435     /// ```
1436     /// let s = "Löwe 老虎 Léopard";
1437     /// assert_eq!(&s[0 .. 1], "L");
1438     ///
1439     /// assert_eq!(&s[1 .. 9], "öwe 老");
1440     ///
1441     /// // these will panic:
1442     /// // byte 2 lies within `ö`:
1443     /// // &s[2 ..3];
1444     ///
1445     /// // byte 8 lies within `老`
1446     /// // &s[1 .. 8];
1447     ///
1448     /// // byte 100 is outside the string
1449     /// // &s[3 .. 100];
1450     /// ```
1451     #[stable(feature = "rust1", since = "1.0.0")]
1452     impl ops::Index<ops::Range<usize>> for str {
1453         type Output = str;
1454         #[inline]
1455         fn index(&self, index: ops::Range<usize>) -> &str {
1456             // is_char_boundary checks that the index is in [0, .len()]
1457             if index.start <= index.end &&
1458                self.is_char_boundary(index.start) &&
1459                self.is_char_boundary(index.end) {
1460                 unsafe { self.slice_unchecked(index.start, index.end) }
1461             } else {
1462                 super::slice_error_fail(self, index.start, index.end)
1463             }
1464         }
1465     }
1466
1467     /// Implements mutable substring slicing with syntax
1468     /// `&mut self[begin .. end]`.
1469     ///
1470     /// Returns a mutable slice of the given string from the byte range
1471     /// [`begin`..`end`).
1472     ///
1473     /// This operation is `O(1)`.
1474     ///
1475     /// # Panics
1476     ///
1477     /// Panics if `begin` or `end` does not point to the starting
1478     /// byte offset of a character (as defined by `is_char_boundary`).
1479     /// Requires that `begin <= end` and `end <= len` where `len` is the
1480     /// length of the string.
1481     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1482     impl ops::IndexMut<ops::Range<usize>> for str {
1483         #[inline]
1484         fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1485             // is_char_boundary checks that the index is in [0, .len()]
1486             if index.start <= index.end &&
1487                self.is_char_boundary(index.start) &&
1488                self.is_char_boundary(index.end) {
1489                 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1490             } else {
1491                 super::slice_error_fail(self, index.start, index.end)
1492             }
1493         }
1494     }
1495
1496     /// Implements substring slicing with syntax `&self[.. end]`.
1497     ///
1498     /// Returns a slice of the string from the beginning to byte offset
1499     /// `end`.
1500     ///
1501     /// Equivalent to `&self[0 .. end]`.
1502     #[stable(feature = "rust1", since = "1.0.0")]
1503     impl ops::Index<ops::RangeTo<usize>> for str {
1504         type Output = str;
1505
1506         #[inline]
1507         fn index(&self, index: ops::RangeTo<usize>) -> &str {
1508             // is_char_boundary checks that the index is in [0, .len()]
1509             if self.is_char_boundary(index.end) {
1510                 unsafe { self.slice_unchecked(0, index.end) }
1511             } else {
1512                 super::slice_error_fail(self, 0, index.end)
1513             }
1514         }
1515     }
1516
1517     /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1518     ///
1519     /// Returns a mutable slice of the string from the beginning to byte offset
1520     /// `end`.
1521     ///
1522     /// Equivalent to `&mut self[0 .. end]`.
1523     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1524     impl ops::IndexMut<ops::RangeTo<usize>> for str {
1525         #[inline]
1526         fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1527             // is_char_boundary checks that the index is in [0, .len()]
1528             if self.is_char_boundary(index.end) {
1529                 unsafe { self.slice_mut_unchecked(0, index.end) }
1530             } else {
1531                 super::slice_error_fail(self, 0, index.end)
1532             }
1533         }
1534     }
1535
1536     /// Implements substring slicing with syntax `&self[begin ..]`.
1537     ///
1538     /// Returns a slice of the string from byte offset `begin`
1539     /// to the end of the string.
1540     ///
1541     /// Equivalent to `&self[begin .. len]`.
1542     #[stable(feature = "rust1", since = "1.0.0")]
1543     impl ops::Index<ops::RangeFrom<usize>> for str {
1544         type Output = str;
1545
1546         #[inline]
1547         fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1548             // is_char_boundary checks that the index is in [0, .len()]
1549             if self.is_char_boundary(index.start) {
1550                 unsafe { self.slice_unchecked(index.start, self.len()) }
1551             } else {
1552                 super::slice_error_fail(self, index.start, self.len())
1553             }
1554         }
1555     }
1556
1557     /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1558     ///
1559     /// Returns a mutable slice of the string from byte offset `begin`
1560     /// to the end of the string.
1561     ///
1562     /// Equivalent to `&mut self[begin .. len]`.
1563     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1564     impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1565         #[inline]
1566         fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1567             // is_char_boundary checks that the index is in [0, .len()]
1568             if self.is_char_boundary(index.start) {
1569                 let len = self.len();
1570                 unsafe { self.slice_mut_unchecked(index.start, len) }
1571             } else {
1572                 super::slice_error_fail(self, index.start, self.len())
1573             }
1574         }
1575     }
1576
    /// Implements substring slicing with syntax `&self[..]`.
    ///
    /// Returns a slice of the whole string. This operation can
    /// never panic.
    ///
    /// Equivalent to `&self[0 .. len]`.
    #[stable(feature = "rust1", since = "1.0.0")]
    impl ops::Index<ops::RangeFull> for str {
        type Output = str;

        // The full range is the string itself, so no checks are needed.
        #[inline]
        fn index(&self, _index: ops::RangeFull) -> &str {
            self
        }
    }
1592
    /// Implements mutable substring slicing with syntax `&mut self[..]`.
    ///
    /// Returns a mutable slice of the whole string. This operation can
    /// never panic.
    ///
    /// Equivalent to `&mut self[0 .. len]`.
    #[stable(feature = "derefmut_for_string", since = "1.2.0")]
    impl ops::IndexMut<ops::RangeFull> for str {
        // The full range is the string itself, so no checks are needed.
        #[inline]
        fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
            self
        }
    }
1606
1607     #[unstable(feature = "inclusive_range",
1608                reason = "recently added, follows RFC",
1609                issue = "28237")]
1610     impl ops::Index<ops::RangeInclusive<usize>> for str {
1611         type Output = str;
1612
1613         #[inline]
1614         fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1615             match index {
1616                 ops::RangeInclusive::Empty { .. } => "",
1617                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1618                     panic!("attempted to index slice up to maximum usize"),
1619                 ops::RangeInclusive::NonEmpty { start, end } =>
1620                     self.index(start .. end+1)
1621             }
1622         }
1623     }
    #[unstable(feature = "inclusive_range",
               reason = "recently added, follows RFC",
               issue = "28237")]
    impl ops::Index<ops::RangeToInclusive<usize>> for str {
        type Output = str;

        // `..=end` is handled as the inclusive range starting at offset 0.
        #[inline]
        fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
            self.index(0...index.end)
        }
    }
1635
1636     #[unstable(feature = "inclusive_range",
1637                reason = "recently added, follows RFC",
1638                issue = "28237")]
1639     impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1640         #[inline]
1641         fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1642             match index {
1643                 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1644                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1645                     panic!("attempted to index str up to maximum usize"),
1646                     ops::RangeInclusive::NonEmpty { start, end } =>
1647                         self.index_mut(start .. end+1)
1648             }
1649         }
1650     }
    #[unstable(feature = "inclusive_range",
               reason = "recently added, follows RFC",
               issue = "28237")]
    impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
        // `..=end` is handled as the inclusive range starting at offset 0.
        #[inline]
        fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
            self.index_mut(0...index.end)
        }
    }
1660 }
1661
/// Methods for string slices
#[allow(missing_docs)]
#[doc(hidden)]
#[unstable(feature = "core_str_ext",
           reason = "stable interface provided by `impl str` in later crates",
           issue = "32110")]
pub trait StrExt {
    // NB there are no docs here as they're all located on the StrExt trait in
    // libcollections, not here.

    #[stable(feature = "core", since = "1.6.0")]
    fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn chars(&self) -> Chars;
    #[stable(feature = "core", since = "1.6.0")]
    fn bytes(&self) -> Bytes;
    #[stable(feature = "core", since = "1.6.0")]
    fn char_indices(&self) -> CharIndices;
    #[stable(feature = "core", since = "1.6.0")]
    fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn lines(&self) -> Lines;
    #[stable(feature = "core", since = "1.6.0")]
    #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
    #[allow(deprecated)]
    fn lines_any(&self) -> LinesAny;
    #[stable(feature = "core", since = "1.6.0")]
    unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
    #[stable(feature = "core", since = "1.6.0")]
    unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
    #[stable(feature = "core", since = "1.6.0")]
    fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: DoubleEndedSearcher<'a>;
    #[stable(feature = "core", since = "1.6.0")]
    fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
    #[stable(feature = "core", since = "1.6.0")]
    fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: ReverseSearcher<'a>;
    #[stable(feature = "is_char_boundary", since = "1.9.0")]
    fn is_char_boundary(&self, index: usize) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn as_bytes(&self) -> &[u8];
    #[stable(feature = "core", since = "1.6.0")]
    fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
    #[stable(feature = "core", since = "1.6.0")]
    fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
        where P::Searcher: ReverseSearcher<'a>;
    // NOTE(review): unlike every other method here, `find_str` carries no
    // stability attribute of its own; confirm whether that is intentional.
    fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
    #[stable(feature = "core", since = "1.6.0")]
    fn split_at(&self, mid: usize) -> (&str, &str);
    #[stable(feature = "core", since = "1.6.0")]
    fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
    #[stable(feature = "core", since = "1.6.0")]
    fn as_ptr(&self) -> *const u8;
    #[stable(feature = "core", since = "1.6.0")]
    fn len(&self) -> usize;
    #[stable(feature = "core", since = "1.6.0")]
    fn is_empty(&self) -> bool;
    #[stable(feature = "core", since = "1.6.0")]
    fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
}
1751
// Shortens `s` so that it is at most `max` bytes long, never cutting through
// the middle of a character.
//
// Returns `(false, s)` when `s` already fits within `max` bytes; otherwise
// returns `(true, prefix)`, where `prefix` ends at the closest char boundary
// at or below `max`.
fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
    // Nothing to cut: the whole string fits.
    if max >= s.len() {
        return (false, s);
    }

    // Step backwards until `max` no longer points inside a character.
    // Index 0 is always a boundary, so this cannot underflow.
    while !s.is_char_boundary(max) {
        max -= 1;
    }

    (true, &s[..max])
}
1764
1765 #[inline(never)]
1766 #[cold]
1767 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
1768     const MAX_DISPLAY_LENGTH: usize = 256;
1769     let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
1770     let ellipsis = if truncated { "[...]" } else { "" };
1771
1772     // 1. out of bounds
1773     if begin > s.len() || end > s.len() {
1774         let oob_index = if begin > s.len() { begin } else { end };
1775         panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
1776     }
1777
1778     // 2. begin <= end
1779     assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
1780             begin, end, s_trunc, ellipsis);
1781
1782     // 3. character boundary
1783     let index = if !s.is_char_boundary(begin) { begin } else { end };
1784     // find the character
1785     let mut char_start = index;
1786     while !s.is_char_boundary(char_start) {
1787         char_start -= 1;
1788     }
1789     // `char_start` must be less than len and a char boundary
1790     let ch = s[char_start..].chars().next().unwrap();
1791     let char_range = char_start .. char_start + ch.len_utf8();
1792     panic!("byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
1793            index, ch, char_range, s_trunc, ellipsis);
1794 }
1795
1796 #[stable(feature = "core", since = "1.6.0")]
1797 impl StrExt for str {
1798     #[inline]
1799     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1800         pat.is_contained_in(self)
1801     }
1802
1803     #[inline]
1804     fn chars(&self) -> Chars {
1805         Chars{iter: self.as_bytes().iter()}
1806     }
1807
1808     #[inline]
1809     fn bytes(&self) -> Bytes {
1810         Bytes(self.as_bytes().iter().cloned())
1811     }
1812
1813     #[inline]
1814     fn char_indices(&self) -> CharIndices {
1815         CharIndices { front_offset: 0, iter: self.chars() }
1816     }
1817
1818     #[inline]
1819     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1820         Split(SplitInternal {
1821             start: 0,
1822             end: self.len(),
1823             matcher: pat.into_searcher(self),
1824             allow_trailing_empty: true,
1825             finished: false,
1826         })
1827     }
1828
1829     #[inline]
1830     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1831         where P::Searcher: ReverseSearcher<'a>
1832     {
1833         RSplit(self.split(pat).0)
1834     }
1835
1836     #[inline]
1837     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1838         SplitN(SplitNInternal {
1839             iter: self.split(pat).0,
1840             count: count,
1841         })
1842     }
1843
1844     #[inline]
1845     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1846         where P::Searcher: ReverseSearcher<'a>
1847     {
1848         RSplitN(self.splitn(count, pat).0)
1849     }
1850
1851     #[inline]
1852     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1853         SplitTerminator(SplitInternal {
1854             allow_trailing_empty: false,
1855             ..self.split(pat).0
1856         })
1857     }
1858
1859     #[inline]
1860     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1861         where P::Searcher: ReverseSearcher<'a>
1862     {
1863         RSplitTerminator(self.split_terminator(pat).0)
1864     }
1865
1866     #[inline]
1867     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1868         Matches(MatchesInternal(pat.into_searcher(self)))
1869     }
1870
1871     #[inline]
1872     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1873         where P::Searcher: ReverseSearcher<'a>
1874     {
1875         RMatches(self.matches(pat).0)
1876     }
1877
1878     #[inline]
1879     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1880         MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1881     }
1882
1883     #[inline]
1884     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1885         where P::Searcher: ReverseSearcher<'a>
1886     {
1887         RMatchIndices(self.match_indices(pat).0)
1888     }
1889     #[inline]
1890     fn lines(&self) -> Lines {
1891         Lines(self.split_terminator('\n').map(LinesAnyMap))
1892     }
1893
1894     #[inline]
1895     #[allow(deprecated)]
1896     fn lines_any(&self) -> LinesAny {
1897         LinesAny(self.lines())
1898     }
1899
1900     #[inline]
1901     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
1902         let ptr = self.as_ptr().offset(begin as isize);
1903         let len = end - begin;
1904         from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1905     }
1906
1907     #[inline]
1908     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
1909         let ptr = self.as_ptr().offset(begin as isize);
1910         let len = end - begin;
1911         mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
1912     }
1913
1914     #[inline]
1915     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1916         pat.is_prefix_of(self)
1917     }
1918
1919     #[inline]
1920     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1921         where P::Searcher: ReverseSearcher<'a>
1922     {
1923         pat.is_suffix_of(self)
1924     }
1925
1926     #[inline]
1927     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1928         where P::Searcher: DoubleEndedSearcher<'a>
1929     {
1930         let mut i = 0;
1931         let mut j = 0;
1932         let mut matcher = pat.into_searcher(self);
1933         if let Some((a, b)) = matcher.next_reject() {
1934             i = a;
1935             j = b; // Remember earliest known match, correct it below if
1936                    // last match is different
1937         }
1938         if let Some((_, b)) = matcher.next_reject_back() {
1939             j = b;
1940         }
1941         unsafe {
1942             // Searcher is known to return valid indices
1943             self.slice_unchecked(i, j)
1944         }
1945     }
1946
1947     #[inline]
1948     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1949         let mut i = self.len();
1950         let mut matcher = pat.into_searcher(self);
1951         if let Some((a, _)) = matcher.next_reject() {
1952             i = a;
1953         }
1954         unsafe {
1955             // Searcher is known to return valid indices
1956             self.slice_unchecked(i, self.len())
1957         }
1958     }
1959
1960     #[inline]
1961     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1962         where P::Searcher: ReverseSearcher<'a>
1963     {
1964         let mut j = 0;
1965         let mut matcher = pat.into_searcher(self);
1966         if let Some((_, b)) = matcher.next_reject_back() {
1967             j = b;
1968         }
1969         unsafe {
1970             // Searcher is known to return valid indices
1971             self.slice_unchecked(0, j)
1972         }
1973     }
1974
1975     #[inline]
1976     fn is_char_boundary(&self, index: usize) -> bool {
1977         // 0 and len are always ok.
1978         // Test for 0 explicitly so that it can optimize out the check
1979         // easily and skip reading string data for that case.
1980         if index == 0 || index == self.len() { return true; }
1981         match self.as_bytes().get(index) {
1982             None => false,
1983             // This is bit magic equivalent to: b < 128 || b >= 192
1984             Some(&b) => (b as i8) >= -0x40,
1985         }
1986     }
1987
1988     #[inline]
1989     fn as_bytes(&self) -> &[u8] {
1990         unsafe { mem::transmute(self) }
1991     }
1992
1993     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1994         pat.into_searcher(self).next_match().map(|(i, _)| i)
1995     }
1996
1997     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1998         where P::Searcher: ReverseSearcher<'a>
1999     {
2000         pat.into_searcher(self).next_match_back().map(|(i, _)| i)
2001     }
2002
2003     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2004         self.find(pat)
2005     }
2006
2007     #[inline]
2008     fn split_at(&self, mid: usize) -> (&str, &str) {
2009         // is_char_boundary checks that the index is in [0, .len()]
2010         if self.is_char_boundary(mid) {
2011             unsafe {
2012                 (self.slice_unchecked(0, mid),
2013                  self.slice_unchecked(mid, self.len()))
2014             }
2015         } else {
2016             slice_error_fail(self, 0, mid)
2017         }
2018     }
2019
2020     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2021         // is_char_boundary checks that the index is in [0, .len()]
2022         if self.is_char_boundary(mid) {
2023             let len = self.len();
2024             let ptr = self.as_ptr() as *mut u8;
2025             unsafe {
2026                 (from_raw_parts_mut(ptr, mid),
2027                  from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
2028             }
2029         } else {
2030             slice_error_fail(self, 0, mid)
2031         }
2032     }
2033
2034     #[inline]
2035     fn as_ptr(&self) -> *const u8 {
2036         self as *const str as *const u8
2037     }
2038
2039     #[inline]
2040     fn len(&self) -> usize {
2041         self.as_bytes().len()
2042     }
2043
2044     #[inline]
2045     fn is_empty(&self) -> bool { self.len() == 0 }
2046
2047     #[inline]
2048     fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
2049 }
2050
2051 #[stable(feature = "rust1", since = "1.0.0")]
2052 impl AsRef<[u8]> for str {
2053     #[inline]
2054     fn as_ref(&self) -> &[u8] {
2055         self.as_bytes()
2056     }
2057 }
2058
2059 #[stable(feature = "rust1", since = "1.0.0")]
2060 impl<'a> Default for &'a str {
2061     /// Creates an empty str
2062     fn default() -> &'a str { "" }
2063 }