src/libcore/str/mod.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! String manipulation
  12 //!
  13 //! For more details, see std::str
  14
  15 #![stable(feature = "rust1", since = "1.0.0")]
  16
  17 use self::pattern::Pattern;
  18 use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
  19
  20 use char;
  21 use convert::TryFrom;
  22 use fmt;
  23 use iter::{Map, Cloned, FusedIterator};
  24 use mem;
  25 use slice;
  26
  27 pub mod pattern;
  28
  29 /// A trait to abstract the idea of creating a new instance of a type from a
  30 /// string.
  31 ///
  32 /// `FromStr`'s [`from_str`] method is often used implicitly, through
  33 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
  34 ///
  35 /// [`from_str`]: #tymethod.from_str
  36 /// [`str`]: ../../std/primitive.str.html
  37 /// [`parse`]: ../../std/primitive.str.html#method.parse
  38 ///
  39 /// # Examples
  40 ///
  41 /// Basic implementation of `FromStr` on an example `Point` type:
  42 ///
  43 /// ```
  44 /// use std::str::FromStr;
  45 /// use std::num::ParseIntError;
  46 ///
  47 /// #[derive(Debug, PartialEq)]
  48 /// struct Point {
  49 ///     x: i32,
  50 ///     y: i32
  51 /// }
  52 ///
  53 /// impl FromStr for Point {
  54 ///     type Err = ParseIntError;
  55 ///
  56 ///     fn from_str(s: &str) -> Result<Self, Self::Err> {
  57 ///         let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
  58 ///                                  .split(",")
  59 ///                                  .collect();
  60 ///
  61 ///         let x_fromstr = coords[0].parse::<i32>()?;
  62 ///         let y_fromstr = coords[1].parse::<i32>()?;
  63 ///
  64 ///         Ok(Point { x: x_fromstr, y: y_fromstr })
  65 ///     }
  66 /// }
  67 ///
  68 /// let p = Point::from_str("(1,2)");
  69 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
  70 /// ```
  71 #[stable(feature = "rust1", since = "1.0.0")]
  72 pub trait FromStr: Sized {
  73     /// The associated error which can be returned from parsing.
  74     #[stable(feature = "rust1", since = "1.0.0")]
  75     type Err;
  76
  77     /// Parses a string `s` to return a value of this type.
  78     ///
  79     /// If parsing succeeds, return the value inside `Ok`, otherwise
  80     /// when the string is ill-formatted return an error specific to the
  81     /// inside `Err`. The error type is specific to implementation of the trait.
  82     ///
  83     /// # Examples
  84     ///
  85     /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
  86     ///
  87     /// [ithirtytwo]: ../../std/primitive.i32.html
  88     ///
  89     /// ```
  90     /// use std::str::FromStr;
  91     ///
  92     /// let s = "5";
  93     /// let x = i32::from_str(s).unwrap();
  94     ///
  95     /// assert_eq!(5, x);
  96     /// ```
  97     #[stable(feature = "rust1", since = "1.0.0")]
  98     fn from_str(s: &str) -> Result<Self, Self::Err>;
  99 }
 100
 101 #[stable(feature = "rust1", since = "1.0.0")]
 102 impl FromStr for bool {
 103     type Err = ParseBoolError;
 104
 105     /// Parse a `bool` from a string.
 106     ///
 107     /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
 108     /// actually be parseable.
 109     ///
 110     /// # Examples
 111     ///
 112     /// ```
 113     /// use std::str::FromStr;
 114     ///
 115     /// assert_eq!(FromStr::from_str("true"), Ok(true));
 116     /// assert_eq!(FromStr::from_str("false"), Ok(false));
 117     /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
 118     /// ```
 119     ///
 120     /// Note, in many cases, the `.parse()` method on `str` is more proper.
 121     ///
 122     /// ```
 123     /// assert_eq!("true".parse(), Ok(true));
 124     /// assert_eq!("false".parse(), Ok(false));
 125     /// assert!("not even a boolean".parse::<bool>().is_err());
 126     /// ```
 127     #[inline]
 128     fn from_str(s: &str) -> Result<bool, ParseBoolError> {
 129         match s {
 130             "true"  => Ok(true),
 131             "false" => Ok(false),
 132             _       => Err(ParseBoolError { _priv: () }),
 133         }
 134     }
 135 }
 136
 137 /// An error returned when parsing a `bool` using [`from_str`] fails
 138 ///
 139 /// [`from_str`]: ../../std/primitive.bool.html#method.from_str
 140 #[derive(Debug, Clone, PartialEq, Eq)]
 141 #[stable(feature = "rust1", since = "1.0.0")]
 142 pub struct ParseBoolError { _priv: () }
 143
 144 #[stable(feature = "rust1", since = "1.0.0")]
 145 impl fmt::Display for ParseBoolError {
 146     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 147         "provided string was not `true` or `false`".fmt(f)
 148     }
 149 }
 150
 151 /*
 152 Section: Creating a string
 153 */
 154
 155 /// Errors which can occur when attempting to interpret a sequence of `u8`
 156 /// as a string.
 157 ///
 158 /// As such, the `from_utf8` family of functions and methods for both `String`s
 159 /// and `&str`s make use of this error, for example.
 160 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
 161 #[stable(feature = "rust1", since = "1.0.0")]
 162 pub struct Utf8Error {
 163     valid_up_to: usize,
 164     error_len: Option<u8>,
 165 }
 166
 167 impl Utf8Error {
 168     /// Returns the index in the given string up to which valid UTF-8 was
 169     /// verified.
 170     ///
 171     /// It is the maximum index such that `from_utf8(&input[..index])`
 172     /// would return `Ok(_)`.
 173     ///
 174     /// # Examples
 175     ///
 176     /// Basic usage:
 177     ///
 178     /// ```
 179     /// use std::str;
 180     ///
 181     /// // some invalid bytes, in a vector
 182     /// let sparkle_heart = vec![0, 159, 146, 150];
 183     ///
 184     /// // std::str::from_utf8 returns a Utf8Error
 185     /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
 186     ///
 187     /// // the second byte is invalid here
 188     /// assert_eq!(1, error.valid_up_to());
 189     /// ```
 190     #[stable(feature = "utf8_error", since = "1.5.0")]
 191     pub fn valid_up_to(&self) -> usize { self.valid_up_to }
 192
 193     /// Provide more information about the failure:
 194     ///
 195     /// * `None`: the end of the input was reached unexpectedly.
 196     ///   `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
 197     ///   If a byte stream (such as a file or a network socket) is being decoded incrementally,
 198     ///   this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
 199     ///
 200     /// * `Some(len)`: an unexpected byte was encountered.
 201     ///   The length provided is that of the invalid byte sequence
 202     ///   that starts at the index given by `valid_up_to()`.
 203     ///   Decoding should resume after that sequence
 204     ///   (after inserting a U+FFFD REPLACEMENT CHARACTER) in case of lossy decoding.
 205     #[unstable(feature = "utf8_error_error_len", reason ="new", issue = "40494")]
 206     pub fn error_len(&self) -> Option<usize> {
 207         self.error_len.map(|len| len as usize)
 208     }
 209 }
 210
 211 /// Converts a slice of bytes to a string slice.
 212 ///
 213 /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
 214 /// is made of bytes, so this function converts between the two. Not all byte
 215 /// slices are valid string slices, however: `&str` requires that it is valid
 216 /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
 217 /// then does the conversion.
 218 ///
 219 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
 220 /// incur the overhead of the validity check, there is an unsafe version of
 221 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
 222 /// behavior but skips the check.
 223 ///
 224 /// [fromutf8u]: fn.from_utf8_unchecked.html
 225 ///
 226 /// If you need a `String` instead of a `&str`, consider
 227 /// [`String::from_utf8`][string].
 228 ///
 229 /// [string]: ../../std/string/struct.String.html#method.from_utf8
 230 ///
 231 /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
 232 /// it, this function is one way to have a stack-allocated string. There is
 233 /// an example of this in the examples section below.
 234 ///
 235 /// # Errors
 236 ///
 237 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
 238 /// provided slice is not UTF-8.
 239 ///
 240 /// # Examples
 241 ///
 242 /// Basic usage:
 243 ///
 244 /// ```
 245 /// use std::str;
 246 ///
 247 /// // some bytes, in a vector
 248 /// let sparkle_heart = vec![240, 159, 146, 150];
 249 ///
 250 /// // We know these bytes are valid, so just use `unwrap()`.
 251 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 252 ///
 253 /// assert_eq!("💖", sparkle_heart);
 254 /// ```
 255 ///
 256 /// Incorrect bytes:
 257 ///
 258 /// ```
 259 /// use std::str;
 260 ///
 261 /// // some invalid bytes, in a vector
 262 /// let sparkle_heart = vec![0, 159, 146, 150];
 263 ///
 264 /// assert!(str::from_utf8(&sparkle_heart).is_err());
 265 /// ```
 266 ///
 267 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
 268 /// errors that can be returned.
 269 ///
 270 /// [error]: struct.Utf8Error.html
 271 ///
 272 /// A "stack allocated string":
 273 ///
 274 /// ```
 275 /// use std::str;
 276 ///
 277 /// // some bytes, in a stack-allocated array
 278 /// let sparkle_heart = [240, 159, 146, 150];
 279 ///
 280 /// // We know these bytes are valid, so just use `unwrap()`.
 281 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 282 ///
 283 /// assert_eq!("💖", sparkle_heart);
 284 /// ```
 285 #[stable(feature = "rust1", since = "1.0.0")]
 286 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
 287     run_utf8_validation(v)?;
 288     Ok(unsafe { from_utf8_unchecked(v) })
 289 }
 290
 291 /// Forms a str from a pointer and a length.
 292 ///
 293 /// The `len` argument is the number of bytes in the string.
 294 ///
 295 /// # Safety
 296 ///
 297 /// This function is unsafe as there is no guarantee that the given pointer is
 298 /// valid for `len` bytes, nor whether the lifetime inferred is a suitable
 299 /// lifetime for the returned str.
 300 ///
 301 /// The data must be valid UTF-8
 302 ///
 303 /// `p` must be non-null, even for zero-length str.
 304 ///
 305 /// # Caveat
 306 ///
 307 /// The lifetime for the returned str is inferred from its usage. To
 308 /// prevent accidental misuse, it's suggested to tie the lifetime to whichever
 309 /// source lifetime is safe in the context, such as by providing a helper
 310 /// function taking the lifetime of a host value for the str, or by explicit
 311 /// annotation.
 312 /// Performs the same functionality as `from_raw_parts`, except that a mutable
 313 /// str is returned.
 314 ///
 315 unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
 316     mem::transmute::<&mut [u8], &mut str>(slice::from_raw_parts_mut(p, len))
 317 }
 318
 319 /// Converts a slice of bytes to a string slice without checking
 320 /// that the string contains valid UTF-8.
 321 ///
 322 /// See the safe version, [`from_utf8`][fromutf8], for more information.
 323 ///
 324 /// [fromutf8]: fn.from_utf8.html
 325 ///
 326 /// # Safety
 327 ///
 328 /// This function is unsafe because it does not check that the bytes passed to
 329 /// it are valid UTF-8. If this constraint is violated, undefined behavior
 330 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
 331 ///
 332 /// [`&str`]: ../../std/primitive.str.html
 333 ///
 334 /// # Examples
 335 ///
 336 /// Basic usage:
 337 ///
 338 /// ```
 339 /// use std::str;
 340 ///
 341 /// // some bytes, in a vector
 342 /// let sparkle_heart = vec![240, 159, 146, 150];
 343 ///
 344 /// let sparkle_heart = unsafe {
 345 ///     str::from_utf8_unchecked(&sparkle_heart)
 346 /// };
 347 ///
 348 /// assert_eq!("💖", sparkle_heart);
 349 /// ```
 350 #[inline(always)]
 351 #[stable(feature = "rust1", since = "1.0.0")]
 352 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
 353     mem::transmute(v)
 354 }
 355
 356 #[stable(feature = "rust1", since = "1.0.0")]
 357 impl fmt::Display for Utf8Error {
 358     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 359         if let Some(error_len) = self.error_len {
 360             write!(f, "invalid utf-8 sequence of {} bytes from index {}",
 361                    error_len, self.valid_up_to)
 362         } else {
 363             write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
 364         }
 365     }
 366 }
 367
 368 /*
 369 Section: Iterators
 370 */
 371
 372 /// Iterator for the char (representing *Unicode Scalar Values*) of a string.
 373 ///
 374 /// Created with the method [`chars`].
 375 ///
 376 /// [`chars`]: ../../std/primitive.str.html#method.chars
 377 #[derive(Clone, Debug)]
 378 #[stable(feature = "rust1", since = "1.0.0")]
 379 pub struct Chars<'a> {
 380     iter: slice::Iter<'a, u8>
 381 }
 382
 383 /// Returns the initial codepoint accumulator for the first byte.
 384 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
 385 /// for width 3, and 3 bits for width 4.
 386 #[inline]
 387 fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
 388
 389 /// Returns the value of `ch` updated with continuation byte `byte`.
 390 #[inline]
 391 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
 392
 393 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
 394 /// bits `10`).
 395 #[inline]
 396 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
 397
 398 #[inline]
 399 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
 400     match opt {
 401         Some(&byte) => byte,
 402         None => 0,
 403     }
 404 }
 405
 406 /// Reads the next code point out of a byte iterator (assuming a
 407 /// UTF-8-like encoding).
 408 #[unstable(feature = "str_internals", issue = "0")]
 409 #[inline]
 410 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
 411     // Decode UTF-8
 412     let x = match bytes.next() {
 413         None => return None,
 414         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 415         Some(&next_byte) => next_byte,
 416     };
 417
 418     // Multibyte case follows
 419     // Decode from a byte combination out of: [[[x y] z] w]
 420     // NOTE: Performance is sensitive to the exact formulation here
 421     let init = utf8_first_byte(x, 2);
 422     let y = unwrap_or_0(bytes.next());
 423     let mut ch = utf8_acc_cont_byte(init, y);
 424     if x >= 0xE0 {
 425         // [[x y z] w] case
 426         // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
 427         let z = unwrap_or_0(bytes.next());
 428         let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
 429         ch = init << 12 | y_z;
 430         if x >= 0xF0 {
 431             // [x y z w] case
 432             // use only the lower 3 bits of `init`
 433             let w = unwrap_or_0(bytes.next());
 434             ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
 435         }
 436     }
 437
 438     Some(ch)
 439 }
 440
 441 /// Reads the last code point out of a byte iterator (assuming a
 442 /// UTF-8-like encoding).
 443 #[inline]
 444 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
 445     where I: DoubleEndedIterator<Item = &'a u8>,
 446 {
 447     // Decode UTF-8
 448     let w = match bytes.next_back() {
 449         None => return None,
 450         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 451         Some(&back_byte) => back_byte,
 452     };
 453
 454     // Multibyte case follows
 455     // Decode from a byte combination out of: [x [y [z w]]]
 456     let mut ch;
 457     let z = unwrap_or_0(bytes.next_back());
 458     ch = utf8_first_byte(z, 2);
 459     if utf8_is_cont_byte(z) {
 460         let y = unwrap_or_0(bytes.next_back());
 461         ch = utf8_first_byte(y, 3);
 462         if utf8_is_cont_byte(y) {
 463             let x = unwrap_or_0(bytes.next_back());
 464             ch = utf8_first_byte(x, 4);
 465             ch = utf8_acc_cont_byte(ch, y);
 466         }
 467         ch = utf8_acc_cont_byte(ch, z);
 468     }
 469     ch = utf8_acc_cont_byte(ch, w);
 470
 471     Some(ch)
 472 }
 473
 474 #[stable(feature = "rust1", since = "1.0.0")]
 475 impl<'a> Iterator for Chars<'a> {
 476     type Item = char;
 477
 478     #[inline]
 479     fn next(&mut self) -> Option<char> {
 480         next_code_point(&mut self.iter).map(|ch| {
 481             // str invariant says `ch` is a valid Unicode Scalar Value
 482             unsafe {
 483                 char::from_u32_unchecked(ch)
 484             }
 485         })
 486     }
 487
 488     #[inline]
 489     fn count(self) -> usize {
 490         // length in `char` is equal to the number of non-continuation bytes
 491         let bytes_len = self.iter.len();
 492         let mut cont_bytes = 0;
 493         for &byte in self.iter {
 494             cont_bytes += utf8_is_cont_byte(byte) as usize;
 495         }
 496         bytes_len - cont_bytes
 497     }
 498
 499     #[inline]
 500     fn size_hint(&self) -> (usize, Option<usize>) {
 501         let len = self.iter.len();
 502         // `(len + 3)` can't overflow, because we know that the `slice::Iter`
 503         // belongs to a slice in memory which has a maximum length of
 504         // `isize::MAX` (that's well below `usize::MAX`).
 505         ((len + 3) / 4, Some(len))
 506     }
 507
 508     #[inline]
 509     fn last(mut self) -> Option<char> {
 510         // No need to go through the entire string.
 511         self.next_back()
 512     }
 513 }
 514
 515 #[stable(feature = "rust1", since = "1.0.0")]
 516 impl<'a> DoubleEndedIterator for Chars<'a> {
 517     #[inline]
 518     fn next_back(&mut self) -> Option<char> {
 519         next_code_point_reverse(&mut self.iter).map(|ch| {
 520             // str invariant says `ch` is a valid Unicode Scalar Value
 521             unsafe {
 522                 char::from_u32_unchecked(ch)
 523             }
 524         })
 525     }
 526 }
 527
 528 #[unstable(feature = "fused", issue = "35602")]
 529 impl<'a> FusedIterator for Chars<'a> {}
 530
 531 impl<'a> Chars<'a> {
 532     /// View the underlying data as a subslice of the original data.
 533     ///
 534     /// This has the same lifetime as the original slice, and so the
 535     /// iterator can continue to be used while this exists.
 536     ///
 537     /// # Examples
 538     ///
 539     /// ```
 540     /// let mut chars = "abc".chars();
 541     ///
 542     /// assert_eq!(chars.as_str(), "abc");
 543     /// chars.next();
 544     /// assert_eq!(chars.as_str(), "bc");
 545     /// chars.next();
 546     /// chars.next();
 547     /// assert_eq!(chars.as_str(), "");
 548     /// ```
 549     #[stable(feature = "iter_to_slice", since = "1.4.0")]
 550     #[inline]
 551     pub fn as_str(&self) -> &'a str {
 552         unsafe { from_utf8_unchecked(self.iter.as_slice()) }
 553     }
 554 }
 555
 556 /// An iterator over the [`char`]s of a string slice, and their positions.
 557 ///
 558 /// [`char`]: ../../std/primitive.char.html
 559 ///
 560 /// This struct is created by the [`char_indices()`] method on [`str`].
 561 /// See its documentation for more.
 562 ///
 563 /// [`char_indices()`]: ../../std/primitive.str.html#method.char_indices
 564 /// [`str`]: ../../std/primitive.str.html
 565 #[derive(Clone, Debug)]
 566 #[stable(feature = "rust1", since = "1.0.0")]
 567 pub struct CharIndices<'a> {
 568     front_offset: usize,
 569     iter: Chars<'a>,
 570 }
 571
 572 #[stable(feature = "rust1", since = "1.0.0")]
 573 impl<'a> Iterator for CharIndices<'a> {
 574     type Item = (usize, char);
 575
 576     #[inline]
 577     fn next(&mut self) -> Option<(usize, char)> {
 578         let pre_len = self.iter.iter.len();
 579         match self.iter.next() {
 580             None => None,
 581             Some(ch) => {
 582                 let index = self.front_offset;
 583                 let len = self.iter.iter.len();
 584                 self.front_offset += pre_len - len;
 585                 Some((index, ch))
 586             }
 587         }
 588     }
 589
 590     #[inline]
 591     fn count(self) -> usize {
 592         self.iter.count()
 593     }
 594
 595     #[inline]
 596     fn size_hint(&self) -> (usize, Option<usize>) {
 597         self.iter.size_hint()
 598     }
 599
 600     #[inline]
 601     fn last(mut self) -> Option<(usize, char)> {
 602         // No need to go through the entire string.
 603         self.next_back()
 604     }
 605 }
 606
 607 #[stable(feature = "rust1", since = "1.0.0")]
 608 impl<'a> DoubleEndedIterator for CharIndices<'a> {
 609     #[inline]
 610     fn next_back(&mut self) -> Option<(usize, char)> {
 611         match self.iter.next_back() {
 612             None => None,
 613             Some(ch) => {
 614                 let index = self.front_offset + self.iter.iter.len();
 615                 Some((index, ch))
 616             }
 617         }
 618     }
 619 }
 620
 621 #[unstable(feature = "fused", issue = "35602")]
 622 impl<'a> FusedIterator for CharIndices<'a> {}
 623
 624 impl<'a> CharIndices<'a> {
 625     /// View the underlying data as a subslice of the original data.
 626     ///
 627     /// This has the same lifetime as the original slice, and so the
 628     /// iterator can continue to be used while this exists.
 629     #[stable(feature = "iter_to_slice", since = "1.4.0")]
 630     #[inline]
 631     pub fn as_str(&self) -> &'a str {
 632         self.iter.as_str()
 633     }
 634 }
 635
 636 /// An iterator over the bytes of a string slice.
 637 ///
 638 /// This struct is created by the [`bytes()`] method on [`str`].
 639 /// See its documentation for more.
 640 ///
 641 /// [`bytes()`]: ../../std/primitive.str.html#method.bytes
 642 /// [`str`]: ../../std/primitive.str.html
 643 #[stable(feature = "rust1", since = "1.0.0")]
 644 #[derive(Clone, Debug)]
 645 pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
 646
 647 #[stable(feature = "rust1", since = "1.0.0")]
 648 impl<'a> Iterator for Bytes<'a> {
 649     type Item = u8;
 650
 651     #[inline]
 652     fn next(&mut self) -> Option<u8> {
 653         self.0.next()
 654     }
 655
 656     #[inline]
 657     fn size_hint(&self) -> (usize, Option<usize>) {
 658         self.0.size_hint()
 659     }
 660
 661     #[inline]
 662     fn count(self) -> usize {
 663         self.0.count()
 664     }
 665
 666     #[inline]
 667     fn last(self) -> Option<Self::Item> {
 668         self.0.last()
 669     }
 670
 671     #[inline]
 672     fn nth(&mut self, n: usize) -> Option<Self::Item> {
 673         self.0.nth(n)
 674     }
 675 }
 676
 677 #[stable(feature = "rust1", since = "1.0.0")]
 678 impl<'a> DoubleEndedIterator for Bytes<'a> {
 679     #[inline]
 680     fn next_back(&mut self) -> Option<u8> {
 681         self.0.next_back()
 682     }
 683 }
 684
 685 #[stable(feature = "rust1", since = "1.0.0")]
 686 impl<'a> ExactSizeIterator for Bytes<'a> {
 687     #[inline]
 688     fn len(&self) -> usize {
 689         self.0.len()
 690     }
 691
 692     #[inline]
 693     fn is_empty(&self) -> bool {
 694         self.0.is_empty()
 695     }
 696 }
 697
 698 #[unstable(feature = "fused", issue = "35602")]
 699 impl<'a> FusedIterator for Bytes<'a> {}
 700
 701 /// This macro generates a Clone impl for string pattern API
 702 /// wrapper types of the form X<'a, P>
 703 macro_rules! derive_pattern_clone {
 704     (clone $t:ident with |$s:ident| $e:expr) => {
 705         impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
 706             where P::Searcher: Clone
 707         {
 708             fn clone(&self) -> Self {
 709                 let $s = self;
 710                 $e
 711             }
 712         }
 713     }
 714 }
 715
 716 /// This macro generates two public iterator structs
 717 /// wrapping a private internal one that makes use of the `Pattern` API.
 718 ///
 719 /// For all patterns `P: Pattern<'a>` the following items will be
 720 /// generated (generics omitted):
 721 ///
 722 /// struct $forward_iterator($internal_iterator);
 723 /// struct $reverse_iterator($internal_iterator);
 724 ///
 725 /// impl Iterator for $forward_iterator
 726 /// { /* internal ends up calling Searcher::next_match() */ }
 727 ///
 728 /// impl DoubleEndedIterator for $forward_iterator
 729 ///       where P::Searcher: DoubleEndedSearcher
 730 /// { /* internal ends up calling Searcher::next_match_back() */ }
 731 ///
 732 /// impl Iterator for $reverse_iterator
 733 ///       where P::Searcher: ReverseSearcher
 734 /// { /* internal ends up calling Searcher::next_match_back() */ }
 735 ///
 736 /// impl DoubleEndedIterator for $reverse_iterator
 737 ///       where P::Searcher: DoubleEndedSearcher
 738 /// { /* internal ends up calling Searcher::next_match() */ }
 739 ///
 740 /// The internal one is defined outside the macro, and has almost the same
 741 /// semantic as a DoubleEndedIterator by delegating to `pattern::Searcher` and
 742 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
 743 ///
 744 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
 745 /// `Pattern` might not return the same elements, so actually implementing
 746 /// `DoubleEndedIterator` for it would be incorrect.
 747 /// (See the docs in `str::pattern` for more details)
 748 ///
 749 /// However, the internal struct still represents a single ended iterator from
 750 /// either end, and depending on pattern is also a valid double ended iterator,
 751 /// so the two wrapper structs implement `Iterator`
 752 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
 753 /// to the complex impls seen above.
 754 macro_rules! generate_pattern_iterators {
 755     {
 756         // Forward iterator
 757         forward:
 758             $(#[$forward_iterator_attribute:meta])*
 759             struct $forward_iterator:ident;
 760
 761         // Reverse iterator
 762         reverse:
 763             $(#[$reverse_iterator_attribute:meta])*
 764             struct $reverse_iterator:ident;
 765
 766         // Stability of all generated items
 767         stability:
 768             $(#[$common_stability_attribute:meta])*
 769
 770         // Internal almost-iterator that is being delegated to
 771         internal:
 772             $internal_iterator:ident yielding ($iterty:ty);
 773
 774         // Kind of delgation - either single ended or double ended
 775         delegate $($t:tt)*
 776     } => {
 777         $(#[$forward_iterator_attribute])*
 778         $(#[$common_stability_attribute])*
 779         pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
 780
 781         $(#[$common_stability_attribute])*
 782         impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
 783             where P::Searcher: fmt::Debug
 784         {
 785             fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 786                 f.debug_tuple(stringify!($forward_iterator))
 787                     .field(&self.0)
 788                     .finish()
 789             }
 790         }
 791
 792         $(#[$common_stability_attribute])*
 793         impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
 794             type Item = $iterty;
 795
 796             #[inline]
 797             fn next(&mut self) -> Option<$iterty> {
 798                 self.0.next()
 799             }
 800         }
 801
 802         $(#[$common_stability_attribute])*
 803         impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
 804             where P::Searcher: Clone
 805         {
 806             fn clone(&self) -> Self {
 807                 $forward_iterator(self.0.clone())
 808             }
 809         }
 810
 811         $(#[$reverse_iterator_attribute])*
 812         $(#[$common_stability_attribute])*
 813         pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
 814
 815         $(#[$common_stability_attribute])*
 816         impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
 817             where P::Searcher: fmt::Debug
 818         {
 819             fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 820                 f.debug_tuple(stringify!($reverse_iterator))
 821                     .field(&self.0)
 822                     .finish()
 823             }
 824         }
 825
 826         $(#[$common_stability_attribute])*
 827         impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
 828             where P::Searcher: ReverseSearcher<'a>
 829         {
 830             type Item = $iterty;
 831
 832             #[inline]
 833             fn next(&mut self) -> Option<$iterty> {
 834                 self.0.next_back()
 835             }
 836         }
 837
 838         $(#[$common_stability_attribute])*
 839         impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
 840             where P::Searcher: Clone
 841         {
 842             fn clone(&self) -> Self {
 843                 $reverse_iterator(self.0.clone())
 844             }
 845         }
 846
 847         #[unstable(feature = "fused", issue = "35602")]
 848         impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
 849
 850         #[unstable(feature = "fused", issue = "35602")]
 851         impl<'a, P: Pattern<'a>> FusedIterator for $reverse_iterator<'a, P>
 852             where P::Searcher: ReverseSearcher<'a> {}
 853
 854         generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
 855                                                 $forward_iterator,
 856                                                 $reverse_iterator, $iterty);
 857     };
 858     {
 859         double ended; with $(#[$common_stability_attribute:meta])*,
 860                            $forward_iterator:ident,
 861                            $reverse_iterator:ident, $iterty:ty
 862     } => {
 863         $(#[$common_stability_attribute])*
 864         impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
 865             where P::Searcher: DoubleEndedSearcher<'a>
 866         {
 867             #[inline]
 868             fn next_back(&mut self) -> Option<$iterty> {
 869                 self.0.next_back()
 870             }
 871         }
 872
 873         $(#[$common_stability_attribute])*
 874         impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
 875             where P::Searcher: DoubleEndedSearcher<'a>
 876         {
 877             #[inline]
 878             fn next_back(&mut self) -> Option<$iterty> {
 879                 self.0.next()
 880             }
 881         }
 882     };
 883     {
 884         single ended; with $(#[$common_stability_attribute:meta])*,
 885                            $forward_iterator:ident,
 886                            $reverse_iterator:ident, $iterty:ty
 887     } => {}
 888 }
 889
 890 derive_pattern_clone!{
 891     clone SplitInternal
 892     with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
 893 }
 894
 895 struct SplitInternal<'a, P: Pattern<'a>> {
 896     start: usize,
 897     end: usize,
 898     matcher: P::Searcher,
 899     allow_trailing_empty: bool,
 900     finished: bool,
 901 }
 902
 903 impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
 904     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 905         f.debug_struct("SplitInternal")
 906             .field("start", &self.start)
 907             .field("end", &self.end)
 908             .field("matcher", &self.matcher)
 909             .field("allow_trailing_empty", &self.allow_trailing_empty)
 910             .field("finished", &self.finished)
 911             .finish()
 912     }
 913 }
 914
 915 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
 916     #[inline]
 917     fn get_end(&mut self) -> Option<&'a str> {
 918         if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
 919             self.finished = true;
 920             unsafe {
 921                 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
 922                 Some(string)
 923             }
 924         } else {
 925             None
 926         }
 927     }
 928
 929     #[inline]
 930     fn next(&mut self) -> Option<&'a str> {
 931         if self.finished { return None }
 932
 933         let haystack = self.matcher.haystack();
 934         match self.matcher.next_match() {
 935             Some((a, b)) => unsafe {
 936                 let elt = haystack.slice_unchecked(self.start, a);
 937                 self.start = b;
 938                 Some(elt)
 939             },
 940             None => self.get_end(),
 941         }
 942     }
 943
 944     #[inline]
 945     fn next_back(&mut self) -> Option<&'a str>
 946         where P::Searcher: ReverseSearcher<'a>
 947     {
 948         if self.finished { return None }
 949
 950         if !self.allow_trailing_empty {
 951             self.allow_trailing_empty = true;
 952             match self.next_back() {
 953                 Some(elt) if !elt.is_empty() => return Some(elt),
 954                 _ => if self.finished { return None }
 955             }
 956         }
 957
 958         let haystack = self.matcher.haystack();
 959         match self.matcher.next_match_back() {
 960             Some((a, b)) => unsafe {
 961                 let elt = haystack.slice_unchecked(b, self.end);
 962                 self.end = a;
 963                 Some(elt)
 964             },
 965             None => unsafe {
 966                 self.finished = true;
 967                 Some(haystack.slice_unchecked(self.start, self.end))
 968             },
 969         }
 970     }
 971 }
 972
 973 generate_pattern_iterators! {
 974     forward:
 975         /// Created with the method [`split`].
 976         ///
 977         /// [`split`]: ../../std/primitive.str.html#method.split
 978         struct Split;
 979     reverse:
 980         /// Created with the method [`rsplit`].
 981         ///
 982         /// [`rsplit`]: ../../std/primitive.str.html#method.rsplit
 983         struct RSplit;
 984     stability:
 985         #[stable(feature = "rust1", since = "1.0.0")]
 986     internal:
 987         SplitInternal yielding (&'a str);
 988     delegate double ended;
 989 }
 990
 991 generate_pattern_iterators! {
 992     forward:
 993         /// Created with the method [`split_terminator`].
 994         ///
 995         /// [`split_terminator`]: ../../std/primitive.str.html#method.split_terminator
 996         struct SplitTerminator;
 997     reverse:
 998         /// Created with the method [`rsplit_terminator`].
 999         ///
1000         /// [`rsplit_terminator`]: ../../std/primitive.str.html#method.rsplit_terminator
1001         struct RSplitTerminator;
1002     stability:
1003         #[stable(feature = "rust1", since = "1.0.0")]
1004     internal:
1005         SplitInternal yielding (&'a str);
1006     delegate double ended;
1007 }
1008
1009 derive_pattern_clone!{
1010     clone SplitNInternal
1011     with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
1012 }
1013
1014 struct SplitNInternal<'a, P: Pattern<'a>> {
1015     iter: SplitInternal<'a, P>,
1016     /// The number of splits remaining
1017     count: usize,
1018 }
1019
1020 impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
1021     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1022         f.debug_struct("SplitNInternal")
1023             .field("iter", &self.iter)
1024             .field("count", &self.count)
1025             .finish()
1026     }
1027 }
1028
1029 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1030     #[inline]
1031     fn next(&mut self) -> Option<&'a str> {
1032         match self.count {
1033             0 => None,
1034             1 => { self.count = 0; self.iter.get_end() }
1035             _ => { self.count -= 1; self.iter.next() }
1036         }
1037     }
1038
1039     #[inline]
1040     fn next_back(&mut self) -> Option<&'a str>
1041         where P::Searcher: ReverseSearcher<'a>
1042     {
1043         match self.count {
1044             0 => None,
1045             1 => { self.count = 0; self.iter.get_end() }
1046             _ => { self.count -= 1; self.iter.next_back() }
1047         }
1048     }
1049 }
1050
1051 generate_pattern_iterators! {
1052     forward:
1053         /// Created with the method [`splitn`].
1054         ///
1055         /// [`splitn`]: ../../std/primitive.str.html#method.splitn
1056         struct SplitN;
1057     reverse:
1058         /// Created with the method [`rsplitn`].
1059         ///
1060         /// [`rsplitn`]: ../../std/primitive.str.html#method.rsplitn
1061         struct RSplitN;
1062     stability:
1063         #[stable(feature = "rust1", since = "1.0.0")]
1064     internal:
1065         SplitNInternal yielding (&'a str);
1066     delegate single ended;
1067 }
1068
1069 derive_pattern_clone!{
1070     clone MatchIndicesInternal
1071     with |s| MatchIndicesInternal(s.0.clone())
1072 }
1073
1074 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1075
1076 impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
1077     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1078         f.debug_tuple("MatchIndicesInternal")
1079             .field(&self.0)
1080             .finish()
1081     }
1082 }
1083
1084 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1085     #[inline]
1086     fn next(&mut self) -> Option<(usize, &'a str)> {
1087         self.0.next_match().map(|(start, end)| unsafe {
1088             (start, self.0.haystack().slice_unchecked(start, end))
1089         })
1090     }
1091
1092     #[inline]
1093     fn next_back(&mut self) -> Option<(usize, &'a str)>
1094         where P::Searcher: ReverseSearcher<'a>
1095     {
1096         self.0.next_match_back().map(|(start, end)| unsafe {
1097             (start, self.0.haystack().slice_unchecked(start, end))
1098         })
1099     }
1100 }
1101
1102 generate_pattern_iterators! {
1103     forward:
1104         /// Created with the method [`match_indices`].
1105         ///
1106         /// [`match_indices`]: ../../std/primitive.str.html#method.match_indices
1107         struct MatchIndices;
1108     reverse:
1109         /// Created with the method [`rmatch_indices`].
1110         ///
1111         /// [`rmatch_indices`]: ../../std/primitive.str.html#method.rmatch_indices
1112         struct RMatchIndices;
1113     stability:
1114         #[stable(feature = "str_match_indices", since = "1.5.0")]
1115     internal:
1116         MatchIndicesInternal yielding ((usize, &'a str));
1117     delegate double ended;
1118 }
1119
1120 derive_pattern_clone!{
1121     clone MatchesInternal
1122     with |s| MatchesInternal(s.0.clone())
1123 }
1124
1125 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1126
1127 impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1128     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1129         f.debug_tuple("MatchesInternal")
1130             .field(&self.0)
1131             .finish()
1132     }
1133 }
1134
1135 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1136     #[inline]
1137     fn next(&mut self) -> Option<&'a str> {
1138         self.0.next_match().map(|(a, b)| unsafe {
1139             // Indices are known to be on utf8 boundaries
1140             self.0.haystack().slice_unchecked(a, b)
1141         })
1142     }
1143
1144     #[inline]
1145     fn next_back(&mut self) -> Option<&'a str>
1146         where P::Searcher: ReverseSearcher<'a>
1147     {
1148         self.0.next_match_back().map(|(a, b)| unsafe {
1149             // Indices are known to be on utf8 boundaries
1150             self.0.haystack().slice_unchecked(a, b)
1151         })
1152     }
1153 }
1154
1155 generate_pattern_iterators! {
1156     forward:
1157         /// Created with the method [`matches`].
1158         ///
1159         /// [`matches`]: ../../std/primitive.str.html#method.matches
1160         struct Matches;
1161     reverse:
1162         /// Created with the method [`rmatches`].
1163         ///
1164         /// [`rmatches`]: ../../std/primitive.str.html#method.rmatches
1165         struct RMatches;
1166     stability:
1167         #[stable(feature = "str_matches", since = "1.2.0")]
1168     internal:
1169         MatchesInternal yielding (&'a str);
1170     delegate double ended;
1171 }
1172
1173 /// An iterator over the lines of a string, as string slices.
1174 ///
1175 /// This struct is created with the [`lines()`] method on [`str`].
1176 /// See its documentation for more.
1177 ///
1178 /// [`lines()`]: ../../std/primitive.str.html#method.lines
1179 /// [`str`]: ../../std/primitive.str.html
1180 #[stable(feature = "rust1", since = "1.0.0")]
1181 #[derive(Clone, Debug)]
1182 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1183
1184 #[stable(feature = "rust1", since = "1.0.0")]
1185 impl<'a> Iterator for Lines<'a> {
1186     type Item = &'a str;
1187
1188     #[inline]
1189     fn next(&mut self) -> Option<&'a str> {
1190         self.0.next()
1191     }
1192
1193     #[inline]
1194     fn size_hint(&self) -> (usize, Option<usize>) {
1195         self.0.size_hint()
1196     }
1197 }
1198
1199 #[stable(feature = "rust1", since = "1.0.0")]
1200 impl<'a> DoubleEndedIterator for Lines<'a> {
1201     #[inline]
1202     fn next_back(&mut self) -> Option<&'a str> {
1203         self.0.next_back()
1204     }
1205 }
1206
1207 #[unstable(feature = "fused", issue = "35602")]
1208 impl<'a> FusedIterator for Lines<'a> {}
1209
1210 /// Created with the method [`lines_any`].
1211 ///
1212 /// [`lines_any`]: ../../std/primitive.str.html#method.lines_any
1213 #[stable(feature = "rust1", since = "1.0.0")]
1214 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1215 #[derive(Clone, Debug)]
1216 #[allow(deprecated)]
1217 pub struct LinesAny<'a>(Lines<'a>);
1218
1219 /// A nameable, cloneable fn type
1220 #[derive(Clone)]
1221 struct LinesAnyMap;
1222
1223 impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1224     #[inline]
1225     extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1226         let l = line.len();
1227         if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1228         else { line }
1229     }
1230 }
1231
1232 impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
1233     #[inline]
1234     extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
1235         Fn::call(&*self, (line,))
1236     }
1237 }
1238
1239 impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
1240     type Output = &'a str;
1241
1242     #[inline]
1243     extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
1244         Fn::call(&self, (line,))
1245     }
1246 }
1247
1248 #[stable(feature = "rust1", since = "1.0.0")]
1249 #[allow(deprecated)]
1250 impl<'a> Iterator for LinesAny<'a> {
1251     type Item = &'a str;
1252
1253     #[inline]
1254     fn next(&mut self) -> Option<&'a str> {
1255         self.0.next()
1256     }
1257
1258     #[inline]
1259     fn size_hint(&self) -> (usize, Option<usize>) {
1260         self.0.size_hint()
1261     }
1262 }
1263
1264 #[stable(feature = "rust1", since = "1.0.0")]
1265 #[allow(deprecated)]
1266 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1267     #[inline]
1268     fn next_back(&mut self) -> Option<&'a str> {
1269         self.0.next_back()
1270     }
1271 }
1272
1273 #[unstable(feature = "fused", issue = "35602")]
1274 #[allow(deprecated)]
1275 impl<'a> FusedIterator for LinesAny<'a> {}
1276
1277 /*
1278 Section: Comparing strings
1279 */
1280
1281 /// Bytewise slice equality
1282 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
1283 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
1284 #[lang = "str_eq"]
1285 #[inline]
1286 fn eq_slice(a: &str, b: &str) -> bool {
1287     a.as_bytes() == b.as_bytes()
1288 }
1289
1290 /*
1291 Section: UTF-8 validation
1292 */
1293
1294 // use truncation to fit u64 into usize
1295 const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
1296
1297 /// Returns `true` if any byte in the word `x` is nonascii (>= 128).
1298 #[inline]
1299 fn contains_nonascii(x: usize) -> bool {
1300     (x & NONASCII_MASK) != 0
1301 }
1302
1303 /// Walks through `iter` checking that it's a valid UTF-8 sequence,
1304 /// returning `true` in that case, or, if it is invalid, `false` with
1305 /// `iter` reset such that it is pointing at the first byte in the
1306 /// invalid sequence.
1307 #[inline(always)]
1308 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1309     let mut index = 0;
1310     let len = v.len();
1311
1312     let usize_bytes = mem::size_of::<usize>();
1313     let ascii_block_size = 2 * usize_bytes;
1314     let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1315
1316     while index < len {
1317         let old_offset = index;
1318         macro_rules! err {
1319             ($error_len: expr) => {
1320                 return Err(Utf8Error {
1321                     valid_up_to: old_offset,
1322                     error_len: $error_len,
1323                 })
1324             }
1325         }
1326
1327         macro_rules! next { () => {{
1328             index += 1;
1329             // we needed data, but there was none: error!
1330             if index >= len {
1331                 err!(None)
1332             }
1333             v[index]
1334         }}}
1335
1336         let first = v[index];
1337         if first >= 128 {
1338             let w = UTF8_CHAR_WIDTH[first as usize];
1339             // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
1340             //        first  C2 80        last DF BF
1341             // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
1342             //        first  E0 A0 80     last EF BF BF
1343             //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
1344             //               ED A0 80 to       ED BF BF
1345             // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff
1346             //        first  F0 90 80 80  last F4 8F BF BF
1347             //
1348             // Use the UTF-8 syntax from the RFC
1349             //
1350             // https://tools.ietf.org/html/rfc3629
1351             // UTF8-1      = %x00-7F
1352             // UTF8-2      = %xC2-DF UTF8-tail
1353             // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1354             //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1355             // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1356             //               %xF4 %x80-8F 2( UTF8-tail )
1357             match w {
1358                 2 => if next!() & !CONT_MASK != TAG_CONT_U8 {
1359                     err!(Some(1))
1360                 },
1361                 3 => {
1362                     match (first, next!()) {
1363                         (0xE0         , 0xA0 ... 0xBF) |
1364                         (0xE1 ... 0xEC, 0x80 ... 0xBF) |
1365                         (0xED         , 0x80 ... 0x9F) |
1366                         (0xEE ... 0xEF, 0x80 ... 0xBF) => {}
1367                         _ => err!(Some(1))
1368                     }
1369                     if next!() & !CONT_MASK != TAG_CONT_U8 {
1370                         err!(Some(2))
1371                     }
1372                 }
1373                 4 => {
1374                     match (first, next!()) {
1375                         (0xF0         , 0x90 ... 0xBF) |
1376                         (0xF1 ... 0xF3, 0x80 ... 0xBF) |
1377                         (0xF4         , 0x80 ... 0x8F) => {}
1378                         _ => err!(Some(1))
1379                     }
1380                     if next!() & !CONT_MASK != TAG_CONT_U8 {
1381                         err!(Some(2))
1382                     }
1383                     if next!() & !CONT_MASK != TAG_CONT_U8 {
1384                         err!(Some(3))
1385                     }
1386                 }
1387                 _ => err!(Some(1))
1388             }
1389             index += 1;
1390         } else {
1391             // Ascii case, try to skip forward quickly.
1392             // When the pointer is aligned, read 2 words of data per iteration
1393             // until we find a word containing a non-ascii byte.
1394             let ptr = v.as_ptr();
1395             let align = (ptr as usize + index) & (usize_bytes - 1);
1396             if align == 0 {
1397                 while index < blocks_end {
1398                     unsafe {
1399                         let block = ptr.offset(index as isize) as *const usize;
1400                         // break if there is a nonascii byte
1401                         let zu = contains_nonascii(*block);
1402                         let zv = contains_nonascii(*block.offset(1));
1403                         if zu | zv {
1404                             break;
1405                         }
1406                     }
1407                     index += ascii_block_size;
1408                 }
1409                 // step from the point where the wordwise loop stopped
1410                 while index < len && v[index] < 128 {
1411                     index += 1;
1412                 }
1413             } else {
1414                 index += 1;
1415             }
1416         }
1417     }
1418
1419     Ok(())
1420 }
1421
1422 // https://tools.ietf.org/html/rfc3629
1423 static UTF8_CHAR_WIDTH: [u8; 256] = [
1424 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1425 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1426 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1427 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1428 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1429 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1430 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1431 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
1432 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1433 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
1434 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1435 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
1436 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1437 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
1438 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
1439 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
1440 ];
1441
1442 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1443 #[unstable(feature = "str_internals", issue = "0")]
1444 #[inline]
1445 pub fn utf8_char_width(b: u8) -> usize {
1446     return UTF8_CHAR_WIDTH[b as usize] as usize;
1447 }
1448
1449 /// Mask of the value bits of a continuation byte.
1450 const CONT_MASK: u8 = 0b0011_1111;
1451 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
1452 const TAG_CONT_U8: u8 = 0b1000_0000;
1453
1454 /*
1455 Section: Trait implementations
1456 */
1457
1458 mod traits {
1459     use cmp::Ordering;
1460     use ops;
1461     use str::eq_slice;
1462
1463     /// Implements ordering of strings.
1464     ///
1465     /// Strings are ordered  lexicographically by their byte values.  This orders Unicode code
1466     /// points based on their positions in the code charts.  This is not necessarily the same as
1467     /// "alphabetical" order, which varies by language and locale.  Sorting strings according to
1468     /// culturally-accepted standards requires locale-specific data that is outside the scope of
1469     /// the `str` type.
1470     #[stable(feature = "rust1", since = "1.0.0")]
1471     impl Ord for str {
1472         #[inline]
1473         fn cmp(&self, other: &str) -> Ordering {
1474             self.as_bytes().cmp(other.as_bytes())
1475         }
1476     }
1477
1478     #[stable(feature = "rust1", since = "1.0.0")]
1479     impl PartialEq for str {
1480         #[inline]
1481         fn eq(&self, other: &str) -> bool {
1482             eq_slice(self, other)
1483         }
1484         #[inline]
1485         fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1486     }
1487
1488     #[stable(feature = "rust1", since = "1.0.0")]
1489     impl Eq for str {}
1490
1491     /// Implements comparison operations on strings.
1492     ///
1493     /// Strings are compared lexicographically by their byte values.  This compares Unicode code
1494     /// points based on their positions in the code charts.  This is not necessarily the same as
1495     /// "alphabetical" order, which varies by language and locale.  Comparing strings according to
1496     /// culturally-accepted standards requires locale-specific data that is outside the scope of
1497     /// the `str` type.
1498     #[stable(feature = "rust1", since = "1.0.0")]
1499     impl PartialOrd for str {
1500         #[inline]
1501         fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1502             Some(self.cmp(other))
1503         }
1504     }
1505
1506     /// Implements substring slicing with syntax `&self[begin .. end]`.
1507     ///
1508     /// Returns a slice of the given string from the byte range
1509     /// [`begin`..`end`).
1510     ///
1511     /// This operation is `O(1)`.
1512     ///
1513     /// # Panics
1514     ///
1515     /// Panics if `begin` or `end` does not point to the starting
1516     /// byte offset of a character (as defined by `is_char_boundary`).
1517     /// Requires that `begin <= end` and `end <= len` where `len` is the
1518     /// length of the string.
1519     ///
1520     /// # Examples
1521     ///
1522     /// ```
1523     /// let s = "Löwe 老虎 Léopard";
1524     /// assert_eq!(&s[0 .. 1], "L");
1525     ///
1526     /// assert_eq!(&s[1 .. 9], "öwe 老");
1527     ///
1528     /// // these will panic:
1529     /// // byte 2 lies within `ö`:
1530     /// // &s[2 ..3];
1531     ///
1532     /// // byte 8 lies within `老`
1533     /// // &s[1 .. 8];
1534     ///
1535     /// // byte 100 is outside the string
1536     /// // &s[3 .. 100];
1537     /// ```
1538     #[stable(feature = "rust1", since = "1.0.0")]
1539     impl ops::Index<ops::Range<usize>> for str {
1540         type Output = str;
1541         #[inline]
1542         fn index(&self, index: ops::Range<usize>) -> &str {
1543             // is_char_boundary checks that the index is in [0, .len()]
1544             if index.start <= index.end &&
1545                self.is_char_boundary(index.start) &&
1546                self.is_char_boundary(index.end) {
1547                 unsafe { self.slice_unchecked(index.start, index.end) }
1548             } else {
1549                 super::slice_error_fail(self, index.start, index.end)
1550             }
1551         }
1552     }
1553
1554     /// Implements mutable substring slicing with syntax
1555     /// `&mut self[begin .. end]`.
1556     ///
1557     /// Returns a mutable slice of the given string from the byte range
1558     /// [`begin`..`end`).
1559     ///
1560     /// This operation is `O(1)`.
1561     ///
1562     /// # Panics
1563     ///
1564     /// Panics if `begin` or `end` does not point to the starting
1565     /// byte offset of a character (as defined by `is_char_boundary`).
1566     /// Requires that `begin <= end` and `end <= len` where `len` is the
1567     /// length of the string.
1568     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1569     impl ops::IndexMut<ops::Range<usize>> for str {
1570         #[inline]
1571         fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1572             // is_char_boundary checks that the index is in [0, .len()]
1573             if index.start <= index.end &&
1574                self.is_char_boundary(index.start) &&
1575                self.is_char_boundary(index.end) {
1576                 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1577             } else {
1578                 super::slice_error_fail(self, index.start, index.end)
1579             }
1580         }
1581     }
1582
1583     /// Implements substring slicing with syntax `&self[.. end]`.
1584     ///
1585     /// Returns a slice of the string from the beginning to byte offset
1586     /// `end`.
1587     ///
1588     /// Equivalent to `&self[0 .. end]`.
1589     #[stable(feature = "rust1", since = "1.0.0")]
1590     impl ops::Index<ops::RangeTo<usize>> for str {
1591         type Output = str;
1592
1593         #[inline]
1594         fn index(&self, index: ops::RangeTo<usize>) -> &str {
1595             // is_char_boundary checks that the index is in [0, .len()]
1596             if self.is_char_boundary(index.end) {
1597                 unsafe { self.slice_unchecked(0, index.end) }
1598             } else {
1599                 super::slice_error_fail(self, 0, index.end)
1600             }
1601         }
1602     }
1603
1604     /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1605     ///
1606     /// Returns a mutable slice of the string from the beginning to byte offset
1607     /// `end`.
1608     ///
1609     /// Equivalent to `&mut self[0 .. end]`.
1610     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1611     impl ops::IndexMut<ops::RangeTo<usize>> for str {
1612         #[inline]
1613         fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1614             // is_char_boundary checks that the index is in [0, .len()]
1615             if self.is_char_boundary(index.end) {
1616                 unsafe { self.slice_mut_unchecked(0, index.end) }
1617             } else {
1618                 super::slice_error_fail(self, 0, index.end)
1619             }
1620         }
1621     }
1622
1623     /// Implements substring slicing with syntax `&self[begin ..]`.
1624     ///
1625     /// Returns a slice of the string from byte offset `begin`
1626     /// to the end of the string.
1627     ///
1628     /// Equivalent to `&self[begin .. len]`.
1629     #[stable(feature = "rust1", since = "1.0.0")]
1630     impl ops::Index<ops::RangeFrom<usize>> for str {
1631         type Output = str;
1632
1633         #[inline]
1634         fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1635             // is_char_boundary checks that the index is in [0, .len()]
1636             if self.is_char_boundary(index.start) {
1637                 unsafe { self.slice_unchecked(index.start, self.len()) }
1638             } else {
1639                 super::slice_error_fail(self, index.start, self.len())
1640             }
1641         }
1642     }
1643
1644     /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1645     ///
1646     /// Returns a mutable slice of the string from byte offset `begin`
1647     /// to the end of the string.
1648     ///
1649     /// Equivalent to `&mut self[begin .. len]`.
1650     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1651     impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1652         #[inline]
1653         fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1654             // is_char_boundary checks that the index is in [0, .len()]
1655             if self.is_char_boundary(index.start) {
1656                 let len = self.len();
1657                 unsafe { self.slice_mut_unchecked(index.start, len) }
1658             } else {
1659                 super::slice_error_fail(self, index.start, self.len())
1660             }
1661         }
1662     }
1663
1664     /// Implements substring slicing with syntax `&self[..]`.
1665     ///
1666     /// Returns a slice of the whole string. This operation can
1667     /// never panic.
1668     ///
1669     /// Equivalent to `&self[0 .. len]`.
1670     #[stable(feature = "rust1", since = "1.0.0")]
1671     impl ops::Index<ops::RangeFull> for str {
1672         type Output = str;
1673
1674         #[inline]
1675         fn index(&self, _index: ops::RangeFull) -> &str {
1676             self
1677         }
1678     }
1679
1680     /// Implements mutable substring slicing with syntax `&mut self[..]`.
1681     ///
1682     /// Returns a mutable slice of the whole string. This operation can
1683     /// never panic.
1684     ///
1685     /// Equivalent to `&mut self[0 .. len]`.
1686     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1687     impl ops::IndexMut<ops::RangeFull> for str {
1688         #[inline]
1689         fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
1690             self
1691         }
1692     }
1693
1694     #[unstable(feature = "inclusive_range",
1695                reason = "recently added, follows RFC",
1696                issue = "28237")]
1697     impl ops::Index<ops::RangeInclusive<usize>> for str {
1698         type Output = str;
1699
1700         #[inline]
1701         fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1702             match index {
1703                 ops::RangeInclusive::Empty { .. } => "",
1704                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1705                     panic!("attempted to index slice up to maximum usize"),
1706                 ops::RangeInclusive::NonEmpty { start, end } =>
1707                     self.index(start .. end+1)
1708             }
1709         }
1710     }
1711     #[unstable(feature = "inclusive_range",
1712                reason = "recently added, follows RFC",
1713                issue = "28237")]
1714     impl ops::Index<ops::RangeToInclusive<usize>> for str {
1715         type Output = str;
1716
1717         #[inline]
1718         fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1719             self.index(0...index.end)
1720         }
1721     }
1722
1723     #[unstable(feature = "inclusive_range",
1724                reason = "recently added, follows RFC",
1725                issue = "28237")]
1726     impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1727         #[inline]
1728         fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1729             match index {
1730                 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1731                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1732                     panic!("attempted to index str up to maximum usize"),
1733                     ops::RangeInclusive::NonEmpty { start, end } =>
1734                         self.index_mut(start .. end+1)
1735             }
1736         }
1737     }
1738     #[unstable(feature = "inclusive_range",
1739                reason = "recently added, follows RFC",
1740                issue = "28237")]
1741     impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
1742         #[inline]
1743         fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
1744             self.index_mut(0...index.end)
1745         }
1746     }
1747 }
1748
1749 /// Methods for string slices
1750 #[allow(missing_docs)]
1751 #[doc(hidden)]
1752 #[unstable(feature = "core_str_ext",
1753            reason = "stable interface provided by `impl str` in later crates",
1754            issue = "32110")]
1755 pub trait StrExt {
1756     // NB there are no docs here are they're all located on the StrExt trait in
1757     // libcollections, not here.
1758
1759     #[stable(feature = "core", since = "1.6.0")]
1760     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1761     #[stable(feature = "core", since = "1.6.0")]
1762     fn chars(&self) -> Chars;
1763     #[stable(feature = "core", since = "1.6.0")]
1764     fn bytes(&self) -> Bytes;
1765     #[stable(feature = "core", since = "1.6.0")]
1766     fn char_indices(&self) -> CharIndices;
1767     #[stable(feature = "core", since = "1.6.0")]
1768     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
1769     #[stable(feature = "core", since = "1.6.0")]
1770     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1771         where P::Searcher: ReverseSearcher<'a>;
1772     #[stable(feature = "core", since = "1.6.0")]
1773     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
1774     #[stable(feature = "core", since = "1.6.0")]
1775     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1776         where P::Searcher: ReverseSearcher<'a>;
1777     #[stable(feature = "core", since = "1.6.0")]
1778     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
1779     #[stable(feature = "core", since = "1.6.0")]
1780     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1781         where P::Searcher: ReverseSearcher<'a>;
1782     #[stable(feature = "core", since = "1.6.0")]
1783     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
1784     #[stable(feature = "core", since = "1.6.0")]
1785     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1786         where P::Searcher: ReverseSearcher<'a>;
1787     #[stable(feature = "core", since = "1.6.0")]
1788     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
1789     #[stable(feature = "core", since = "1.6.0")]
1790     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1791         where P::Searcher: ReverseSearcher<'a>;
1792     #[stable(feature = "core", since = "1.6.0")]
1793     fn lines(&self) -> Lines;
1794     #[stable(feature = "core", since = "1.6.0")]
1795     #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
1796     #[allow(deprecated)]
1797     fn lines_any(&self) -> LinesAny;
1798     #[stable(feature = "core", since = "1.6.0")]
1799     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
1800     #[stable(feature = "core", since = "1.6.0")]
1801     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
1802     #[stable(feature = "core", since = "1.6.0")]
1803     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1804     #[stable(feature = "core", since = "1.6.0")]
1805     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1806         where P::Searcher: ReverseSearcher<'a>;
1807     #[stable(feature = "core", since = "1.6.0")]
1808     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1809         where P::Searcher: DoubleEndedSearcher<'a>;
1810     #[stable(feature = "core", since = "1.6.0")]
1811     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
1812     #[stable(feature = "core", since = "1.6.0")]
1813     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1814         where P::Searcher: ReverseSearcher<'a>;
1815     #[stable(feature = "is_char_boundary", since = "1.9.0")]
1816     fn is_char_boundary(&self, index: usize) -> bool;
1817     #[stable(feature = "core", since = "1.6.0")]
1818     fn as_bytes(&self) -> &[u8];
1819     #[stable(feature = "core", since = "1.6.0")]
1820     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1821     #[stable(feature = "core", since = "1.6.0")]
1822     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1823         where P::Searcher: ReverseSearcher<'a>;
1824     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1825     #[stable(feature = "core", since = "1.6.0")]
1826     fn split_at(&self, mid: usize) -> (&str, &str);
1827     #[stable(feature = "core", since = "1.6.0")]
1828     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
1829     #[stable(feature = "core", since = "1.6.0")]
1830     fn as_ptr(&self) -> *const u8;
1831     #[stable(feature = "core", since = "1.6.0")]
1832     fn len(&self) -> usize;
1833     #[stable(feature = "core", since = "1.6.0")]
1834     fn is_empty(&self) -> bool;
1835     #[stable(feature = "core", since = "1.6.0")]
1836     fn parse<'a, T: TryFrom<&'a str>>(&'a self) -> Result<T, T::Error>;
1837 }
1838
1839 // truncate `&str` to length at most equal to `max`
1840 // return `true` if it were truncated, and the new str.
1841 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
1842     if max >= s.len() {
1843         (false, s)
1844     } else {
1845         while !s.is_char_boundary(max) {
1846             max -= 1;
1847         }
1848         (true, &s[..max])
1849     }
1850 }
1851
1852 #[inline(never)]
1853 #[cold]
1854 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
1855     const MAX_DISPLAY_LENGTH: usize = 256;
1856     let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
1857     let ellipsis = if truncated { "[...]" } else { "" };
1858
1859     // 1. out of bounds
1860     if begin > s.len() || end > s.len() {
1861         let oob_index = if begin > s.len() { begin } else { end };
1862         panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
1863     }
1864
1865     // 2. begin <= end
1866     assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
1867             begin, end, s_trunc, ellipsis);
1868
1869     // 3. character boundary
1870     let index = if !s.is_char_boundary(begin) { begin } else { end };
1871     // find the character
1872     let mut char_start = index;
1873     while !s.is_char_boundary(char_start) {
1874         char_start -= 1;
1875     }
1876     // `char_start` must be less than len and a char boundary
1877     let ch = s[char_start..].chars().next().unwrap();
1878     let char_range = char_start .. char_start + ch.len_utf8();
1879     panic!("byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
1880            index, ch, char_range, s_trunc, ellipsis);
1881 }
1882
1883 #[stable(feature = "core", since = "1.6.0")]
1884 impl StrExt for str {
1885     #[inline]
1886     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1887         pat.is_contained_in(self)
1888     }
1889
1890     #[inline]
1891     fn chars(&self) -> Chars {
1892         Chars{iter: self.as_bytes().iter()}
1893     }
1894
1895     #[inline]
1896     fn bytes(&self) -> Bytes {
1897         Bytes(self.as_bytes().iter().cloned())
1898     }
1899
1900     #[inline]
1901     fn char_indices(&self) -> CharIndices {
1902         CharIndices { front_offset: 0, iter: self.chars() }
1903     }
1904
1905     #[inline]
1906     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1907         Split(SplitInternal {
1908             start: 0,
1909             end: self.len(),
1910             matcher: pat.into_searcher(self),
1911             allow_trailing_empty: true,
1912             finished: false,
1913         })
1914     }
1915
1916     #[inline]
1917     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1918         where P::Searcher: ReverseSearcher<'a>
1919     {
1920         RSplit(self.split(pat).0)
1921     }
1922
1923     #[inline]
1924     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1925         SplitN(SplitNInternal {
1926             iter: self.split(pat).0,
1927             count: count,
1928         })
1929     }
1930
1931     #[inline]
1932     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1933         where P::Searcher: ReverseSearcher<'a>
1934     {
1935         RSplitN(self.splitn(count, pat).0)
1936     }
1937
1938     #[inline]
1939     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1940         SplitTerminator(SplitInternal {
1941             allow_trailing_empty: false,
1942             ..self.split(pat).0
1943         })
1944     }
1945
1946     #[inline]
1947     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1948         where P::Searcher: ReverseSearcher<'a>
1949     {
1950         RSplitTerminator(self.split_terminator(pat).0)
1951     }
1952
1953     #[inline]
1954     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1955         Matches(MatchesInternal(pat.into_searcher(self)))
1956     }
1957
1958     #[inline]
1959     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1960         where P::Searcher: ReverseSearcher<'a>
1961     {
1962         RMatches(self.matches(pat).0)
1963     }
1964
1965     #[inline]
1966     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1967         MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1968     }
1969
1970     #[inline]
1971     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1972         where P::Searcher: ReverseSearcher<'a>
1973     {
1974         RMatchIndices(self.match_indices(pat).0)
1975     }
1976     #[inline]
1977     fn lines(&self) -> Lines {
1978         Lines(self.split_terminator('\n').map(LinesAnyMap))
1979     }
1980
1981     #[inline]
1982     #[allow(deprecated)]
1983     fn lines_any(&self) -> LinesAny {
1984         LinesAny(self.lines())
1985     }
1986
1987     #[inline]
1988     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
1989         let ptr = self.as_ptr().offset(begin as isize);
1990         let len = end - begin;
1991         from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1992     }
1993
1994     #[inline]
1995     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
1996         let ptr = self.as_ptr().offset(begin as isize);
1997         let len = end - begin;
1998         mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
1999     }
2000
2001     #[inline]
2002     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
2003         pat.is_prefix_of(self)
2004     }
2005
2006     #[inline]
2007     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
2008         where P::Searcher: ReverseSearcher<'a>
2009     {
2010         pat.is_suffix_of(self)
2011     }
2012
2013     #[inline]
2014     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
2015         where P::Searcher: DoubleEndedSearcher<'a>
2016     {
2017         let mut i = 0;
2018         let mut j = 0;
2019         let mut matcher = pat.into_searcher(self);
2020         if let Some((a, b)) = matcher.next_reject() {
2021             i = a;
2022             j = b; // Remember earliest known match, correct it below if
2023                    // last match is different
2024         }
2025         if let Some((_, b)) = matcher.next_reject_back() {
2026             j = b;
2027         }
2028         unsafe {
2029             // Searcher is known to return valid indices
2030             self.slice_unchecked(i, j)
2031         }
2032     }
2033
2034     #[inline]
2035     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
2036         let mut i = self.len();
2037         let mut matcher = pat.into_searcher(self);
2038         if let Some((a, _)) = matcher.next_reject() {
2039             i = a;
2040         }
2041         unsafe {
2042             // Searcher is known to return valid indices
2043             self.slice_unchecked(i, self.len())
2044         }
2045     }
2046
2047     #[inline]
2048     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
2049         where P::Searcher: ReverseSearcher<'a>
2050     {
2051         let mut j = 0;
2052         let mut matcher = pat.into_searcher(self);
2053         if let Some((_, b)) = matcher.next_reject_back() {
2054             j = b;
2055         }
2056         unsafe {
2057             // Searcher is known to return valid indices
2058             self.slice_unchecked(0, j)
2059         }
2060     }
2061
2062     #[inline]
2063     fn is_char_boundary(&self, index: usize) -> bool {
2064         // 0 and len are always ok.
2065         // Test for 0 explicitly so that it can optimize out the check
2066         // easily and skip reading string data for that case.
2067         if index == 0 || index == self.len() { return true; }
2068         match self.as_bytes().get(index) {
2069             None => false,
2070             // This is bit magic equivalent to: b < 128 || b >= 192
2071             Some(&b) => (b as i8) >= -0x40,
2072         }
2073     }
2074
2075     #[inline]
2076     fn as_bytes(&self) -> &[u8] {
2077         unsafe { mem::transmute(self) }
2078     }
2079
2080     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2081         pat.into_searcher(self).next_match().map(|(i, _)| i)
2082     }
2083
2084     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
2085         where P::Searcher: ReverseSearcher<'a>
2086     {
2087         pat.into_searcher(self).next_match_back().map(|(i, _)| i)
2088     }
2089
2090     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2091         self.find(pat)
2092     }
2093
2094     #[inline]
2095     fn split_at(&self, mid: usize) -> (&str, &str) {
2096         // is_char_boundary checks that the index is in [0, .len()]
2097         if self.is_char_boundary(mid) {
2098             unsafe {
2099                 (self.slice_unchecked(0, mid),
2100                  self.slice_unchecked(mid, self.len()))
2101             }
2102         } else {
2103             slice_error_fail(self, 0, mid)
2104         }
2105     }
2106
2107     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2108         // is_char_boundary checks that the index is in [0, .len()]
2109         if self.is_char_boundary(mid) {
2110             let len = self.len();
2111             let ptr = self.as_ptr() as *mut u8;
2112             unsafe {
2113                 (from_raw_parts_mut(ptr, mid),
2114                  from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
2115             }
2116         } else {
2117             slice_error_fail(self, 0, mid)
2118         }
2119     }
2120
2121     #[inline]
2122     fn as_ptr(&self) -> *const u8 {
2123         self as *const str as *const u8
2124     }
2125
2126     #[inline]
2127     fn len(&self) -> usize {
2128         self.as_bytes().len()
2129     }
2130
2131     #[inline]
2132     fn is_empty(&self) -> bool { self.len() == 0 }
2133
2134     #[inline]
2135     fn parse<'a, T>(&'a self) -> Result<T, T::Error> where T: TryFrom<&'a str> {
2136         T::try_from(self)
2137     }
2138 }
2139
2140 #[stable(feature = "rust1", since = "1.0.0")]
2141 impl AsRef<[u8]> for str {
2142     #[inline]
2143     fn as_ref(&self) -> &[u8] {
2144         self.as_bytes()
2145     }
2146 }
2147
2148 #[stable(feature = "rust1", since = "1.0.0")]
2149 impl<'a> Default for &'a str {
2150     /// Creates an empty str
2151     fn default() -> &'a str { "" }
2152 }