src/libcore/str/mod.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! String manipulation
  12 //!
  13 //! For more details, see std::str
  14
  15 #![stable(feature = "rust1", since = "1.0.0")]
  16
  17 use self::pattern::Pattern;
  18 use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
  19
  20 use char;
  21 use convert::TryFrom;
  22 use fmt;
  23 use iter::{Map, Cloned, FusedIterator};
  24 use mem;
  25 use slice::{self, SliceIndex};
  26
  27 pub mod pattern;
  28
  29 /// A trait to abstract the idea of creating a new instance of a type from a
  30 /// string.
  31 ///
  32 /// `FromStr`'s [`from_str`] method is often used implicitly, through
  33 /// [`str`]'s [`parse`] method. See [`parse`]'s documentation for examples.
  34 ///
  35 /// [`from_str`]: #tymethod.from_str
  36 /// [`str`]: ../../std/primitive.str.html
  37 /// [`parse`]: ../../std/primitive.str.html#method.parse
  38 ///
  39 /// # Examples
  40 ///
  41 /// Basic implementation of `FromStr` on an example `Point` type:
  42 ///
  43 /// ```
  44 /// use std::str::FromStr;
  45 /// use std::num::ParseIntError;
  46 ///
  47 /// #[derive(Debug, PartialEq)]
  48 /// struct Point {
  49 ///     x: i32,
  50 ///     y: i32
  51 /// }
  52 ///
  53 /// impl FromStr for Point {
  54 ///     type Err = ParseIntError;
  55 ///
  56 ///     fn from_str(s: &str) -> Result<Self, Self::Err> {
  57 ///         let coords: Vec<&str> = s.trim_matches(|p| p == '(' || p == ')' )
  58 ///                                  .split(",")
  59 ///                                  .collect();
  60 ///
  61 ///         let x_fromstr = coords[0].parse::<i32>()?;
  62 ///         let y_fromstr = coords[1].parse::<i32>()?;
  63 ///
  64 ///         Ok(Point { x: x_fromstr, y: y_fromstr })
  65 ///     }
  66 /// }
  67 ///
  68 /// let p = Point::from_str("(1,2)");
  69 /// assert_eq!(p.unwrap(), Point{ x: 1, y: 2} )
  70 /// ```
  71 #[stable(feature = "rust1", since = "1.0.0")]
  72 pub trait FromStr: Sized {
  73     /// The associated error which can be returned from parsing.
  74     #[stable(feature = "rust1", since = "1.0.0")]
  75     type Err;
  76
  77     /// Parses a string `s` to return a value of this type.
  78     ///
  79     /// If parsing succeeds, return the value inside `Ok`, otherwise
  80     /// when the string is ill-formatted return an error specific to the
  81     /// inside `Err`. The error type is specific to implementation of the trait.
  82     ///
  83     /// # Examples
  84     ///
  85     /// Basic usage with [`i32`][ithirtytwo], a type that implements `FromStr`:
  86     ///
  87     /// [ithirtytwo]: ../../std/primitive.i32.html
  88     ///
  89     /// ```
  90     /// use std::str::FromStr;
  91     ///
  92     /// let s = "5";
  93     /// let x = i32::from_str(s).unwrap();
  94     ///
  95     /// assert_eq!(5, x);
  96     /// ```
  97     #[stable(feature = "rust1", since = "1.0.0")]
  98     fn from_str(s: &str) -> Result<Self, Self::Err>;
  99 }
 100
 101 #[stable(feature = "rust1", since = "1.0.0")]
 102 impl FromStr for bool {
 103     type Err = ParseBoolError;
 104
 105     /// Parse a `bool` from a string.
 106     ///
 107     /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
 108     /// actually be parseable.
 109     ///
 110     /// # Examples
 111     ///
 112     /// ```
 113     /// use std::str::FromStr;
 114     ///
 115     /// assert_eq!(FromStr::from_str("true"), Ok(true));
 116     /// assert_eq!(FromStr::from_str("false"), Ok(false));
 117     /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
 118     /// ```
 119     ///
 120     /// Note, in many cases, the `.parse()` method on `str` is more proper.
 121     ///
 122     /// ```
 123     /// assert_eq!("true".parse(), Ok(true));
 124     /// assert_eq!("false".parse(), Ok(false));
 125     /// assert!("not even a boolean".parse::<bool>().is_err());
 126     /// ```
 127     #[inline]
 128     fn from_str(s: &str) -> Result<bool, ParseBoolError> {
 129         match s {
 130             "true"  => Ok(true),
 131             "false" => Ok(false),
 132             _       => Err(ParseBoolError { _priv: () }),
 133         }
 134     }
 135 }
 136
 137 /// An error returned when parsing a `bool` using [`from_str`] fails
 138 ///
 139 /// [`from_str`]: ../../std/primitive.bool.html#method.from_str
 140 #[derive(Debug, Clone, PartialEq, Eq)]
 141 #[stable(feature = "rust1", since = "1.0.0")]
 142 pub struct ParseBoolError { _priv: () }
 143
 144 #[stable(feature = "rust1", since = "1.0.0")]
 145 impl fmt::Display for ParseBoolError {
 146     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 147         "provided string was not `true` or `false`".fmt(f)
 148     }
 149 }
 150
 151 /*
 152 Section: Creating a string
 153 */
 154
 155 /// Errors which can occur when attempting to interpret a sequence of [`u8`]
 156 /// as a string.
 157 ///
 158 /// [`u8`]: ../../std/primitive.u8.html
 159 ///
 160 /// As such, the `from_utf8` family of functions and methods for both [`String`]s
 161 /// and [`&str`]s make use of this error, for example.
 162 ///
 163 /// [`String`]: ../../std/string/struct.String.html#method.from_utf8
 164 /// [`&str`]: ../../std/str/fn.from_utf8.html
 165 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
 166 #[stable(feature = "rust1", since = "1.0.0")]
 167 pub struct Utf8Error {
 168     valid_up_to: usize,
 169     error_len: Option<u8>,
 170 }
 171
 172 impl Utf8Error {
 173     /// Returns the index in the given string up to which valid UTF-8 was
 174     /// verified.
 175     ///
 176     /// It is the maximum index such that `from_utf8(&input[..index])`
 177     /// would return `Ok(_)`.
 178     ///
 179     /// # Examples
 180     ///
 181     /// Basic usage:
 182     ///
 183     /// ```
 184     /// use std::str;
 185     ///
 186     /// // some invalid bytes, in a vector
 187     /// let sparkle_heart = vec![0, 159, 146, 150];
 188     ///
 189     /// // std::str::from_utf8 returns a Utf8Error
 190     /// let error = str::from_utf8(&sparkle_heart).unwrap_err();
 191     ///
 192     /// // the second byte is invalid here
 193     /// assert_eq!(1, error.valid_up_to());
 194     /// ```
 195     #[stable(feature = "utf8_error", since = "1.5.0")]
 196     pub fn valid_up_to(&self) -> usize { self.valid_up_to }
 197
 198     /// Provide more information about the failure:
 199     ///
 200     /// * `None`: the end of the input was reached unexpectedly.
 201     ///   `self.valid_up_to()` is 1 to 3 bytes from the end of the input.
 202     ///   If a byte stream (such as a file or a network socket) is being decoded incrementally,
 203     ///   this could be a valid `char` whose UTF-8 byte sequence is spanning multiple chunks.
 204     ///
 205     /// * `Some(len)`: an unexpected byte was encountered.
 206     ///   The length provided is that of the invalid byte sequence
 207     ///   that starts at the index given by `valid_up_to()`.
 208     ///   Decoding should resume after that sequence
 209     ///   (after inserting a U+FFFD REPLACEMENT CHARACTER) in case of lossy decoding.
 210     #[unstable(feature = "utf8_error_error_len", reason ="new", issue = "40494")]
 211     pub fn error_len(&self) -> Option<usize> {
 212         self.error_len.map(|len| len as usize)
 213     }
 214 }
 215
 216 /// Converts a slice of bytes to a string slice.
 217 ///
 218 /// A string slice (`&str`) is made of bytes (`u8`), and a byte slice (`&[u8]`)
 219 /// is made of bytes, so this function converts between the two. Not all byte
 220 /// slices are valid string slices, however: `&str` requires that it is valid
 221 /// UTF-8. `from_utf8()` checks to ensure that the bytes are valid UTF-8, and
 222 /// then does the conversion.
 223 ///
 224 /// If you are sure that the byte slice is valid UTF-8, and you don't want to
 225 /// incur the overhead of the validity check, there is an unsafe version of
 226 /// this function, [`from_utf8_unchecked`][fromutf8u], which has the same
 227 /// behavior but skips the check.
 228 ///
 229 /// [fromutf8u]: fn.from_utf8_unchecked.html
 230 ///
 231 /// If you need a `String` instead of a `&str`, consider
 232 /// [`String::from_utf8`][string].
 233 ///
 234 /// [string]: ../../std/string/struct.String.html#method.from_utf8
 235 ///
 236 /// Because you can stack-allocate a `[u8; N]`, and you can take a `&[u8]` of
 237 /// it, this function is one way to have a stack-allocated string. There is
 238 /// an example of this in the examples section below.
 239 ///
 240 /// # Errors
 241 ///
 242 /// Returns `Err` if the slice is not UTF-8 with a description as to why the
 243 /// provided slice is not UTF-8.
 244 ///
 245 /// # Examples
 246 ///
 247 /// Basic usage:
 248 ///
 249 /// ```
 250 /// use std::str;
 251 ///
 252 /// // some bytes, in a vector
 253 /// let sparkle_heart = vec![240, 159, 146, 150];
 254 ///
 255 /// // We know these bytes are valid, so just use `unwrap()`.
 256 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 257 ///
 258 /// assert_eq!("💖", sparkle_heart);
 259 /// ```
 260 ///
 261 /// Incorrect bytes:
 262 ///
 263 /// ```
 264 /// use std::str;
 265 ///
 266 /// // some invalid bytes, in a vector
 267 /// let sparkle_heart = vec![0, 159, 146, 150];
 268 ///
 269 /// assert!(str::from_utf8(&sparkle_heart).is_err());
 270 /// ```
 271 ///
 272 /// See the docs for [`Utf8Error`][error] for more details on the kinds of
 273 /// errors that can be returned.
 274 ///
 275 /// [error]: struct.Utf8Error.html
 276 ///
 277 /// A "stack allocated string":
 278 ///
 279 /// ```
 280 /// use std::str;
 281 ///
 282 /// // some bytes, in a stack-allocated array
 283 /// let sparkle_heart = [240, 159, 146, 150];
 284 ///
 285 /// // We know these bytes are valid, so just use `unwrap()`.
 286 /// let sparkle_heart = str::from_utf8(&sparkle_heart).unwrap();
 287 ///
 288 /// assert_eq!("💖", sparkle_heart);
 289 /// ```
 290 #[stable(feature = "rust1", since = "1.0.0")]
 291 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
 292     run_utf8_validation(v)?;
 293     Ok(unsafe { from_utf8_unchecked(v) })
 294 }
 295
 296 /// Forms a str from a pointer and a length.
 297 ///
 298 /// The `len` argument is the number of bytes in the string.
 299 ///
 300 /// # Safety
 301 ///
 302 /// This function is unsafe as there is no guarantee that the given pointer is
 303 /// valid for `len` bytes, nor whether the lifetime inferred is a suitable
 304 /// lifetime for the returned str.
 305 ///
 306 /// The data must be valid UTF-8
 307 ///
 308 /// `p` must be non-null, even for zero-length str.
 309 ///
 310 /// # Caveat
 311 ///
 312 /// The lifetime for the returned str is inferred from its usage. To
 313 /// prevent accidental misuse, it's suggested to tie the lifetime to whichever
 314 /// source lifetime is safe in the context, such as by providing a helper
 315 /// function taking the lifetime of a host value for the str, or by explicit
 316 /// annotation.
 317 /// Performs the same functionality as `from_raw_parts`, except that a mutable
 318 /// str is returned.
 319 ///
 320 unsafe fn from_raw_parts_mut<'a>(p: *mut u8, len: usize) -> &'a mut str {
 321     mem::transmute::<&mut [u8], &mut str>(slice::from_raw_parts_mut(p, len))
 322 }
 323
 324 /// Converts a slice of bytes to a string slice without checking
 325 /// that the string contains valid UTF-8.
 326 ///
 327 /// See the safe version, [`from_utf8`][fromutf8], for more information.
 328 ///
 329 /// [fromutf8]: fn.from_utf8.html
 330 ///
 331 /// # Safety
 332 ///
 333 /// This function is unsafe because it does not check that the bytes passed to
 334 /// it are valid UTF-8. If this constraint is violated, undefined behavior
 335 /// results, as the rest of Rust assumes that [`&str`]s are valid UTF-8.
 336 ///
 337 /// [`&str`]: ../../std/primitive.str.html
 338 ///
 339 /// # Examples
 340 ///
 341 /// Basic usage:
 342 ///
 343 /// ```
 344 /// use std::str;
 345 ///
 346 /// // some bytes, in a vector
 347 /// let sparkle_heart = vec![240, 159, 146, 150];
 348 ///
 349 /// let sparkle_heart = unsafe {
 350 ///     str::from_utf8_unchecked(&sparkle_heart)
 351 /// };
 352 ///
 353 /// assert_eq!("💖", sparkle_heart);
 354 /// ```
 355 #[inline(always)]
 356 #[stable(feature = "rust1", since = "1.0.0")]
 357 pub unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
 358     mem::transmute(v)
 359 }
 360
 361 #[stable(feature = "rust1", since = "1.0.0")]
 362 impl fmt::Display for Utf8Error {
 363     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 364         if let Some(error_len) = self.error_len {
 365             write!(f, "invalid utf-8 sequence of {} bytes from index {}",
 366                    error_len, self.valid_up_to)
 367         } else {
 368             write!(f, "incomplete utf-8 byte sequence from index {}", self.valid_up_to)
 369         }
 370     }
 371 }
 372
 373 /*
 374 Section: Iterators
 375 */
 376
 377 /// An iterator over the [`char`]s of a string slice.
 378 ///
 379 /// [`char`]: ../../std/primitive.char.html
 380 ///
 381 /// This struct is created by the [`chars`] method on [`str`].
 382 /// See its documentation for more.
 383 ///
 384 /// [`chars`]: ../../std/primitive.str.html#method.chars
 385 /// [`str`]: ../../std/primitive.str.html
 386 #[derive(Clone, Debug)]
 387 #[stable(feature = "rust1", since = "1.0.0")]
 388 pub struct Chars<'a> {
 389     iter: slice::Iter<'a, u8>
 390 }
 391
 392 /// Returns the initial codepoint accumulator for the first byte.
 393 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
 394 /// for width 3, and 3 bits for width 4.
 395 #[inline]
 396 fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
 397
 398 /// Returns the value of `ch` updated with continuation byte `byte`.
 399 #[inline]
 400 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
 401
 402 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
 403 /// bits `10`).
 404 #[inline]
 405 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
 406
 407 #[inline]
 408 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
 409     match opt {
 410         Some(&byte) => byte,
 411         None => 0,
 412     }
 413 }
 414
 415 /// Reads the next code point out of a byte iterator (assuming a
 416 /// UTF-8-like encoding).
 417 #[unstable(feature = "str_internals", issue = "0")]
 418 #[inline]
 419 pub fn next_code_point<'a, I: Iterator<Item = &'a u8>>(bytes: &mut I) -> Option<u32> {
 420     // Decode UTF-8
 421     let x = match bytes.next() {
 422         None => return None,
 423         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 424         Some(&next_byte) => next_byte,
 425     };
 426
 427     // Multibyte case follows
 428     // Decode from a byte combination out of: [[[x y] z] w]
 429     // NOTE: Performance is sensitive to the exact formulation here
 430     let init = utf8_first_byte(x, 2);
 431     let y = unwrap_or_0(bytes.next());
 432     let mut ch = utf8_acc_cont_byte(init, y);
 433     if x >= 0xE0 {
 434         // [[x y z] w] case
 435         // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
 436         let z = unwrap_or_0(bytes.next());
 437         let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
 438         ch = init << 12 | y_z;
 439         if x >= 0xF0 {
 440             // [x y z w] case
 441             // use only the lower 3 bits of `init`
 442             let w = unwrap_or_0(bytes.next());
 443             ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
 444         }
 445     }
 446
 447     Some(ch)
 448 }
 449
 450 /// Reads the last code point out of a byte iterator (assuming a
 451 /// UTF-8-like encoding).
 452 #[inline]
 453 fn next_code_point_reverse<'a, I>(bytes: &mut I) -> Option<u32>
 454     where I: DoubleEndedIterator<Item = &'a u8>,
 455 {
 456     // Decode UTF-8
 457     let w = match bytes.next_back() {
 458         None => return None,
 459         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 460         Some(&back_byte) => back_byte,
 461     };
 462
 463     // Multibyte case follows
 464     // Decode from a byte combination out of: [x [y [z w]]]
 465     let mut ch;
 466     let z = unwrap_or_0(bytes.next_back());
 467     ch = utf8_first_byte(z, 2);
 468     if utf8_is_cont_byte(z) {
 469         let y = unwrap_or_0(bytes.next_back());
 470         ch = utf8_first_byte(y, 3);
 471         if utf8_is_cont_byte(y) {
 472             let x = unwrap_or_0(bytes.next_back());
 473             ch = utf8_first_byte(x, 4);
 474             ch = utf8_acc_cont_byte(ch, y);
 475         }
 476         ch = utf8_acc_cont_byte(ch, z);
 477     }
 478     ch = utf8_acc_cont_byte(ch, w);
 479
 480     Some(ch)
 481 }
 482
 483 #[stable(feature = "rust1", since = "1.0.0")]
 484 impl<'a> Iterator for Chars<'a> {
 485     type Item = char;
 486
 487     #[inline]
 488     fn next(&mut self) -> Option<char> {
 489         next_code_point(&mut self.iter).map(|ch| {
 490             // str invariant says `ch` is a valid Unicode Scalar Value
 491             unsafe {
 492                 char::from_u32_unchecked(ch)
 493             }
 494         })
 495     }
 496
 497     #[inline]
 498     fn count(self) -> usize {
 499         // length in `char` is equal to the number of non-continuation bytes
 500         let bytes_len = self.iter.len();
 501         let mut cont_bytes = 0;
 502         for &byte in self.iter {
 503             cont_bytes += utf8_is_cont_byte(byte) as usize;
 504         }
 505         bytes_len - cont_bytes
 506     }
 507
 508     #[inline]
 509     fn size_hint(&self) -> (usize, Option<usize>) {
 510         let len = self.iter.len();
 511         // `(len + 3)` can't overflow, because we know that the `slice::Iter`
 512         // belongs to a slice in memory which has a maximum length of
 513         // `isize::MAX` (that's well below `usize::MAX`).
 514         ((len + 3) / 4, Some(len))
 515     }
 516
 517     #[inline]
 518     fn last(mut self) -> Option<char> {
 519         // No need to go through the entire string.
 520         self.next_back()
 521     }
 522 }
 523
 524 #[stable(feature = "rust1", since = "1.0.0")]
 525 impl<'a> DoubleEndedIterator for Chars<'a> {
 526     #[inline]
 527     fn next_back(&mut self) -> Option<char> {
 528         next_code_point_reverse(&mut self.iter).map(|ch| {
 529             // str invariant says `ch` is a valid Unicode Scalar Value
 530             unsafe {
 531                 char::from_u32_unchecked(ch)
 532             }
 533         })
 534     }
 535 }
 536
 537 #[unstable(feature = "fused", issue = "35602")]
 538 impl<'a> FusedIterator for Chars<'a> {}
 539
 540 impl<'a> Chars<'a> {
 541     /// View the underlying data as a subslice of the original data.
 542     ///
 543     /// This has the same lifetime as the original slice, and so the
 544     /// iterator can continue to be used while this exists.
 545     ///
 546     /// # Examples
 547     ///
 548     /// ```
 549     /// let mut chars = "abc".chars();
 550     ///
 551     /// assert_eq!(chars.as_str(), "abc");
 552     /// chars.next();
 553     /// assert_eq!(chars.as_str(), "bc");
 554     /// chars.next();
 555     /// chars.next();
 556     /// assert_eq!(chars.as_str(), "");
 557     /// ```
 558     #[stable(feature = "iter_to_slice", since = "1.4.0")]
 559     #[inline]
 560     pub fn as_str(&self) -> &'a str {
 561         unsafe { from_utf8_unchecked(self.iter.as_slice()) }
 562     }
 563 }
 564
 565 /// An iterator over the [`char`]s of a string slice, and their positions.
 566 ///
 567 /// [`char`]: ../../std/primitive.char.html
 568 ///
 569 /// This struct is created by the [`char_indices`] method on [`str`].
 570 /// See its documentation for more.
 571 ///
 572 /// [`char_indices`]: ../../std/primitive.str.html#method.char_indices
 573 /// [`str`]: ../../std/primitive.str.html
 574 #[derive(Clone, Debug)]
 575 #[stable(feature = "rust1", since = "1.0.0")]
 576 pub struct CharIndices<'a> {
 577     front_offset: usize,
 578     iter: Chars<'a>,
 579 }
 580
 581 #[stable(feature = "rust1", since = "1.0.0")]
 582 impl<'a> Iterator for CharIndices<'a> {
 583     type Item = (usize, char);
 584
 585     #[inline]
 586     fn next(&mut self) -> Option<(usize, char)> {
 587         let pre_len = self.iter.iter.len();
 588         match self.iter.next() {
 589             None => None,
 590             Some(ch) => {
 591                 let index = self.front_offset;
 592                 let len = self.iter.iter.len();
 593                 self.front_offset += pre_len - len;
 594                 Some((index, ch))
 595             }
 596         }
 597     }
 598
 599     #[inline]
 600     fn count(self) -> usize {
 601         self.iter.count()
 602     }
 603
 604     #[inline]
 605     fn size_hint(&self) -> (usize, Option<usize>) {
 606         self.iter.size_hint()
 607     }
 608
 609     #[inline]
 610     fn last(mut self) -> Option<(usize, char)> {
 611         // No need to go through the entire string.
 612         self.next_back()
 613     }
 614 }
 615
 616 #[stable(feature = "rust1", since = "1.0.0")]
 617 impl<'a> DoubleEndedIterator for CharIndices<'a> {
 618     #[inline]
 619     fn next_back(&mut self) -> Option<(usize, char)> {
 620         match self.iter.next_back() {
 621             None => None,
 622             Some(ch) => {
 623                 let index = self.front_offset + self.iter.iter.len();
 624                 Some((index, ch))
 625             }
 626         }
 627     }
 628 }
 629
 630 #[unstable(feature = "fused", issue = "35602")]
 631 impl<'a> FusedIterator for CharIndices<'a> {}
 632
 633 impl<'a> CharIndices<'a> {
 634     /// View the underlying data as a subslice of the original data.
 635     ///
 636     /// This has the same lifetime as the original slice, and so the
 637     /// iterator can continue to be used while this exists.
 638     #[stable(feature = "iter_to_slice", since = "1.4.0")]
 639     #[inline]
 640     pub fn as_str(&self) -> &'a str {
 641         self.iter.as_str()
 642     }
 643 }
 644
 645 /// An iterator over the bytes of a string slice.
 646 ///
 647 /// This struct is created by the [`bytes`] method on [`str`].
 648 /// See its documentation for more.
 649 ///
 650 /// [`bytes`]: ../../std/primitive.str.html#method.bytes
 651 /// [`str`]: ../../std/primitive.str.html
 652 #[stable(feature = "rust1", since = "1.0.0")]
 653 #[derive(Clone, Debug)]
 654 pub struct Bytes<'a>(Cloned<slice::Iter<'a, u8>>);
 655
 656 #[stable(feature = "rust1", since = "1.0.0")]
 657 impl<'a> Iterator for Bytes<'a> {
 658     type Item = u8;
 659
 660     #[inline]
 661     fn next(&mut self) -> Option<u8> {
 662         self.0.next()
 663     }
 664
 665     #[inline]
 666     fn size_hint(&self) -> (usize, Option<usize>) {
 667         self.0.size_hint()
 668     }
 669
 670     #[inline]
 671     fn count(self) -> usize {
 672         self.0.count()
 673     }
 674
 675     #[inline]
 676     fn last(self) -> Option<Self::Item> {
 677         self.0.last()
 678     }
 679
 680     #[inline]
 681     fn nth(&mut self, n: usize) -> Option<Self::Item> {
 682         self.0.nth(n)
 683     }
 684 }
 685
 686 #[stable(feature = "rust1", since = "1.0.0")]
 687 impl<'a> DoubleEndedIterator for Bytes<'a> {
 688     #[inline]
 689     fn next_back(&mut self) -> Option<u8> {
 690         self.0.next_back()
 691     }
 692 }
 693
 694 #[stable(feature = "rust1", since = "1.0.0")]
 695 impl<'a> ExactSizeIterator for Bytes<'a> {
 696     #[inline]
 697     fn len(&self) -> usize {
 698         self.0.len()
 699     }
 700
 701     #[inline]
 702     fn is_empty(&self) -> bool {
 703         self.0.is_empty()
 704     }
 705 }
 706
 707 #[unstable(feature = "fused", issue = "35602")]
 708 impl<'a> FusedIterator for Bytes<'a> {}
 709
 710 /// This macro generates a Clone impl for string pattern API
 711 /// wrapper types of the form X<'a, P>
 712 macro_rules! derive_pattern_clone {
 713     (clone $t:ident with |$s:ident| $e:expr) => {
 714         impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
 715             where P::Searcher: Clone
 716         {
 717             fn clone(&self) -> Self {
 718                 let $s = self;
 719                 $e
 720             }
 721         }
 722     }
 723 }
 724
 725 /// This macro generates two public iterator structs
 726 /// wrapping a private internal one that makes use of the `Pattern` API.
 727 ///
 728 /// For all patterns `P: Pattern<'a>` the following items will be
 729 /// generated (generics omitted):
 730 ///
 731 /// struct $forward_iterator($internal_iterator);
 732 /// struct $reverse_iterator($internal_iterator);
 733 ///
 734 /// impl Iterator for $forward_iterator
 735 /// { /* internal ends up calling Searcher::next_match() */ }
 736 ///
 737 /// impl DoubleEndedIterator for $forward_iterator
 738 ///       where P::Searcher: DoubleEndedSearcher
 739 /// { /* internal ends up calling Searcher::next_match_back() */ }
 740 ///
 741 /// impl Iterator for $reverse_iterator
 742 ///       where P::Searcher: ReverseSearcher
 743 /// { /* internal ends up calling Searcher::next_match_back() */ }
 744 ///
 745 /// impl DoubleEndedIterator for $reverse_iterator
 746 ///       where P::Searcher: DoubleEndedSearcher
 747 /// { /* internal ends up calling Searcher::next_match() */ }
 748 ///
 749 /// The internal one is defined outside the macro, and has almost the same
 750 /// semantic as a DoubleEndedIterator by delegating to `pattern::Searcher` and
 751 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
 752 ///
 753 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
 754 /// `Pattern` might not return the same elements, so actually implementing
 755 /// `DoubleEndedIterator` for it would be incorrect.
 756 /// (See the docs in `str::pattern` for more details)
 757 ///
 758 /// However, the internal struct still represents a single ended iterator from
 759 /// either end, and depending on pattern is also a valid double ended iterator,
 760 /// so the two wrapper structs implement `Iterator`
 761 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
 762 /// to the complex impls seen above.
 763 macro_rules! generate_pattern_iterators {
 764     {
 765         // Forward iterator
 766         forward:
 767             $(#[$forward_iterator_attribute:meta])*
 768             struct $forward_iterator:ident;
 769
 770         // Reverse iterator
 771         reverse:
 772             $(#[$reverse_iterator_attribute:meta])*
 773             struct $reverse_iterator:ident;
 774
 775         // Stability of all generated items
 776         stability:
 777             $(#[$common_stability_attribute:meta])*
 778
 779         // Internal almost-iterator that is being delegated to
 780         internal:
 781             $internal_iterator:ident yielding ($iterty:ty);
 782
 783         // Kind of delgation - either single ended or double ended
 784         delegate $($t:tt)*
 785     } => {
 786         $(#[$forward_iterator_attribute])*
 787         $(#[$common_stability_attribute])*
 788         pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
 789
 790         $(#[$common_stability_attribute])*
 791         impl<'a, P: Pattern<'a>> fmt::Debug for $forward_iterator<'a, P>
 792             where P::Searcher: fmt::Debug
 793         {
 794             fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 795                 f.debug_tuple(stringify!($forward_iterator))
 796                     .field(&self.0)
 797                     .finish()
 798             }
 799         }
 800
 801         $(#[$common_stability_attribute])*
 802         impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
 803             type Item = $iterty;
 804
 805             #[inline]
 806             fn next(&mut self) -> Option<$iterty> {
 807                 self.0.next()
 808             }
 809         }
 810
 811         $(#[$common_stability_attribute])*
 812         impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
 813             where P::Searcher: Clone
 814         {
 815             fn clone(&self) -> Self {
 816                 $forward_iterator(self.0.clone())
 817             }
 818         }
 819
 820         $(#[$reverse_iterator_attribute])*
 821         $(#[$common_stability_attribute])*
 822         pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
 823
 824         $(#[$common_stability_attribute])*
 825         impl<'a, P: Pattern<'a>> fmt::Debug for $reverse_iterator<'a, P>
 826             where P::Searcher: fmt::Debug
 827         {
 828             fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 829                 f.debug_tuple(stringify!($reverse_iterator))
 830                     .field(&self.0)
 831                     .finish()
 832             }
 833         }
 834
 835         $(#[$common_stability_attribute])*
 836         impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
 837             where P::Searcher: ReverseSearcher<'a>
 838         {
 839             type Item = $iterty;
 840
 841             #[inline]
 842             fn next(&mut self) -> Option<$iterty> {
 843                 self.0.next_back()
 844             }
 845         }
 846
 847         $(#[$common_stability_attribute])*
 848         impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
 849             where P::Searcher: Clone
 850         {
 851             fn clone(&self) -> Self {
 852                 $reverse_iterator(self.0.clone())
 853             }
 854         }
 855
 856         #[unstable(feature = "fused", issue = "35602")]
 857         impl<'a, P: Pattern<'a>> FusedIterator for $forward_iterator<'a, P> {}
 858
 859         #[unstable(feature = "fused", issue = "35602")]
 860         impl<'a, P: Pattern<'a>> FusedIterator for $reverse_iterator<'a, P>
 861             where P::Searcher: ReverseSearcher<'a> {}
 862
 863         generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
 864                                                 $forward_iterator,
 865                                                 $reverse_iterator, $iterty);
 866     };
 867     {
 868         double ended; with $(#[$common_stability_attribute:meta])*,
 869                            $forward_iterator:ident,
 870                            $reverse_iterator:ident, $iterty:ty
 871     } => {
 872         $(#[$common_stability_attribute])*
 873         impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
 874             where P::Searcher: DoubleEndedSearcher<'a>
 875         {
 876             #[inline]
 877             fn next_back(&mut self) -> Option<$iterty> {
 878                 self.0.next_back()
 879             }
 880         }
 881
 882         $(#[$common_stability_attribute])*
 883         impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
 884             where P::Searcher: DoubleEndedSearcher<'a>
 885         {
 886             #[inline]
 887             fn next_back(&mut self) -> Option<$iterty> {
 888                 self.0.next()
 889             }
 890         }
 891     };
 892     {
 893         single ended; with $(#[$common_stability_attribute:meta])*,
 894                            $forward_iterator:ident,
 895                            $reverse_iterator:ident, $iterty:ty
 896     } => {}
 897 }
 898
 899 derive_pattern_clone!{
 900     clone SplitInternal
 901     with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
 902 }
 903
 904 struct SplitInternal<'a, P: Pattern<'a>> {
 905     start: usize,
 906     end: usize,
 907     matcher: P::Searcher,
 908     allow_trailing_empty: bool,
 909     finished: bool,
 910 }
 911
 912 impl<'a, P: Pattern<'a>> fmt::Debug for SplitInternal<'a, P> where P::Searcher: fmt::Debug {
 913     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 914         f.debug_struct("SplitInternal")
 915             .field("start", &self.start)
 916             .field("end", &self.end)
 917             .field("matcher", &self.matcher)
 918             .field("allow_trailing_empty", &self.allow_trailing_empty)
 919             .field("finished", &self.finished)
 920             .finish()
 921     }
 922 }
 923
 924 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
 925     #[inline]
 926     fn get_end(&mut self) -> Option<&'a str> {
 927         if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
 928             self.finished = true;
 929             unsafe {
 930                 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
 931                 Some(string)
 932             }
 933         } else {
 934             None
 935         }
 936     }
 937
 938     #[inline]
 939     fn next(&mut self) -> Option<&'a str> {
 940         if self.finished { return None }
 941
 942         let haystack = self.matcher.haystack();
 943         match self.matcher.next_match() {
 944             Some((a, b)) => unsafe {
 945                 let elt = haystack.slice_unchecked(self.start, a);
 946                 self.start = b;
 947                 Some(elt)
 948             },
 949             None => self.get_end(),
 950         }
 951     }
 952
 953     #[inline]
 954     fn next_back(&mut self) -> Option<&'a str>
 955         where P::Searcher: ReverseSearcher<'a>
 956     {
 957         if self.finished { return None }
 958
 959         if !self.allow_trailing_empty {
 960             self.allow_trailing_empty = true;
 961             match self.next_back() {
 962                 Some(elt) if !elt.is_empty() => return Some(elt),
 963                 _ => if self.finished { return None }
 964             }
 965         }
 966
 967         let haystack = self.matcher.haystack();
 968         match self.matcher.next_match_back() {
 969             Some((a, b)) => unsafe {
 970                 let elt = haystack.slice_unchecked(b, self.end);
 971                 self.end = a;
 972                 Some(elt)
 973             },
 974             None => unsafe {
 975                 self.finished = true;
 976                 Some(haystack.slice_unchecked(self.start, self.end))
 977             },
 978         }
 979     }
 980 }
 981
 982 generate_pattern_iterators! {
 983     forward:
 984         /// Created with the method [`split`].
 985         ///
 986         /// [`split`]: ../../std/primitive.str.html#method.split
 987         struct Split;
 988     reverse:
 989         /// Created with the method [`rsplit`].
 990         ///
 991         /// [`rsplit`]: ../../std/primitive.str.html#method.rsplit
 992         struct RSplit;
 993     stability:
 994         #[stable(feature = "rust1", since = "1.0.0")]
 995     internal:
 996         SplitInternal yielding (&'a str);
 997     delegate double ended;
 998 }
 999
1000 generate_pattern_iterators! {
1001     forward:
1002         /// Created with the method [`split_terminator`].
1003         ///
1004         /// [`split_terminator`]: ../../std/primitive.str.html#method.split_terminator
1005         struct SplitTerminator;
1006     reverse:
1007         /// Created with the method [`rsplit_terminator`].
1008         ///
1009         /// [`rsplit_terminator`]: ../../std/primitive.str.html#method.rsplit_terminator
1010         struct RSplitTerminator;
1011     stability:
1012         #[stable(feature = "rust1", since = "1.0.0")]
1013     internal:
1014         SplitInternal yielding (&'a str);
1015     delegate double ended;
1016 }
1017
1018 derive_pattern_clone!{
1019     clone SplitNInternal
1020     with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
1021 }
1022
1023 struct SplitNInternal<'a, P: Pattern<'a>> {
1024     iter: SplitInternal<'a, P>,
1025     /// The number of splits remaining
1026     count: usize,
1027 }
1028
1029 impl<'a, P: Pattern<'a>> fmt::Debug for SplitNInternal<'a, P> where P::Searcher: fmt::Debug {
1030     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1031         f.debug_struct("SplitNInternal")
1032             .field("iter", &self.iter)
1033             .field("count", &self.count)
1034             .finish()
1035     }
1036 }
1037
1038 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
1039     #[inline]
1040     fn next(&mut self) -> Option<&'a str> {
1041         match self.count {
1042             0 => None,
1043             1 => { self.count = 0; self.iter.get_end() }
1044             _ => { self.count -= 1; self.iter.next() }
1045         }
1046     }
1047
1048     #[inline]
1049     fn next_back(&mut self) -> Option<&'a str>
1050         where P::Searcher: ReverseSearcher<'a>
1051     {
1052         match self.count {
1053             0 => None,
1054             1 => { self.count = 0; self.iter.get_end() }
1055             _ => { self.count -= 1; self.iter.next_back() }
1056         }
1057     }
1058 }
1059
1060 generate_pattern_iterators! {
1061     forward:
1062         /// Created with the method [`splitn`].
1063         ///
1064         /// [`splitn`]: ../../std/primitive.str.html#method.splitn
1065         struct SplitN;
1066     reverse:
1067         /// Created with the method [`rsplitn`].
1068         ///
1069         /// [`rsplitn`]: ../../std/primitive.str.html#method.rsplitn
1070         struct RSplitN;
1071     stability:
1072         #[stable(feature = "rust1", since = "1.0.0")]
1073     internal:
1074         SplitNInternal yielding (&'a str);
1075     delegate single ended;
1076 }
1077
1078 derive_pattern_clone!{
1079     clone MatchIndicesInternal
1080     with |s| MatchIndicesInternal(s.0.clone())
1081 }
1082
1083 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
1084
1085 impl<'a, P: Pattern<'a>> fmt::Debug for MatchIndicesInternal<'a, P> where P::Searcher: fmt::Debug {
1086     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1087         f.debug_tuple("MatchIndicesInternal")
1088             .field(&self.0)
1089             .finish()
1090     }
1091 }
1092
1093 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
1094     #[inline]
1095     fn next(&mut self) -> Option<(usize, &'a str)> {
1096         self.0.next_match().map(|(start, end)| unsafe {
1097             (start, self.0.haystack().slice_unchecked(start, end))
1098         })
1099     }
1100
1101     #[inline]
1102     fn next_back(&mut self) -> Option<(usize, &'a str)>
1103         where P::Searcher: ReverseSearcher<'a>
1104     {
1105         self.0.next_match_back().map(|(start, end)| unsafe {
1106             (start, self.0.haystack().slice_unchecked(start, end))
1107         })
1108     }
1109 }
1110
1111 generate_pattern_iterators! {
1112     forward:
1113         /// Created with the method [`match_indices`].
1114         ///
1115         /// [`match_indices`]: ../../std/primitive.str.html#method.match_indices
1116         struct MatchIndices;
1117     reverse:
1118         /// Created with the method [`rmatch_indices`].
1119         ///
1120         /// [`rmatch_indices`]: ../../std/primitive.str.html#method.rmatch_indices
1121         struct RMatchIndices;
1122     stability:
1123         #[stable(feature = "str_match_indices", since = "1.5.0")]
1124     internal:
1125         MatchIndicesInternal yielding ((usize, &'a str));
1126     delegate double ended;
1127 }
1128
1129 derive_pattern_clone!{
1130     clone MatchesInternal
1131     with |s| MatchesInternal(s.0.clone())
1132 }
1133
1134 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
1135
1136 impl<'a, P: Pattern<'a>> fmt::Debug for MatchesInternal<'a, P> where P::Searcher: fmt::Debug {
1137     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
1138         f.debug_tuple("MatchesInternal")
1139             .field(&self.0)
1140             .finish()
1141     }
1142 }
1143
1144 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
1145     #[inline]
1146     fn next(&mut self) -> Option<&'a str> {
1147         self.0.next_match().map(|(a, b)| unsafe {
1148             // Indices are known to be on utf8 boundaries
1149             self.0.haystack().slice_unchecked(a, b)
1150         })
1151     }
1152
1153     #[inline]
1154     fn next_back(&mut self) -> Option<&'a str>
1155         where P::Searcher: ReverseSearcher<'a>
1156     {
1157         self.0.next_match_back().map(|(a, b)| unsafe {
1158             // Indices are known to be on utf8 boundaries
1159             self.0.haystack().slice_unchecked(a, b)
1160         })
1161     }
1162 }
1163
1164 generate_pattern_iterators! {
1165     forward:
1166         /// Created with the method [`matches`].
1167         ///
1168         /// [`matches`]: ../../std/primitive.str.html#method.matches
1169         struct Matches;
1170     reverse:
1171         /// Created with the method [`rmatches`].
1172         ///
1173         /// [`rmatches`]: ../../std/primitive.str.html#method.rmatches
1174         struct RMatches;
1175     stability:
1176         #[stable(feature = "str_matches", since = "1.2.0")]
1177     internal:
1178         MatchesInternal yielding (&'a str);
1179     delegate double ended;
1180 }
1181
1182 /// An iterator over the lines of a string, as string slices.
1183 ///
1184 /// This struct is created with the [`lines`] method on [`str`].
1185 /// See its documentation for more.
1186 ///
1187 /// [`lines`]: ../../std/primitive.str.html#method.lines
1188 /// [`str`]: ../../std/primitive.str.html
1189 #[stable(feature = "rust1", since = "1.0.0")]
1190 #[derive(Clone, Debug)]
1191 pub struct Lines<'a>(Map<SplitTerminator<'a, char>, LinesAnyMap>);
1192
1193 #[stable(feature = "rust1", since = "1.0.0")]
1194 impl<'a> Iterator for Lines<'a> {
1195     type Item = &'a str;
1196
1197     #[inline]
1198     fn next(&mut self) -> Option<&'a str> {
1199         self.0.next()
1200     }
1201
1202     #[inline]
1203     fn size_hint(&self) -> (usize, Option<usize>) {
1204         self.0.size_hint()
1205     }
1206 }
1207
1208 #[stable(feature = "rust1", since = "1.0.0")]
1209 impl<'a> DoubleEndedIterator for Lines<'a> {
1210     #[inline]
1211     fn next_back(&mut self) -> Option<&'a str> {
1212         self.0.next_back()
1213     }
1214 }
1215
1216 #[unstable(feature = "fused", issue = "35602")]
1217 impl<'a> FusedIterator for Lines<'a> {}
1218
1219 /// Created with the method [`lines_any`].
1220 ///
1221 /// [`lines_any`]: ../../std/primitive.str.html#method.lines_any
1222 #[stable(feature = "rust1", since = "1.0.0")]
1223 #[rustc_deprecated(since = "1.4.0", reason = "use lines()/Lines instead now")]
1224 #[derive(Clone, Debug)]
1225 #[allow(deprecated)]
1226 pub struct LinesAny<'a>(Lines<'a>);
1227
1228 /// A nameable, cloneable fn type
1229 #[derive(Clone)]
1230 struct LinesAnyMap;
1231
1232 impl<'a> Fn<(&'a str,)> for LinesAnyMap {
1233     #[inline]
1234     extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
1235         let l = line.len();
1236         if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
1237         else { line }
1238     }
1239 }
1240
1241 impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
1242     #[inline]
1243     extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
1244         Fn::call(&*self, (line,))
1245     }
1246 }
1247
1248 impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
1249     type Output = &'a str;
1250
1251     #[inline]
1252     extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
1253         Fn::call(&self, (line,))
1254     }
1255 }
1256
1257 #[stable(feature = "rust1", since = "1.0.0")]
1258 #[allow(deprecated)]
1259 impl<'a> Iterator for LinesAny<'a> {
1260     type Item = &'a str;
1261
1262     #[inline]
1263     fn next(&mut self) -> Option<&'a str> {
1264         self.0.next()
1265     }
1266
1267     #[inline]
1268     fn size_hint(&self) -> (usize, Option<usize>) {
1269         self.0.size_hint()
1270     }
1271 }
1272
1273 #[stable(feature = "rust1", since = "1.0.0")]
1274 #[allow(deprecated)]
1275 impl<'a> DoubleEndedIterator for LinesAny<'a> {
1276     #[inline]
1277     fn next_back(&mut self) -> Option<&'a str> {
1278         self.0.next_back()
1279     }
1280 }
1281
1282 #[unstable(feature = "fused", issue = "35602")]
1283 #[allow(deprecated)]
1284 impl<'a> FusedIterator for LinesAny<'a> {}
1285
1286 /*
1287 Section: Comparing strings
1288 */
1289
1290 /// Bytewise slice equality
1291 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
1292 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
1293 #[lang = "str_eq"]
1294 #[inline]
1295 fn eq_slice(a: &str, b: &str) -> bool {
1296     a.as_bytes() == b.as_bytes()
1297 }
1298
1299 /*
1300 Section: UTF-8 validation
1301 */
1302
1303 // use truncation to fit u64 into usize
1304 const NONASCII_MASK: usize = 0x80808080_80808080u64 as usize;
1305
1306 /// Returns `true` if any byte in the word `x` is nonascii (>= 128).
1307 #[inline]
1308 fn contains_nonascii(x: usize) -> bool {
1309     (x & NONASCII_MASK) != 0
1310 }
1311
1312 /// Walks through `iter` checking that it's a valid UTF-8 sequence,
1313 /// returning `true` in that case, or, if it is invalid, `false` with
1314 /// `iter` reset such that it is pointing at the first byte in the
1315 /// invalid sequence.
1316 #[inline(always)]
1317 fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
1318     let mut index = 0;
1319     let len = v.len();
1320
1321     let usize_bytes = mem::size_of::<usize>();
1322     let ascii_block_size = 2 * usize_bytes;
1323     let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1324
1325     while index < len {
1326         let old_offset = index;
1327         macro_rules! err {
1328             ($error_len: expr) => {
1329                 return Err(Utf8Error {
1330                     valid_up_to: old_offset,
1331                     error_len: $error_len,
1332                 })
1333             }
1334         }
1335
1336         macro_rules! next { () => {{
1337             index += 1;
1338             // we needed data, but there was none: error!
1339             if index >= len {
1340                 err!(None)
1341             }
1342             v[index]
1343         }}}
1344
1345         let first = v[index];
1346         if first >= 128 {
1347             let w = UTF8_CHAR_WIDTH[first as usize];
1348             // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
1349             //        first  C2 80        last DF BF
1350             // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
1351             //        first  E0 A0 80     last EF BF BF
1352             //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
1353             //               ED A0 80 to       ED BF BF
1354             // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff
1355             //        first  F0 90 80 80  last F4 8F BF BF
1356             //
1357             // Use the UTF-8 syntax from the RFC
1358             //
1359             // https://tools.ietf.org/html/rfc3629
1360             // UTF8-1      = %x00-7F
1361             // UTF8-2      = %xC2-DF UTF8-tail
1362             // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
1363             //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
1364             // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
1365             //               %xF4 %x80-8F 2( UTF8-tail )
1366             match w {
1367                 2 => if next!() & !CONT_MASK != TAG_CONT_U8 {
1368                     err!(Some(1))
1369                 },
1370                 3 => {
1371                     match (first, next!()) {
1372                         (0xE0         , 0xA0 ... 0xBF) |
1373                         (0xE1 ... 0xEC, 0x80 ... 0xBF) |
1374                         (0xED         , 0x80 ... 0x9F) |
1375                         (0xEE ... 0xEF, 0x80 ... 0xBF) => {}
1376                         _ => err!(Some(1))
1377                     }
1378                     if next!() & !CONT_MASK != TAG_CONT_U8 {
1379                         err!(Some(2))
1380                     }
1381                 }
1382                 4 => {
1383                     match (first, next!()) {
1384                         (0xF0         , 0x90 ... 0xBF) |
1385                         (0xF1 ... 0xF3, 0x80 ... 0xBF) |
1386                         (0xF4         , 0x80 ... 0x8F) => {}
1387                         _ => err!(Some(1))
1388                     }
1389                     if next!() & !CONT_MASK != TAG_CONT_U8 {
1390                         err!(Some(2))
1391                     }
1392                     if next!() & !CONT_MASK != TAG_CONT_U8 {
1393                         err!(Some(3))
1394                     }
1395                 }
1396                 _ => err!(Some(1))
1397             }
1398             index += 1;
1399         } else {
1400             // Ascii case, try to skip forward quickly.
1401             // When the pointer is aligned, read 2 words of data per iteration
1402             // until we find a word containing a non-ascii byte.
1403             let ptr = v.as_ptr();
1404             let align = (ptr as usize + index) & (usize_bytes - 1);
1405             if align == 0 {
1406                 while index < blocks_end {
1407                     unsafe {
1408                         let block = ptr.offset(index as isize) as *const usize;
1409                         // break if there is a nonascii byte
1410                         let zu = contains_nonascii(*block);
1411                         let zv = contains_nonascii(*block.offset(1));
1412                         if zu | zv {
1413                             break;
1414                         }
1415                     }
1416                     index += ascii_block_size;
1417                 }
1418                 // step from the point where the wordwise loop stopped
1419                 while index < len && v[index] < 128 {
1420                     index += 1;
1421                 }
1422             } else {
1423                 index += 1;
1424             }
1425         }
1426     }
1427
1428     Ok(())
1429 }
1430
1431 // https://tools.ietf.org/html/rfc3629
1432 static UTF8_CHAR_WIDTH: [u8; 256] = [
1433 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1434 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
1435 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1436 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
1437 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1438 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
1439 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1440 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
1441 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1442 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
1443 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1444 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
1445 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1446 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
1447 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
1448 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
1449 ];
1450
1451 /// Given a first byte, determines how many bytes are in this UTF-8 character.
1452 #[unstable(feature = "str_internals", issue = "0")]
1453 #[inline]
1454 pub fn utf8_char_width(b: u8) -> usize {
1455     return UTF8_CHAR_WIDTH[b as usize] as usize;
1456 }
1457
1458 /// Mask of the value bits of a continuation byte.
1459 const CONT_MASK: u8 = 0b0011_1111;
1460 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte.
1461 const TAG_CONT_U8: u8 = 0b1000_0000;
1462
1463 /*
1464 Section: Trait implementations
1465 */
1466
1467 mod traits {
1468     use cmp::Ordering;
1469     use ops;
1470     use mem;
1471     use slice::{self, SliceIndex};
1472     use str::eq_slice;
1473
1474     /// Implements ordering of strings.
1475     ///
1476     /// Strings are ordered  lexicographically by their byte values.  This orders Unicode code
1477     /// points based on their positions in the code charts.  This is not necessarily the same as
1478     /// "alphabetical" order, which varies by language and locale.  Sorting strings according to
1479     /// culturally-accepted standards requires locale-specific data that is outside the scope of
1480     /// the `str` type.
1481     #[stable(feature = "rust1", since = "1.0.0")]
1482     impl Ord for str {
1483         #[inline]
1484         fn cmp(&self, other: &str) -> Ordering {
1485             self.as_bytes().cmp(other.as_bytes())
1486         }
1487     }
1488
1489     #[stable(feature = "rust1", since = "1.0.0")]
1490     impl PartialEq for str {
1491         #[inline]
1492         fn eq(&self, other: &str) -> bool {
1493             eq_slice(self, other)
1494         }
1495         #[inline]
1496         fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1497     }
1498
1499     #[stable(feature = "rust1", since = "1.0.0")]
1500     impl Eq for str {}
1501
1502     /// Implements comparison operations on strings.
1503     ///
1504     /// Strings are compared lexicographically by their byte values.  This compares Unicode code
1505     /// points based on their positions in the code charts.  This is not necessarily the same as
1506     /// "alphabetical" order, which varies by language and locale.  Comparing strings according to
1507     /// culturally-accepted standards requires locale-specific data that is outside the scope of
1508     /// the `str` type.
1509     #[stable(feature = "rust1", since = "1.0.0")]
1510     impl PartialOrd for str {
1511         #[inline]
1512         fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1513             Some(self.cmp(other))
1514         }
1515     }
1516
1517     /// Implements substring slicing with syntax `&self[begin .. end]`.
1518     ///
1519     /// Returns a slice of the given string from the byte range
1520     /// [`begin`..`end`).
1521     ///
1522     /// This operation is `O(1)`.
1523     ///
1524     /// # Panics
1525     ///
1526     /// Panics if `begin` or `end` does not point to the starting
1527     /// byte offset of a character (as defined by `is_char_boundary`).
1528     /// Requires that `begin <= end` and `end <= len` where `len` is the
1529     /// length of the string.
1530     ///
1531     /// # Examples
1532     ///
1533     /// ```
1534     /// let s = "Löwe 老虎 Léopard";
1535     /// assert_eq!(&s[0 .. 1], "L");
1536     ///
1537     /// assert_eq!(&s[1 .. 9], "öwe 老");
1538     ///
1539     /// // these will panic:
1540     /// // byte 2 lies within `ö`:
1541     /// // &s[2 ..3];
1542     ///
1543     /// // byte 8 lies within `老`
1544     /// // &s[1 .. 8];
1545     ///
1546     /// // byte 100 is outside the string
1547     /// // &s[3 .. 100];
1548     /// ```
1549     #[stable(feature = "rust1", since = "1.0.0")]
1550     impl ops::Index<ops::Range<usize>> for str {
1551         type Output = str;
1552         #[inline]
1553         fn index(&self, index: ops::Range<usize>) -> &str {
1554             index.index(self)
1555         }
1556     }
1557
1558     /// Implements mutable substring slicing with syntax
1559     /// `&mut self[begin .. end]`.
1560     ///
1561     /// Returns a mutable slice of the given string from the byte range
1562     /// [`begin`..`end`).
1563     ///
1564     /// This operation is `O(1)`.
1565     ///
1566     /// # Panics
1567     ///
1568     /// Panics if `begin` or `end` does not point to the starting
1569     /// byte offset of a character (as defined by `is_char_boundary`).
1570     /// Requires that `begin <= end` and `end <= len` where `len` is the
1571     /// length of the string.
1572     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1573     impl ops::IndexMut<ops::Range<usize>> for str {
1574         #[inline]
1575         fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1576             index.index_mut(self)
1577         }
1578     }
1579
1580     /// Implements substring slicing with syntax `&self[.. end]`.
1581     ///
1582     /// Returns a slice of the string from the beginning to byte offset
1583     /// `end`.
1584     ///
1585     /// Equivalent to `&self[0 .. end]`.
1586     #[stable(feature = "rust1", since = "1.0.0")]
1587     impl ops::Index<ops::RangeTo<usize>> for str {
1588         type Output = str;
1589
1590         #[inline]
1591         fn index(&self, index: ops::RangeTo<usize>) -> &str {
1592             // is_char_boundary checks that the index is in [0, .len()]
1593             if self.is_char_boundary(index.end) {
1594                 unsafe { self.slice_unchecked(0, index.end) }
1595             } else {
1596                 super::slice_error_fail(self, 0, index.end)
1597             }
1598         }
1599     }
1600
1601     /// Implements mutable substring slicing with syntax `&mut self[.. end]`.
1602     ///
1603     /// Returns a mutable slice of the string from the beginning to byte offset
1604     /// `end`.
1605     ///
1606     /// Equivalent to `&mut self[0 .. end]`.
1607     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1608     impl ops::IndexMut<ops::RangeTo<usize>> for str {
1609         #[inline]
1610         fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1611             // is_char_boundary checks that the index is in [0, .len()]
1612             if self.is_char_boundary(index.end) {
1613                 unsafe { self.slice_mut_unchecked(0, index.end) }
1614             } else {
1615                 super::slice_error_fail(self, 0, index.end)
1616             }
1617         }
1618     }
1619
1620     /// Implements substring slicing with syntax `&self[begin ..]`.
1621     ///
1622     /// Returns a slice of the string from byte offset `begin`
1623     /// to the end of the string.
1624     ///
1625     /// Equivalent to `&self[begin .. len]`.
1626     #[stable(feature = "rust1", since = "1.0.0")]
1627     impl ops::Index<ops::RangeFrom<usize>> for str {
1628         type Output = str;
1629
1630         #[inline]
1631         fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1632             // is_char_boundary checks that the index is in [0, .len()]
1633             if self.is_char_boundary(index.start) {
1634                 unsafe { self.slice_unchecked(index.start, self.len()) }
1635             } else {
1636                 super::slice_error_fail(self, index.start, self.len())
1637             }
1638         }
1639     }
1640
1641     /// Implements mutable substring slicing with syntax `&mut self[begin ..]`.
1642     ///
1643     /// Returns a mutable slice of the string from byte offset `begin`
1644     /// to the end of the string.
1645     ///
1646     /// Equivalent to `&mut self[begin .. len]`.
1647     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1648     impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1649         #[inline]
1650         fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1651             // is_char_boundary checks that the index is in [0, .len()]
1652             if self.is_char_boundary(index.start) {
1653                 let len = self.len();
1654                 unsafe { self.slice_mut_unchecked(index.start, len) }
1655             } else {
1656                 super::slice_error_fail(self, index.start, self.len())
1657             }
1658         }
1659     }
1660
1661     /// Implements substring slicing with syntax `&self[..]`.
1662     ///
1663     /// Returns a slice of the whole string. This operation can
1664     /// never panic.
1665     ///
1666     /// Equivalent to `&self[0 .. len]`.
1667     #[stable(feature = "rust1", since = "1.0.0")]
1668     impl ops::Index<ops::RangeFull> for str {
1669         type Output = str;
1670
1671         #[inline]
1672         fn index(&self, _index: ops::RangeFull) -> &str {
1673             self
1674         }
1675     }
1676
1677     /// Implements mutable substring slicing with syntax `&mut self[..]`.
1678     ///
1679     /// Returns a mutable slice of the whole string. This operation can
1680     /// never panic.
1681     ///
1682     /// Equivalent to `&mut self[0 .. len]`.
1683     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1684     impl ops::IndexMut<ops::RangeFull> for str {
1685         #[inline]
1686         fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
1687             self
1688         }
1689     }
1690
1691     #[unstable(feature = "inclusive_range",
1692                reason = "recently added, follows RFC",
1693                issue = "28237")]
1694     impl ops::Index<ops::RangeInclusive<usize>> for str {
1695         type Output = str;
1696
1697         #[inline]
1698         fn index(&self, index: ops::RangeInclusive<usize>) -> &str {
1699             match index {
1700                 ops::RangeInclusive::Empty { .. } => "",
1701                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1702                     panic!("attempted to index slice up to maximum usize"),
1703                 ops::RangeInclusive::NonEmpty { start, end } =>
1704                     self.index(start .. end+1)
1705             }
1706         }
1707     }
1708     #[unstable(feature = "inclusive_range",
1709                reason = "recently added, follows RFC",
1710                issue = "28237")]
1711     impl ops::Index<ops::RangeToInclusive<usize>> for str {
1712         type Output = str;
1713
1714         #[inline]
1715         fn index(&self, index: ops::RangeToInclusive<usize>) -> &str {
1716             self.index(0...index.end)
1717         }
1718     }
1719
1720     #[unstable(feature = "inclusive_range",
1721                reason = "recently added, follows RFC",
1722                issue = "28237")]
1723     impl ops::IndexMut<ops::RangeInclusive<usize>> for str {
1724         #[inline]
1725         fn index_mut(&mut self, index: ops::RangeInclusive<usize>) -> &mut str {
1726             match index {
1727                 ops::RangeInclusive::Empty { .. } => &mut self[0..0], // `&mut ""` doesn't work
1728                 ops::RangeInclusive::NonEmpty { end, .. } if end == usize::max_value() =>
1729                     panic!("attempted to index str up to maximum usize"),
1730                     ops::RangeInclusive::NonEmpty { start, end } =>
1731                         self.index_mut(start .. end+1)
1732             }
1733         }
1734     }
1735     #[unstable(feature = "inclusive_range",
1736                reason = "recently added, follows RFC",
1737                issue = "28237")]
1738     impl ops::IndexMut<ops::RangeToInclusive<usize>> for str {
1739         #[inline]
1740         fn index_mut(&mut self, index: ops::RangeToInclusive<usize>) -> &mut str {
1741             self.index_mut(0...index.end)
1742         }
1743     }
1744
1745     #[unstable(feature = "str_checked_slicing", issue = "39932")]
1746     impl SliceIndex<str> for ops::RangeFull {
1747         type Output = str;
1748         #[inline]
1749         fn get(self, slice: &str) -> Option<&Self::Output> {
1750             Some(slice)
1751         }
1752         #[inline]
1753         fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1754             Some(slice)
1755         }
1756         #[inline]
1757         unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1758             slice
1759         }
1760         #[inline]
1761         unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1762             slice
1763         }
1764         #[inline]
1765         fn index(self, slice: &str) -> &Self::Output {
1766             slice
1767         }
1768         #[inline]
1769         fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1770             slice
1771         }
1772     }
1773
1774     #[unstable(feature = "str_checked_slicing", issue = "39932")]
1775     impl SliceIndex<str> for ops::Range<usize> {
1776         type Output = str;
1777         #[inline]
1778         fn get(self, slice: &str) -> Option<&Self::Output> {
1779             if self.start <= self.end &&
1780                slice.is_char_boundary(self.start) &&
1781                slice.is_char_boundary(self.end) {
1782                 Some(unsafe { self.get_unchecked(slice) })
1783             } else {
1784                 None
1785             }
1786         }
1787         #[inline]
1788         fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1789             if self.start <= self.end &&
1790                slice.is_char_boundary(self.start) &&
1791                slice.is_char_boundary(self.end) {
1792                 Some(unsafe { self.get_unchecked_mut(slice) })
1793             } else {
1794                 None
1795             }
1796         }
1797         #[inline]
1798         unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1799             let ptr = slice.as_ptr().offset(self.start as isize);
1800             let len = self.end - self.start;
1801             super::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1802         }
1803         #[inline]
1804         unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1805             let ptr = slice.as_ptr().offset(self.start as isize);
1806             let len = self.end - self.start;
1807             mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
1808         }
1809         #[inline]
1810         fn index(self, slice: &str) -> &Self::Output {
1811             let (start, end) = (self.start, self.end);
1812             self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
1813         }
1814         #[inline]
1815         fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1816             // is_char_boundary checks that the index is in [0, .len()]
1817             // canot reuse `get` as above, because of NLL trouble
1818             if self.start <= self.end &&
1819                slice.is_char_boundary(self.start) &&
1820                slice.is_char_boundary(self.end) {
1821                 unsafe { self.get_unchecked_mut(slice) }
1822             } else {
1823                 super::slice_error_fail(slice, self.start, self.end)
1824             }
1825         }
1826     }
1827
1828     #[unstable(feature = "str_checked_slicing", issue = "39932")]
1829     impl SliceIndex<str> for ops::RangeTo<usize> {
1830         type Output = str;
1831         #[inline]
1832         fn get(self, slice: &str) -> Option<&Self::Output> {
1833             if slice.is_char_boundary(self.end) {
1834                 Some(unsafe { self.get_unchecked(slice) })
1835             } else {
1836                 None
1837             }
1838         }
1839         #[inline]
1840         fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1841             if slice.is_char_boundary(self.end) {
1842                 Some(unsafe { self.get_unchecked_mut(slice) })
1843             } else {
1844                 None
1845             }
1846         }
1847         #[inline]
1848         unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1849             let ptr = slice.as_ptr();
1850             super::from_utf8_unchecked(slice::from_raw_parts(ptr, self.end))
1851         }
1852         #[inline]
1853         unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1854             let ptr = slice.as_ptr();
1855             mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, self.end))
1856         }
1857         #[inline]
1858         fn index(self, slice: &str) -> &Self::Output {
1859             let end = self.end;
1860             self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, 0, end))
1861         }
1862         #[inline]
1863         fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1864             if slice.is_char_boundary(self.end) {
1865                 unsafe { self.get_unchecked_mut(slice) }
1866             } else {
1867                 super::slice_error_fail(slice, 0, self.end)
1868             }
1869         }
1870     }
1871
1872     #[unstable(feature = "str_checked_slicing", issue = "39932")]
1873     impl SliceIndex<str> for ops::RangeFrom<usize> {
1874         type Output = str;
1875         #[inline]
1876         fn get(self, slice: &str) -> Option<&Self::Output> {
1877             if slice.is_char_boundary(self.start) {
1878                 Some(unsafe { self.get_unchecked(slice) })
1879             } else {
1880                 None
1881             }
1882         }
1883         #[inline]
1884         fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1885             if slice.is_char_boundary(self.start) {
1886                 Some(unsafe { self.get_unchecked_mut(slice) })
1887             } else {
1888                 None
1889             }
1890         }
1891         #[inline]
1892         unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1893             let ptr = slice.as_ptr().offset(self.start as isize);
1894             let len = slice.len() - self.start;
1895             super::from_utf8_unchecked(slice::from_raw_parts(ptr, len))
1896         }
1897         #[inline]
1898         unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1899             let ptr = slice.as_ptr().offset(self.start as isize);
1900             let len = slice.len() - self.start;
1901             mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, len))
1902         }
1903         #[inline]
1904         fn index(self, slice: &str) -> &Self::Output {
1905             let (start, end) = (self.start, slice.len());
1906             self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, start, end))
1907         }
1908         #[inline]
1909         fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1910             if slice.is_char_boundary(self.start) {
1911                 unsafe { self.get_unchecked_mut(slice) }
1912             } else {
1913                 super::slice_error_fail(slice, self.start, slice.len())
1914             }
1915         }
1916     }
1917
1918     #[unstable(feature = "str_checked_slicing", issue = "39932")]
1919     impl SliceIndex<str> for ops::RangeInclusive<usize> {
1920         type Output = str;
1921         #[inline]
1922         fn get(self, slice: &str) -> Option<&Self::Output> {
1923             match self {
1924                 ops::RangeInclusive::Empty { .. } => 0..0,
1925                 ops::RangeInclusive::NonEmpty { start, end } => start..end+1,
1926             }.get(slice)
1927         }
1928         #[inline]
1929         fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1930             match self {
1931                 ops::RangeInclusive::Empty { .. } => 0..0,
1932                 ops::RangeInclusive::NonEmpty { start, end } => start..end+1,
1933             }.get_mut(slice)
1934         }
1935         #[inline]
1936         unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1937             match self {
1938                 ops::RangeInclusive::Empty { .. } => 0..0,
1939                 ops::RangeInclusive::NonEmpty { start, end } => start..end+1,
1940             }.get_unchecked(slice)
1941         }
1942         #[inline]
1943         unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1944             match self {
1945                 ops::RangeInclusive::Empty { .. } => 0..0,
1946                 ops::RangeInclusive::NonEmpty { start, end } => start..end+1,
1947             }.get_unchecked_mut(slice)
1948         }
1949         #[inline]
1950         fn index(self, slice: &str) -> &Self::Output {
1951             match self {
1952                 ops::RangeInclusive::Empty { .. } => 0..0,
1953                 ops::RangeInclusive::NonEmpty { start, end } => start..end+1,
1954             }.index(slice)
1955         }
1956         #[inline]
1957         fn index_mut(self, slice: &mut str) -> &mut Self::Output {
1958             match self {
1959                 ops::RangeInclusive::Empty { .. } => 0..0,
1960                 ops::RangeInclusive::NonEmpty { start, end } => start..end+1,
1961             }.index_mut(slice)
1962         }
1963     }
1964
1965
1966
1967     #[unstable(feature = "str_checked_slicing", issue = "39932")]
1968     impl SliceIndex<str> for ops::RangeToInclusive<usize> {
1969         type Output = str;
1970         #[inline]
1971         fn get(self, slice: &str) -> Option<&Self::Output> {
1972             if slice.is_char_boundary(self.end + 1) {
1973                 Some(unsafe { self.get_unchecked(slice) })
1974             } else {
1975                 None
1976             }
1977         }
1978         #[inline]
1979         fn get_mut(self, slice: &mut str) -> Option<&mut Self::Output> {
1980             if slice.is_char_boundary(self.end + 1) {
1981                 Some(unsafe { self.get_unchecked_mut(slice) })
1982             } else {
1983                 None
1984             }
1985         }
1986         #[inline]
1987         unsafe fn get_unchecked(self, slice: &str) -> &Self::Output {
1988             let ptr = slice.as_ptr();
1989             super::from_utf8_unchecked(slice::from_raw_parts(ptr, self.end + 1))
1990         }
1991         #[inline]
1992         unsafe fn get_unchecked_mut(self, slice: &mut str) -> &mut Self::Output {
1993             let ptr = slice.as_ptr();
1994             mem::transmute(slice::from_raw_parts_mut(ptr as *mut u8, self.end + 1))
1995         }
1996         #[inline]
1997         fn index(self, slice: &str) -> &Self::Output {
1998             let end = self.end + 1;
1999             self.get(slice).unwrap_or_else(|| super::slice_error_fail(slice, 0, end))
2000         }
2001         #[inline]
2002         fn index_mut(self, slice: &mut str) -> &mut Self::Output {
2003             if slice.is_char_boundary(self.end) {
2004                 unsafe { self.get_unchecked_mut(slice) }
2005             } else {
2006                 super::slice_error_fail(slice, 0, self.end + 1)
2007             }
2008         }
2009     }
2010
2011 }
2012
2013
2014 /// Methods for string slices
2015 #[allow(missing_docs)]
2016 #[doc(hidden)]
2017 #[unstable(feature = "core_str_ext",
2018            reason = "stable interface provided by `impl str` in later crates",
2019            issue = "32110")]
2020 pub trait StrExt {
2021     // NB there are no docs here are they're all located on the StrExt trait in
2022     // libcollections, not here.
2023
2024     #[stable(feature = "core", since = "1.6.0")]
2025     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
2026     #[stable(feature = "core", since = "1.6.0")]
2027     fn chars(&self) -> Chars;
2028     #[stable(feature = "core", since = "1.6.0")]
2029     fn bytes(&self) -> Bytes;
2030     #[stable(feature = "core", since = "1.6.0")]
2031     fn char_indices(&self) -> CharIndices;
2032     #[stable(feature = "core", since = "1.6.0")]
2033     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
2034     #[stable(feature = "core", since = "1.6.0")]
2035     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
2036         where P::Searcher: ReverseSearcher<'a>;
2037     #[stable(feature = "core", since = "1.6.0")]
2038     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
2039     #[stable(feature = "core", since = "1.6.0")]
2040     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
2041         where P::Searcher: ReverseSearcher<'a>;
2042     #[stable(feature = "core", since = "1.6.0")]
2043     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
2044     #[stable(feature = "core", since = "1.6.0")]
2045     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
2046         where P::Searcher: ReverseSearcher<'a>;
2047     #[stable(feature = "core", since = "1.6.0")]
2048     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
2049     #[stable(feature = "core", since = "1.6.0")]
2050     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
2051         where P::Searcher: ReverseSearcher<'a>;
2052     #[stable(feature = "core", since = "1.6.0")]
2053     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
2054     #[stable(feature = "core", since = "1.6.0")]
2055     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
2056         where P::Searcher: ReverseSearcher<'a>;
2057     #[stable(feature = "core", since = "1.6.0")]
2058     fn lines(&self) -> Lines;
2059     #[stable(feature = "core", since = "1.6.0")]
2060     #[rustc_deprecated(since = "1.6.0", reason = "use lines() instead now")]
2061     #[allow(deprecated)]
2062     fn lines_any(&self) -> LinesAny;
2063     #[unstable(feature = "str_checked_slicing", issue = "39932")]
2064     fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output>;
2065     #[unstable(feature = "str_checked_slicing", issue = "39932")]
2066     fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output>;
2067     #[unstable(feature = "str_checked_slicing", issue = "39932")]
2068     unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output;
2069     #[unstable(feature = "str_checked_slicing", issue = "39932")]
2070     unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output;
2071     #[stable(feature = "core", since = "1.6.0")]
2072     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str;
2073     #[stable(feature = "core", since = "1.6.0")]
2074     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str;
2075     #[stable(feature = "core", since = "1.6.0")]
2076     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
2077     #[stable(feature = "core", since = "1.6.0")]
2078     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
2079         where P::Searcher: ReverseSearcher<'a>;
2080     #[stable(feature = "core", since = "1.6.0")]
2081     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
2082         where P::Searcher: DoubleEndedSearcher<'a>;
2083     #[stable(feature = "core", since = "1.6.0")]
2084     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
2085     #[stable(feature = "core", since = "1.6.0")]
2086     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
2087         where P::Searcher: ReverseSearcher<'a>;
2088     #[stable(feature = "is_char_boundary", since = "1.9.0")]
2089     fn is_char_boundary(&self, index: usize) -> bool;
2090     #[stable(feature = "core", since = "1.6.0")]
2091     fn as_bytes(&self) -> &[u8];
2092     #[stable(feature = "core", since = "1.6.0")]
2093     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
2094     #[stable(feature = "core", since = "1.6.0")]
2095     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
2096         where P::Searcher: ReverseSearcher<'a>;
2097     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
2098     #[stable(feature = "core", since = "1.6.0")]
2099     fn split_at(&self, mid: usize) -> (&str, &str);
2100     #[stable(feature = "core", since = "1.6.0")]
2101     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
2102     #[stable(feature = "core", since = "1.6.0")]
2103     fn as_ptr(&self) -> *const u8;
2104     #[stable(feature = "core", since = "1.6.0")]
2105     fn len(&self) -> usize;
2106     #[stable(feature = "core", since = "1.6.0")]
2107     fn is_empty(&self) -> bool;
2108     #[stable(feature = "core", since = "1.6.0")]
2109     fn parse<'a, T: TryFrom<&'a str>>(&'a self) -> Result<T, T::Error>;
2110 }
2111
2112 // truncate `&str` to length at most equal to `max`
2113 // return `true` if it were truncated, and the new str.
2114 fn truncate_to_char_boundary(s: &str, mut max: usize) -> (bool, &str) {
2115     if max >= s.len() {
2116         (false, s)
2117     } else {
2118         while !s.is_char_boundary(max) {
2119             max -= 1;
2120         }
2121         (true, &s[..max])
2122     }
2123 }
2124
2125 #[inline(never)]
2126 #[cold]
2127 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
2128     const MAX_DISPLAY_LENGTH: usize = 256;
2129     let (truncated, s_trunc) = truncate_to_char_boundary(s, MAX_DISPLAY_LENGTH);
2130     let ellipsis = if truncated { "[...]" } else { "" };
2131
2132     // 1. out of bounds
2133     if begin > s.len() || end > s.len() {
2134         let oob_index = if begin > s.len() { begin } else { end };
2135         panic!("byte index {} is out of bounds of `{}`{}", oob_index, s_trunc, ellipsis);
2136     }
2137
2138     // 2. begin <= end
2139     assert!(begin <= end, "begin <= end ({} <= {}) when slicing `{}`{}",
2140             begin, end, s_trunc, ellipsis);
2141
2142     // 3. character boundary
2143     let index = if !s.is_char_boundary(begin) { begin } else { end };
2144     // find the character
2145     let mut char_start = index;
2146     while !s.is_char_boundary(char_start) {
2147         char_start -= 1;
2148     }
2149     // `char_start` must be less than len and a char boundary
2150     let ch = s[char_start..].chars().next().unwrap();
2151     let char_range = char_start .. char_start + ch.len_utf8();
2152     panic!("byte index {} is not a char boundary; it is inside {:?} (bytes {:?}) of `{}`{}",
2153            index, ch, char_range, s_trunc, ellipsis);
2154 }
2155
2156 #[stable(feature = "core", since = "1.6.0")]
2157 impl StrExt for str {
2158     #[inline]
2159     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
2160         pat.is_contained_in(self)
2161     }
2162
2163     #[inline]
2164     fn chars(&self) -> Chars {
2165         Chars{iter: self.as_bytes().iter()}
2166     }
2167
2168     #[inline]
2169     fn bytes(&self) -> Bytes {
2170         Bytes(self.as_bytes().iter().cloned())
2171     }
2172
2173     #[inline]
2174     fn char_indices(&self) -> CharIndices {
2175         CharIndices { front_offset: 0, iter: self.chars() }
2176     }
2177
2178     #[inline]
2179     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
2180         Split(SplitInternal {
2181             start: 0,
2182             end: self.len(),
2183             matcher: pat.into_searcher(self),
2184             allow_trailing_empty: true,
2185             finished: false,
2186         })
2187     }
2188
2189     #[inline]
2190     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
2191         where P::Searcher: ReverseSearcher<'a>
2192     {
2193         RSplit(self.split(pat).0)
2194     }
2195
2196     #[inline]
2197     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
2198         SplitN(SplitNInternal {
2199             iter: self.split(pat).0,
2200             count: count,
2201         })
2202     }
2203
2204     #[inline]
2205     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
2206         where P::Searcher: ReverseSearcher<'a>
2207     {
2208         RSplitN(self.splitn(count, pat).0)
2209     }
2210
2211     #[inline]
2212     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
2213         SplitTerminator(SplitInternal {
2214             allow_trailing_empty: false,
2215             ..self.split(pat).0
2216         })
2217     }
2218
2219     #[inline]
2220     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
2221         where P::Searcher: ReverseSearcher<'a>
2222     {
2223         RSplitTerminator(self.split_terminator(pat).0)
2224     }
2225
2226     #[inline]
2227     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
2228         Matches(MatchesInternal(pat.into_searcher(self)))
2229     }
2230
2231     #[inline]
2232     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
2233         where P::Searcher: ReverseSearcher<'a>
2234     {
2235         RMatches(self.matches(pat).0)
2236     }
2237
2238     #[inline]
2239     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
2240         MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
2241     }
2242
2243     #[inline]
2244     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
2245         where P::Searcher: ReverseSearcher<'a>
2246     {
2247         RMatchIndices(self.match_indices(pat).0)
2248     }
2249     #[inline]
2250     fn lines(&self) -> Lines {
2251         Lines(self.split_terminator('\n').map(LinesAnyMap))
2252     }
2253
2254     #[inline]
2255     #[allow(deprecated)]
2256     fn lines_any(&self) -> LinesAny {
2257         LinesAny(self.lines())
2258     }
2259
2260     #[inline]
2261     fn get<I: SliceIndex<str>>(&self, i: I) -> Option<&I::Output> {
2262         i.get(self)
2263     }
2264
2265     #[inline]
2266     fn get_mut<I: SliceIndex<str>>(&mut self, i: I) -> Option<&mut I::Output> {
2267         i.get_mut(self)
2268     }
2269
2270     #[inline]
2271     unsafe fn get_unchecked<I: SliceIndex<str>>(&self, i: I) -> &I::Output {
2272         i.get_unchecked(self)
2273     }
2274
2275     #[inline]
2276     unsafe fn get_unchecked_mut<I: SliceIndex<str>>(&mut self, i: I) -> &mut I::Output {
2277         i.get_unchecked_mut(self)
2278     }
2279
2280     #[inline]
2281     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
2282         (begin..end).get_unchecked(self)
2283     }
2284
2285     #[inline]
2286     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
2287         (begin..end).get_unchecked_mut(self)
2288     }
2289
2290     #[inline]
2291     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
2292         pat.is_prefix_of(self)
2293     }
2294
2295     #[inline]
2296     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
2297         where P::Searcher: ReverseSearcher<'a>
2298     {
2299         pat.is_suffix_of(self)
2300     }
2301
2302     #[inline]
2303     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
2304         where P::Searcher: DoubleEndedSearcher<'a>
2305     {
2306         let mut i = 0;
2307         let mut j = 0;
2308         let mut matcher = pat.into_searcher(self);
2309         if let Some((a, b)) = matcher.next_reject() {
2310             i = a;
2311             j = b; // Remember earliest known match, correct it below if
2312                    // last match is different
2313         }
2314         if let Some((_, b)) = matcher.next_reject_back() {
2315             j = b;
2316         }
2317         unsafe {
2318             // Searcher is known to return valid indices
2319             self.slice_unchecked(i, j)
2320         }
2321     }
2322
2323     #[inline]
2324     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
2325         let mut i = self.len();
2326         let mut matcher = pat.into_searcher(self);
2327         if let Some((a, _)) = matcher.next_reject() {
2328             i = a;
2329         }
2330         unsafe {
2331             // Searcher is known to return valid indices
2332             self.slice_unchecked(i, self.len())
2333         }
2334     }
2335
2336     #[inline]
2337     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
2338         where P::Searcher: ReverseSearcher<'a>
2339     {
2340         let mut j = 0;
2341         let mut matcher = pat.into_searcher(self);
2342         if let Some((_, b)) = matcher.next_reject_back() {
2343             j = b;
2344         }
2345         unsafe {
2346             // Searcher is known to return valid indices
2347             self.slice_unchecked(0, j)
2348         }
2349     }
2350
2351     #[inline]
2352     fn is_char_boundary(&self, index: usize) -> bool {
2353         // 0 and len are always ok.
2354         // Test for 0 explicitly so that it can optimize out the check
2355         // easily and skip reading string data for that case.
2356         if index == 0 || index == self.len() { return true; }
2357         match self.as_bytes().get(index) {
2358             None => false,
2359             // This is bit magic equivalent to: b < 128 || b >= 192
2360             Some(&b) => (b as i8) >= -0x40,
2361         }
2362     }
2363
2364     #[inline]
2365     fn as_bytes(&self) -> &[u8] {
2366         unsafe { mem::transmute(self) }
2367     }
2368
2369     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2370         pat.into_searcher(self).next_match().map(|(i, _)| i)
2371     }
2372
2373     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
2374         where P::Searcher: ReverseSearcher<'a>
2375     {
2376         pat.into_searcher(self).next_match_back().map(|(i, _)| i)
2377     }
2378
2379     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
2380         self.find(pat)
2381     }
2382
2383     #[inline]
2384     fn split_at(&self, mid: usize) -> (&str, &str) {
2385         // is_char_boundary checks that the index is in [0, .len()]
2386         if self.is_char_boundary(mid) {
2387             unsafe {
2388                 (self.slice_unchecked(0, mid),
2389                  self.slice_unchecked(mid, self.len()))
2390             }
2391         } else {
2392             slice_error_fail(self, 0, mid)
2393         }
2394     }
2395
2396     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
2397         // is_char_boundary checks that the index is in [0, .len()]
2398         if self.is_char_boundary(mid) {
2399             let len = self.len();
2400             let ptr = self.as_ptr() as *mut u8;
2401             unsafe {
2402                 (from_raw_parts_mut(ptr, mid),
2403                  from_raw_parts_mut(ptr.offset(mid as isize), len - mid))
2404             }
2405         } else {
2406             slice_error_fail(self, 0, mid)
2407         }
2408     }
2409
2410     #[inline]
2411     fn as_ptr(&self) -> *const u8 {
2412         self as *const str as *const u8
2413     }
2414
2415     #[inline]
2416     fn len(&self) -> usize {
2417         self.as_bytes().len()
2418     }
2419
2420     #[inline]
2421     fn is_empty(&self) -> bool { self.len() == 0 }
2422
2423     #[inline]
2424     fn parse<'a, T>(&'a self) -> Result<T, T::Error> where T: TryFrom<&'a str> {
2425         T::try_from(self)
2426     }
2427 }
2428
2429 #[stable(feature = "rust1", since = "1.0.0")]
2430 impl AsRef<[u8]> for str {
2431     #[inline]
2432     fn as_ref(&self) -> &[u8] {
2433         self.as_bytes()
2434     }
2435 }
2436
2437 #[stable(feature = "rust1", since = "1.0.0")]
2438 impl<'a> Default for &'a str {
2439     /// Creates an empty str
2440     fn default() -> &'a str { "" }
2441 }