src/libcore/str/mod.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! String manipulation
  12 //!
  13 //! For more details, see std::str
  14
  15 #![doc(primitive = "str")]
  16 #![stable(feature = "rust1", since = "1.0.0")]
  17
  18 use self::pattern::Pattern;
  19 use self::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
  20
  21 use char::CharExt;
  22 use clone::Clone;
  23 use cmp::Eq;
  24 use convert::AsRef;
  25 use default::Default;
  26 use fmt;
  27 use iter::ExactSizeIterator;
  28 use iter::{Map, Iterator, DoubleEndedIterator};
  29 use mem;
  30 use ops::{Fn, FnMut, FnOnce};
  31 use option::Option::{self, None, Some};
  32 use raw::{Repr, Slice};
  33 use result::Result::{self, Ok, Err};
  34 use slice::{self, SliceExt};
  35
  36 pub mod pattern;
  37
  38 /// A trait to abstract the idea of creating a new instance of a type from a
  39 /// string.
  40 #[stable(feature = "rust1", since = "1.0.0")]
  41 pub trait FromStr {
  42     /// The associated error which can be returned from parsing.
  43     #[stable(feature = "rust1", since = "1.0.0")]
  44     type Err;
  45
  46     /// Parses a string `s` to return a value of this type.
  47     ///
  48     /// If parsing succeeds, return the value inside `Ok`, otherwise
  49     /// when the string is ill-formatted return an error specific to the
  50     /// inside `Err`. The error type is specific to implementation of the trait.
  51     #[stable(feature = "rust1", since = "1.0.0")]
  52     fn from_str(s: &str) -> Result<Self, Self::Err>;
  53 }
  54
  55 #[stable(feature = "rust1", since = "1.0.0")]
  56 impl FromStr for bool {
  57     type Err = ParseBoolError;
  58
  59     /// Parse a `bool` from a string.
  60     ///
  61     /// Yields a `Result<bool, ParseBoolError>`, because `s` may or may not
  62     /// actually be parseable.
  63     ///
  64     /// # Examples
  65     ///
  66     /// ```
  67     /// use std::str::FromStr;
  68     ///
  69     /// assert_eq!(FromStr::from_str("true"), Ok(true));
  70     /// assert_eq!(FromStr::from_str("false"), Ok(false));
  71     /// assert!(<bool as FromStr>::from_str("not even a boolean").is_err());
  72     /// ```
  73     ///
  74     /// Note, in many cases, the `.parse()` method on `str` is more proper.
  75     ///
  76     /// ```
  77     /// assert_eq!("true".parse(), Ok(true));
  78     /// assert_eq!("false".parse(), Ok(false));
  79     /// assert!("not even a boolean".parse::<bool>().is_err());
  80     /// ```
  81     #[inline]
  82     fn from_str(s: &str) -> Result<bool, ParseBoolError> {
  83         match s {
  84             "true"  => Ok(true),
  85             "false" => Ok(false),
  86             _       => Err(ParseBoolError { _priv: () }),
  87         }
  88     }
  89 }
  90
  91 /// An error returned when parsing a `bool` from a string fails.
  92 #[derive(Debug, Clone, PartialEq)]
  93 #[stable(feature = "rust1", since = "1.0.0")]
  94 pub struct ParseBoolError { _priv: () }
  95
  96 #[stable(feature = "rust1", since = "1.0.0")]
  97 impl fmt::Display for ParseBoolError {
  98     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
  99         "provided string was not `true` or `false`".fmt(f)
 100     }
 101 }
 102
 103 /*
 104 Section: Creating a string
 105 */
 106
 107 /// Errors which can occur when attempting to interpret a byte slice as a `str`.
 108 #[derive(Copy, Eq, PartialEq, Clone, Debug)]
 109 #[stable(feature = "rust1", since = "1.0.0")]
 110 pub struct Utf8Error {
 111     valid_up_to: usize,
 112 }
 113
 114 impl Utf8Error {
 115     /// Returns the index in the given string up to which valid UTF-8 was
 116     /// verified.
 117     ///
 118     /// Starting at the index provided, but not necessarily at it precisely, an
 119     /// invalid UTF-8 encoding sequence was found.
 120     #[unstable(feature = "utf8_error", reason = "method just added")]
 121     pub fn valid_up_to(&self) -> usize { self.valid_up_to }
 122 }
 123
 124 /// Converts a slice of bytes to a string slice without performing any
 125 /// allocations.
 126 ///
 127 /// Once the slice has been validated as utf-8, it is transmuted in-place and
 128 /// returned as a '&str' instead of a '&[u8]'
 129 ///
 130 /// # Failure
 131 ///
 132 /// Returns `Err` if the slice is not utf-8 with a description as to why the
 133 /// provided slice is not utf-8.
 134 #[stable(feature = "rust1", since = "1.0.0")]
 135 pub fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
 136     try!(run_utf8_validation_iterator(&mut v.iter()));
 137     Ok(unsafe { from_utf8_unchecked(v) })
 138 }
 139
 140 /// Converts a slice of bytes to a string slice without checking
 141 /// that the string contains valid UTF-8.
 142 #[inline(always)]
 143 #[stable(feature = "rust1", since = "1.0.0")]
 144 pub unsafe fn from_utf8_unchecked<'a>(v: &'a [u8]) -> &'a str {
 145     mem::transmute(v)
 146 }
 147
 148 #[stable(feature = "rust1", since = "1.0.0")]
 149 impl fmt::Display for Utf8Error {
 150     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 151         write!(f, "invalid utf-8: invalid byte near index {}", self.valid_up_to)
 152     }
 153 }
 154
 155 /*
 156 Section: Iterators
 157 */
 158
 159 /// Iterator for the char (representing *Unicode Scalar Values*) of a string
 160 ///
 161 /// Created with the method `.chars()`.
 162 #[derive(Clone)]
 163 #[stable(feature = "rust1", since = "1.0.0")]
 164 pub struct Chars<'a> {
 165     iter: slice::Iter<'a, u8>
 166 }
 167
 168 /// Return the initial codepoint accumulator for the first byte.
 169 /// The first byte is special, only want bottom 5 bits for width 2, 4 bits
 170 /// for width 3, and 3 bits for width 4.
 171 #[inline]
 172 fn utf8_first_byte(byte: u8, width: u32) -> u32 { (byte & (0x7F >> width)) as u32 }
 173
 174 /// Return the value of `ch` updated with continuation byte `byte`.
 175 #[inline]
 176 fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 { (ch << 6) | (byte & CONT_MASK) as u32 }
 177
 178 /// Checks whether the byte is a UTF-8 continuation byte (i.e. starts with the
 179 /// bits `10`).
 180 #[inline]
 181 fn utf8_is_cont_byte(byte: u8) -> bool { (byte & !CONT_MASK) == TAG_CONT_U8 }
 182
 183 #[inline]
 184 fn unwrap_or_0(opt: Option<&u8>) -> u8 {
 185     match opt {
 186         Some(&byte) => byte,
 187         None => 0,
 188     }
 189 }
 190
 191 /// Reads the next code point out of a byte iterator (assuming a
 192 /// UTF-8-like encoding).
 193 #[unstable(feature = "str_internals")]
 194 #[inline]
 195 pub fn next_code_point(bytes: &mut slice::Iter<u8>) -> Option<u32> {
 196     // Decode UTF-8
 197     let x = match bytes.next() {
 198         None => return None,
 199         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 200         Some(&next_byte) => next_byte,
 201     };
 202
 203     // Multibyte case follows
 204     // Decode from a byte combination out of: [[[x y] z] w]
 205     // NOTE: Performance is sensitive to the exact formulation here
 206     let init = utf8_first_byte(x, 2);
 207     let y = unwrap_or_0(bytes.next());
 208     let mut ch = utf8_acc_cont_byte(init, y);
 209     if x >= 0xE0 {
 210         // [[x y z] w] case
 211         // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
 212         let z = unwrap_or_0(bytes.next());
 213         let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
 214         ch = init << 12 | y_z;
 215         if x >= 0xF0 {
 216             // [x y z w] case
 217             // use only the lower 3 bits of `init`
 218             let w = unwrap_or_0(bytes.next());
 219             ch = (init & 7) << 18 | utf8_acc_cont_byte(y_z, w);
 220         }
 221     }
 222
 223     Some(ch)
 224 }
 225
 226 /// Reads the last code point out of a byte iterator (assuming a
 227 /// UTF-8-like encoding).
 228 #[inline]
 229 fn next_code_point_reverse(bytes: &mut slice::Iter<u8>) -> Option<u32> {
 230     // Decode UTF-8
 231     let w = match bytes.next_back() {
 232         None => return None,
 233         Some(&next_byte) if next_byte < 128 => return Some(next_byte as u32),
 234         Some(&back_byte) => back_byte,
 235     };
 236
 237     // Multibyte case follows
 238     // Decode from a byte combination out of: [x [y [z w]]]
 239     let mut ch;
 240     let z = unwrap_or_0(bytes.next_back());
 241     ch = utf8_first_byte(z, 2);
 242     if utf8_is_cont_byte(z) {
 243         let y = unwrap_or_0(bytes.next_back());
 244         ch = utf8_first_byte(y, 3);
 245         if utf8_is_cont_byte(y) {
 246             let x = unwrap_or_0(bytes.next_back());
 247             ch = utf8_first_byte(x, 4);
 248             ch = utf8_acc_cont_byte(ch, y);
 249         }
 250         ch = utf8_acc_cont_byte(ch, z);
 251     }
 252     ch = utf8_acc_cont_byte(ch, w);
 253
 254     Some(ch)
 255 }
 256
 257 #[stable(feature = "rust1", since = "1.0.0")]
 258 impl<'a> Iterator for Chars<'a> {
 259     type Item = char;
 260
 261     #[inline]
 262     fn next(&mut self) -> Option<char> {
 263         next_code_point(&mut self.iter).map(|ch| {
 264             // str invariant says `ch` is a valid Unicode Scalar Value
 265             unsafe {
 266                 mem::transmute(ch)
 267             }
 268         })
 269     }
 270
 271     #[inline]
 272     fn size_hint(&self) -> (usize, Option<usize>) {
 273         let (len, _) = self.iter.size_hint();
 274         // `(len + 3)` can't overflow, because we know that the `slice::Iter`
 275         // belongs to a slice in memory which has a maximum length of
 276         // `isize::MAX` (that's well below `usize::MAX`).
 277         ((len + 3) / 4, Some(len))
 278     }
 279 }
 280
 281 #[stable(feature = "rust1", since = "1.0.0")]
 282 impl<'a> DoubleEndedIterator for Chars<'a> {
 283     #[inline]
 284     fn next_back(&mut self) -> Option<char> {
 285         next_code_point_reverse(&mut self.iter).map(|ch| {
 286             // str invariant says `ch` is a valid Unicode Scalar Value
 287             unsafe {
 288                 mem::transmute(ch)
 289             }
 290         })
 291     }
 292 }
 293
 294 /// Iterator for a string's characters and their byte offsets.
 295 #[derive(Clone)]
 296 #[stable(feature = "rust1", since = "1.0.0")]
 297 pub struct CharIndices<'a> {
 298     front_offset: usize,
 299     iter: Chars<'a>,
 300 }
 301
 302 #[stable(feature = "rust1", since = "1.0.0")]
 303 impl<'a> Iterator for CharIndices<'a> {
 304     type Item = (usize, char);
 305
 306     #[inline]
 307     fn next(&mut self) -> Option<(usize, char)> {
 308         let (pre_len, _) = self.iter.iter.size_hint();
 309         match self.iter.next() {
 310             None => None,
 311             Some(ch) => {
 312                 let index = self.front_offset;
 313                 let (len, _) = self.iter.iter.size_hint();
 314                 self.front_offset += pre_len - len;
 315                 Some((index, ch))
 316             }
 317         }
 318     }
 319
 320     #[inline]
 321     fn size_hint(&self) -> (usize, Option<usize>) {
 322         self.iter.size_hint()
 323     }
 324 }
 325
 326 #[stable(feature = "rust1", since = "1.0.0")]
 327 impl<'a> DoubleEndedIterator for CharIndices<'a> {
 328     #[inline]
 329     fn next_back(&mut self) -> Option<(usize, char)> {
 330         match self.iter.next_back() {
 331             None => None,
 332             Some(ch) => {
 333                 let (len, _) = self.iter.iter.size_hint();
 334                 let index = self.front_offset + len;
 335                 Some((index, ch))
 336             }
 337         }
 338     }
 339 }
 340
 341 /// External iterator for a string's bytes.
 342 /// Use with the `std::iter` module.
 343 ///
 344 /// Created with the method `.bytes()`.
 345 #[stable(feature = "rust1", since = "1.0.0")]
 346 #[derive(Clone)]
 347 pub struct Bytes<'a>(Map<slice::Iter<'a, u8>, BytesDeref>);
 348
 349 /// A nameable, clonable fn type
 350 #[derive(Clone)]
 351 struct BytesDeref;
 352
 353 impl<'a> Fn<(&'a u8,)> for BytesDeref {
 354     #[inline]
 355     extern "rust-call" fn call(&self, (ptr,): (&'a u8,)) -> u8 {
 356         *ptr
 357     }
 358 }
 359
 360 impl<'a> FnMut<(&'a u8,)> for BytesDeref {
 361     #[inline]
 362     extern "rust-call" fn call_mut(&mut self, (ptr,): (&'a u8,)) -> u8 {
 363         Fn::call(&*self, (ptr,))
 364     }
 365 }
 366
 367 impl<'a> FnOnce<(&'a u8,)> for BytesDeref {
 368     type Output = u8;
 369
 370     #[inline]
 371     extern "rust-call" fn call_once(self, (ptr,): (&'a u8,)) -> u8 {
 372         Fn::call(&self, (ptr,))
 373     }
 374 }
 375
 376 #[stable(feature = "rust1", since = "1.0.0")]
 377 impl<'a> Iterator for Bytes<'a> {
 378     type Item = u8;
 379
 380     #[inline]
 381     fn next(&mut self) -> Option<u8> {
 382         self.0.next()
 383     }
 384
 385     #[inline]
 386     fn size_hint(&self) -> (usize, Option<usize>) {
 387         self.0.size_hint()
 388     }
 389 }
 390
 391 #[stable(feature = "rust1", since = "1.0.0")]
 392 impl<'a> DoubleEndedIterator for Bytes<'a> {
 393     #[inline]
 394     fn next_back(&mut self) -> Option<u8> {
 395         self.0.next_back()
 396     }
 397 }
 398
 399 #[stable(feature = "rust1", since = "1.0.0")]
 400 impl<'a> ExactSizeIterator for Bytes<'a> {
 401     #[inline]
 402     fn len(&self) -> usize {
 403         self.0.len()
 404     }
 405 }
 406
 407 /// This macro generates a Clone impl for string pattern API
 408 /// wrapper types of the form X<'a, P>
 409 macro_rules! derive_pattern_clone {
 410     (clone $t:ident with |$s:ident| $e:expr) => {
 411         impl<'a, P: Pattern<'a>> Clone for $t<'a, P>
 412             where P::Searcher: Clone
 413         {
 414             fn clone(&self) -> Self {
 415                 let $s = self;
 416                 $e
 417             }
 418         }
 419     }
 420 }
 421
 422 /// This macro generates two public iterator structs
 423 /// wrapping an private internal one that makes use of the `Pattern` API.
 424 ///
 425 /// For all patterns `P: Pattern<'a>` the following items will be
 426 /// generated (generics omitted):
 427 ///
 428 /// struct $forward_iterator($internal_iterator);
 429 /// struct $reverse_iterator($internal_iterator);
 430 ///
 431 /// impl Iterator for $forward_iterator
 432 /// { /* internal ends up calling Searcher::next_match() */ }
 433 ///
 434 /// impl DoubleEndedIterator for $forward_iterator
 435 ///       where P::Searcher: DoubleEndedSearcher
 436 /// { /* internal ends up calling Searcher::next_match_back() */ }
 437 ///
 438 /// impl Iterator for $reverse_iterator
 439 ///       where P::Searcher: ReverseSearcher
 440 /// { /* internal ends up calling Searcher::next_match_back() */ }
 441 ///
 442 /// impl DoubleEndedIterator for $reverse_iterator
 443 ///       where P::Searcher: DoubleEndedSearcher
 444 /// { /* internal ends up calling Searcher::next_match() */ }
 445 ///
 446 /// The internal one is defined outside the macro, and has almost the same
 447 /// semantic as a DoubleEndedIterator by delegating to `pattern::Searcher` and
 448 /// `pattern::ReverseSearcher` for both forward and reverse iteration.
 449 ///
 450 /// "Almost", because a `Searcher` and a `ReverseSearcher` for a given
 451 /// `Pattern` might not return the same elements, so actually implementing
 452 /// `DoubleEndedIterator` for it would be incorrect.
 453 /// (See the docs in `str::pattern` for more details)
 454 ///
 455 /// However, the internal struct still represents a single ended iterator from
 456 /// either end, and depending on pattern is also a valid double ended iterator,
 457 /// so the two wrapper structs implement `Iterator`
 458 /// and `DoubleEndedIterator` depending on the concrete pattern type, leading
 459 /// to the complex impls seen above.
 460 macro_rules! generate_pattern_iterators {
 461     {
 462         // Forward iterator
 463         forward:
 464             $(#[$forward_iterator_attribute:meta])*
 465             struct $forward_iterator:ident;
 466
 467         // Reverse iterator
 468         reverse:
 469             $(#[$reverse_iterator_attribute:meta])*
 470             struct $reverse_iterator:ident;
 471
 472         // Stability of all generated items
 473         stability:
 474             $(#[$common_stability_attribute:meta])*
 475
 476         // Internal almost-iterator that is being delegated to
 477         internal:
 478             $internal_iterator:ident yielding ($iterty:ty);
 479
 480         // Kind of delgation - either single ended or double ended
 481         delegate $($t:tt)*
 482     } => {
 483         $(#[$forward_iterator_attribute])*
 484         $(#[$common_stability_attribute])*
 485         pub struct $forward_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
 486
 487         $(#[$common_stability_attribute])*
 488         impl<'a, P: Pattern<'a>> Iterator for $forward_iterator<'a, P> {
 489             type Item = $iterty;
 490
 491             #[inline]
 492             fn next(&mut self) -> Option<$iterty> {
 493                 self.0.next()
 494             }
 495         }
 496
 497         $(#[$common_stability_attribute])*
 498         impl<'a, P: Pattern<'a>> Clone for $forward_iterator<'a, P>
 499             where P::Searcher: Clone
 500         {
 501             fn clone(&self) -> Self {
 502                 $forward_iterator(self.0.clone())
 503             }
 504         }
 505
 506         $(#[$reverse_iterator_attribute])*
 507         $(#[$common_stability_attribute])*
 508         pub struct $reverse_iterator<'a, P: Pattern<'a>>($internal_iterator<'a, P>);
 509
 510         $(#[$common_stability_attribute])*
 511         impl<'a, P: Pattern<'a>> Iterator for $reverse_iterator<'a, P>
 512             where P::Searcher: ReverseSearcher<'a>
 513         {
 514             type Item = $iterty;
 515
 516             #[inline]
 517             fn next(&mut self) -> Option<$iterty> {
 518                 self.0.next_back()
 519             }
 520         }
 521
 522         $(#[$common_stability_attribute])*
 523         impl<'a, P: Pattern<'a>> Clone for $reverse_iterator<'a, P>
 524             where P::Searcher: Clone
 525         {
 526             fn clone(&self) -> Self {
 527                 $reverse_iterator(self.0.clone())
 528             }
 529         }
 530
 531         generate_pattern_iterators!($($t)* with $(#[$common_stability_attribute])*,
 532                                                 $forward_iterator,
 533                                                 $reverse_iterator, $iterty);
 534     };
 535     {
 536         double ended; with $(#[$common_stability_attribute:meta])*,
 537                            $forward_iterator:ident,
 538                            $reverse_iterator:ident, $iterty:ty
 539     } => {
 540         $(#[$common_stability_attribute])*
 541         impl<'a, P: Pattern<'a>> DoubleEndedIterator for $forward_iterator<'a, P>
 542             where P::Searcher: DoubleEndedSearcher<'a>
 543         {
 544             #[inline]
 545             fn next_back(&mut self) -> Option<$iterty> {
 546                 self.0.next_back()
 547             }
 548         }
 549
 550         $(#[$common_stability_attribute])*
 551         impl<'a, P: Pattern<'a>> DoubleEndedIterator for $reverse_iterator<'a, P>
 552             where P::Searcher: DoubleEndedSearcher<'a>
 553         {
 554             #[inline]
 555             fn next_back(&mut self) -> Option<$iterty> {
 556                 self.0.next()
 557             }
 558         }
 559     };
 560     {
 561         single ended; with $(#[$common_stability_attribute:meta])*,
 562                            $forward_iterator:ident,
 563                            $reverse_iterator:ident, $iterty:ty
 564     } => {}
 565 }
 566
 567 derive_pattern_clone!{
 568     clone SplitInternal
 569     with |s| SplitInternal { matcher: s.matcher.clone(), ..*s }
 570 }
 571 struct SplitInternal<'a, P: Pattern<'a>> {
 572     start: usize,
 573     end: usize,
 574     matcher: P::Searcher,
 575     allow_trailing_empty: bool,
 576     finished: bool,
 577 }
 578
 579 impl<'a, P: Pattern<'a>> SplitInternal<'a, P> {
 580     #[inline]
 581     fn get_end(&mut self) -> Option<&'a str> {
 582         if !self.finished && (self.allow_trailing_empty || self.end - self.start > 0) {
 583             self.finished = true;
 584             unsafe {
 585                 let string = self.matcher.haystack().slice_unchecked(self.start, self.end);
 586                 Some(string)
 587             }
 588         } else {
 589             None
 590         }
 591     }
 592
 593     #[inline]
 594     fn next(&mut self) -> Option<&'a str> {
 595         if self.finished { return None }
 596
 597         let haystack = self.matcher.haystack();
 598         match self.matcher.next_match() {
 599             Some((a, b)) => unsafe {
 600                 let elt = haystack.slice_unchecked(self.start, a);
 601                 self.start = b;
 602                 Some(elt)
 603             },
 604             None => self.get_end(),
 605         }
 606     }
 607
 608     #[inline]
 609     fn next_back(&mut self) -> Option<&'a str>
 610         where P::Searcher: ReverseSearcher<'a>
 611     {
 612         if self.finished { return None }
 613
 614         if !self.allow_trailing_empty {
 615             self.allow_trailing_empty = true;
 616             match self.next_back() {
 617                 Some(elt) if !elt.is_empty() => return Some(elt),
 618                 _ => if self.finished { return None }
 619             }
 620         }
 621
 622         let haystack = self.matcher.haystack();
 623         match self.matcher.next_match_back() {
 624             Some((a, b)) => unsafe {
 625                 let elt = haystack.slice_unchecked(b, self.end);
 626                 self.end = a;
 627                 Some(elt)
 628             },
 629             None => unsafe {
 630                 self.finished = true;
 631                 Some(haystack.slice_unchecked(self.start, self.end))
 632             },
 633         }
 634     }
 635 }
 636
 637 generate_pattern_iterators! {
 638     forward:
 639         #[doc="Created with the method `.split()`."]
 640         struct Split;
 641     reverse:
 642         #[doc="Created with the method `.rsplit()`."]
 643         struct RSplit;
 644     stability:
 645         #[stable(feature = "rust1", since = "1.0.0")]
 646     internal:
 647         SplitInternal yielding (&'a str);
 648     delegate double ended;
 649 }
 650
 651 generate_pattern_iterators! {
 652     forward:
 653         #[doc="Created with the method `.split_terminator()`."]
 654         struct SplitTerminator;
 655     reverse:
 656         #[doc="Created with the method `.rsplit_terminator()`."]
 657         struct RSplitTerminator;
 658     stability:
 659         #[stable(feature = "rust1", since = "1.0.0")]
 660     internal:
 661         SplitInternal yielding (&'a str);
 662     delegate double ended;
 663 }
 664
 665 derive_pattern_clone!{
 666     clone SplitNInternal
 667     with |s| SplitNInternal { iter: s.iter.clone(), ..*s }
 668 }
 669 struct SplitNInternal<'a, P: Pattern<'a>> {
 670     iter: SplitInternal<'a, P>,
 671     /// The number of splits remaining
 672     count: usize,
 673 }
 674
 675 impl<'a, P: Pattern<'a>> SplitNInternal<'a, P> {
 676     #[inline]
 677     fn next(&mut self) -> Option<&'a str> {
 678         match self.count {
 679             0 => None,
 680             1 => { self.count = 0; self.iter.get_end() }
 681             _ => { self.count -= 1; self.iter.next() }
 682         }
 683     }
 684
 685     #[inline]
 686     fn next_back(&mut self) -> Option<&'a str>
 687         where P::Searcher: ReverseSearcher<'a>
 688     {
 689         match self.count {
 690             0 => None,
 691             1 => { self.count = 0; self.iter.get_end() }
 692             _ => { self.count -= 1; self.iter.next_back() }
 693         }
 694     }
 695 }
 696
 697 generate_pattern_iterators! {
 698     forward:
 699         #[doc="Created with the method `.splitn()`."]
 700         struct SplitN;
 701     reverse:
 702         #[doc="Created with the method `.rsplitn()`."]
 703         struct RSplitN;
 704     stability:
 705         #[stable(feature = "rust1", since = "1.0.0")]
 706     internal:
 707         SplitNInternal yielding (&'a str);
 708     delegate single ended;
 709 }
 710
 711 derive_pattern_clone!{
 712     clone MatchIndicesInternal
 713     with |s| MatchIndicesInternal(s.0.clone())
 714 }
 715 struct MatchIndicesInternal<'a, P: Pattern<'a>>(P::Searcher);
 716
 717 impl<'a, P: Pattern<'a>> MatchIndicesInternal<'a, P> {
 718     #[inline]
 719     fn next(&mut self) -> Option<(usize, usize)> {
 720         self.0.next_match()
 721     }
 722
 723     #[inline]
 724     fn next_back(&mut self) -> Option<(usize, usize)>
 725         where P::Searcher: ReverseSearcher<'a>
 726     {
 727         self.0.next_match_back()
 728     }
 729 }
 730
 731 generate_pattern_iterators! {
 732     forward:
 733         #[doc="Created with the method `.match_indices()`."]
 734         struct MatchIndices;
 735     reverse:
 736         #[doc="Created with the method `.rmatch_indices()`."]
 737         struct RMatchIndices;
 738     stability:
 739         #[unstable(feature = "str_match_indices",
 740                    reason = "type may be removed or have its iterator impl changed")]
 741     internal:
 742         MatchIndicesInternal yielding ((usize, usize));
 743     delegate double ended;
 744 }
 745
 746 derive_pattern_clone!{
 747     clone MatchesInternal
 748     with |s| MatchesInternal(s.0.clone())
 749 }
 750 struct MatchesInternal<'a, P: Pattern<'a>>(P::Searcher);
 751
 752 impl<'a, P: Pattern<'a>> MatchesInternal<'a, P> {
 753     #[inline]
 754     fn next(&mut self) -> Option<&'a str> {
 755         self.0.next_match().map(|(a, b)| unsafe {
 756             // Indices are known to be on utf8 boundaries
 757             self.0.haystack().slice_unchecked(a, b)
 758         })
 759     }
 760
 761     #[inline]
 762     fn next_back(&mut self) -> Option<&'a str>
 763         where P::Searcher: ReverseSearcher<'a>
 764     {
 765         self.0.next_match_back().map(|(a, b)| unsafe {
 766             // Indices are known to be on utf8 boundaries
 767             self.0.haystack().slice_unchecked(a, b)
 768         })
 769     }
 770 }
 771
 772 generate_pattern_iterators! {
 773     forward:
 774         #[doc="Created with the method `.matches()`."]
 775         struct Matches;
 776     reverse:
 777         #[doc="Created with the method `.rmatches()`."]
 778         struct RMatches;
 779     stability:
 780         #[stable(feature = "str_matches", since = "1.2.0")]
 781     internal:
 782         MatchesInternal yielding (&'a str);
 783     delegate double ended;
 784 }
 785
 786 /// Created with the method `.lines()`.
 787 #[stable(feature = "rust1", since = "1.0.0")]
 788 #[derive(Clone)]
 789 pub struct Lines<'a>(SplitTerminator<'a, char>);
 790
 791 #[stable(feature = "rust1", since = "1.0.0")]
 792 impl<'a> Iterator for Lines<'a> {
 793     type Item = &'a str;
 794
 795     #[inline]
 796     fn next(&mut self) -> Option<&'a str> {
 797         self.0.next()
 798     }
 799
 800     #[inline]
 801     fn size_hint(&self) -> (usize, Option<usize>) {
 802         self.0.size_hint()
 803     }
 804 }
 805
 806 #[stable(feature = "rust1", since = "1.0.0")]
 807 impl<'a> DoubleEndedIterator for Lines<'a> {
 808     #[inline]
 809     fn next_back(&mut self) -> Option<&'a str> {
 810         self.0.next_back()
 811     }
 812 }
 813
 814 /// Created with the method `.lines_any()`.
 815 #[stable(feature = "rust1", since = "1.0.0")]
 816 #[derive(Clone)]
 817 pub struct LinesAny<'a>(Map<Lines<'a>, LinesAnyMap>);
 818
 819 /// A nameable, clonable fn type
 820 #[derive(Clone)]
 821 struct LinesAnyMap;
 822
 823 impl<'a> Fn<(&'a str,)> for LinesAnyMap {
 824     #[inline]
 825     extern "rust-call" fn call(&self, (line,): (&'a str,)) -> &'a str {
 826         let l = line.len();
 827         if l > 0 && line.as_bytes()[l - 1] == b'\r' { &line[0 .. l - 1] }
 828         else { line }
 829     }
 830 }
 831
 832 impl<'a> FnMut<(&'a str,)> for LinesAnyMap {
 833     #[inline]
 834     extern "rust-call" fn call_mut(&mut self, (line,): (&'a str,)) -> &'a str {
 835         Fn::call(&*self, (line,))
 836     }
 837 }
 838
 839 impl<'a> FnOnce<(&'a str,)> for LinesAnyMap {
 840     type Output = &'a str;
 841
 842     #[inline]
 843     extern "rust-call" fn call_once(self, (line,): (&'a str,)) -> &'a str {
 844         Fn::call(&self, (line,))
 845     }
 846 }
 847
 848 #[stable(feature = "rust1", since = "1.0.0")]
 849 impl<'a> Iterator for LinesAny<'a> {
 850     type Item = &'a str;
 851
 852     #[inline]
 853     fn next(&mut self) -> Option<&'a str> {
 854         self.0.next()
 855     }
 856
 857     #[inline]
 858     fn size_hint(&self) -> (usize, Option<usize>) {
 859         self.0.size_hint()
 860     }
 861 }
 862
 863 #[stable(feature = "rust1", since = "1.0.0")]
 864 impl<'a> DoubleEndedIterator for LinesAny<'a> {
 865     #[inline]
 866     fn next_back(&mut self) -> Option<&'a str> {
 867         self.0.next_back()
 868     }
 869 }
 870
 871 /*
 872 Section: Comparing strings
 873 */
 874
 875 // share the implementation of the lang-item vs. non-lang-item
 876 // eq_slice.
 877 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
 878 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 879 #[inline]
 880 fn eq_slice_(a: &str, b: &str) -> bool {
 881     // NOTE: In theory n should be libc::size_t and not usize, but libc is not available here
 882     #[allow(improper_ctypes)]
 883     extern { fn memcmp(s1: *const i8, s2: *const i8, n: usize) -> i32; }
 884     a.len() == b.len() && unsafe {
 885         memcmp(a.as_ptr() as *const i8,
 886                b.as_ptr() as *const i8,
 887                a.len()) == 0
 888     }
 889 }
 890
 891 /// Bytewise slice equality
 892 /// NOTE: This function is (ab)used in rustc::middle::trans::_match
 893 /// to compare &[u8] byte slices that are not necessarily valid UTF-8.
 894 #[lang = "str_eq"]
 895 #[inline]
 896 fn eq_slice(a: &str, b: &str) -> bool {
 897     eq_slice_(a, b)
 898 }
 899
 900 /*
 901 Section: Misc
 902 */
 903
 904 /// Walk through `iter` checking that it's a valid UTF-8 sequence,
 905 /// returning `true` in that case, or, if it is invalid, `false` with
 906 /// `iter` reset such that it is pointing at the first byte in the
 907 /// invalid sequence.
 908 #[inline(always)]
 909 fn run_utf8_validation_iterator(iter: &mut slice::Iter<u8>)
 910                                 -> Result<(), Utf8Error> {
 911     let whole = iter.as_slice();
 912     loop {
 913         // save the current thing we're pointing at.
 914         let old = iter.clone();
 915
 916         // restore the iterator we had at the start of this codepoint.
 917         macro_rules! err { () => {{
 918             *iter = old.clone();
 919             return Err(Utf8Error {
 920                 valid_up_to: whole.len() - iter.as_slice().len()
 921             })
 922         }}}
 923
 924         macro_rules! next { () => {
 925             match iter.next() {
 926                 Some(a) => *a,
 927                 // we needed data, but there was none: error!
 928                 None => err!(),
 929             }
 930         }}
 931
 932         let first = match iter.next() {
 933             Some(&b) => b,
 934             // we're at the end of the iterator and a codepoint
 935             // boundary at the same time, so this string is valid.
 936             None => return Ok(())
 937         };
 938
 939         // ASCII characters are always valid, so only large
 940         // bytes need more examination.
 941         if first >= 128 {
 942             let w = UTF8_CHAR_WIDTH[first as usize];
 943             let second = next!();
 944             // 2-byte encoding is for codepoints  \u{0080} to  \u{07ff}
 945             //        first  C2 80        last DF BF
 946             // 3-byte encoding is for codepoints  \u{0800} to  \u{ffff}
 947             //        first  E0 A0 80     last EF BF BF
 948             //   excluding surrogates codepoints  \u{d800} to  \u{dfff}
 949             //               ED A0 80 to       ED BF BF
 950             // 4-byte encoding is for codepoints \u{1000}0 to \u{10ff}ff
 951             //        first  F0 90 80 80  last F4 8F BF BF
 952             //
 953             // Use the UTF-8 syntax from the RFC
 954             //
 955             // https://tools.ietf.org/html/rfc3629
 956             // UTF8-1      = %x00-7F
 957             // UTF8-2      = %xC2-DF UTF8-tail
 958             // UTF8-3      = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
 959             //               %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
 960             // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
 961             //               %xF4 %x80-8F 2( UTF8-tail )
 962             match w {
 963                 2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
 964                 3 => {
 965                     match (first, second, next!() & !CONT_MASK) {
 966                         (0xE0         , 0xA0 ... 0xBF, TAG_CONT_U8) |
 967                         (0xE1 ... 0xEC, 0x80 ... 0xBF, TAG_CONT_U8) |
 968                         (0xED         , 0x80 ... 0x9F, TAG_CONT_U8) |
 969                         (0xEE ... 0xEF, 0x80 ... 0xBF, TAG_CONT_U8) => {}
 970                         _ => err!()
 971                     }
 972                 }
 973                 4 => {
 974                     match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
 975                         (0xF0         , 0x90 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
 976                         (0xF1 ... 0xF3, 0x80 ... 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
 977                         (0xF4         , 0x80 ... 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
 978                         _ => err!()
 979                     }
 980                 }
 981                 _ => err!()
 982             }
 983         }
 984     }
 985 }
 986
 987 // https://tools.ietf.org/html/rfc3629
 988 static UTF8_CHAR_WIDTH: [u8; 256] = [
 989 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 990 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
 991 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 992 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
 993 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 994 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
 995 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 996 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
 997 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 998 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
 999 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1000 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
1001 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1002 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
1003 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
1004 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
1005 ];
1006
1007 /// Struct that contains a `char` and the index of the first byte of
1008 /// the next `char` in a string.  This can be used as a data structure
1009 /// for iterating over the UTF-8 bytes of a string.
1010 #[derive(Copy, Clone)]
1011 #[unstable(feature = "str_char",
1012            reason = "existence of this struct is uncertain as it is frequently \
1013                      able to be replaced with char.len_utf8() and/or \
1014                      char/char_indices iterators")]
1015 pub struct CharRange {
1016     /// Current `char`
1017     pub ch: char,
1018     /// Index of the first byte of the next `char`
1019     pub next: usize,
1020 }
1021
1022 /// Mask of the value bits of a continuation byte
1023 const CONT_MASK: u8 = 0b0011_1111;
1024 /// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
1025 const TAG_CONT_U8: u8 = 0b1000_0000;
1026
1027 /*
1028 Section: Trait implementations
1029 */
1030
1031 mod traits {
1032     use cmp::{Ordering, Ord, PartialEq, PartialOrd, Eq};
1033     use cmp::Ordering::{Less, Equal, Greater};
1034     use iter::Iterator;
1035     use option::Option;
1036     use option::Option::Some;
1037     use ops;
1038     use str::{StrExt, eq_slice};
1039
1040     #[stable(feature = "rust1", since = "1.0.0")]
1041     impl Ord for str {
1042         #[inline]
1043         fn cmp(&self, other: &str) -> Ordering {
1044             for (s_b, o_b) in self.bytes().zip(other.bytes()) {
1045                 match s_b.cmp(&o_b) {
1046                     Greater => return Greater,
1047                     Less => return Less,
1048                     Equal => ()
1049                 }
1050             }
1051
1052             self.len().cmp(&other.len())
1053         }
1054     }
1055
1056     #[stable(feature = "rust1", since = "1.0.0")]
1057     impl PartialEq for str {
1058         #[inline]
1059         fn eq(&self, other: &str) -> bool {
1060             eq_slice(self, other)
1061         }
1062         #[inline]
1063         fn ne(&self, other: &str) -> bool { !(*self).eq(other) }
1064     }
1065
1066     #[stable(feature = "rust1", since = "1.0.0")]
1067     impl Eq for str {}
1068
1069     #[stable(feature = "rust1", since = "1.0.0")]
1070     impl PartialOrd for str {
1071         #[inline]
1072         fn partial_cmp(&self, other: &str) -> Option<Ordering> {
1073             Some(self.cmp(other))
1074         }
1075     }
1076
1077     /// Returns a slice of the given string from the byte range
1078     /// [`begin`..`end`).
1079     ///
1080     /// This operation is `O(1)`.
1081     ///
1082     /// Panics when `begin` and `end` do not point to valid characters
1083     /// or point beyond the last character of the string.
1084     ///
1085     /// # Examples
1086     ///
1087     /// ```
1088     /// let s = "Löwe 老虎 Léopard";
1089     /// assert_eq!(&s[0 .. 1], "L");
1090     ///
1091     /// assert_eq!(&s[1 .. 9], "öwe 老");
1092     ///
1093     /// // these will panic:
1094     /// // byte 2 lies within `ö`:
1095     /// // &s[2 ..3];
1096     ///
1097     /// // byte 8 lies within `老`
1098     /// // &s[1 .. 8];
1099     ///
1100     /// // byte 100 is outside the string
1101     /// // &s[3 .. 100];
1102     /// ```
1103     #[stable(feature = "rust1", since = "1.0.0")]
1104     impl ops::Index<ops::Range<usize>> for str {
1105         type Output = str;
1106         #[inline]
1107         fn index(&self, index: ops::Range<usize>) -> &str {
1108             // is_char_boundary checks that the index is in [0, .len()]
1109             if index.start <= index.end &&
1110                self.is_char_boundary(index.start) &&
1111                self.is_char_boundary(index.end) {
1112                 unsafe { self.slice_unchecked(index.start, index.end) }
1113             } else {
1114                 super::slice_error_fail(self, index.start, index.end)
1115             }
1116         }
1117     }
1118
1119     /// Returns a mutable slice of the given string from the byte range
1120     /// [`begin`..`end`).
1121     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1122     impl ops::IndexMut<ops::Range<usize>> for str {
1123         #[inline]
1124         fn index_mut(&mut self, index: ops::Range<usize>) -> &mut str {
1125             // is_char_boundary checks that the index is in [0, .len()]
1126             if index.start <= index.end &&
1127                self.is_char_boundary(index.start) &&
1128                self.is_char_boundary(index.end) {
1129                 unsafe { self.slice_mut_unchecked(index.start, index.end) }
1130             } else {
1131                 super::slice_error_fail(self, index.start, index.end)
1132             }
1133         }
1134     }
1135
1136     /// Returns a slice of the string from the beginning to byte
1137     /// `end`.
1138     ///
1139     /// Equivalent to `self[0 .. end]`.
1140     ///
1141     /// Panics when `end` does not point to a valid character, or is
1142     /// out of bounds.
1143     #[stable(feature = "rust1", since = "1.0.0")]
1144     impl ops::Index<ops::RangeTo<usize>> for str {
1145         type Output = str;
1146
1147         #[inline]
1148         fn index(&self, index: ops::RangeTo<usize>) -> &str {
1149             // is_char_boundary checks that the index is in [0, .len()]
1150             if self.is_char_boundary(index.end) {
1151                 unsafe { self.slice_unchecked(0, index.end) }
1152             } else {
1153                 super::slice_error_fail(self, 0, index.end)
1154             }
1155         }
1156     }
1157
1158     /// Returns a mutable slice of the string from the beginning to byte
1159     /// `end`.
1160     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1161     impl ops::IndexMut<ops::RangeTo<usize>> for str {
1162         #[inline]
1163         fn index_mut(&mut self, index: ops::RangeTo<usize>) -> &mut str {
1164             // is_char_boundary checks that the index is in [0, .len()]
1165             if self.is_char_boundary(index.end) {
1166                 unsafe { self.slice_mut_unchecked(0, index.end) }
1167             } else {
1168                 super::slice_error_fail(self, 0, index.end)
1169             }
1170         }
1171     }
1172
1173     /// Returns a slice of the string from `begin` to its end.
1174     ///
1175     /// Equivalent to `self[begin .. self.len()]`.
1176     ///
1177     /// Panics when `begin` does not point to a valid character, or is
1178     /// out of bounds.
1179     #[stable(feature = "rust1", since = "1.0.0")]
1180     impl ops::Index<ops::RangeFrom<usize>> for str {
1181         type Output = str;
1182
1183         #[inline]
1184         fn index(&self, index: ops::RangeFrom<usize>) -> &str {
1185             // is_char_boundary checks that the index is in [0, .len()]
1186             if self.is_char_boundary(index.start) {
1187                 unsafe { self.slice_unchecked(index.start, self.len()) }
1188             } else {
1189                 super::slice_error_fail(self, index.start, self.len())
1190             }
1191         }
1192     }
1193
1194     /// Returns a slice of the string from `begin` to its end.
1195     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1196     impl ops::IndexMut<ops::RangeFrom<usize>> for str {
1197         #[inline]
1198         fn index_mut(&mut self, index: ops::RangeFrom<usize>) -> &mut str {
1199             // is_char_boundary checks that the index is in [0, .len()]
1200             if self.is_char_boundary(index.start) {
1201                 let len = self.len();
1202                 unsafe { self.slice_mut_unchecked(index.start, len) }
1203             } else {
1204                 super::slice_error_fail(self, index.start, self.len())
1205             }
1206         }
1207     }
1208
1209     #[stable(feature = "rust1", since = "1.0.0")]
1210     impl ops::Index<ops::RangeFull> for str {
1211         type Output = str;
1212
1213         #[inline]
1214         fn index(&self, _index: ops::RangeFull) -> &str {
1215             self
1216         }
1217     }
1218
1219     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
1220     impl ops::IndexMut<ops::RangeFull> for str {
1221         #[inline]
1222         fn index_mut(&mut self, _index: ops::RangeFull) -> &mut str {
1223             self
1224         }
1225     }
1226 }
1227
1228 /// Methods for string slices
1229 #[allow(missing_docs)]
1230 #[doc(hidden)]
1231 #[unstable(feature = "core_str_ext",
1232            reason = "stable interface provided by `impl str` in later crates")]
1233 pub trait StrExt {
1234     // NB there are no docs here are they're all located on the StrExt trait in
1235     // libcollections, not here.
1236
1237     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1238     fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1239     fn chars<'a>(&'a self) -> Chars<'a>;
1240     fn bytes<'a>(&'a self) -> Bytes<'a>;
1241     fn char_indices<'a>(&'a self) -> CharIndices<'a>;
1242     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P>;
1243     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1244         where P::Searcher: ReverseSearcher<'a>;
1245     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P>;
1246     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1247         where P::Searcher: ReverseSearcher<'a>;
1248     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P>;
1249     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1250         where P::Searcher: ReverseSearcher<'a>;
1251     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P>;
1252     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1253         where P::Searcher: ReverseSearcher<'a>;
1254     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P>;
1255     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1256         where P::Searcher: ReverseSearcher<'a>;
1257     fn lines<'a>(&'a self) -> Lines<'a>;
1258     fn lines_any<'a>(&'a self) -> LinesAny<'a>;
1259     fn char_len(&self) -> usize;
1260     fn slice_chars<'a>(&'a self, begin: usize, end: usize) -> &'a str;
1261     unsafe fn slice_unchecked<'a>(&'a self, begin: usize, end: usize) -> &'a str;
1262     unsafe fn slice_mut_unchecked<'a>(&'a mut self, begin: usize, end: usize) -> &'a mut str;
1263     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool;
1264     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1265         where P::Searcher: ReverseSearcher<'a>;
1266     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1267         where P::Searcher: DoubleEndedSearcher<'a>;
1268     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str;
1269     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1270         where P::Searcher: ReverseSearcher<'a>;
1271     fn is_char_boundary(&self, index: usize) -> bool;
1272     fn char_range_at(&self, start: usize) -> CharRange;
1273     fn char_range_at_reverse(&self, start: usize) -> CharRange;
1274     fn char_at(&self, i: usize) -> char;
1275     fn char_at_reverse(&self, i: usize) -> char;
1276     fn as_bytes<'a>(&'a self) -> &'a [u8];
1277     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1278     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1279         where P::Searcher: ReverseSearcher<'a>;
1280     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>;
1281     fn split_at(&self, mid: usize) -> (&str, &str);
1282     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str);
1283     fn slice_shift_char<'a>(&'a self) -> Option<(char, &'a str)>;
1284     fn subslice_offset(&self, inner: &str) -> usize;
1285     fn as_ptr(&self) -> *const u8;
1286     fn len(&self) -> usize;
1287     fn is_empty(&self) -> bool;
1288     fn parse<T: FromStr>(&self) -> Result<T, T::Err>;
1289 }
1290
1291 #[inline(never)]
1292 fn slice_error_fail(s: &str, begin: usize, end: usize) -> ! {
1293     assert!(begin <= end);
1294     panic!("index {} and/or {} in `{}` do not lie on character boundary",
1295           begin, end, s);
1296 }
1297
1298 impl StrExt for str {
1299     #[inline]
1300     fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1301         pat.is_contained_in(self)
1302     }
1303
1304     #[inline]
1305     fn contains_char<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1306         pat.is_contained_in(self)
1307     }
1308
1309     #[inline]
1310     fn chars(&self) -> Chars {
1311         Chars{iter: self.as_bytes().iter()}
1312     }
1313
1314     #[inline]
1315     fn bytes(&self) -> Bytes {
1316         Bytes(self.as_bytes().iter().map(BytesDeref))
1317     }
1318
1319     #[inline]
1320     fn char_indices(&self) -> CharIndices {
1321         CharIndices { front_offset: 0, iter: self.chars() }
1322     }
1323
1324     #[inline]
1325     fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1326         Split(SplitInternal {
1327             start: 0,
1328             end: self.len(),
1329             matcher: pat.into_searcher(self),
1330             allow_trailing_empty: true,
1331             finished: false,
1332         })
1333     }
1334
1335     #[inline]
1336     fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1337         where P::Searcher: ReverseSearcher<'a>
1338     {
1339         RSplit(self.split(pat).0)
1340     }
1341
1342     #[inline]
1343     fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1344         SplitN(SplitNInternal {
1345             iter: self.split(pat).0,
1346             count: count,
1347         })
1348     }
1349
1350     #[inline]
1351     fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1352         where P::Searcher: ReverseSearcher<'a>
1353     {
1354         RSplitN(self.splitn(count, pat).0)
1355     }
1356
1357     #[inline]
1358     fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1359         SplitTerminator(SplitInternal {
1360             allow_trailing_empty: false,
1361             ..self.split(pat).0
1362         })
1363     }
1364
1365     #[inline]
1366     fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1367         where P::Searcher: ReverseSearcher<'a>
1368     {
1369         RSplitTerminator(self.split_terminator(pat).0)
1370     }
1371
1372     #[inline]
1373     fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1374         Matches(MatchesInternal(pat.into_searcher(self)))
1375     }
1376
1377     #[inline]
1378     fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1379         where P::Searcher: ReverseSearcher<'a>
1380     {
1381         RMatches(self.matches(pat).0)
1382     }
1383
1384     #[inline]
1385     fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1386         MatchIndices(MatchIndicesInternal(pat.into_searcher(self)))
1387     }
1388
1389     #[inline]
1390     fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1391         where P::Searcher: ReverseSearcher<'a>
1392     {
1393         RMatchIndices(self.match_indices(pat).0)
1394     }
1395     #[inline]
1396     fn lines(&self) -> Lines {
1397         Lines(self.split_terminator('\n'))
1398     }
1399
1400     #[inline]
1401     fn lines_any(&self) -> LinesAny {
1402         LinesAny(self.lines().map(LinesAnyMap))
1403     }
1404
1405     #[inline]
1406     fn char_len(&self) -> usize { self.chars().count() }
1407
1408     fn slice_chars(&self, begin: usize, end: usize) -> &str {
1409         assert!(begin <= end);
1410         let mut count = 0;
1411         let mut begin_byte = None;
1412         let mut end_byte = None;
1413
1414         // This could be even more efficient by not decoding,
1415         // only finding the char boundaries
1416         for (idx, _) in self.char_indices() {
1417             if count == begin { begin_byte = Some(idx); }
1418             if count == end { end_byte = Some(idx); break; }
1419             count += 1;
1420         }
1421         if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
1422         if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
1423
1424         match (begin_byte, end_byte) {
1425             (None, _) => panic!("slice_chars: `begin` is beyond end of string"),
1426             (_, None) => panic!("slice_chars: `end` is beyond end of string"),
1427             (Some(a), Some(b)) => unsafe { self.slice_unchecked(a, b) }
1428         }
1429     }
1430
1431     #[inline]
1432     unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
1433         mem::transmute(Slice {
1434             data: self.as_ptr().offset(begin as isize),
1435             len: end - begin,
1436         })
1437     }
1438
1439     #[inline]
1440     unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
1441         mem::transmute(Slice {
1442             data: self.as_ptr().offset(begin as isize),
1443             len: end - begin,
1444         })
1445     }
1446
1447     #[inline]
1448     fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1449         pat.is_prefix_of(self)
1450     }
1451
1452     #[inline]
1453     fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1454         where P::Searcher: ReverseSearcher<'a>
1455     {
1456         pat.is_suffix_of(self)
1457     }
1458
1459     #[inline]
1460     fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1461         where P::Searcher: DoubleEndedSearcher<'a>
1462     {
1463         let mut i = 0;
1464         let mut j = 0;
1465         let mut matcher = pat.into_searcher(self);
1466         if let Some((a, b)) = matcher.next_reject() {
1467             i = a;
1468             j = b; // Rember earliest known match, correct it below if
1469                    // last match is different
1470         }
1471         if let Some((_, b)) = matcher.next_reject_back() {
1472             j = b;
1473         }
1474         unsafe {
1475             // Searcher is known to return valid indices
1476             self.slice_unchecked(i, j)
1477         }
1478     }
1479
1480     #[inline]
1481     fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
1482         let mut i = self.len();
1483         let mut matcher = pat.into_searcher(self);
1484         if let Some((a, _)) = matcher.next_reject() {
1485             i = a;
1486         }
1487         unsafe {
1488             // Searcher is known to return valid indices
1489             self.slice_unchecked(i, self.len())
1490         }
1491     }
1492
1493     #[inline]
1494     fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
1495         where P::Searcher: ReverseSearcher<'a>
1496     {
1497         let mut j = 0;
1498         let mut matcher = pat.into_searcher(self);
1499         if let Some((_, b)) = matcher.next_reject_back() {
1500             j = b;
1501         }
1502         unsafe {
1503             // Searcher is known to return valid indices
1504             self.slice_unchecked(0, j)
1505         }
1506     }
1507
1508     #[inline]
1509     fn is_char_boundary(&self, index: usize) -> bool {
1510         if index == self.len() { return true; }
1511         match self.as_bytes().get(index) {
1512             None => false,
1513             Some(&b) => b < 128 || b >= 192,
1514         }
1515     }
1516
1517     #[inline]
1518     fn char_range_at(&self, i: usize) -> CharRange {
1519         let (c, n) = char_range_at_raw(self.as_bytes(), i);
1520         CharRange { ch: unsafe { mem::transmute(c) }, next: n }
1521     }
1522
1523     #[inline]
1524     fn char_range_at_reverse(&self, start: usize) -> CharRange {
1525         let mut prev = start;
1526
1527         prev = prev.saturating_sub(1);
1528         if self.as_bytes()[prev] < 128 {
1529             return CharRange{ch: self.as_bytes()[prev] as char, next: prev}
1530         }
1531
1532         // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
1533         fn multibyte_char_range_at_reverse(s: &str, mut i: usize) -> CharRange {
1534             // while there is a previous byte == 10......
1535             while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
1536                 i -= 1;
1537             }
1538
1539             let first= s.as_bytes()[i];
1540             let w = UTF8_CHAR_WIDTH[first as usize];
1541             assert!(w != 0);
1542
1543             let mut val = utf8_first_byte(first, w as u32);
1544             val = utf8_acc_cont_byte(val, s.as_bytes()[i + 1]);
1545             if w > 2 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 2]); }
1546             if w > 3 { val = utf8_acc_cont_byte(val, s.as_bytes()[i + 3]); }
1547
1548             return CharRange {ch: unsafe { mem::transmute(val) }, next: i};
1549         }
1550
1551         return multibyte_char_range_at_reverse(self, prev);
1552     }
1553
1554     #[inline]
1555     fn char_at(&self, i: usize) -> char {
1556         self.char_range_at(i).ch
1557     }
1558
1559     #[inline]
1560     fn char_at_reverse(&self, i: usize) -> char {
1561         self.char_range_at_reverse(i).ch
1562     }
1563
1564     #[inline]
1565     fn as_bytes(&self) -> &[u8] {
1566         unsafe { mem::transmute(self) }
1567     }
1568
1569     fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1570         pat.into_searcher(self).next_match().map(|(i, _)| i)
1571     }
1572
1573     fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1574         where P::Searcher: ReverseSearcher<'a>
1575     {
1576         pat.into_searcher(self).next_match_back().map(|(i, _)| i)
1577     }
1578
1579     fn find_str<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1580         self.find(pat)
1581     }
1582
1583     fn split_at(&self, mid: usize) -> (&str, &str) {
1584         // is_char_boundary checks that the index is in [0, .len()]
1585         if self.is_char_boundary(mid) {
1586             unsafe {
1587                 (self.slice_unchecked(0, mid),
1588                  self.slice_unchecked(mid, self.len()))
1589             }
1590         } else {
1591             slice_error_fail(self, 0, mid)
1592         }
1593     }
1594
1595     fn split_at_mut(&mut self, mid: usize) -> (&mut str, &mut str) {
1596         // is_char_boundary checks that the index is in [0, .len()]
1597         if self.is_char_boundary(mid) {
1598             let len = self.len();
1599             unsafe {
1600                 let self2: &mut str = mem::transmute_copy(&self);
1601                 (self.slice_mut_unchecked(0, mid),
1602                  self2.slice_mut_unchecked(mid, len))
1603             }
1604         } else {
1605             slice_error_fail(self, 0, mid)
1606         }
1607     }
1608
1609     #[inline]
1610     fn slice_shift_char(&self) -> Option<(char, &str)> {
1611         if self.is_empty() {
1612             None
1613         } else {
1614             let ch = self.char_at(0);
1615             let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
1616             Some((ch, next_s))
1617         }
1618     }
1619
1620     fn subslice_offset(&self, inner: &str) -> usize {
1621         let a_start = self.as_ptr() as usize;
1622         let a_end = a_start + self.len();
1623         let b_start = inner.as_ptr() as usize;
1624         let b_end = b_start + inner.len();
1625
1626         assert!(a_start <= b_start);
1627         assert!(b_end <= a_end);
1628         b_start - a_start
1629     }
1630
1631     #[inline]
1632     fn as_ptr(&self) -> *const u8 {
1633         self.repr().data
1634     }
1635
1636     #[inline]
1637     fn len(&self) -> usize { self.repr().len }
1638
1639     #[inline]
1640     fn is_empty(&self) -> bool { self.len() == 0 }
1641
1642     #[inline]
1643     fn parse<T: FromStr>(&self) -> Result<T, T::Err> { FromStr::from_str(self) }
1644 }
1645
1646 #[stable(feature = "rust1", since = "1.0.0")]
1647 impl AsRef<[u8]> for str {
1648     #[inline]
1649     fn as_ref(&self) -> &[u8] {
1650         self.as_bytes()
1651     }
1652 }
1653
1654 /// Pluck a code point out of a UTF-8-like byte slice and return the
1655 /// index of the next code point.
1656 #[inline]
1657 fn char_range_at_raw(bytes: &[u8], i: usize) -> (u32, usize) {
1658     if bytes[i] < 128 {
1659         return (bytes[i] as u32, i + 1);
1660     }
1661
1662     // Multibyte case is a fn to allow char_range_at to inline cleanly
1663     fn multibyte_char_range_at(bytes: &[u8], i: usize) -> (u32, usize) {
1664         let first = bytes[i];
1665         let w = UTF8_CHAR_WIDTH[first as usize];
1666         assert!(w != 0);
1667
1668         let mut val = utf8_first_byte(first, w as u32);
1669         val = utf8_acc_cont_byte(val, bytes[i + 1]);
1670         if w > 2 { val = utf8_acc_cont_byte(val, bytes[i + 2]); }
1671         if w > 3 { val = utf8_acc_cont_byte(val, bytes[i + 3]); }
1672
1673         return (val, i + w as usize);
1674     }
1675
1676     multibyte_char_range_at(bytes, i)
1677 }
1678
1679 #[stable(feature = "rust1", since = "1.0.0")]
1680 impl<'a> Default for &'a str {
1681     #[stable(feature = "rust1", since = "1.0.0")]
1682     fn default() -> &'a str { "" }
1683 }