src/libcollections/str.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 //! Unicode string manipulation (the `str` type).
  12 //!
  13 //! Rust's `str` type is one of the core primitive types of the language. `&str`
  14 //! is the borrowed string type. This type of string can only be created from
  15 //! other strings, unless it is a `&'static str` (see below). It is not possible
  16 //! to move out of borrowed strings because they are owned elsewhere.
  17 //!
  18 //! # Examples
  19 //!
  20 //! Here's some code that uses a `&str`:
  21 //!
  22 //! ```
  23 //! let s = "Hello, world.";
  24 //! ```
  25 //!
  26 //! This `&str` is a `&'static str`, which is the type of string literals.
  27 //! They're `'static` because literals are available for the entire lifetime of
  28 //! the program.
  29 //!
  30 //! You can get a non-`'static` `&str` by taking a slice of a `String`:
  31 //!
  32 //! ```
  33 //! let some_string = "Hello, world.".to_string();
  34 //! let s = &some_string;
  35 //! ```
  36 //!
  37 //! # Representation
  38 //!
  39 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as
  40 //! a stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
  41 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
  42 //! not null-terminated and can thus contain null bytes.
  43 //!
//! The actual representation of a `str` maps directly onto a slice: `&str`
//! is represented the same way as `&[u8]`.
  46
  47 #![doc(primitive = "str")]
  48 #![stable(feature = "rust1", since = "1.0.0")]
  49
// Many of the imports in this module are only used in the test configuration.
// It's cleaner to just turn off the unused_imports warning than to fix them.
  52 #![allow(unused_imports)]
  53
  54 use self::RecompositionState::*;
  55 use self::DecompositionType::*;
  56
  57 use core::clone::Clone;
  58 use core::iter::{Iterator, Extend};
  59 use core::option::Option::{self, Some, None};
  60 use core::result::Result;
  61 use core::str as core_str;
  62 use core::str::pattern::Pattern;
  63 use core::str::pattern::{Searcher, ReverseSearcher, DoubleEndedSearcher};
  64 use core::mem;
  65 use rustc_unicode::str::{UnicodeStr, Utf16Encoder};
  66
  67 use vec_deque::VecDeque;
  68 use borrow::{Borrow, ToOwned};
  69 use string::String;
  70 use rustc_unicode;
  71 use vec::Vec;
  72 use slice::SliceConcatExt;
  73 use boxed::Box;
  74
  75 pub use core::str::{FromStr, Utf8Error};
  76 pub use core::str::{Lines, LinesAny, CharRange};
  77 pub use core::str::{Split, RSplit};
  78 pub use core::str::{SplitN, RSplitN};
  79 pub use core::str::{SplitTerminator, RSplitTerminator};
  80 pub use core::str::{Matches, RMatches};
  81 pub use core::str::{MatchIndices, RMatchIndices};
  82 pub use core::str::{from_utf8, Chars, CharIndices, Bytes};
  83 pub use core::str::{from_utf8_unchecked, ParseBoolError};
  84 pub use rustc_unicode::str::{SplitWhitespace, Words, Graphemes, GraphemeIndices};
  85 pub use core::str::pattern;
  86
  87 impl<S: Borrow<str>> SliceConcatExt<str> for [S] {
  88     type Output = String;
  89
  90     fn concat(&self) -> String {
  91         if self.is_empty() {
  92             return String::new();
  93         }
  94
  95         // `len` calculation may overflow but push_str will check boundaries
  96         let len = self.iter().map(|s| s.borrow().len()).sum();
  97         let mut result = String::with_capacity(len);
  98
  99         for s in self {
 100             result.push_str(s.borrow())
 101         }
 102
 103         result
 104     }
 105
 106     fn connect(&self, sep: &str) -> String {
 107         if self.is_empty() {
 108             return String::new();
 109         }
 110
 111         // concat is faster
 112         if sep.is_empty() {
 113             return self.concat();
 114         }
 115
 116         // this is wrong without the guarantee that `self` is non-empty
 117         // `len` calculation may overflow but push_str but will check boundaries
 118         let len = sep.len() * (self.len() - 1)
 119             + self.iter().map(|s| s.borrow().len()).sum::<usize>();
 120         let mut result = String::with_capacity(len);
 121         let mut first = true;
 122
 123         for s in self {
 124             if first {
 125                 first = false;
 126             } else {
 127                 result.push_str(sep);
 128             }
 129             result.push_str(s.borrow());
 130         }
 131         result
 132     }
 133 }
 134
 135 // Helper functions used for Unicode normalization
 136 fn canonical_sort(comb: &mut [(char, u8)]) {
 137     let len = comb.len();
 138     for i in 0..len {
 139         let mut swapped = false;
 140         for j in 1..len-i {
 141             let class_a = comb[j-1].1;
 142             let class_b = comb[j].1;
 143             if class_a != 0 && class_b != 0 && class_a > class_b {
 144                 comb.swap(j-1, j);
 145                 swapped = true;
 146             }
 147         }
 148         if !swapped { break; }
 149     }
 150 }
 151
/// Selects which decomposition function `Decompositions` applies to each
/// source character.
#[derive(Clone)]
enum DecompositionType {
    /// Decompose with `rustc_unicode::char::decompose_canonical`.
    Canonical,
    /// Decompose with `rustc_unicode::char::decompose_compatible`.
    Compatible
}
 157
/// External iterator for a string decomposition's characters.
///
/// Each source character is decomposed (canonically or compatibly, depending
/// on `kind`), and runs of characters with a non-zero canonical combining
/// class are put into canonical order before being yielded.
///
/// For use with the `std::iter` module.
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
             since = "1.0.0")]
#[derive(Clone)]
#[unstable(feature = "unicode",
           reason = "this functionality may be replaced with a more generic \
                     unicode crate on crates.io")]
pub struct Decompositions<'a> {
    /// Which decomposition function is applied to each source character.
    kind: DecompositionType,
    /// Source characters that have not been decomposed yet.
    iter: Chars<'a>,
    /// Decomposed characters waiting to be yielded, each paired with its
    /// canonical combining class.
    buffer: Vec<(char, u8)>,
    /// `true` once `canonical_sort` has been run over the pending entries,
    /// which allows the front of `buffer` to be yielded directly; reset to
    /// `false` whenever a class-0 character is yielded.
    sorted: bool
}
 174
#[allow(deprecated)]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Decompositions<'a> {
    type Item = char;

    /// Yields the next decomposed character, or `None` once both the source
    /// iterator and the internal buffer are exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        // First try to serve the front of the buffer without reading any
        // more input.
        match self.buffer.first() {
            // A class-0 character at the front is yielded as is; whatever
            // follows it has to be sorted again before it can be yielded.
            Some(&(c, 0)) => {
                self.sorted = false;
                self.buffer.remove(0);
                return Some(c);
            }
            // A non-zero class at the front may only be yielded when the
            // buffer has already been sorted.
            Some(&(c, _)) if self.sorted => {
                self.buffer.remove(0);
                return Some(c);
            }
            // Empty buffer, or unsorted combining characters: more input is
            // needed before anything can be yielded.
            _ => self.sorted = false
        }

        if !self.sorted {
            // Decompose source characters into the buffer until a class-0
            // character has been pushed (or the source runs out).
            for ch in self.iter.by_ref() {
                let buffer = &mut self.buffer;
                let sorted = &mut self.sorted;
                {
                    // Invoked once per character produced by the
                    // decomposition of `ch`.
                    let callback = |d| {
                        let class =
                            rustc_unicode::char::canonical_combining_class(d);
                        // The first class-0 character closes the pending
                        // run: sort what was buffered before it, then push
                        // it behind the sorted entries.
                        if class == 0 && !*sorted {
                            canonical_sort(buffer);
                            *sorted = true;
                        }
                        buffer.push((d, class));
                    };
                    match self.kind {
                        Canonical => {
                            rustc_unicode::char::decompose_canonical(ch, callback)
                        }
                        Compatible => {
                            rustc_unicode::char::decompose_compatible(ch, callback)
                        }
                    }
                }
                if *sorted {
                    break
                }
            }
        }

        // The loop above can finish without ever sorting (no class-0
        // character was pushed); sort whatever is pending now.
        if !self.sorted {
            canonical_sort(&mut self.buffer);
            self.sorted = true;
        }

        if self.buffer.is_empty() {
            None
        } else {
            match self.buffer.remove(0) {
                // Yielding a class-0 character invalidates `sorted` for the
                // entries that follow it.
                (c, 0) => {
                    self.sorted = false;
                    Some(c)
                }
                (c, _) => Some(c),
            }
        }
    }

    /// Reports the source iterator's lower bound and no upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, _) = self.iter.size_hint();
        (lower, None)
    }
}
 247
/// The phase `Recompositions::next` is currently in.
#[derive(Clone)]
enum RecompositionState {
    /// Reading decomposed characters and trying to combine them with the
    /// current `composee`.
    Composing,
    /// Draining the buffered characters; switches back to `Composing` once
    /// the buffer is empty.
    Purging,
    /// The decomposition iterator is exhausted; only the buffer and any
    /// remaining `composee` are left to yield.
    Finished
}
 254
/// External iterator for a string recomposition's characters.
///
/// Consumes a `Decompositions` stream and combines characters again via
/// `rustc_unicode::char::compose` wherever that succeeds.
///
/// For use with the `std::iter` module.
#[allow(deprecated)]
#[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
             since = "1.0.0")]
#[derive(Clone)]
#[unstable(feature = "unicode",
           reason = "this functionality may be replaced with a more generic \
                     unicode crate on crates.io")]
pub struct Recompositions<'a> {
    /// The decomposed character stream being recomposed.
    iter: Decompositions<'a>,
    /// Current phase of the `next` state machine.
    state: RecompositionState,
    /// Characters that did not compose with `composee`, queued to be yielded
    /// after it.
    buffer: VecDeque<char>,
    /// The character that following characters are currently being composed
    /// onto, if any.
    composee: Option<char>,
    /// Combining class of the character most recently pushed onto `buffer`;
    /// `None` while nothing has been buffered for the current `composee`.
    last_ccc: Option<u8>
}
 272
#[allow(deprecated)]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> Iterator for Recompositions<'a> {
    type Item = char;

    /// Yields the next recomposed character, or `None` once the underlying
    /// decomposition stream, the buffer and `composee` are all exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        loop {
            match self.state {
                Composing => {
                    for ch in self.iter.by_ref() {
                        let ch_class = rustc_unicode::char::canonical_combining_class(ch);
                        if self.composee.is_none() {
                            // Without a composee, a combining character
                            // (non-zero class) is passed through unchanged;
                            // a class-0 character becomes the new composee.
                            if ch_class != 0 {
                                return Some(ch);
                            }
                            self.composee = Some(ch);
                            continue;
                        }
                        let k = self.composee.clone().unwrap();

                        match self.last_ccc {
                            // Nothing has been buffered since the composee
                            // was set, so `ch` directly follows it.
                            None => {
                                match rustc_unicode::char::compose(k, ch) {
                                    Some(r) => {
                                        self.composee = Some(r);
                                        continue;
                                    }
                                    None => {
                                        // A class-0 character that does not
                                        // compose starts a new composee; the
                                        // old one is yielded.
                                        if ch_class == 0 {
                                            self.composee = Some(ch);
                                            return Some(k);
                                        }
                                        self.buffer.push_back(ch);
                                        self.last_ccc = Some(ch_class);
                                    }
                                }
                            }
                            Some(l_class) => {
                                if l_class >= ch_class {
                                    // `ch` is blocked from `composee`
                                    if ch_class == 0 {
                                        // Yield the old composee now and
                                        // flush the buffered characters
                                        // (`Purging`) before composing onto
                                        // the new one.
                                        self.composee = Some(ch);
                                        self.last_ccc = None;
                                        self.state = Purging;
                                        return Some(k);
                                    }
                                    self.buffer.push_back(ch);
                                    self.last_ccc = Some(ch_class);
                                    continue;
                                }
                                // Not blocked: `ch` may still combine with
                                // the composee.
                                match rustc_unicode::char::compose(k, ch) {
                                    Some(r) => {
                                        self.composee = Some(r);
                                        continue;
                                    }
                                    None => {
                                        self.buffer.push_back(ch);
                                        self.last_ccc = Some(ch_class);
                                    }
                                }
                            }
                        }
                    }
                    // The decomposition stream is exhausted: yield the
                    // pending composee (if any) first, then let `Finished`
                    // drain the buffer.
                    self.state = Finished;
                    if self.composee.is_some() {
                        return self.composee.take();
                    }
                }
                Purging => {
                    // Yield buffered characters one per call; resume
                    // composing once the buffer is empty.
                    match self.buffer.pop_front() {
                        None => self.state = Composing,
                        s => return s
                    }
                }
                Finished => {
                    // Yield buffered characters one per call; afterwards
                    // return whatever is left in `composee` (`None` once it
                    // has been taken).
                    match self.buffer.pop_front() {
                        None => return self.composee.take(),
                        s => return s
                    }
                }
            }
        }
    }
}
 358
/// External iterator for a string's UTF-16 code units.
///
/// For use with the `std::iter` module.
#[derive(Clone)]
#[unstable(feature = "str_utf16")]
pub struct Utf16Units<'a> {
    /// Encoder wrapping the string's `Chars` iterator; it supplies the `u16`
    /// items this iterator yields.
    encoder: Utf16Encoder<Chars<'a>>
}
 367
 368 #[stable(feature = "rust1", since = "1.0.0")]
 369 impl<'a> Iterator for Utf16Units<'a> {
 370     type Item = u16;
 371
 372     #[inline]
 373     fn next(&mut self) -> Option<u16> { self.encoder.next() }
 374
 375     #[inline]
 376     fn size_hint(&self) -> (usize, Option<usize>) { self.encoder.size_hint() }
 377 }
 378
 379 // Return the initial codepoint accumulator for the first byte.
 380 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
 381 // for width 3, and 3 bits for width 4
 382 macro_rules! utf8_first_byte {
 383     ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
 384 }
 385
 386 // return the value of $ch updated with continuation byte $byte
 387 macro_rules! utf8_acc_cont_byte {
 388     ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63) as u32)
 389 }
 390
 391 #[stable(feature = "rust1", since = "1.0.0")]
 392 impl Borrow<str> for String {
 393     #[inline]
 394     fn borrow(&self) -> &str { &self[..] }
 395 }
 396
 397 #[stable(feature = "rust1", since = "1.0.0")]
 398 impl ToOwned for str {
 399     type Owned = String;
 400     fn to_owned(&self) -> String {
 401         unsafe {
 402             String::from_utf8_unchecked(self.as_bytes().to_owned())
 403         }
 404     }
 405 }
 406
 407 /// Any string that can be represented as a slice.
 408 #[lang = "str"]
 409 #[cfg(not(test))]
 410 #[stable(feature = "rust1", since = "1.0.0")]
 411 impl str {
 412     /// Returns the length of `self` in bytes.
 413     ///
 414     /// # Examples
 415     ///
 416     /// ```
 417     /// assert_eq!("foo".len(), 3);
 418     /// assert_eq!("ƒoo".len(), 4); // fancy f!
 419     /// ```
 420     #[stable(feature = "rust1", since = "1.0.0")]
 421     #[inline]
 422     pub fn len(&self) -> usize {
 423         core_str::StrExt::len(self)
 424     }
 425
 426     /// Returns true if this slice has a length of zero bytes.
 427     ///
 428     /// # Examples
 429     ///
 430     /// ```
 431     /// assert!("".is_empty());
 432     /// ```
 433     #[inline]
 434     #[stable(feature = "rust1", since = "1.0.0")]
 435     pub fn is_empty(&self) -> bool {
 436         core_str::StrExt::is_empty(self)
 437     }
 438
 439     /// Returns a string's displayed width in columns.
 440     ///
 441     /// Control characters have zero width.
 442     ///
 443     /// `is_cjk` determines behavior for characters in the Ambiguous category:
 444     /// if `is_cjk` is
 445     /// `true`, these are 2 columns wide; otherwise, they are 1.
 446     /// In CJK locales, `is_cjk` should be
 447     /// `true`, else it should be `false`.
 448     /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
 449     /// recommends that these
 450     /// characters be treated as 1 column (i.e., `is_cjk = false`) if the
 451     /// locale is unknown.
 452     #[deprecated(reason = "use the crates.io `unicode-width` library instead",
 453                  since = "1.0.0")]
 454     #[unstable(feature = "unicode",
 455                reason = "this functionality may only be provided by libunicode")]
 456     pub fn width(&self, is_cjk: bool) -> usize {
 457         UnicodeStr::width(self, is_cjk)
 458     }
 459
 460     /// Checks that `index`-th byte lies at the start and/or end of a
 461     /// UTF-8 code point sequence.
 462     ///
 463     /// The start and end of the string (when `index == self.len()`) are
 464     /// considered to be
 465     /// boundaries.
 466     ///
 467     /// Returns `false` if `index` is greater than `self.len()`.
 468     ///
 469     /// # Examples
 470     ///
 471     /// ```
 472     /// # #![feature(str_char)]
 473     /// let s = "Löwe 老虎 Léopard";
 474     /// assert!(s.is_char_boundary(0));
 475     /// // start of `老`
 476     /// assert!(s.is_char_boundary(6));
 477     /// assert!(s.is_char_boundary(s.len()));
 478     ///
 479     /// // second byte of `ö`
 480     /// assert!(!s.is_char_boundary(2));
 481     ///
 482     /// // third byte of `老`
 483     /// assert!(!s.is_char_boundary(8));
 484     /// ```
 485     #[unstable(feature = "str_char",
 486                reason = "it is unclear whether this method pulls its weight \
 487                          with the existence of the char_indices iterator or \
 488                          this method may want to be replaced with checked \
 489                          slicing")]
 490     pub fn is_char_boundary(&self, index: usize) -> bool {
 491         core_str::StrExt::is_char_boundary(self, index)
 492     }
 493
 494     /// Converts `self` to a byte slice.
 495     ///
 496     /// # Examples
 497     ///
 498     /// ```
 499     /// assert_eq!("bors".as_bytes(), b"bors");
 500     /// ```
 501     #[stable(feature = "rust1", since = "1.0.0")]
 502     #[inline(always)]
 503     pub fn as_bytes(&self) -> &[u8] {
 504         core_str::StrExt::as_bytes(self)
 505     }
 506
 507     /// Returns a raw pointer to the `&str`'s buffer.
 508     ///
 509     /// The caller must ensure that the string outlives this pointer, and
 510     /// that it is not
 511     /// reallocated (e.g. by pushing to the string).
 512     ///
 513     /// # Examples
 514     ///
 515     /// ```
 516     /// let s = "Hello";
 517     /// let p = s.as_ptr();
 518     /// ```
 519     #[stable(feature = "rust1", since = "1.0.0")]
 520     #[inline]
 521     pub fn as_ptr(&self) -> *const u8 {
 522         core_str::StrExt::as_ptr(self)
 523     }
 524
 525     /// Takes a bytewise slice from a string.
 526     ///
 527     /// Returns the substring from [`begin`..`end`).
 528     ///
 529     /// # Unsafety
 530     ///
 531     /// Caller must check both UTF-8 character boundaries and the boundaries
 532     /// of the entire slice as
 533     /// well.
 534     ///
 535     /// # Examples
 536     ///
 537     /// ```
 538     /// let s = "Löwe 老虎 Léopard";
 539     ///
 540     /// unsafe {
 541     ///     assert_eq!(s.slice_unchecked(0, 21), "Löwe 老虎 Léopard");
 542     /// }
 543     /// ```
 544     #[stable(feature = "rust1", since = "1.0.0")]
 545     pub unsafe fn slice_unchecked(&self, begin: usize, end: usize) -> &str {
 546         core_str::StrExt::slice_unchecked(self, begin, end)
 547     }
 548
 549     /// Takes a bytewise mutable slice from a string.
 550     ///
 551     /// Same as `slice_unchecked`, but works with `&mut str` instead of `&str`.
 552     #[stable(feature = "derefmut_for_string", since = "1.2.0")]
 553     pub unsafe fn slice_mut_unchecked(&mut self, begin: usize, end: usize) -> &mut str {
 554         core_str::StrExt::slice_mut_unchecked(self, begin, end)
 555     }
 556
 557     /// Returns a slice of the string from the character range [`begin`..`end`).
 558     ///
 559     /// That is, start at the `begin`-th code point of the string and continue
 560     /// to the `end`-th code point. This does not detect or handle edge cases
 561     /// such as leaving a combining character as the first code point of the
 562     /// string.
 563     ///
 564     /// Due to the design of UTF-8, this operation is `O(end)`. Use slicing
 565     /// syntax if you want to use byte indices rather than codepoint indices.
 566     ///
 567     /// # Panics
 568     ///
    /// Panics if `begin` > `end`, or if either `begin` or `end` is beyond the
    /// last character of the string.
 571     ///
 572     /// # Examples
 573     ///
 574     /// ```
 575     /// # #![feature(slice_chars)]
 576     /// let s = "Löwe 老虎 Léopard";
 577     ///
 578     /// assert_eq!(s.slice_chars(0, 4), "Löwe");
 579     /// assert_eq!(s.slice_chars(5, 7), "老虎");
 580     /// ```
 581     #[unstable(feature = "slice_chars",
 582                reason = "may have yet to prove its worth")]
 583     pub fn slice_chars(&self, begin: usize, end: usize) -> &str {
 584         core_str::StrExt::slice_chars(self, begin, end)
 585     }
 586
 587     /// Given a byte position, return the next char and its index.
 588     ///
 589     /// This can be used to iterate over the Unicode characters of a string.
 590     ///
 591     /// # Panics
 592     ///
    /// If `start` is greater than or equal to the length of the string.
    /// If `start` is not the index of the beginning of a valid UTF-8 character.
 595     ///
 596     /// # Examples
 597     ///
 598     /// This example manually iterates through the characters of a string;
 599     /// this should normally be
 600     /// done by `.chars()` or `.char_indices()`.
 601     ///
 602     /// ```
 603     /// # #![feature(str_char, core)]
 604     /// use std::str::CharRange;
 605     ///
 606     /// let s = "中华Việt Nam";
 607     /// let mut i = 0;
 608     /// while i < s.len() {
 609     ///     let CharRange {ch, next} = s.char_range_at(i);
 610     ///     println!("{}: {}", i, ch);
 611     ///     i = next;
 612     /// }
 613     /// ```
 614     ///
 615     /// This outputs:
 616     ///
 617     /// ```text
 618     /// 0: 中
 619     /// 3: 华
 620     /// 6: V
 621     /// 7: i
 622     /// 8: ệ
 623     /// 11: t
 624     /// 12:
 625     /// 13: N
 626     /// 14: a
 627     /// 15: m
 628     /// ```
 629     #[unstable(feature = "str_char",
 630                reason = "often replaced by char_indices, this method may \
 631                          be removed in favor of just char_at() or eventually \
 632                          removed altogether")]
 633     pub fn char_range_at(&self, start: usize) -> CharRange {
 634         core_str::StrExt::char_range_at(self, start)
 635     }
 636
 637     /// Given a byte position, return the previous `char` and its position.
 638     ///
 639     /// This function can be used to iterate over a Unicode string in reverse.
 640     ///
 641     /// Returns 0 for next index if called on start index 0.
 642     ///
 643     /// # Panics
 644     ///
    /// If `start` is greater than the length of the string.
    /// If `start` is not an index following a valid UTF-8 character.
 647     ///
 648     /// # Examples
 649     ///
 650     /// This example manually iterates through the characters of a string;
 651     /// this should normally be
 652     /// done by `.chars().rev()` or `.char_indices()`.
 653     ///
 654     /// ```
 655     /// # #![feature(str_char, core)]
 656     /// use std::str::CharRange;
 657     ///
 658     /// let s = "中华Việt Nam";
 659     /// let mut i = s.len();
 660     /// while i > 0 {
 661     ///     let CharRange {ch, next} = s.char_range_at_reverse(i);
 662     ///     println!("{}: {}", i, ch);
 663     ///     i = next;
 664     /// }
 665     /// ```
 666     ///
 667     /// This outputs:
 668     ///
 669     /// ```text
 670     /// 16: m
 671     /// 15: a
 672     /// 14: N
 673     /// 13:
 674     /// 12: t
 675     /// 11: ệ
 676     /// 8: i
 677     /// 7: V
 678     /// 6: 华
 679     /// 3: 中
 680     /// ```
 681     #[unstable(feature = "str_char",
 682                reason = "often replaced by char_indices, this method may \
 683                          be removed in favor of just char_at_reverse() or \
 684                          eventually removed altogether")]
 685     pub fn char_range_at_reverse(&self, start: usize) -> CharRange {
 686         core_str::StrExt::char_range_at_reverse(self, start)
 687     }
 688
 689     /// Given a byte position, return the `char` at that position.
 690     ///
 691     /// # Panics
 692     ///
 693     /// If `i` is greater than or equal to the length of the string.
 694     /// If `i` is not the index of the beginning of a valid UTF-8 character.
 695     ///
 696     /// # Examples
 697     ///
 698     /// ```
 699     /// # #![feature(str_char)]
 700     /// let s = "abπc";
 701     /// assert_eq!(s.char_at(1), 'b');
 702     /// assert_eq!(s.char_at(2), 'π');
 703     /// ```
 704     #[unstable(feature = "str_char",
 705                reason = "frequently replaced by the chars() iterator, this \
 706                          method may be removed or possibly renamed in the \
 707                          future; it is normally replaced by chars/char_indices \
 708                          iterators or by getting the first char from a \
 709                          subslice")]
 710     pub fn char_at(&self, i: usize) -> char {
 711         core_str::StrExt::char_at(self, i)
 712     }
 713
    /// Given a byte position, return the `char` that ends immediately before
    /// that position.
 716     ///
 717     /// # Panics
 718     ///
 719     /// If `i` is greater than the length of the string.
 720     /// If `i` is not an index following a valid UTF-8 character.
 721     ///
 722     /// # Examples
 723     ///
 724     /// ```
 725     /// # #![feature(str_char)]
 726     /// let s = "abπc";
 727     /// assert_eq!(s.char_at_reverse(1), 'a');
 728     /// assert_eq!(s.char_at_reverse(2), 'b');
 729     /// ```
 730     #[unstable(feature = "str_char",
 731                reason = "see char_at for more details, but reverse semantics \
 732                          are also somewhat unclear, especially with which \
 733                          cases generate panics")]
 734     pub fn char_at_reverse(&self, i: usize) -> char {
 735         core_str::StrExt::char_at_reverse(self, i)
 736     }
 737
 738     /// Retrieves the first character from a `&str` and returns it.
 739     ///
    /// This does not allocate a new string; instead, it returns the character
    /// together with a slice of the rest of the string, starting just after
    /// that character.
 743     ///
 744     /// If the slice does not contain any characters, None is returned instead.
 745     ///
 746     /// # Examples
 747     ///
 748     /// ```
 749     /// # #![feature(str_char)]
 750     /// let s = "Löwe 老虎 Léopard";
 751     /// let (c, s1) = s.slice_shift_char().unwrap();
 752     ///
 753     /// assert_eq!(c, 'L');
 754     /// assert_eq!(s1, "öwe 老虎 Léopard");
 755     ///
 756     /// let (c, s2) = s1.slice_shift_char().unwrap();
 757     ///
 758     /// assert_eq!(c, 'ö');
 759     /// assert_eq!(s2, "we 老虎 Léopard");
 760     /// ```
 761     #[unstable(feature = "str_char",
 762                reason = "awaiting conventions about shifting and slices and \
 763                          may not be warranted with the existence of the chars \
 764                          and/or char_indices iterators")]
 765     pub fn slice_shift_char(&self) -> Option<(char, &str)> {
 766         core_str::StrExt::slice_shift_char(self)
 767     }
 768
 769     /// Divide one string slice into two at an index.
 770     ///
 771     /// The index `mid` is a byte offset from the start of the string
 772     /// that must be on a character boundary.
 773     ///
 774     /// Return slices `&self[..mid]` and `&self[mid..]`.
 775     ///
 776     /// # Panics
 777     ///
 778     /// Panics if `mid` is beyond the last character of the string,
 779     /// or if it is not on a character boundary.
 780     ///
 781     /// # Examples
 782     /// ```
 783     /// # #![feature(collections)]
 784     /// let s = "Löwe 老虎 Léopard";
 785     /// let first_space = s.find(' ').unwrap_or(s.len());
 786     /// let (a, b) = s.split_at(first_space);
 787     ///
 788     /// assert_eq!(a, "Löwe");
 789     /// assert_eq!(b, " 老虎 Léopard");
 790     /// ```
 791     #[inline]
 792     pub fn split_at(&self, mid: usize) -> (&str, &str) {
 793         core_str::StrExt::split_at(self, mid)
 794     }
 795
 796     /// An iterator over the codepoints of `self`.
 797     ///
 798     /// # Examples
 799     ///
 800     /// ```
 801     /// let v: Vec<char> = "abc åäö".chars().collect();
 802     ///
 803     /// assert_eq!(v, ['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
 804     /// ```
 805     #[stable(feature = "rust1", since = "1.0.0")]
 806     pub fn chars(&self) -> Chars {
 807         core_str::StrExt::chars(self)
 808     }
 809
 810     /// An iterator over the characters of `self` and their byte offsets.
 811     ///
 812     /// # Examples
 813     ///
 814     /// ```
 815     /// let v: Vec<(usize, char)> = "abc".char_indices().collect();
 816     /// let b = vec![(0, 'a'), (1, 'b'), (2, 'c')];
 817     ///
 818     /// assert_eq!(v, b);
 819     /// ```
 820     #[stable(feature = "rust1", since = "1.0.0")]
 821     pub fn char_indices(&self) -> CharIndices {
 822         core_str::StrExt::char_indices(self)
 823     }
 824
 825     /// An iterator over the bytes of `self`.
 826     ///
 827     /// # Examples
 828     ///
 829     /// ```
 830     /// let v: Vec<u8> = "bors".bytes().collect();
 831     ///
 832     /// assert_eq!(v, b"bors".to_vec());
 833     /// ```
 834     #[stable(feature = "rust1", since = "1.0.0")]
 835     pub fn bytes(&self) -> Bytes {
 836         core_str::StrExt::bytes(self)
 837     }
 838
 839     /// An iterator over the non-empty substrings of `self` which contain no whitespace,
 840     /// and which are separated by any amount of whitespace.
 841     ///
 842     /// # Examples
 843     ///
 844     /// ```
 845     /// let some_words = " Mary   had\ta little  \n\t lamb";
 846     /// let v: Vec<&str> = some_words.split_whitespace().collect();
 847     ///
 848     /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
 849     /// ```
 850     #[stable(feature = "split_whitespace", since = "1.1.0")]
 851     pub fn split_whitespace(&self) -> SplitWhitespace {
 852         UnicodeStr::split_whitespace(self)
 853     }
 854
 855     /// An iterator over the non-empty, whitespace-free substrings of `self`.
 856     /// Deprecated since 1.1.0: use `split_whitespace()` instead.
 857     ///
 858     /// # Examples
 859     ///
 860     /// ```
 861     /// # #![feature(str_words)]
 862     /// # #![allow(deprecated)]
 863     /// let some_words = " Mary   had\ta little  \n\t lamb";
 864     /// let v: Vec<&str> = some_words.words().collect();
 865     ///
 866     /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
 867     /// ```
 868     #[deprecated(reason = "words() will be removed. Use split_whitespace() instead",
 869                  since = "1.1.0")]
 870     #[unstable(feature = "str_words",
 871                reason = "the precise algorithm to use is unclear")]
 872     #[allow(deprecated)]
 873     pub fn words(&self) -> Words {
 874         UnicodeStr::words(self)
 875     }
 876
 877     /// An iterator over the lines of a string, separated by `\n`.
 878     ///
 879     /// A trailing `\n` does not yield an additional empty string at the end.
 880     ///
 881     /// # Examples
 882     ///
 883     /// ```
 884     /// let four_lines = "foo\nbar\n\nbaz";
 885     /// let v: Vec<&str> = four_lines.lines().collect();
 886     ///
 887     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
 888     /// ```
 889     ///
 890     /// The same input with a trailing `\n` yields the same four lines:
 891     ///
 892     /// ```
 893     /// let four_lines = "foo\nbar\n\nbaz\n";
 894     /// let v: Vec<&str> = four_lines.lines().collect();
 895     ///
 896     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
 897     /// ```
 898     #[stable(feature = "rust1", since = "1.0.0")]
 899     pub fn lines(&self) -> Lines {
 900         core_str::StrExt::lines(self)
 901     }
 902
 903     /// An iterator over the lines of a string, separated by either
 904     /// `\n` or `\r\n`.
 905     ///
 906     /// As with `.lines()`, a trailing line ending does not yield an extra empty line.
 907     ///
 908     /// # Examples
 909     ///
 910     /// ```
 911     /// let four_lines = "foo\r\nbar\n\r\nbaz";
 912     /// let v: Vec<&str> = four_lines.lines_any().collect();
 913     ///
 914     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
 915     /// ```
 916     ///
 917     /// The same input with a trailing `\n` yields the same four lines:
 918     ///
 919     /// ```
 920     /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
 921     /// let v: Vec<&str> = four_lines.lines_any().collect();
 922     ///
 923     /// assert_eq!(v, ["foo", "bar", "", "baz"]);
 924     /// ```
 925     #[stable(feature = "rust1", since = "1.0.0")]
 926     pub fn lines_any(&self) -> LinesAny {
 927         core_str::StrExt::lines_any(self)
 928     }
 929
 930     /// Returns an iterator over the string in Unicode Normalization Form D
 931     /// (canonical decomposition).
 932     #[allow(deprecated)]
 933     #[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
 934              since = "1.0.0")]
 935     #[inline]
 936     #[unstable(feature = "unicode",
 937                reason = "this functionality may be replaced with a more generic \
 938                          unicode crate on crates.io")]
 939     pub fn nfd_chars(&self) -> Decompositions {
 940         Decompositions {
 941             iter: self[..].chars(),
 942             buffer: Vec::new(),
 943             sorted: false,
 944             kind: Canonical
 945         }
 946     }
 947
 948     /// Returns an iterator over the string in Unicode Normalization Form KD
 949     /// (compatibility decomposition).
 950     #[allow(deprecated)]
 951     #[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
 952              since = "1.0.0")]
 953     #[inline]
 954     #[unstable(feature = "unicode",
 955                reason = "this functionality may be replaced with a more generic \
 956                          unicode crate on crates.io")]
 957     pub fn nfkd_chars(&self) -> Decompositions {
 958         Decompositions {
 959             iter: self[..].chars(),
 960             buffer: Vec::new(),
 961             sorted: false,
 962             kind: Compatible
 963         }
 964     }
 965
 966     /// An Iterator over the string in Unicode Normalization Form C
 967     /// (canonical decomposition followed by canonical composition).
 968     #[allow(deprecated)]
 969     #[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
 970              since = "1.0.0")]
 971     #[inline]
 972     #[unstable(feature = "unicode",
 973                reason = "this functionality may be replaced with a more generic \
 974                          unicode crate on crates.io")]
 975     pub fn nfc_chars(&self) -> Recompositions {
 976         Recompositions {
 977             iter: self.nfd_chars(),
 978             state: Composing,
 979             buffer: VecDeque::new(),
 980             composee: None,
 981             last_ccc: None
 982         }
 983     }
 984
 985     /// An Iterator over the string in Unicode Normalization Form KC
 986     /// (compatibility decomposition followed by canonical composition).
 987     #[allow(deprecated)]
 988     #[deprecated(reason = "use the crates.io `unicode-normalization` library instead",
 989              since = "1.0.0")]
 990     #[inline]
 991     #[unstable(feature = "unicode",
 992                reason = "this functionality may be replaced with a more generic \
 993                          unicode crate on crates.io")]
 994     pub fn nfkc_chars(&self) -> Recompositions {
 995         Recompositions {
 996             iter: self.nfkd_chars(),
 997             state: Composing,
 998             buffer: VecDeque::new(),
 999             composee: None,
1000             last_ccc: None
1001         }
1002     }
1003
1004     /// Returns an iterator over the [grapheme clusters][graphemes] of `self`.
1005     ///
1006     /// [graphemes]: http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries
1007     ///
1008     /// If `is_extended` is `true`, the iterator is over the *extended grapheme
1009     /// clusters*; otherwise, it is over the *legacy grapheme clusters*.
1010     /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1011     /// recommends extended grapheme cluster boundaries for general processing.
1012     /// This method is deprecated in favor of the `unicode-segmentation` crate.
1013     ///
1014     /// # Examples
1015     ///
1016     /// ```
1017     /// # #![feature(unicode, core)]
1018     /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1019     /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1020     ///
1021     /// assert_eq!(&gr1[..], b);
1022     ///
1023     /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1024     /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1025     ///
1026     /// assert_eq!(&gr2[..], b);
1027     /// ```
1028     #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
1029              since = "1.0.0")]
1030     #[unstable(feature = "unicode",
1031                reason = "this functionality may only be provided by libunicode")]
1032     pub fn graphemes(&self, is_extended: bool) -> Graphemes {
1033         UnicodeStr::graphemes(self, is_extended)
1034     }
1035
1036     /// Returns an iterator over the grapheme clusters of `self` paired with
1037     /// the byte offset at which each cluster starts.
1038     /// See `graphemes()` for the meaning of `is_extended`.
1039     ///
1040     /// # Examples
1041     ///
1042     /// ```
1043     /// # #![feature(unicode, core)]
1044     /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(usize, &str)>>();
1045     /// let b: &[_] = &[(0, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1046     ///
1047     /// assert_eq!(&gr_inds[..], b);
1048     /// ```
1049     #[deprecated(reason = "use the crates.io `unicode-segmentation` library instead",
1050              since = "1.0.0")]
1051     #[unstable(feature = "unicode",
1052                reason = "this functionality may only be provided by libunicode")]
1053     pub fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1054         UnicodeStr::grapheme_indices(self, is_extended)
1055     }
1056
1057     /// Returns an iterator over the `u16` units of the string encoded as UTF-16.
1058     #[unstable(feature = "str_utf16",
1059                reason = "this functionality may only be provided by libunicode")]
1060     pub fn utf16_units(&self) -> Utf16Units {
1061         Utf16Units { encoder: Utf16Encoder::new(self[..].chars()) }
1062     }
1063
1064     /// Returns `true` if the pattern `pat` matches somewhere within `self`.
1065     ///
1066     /// # Examples
1067     ///
1068     /// ```
1069     /// assert!("bananas".contains("nana"));
1070     ///
1071     /// assert!(!"bananas".contains("foobar"));
1072     /// ```
1073     #[stable(feature = "rust1", since = "1.0.0")]
1074     pub fn contains<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1075         core_str::StrExt::contains(self, pat)
1076     }
1077
1078     /// Returns `true` if the pattern `pat` matches a prefix of `self`.
1079     ///
1080     /// # Examples
1081     ///
1082     /// ```
1083     /// assert!("banana".starts_with("ba"));
1084     /// ```
1085     #[stable(feature = "rust1", since = "1.0.0")]
1086     pub fn starts_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool {
1087         core_str::StrExt::starts_with(self, pat)
1088     }
1089
1090     /// Returns `true` if the pattern `pat` matches a suffix of `self`.
1091     ///
1092     /// # Examples
1093     ///
1094     /// ```
1095     /// assert!("banana".ends_with("nana"));
1096     /// ```
1097     #[stable(feature = "rust1", since = "1.0.0")]
1098     pub fn ends_with<'a, P: Pattern<'a>>(&'a self, pat: P) -> bool
1099         where P::Searcher: ReverseSearcher<'a>
1100     {
1101         core_str::StrExt::ends_with(self, pat)
1102     }
1103
1104     /// Returns the byte index of the start of the first match of the pattern
1105     /// in `self`, searching from the front. Indices are byte offsets, not
1106     /// character counts.
1107     ///
1108     /// Returns `None` if the pattern does not match anywhere in `self`.
1109     ///
1110     /// The pattern can be a simple `&str`, a `char`, a slice of `char`s, or
1111     /// a function or closure that decides whether a character
1112     /// matches.
1113     ///
1114     /// # Examples
1115     ///
1116     /// Simple patterns:
1117     ///
1118     /// ```
1119     /// let s = "Löwe 老虎 Léopard";
1120     ///
1121     /// assert_eq!(s.find('L'), Some(0));
1122     /// assert_eq!(s.find('é'), Some(14));
1123     /// assert_eq!(s.find("Léopard"), Some(13));
1124     ///
1125     /// ```
1126     ///
1127     /// More complex patterns, using predicate functions:
1128     ///
1129     /// ```
1130     /// let s = "Löwe 老虎 Léopard";
1131     ///
1132     /// assert_eq!(s.find(char::is_whitespace), Some(5));
1133     /// assert_eq!(s.find(char::is_lowercase), Some(1));
1134     /// ```
1135     ///
1136     /// Not finding the pattern:
1137     ///
1138     /// ```
1139     /// let s = "Löwe 老虎 Léopard";
1140     /// let x: &[_] = &['1', '2'];
1141     ///
1142     /// assert_eq!(s.find(x), None);
1143     /// ```
1144     #[stable(feature = "rust1", since = "1.0.0")]
1145     pub fn find<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize> {
1146         core_str::StrExt::find(self, pat)
1147     }
1148
1149     /// Returns the byte index of the start of the last match of the pattern
1150     /// in `self`, i.e. the first match found when searching from the back.
1151     /// Indices are byte offsets, not character counts.
1152     ///
1153     /// Returns `None` if the pattern does not match anywhere in `self`.
1154     ///
1155     /// The pattern can be a simple `&str`, a `char`, a slice of `char`s, or
1156     /// a function or closure that decides whether a character matches.
1157     ///
1158     /// # Examples
1159     ///
1160     /// Simple patterns:
1161     ///
1162     /// ```
1163     /// let s = "Löwe 老虎 Léopard";
1164     ///
1165     /// assert_eq!(s.rfind('L'), Some(13));
1166     /// assert_eq!(s.rfind('é'), Some(14));
1167     /// ```
1168     ///
1169     /// More complex patterns, using predicate functions:
1170     ///
1171     /// ```
1172     /// let s = "Löwe 老虎 Léopard";
1173     ///
1174     /// assert_eq!(s.rfind(char::is_whitespace), Some(12));
1175     /// assert_eq!(s.rfind(char::is_lowercase), Some(20));
1176     /// ```
1177     ///
1178     /// Not finding the pattern:
1179     ///
1180     /// ```
1181     /// let s = "Löwe 老虎 Léopard";
1182     /// let x: &[_] = &['1', '2'];
1183     ///
1184     /// assert_eq!(s.rfind(x), None);
1185     /// ```
1186     #[stable(feature = "rust1", since = "1.0.0")]
1187     pub fn rfind<'a, P: Pattern<'a>>(&'a self, pat: P) -> Option<usize>
1188         where P::Searcher: ReverseSearcher<'a>
1189     {
1190         core_str::StrExt::rfind(self, pat)
1191     }
1192
1193     /// An iterator over substrings of `self`, separated by characters
1194     /// matched by a pattern.
1195     ///
1196     /// The pattern can be a simple `&str`, `char`, or a closure that
1197     /// determines the split. Additional libraries might provide more complex
1198     /// patterns like regular expressions.
1199     ///
1200     /// # Iterator behavior
1201     ///
1202     /// The returned iterator will be double ended if the pattern allows
1203     /// a reverse search and a forward and a reverse search yield the
1204     /// same elements. This is true for, e.g., `char`, but not for
1205     /// `&str`.
1206     ///
1207     /// If the pattern allows a reverse search but its results might differ
1208     /// from a forward search, `rsplit()` can be used.
1209     ///
1210     /// # Examples
1211     ///
1212     /// Simple patterns:
1213     ///
1214     /// ```
1215     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
1216     /// assert_eq!(v, ["Mary", "had", "a", "little", "lamb"]);
1217     ///
1218     /// let v: Vec<&str> = "".split('X').collect();
1219     /// assert_eq!(v, [""]);
1220     ///
1221     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
1222     /// assert_eq!(v, ["lion", "", "tiger", "leopard"]);
1223     ///
1224     /// let v: Vec<&str> = "lion::tiger::leopard".split("::").collect();
1225     /// assert_eq!(v, ["lion", "tiger", "leopard"]);
1226     ///
1227     /// let v: Vec<&str> = "abc1def2ghi".split(char::is_numeric).collect();
1228     /// assert_eq!(v, ["abc", "def", "ghi"]);
1229     ///
1230     /// let v: Vec<&str> = "lionXtigerXleopard".split(char::is_uppercase).collect();
1231     /// assert_eq!(v, ["lion", "tiger", "leopard"]);
1232     /// ```
1233     ///
1234     /// A more complex pattern, using a closure:
1235     ///
1236     /// ```
1237     /// let v: Vec<&str> = "abc1defXghi".split(|c| c == '1' || c == 'X').collect();
1238     /// assert_eq!(v, ["abc", "def", "ghi"]);
1239     /// ```
1240     ///
1241     /// If a string contains multiple contiguous separators, you will end up
1242     /// with empty strings in the output:
1243     ///
1244     /// ```
1245     /// let x = "||||a||b|c".to_string();
1246     /// let d: Vec<_> = x.split('|').collect();
1247     ///
1248     /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
1249     /// ```
1250     ///
1251     /// This can lead to possibly surprising behavior when whitespace is used
1252     /// as the separator. This code is correct:
1253     ///
1254     /// ```
1255     /// let x = "    a  b c".to_string();
1256     /// let d: Vec<_> = x.split(' ').collect();
1257     ///
1258     /// assert_eq!(d, &["", "", "", "", "a", "", "b", "c"]);
1259     /// ```
1260     ///
1261     /// It does _not_ give you the following; use `split_whitespace()` for that:
1262     ///
1263     /// ```rust,ignore
1264     /// assert_eq!(d, &["a", "b", "c"]);
1265     /// ```
1266     #[stable(feature = "rust1", since = "1.0.0")]
1267     pub fn split<'a, P: Pattern<'a>>(&'a self, pat: P) -> Split<'a, P> {
1268         core_str::StrExt::split(self, pat)
1269     }
1270
1271     /// An iterator over substrings of `self`, separated by characters
1272     /// matched by a pattern and yielded in reverse order.
1273     ///
1274     /// The pattern can be a simple `&str`, `char`, or a closure that
1275     /// determines the split.
1276     /// Additional libraries might provide more complex patterns like
1277     /// regular expressions.
1278     ///
1279     /// # Iterator behavior
1280     ///
1281     /// The returned iterator requires that the pattern supports a
1282     /// reverse search (see the `ReverseSearcher` bound), and it will
1283     /// be double ended if a forward and a reverse search yield
1284     /// the same elements.
1285     ///
1286     /// For iterating from the front, `split()` can be used.
1287     ///
1288     /// # Examples
1289     ///
1290     /// Simple patterns:
1291     ///
1292     /// ```
1293     /// let v: Vec<&str> = "Mary had a little lamb".rsplit(' ').collect();
1294     /// assert_eq!(v, ["lamb", "little", "a", "had", "Mary"]);
1295     ///
1296     /// let v: Vec<&str> = "".rsplit('X').collect();
1297     /// assert_eq!(v, [""]);
1298     ///
1299     /// let v: Vec<&str> = "lionXXtigerXleopard".rsplit('X').collect();
1300     /// assert_eq!(v, ["leopard", "tiger", "", "lion"]);
1301     ///
1302     /// let v: Vec<&str> = "lion::tiger::leopard".rsplit("::").collect();
1303     /// assert_eq!(v, ["leopard", "tiger", "lion"]);
1304     /// ```
1305     ///
1306     /// A more complex pattern, using a closure:
1307     ///
1308     /// ```
1309     /// let v: Vec<&str> = "abc1defXghi".rsplit(|c| c == '1' || c == 'X').collect();
1310     /// assert_eq!(v, ["ghi", "def", "abc"]);
1311     /// ```
1312     #[stable(feature = "rust1", since = "1.0.0")]
1313     pub fn rsplit<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplit<'a, P>
1314         where P::Searcher: ReverseSearcher<'a>
1315     {
1316         core_str::StrExt::rsplit(self, pat)
1317     }
1318
1319     /// An iterator over substrings of `self`, separated by characters
1320     /// matched by a pattern.
1321     ///
1322     /// The pattern can be a simple `&str`, `char`, or a closure that
1323     /// determines the split.
1324     /// Additional libraries might provide more complex patterns
1325     /// like regular expressions.
1326     ///
1327     /// Equivalent to `split`, except that the trailing substring
1328     /// is skipped if empty.
1329     ///
1330     /// This method can be used for string data that is _terminated_,
1331     /// rather than _separated_ by a pattern.
1332     ///
1333     /// # Iterator behavior
1334     ///
1335     /// The returned iterator will be double ended if the pattern allows
1336     /// a reverse search and a forward and a reverse search yield the
1337     /// same elements. This is true for, e.g., `char`, but not for
1338     /// `&str`.
1339     ///
1340     /// If the pattern allows a reverse search but its results might differ
1341     /// from a forward search, `rsplit_terminator()` can be used.
1342     ///
1343     /// # Examples
1344     ///
1345     /// ```
1346     /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
1347     /// assert_eq!(v, ["A", "B"]);
1348     ///
1349     /// let v: Vec<&str> = "A..B..".split_terminator(".").collect();
1350     /// assert_eq!(v, ["A", "", "B", ""]);
1351     /// ```
1352     #[stable(feature = "rust1", since = "1.0.0")]
1353     pub fn split_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> SplitTerminator<'a, P> {
1354         core_str::StrExt::split_terminator(self, pat)
1355     }
1356
1357     /// An iterator over substrings of `self`, separated by characters
1358     /// matched by a pattern and yielded in reverse order.
1359     ///
1360     /// The pattern can be a simple `&str`, `char`, or a closure that
1361     /// determines the split.
1362     /// Additional libraries might provide more complex patterns like
1363     /// regular expressions.
1364     ///
1365     /// Equivalent to `rsplit`, except that the trailing substring of `self`
1366     /// is skipped if empty.
1367     ///
1368     /// This method can be used for string data that is _terminated_,
1369     /// rather than _separated_ by a pattern.
1370     ///
1371     /// # Iterator behavior
1372     ///
1373     /// The returned iterator requires that the pattern supports a
1374     /// reverse search, and it will be double ended if a forward and a
1375     /// reverse search yield the same elements.
1376     ///
1377     /// For iterating from the front, `split_terminator()` can be used.
1378     ///
1379     /// # Examples
1380     ///
1381     /// ```
1382     /// let v: Vec<&str> = "A.B.".rsplit_terminator('.').collect();
1383     /// assert_eq!(v, ["B", "A"]);
1384     ///
1385     /// let v: Vec<&str> = "A..B..".rsplit_terminator(".").collect();
1386     /// assert_eq!(v, ["", "B", "", "A"]);
1387     /// ```
1388     #[stable(feature = "rust1", since = "1.0.0")]
1389     pub fn rsplit_terminator<'a, P: Pattern<'a>>(&'a self, pat: P) -> RSplitTerminator<'a, P>
1390         where P::Searcher: ReverseSearcher<'a>
1391     {
1392         core_str::StrExt::rsplit_terminator(self, pat)
1393     }
1394
1395     /// An iterator over substrings of `self`, separated by a pattern,
1396     /// restricted to returning at most `count` items.
1397     ///
1398     /// The last element returned, if any, will contain the remainder of the
1399     /// string.
1400     ///
1401     /// The pattern can be a simple `&str`, `char`, or a closure that
1402     /// determines the split.
1403     /// Additional libraries might provide more complex patterns like
1404     /// regular expressions.
1405     ///
1406     /// # Iterator behavior
1407     ///
1408     /// The returned iterator will not be double ended, because it is
1409     /// not efficient to support.
1410     ///
1411     /// If the pattern allows a reverse search, `rsplitn()` can be used.
1412     ///
1413     /// # Examples
1414     ///
1415     /// Simple patterns:
1416     ///
1417     /// ```
1418     /// let v: Vec<&str> = "Mary had a little lambda".splitn(3, ' ').collect();
1419     /// assert_eq!(v, ["Mary", "had", "a little lambda"]);
1420     ///
1421     /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(3, "X").collect();
1422     /// assert_eq!(v, ["lion", "", "tigerXleopard"]);
1423     ///
1424     /// let v: Vec<&str> = "abcXdef".splitn(1, 'X').collect();
1425     /// assert_eq!(v, ["abcXdef"]);
1426     ///
1427     /// let v: Vec<&str> = "".splitn(1, 'X').collect();
1428     /// assert_eq!(v, [""]);
1429     /// ```
1430     ///
1431     /// A more complex pattern, using a closure:
1432     ///
1433     /// ```
1434     /// let v: Vec<&str> = "abc1defXghi".splitn(2, |c| c == '1' || c == 'X').collect();
1435     /// assert_eq!(v, ["abc", "defXghi"]);
1436     /// ```
1437     #[stable(feature = "rust1", since = "1.0.0")]
1438     pub fn splitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> SplitN<'a, P> {
1439         core_str::StrExt::splitn(self, count, pat)
1440     }
1441
1442     /// An iterator over substrings of `self`, separated by a pattern,
1443     /// starting from the end of the string, restricted to returning
1444     /// at most `count` items.
1445     ///
1446     /// The last element returned, if any, will contain the remainder of the
1447     /// string, i.e. its not yet split leading part.
1448     ///
1449     /// The pattern can be a simple `&str`, `char`, or a closure that
1450     /// determines the split.
1451     /// Additional libraries might provide more complex patterns like
1452     /// regular expressions.
1453     ///
1454     /// # Iterator behavior
1455     ///
1456     /// The returned iterator will not be double ended, because it is not
1457     /// efficient to support.
1458     ///
1459     /// `splitn()` can be used for splitting from the front.
1460     ///
1461     /// # Examples
1462     ///
1463     /// Simple patterns:
1464     ///
1465     /// ```
1466     /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(3, ' ').collect();
1467     /// assert_eq!(v, ["lamb", "little", "Mary had a"]);
1468     ///
1469     /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(3, 'X').collect();
1470     /// assert_eq!(v, ["leopard", "tiger", "lionX"]);
1471     ///
1472     /// let v: Vec<&str> = "lion::tiger::leopard".rsplitn(2, "::").collect();
1473     /// assert_eq!(v, ["leopard", "lion::tiger"]);
1474     /// ```
1475     ///
1476     /// A more complex pattern, using a closure:
1477     ///
1478     /// ```
1479     /// let v: Vec<&str> = "abc1defXghi".rsplitn(2, |c| c == '1' || c == 'X').collect();
1480     /// assert_eq!(v, ["ghi", "abc1def"]);
1481     /// ```
1482     #[stable(feature = "rust1", since = "1.0.0")]
1483     pub fn rsplitn<'a, P: Pattern<'a>>(&'a self, count: usize, pat: P) -> RSplitN<'a, P>
1484         where P::Searcher: ReverseSearcher<'a>
1485     {
1486         core_str::StrExt::rsplitn(self, count, pat)
1487     }
1488
1489     /// An iterator over the matches of a pattern within `self`.
1490     ///
1491     /// The pattern can be a simple `&str`, `char`, or a closure that
1492     /// determines whether a character matches.
1493     /// Additional libraries might provide more complex patterns like
1494     /// regular expressions.
1495     ///
1496     /// # Iterator behavior
1497     ///
1498     /// The returned iterator will be double ended if the pattern allows
1499     /// a reverse search and a forward and a reverse search yield the
1500     /// same elements.
1501     ///
1502     /// This is true for, e.g., `char`, but not for `&str`.
1503     ///
1504     /// If the pattern allows a reverse search but its results might differ
1505     /// from a forward search, `rmatches()` can be used.
1506     ///
1507     /// # Examples
1508     ///
1509     /// ```
1510     /// let v: Vec<&str> = "abcXXXabcYYYabc".matches("abc").collect();
1511     /// assert_eq!(v, ["abc", "abc", "abc"]);
1512     ///
1513     /// let v: Vec<&str> = "1abc2abc3".matches(char::is_numeric).collect();
1514     /// assert_eq!(v, ["1", "2", "3"]);
1515     /// ```
1516     #[stable(feature = "str_matches", since = "1.2.0")]
1517     pub fn matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> Matches<'a, P> {
1518         core_str::StrExt::matches(self, pat)
1519     }
1520
1521     /// An iterator over the matches of a pattern within `self`, yielded in
1522     /// reverse order.
1523     ///
1524     /// The pattern can be a simple `&str`, `char`, or a closure that
1525     /// determines whether a character matches.
1526     /// Additional libraries might provide more complex patterns like
1527     /// regular expressions.
1528     ///
1529     /// # Iterator behavior
1530     ///
1531     /// The returned iterator requires that the pattern supports a
1532     /// reverse search (see the `ReverseSearcher` bound), and it will
1533     /// be double ended if a forward and a reverse search yield
1534     /// the same elements.
1535     ///
1536     /// For iterating from the front, `matches()` can be used.
1537     ///
1538     /// # Examples
1539     ///
1540     /// ```
1541     /// let v: Vec<&str> = "abcXXXabcYYYabc".rmatches("abc").collect();
1542     /// assert_eq!(v, ["abc", "abc", "abc"]);
1543     ///
1544     /// let v: Vec<&str> = "1abc2abc3".rmatches(char::is_numeric).collect();
1545     /// assert_eq!(v, ["3", "2", "1"]);
1546     /// ```
1547     #[stable(feature = "str_matches", since = "1.2.0")]
1548     pub fn rmatches<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatches<'a, P>
1549         where P::Searcher: ReverseSearcher<'a>
1550     {
1551         core_str::StrExt::rmatches(self, pat)
1552     }
1553
1554     /// An iterator over the start and end indices of the disjoint matches
1555     /// of a pattern within `self`.
1556     ///
1557     /// Each item is a `(start, end)` pair with an exclusive `end`. For
1558     /// matches of `pat` within `self` that overlap, only the indices
1559     /// corresponding to the first match are returned.
1560     ///
1561     /// The pattern can be a simple `&str`, `char`, or a closure that
1562     /// determines whether a character matches.
1563     ///
1564     /// Additional libraries might provide more complex patterns like
1565     /// regular expressions.
1566     ///
1567     /// # Iterator behavior
1568     ///
1569     /// The returned iterator will be double ended if the pattern allows
1570     /// a reverse search and a forward and a reverse search yield the
1571     /// same elements.
1572     ///
1573     /// This is true for, e.g., `char`, but not for `&str`.
1574     ///
1575     /// If the pattern allows a reverse search but its results might differ
1576     /// from a forward search, `rmatch_indices()` can be used.
1577     ///
1578     /// # Examples
1579     ///
1580     /// ```
1581     /// # #![feature(str_match_indices)]
1582     /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".match_indices("abc").collect();
1583     /// assert_eq!(v, [(0, 3), (6, 9), (12, 15)]);
1584     ///
1585     /// let v: Vec<(usize, usize)> = "1abcabc2".match_indices("abc").collect();
1586     /// assert_eq!(v, [(1, 4), (4, 7)]);
1587     ///
1588     /// let v: Vec<(usize, usize)> = "ababa".match_indices("aba").collect();
1589     /// assert_eq!(v, [(0, 3)]); // only the first `aba`
1590     /// ```
1591     #[unstable(feature = "str_match_indices",
1592                reason = "might have its iterator type changed")]
1593     // NB: Right now MatchIndices yields `(usize, usize)`, but it would
1594     // be more consistent with `matches` and `char_indices` to return `(usize, &str)`
1595     pub fn match_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> MatchIndices<'a, P> {
1596         core_str::StrExt::match_indices(self, pat)
1597     }
1598
1599     /// An iterator over the start and end indices of the disjoint matches of
1600     /// a pattern within `self`, yielded in reverse order. Each item is a
1601     /// `(start, end)` pair with an exclusive `end`.
1602     ///
1603     /// For matches of `pat` within `self` that overlap, only the indices
1604     /// corresponding to the last
1605     /// match are returned.
1606     ///
1607     /// The pattern can be a simple `&str`, `char`, or a closure that
1608     /// determines whether a character matches.
1609     ///
1610     /// Additional libraries might provide more complex patterns like
1611     /// regular expressions.
1612     ///
1613     /// # Iterator behavior
1614     ///
1615     /// The returned iterator requires that the pattern supports a
1616     /// reverse search (see the `ReverseSearcher` bound), and it will
1617     /// be double ended if a forward and a reverse search yield
1618     /// the same elements.
1619     ///
1620     /// For iterating from the front, `match_indices()` can be used.
1621     ///
1622     /// # Examples
1623     ///
1624     /// ```
1625     /// # #![feature(str_match_indices)]
1626     /// let v: Vec<(usize, usize)> = "abcXXXabcYYYabc".rmatch_indices("abc").collect();
1627     /// assert_eq!(v, [(12, 15), (6, 9), (0, 3)]);
1628     ///
1629     /// let v: Vec<(usize, usize)> = "1abcabc2".rmatch_indices("abc").collect();
1630     /// assert_eq!(v, [(4, 7), (1, 4)]);
1631     ///
1632     /// let v: Vec<(usize, usize)> = "ababa".rmatch_indices("aba").collect();
1633     /// assert_eq!(v, [(2, 5)]); // only the last `aba`
1634     /// ```
1635     #[unstable(feature = "str_match_indices",
1636                reason = "might have its iterator type changed")]
1637     // NB: Right now RMatchIndices yields `(usize, usize)`, but it would
1638     // be more consistent with `rmatches` and `char_indices` to return `(usize, &str)`
1639     pub fn rmatch_indices<'a, P: Pattern<'a>>(&'a self, pat: P) -> RMatchIndices<'a, P>
1640         where P::Searcher: ReverseSearcher<'a>
1641     {
1642         core_str::StrExt::rmatch_indices(self, pat)
1643     }
1644
1645     /// Returns the byte offset of the slice `inner` relative to the
1646     /// enclosing outer slice `self`.
1647     ///
1648     /// # Panics
1649     ///
1650     /// Panics if `inner` is not a direct slice contained within `self`.
1651     ///
1652     /// # Examples
1653     ///
1654     /// ```
1655     /// # #![feature(subslice_offset)]
1656     /// let string = "a\nb\nc";
1657     /// let lines: Vec<&str> = string.lines().collect();
1658     ///
1659     /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1660     /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1661     /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1662     /// ```
1663     #[unstable(feature = "subslice_offset",
1664                reason = "awaiting convention about comparability of arbitrary slices")]
1665     pub fn subslice_offset(&self, inner: &str) -> usize {
1666         core_str::StrExt::subslice_offset(self, inner)
1667     }
1668
1669     /// Returns a `&str` with both leading and trailing whitespace removed.
1670     ///
1671     /// # Examples
1672     ///
1673     /// ```
1674     /// let s = " Hello\tworld\t";
1675     /// assert_eq!(s.trim(), "Hello\tworld");
1676     /// ```
1677     #[stable(feature = "rust1", since = "1.0.0")]
1678     pub fn trim(&self) -> &str {
1679         UnicodeStr::trim(self)
1680     }
1681
    /// Returns a `&str` with leading whitespace removed.
    ///
    /// The returned slice borrows from `self`; no new string is allocated.
    /// Whitespace after the first non-whitespace character, including any
    /// trailing whitespace, is kept.
    ///
    /// # Examples
    ///
    /// ```
    /// let s = " Hello\tworld\t";
    /// assert_eq!(s.trim_left(), "Hello\tworld\t");
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_left(&self) -> &str {
        // Fully-qualified call: forwards to the `UnicodeStr` trait
        // implementation, which decides what counts as whitespace.
        UnicodeStr::trim_left(self)
    }
1694
    /// Returns a `&str` with trailing whitespace removed.
    ///
    /// The returned slice borrows from `self`; no new string is allocated.
    /// Whitespace before the last non-whitespace character, including any
    /// leading whitespace, is kept.
    ///
    /// # Examples
    ///
    /// ```
    /// let s = " Hello\tworld\t";
    /// assert_eq!(s.trim_right(), " Hello\tworld");
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_right(&self) -> &str {
        // Fully-qualified call: forwards to the `UnicodeStr` trait
        // implementation, which decides what counts as whitespace.
        UnicodeStr::trim_right(self)
    }
1707
    /// Returns a string with all pre- and suffixes that match a pattern
    /// repeatedly removed.
    ///
    /// The pattern can be a simple `char`, a slice of `char`s, or a function
    /// or closure that determines whether a character matches.
    ///
    /// The pattern's searcher must implement `DoubleEndedSearcher`, because
    /// matches are stripped from both ends of the string.
    ///
    /// # Examples
    ///
    /// Simple patterns:
    ///
    /// ```
    /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
    /// assert_eq!("123foo1bar123".trim_matches(char::is_numeric), "foo1bar");
    ///
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
    /// ```
    ///
    /// A more complex pattern, using a closure:
    ///
    /// ```
    /// assert_eq!("1foo1barXX".trim_matches(|c| c == '1' || c == 'X'), "foo1bar");
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: DoubleEndedSearcher<'a>
    {
        // Fully-qualified call: forwards to the `core_str::StrExt` trait
        // implementation.
        core_str::StrExt::trim_matches(self, pat)
    }
1737
    /// Returns a string with all prefixes that match a pattern
    /// repeatedly removed.
    ///
    /// The pattern can be a simple `&str`, `char`, or a closure that
    /// determines whether a character matches.
    ///
    /// Only the start of the string is trimmed; matches at the end are kept.
    ///
    /// # Examples
    ///
    /// ```
    /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
    /// assert_eq!("123foo1bar123".trim_left_matches(char::is_numeric), "foo1bar123");
    ///
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_left_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str {
        // Fully-qualified call: forwards to the `core_str::StrExt` trait
        // implementation.
        core_str::StrExt::trim_left_matches(self, pat)
    }
1757
    /// Returns a string with all suffixes that match a pattern
    /// repeatedly removed.
    ///
    /// The pattern can be a simple `&str`, `char`, or a closure that
    /// determines whether a character matches.
    ///
    /// Only the end of the string is trimmed; matches at the start are kept.
    /// The pattern's searcher must implement `ReverseSearcher`.
    ///
    /// # Examples
    ///
    /// Simple patterns:
    ///
    /// ```
    /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
    /// assert_eq!("123foo1bar123".trim_right_matches(char::is_numeric), "123foo1bar");
    ///
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
    /// ```
    ///
    /// A more complex pattern, using a closure:
    ///
    /// ```
    /// assert_eq!("1fooX".trim_right_matches(|c| c == '1' || c == 'X'), "1foo");
    /// ```
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn trim_right_matches<'a, P: Pattern<'a>>(&'a self, pat: P) -> &'a str
        where P::Searcher: ReverseSearcher<'a>
    {
        // Fully-qualified call: forwards to the `core_str::StrExt` trait
        // implementation.
        core_str::StrExt::trim_right_matches(self, pat)
    }
1787
    /// Parses `self` into the specified type.
    ///
    /// The target type `F` must implement `FromStr`. It can be named
    /// explicitly with the turbofish syntax, as in `"4".parse::<u32>()`.
    ///
    /// # Errors
    ///
    /// Will return `Err` (carrying the target type's `F::Err`) if it's not
    /// possible to parse `self` into the type.
    ///
    /// # Examples
    ///
    /// ```
    /// assert_eq!("4".parse::<u32>(), Ok(4));
    /// ```
    ///
    /// Failing:
    ///
    /// ```
    /// assert!("j".parse::<u32>().is_err());
    /// ```
    #[inline]
    #[stable(feature = "rust1", since = "1.0.0")]
    pub fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
        // Fully-qualified call: forwards to the `core_str::StrExt` trait
        // implementation.
        core_str::StrExt::parse(self)
    }
1810
1811     /// Replaces all occurrences of one string with another.
1812     ///
1813     /// `replace` takes two arguments, a sub-`&str` to find in `self`, and a
1814     /// second `&str` to
1815     /// replace it with. If the original `&str` isn't found, no change occurs.
1816     ///
1817     /// # Examples
1818     ///
1819     /// ```
1820     /// let s = "this is old";
1821     ///
1822     /// assert_eq!(s.replace("old", "new"), "this is new");
1823     /// ```
1824     ///
1825     /// When a `&str` isn't found:
1826     ///
1827     /// ```
1828     /// let s = "this is old";
1829     /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
1830     /// ```
1831     #[stable(feature = "rust1", since = "1.0.0")]
1832     pub fn replace(&self, from: &str, to: &str) -> String {
1833         let mut result = String::new();
1834         let mut last_end = 0;
1835         for (start, end) in self.match_indices(from) {
1836             result.push_str(unsafe { self.slice_unchecked(last_end, start) });
1837             result.push_str(to);
1838             last_end = end;
1839         }
1840         result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
1841         result
1842     }
1843
1844     /// Returns the lowercase equivalent of this string.
1845     ///
1846     /// # Examples
1847     ///
1848     /// ```
1849     /// let s = "HELLO";
1850     /// assert_eq!(s.to_lowercase(), "hello");
1851     /// ```
1852     #[stable(feature = "unicode_case_mapping", since = "1.2.0")]
1853     pub fn to_lowercase(&self) -> String {
1854         let mut s = String::with_capacity(self.len());
1855         for (i, c) in self[..].char_indices() {
1856             if c == 'Σ' {
1857                 // Σ maps to σ, except at the end of a word where it maps to ς.
1858                 // This is the only conditional (contextual) but language-independent mapping
1859                 // in `SpecialCasing.txt`,
1860                 // so hard-code it rather than have a generic "condition" mechanim.
1861                 // See https://github.com/rust-lang/rust/issues/26035
1862                 map_uppercase_sigma(self, i, &mut s)
1863             } else {
1864                 s.extend(c.to_lowercase());
1865             }
1866         }
1867         return s;
1868
1869         fn map_uppercase_sigma(from: &str, i: usize, to: &mut String) {
1870             // See http://www.unicode.org/versions/Unicode7.0.0/ch03.pdf#G33992
1871             // for the definition of `Final_Sigma`.
1872             debug_assert!('Σ'.len_utf8() == 2);
1873             let is_word_final =
1874                 case_ignoreable_then_cased(from[..i].chars().rev()) &&
1875                 !case_ignoreable_then_cased(from[i + 2..].chars());
1876             to.push_str(if is_word_final { "ς" } else { "σ" });
1877         }
1878
1879         fn case_ignoreable_then_cased<I: Iterator<Item=char>>(iter: I) -> bool {
1880             use rustc_unicode::derived_property::{Cased, Case_Ignorable};
1881             match iter.skip_while(|&c| Case_Ignorable(c)).next() {
1882                 Some(c) => Cased(c),
1883                 None => false,
1884             }
1885         }
1886     }
1887
1888     /// Returns the uppercase equivalent of this string.
1889     ///
1890     /// # Examples
1891     ///
1892     /// ```
1893     /// let s = "hello";
1894     /// assert_eq!(s.to_uppercase(), "HELLO");
1895     /// ```
1896     #[stable(feature = "unicode_case_mapping", since = "1.2.0")]
1897     pub fn to_uppercase(&self) -> String {
1898         let mut s = String::with_capacity(self.len());
1899         s.extend(self.chars().flat_map(|c| c.to_uppercase()));
1900         return s;
1901     }
1902
1903     /// Escapes each char in `s` with `char::escape_default`.
1904     #[unstable(feature = "str_escape",
1905                reason = "return type may change to be an iterator")]
1906     pub fn escape_default(&self) -> String {
1907         self.chars().flat_map(|c| c.escape_default()).collect()
1908     }
1909
1910     /// Escapes each char in `s` with `char::escape_unicode`.
1911     #[unstable(feature = "str_escape",
1912                reason = "return type may change to be an iterator")]
1913     pub fn escape_unicode(&self) -> String {
1914         self.chars().flat_map(|c| c.escape_unicode()).collect()
1915     }
1916
1917     /// Converts the `Box<str>` into a `String` without copying or allocating.
1918     #[unstable(feature = "box_str",
1919                reason = "recently added, matches RFC")]
1920     pub fn into_string(self: Box<str>) -> String {
1921         unsafe {
1922             let slice = mem::transmute::<Box<str>, Box<[u8]>>(self);
1923             String::from_utf8_unchecked(slice.into_vec())
1924         }
1925     }
1926 }