src/libcollections/str.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // ignore-lexer-test FIXME #15679
  12
  13 //! Unicode string manipulation (`str` type)
  14 //!
  15 //! # Basic Usage
  16 //!
  17 //! Rust's string type is one of the core primitive types of the language. While
  18 //! represented by the name `str`, the name `str` is not actually a valid type in
  19 //! Rust. Each string must also be decorated with a pointer. `String` is used
  20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
  21 //! `&str`.
  22 //!
  23 //! `&str` is the borrowed string type. This type of string can only be created
  24 //! from other strings, unless it is a static string (see below). As the word
  25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
  26 //! cannot be moved out of.
  27 //!
  28 //! As an example, here's some code that uses a string.
  29 //!
  30 //! ```rust
  31 //! fn main() {
  32 //!     let borrowed_string = "This string is borrowed with the 'static lifetime";
  33 //! }
  34 //! ```
  35 //!
  36 //! From the example above, you can guess that Rust's string literals have the
  37 //! `'static` lifetime. This is akin to C's concept of a static string.
  38 //! More precisely, string literals are immutable views with a 'static lifetime
  39 //! (otherwise known as the lifetime of the entire program), and thus have the
  40 //! type `&'static str`.
  41 //!
  42 //! # Representation
  43 //!
  44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
  45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
  46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
  47 //! not null-terminated and can thus contain null bytes.
  48 //!
//! The actual representation of a string maps directly onto a slice: `&str`
//! is the same as `&[u8]`.
  51
  52 #![doc(primitive = "str")]
  53
  54 use self::RecompositionState::*;
  55 use self::DecompositionType::*;
  56
  57 use core::borrow::{BorrowFrom, ToOwned};
  58 use core::char::CharExt;
  59 use core::clone::Clone;
  60 use core::iter::AdditiveIterator;
  61 use core::iter::{range, Iterator, IteratorExt};
  62 use core::kinds::Sized;
  63 use core::ops;
  64 use core::option::Option::{self, Some, None};
  65 use core::slice::AsSlice;
  66 use core::str as core_str;
  67 use unicode::str::{UnicodeStr, Utf16Encoder};
  68
  69 use ring_buf::RingBuf;
  70 use slice::SliceExt;
  71 use string::String;
  72 use unicode;
  73 use vec::Vec;
  74 use slice::SliceConcatExt;
  75
  76 pub use core::str::{FromStr, Utf8Error, Str};
  77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
  78 pub use core::str::{Split, SplitTerminator};
  79 pub use core::str::{SplitN, RSplitN};
  80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
  81 pub use core::str::{from_utf8_unchecked, from_c_str};
  82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
  83
  84 /*
  85 Section: Creating a string
  86 */
  87
  88 impl<S: Str> SliceConcatExt<str, String> for [S] {
  89     fn concat(&self) -> String {
  90         let s = self.as_slice();
  91
  92         if s.is_empty() {
  93             return String::new();
  94         }
  95
  96         // `len` calculation may overflow but push_str will check boundaries
  97         let len = s.iter().map(|s| s.as_slice().len()).sum();
  98         let mut result = String::with_capacity(len);
  99
 100         for s in s.iter() {
 101             result.push_str(s.as_slice())
 102         }
 103
 104         result
 105     }
 106
 107     fn connect(&self, sep: &str) -> String {
 108         let s = self.as_slice();
 109
 110         if s.is_empty() {
 111             return String::new();
 112         }
 113
 114         // concat is faster
 115         if sep.is_empty() {
 116             return s.concat();
 117         }
 118
 119         // this is wrong without the guarantee that `self` is non-empty
 120         // `len` calculation may overflow but push_str but will check boundaries
 121         let len = sep.len() * (s.len() - 1)
 122             + s.iter().map(|s| s.as_slice().len()).sum();
 123         let mut result = String::with_capacity(len);
 124         let mut first = true;
 125
 126         for s in s.iter() {
 127             if first {
 128                 first = false;
 129             } else {
 130                 result.push_str(sep);
 131             }
 132             result.push_str(s.as_slice());
 133         }
 134         result
 135     }
 136 }
 137
 138 /*
 139 Section: Iterators
 140 */
 141
 142 // Helper functions used for Unicode normalization
 143 fn canonical_sort(comb: &mut [(char, u8)]) {
 144     let len = comb.len();
 145     for i in range(0, len) {
 146         let mut swapped = false;
 147         for j in range(1, len-i) {
 148             let class_a = comb[j-1].1;
 149             let class_b = comb[j].1;
 150             if class_a != 0 && class_b != 0 && class_a > class_b {
 151                 comb.swap(j-1, j);
 152                 swapped = true;
 153             }
 154         }
 155         if !swapped { break; }
 156     }
 157 }
 158
/// Selects which decomposition routine a `Decompositions` iterator applies
/// to each source character.
#[derive(Clone)]
enum DecompositionType {
    /// Decompose with `unicode::char::decompose_canonical`.
    Canonical,
    /// Decompose with `unicode::char::decompose_compatible`.
    Compatible
}
 164
/// External iterator over the characters of a string's decomposition.
/// Use with the `std::iter` module.
///
/// Values of this type are produced by `nfd_chars` and `nfkd_chars`.
#[derive(Clone)]
#[unstable]
pub struct Decompositions<'a> {
    // Which decomposition routine is applied to each source character.
    kind: DecompositionType,
    // Source characters that have not been decomposed yet.
    iter: Chars<'a>,
    // Decomposed characters paired with their canonical combining class,
    // waiting to be yielded from the front.
    buffer: Vec<(char, u8)>,
    // Set once `canonical_sort` has been applied to the buffered characters;
    // cleared again whenever a class-0 character is yielded.
    sorted: bool
}
 175
#[stable]
impl<'a> Iterator for Decompositions<'a> {
    type Item = char;

    /// Yields the next decomposed character, or `None` once both the source
    /// iterator and the internal buffer are exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        // Fast path: serve the front of the buffer when that is allowed.
        match self.buffer.first() {
            // A class-0 character can always be yielded; whatever follows it
            // has to be sorted again before it may leave the buffer.
            Some(&(c, 0)) => {
                self.sorted = false;
                // NOTE(review): `remove(0)` on a `Vec` shifts the remaining
                // elements; presumably acceptable because buffers stay short.
                self.buffer.remove(0);
                return Some(c);
            }
            // A combining character may only be yielded once the buffer has
            // been put into canonical order.
            Some(&(c, _)) if self.sorted => {
                self.buffer.remove(0);
                return Some(c);
            }
            // Empty buffer, or combining characters that are not sorted yet.
            _ => self.sorted = false
        }

        if !self.sorted {
            // Refill: decompose source characters until a class-0 character
            // shows up in the decomposition output.
            for ch in self.iter {
                let buffer = &mut self.buffer;
                let sorted = &mut self.sorted;
                {
                    // Invoked once per character of the decomposition of `ch`.
                    let callback = |&mut: d| {
                        let class =
                            unicode::char::canonical_combining_class(d);
                        // The first class-0 character closes the pending run:
                        // sort what is buffered so far, then append it.
                        if class == 0 && !*sorted {
                            canonical_sort(buffer.as_mut_slice());
                            *sorted = true;
                        }
                        buffer.push((d, class));
                    };
                    match self.kind {
                        Canonical => {
                            unicode::char::decompose_canonical(ch, callback)
                        }
                        Compatible => {
                            unicode::char::decompose_compatible(ch, callback)
                        }
                    }
                }
                if *sorted {
                    break
                }
            }
        }

        // The source ran out without producing a class-0 character: sort
        // whatever is left so it can be drained.
        if !self.sorted {
            canonical_sort(self.buffer.as_mut_slice());
            self.sorted = true;
        }

        if self.buffer.is_empty() {
            None
        } else {
            match self.buffer.remove(0) {
                // Same rule as in the fast path: yielding a class-0 character
                // invalidates the sorted flag.
                (c, 0) => {
                    self.sorted = false;
                    Some(c)
                }
                (c, _) => Some(c),
            }
        }
    }

    /// Reports the lower bound of the underlying character iterator and no
    /// upper bound.
    fn size_hint(&self) -> (uint, Option<uint>) {
        let (lower, _) = self.iter.size_hint();
        (lower, None)
    }
}
 247
/// Phase of the state machine driven by `Recompositions::next`.
#[derive(Clone)]
enum RecompositionState {
    /// Reading decomposed characters and trying to combine them with the
    /// current `composee`.
    Composing,
    /// Draining `buffer` after a finished `composee` has been returned;
    /// switches back to `Composing` once the buffer is empty.
    Purging,
    /// The decomposition iterator is exhausted; only buffered characters
    /// remain to be returned.
    Finished
}
 254
/// External iterator over the characters of a string's recomposition.
/// Use with the `std::iter` module.
///
/// Values of this type are produced by `nfc_chars` and `nfkc_chars`.
#[derive(Clone)]
#[unstable]
pub struct Recompositions<'a> {
    // Decomposed characters that serve as input for composition.
    iter: Decompositions<'a>,
    // Current phase of the state machine in `next`.
    state: RecompositionState,
    // Characters that could not be combined with `composee`; they are
    // returned after it, in the order they were buffered.
    buffer: RingBuf<char>,
    // Character that following characters are currently composed onto.
    composee: Option<char>,
    // Combining class of the character most recently pushed to `buffer`;
    // `None` while nothing has been buffered for the current `composee`.
    last_ccc: Option<u8>
}
 266
#[stable]
impl<'a> Iterator for Recompositions<'a> {
    type Item = char;

    /// Yields the next recomposed character, or `None` once the input, the
    /// pending `composee` and the buffer are all exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        loop {
            match self.state {
                Composing => {
                    for ch in self.iter {
                        let ch_class = unicode::char::canonical_combining_class(ch);
                        if self.composee.is_none() {
                            // No base character yet: a combining character is
                            // passed through, a class-0 character becomes the
                            // new composee.
                            if ch_class != 0 {
                                return Some(ch);
                            }
                            self.composee = Some(ch);
                            continue;
                        }
                        let k = self.composee.clone().unwrap();

                        match self.last_ccc {
                            // Nothing buffered since the composee was set, so
                            // `ch` directly follows it.
                            None => {
                                match unicode::char::compose(k, ch) {
                                    Some(r) => {
                                        self.composee = Some(r);
                                        continue;
                                    }
                                    None => {
                                        // A class-0 character that does not
                                        // combine starts a new composee and
                                        // releases the old one.
                                        if ch_class == 0 {
                                            self.composee = Some(ch);
                                            return Some(k);
                                        }
                                        self.buffer.push_back(ch);
                                        self.last_ccc = Some(ch_class);
                                    }
                                }
                            }
                            // At least one uncombined character sits between
                            // the composee and `ch`.
                            Some(l_class) => {
                                if l_class >= ch_class {
                                    // `ch` is blocked from `composee`
                                    if ch_class == 0 {
                                        // Release the old composee now and
                                        // drain the buffered characters on
                                        // the following calls.
                                        self.composee = Some(ch);
                                        self.last_ccc = None;
                                        self.state = Purging;
                                        return Some(k);
                                    }
                                    self.buffer.push_back(ch);
                                    self.last_ccc = Some(ch_class);
                                    continue;
                                }
                                match unicode::char::compose(k, ch) {
                                    Some(r) => {
                                        self.composee = Some(r);
                                        continue;
                                    }
                                    None => {
                                        self.buffer.push_back(ch);
                                        self.last_ccc = Some(ch_class);
                                    }
                                }
                            }
                        }
                    }
                    // Input exhausted: return the pending composee first, the
                    // buffered characters follow in the `Finished` state.
                    self.state = Finished;
                    if self.composee.is_some() {
                        return self.composee.take();
                    }
                }
                Purging => {
                    // Drain the buffer, then resume composing with the
                    // composee that was stored before entering this state.
                    match self.buffer.pop_front() {
                        None => self.state = Composing,
                        s => return s
                    }
                }
                Finished => {
                    // Drain the buffer; once it is empty, hand out whatever is
                    // left in `composee` (`None` if it was already taken).
                    match self.buffer.pop_front() {
                        None => return self.composee.take(),
                        s => return s
                    }
                }
            }
        }
    }
}
 351
/// External iterator over a string's UTF-16 code units.
/// Use with the `std::iter` module.
#[derive(Clone)]
#[unstable]
pub struct Utf16Units<'a> {
    // Encoder wrapped around the string's character iterator; it produces
    // the `u16` values this iterator yields.
    encoder: Utf16Encoder<Chars<'a>>
}
 359
#[stable]
impl<'a> Iterator for Utf16Units<'a> {
    type Item = u16;

    /// Returns the next `u16` produced by the wrapped encoder.
    #[inline]
    fn next(&mut self) -> Option<u16> { self.encoder.next() }

    /// Forwards the size hint of the wrapped encoder unchanged.
    #[inline]
    fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
}
 370
 371 /*
 372 Section: Misc
 373 */
 374
// Returns the initial code point accumulator for the first byte of a
// multi-byte UTF-8 sequence, as a `u32`.
// The first byte is special: only its bottom 5 bits are wanted for width 2,
// 4 bits for width 3, and 3 bits for width 4. The mask `0x7F >> $width`
// keeps exactly those bits.
macro_rules! utf8_first_byte {
    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
}
 381
// Returns the value of $ch updated with continuation byte $byte: the
// accumulator is shifted left by six bits and the low six bits of $byte
// (mask 63) are merged in.
macro_rules! utf8_acc_cont_byte {
    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
}
 386
#[unstable = "trait is unstable"]
impl BorrowFrom<String> for str {
    /// Borrows the full contents of an owned `String` as a `&str`.
    fn borrow_from(owned: &String) -> &str { owned[] }
}
 391
#[unstable = "trait is unstable"]
impl ToOwned<String> for str {
    /// Copies the bytes of this string slice into a newly allocated `String`.
    fn to_owned(&self) -> String {
        // The bytes are taken from a `str`, which is guaranteed to hold valid
        // UTF-8, so the validity check can be skipped.
        unsafe {
            String::from_utf8_unchecked(self.as_bytes().to_owned())
        }
    }
}
 400
 401 /*
 402 Section: CowString
 403 */
 404
 405 /*
 406 Section: Trait implementations
 407 */
 408
 409 /// Any string that can be represented as a slice.
 410 pub trait StrExt for Sized?: ops::Slice<uint, str> {
 411     /// Escapes each char in `s` with `char::escape_default`.
 412     #[unstable = "return type may change to be an iterator"]
 413     fn escape_default(&self) -> String {
 414         self.chars().flat_map(|c| c.escape_default()).collect()
 415     }
 416
 417     /// Escapes each char in `s` with `char::escape_unicode`.
 418     #[unstable = "return type may change to be an iterator"]
 419     fn escape_unicode(&self) -> String {
 420         self.chars().flat_map(|c| c.escape_unicode()).collect()
 421     }
 422
 423     /// Replaces all occurrences of one string with another.
 424     ///
 425     /// # Arguments
 426     ///
 427     /// * `from` - The string to replace
 428     /// * `to` - The replacement string
 429     ///
 430     /// # Return value
 431     ///
 432     /// The original string with all occurrences of `from` replaced with `to`.
 433     ///
 434     /// # Examples
 435     ///
 436     /// ```rust
 437     /// let s = "Do you know the muffin man,
 438     /// The muffin man, the muffin man, ...".to_string();
 439     ///
 440     /// assert_eq!(s.replace("muffin man", "little lamb"),
 441     ///            "Do you know the little lamb,
 442     /// The little lamb, the little lamb, ...".to_string());
 443     ///
 444     /// // not found, so no change.
 445     /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
 446     /// ```
 447     #[stable]
 448     fn replace(&self, from: &str, to: &str) -> String {
 449         let mut result = String::new();
 450         let mut last_end = 0;
 451         for (start, end) in self.match_indices(from) {
 452             result.push_str(unsafe { self.slice_unchecked(last_end, start) });
 453             result.push_str(to);
 454             last_end = end;
 455         }
 456         result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
 457         result
 458     }
 459
 460     /// Returns an iterator over the string in Unicode Normalization Form D
 461     /// (canonical decomposition).
 462     #[inline]
 463     #[unstable = "this functionality may be moved to libunicode"]
 464     fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
 465         Decompositions {
 466             iter: self[].chars(),
 467             buffer: Vec::new(),
 468             sorted: false,
 469             kind: Canonical
 470         }
 471     }
 472
 473     /// Returns an iterator over the string in Unicode Normalization Form KD
 474     /// (compatibility decomposition).
 475     #[inline]
 476     #[unstable = "this functionality may be moved to libunicode"]
 477     fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
 478         Decompositions {
 479             iter: self[].chars(),
 480             buffer: Vec::new(),
 481             sorted: false,
 482             kind: Compatible
 483         }
 484     }
 485
 486     /// An Iterator over the string in Unicode Normalization Form C
 487     /// (canonical decomposition followed by canonical composition).
 488     #[inline]
 489     #[unstable = "this functionality may be moved to libunicode"]
 490     fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
 491         Recompositions {
 492             iter: self.nfd_chars(),
 493             state: Composing,
 494             buffer: RingBuf::new(),
 495             composee: None,
 496             last_ccc: None
 497         }
 498     }
 499
 500     /// An Iterator over the string in Unicode Normalization Form KC
 501     /// (compatibility decomposition followed by canonical composition).
 502     #[inline]
 503     #[unstable = "this functionality may be moved to libunicode"]
 504     fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
 505         Recompositions {
 506             iter: self.nfkd_chars(),
 507             state: Composing,
 508             buffer: RingBuf::new(),
 509             composee: None,
 510             last_ccc: None
 511         }
 512     }
 513
    /// Returns true if this string contains the string pattern `pat`.
    ///
    /// # Arguments
    ///
    /// * `pat` - The string pattern to look for
    ///
    /// # Example
    ///
    /// ```rust
    /// assert!("bananas".contains("nana"));
    /// ```
    #[stable]
    fn contains(&self, pat: &str) -> bool {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::contains(self[], pat)
    }
 529
    /// Returns true if this string contains the char pattern `pat`.
    ///
    /// # Arguments
    ///
    /// * `pat` - The char pattern to look for; any value implementing
    ///   `CharEq` is accepted
    ///
    /// # Example
    ///
    /// ```rust
    /// assert!("hello".contains_char('e'));
    /// ```
    #[unstable = "might get removed in favour of a more generic contains()"]
    fn contains_char<P: CharEq>(&self, pat: P) -> bool {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::contains_char(self[], pat)
    }
 545
    /// An iterator over the characters of `self`. Note that this iterates
    /// over Unicode code points, not Unicode graphemes.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<char> = "abc åäö".chars().collect();
    /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
    /// ```
    #[stable]
    fn chars(&self) -> Chars {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::chars(self[])
    }
 559
    /// An iterator over the bytes of `self`.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<u8> = "bors".bytes().collect();
    /// assert_eq!(v, b"bors".to_vec());
    /// ```
    #[stable]
    fn bytes(&self) -> Bytes {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::bytes(self[])
    }
 572
    /// An iterator over the characters of `self` together with their byte
    /// offsets.
    #[stable]
    fn char_indices(&self) -> CharIndices {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::char_indices(self[])
    }
 578
    /// An iterator over substrings of `self`, separated by characters
    /// matched by the pattern `pat`.
    ///
    /// As the examples show, adjacent separators produce empty substrings
    /// and an empty string yields a single empty substring.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
    /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
    ///
    /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
    /// assert_eq!(v, vec!["abc", "def", "ghi"]);
    ///
    /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
    /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
    ///
    /// let v: Vec<&str> = "".split('X').collect();
    /// assert_eq!(v, vec![""]);
    /// ```
    #[stable]
    fn split<P: CharEq>(&self, pat: P) -> Split<P> {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::split(self[], pat)
    }
 601
    /// An iterator over substrings of `self`, separated by characters
    /// matched by the pattern `pat`, restricted to splitting at most `count`
    /// times.
    ///
    /// As the examples show, at most `count + 1` substrings are produced and
    /// the last one holds the unsplit remainder.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
    /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
    ///
    /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
    /// assert_eq!(v, vec!["abc", "def2ghi"]);
    ///
    /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
    /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
    ///
    /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
    /// assert_eq!(v, vec!["abcXdef"]);
    ///
    /// let v: Vec<&str> = "".splitn(1, 'X').collect();
    /// assert_eq!(v, vec![""]);
    /// ```
    #[stable]
    fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::splitn(self[], count, pat)
    }
 628
    /// An iterator over substrings of `self`, separated by characters
    /// matched by the pattern `pat`.
    ///
    /// Equivalent to `split`, except that the trailing substring
    /// is skipped if empty (terminator semantics).
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
    /// assert_eq!(v, vec!["A", "B"]);
    ///
    /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
    /// assert_eq!(v, vec!["A", "", "B", ""]);
    ///
    /// // NOTE(review): the remaining examples call `split(..).rev()`, not
    /// // `split_terminator`; they show reverse iteration over `split`.
    /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
    /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
    ///
    /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
    /// assert_eq!(v, vec!["ghi", "def", "abc"]);
    ///
    /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
    /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
    /// ```
    #[unstable = "might get removed"]
    fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::split_terminator(self[], pat)
    }
 657
    /// An iterator over substrings of `self`, separated by characters
    /// matched by the pattern `pat`, starting from the end of the string.
    /// Restricted to splitting at most `count` times.
    ///
    /// As the examples show, substrings are produced back to front and the
    /// last one holds the unsplit front of the string.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
    /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
    ///
    /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
    /// assert_eq!(v, vec!["ghi", "abc1def"]);
    ///
    /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
    /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
    /// ```
    #[stable]
    fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::rsplitn(self[], count, pat)
    }
 678
    /// An iterator over the start and end indices of the disjoint
    /// matches of the pattern `pat` within `self`.
    ///
    /// That is, each returned value `(start, end)` satisfies
    /// `self.slice(start, end) == pat`. For matches of `pat` within
    /// `self` that overlap, only the indices corresponding to the
    /// first match are returned.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
    /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
    ///
    /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
    /// assert_eq!(v, vec![(1,4), (4,7)]);
    ///
    /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
    /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
    /// ```
    #[unstable = "might have its iterator type changed"]
    fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::match_indices(self[], pat)
    }
 703
    /// An iterator over the substrings of `self` separated by the pattern `pat`.
    ///
    /// As the examples show, a match at the very start or end of the string,
    /// or two adjacent matches, produce empty substrings.
    ///
    /// # Example
    ///
    /// ```rust
    /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
    /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
    ///
    /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
    /// assert_eq!(v, vec!["1", "", "2"]);
    /// ```
    #[unstable = "might get removed in the future in favor of a more generic split()"]
    fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::split_str(self[], pat)
    }
 719
    /// An iterator over the lines of a string (subsequences separated
    /// by `\n`). This does not include the empty string after a
    /// trailing `\n`; empty lines in the middle are kept, as the example
    /// shows.
    ///
    /// # Example
    ///
    /// ```rust
    /// let four_lines = "foo\nbar\n\nbaz\n";
    /// let v: Vec<&str> = four_lines.lines().collect();
    /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
    /// ```
    #[stable]
    fn lines(&self) -> Lines {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::lines(self[])
    }
 735
    /// An iterator over the lines of a string, separated by either
    /// `\n` or `\r\n`. As with `.lines()`, this does not include an
    /// empty trailing line. Both kinds of line ending may be mixed within
    /// one string, as the example shows.
    ///
    /// # Example
    ///
    /// ```rust
    /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
    /// let v: Vec<&str> = four_lines.lines_any().collect();
    /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
    /// ```
    #[stable]
    fn lines_any(&self) -> LinesAny {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::lines_any(self[])
    }
 751
    /// Returns a slice of the given string from the byte range
    /// [`begin`..`end`).
    ///
    /// This operation is `O(1)`.
    ///
    /// Panics when `begin` or `end` does not lie on a character boundary
    /// or points beyond the last character of the string.
    ///
    /// See also `slice_to` and `slice_from` for slicing prefixes and
    /// suffixes of strings, and `slice_chars` for slicing based on
    /// code point counts.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    /// assert_eq!(s.slice(0, 1), "L");
    ///
    /// assert_eq!(s.slice(1, 9), "öwe 老");
    ///
    /// // these will panic:
    /// // byte 2 lies within `ö`:
    /// // s.slice(2, 3);
    ///
    /// // byte 8 lies within `老`
    /// // s.slice(1, 8);
    ///
    /// // byte 100 is outside the string
    /// // s.slice(3, 100);
    /// ```
    #[unstable = "use slice notation [a..b] instead"]
    fn slice(&self, begin: uint, end: uint) -> &str {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::slice(self[], begin, end)
    }
 786
    /// Returns a slice of the string from byte `begin` to its end.
    ///
    /// Equivalent to `self.slice(begin, self.len())`.
    ///
    /// Panics when `begin` does not point to a valid character, or is
    /// out of bounds.
    ///
    /// See also `slice`, `slice_to` and `slice_chars`.
    #[unstable = "use slice notation [a..] instead"]
    fn slice_from(&self, begin: uint) -> &str {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::slice_from(self[], begin)
    }
 799
    /// Returns a slice of the string from the beginning up to, but not
    /// including, byte `end`.
    ///
    /// Equivalent to `self.slice(0, end)`.
    ///
    /// Panics when `end` does not point to a valid character, or is
    /// out of bounds.
    ///
    /// See also `slice`, `slice_from` and `slice_chars`.
    #[unstable = "use slice notation [0..a] instead"]
    fn slice_to(&self, end: uint) -> &str {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::slice_to(self[], end)
    }
 813
    /// Returns a slice of the string from the character range
    /// [`begin`..`end`).
    ///
    /// That is, start at the `begin`-th code point of the string and
    /// continue to the `end`-th code point. This does not detect or
    /// handle edge cases such as leaving a combining character as the
    /// first code point of the string.
    ///
    /// Due to the design of UTF-8, this operation is `O(end)`.
    /// See `slice`, `slice_to` and `slice_from` for `O(1)`
    /// variants that use byte indices rather than code point
    /// indices.
    ///
    /// Panics if `begin` > `end` or if either `begin` or `end` is
    /// beyond the last character of the string.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    /// assert_eq!(s.slice_chars(0, 4), "Löwe");
    /// assert_eq!(s.slice_chars(5, 7), "老虎");
    /// ```
    #[unstable = "may have yet to prove its worth"]
    fn slice_chars(&self, begin: uint, end: uint) -> &str {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::slice_chars(self[], begin, end)
    }
 841
    /// Takes a bytewise (not UTF-8) slice from a string.
    ///
    /// Returns the substring from [`begin`..`end`).
    ///
    /// # Safety
    ///
    /// Caller must check both UTF-8 character boundaries and the boundaries of
    /// the entire slice as well.
    #[stable]
    unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
        // Forward to the core implementation on the full string slice; the
        // caller's obligations are passed on unchanged.
        core_str::StrExt::slice_unchecked(self[], begin, end)
    }
 852
    /// Returns true if the string pattern `pat` is a prefix of the string.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert!("banana".starts_with("ba"));
    /// ```
    #[stable]
    fn starts_with(&self, pat: &str) -> bool {
        // Forward to the core implementation on the full string slice.
        core_str::StrExt::starts_with(self[], pat)
    }
 864
    /// Returns true if the pattern `pat` is a suffix of the string.
    ///
    /// `pat` is a string slice; the check is forwarded to
    /// `core_str::StrExt::ends_with`.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert!("banana".ends_with("nana"));
    /// ```
    #[stable]
    fn ends_with(&self, pat: &str) -> bool {
        core_str::StrExt::ends_with(self[], pat)
    }
 876
    /// Returns a string with all pre- and suffixes that match
    /// the pattern `pat` repeatedly removed.
    ///
    /// # Arguments
    ///
    /// * pat - a character pattern (any `CharEq`); as the examples
    ///   below show, this can be a single `char`, a slice of `char`s
    ///   or a closure over `char`
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
    /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
    /// ```
    #[stable]
    fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
        core_str::StrExt::trim_matches(self[], pat)
    }
 896
    /// Returns a string with all prefixes that match
    /// the pattern `pat` repeatedly removed.
    ///
    /// # Arguments
    ///
    /// * pat - a character pattern (any `CharEq`); as the examples
    ///   below show, this can be a single `char`, a slice of `char`s
    ///   or a closure over `char`
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
    /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
    /// ```
    #[stable]
    fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
        core_str::StrExt::trim_left_matches(self[], pat)
    }
 916
    /// Returns a string with all suffixes that match
    /// the pattern `pat` repeatedly removed.
    ///
    /// # Arguments
    ///
    /// * pat - a character pattern (any `CharEq`); as the examples
    ///   below show, this can be a single `char`, a slice of `char`s
    ///   or a closure over `char`
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
    /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
    /// ```
    #[stable]
    fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
        core_str::StrExt::trim_right_matches(self[], pat)
    }
 936
    /// Check that `index`-th byte lies at the start and/or end of a
    /// UTF-8 code point sequence.
    ///
    /// The start and end of the string (when `index == self.len()`)
    /// are considered to be boundaries.
    ///
    /// `index` is a byte offset; the check itself is forwarded to
    /// `core_str::StrExt::is_char_boundary`.
    ///
    /// # Panics
    ///
    /// Panics if `index` is greater than `self.len()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    /// assert!(s.is_char_boundary(0));
    /// // start of `老`
    /// assert!(s.is_char_boundary(6));
    /// assert!(s.is_char_boundary(s.len()));
    ///
    /// // second byte of `ö`
    /// assert!(!s.is_char_boundary(2));
    ///
    /// // third byte of `老`
    /// assert!(!s.is_char_boundary(8));
    /// ```
    #[unstable = "naming is uncertain with container conventions"]
    fn is_char_boundary(&self, index: uint) -> bool {
        core_str::StrExt::is_char_boundary(self[], index)
    }
 964
    /// Pluck a character out of a string and return the index of the next
    /// character.
    ///
    /// This function can be used to iterate over the Unicode characters of a
    /// string.
    ///
    /// # Example
    ///
    /// This example manually iterates through the characters of a
    /// string; this should normally be done by `.chars()` or
    /// `.char_indices`.
    ///
    /// ```rust
    /// use std::str::CharRange;
    ///
    /// let s = "中华Việt Nam";
    /// let mut i = 0u;
    /// while i < s.len() {
    ///     let CharRange {ch, next} = s.char_range_at(i);
    ///     println!("{}: {}", i, ch);
    ///     i = next;
    /// }
    /// ```
    ///
    /// This outputs:
    ///
    /// ```text
    /// 0: 中
    /// 3: 华
    /// 6: V
    /// 7: i
    /// 8: ệ
    /// 11: t
    /// 12:
    /// 13: N
    /// 14: a
    /// 15: m
    /// ```
    ///
    /// # Arguments
    ///
    /// * self - The string
    /// * start - The byte offset of the char to extract
    ///
    /// # Return value
    ///
    /// A record {ch: char, next: uint} containing the char value and the byte
    /// index of the next Unicode character.
    ///
    /// # Panics
    ///
    /// If `start` is greater than or equal to the length of the string.
    /// If `start` is not the index of the beginning of a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_range_at(&self, start: uint) -> CharRange {
        core_str::StrExt::char_range_at(self[], start)
    }
1022
    /// Given a byte position and a str, return the previous char and its position.
    ///
    /// This function can be used to iterate over a Unicode string in reverse.
    ///
    /// Returns 0 for next index if called on start index 0.
    ///
    /// # Arguments
    ///
    /// * start - The byte position; the char returned is the one that
    ///   precedes it
    ///
    /// # Panics
    ///
    /// If `start` is greater than the length of the string.
    /// If `start` is not an index following a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_range_at_reverse(&self, start: uint) -> CharRange {
        core_str::StrExt::char_range_at_reverse(self[], start)
    }
1037
    /// Plucks the character starting at the `i`th byte of a string.
    ///
    /// `i` is a byte offset, not a character count (see the example,
    /// where `'c'` lives at offset 4 because `'π'` spans offsets 2 and 3).
    /// Forwards to `core_str::StrExt::char_at`.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "abπc";
    /// assert_eq!(s.char_at(1), 'b');
    /// assert_eq!(s.char_at(2), 'π');
    /// assert_eq!(s.char_at(4), 'c');
    /// ```
    ///
    /// # Panics
    ///
    /// If `i` is greater than or equal to the length of the string.
    /// If `i` is not the index of the beginning of a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_at(&self, i: uint) -> char {
        core_str::StrExt::char_at(self[], i)
    }
1057
    /// Plucks the character ending at the `i`th byte of a string.
    ///
    /// `i` is a byte offset. Forwards to
    /// `core_str::StrExt::char_at_reverse`.
    ///
    /// # Panics
    ///
    /// If `i` is greater than the length of the string.
    /// If `i` is not an index following a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_at_reverse(&self, i: uint) -> char {
        core_str::StrExt::char_at_reverse(self[], i)
    }
1068
    /// Work with the byte buffer of a string as a byte slice.
    ///
    /// The returned `&[u8]` borrows from `self`. Forwards to
    /// `core_str::StrExt::as_bytes`.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("bors".as_bytes(), b"bors");
    /// ```
    #[stable]
    fn as_bytes(&self) -> &[u8] {
        core_str::StrExt::as_bytes(self[])
    }
1080
    /// Returns the byte index of the first character of `self` that
    /// matches the pattern `pat`.
    ///
    /// `pat` is a character pattern (any `CharEq`); as the example shows,
    /// this can be a `char`, a slice of `char`s or a closure over `char`.
    ///
    /// # Return value
    ///
    /// `Some` containing the byte index of the first matching character
    /// or `None` if there is no match
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    ///
    /// assert_eq!(s.find('L'), Some(0));
    /// assert_eq!(s.find('é'), Some(14));
    ///
    /// // the first space
    /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
    ///
    /// // neither are found
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!(s.find(x), None);
    /// ```
    #[stable]
    fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
        core_str::StrExt::find(self[], pat)
    }
1108
    /// Returns the byte index of the last character of `self` that
    /// matches the pattern `pat`.
    ///
    /// `pat` is a character pattern (any `CharEq`); as the example shows,
    /// this can be a `char`, a slice of `char`s or a closure over `char`.
    ///
    /// # Return value
    ///
    /// `Some` containing the byte index of the last matching character
    /// or `None` if there is no match.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    ///
    /// assert_eq!(s.rfind('L'), Some(13));
    /// assert_eq!(s.rfind('é'), Some(14));
    ///
    /// // the second space
    /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
    ///
    /// // searches for an occurrence of either `1` or `2`, but neither are found
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!(s.rfind(x), None);
    /// ```
    #[stable]
    fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
        core_str::StrExt::rfind(self[], pat)
    }
1136
    /// Returns the byte index of the first matching substring.
    ///
    /// Forwards to `core_str::StrExt::find_str`.
    ///
    /// # Arguments
    ///
    /// * `needle` - The string to search for
    ///
    /// # Return value
    ///
    /// `Some` containing the byte index of the first matching substring
    /// or `None` if there is no match.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    ///
    /// assert_eq!(s.find_str("老虎 L"), Some(6));
    /// assert_eq!(s.find_str("muffin man"), None);
    /// ```
    #[unstable = "might get removed in favor of a more generic find in the future"]
    fn find_str(&self, needle: &str) -> Option<uint> {
        core_str::StrExt::find_str(self[], needle)
    }
1160
    /// Retrieves the first character from a string slice and returns
    /// it together with the rest of the string. This does not allocate
    /// a new string; instead, the second element of the returned pair
    /// is a slice that starts one character beyond the character that
    /// was shifted. If the string does not contain any characters,
    /// `None` is returned instead.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    /// let (c, s1) = s.slice_shift_char().unwrap();
    /// assert_eq!(c, 'L');
    /// assert_eq!(s1, "öwe 老虎 Léopard");
    ///
    /// let (c, s2) = s1.slice_shift_char().unwrap();
    /// assert_eq!(c, 'ö');
    /// assert_eq!(s2, "we 老虎 Léopard");
    /// ```
    #[unstable = "awaiting conventions about shifting and slices"]
    fn slice_shift_char(&self) -> Option<(char, &str)> {
        core_str::StrExt::slice_shift_char(self[])
    }
1183
    /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
    ///
    /// Here `self` is the outer slice and `inner` is the slice whose
    /// offset is reported. Forwards to `core_str::StrExt::subslice_offset`.
    ///
    /// # Panics
    ///
    /// Panics if `inner` is not a direct slice contained within self.
    ///
    /// # Example
    ///
    /// ```rust
    /// let string = "a\nb\nc";
    /// let lines: Vec<&str> = string.lines().collect();
    ///
    /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
    /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
    /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
    /// ```
    #[unstable = "awaiting convention about comparability of arbitrary slices"]
    fn subslice_offset(&self, inner: &str) -> uint {
        core_str::StrExt::subslice_offset(self[], inner)
    }
1202
    /// Return an unsafe pointer to the string's buffer.
    ///
    /// The caller must ensure that the string outlives this pointer,
    /// and that it is not reallocated (e.g. by pushing to the
    /// string).
    ///
    /// Forwards to `core_str::StrExt::as_ptr`.
    #[stable]
    #[inline]
    fn as_ptr(&self) -> *const u8 {
        core_str::StrExt::as_ptr(self[])
    }
1213
    /// Return an iterator of `u16` over the string encoded as UTF-16.
    ///
    /// The returned `Utf16Units` wraps a `Utf16Encoder` that is fed from
    /// this string's `chars()` iterator.
    #[unstable = "this functionality may only be provided by libunicode"]
    fn utf16_units(&self) -> Utf16Units {
        Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
    }
1219
    /// Return the number of bytes in this string.
    ///
    /// This is a byte count, not a character count: in the example
    /// below `"ƒoo"` has three characters but a length of 4.
    ///
    /// # Example
    ///
    /// ```
    /// assert_eq!("foo".len(), 3);
    /// assert_eq!("ƒoo".len(), 4);
    /// ```
    #[stable]
    #[inline]
    fn len(&self) -> uint {
        core_str::StrExt::len(self[])
    }
1233
    /// Returns true if this slice contains no bytes.
    ///
    /// Forwards to `core_str::StrExt::is_empty`.
    ///
    /// # Example
    ///
    /// ```
    /// assert!("".is_empty());
    /// ```
    #[inline]
    #[stable]
    fn is_empty(&self) -> bool {
        core_str::StrExt::is_empty(self[])
    }
1246
    /// Parse this string into the specified type.
    ///
    /// The target type `F` must implement `FromStr`. As the example
    /// shows, the result is `Some(value)` when the string parses and
    /// `None` when it does not.
    ///
    /// # Example
    ///
    /// ```
    /// assert_eq!("4".parse::<u32>(), Some(4));
    /// assert_eq!("j".parse::<u32>(), None);
    /// ```
    #[inline]
    #[unstable = "this method was just created"]
    fn parse<F: FromStr>(&self) -> Option<F> {
        core_str::StrExt::parse(self[])
    }
1260
    /// Returns an iterator over the
    /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
    /// of the string.
    ///
    /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
    /// otherwise, the iterator is over the *legacy grapheme clusters*.
    /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
    /// recommends extended grapheme cluster boundaries for general processing.
    ///
    /// The iterator is produced by `UnicodeStr::graphemes`; as the example
    /// shows, each item is a `&str` sub-slice of the original string.
    ///
    /// # Example
    ///
    /// ```rust
    /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
    /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
    /// assert_eq!(gr1.as_slice(), b);
    /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
    /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
    /// assert_eq!(gr2.as_slice(), b);
    /// ```
    #[unstable = "this functionality may only be provided by libunicode"]
    fn graphemes(&self, is_extended: bool) -> Graphemes {
        UnicodeStr::graphemes(self[], is_extended)
    }
1284
    /// Returns an iterator over the grapheme clusters of self and their byte offsets.
    /// See `graphemes()` method for more information.
    ///
    /// As the example shows, each item is a `(uint, &str)` pair: the byte
    /// offset at which the cluster starts, followed by the cluster itself.
    /// `is_extended` has the same meaning as for `graphemes()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
    /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
    /// assert_eq!(gr_inds.as_slice(), b);
    /// ```
    #[unstable = "this functionality may only be provided by libunicode"]
    fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
        UnicodeStr::grapheme_indices(self[], is_extended)
    }
1299
    /// An iterator over the words of a string (subsequences separated
    /// by any sequence of whitespace). Sequences of whitespace are
    /// collapsed, so empty "words" are not included.
    ///
    /// As the example shows, leading whitespace does not produce an
    /// empty first word either. The iterator comes from `UnicodeStr::words`.
    ///
    /// # Example
    ///
    /// ```rust
    /// let some_words = " Mary   had\ta little  \n\t lamb";
    /// let v: Vec<&str> = some_words.words().collect();
    /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
    /// ```
    #[stable]
    fn words(&self) -> Words {
        UnicodeStr::words(self[])
    }
1315
    /// Returns a string's displayed width in columns, treating control
    /// characters as zero-width.
    ///
    /// `is_cjk` determines behavior for characters in the Ambiguous category:
    /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
    /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// recommends that these characters be treated as 1 column (i.e.,
    /// `is_cjk` = `false`) if the locale is unknown.
    ///
    /// The computation itself is delegated to `UnicodeStr::width`.
    #[unstable = "this functionality may only be provided by libunicode"]
    fn width(&self, is_cjk: bool) -> uint {
        UnicodeStr::width(self[], is_cjk)
    }
1329
    /// Returns a string with leading and trailing whitespace removed.
    ///
    /// The result is a sub-slice borrowed from `self`; the trimming is
    /// delegated to `UnicodeStr::trim`.
    #[stable]
    fn trim(&self) -> &str {
        UnicodeStr::trim(self[])
    }
1335
    /// Returns a string with leading whitespace removed.
    ///
    /// The result is a sub-slice borrowed from `self`; the trimming is
    /// delegated to `UnicodeStr::trim_left`.
    #[stable]
    fn trim_left(&self) -> &str {
        UnicodeStr::trim_left(self[])
    }
1341
    /// Returns a string with trailing whitespace removed.
    ///
    /// The result is a sub-slice borrowed from `self`; the trimming is
    /// delegated to `UnicodeStr::trim_right`.
    #[stable]
    fn trim_right(&self) -> &str {
        UnicodeStr::trim_right(self[])
    }
1347 }
1348
// The impl body is intentionally empty: `str` overrides nothing and relies
// on the method bodies already provided inside the `StrExt` trait.
impl StrExt for str {}
1350
1351 #[cfg(test)]
1352 mod tests {
1353     use prelude::*;
1354
1355     use core::iter::AdditiveIterator;
1356     use super::from_utf8;
1357     use super::Utf8Error;
1358
1359     #[test]
1360     fn test_le() {
1361         assert!("" <= "");
1362         assert!("" <= "foo");
1363         assert!("foo" <= "foo");
1364         assert!("foo" != "bar");
1365     }
1366
1367     #[test]
1368     fn test_len() {
1369         assert_eq!("".len(), 0u);
1370         assert_eq!("hello world".len(), 11u);
1371         assert_eq!("\x63".len(), 1u);
1372         assert_eq!("\u{a2}".len(), 2u);
1373         assert_eq!("\u{3c0}".len(), 2u);
1374         assert_eq!("\u{2620}".len(), 3u);
1375         assert_eq!("\u{1d11e}".len(), 4u);
1376
1377         assert_eq!("".chars().count(), 0u);
1378         assert_eq!("hello world".chars().count(), 11u);
1379         assert_eq!("\x63".chars().count(), 1u);
1380         assert_eq!("\u{a2}".chars().count(), 1u);
1381         assert_eq!("\u{3c0}".chars().count(), 1u);
1382         assert_eq!("\u{2620}".chars().count(), 1u);
1383         assert_eq!("\u{1d11e}".chars().count(), 1u);
1384         assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1385
1386         assert_eq!("ｈｅｌｌｏ".width(false), 10u);
1387         assert_eq!("ｈｅｌｌｏ".width(true), 10u);
1388         assert_eq!("\0\0\0\0\0".width(false), 0u);
1389         assert_eq!("\0\0\0\0\0".width(true), 0u);
1390         assert_eq!("".width(false), 0u);
1391         assert_eq!("".width(true), 0u);
1392         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1393         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1394     }
1395
1396     #[test]
1397     fn test_find() {
1398         assert_eq!("hello".find('l'), Some(2u));
1399         assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1400         assert!("hello".find('x').is_none());
1401         assert!("hello".find(|&: c:char| c == 'x').is_none());
1402         assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1403         assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1404     }
1405
1406     #[test]
1407     fn test_rfind() {
1408         assert_eq!("hello".rfind('l'), Some(3u));
1409         assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1410         assert!("hello".rfind('x').is_none());
1411         assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1412         assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1413         assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1414     }
1415
1416     #[test]
1417     fn test_collect() {
1418         let empty = String::from_str("");
1419         let s: String = empty.chars().collect();
1420         assert_eq!(empty, s);
1421         let data = String::from_str("ประเทศไทย中");
1422         let s: String = data.chars().collect();
1423         assert_eq!(data, s);
1424     }
1425
1426     #[test]
1427     fn test_into_bytes() {
1428         let data = String::from_str("asdf");
1429         let buf = data.into_bytes();
1430         assert_eq!(b"asdf", buf);
1431     }
1432
1433     #[test]
1434     fn test_find_str() {
1435         // byte positions
1436         assert_eq!("".find_str(""), Some(0u));
1437         assert!("banana".find_str("apple pie").is_none());
1438
1439         let data = "abcabc";
1440         assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1441         assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1442         assert!(data.slice(2u, 4u).find_str("ab").is_none());
1443
1444         let string = "ประเทศไทย中华Việt Nam";
1445         let mut data = String::from_str(string);
1446         data.push_str(string);
1447         assert!(data.find_str("ไท华").is_none());
1448         assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1449         assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1450
1451         assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1452         assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1453         assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1454         assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1455         assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1456
1457         assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1458         assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1459         assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1460         assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1461         assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1462     }
1463
1464     #[test]
1465     fn test_slice_chars() {
1466         fn t(a: &str, b: &str, start: uint) {
1467             assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1468         }
1469         t("", "", 0);
1470         t("hello", "llo", 2);
1471         t("hello", "el", 1);
1472         t("αβλ", "β", 1);
1473         t("αβλ", "", 3);
1474         assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1475     }
1476
1477     fn s(x: &str) -> String { x.to_string() }
1478
    // Asserts that calling `.concat()` on `$string` produces `$expected`.
    // The result is first bound to a local annotated as `String`, then
    // compared with `assert_eq!`.
    macro_rules! test_concat {
        ($expected: expr, $string: expr) => {
            {
                let s: String = $string.concat();
                assert_eq!($expected, s);
            }
        }
    }
1487
1488     #[test]
1489     fn test_concat_for_different_types() {
1490         test_concat!("ab", vec![s("a"), s("b")]);
1491         test_concat!("ab", vec!["a", "b"]);
1492         test_concat!("ab", vec!["a", "b"].as_slice());
1493         test_concat!("ab", vec![s("a"), s("b")]);
1494     }
1495
1496     #[test]
1497     fn test_concat_for_different_lengths() {
1498         let empty: &[&str] = &[];
1499         test_concat!("", empty);
1500         test_concat!("a", ["a"]);
1501         test_concat!("ab", ["a", "b"]);
1502         test_concat!("abc", ["", "a", "bc"]);
1503     }
1504
    // Asserts that calling `.connect($delim)` on `$string` produces
    // `$expected`. Unlike `test_concat!`, the result binding carries no
    // type annotation.
    macro_rules! test_connect {
        ($expected: expr, $string: expr, $delim: expr) => {
            {
                let s = $string.connect($delim);
                assert_eq!($expected, s);
            }
        }
    }
1513
1514     #[test]
1515     fn test_connect_for_different_types() {
1516         test_connect!("a-b", ["a", "b"], "-");
1517         let hyphen = "-".to_string();
1518         test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1519         test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1520         test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1521         test_connect!("a-b", vec![s("a"), s("b")], "-");
1522     }
1523
1524     #[test]
1525     fn test_connect_for_different_lengths() {
1526         let empty: &[&str] = &[];
1527         test_connect!("", empty, "-");
1528         test_connect!("a", ["a"], "-");
1529         test_connect!("a-b", ["a", "b"], "-");
1530         test_connect!("-a-bc", ["", "a", "bc"], "-");
1531     }
1532
1533     #[test]
1534     fn test_unsafe_slice() {
1535         assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1536         assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1537         assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1538         fn a_million_letter_a() -> String {
1539             let mut i = 0u;
1540             let mut rs = String::new();
1541             while i < 100000 {
1542                 rs.push_str("aaaaaaaaaa");
1543                 i += 1;
1544             }
1545             rs
1546         }
1547         fn half_a_million_letter_a() -> String {
1548             let mut i = 0u;
1549             let mut rs = String::new();
1550             while i < 100000 {
1551                 rs.push_str("aaaaa");
1552                 i += 1;
1553             }
1554             rs
1555         }
1556         let letters = a_million_letter_a();
1557         assert!(half_a_million_letter_a() ==
1558             unsafe {String::from_str(letters.slice_unchecked(
1559                                      0u,
1560                                      500000))});
1561     }
1562
1563     #[test]
1564     fn test_starts_with() {
1565         assert!(("".starts_with("")));
1566         assert!(("abc".starts_with("")));
1567         assert!(("abc".starts_with("a")));
1568         assert!((!"a".starts_with("abc")));
1569         assert!((!"".starts_with("abc")));
1570         assert!((!"ödd".starts_with("-")));
1571         assert!(("ödd".starts_with("öd")));
1572     }
1573
1574     #[test]
1575     fn test_ends_with() {
1576         assert!(("".ends_with("")));
1577         assert!(("abc".ends_with("")));
1578         assert!(("abc".ends_with("c")));
1579         assert!((!"a".ends_with("abc")));
1580         assert!((!"".ends_with("abc")));
1581         assert!((!"ddö".ends_with("-")));
1582         assert!(("ddö".ends_with("dö")));
1583     }
1584
1585     #[test]
1586     fn test_is_empty() {
1587         assert!("".is_empty());
1588         assert!(!"a".is_empty());
1589     }
1590
1591     #[test]
1592     fn test_replace() {
1593         let a = "a";
1594         assert_eq!("".replace(a, "b"), String::from_str(""));
1595         assert_eq!("a".replace(a, "b"), String::from_str("b"));
1596         assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1597         let test = "test";
1598         assert!(" test test ".replace(test, "toast") ==
1599             String::from_str(" toast toast "));
1600         assert_eq!(" test test ".replace(test, ""), String::from_str("   "));
1601     }
1602
1603     #[test]
1604     fn test_replace_2a() {
1605         let data = "ประเทศไทย中华";
1606         let repl = "دولة الكويت";
1607
1608         let a = "ประเ";
1609         let a2 = "دولة الكويتทศไทย中华";
1610         assert_eq!(data.replace(a, repl), a2);
1611     }
1612
1613     #[test]
1614     fn test_replace_2b() {
1615         let data = "ประเทศไทย中华";
1616         let repl = "دولة الكويت";
1617
1618         let b = "ะเ";
1619         let b2 = "ปรدولة الكويتทศไทย中华";
1620         assert_eq!(data.replace(b, repl), b2);
1621     }
1622
1623     #[test]
1624     fn test_replace_2c() {
1625         let data = "ประเทศไทย中华";
1626         let repl = "دولة الكويت";
1627
1628         let c = "中华";
1629         let c2 = "ประเทศไทยدولة الكويت";
1630         assert_eq!(data.replace(c, repl), c2);
1631     }
1632
1633     #[test]
1634     fn test_replace_2d() {
1635         let data = "ประเทศไทย中华";
1636         let repl = "دولة الكويت";
1637
1638         let d = "ไท华";
1639         assert_eq!(data.replace(d, repl), data);
1640     }
1641
1642     #[test]
1643     fn test_slice() {
1644         assert_eq!("ab", "abc".slice(0, 2));
1645         assert_eq!("bc", "abc".slice(1, 3));
1646         assert_eq!("", "abc".slice(1, 1));
1647         assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1648
1649         let data = "ประเทศไทย中华";
1650         assert_eq!("ป", data.slice(0, 3));
1651         assert_eq!("ร", data.slice(3, 6));
1652         assert_eq!("", data.slice(3, 3));
1653         assert_eq!("华", data.slice(30, 33));
1654
1655         fn a_million_letter_x() -> String {
1656             let mut i = 0u;
1657             let mut rs = String::new();
1658             while i < 100000 {
1659                 rs.push_str("华华华华华华华华华华");
1660                 i += 1;
1661             }
1662             rs
1663         }
1664         fn half_a_million_letter_x() -> String {
1665             let mut i = 0u;
1666             let mut rs = String::new();
1667             while i < 100000 {
1668                 rs.push_str("华华华华华");
1669                 i += 1;
1670             }
1671             rs
1672         }
1673         let letters = a_million_letter_x();
1674         assert!(half_a_million_letter_x() ==
1675             String::from_str(letters.slice(0u, 3u * 500000u)));
1676     }
1677
1678     #[test]
1679     fn test_slice_2() {
1680         let ss = "中华Việt Nam";
1681
1682         assert_eq!("华", ss.slice(3u, 6u));
1683         assert_eq!("Việt Nam", ss.slice(6u, 16u));
1684
1685         assert_eq!("ab", "abc".slice(0u, 2u));
1686         assert_eq!("bc", "abc".slice(1u, 3u));
1687         assert_eq!("", "abc".slice(1u, 1u));
1688
1689         assert_eq!("中", ss.slice(0u, 3u));
1690         assert_eq!("华V", ss.slice(3u, 7u));
1691         assert_eq!("", ss.slice(3u, 3u));
1692         /*0: 中
1693           3: 华
1694           6: V
1695           7: i
1696           8: ệ
1697          11: t
1698          12:
1699          13: N
1700          14: a
1701          15: m */
1702     }
1703
1704     #[test]
1705     #[should_fail]
1706     fn test_slice_fail() {
1707         "中华Việt Nam".slice(0u, 2u);
1708     }
1709
1710     #[test]
1711     fn test_slice_from() {
1712         assert_eq!("abcd".slice_from(0), "abcd");
1713         assert_eq!("abcd".slice_from(2), "cd");
1714         assert_eq!("abcd".slice_from(4), "");
1715     }
1716     #[test]
1717     fn test_slice_to() {
1718         assert_eq!("abcd".slice_to(0), "");
1719         assert_eq!("abcd".slice_to(2), "ab");
1720         assert_eq!("abcd".slice_to(4), "abcd");
1721     }
1722
1723     #[test]
1724     fn test_trim_left_matches() {
1725         let v: &[char] = &[];
1726         assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1727         let chars: &[char] = &['*', ' '];
1728         assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1729         assert_eq!(" ***  *** ".trim_left_matches(chars), "");
1730         assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1731
1732         assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1733         let chars: &[char] = &['1', '2'];
1734         assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1735         assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1736     }
1737
1738     #[test]
1739     fn test_trim_right_matches() {
1740         let v: &[char] = &[];
1741         assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1742         let chars: &[char] = &['*', ' '];
1743         assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1744         assert_eq!(" ***  *** ".trim_right_matches(chars), "");
1745         assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1746
1747         assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1748         let chars: &[char] = &['1', '2'];
1749         assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1750         assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1751     }
1752
1753     #[test]
1754     fn test_trim_matches() {
1755         let v: &[char] = &[];
1756         assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1757         let chars: &[char] = &['*', ' '];
1758         assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1759         assert_eq!(" ***  *** ".trim_matches(chars), "");
1760         assert_eq!("foo".trim_matches(chars), "foo");
1761
1762         assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1763         let chars: &[char] = &['1', '2'];
1764         assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1765         assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
1766     }
1767
1768     #[test]
1769     fn test_trim_left() {
1770         assert_eq!("".trim_left(), "");
1771         assert_eq!("a".trim_left(), "a");
1772         assert_eq!("    ".trim_left(), "");
1773         assert_eq!("     blah".trim_left(), "blah");
1774         assert_eq!("   \u{3000}  wut".trim_left(), "wut");
1775         assert_eq!("hey ".trim_left(), "hey ");
1776     }
1777
1778     #[test]
1779     fn test_trim_right() {
1780         assert_eq!("".trim_right(), "");
1781         assert_eq!("a".trim_right(), "a");
1782         assert_eq!("    ".trim_right(), "");
1783         assert_eq!("blah     ".trim_right(), "blah");
1784         assert_eq!("wut   \u{3000}  ".trim_right(), "wut");
1785         assert_eq!(" hey".trim_right(), " hey");
1786     }
1787
1788     #[test]
1789     fn test_trim() {
1790         assert_eq!("".trim(), "");
1791         assert_eq!("a".trim(), "a");
1792         assert_eq!("    ".trim(), "");
1793         assert_eq!("    blah     ".trim(), "blah");
1794         assert_eq!("\nwut   \u{3000}  ".trim(), "wut");
1795         assert_eq!(" hey dude ".trim(), "hey dude");
1796     }
1797
1798     #[test]
1799     fn test_is_whitespace() {
1800         assert!("".chars().all(|c| c.is_whitespace()));
1801         assert!(" ".chars().all(|c| c.is_whitespace()));
1802         assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
1803         assert!("  \n\t   ".chars().all(|c| c.is_whitespace()));
1804         assert!(!"   _   ".chars().all(|c| c.is_whitespace()));
1805     }
1806
1807     #[test]
1808     fn test_slice_shift_char() {
1809         let data = "ประเทศไทย中";
1810         assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1811     }
1812
1813     #[test]
1814     fn test_slice_shift_char_2() {
1815         let empty = "";
1816         assert_eq!(empty.slice_shift_char(), None);
1817     }
1818
1819     #[test]
1820     fn test_is_utf8() {
1821         // deny overlong encodings
1822         assert!(from_utf8(&[0xc0, 0x80]).is_err());
1823         assert!(from_utf8(&[0xc0, 0xae]).is_err());
1824         assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1825         assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1826         assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1827         assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1828         assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1829
1830         // deny surrogates
1831         assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1832         assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1833
1834         assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1835         assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1836         assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1837         assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1838         assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1839         assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1840         assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1841         assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1842     }
1843
1844     #[test]
1845     fn test_is_utf16() {
1846         use unicode::str::is_utf16;
1847         macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1848
1849         // non-surrogates
1850         pos!(&[0x0000],
1851              &[0x0001, 0x0002],
1852              &[0xD7FF],
1853              &[0xE000]);
1854
1855         // surrogate pairs (randomly generated with Python 3's
1856         // .encode('utf-16be'))
1857         pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1858              &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1859              &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1860
1861         // mixtures (also random)
1862         pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1863              &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1864              &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1865
1866         // negative tests
1867         macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1868
1869         neg!(
1870             // surrogate + regular unit
1871             &[0xdb45, 0x0000],
1872             // surrogate + lead surrogate
1873             &[0xd900, 0xd900],
1874             // unterminated surrogate
1875             &[0xd8ff],
1876             // trail surrogate without a lead
1877             &[0xddb7]);
1878
1879         // random byte sequences that Python 3's .decode('utf-16be')
1880         // failed on
1881         neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1882              &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1883              &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1884              &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1885              &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1886              &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1887              &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1888              &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1889              &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1890              &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1891              &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1892              &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1893              &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1894              &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1895              &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1896              &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1897              &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1898              &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1899              &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1900              &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1901              &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1902     }
1903
1904     #[test]
1905     fn test_as_bytes() {
1906         // no null
1907         let v = [
1908             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1909             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1910             109
1911         ];
1912         let b: &[u8] = &[];
1913         assert_eq!("".as_bytes(), b);
1914         assert_eq!("abc".as_bytes(), b"abc");
1915         assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1916     }
1917
1918     #[test]
1919     #[should_fail]
1920     fn test_as_bytes_fail() {
1921         // Don't double free. (I'm not sure if this exercises the
1922         // original problem code path anymore.)
1923         let s = String::from_str("");
1924         let _bytes = s.as_bytes();
1925         panic!();
1926     }
1927
1928     #[test]
1929     fn test_as_ptr() {
1930         let buf = "hello".as_ptr();
1931         unsafe {
1932             assert_eq!(*buf.offset(0), b'h');
1933             assert_eq!(*buf.offset(1), b'e');
1934             assert_eq!(*buf.offset(2), b'l');
1935             assert_eq!(*buf.offset(3), b'l');
1936             assert_eq!(*buf.offset(4), b'o');
1937         }
1938     }
1939
1940     #[test]
1941     fn test_subslice_offset() {
1942         let a = "kernelsprite";
1943         let b = a.slice(7, a.len());
1944         let c = a.slice(0, a.len() - 6);
1945         assert_eq!(a.subslice_offset(b), 7);
1946         assert_eq!(a.subslice_offset(c), 0);
1947
1948         let string = "a\nb\nc";
1949         let lines: Vec<&str> = string.lines().collect();
1950         assert_eq!(string.subslice_offset(lines[0]), 0);
1951         assert_eq!(string.subslice_offset(lines[1]), 2);
1952         assert_eq!(string.subslice_offset(lines[2]), 4);
1953     }
1954
1955     #[test]
1956     #[should_fail]
1957     fn test_subslice_offset_2() {
1958         let a = "alchemiter";
1959         let b = "cruxtruder";
1960         a.subslice_offset(b);
1961     }
1962
1963     #[test]
1964     fn vec_str_conversions() {
1965         let s1: String = String::from_str("All mimsy were the borogoves");
1966
1967         let v: Vec<u8> = s1.as_bytes().to_vec();
1968         let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1969         let mut i: uint = 0u;
1970         let n1: uint = s1.len();
1971         let n2: uint = v.len();
1972         assert_eq!(n1, n2);
1973         while i < n1 {
1974             let a: u8 = s1.as_bytes()[i];
1975             let b: u8 = s2.as_bytes()[i];
1976             debug!("{}", a);
1977             debug!("{}", b);
1978             assert_eq!(a, b);
1979             i += 1u;
1980         }
1981     }
1982
1983     #[test]
1984     fn test_contains() {
1985         assert!("abcde".contains("bcd"));
1986         assert!("abcde".contains("abcd"));
1987         assert!("abcde".contains("bcde"));
1988         assert!("abcde".contains(""));
1989         assert!("".contains(""));
1990         assert!(!"abcde".contains("def"));
1991         assert!(!"".contains("a"));
1992
1993         let data = "ประเทศไทย中华Việt Nam";
1994         assert!(data.contains("ประเ"));
1995         assert!(data.contains("ะเ"));
1996         assert!(data.contains("中华"));
1997         assert!(!data.contains("ไท华"));
1998     }
1999
2000     #[test]
2001     fn test_contains_char() {
2002         assert!("abc".contains_char('b'));
2003         assert!("a".contains_char('a'));
2004         assert!(!"abc".contains_char('d'));
2005         assert!(!"".contains_char('a'));
2006     }
2007
2008     #[test]
2009     fn test_char_at() {
2010         let s = "ศไทย中华Việt Nam";
2011         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2012         let mut pos = 0;
2013         for ch in v.iter() {
2014             assert!(s.char_at(pos) == *ch);
2015             pos += ch.to_string().len();
2016         }
2017     }
2018
2019     #[test]
2020     fn test_char_at_reverse() {
2021         let s = "ศไทย中华Việt Nam";
2022         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2023         let mut pos = s.len();
2024         for ch in v.iter().rev() {
2025             assert!(s.char_at_reverse(pos) == *ch);
2026             pos -= ch.to_string().len();
2027         }
2028     }
2029
2030     #[test]
2031     fn test_escape_unicode() {
2032         assert_eq!("abc".escape_unicode(),
2033                    String::from_str("\\u{61}\\u{62}\\u{63}"));
2034         assert_eq!("a c".escape_unicode(),
2035                    String::from_str("\\u{61}\\u{20}\\u{63}"));
2036         assert_eq!("\r\n\t".escape_unicode(),
2037                    String::from_str("\\u{d}\\u{a}\\u{9}"));
2038         assert_eq!("'\"\\".escape_unicode(),
2039                    String::from_str("\\u{27}\\u{22}\\u{5c}"));
2040         assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2041                    String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2042         assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2043                    String::from_str("\\u{100}\\u{ffff}"));
2044         assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2045                    String::from_str("\\u{10000}\\u{10ffff}"));
2046         assert_eq!("ab\u{fb00}".escape_unicode(),
2047                    String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2048         assert_eq!("\u{1d4ea}\r".escape_unicode(),
2049                    String::from_str("\\u{1d4ea}\\u{d}"));
2050     }
2051
2052     #[test]
2053     fn test_escape_default() {
2054         assert_eq!("abc".escape_default(), String::from_str("abc"));
2055         assert_eq!("a c".escape_default(), String::from_str("a c"));
2056         assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2057         assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2058         assert_eq!("\u{100}\u{ffff}".escape_default(),
2059                    String::from_str("\\u{100}\\u{ffff}"));
2060         assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2061                    String::from_str("\\u{10000}\\u{10ffff}"));
2062         assert_eq!("ab\u{fb00}".escape_default(),
2063                    String::from_str("ab\\u{fb00}"));
2064         assert_eq!("\u{1d4ea}\r".escape_default(),
2065                    String::from_str("\\u{1d4ea}\\r"));
2066     }
2067
2068     #[test]
2069     fn test_total_ord() {
2070         "1234".cmp("123") == Greater;
2071         "123".cmp("1234") == Less;
2072         "1234".cmp("1234") == Equal;
2073         "12345555".cmp("123456") == Less;
2074         "22".cmp("1234") == Greater;
2075     }
2076
2077     #[test]
2078     fn test_char_range_at() {
2079         let data = "b¢€𤭢𤭢€¢b";
2080         assert_eq!('b', data.char_range_at(0).ch);
2081         assert_eq!('¢', data.char_range_at(1).ch);
2082         assert_eq!('€', data.char_range_at(3).ch);
2083         assert_eq!('𤭢', data.char_range_at(6).ch);
2084         assert_eq!('𤭢', data.char_range_at(10).ch);
2085         assert_eq!('€', data.char_range_at(14).ch);
2086         assert_eq!('¢', data.char_range_at(17).ch);
2087         assert_eq!('b', data.char_range_at(19).ch);
2088     }
2089
2090     #[test]
2091     fn test_char_range_at_reverse_underflow() {
2092         assert_eq!("abc".char_range_at_reverse(0).next, 0);
2093     }
2094
2095     #[test]
2096     fn test_iterator() {
2097         let s = "ศไทย中华Việt Nam";
2098         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2099
2100         let mut pos = 0;
2101         let mut it = s.chars();
2102
2103         for c in it {
2104             assert_eq!(c, v[pos]);
2105             pos += 1;
2106         }
2107         assert_eq!(pos, v.len());
2108     }
2109
2110     #[test]
2111     fn test_rev_iterator() {
2112         let s = "ศไทย中华Việt Nam";
2113         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2114
2115         let mut pos = 0;
2116         let mut it = s.chars().rev();
2117
2118         for c in it {
2119             assert_eq!(c, v[pos]);
2120             pos += 1;
2121         }
2122         assert_eq!(pos, v.len());
2123     }
2124
2125     #[test]
2126     fn test_chars_decoding() {
2127         let mut bytes = [0u8; 4];
2128         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2129             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2130             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2131             if Some(c) != s.chars().next() {
2132                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2133             }
2134         }
2135     }
2136
2137     #[test]
2138     fn test_chars_rev_decoding() {
2139         let mut bytes = [0u8; 4];
2140         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2141             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2142             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2143             if Some(c) != s.chars().rev().next() {
2144                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2145             }
2146         }
2147     }
2148
2149     #[test]
2150     fn test_iterator_clone() {
2151         let s = "ศไทย中华Việt Nam";
2152         let mut it = s.chars();
2153         it.next();
2154         assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2155     }
2156
2157     #[test]
2158     fn test_bytesator() {
2159         let s = "ศไทย中华Việt Nam";
2160         let v = [
2161             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2162             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2163             109
2164         ];
2165         let mut pos = 0;
2166
2167         for b in s.bytes() {
2168             assert_eq!(b, v[pos]);
2169             pos += 1;
2170         }
2171     }
2172
2173     #[test]
2174     fn test_bytes_revator() {
2175         let s = "ศไทย中华Việt Nam";
2176         let v = [
2177             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2178             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2179             109
2180         ];
2181         let mut pos = v.len();
2182
2183         for b in s.bytes().rev() {
2184             pos -= 1;
2185             assert_eq!(b, v[pos]);
2186         }
2187     }
2188
2189     #[test]
2190     fn test_char_indicesator() {
2191         let s = "ศไทย中华Việt Nam";
2192         let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2193         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2194
2195         let mut pos = 0;
2196         let mut it = s.char_indices();
2197
2198         for c in it {
2199             assert_eq!(c, (p[pos], v[pos]));
2200             pos += 1;
2201         }
2202         assert_eq!(pos, v.len());
2203         assert_eq!(pos, p.len());
2204     }
2205
2206     #[test]
2207     fn test_char_indices_revator() {
2208         let s = "ศไทย中华Việt Nam";
2209         let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2210         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2211
2212         let mut pos = 0;
2213         let mut it = s.char_indices().rev();
2214
2215         for c in it {
2216             assert_eq!(c, (p[pos], v[pos]));
2217             pos += 1;
2218         }
2219         assert_eq!(pos, v.len());
2220         assert_eq!(pos, p.len());
2221     }
2222
2223     #[test]
2224     fn test_splitn_char_iterator() {
2225         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2226
2227         let split: Vec<&str> = data.splitn(3, ' ').collect();
2228         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2229
2230         let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2231         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2232
2233         // Unicode
2234         let split: Vec<&str> = data.splitn(3, 'ä').collect();
2235         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2236
2237         let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2238         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2239     }
2240
2241     #[test]
2242     fn test_split_char_iterator_no_trailing() {
2243         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2244
2245         let split: Vec<&str> = data.split('\n').collect();
2246         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2247
2248         let split: Vec<&str> = data.split_terminator('\n').collect();
2249         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2250     }
2251
2252     #[test]
2253     fn test_words() {
2254         let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
2255         let words: Vec<&str> = data.words().collect();
2256         assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2257     }
2258
2259     #[test]
2260     fn test_nfd_chars() {
2261         macro_rules! t {
2262             ($input: expr, $expected: expr) => {
2263                 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2264             }
2265         }
2266         t!("abc", "abc");
2267         t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2268         t!("\u{2026}", "\u{2026}");
2269         t!("\u{2126}", "\u{3a9}");
2270         t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2271         t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2272         t!("a\u{301}", "a\u{301}");
2273         t!("\u{301}a", "\u{301}a");
2274         t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2275         t!("\u{ac1c}", "\u{1100}\u{1162}");
2276     }
2277
2278     #[test]
2279     fn test_nfkd_chars() {
2280         macro_rules! t {
2281             ($input: expr, $expected: expr) => {
2282                 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2283             }
2284         }
2285         t!("abc", "abc");
2286         t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2287         t!("\u{2026}", "...");
2288         t!("\u{2126}", "\u{3a9}");
2289         t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2290         t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2291         t!("a\u{301}", "a\u{301}");
2292         t!("\u{301}a", "\u{301}a");
2293         t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2294         t!("\u{ac1c}", "\u{1100}\u{1162}");
2295     }
2296
2297     #[test]
2298     fn test_nfc_chars() {
2299         macro_rules! t {
2300             ($input: expr, $expected: expr) => {
2301                 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2302             }
2303         }
2304         t!("abc", "abc");
2305         t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2306         t!("\u{2026}", "\u{2026}");
2307         t!("\u{2126}", "\u{3a9}");
2308         t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2309         t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2310         t!("a\u{301}", "\u{e1}");
2311         t!("\u{301}a", "\u{301}a");
2312         t!("\u{d4db}", "\u{d4db}");
2313         t!("\u{ac1c}", "\u{ac1c}");
2314         t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2315     }
2316
2317     #[test]
2318     fn test_nfkc_chars() {
2319         macro_rules! t {
2320             ($input: expr, $expected: expr) => {
2321                 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2322             }
2323         }
2324         t!("abc", "abc");
2325         t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2326         t!("\u{2026}", "...");
2327         t!("\u{2126}", "\u{3a9}");
2328         t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2329         t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2330         t!("a\u{301}", "\u{e1}");
2331         t!("\u{301}a", "\u{301}a");
2332         t!("\u{d4db}", "\u{d4db}");
2333         t!("\u{ac1c}", "\u{ac1c}");
2334         t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2335     }
2336
2337     #[test]
2338     fn test_lines() {
2339         let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2340         let lines: Vec<&str> = data.lines().collect();
2341         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2342
2343         let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2344         let lines: Vec<&str> = data.lines().collect();
2345         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2346     }
2347
2348     #[test]
2349     fn test_graphemes() {
2350         use core::iter::order;
2351         // official Unicode test data
2352         // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2353         let test_same: [(_, &[_]); 325] = [
2354             ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2355             ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2356             ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2357             ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2358             ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2359             ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2360             ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2361             ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2362             ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2363             ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2364             ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2365             ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2366             ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2367             ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2368             ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2369             ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2370             ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2371             ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2372             ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2373             ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2374             ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2375             ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2376             ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2377             ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2378             ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2379             ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2380             ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2381             ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2382             ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2383             ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2384             ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2385             ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2386             ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2387             ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2388             ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2389             ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2390             ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2391             ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2392             ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2393             ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2394             ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2395             ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2396             ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2397             ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2398             ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2399             ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2400             ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2401             ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2402             ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2403             ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2404             ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2405             ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2406             ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2407             ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2408             ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2409             ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2410             ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2411             ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2412             ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2413             ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2414             ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2415             ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2416             ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2417             ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2418             ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2419             ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2420             ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2421             ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2422             ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2423             ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2424             ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2425             ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2426             ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2427             ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2428             ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2429             ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2430             ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2431             ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2432             ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2433             ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2434             ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2435             ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2436             ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2437             ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2438             ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2439             ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2440             ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2441             ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2442             ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2443             ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2444             ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2445             ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2446             ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2447             ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2448             ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2449             ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2450             ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2451             ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2452             ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2453             ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2454             ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2455             ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2456             ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2457             ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2458             ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2459             ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2460             ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2461             ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2462             ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2463             ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2464             ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2465             ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2466             ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2467             ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2468             ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2469             ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2470             ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2471             ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2472             ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2473             ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2474             ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2475             ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2476             ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2477             ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2478             ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2479             ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2480             ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2481             ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2482             ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2483             ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2484             ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2485             ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2486             ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2487             ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2488             ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2489             ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2490             ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2491             ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2492             ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2493             ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2494             ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2495             ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2496             ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2497             ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2498             ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2499             ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2500             ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2501             ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2502             ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2503             ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2504             ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2505             ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2506             ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2507             ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2508             ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2509             ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2510             ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2511             ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2512             ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2513             ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2514             ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2515             ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2516             ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2517             ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2518             ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2519             ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2520             ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2521             ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2522             ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2523             ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2524             ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2525             ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2526             ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2527             ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2528             ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2529             ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2530             ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2531             ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2532             ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2533             ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2534             ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2535             ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2536             ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2537             ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2538             ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2539             ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2540             ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2541             ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2542             ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2543             ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2544             ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2545             ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2546             ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2547             ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2548             ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2549             ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2550             ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2551             ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2552             ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2553             ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2554             ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2555             ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2556             ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2557             ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2558             ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2559             ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2560             ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2561             ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2562             ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2563             ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2564             ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2565             ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2566             ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2567             ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2568             ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2569             ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2570             ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2571             ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2572             ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2573             ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2574             ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2575             ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2576             ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2577             ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2578             ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2579             ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2580             ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2581             ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2582             ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2583             ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2584             ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2585             ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2586             ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2587             ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2588             ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2589             ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2590             ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2591             ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2592             ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2593             ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2594             ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2595             ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2596             ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2597             ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2598             ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2599             ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2600             ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2601             ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2602             ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2603             ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2604             ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2605             ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2606             ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2607             ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2608             ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2609             ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2610             ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2611             ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2612             ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2613             ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2614             ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2615             ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2616             ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2617             ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2618             ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2619             ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2620             ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2621             ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2622             ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2623             ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2624             ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2625             ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2626             ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2627             ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2628             ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2629             ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2630             ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2631             ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2632             ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2633             ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2634             ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2635             ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2636             ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2637             ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2638             ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2639             ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2640             ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2641             ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2642             ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2643             ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2644             ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2645             ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2646             ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2647             ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2648             ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2649             ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2650             ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2651             ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2652             ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2653             ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2654             ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2655             ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2656             ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2657             ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2658             ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2659             ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2660             ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2661             ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2662             ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2663             ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2664             ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2665             ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2666             ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2667             ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2668             ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2669             ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2670             ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2671             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2672             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2673             &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2674             ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2675              &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2676             ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2677             ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2678              "\u{1F1E7}\u{1F1E8}"]),
2679             ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2680              &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2681             ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2682             ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2683         ];
2684
2685         let test_diff: [(_, &[_], &[_]); 23] = [
2686             ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2687             &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2688             &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2689             &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2690             &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2691             &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2692             &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2693             &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2694             &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2695             &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2696             &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2697             &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2698             &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2699             &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2700             &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2701             &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2702             &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2703             &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2704             &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2705             &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2706             &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2707             &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2708             &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2709         ];
2710
2711         for &(s, g) in test_same.iter() {
2712             // test forward iterator
2713             assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2714             assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2715
2716             // test reverse iterator
2717             assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2718             assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2719         }
2720
2721         for &(s, gt, gf) in test_diff.iter() {
2722             // test forward iterator
2723             assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2724             assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2725
2726             // test reverse iterator
2727             assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2728             assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2729         }
2730
2731         // test the indices iterators
2732         let s = "a̐éö̲\r\n";
2733         let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2734         let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2735         assert_eq!(gr_inds, b);
2736         let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2737         let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2738         assert_eq!(gr_inds, b);
2739         let mut gr_inds_iter = s.grapheme_indices(true);
2740         {
2741             let gr_inds = gr_inds_iter.by_ref();
2742             let e1 = gr_inds.size_hint();
2743             assert_eq!(e1, (1, Some(13)));
2744             let c = gr_inds.count();
2745             assert_eq!(c, 4);
2746         }
2747         let e2 = gr_inds_iter.size_hint();
2748         assert_eq!(e2, (0, Some(0)));
2749
2750         // make sure the reverse iterator does the right thing with "\n" at beginning of string
2751         let s = "\n\r\n\r";
2752         let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2753         let b: &[_] = &["\r", "\r\n", "\n"];
2754         assert_eq!(gr, b);
2755     }
2756
2757     #[test]
2758     fn test_split_strator() {
2759         fn t(s: &str, sep: &str, u: &[&str]) {
2760             let v: Vec<&str> = s.split_str(sep).collect();
2761             assert_eq!(v, u);
2762         }
2763         t("--1233345--", "12345", &["--1233345--"]);
2764         t("abc::hello::there", "::", &["abc", "hello", "there"]);
2765         t("::hello::there", "::", &["", "hello", "there"]);
2766         t("hello::there::", "::", &["hello", "there", ""]);
2767         t("::hello::there::", "::", &["", "hello", "there", ""]);
2768         t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2769         t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2770         t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2771         t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
2772         t("", ".", &[""]);
2773         t("zz", "zz", &["",""]);
2774         t("ok", "z", &["ok"]);
2775         t("zzz", "zz", &["","z"]);
2776         t("zzzzz", "zz", &["","","z"]);
2777     }
2778
2779     #[test]
2780     fn test_str_default() {
2781         use core::default::Default;
2782         fn t<S: Default + Str>() {
2783             let s: S = Default::default();
2784             assert_eq!(s.as_slice(), "");
2785         }
2786
2787         t::<&str>();
2788         t::<String>();
2789     }
2790
2791     #[test]
2792     fn test_str_container() {
2793         fn sum_len(v: &[&str]) -> uint {
2794             v.iter().map(|x| x.len()).sum()
2795         }
2796
2797         let s = String::from_str("01234");
2798         assert_eq!(5, sum_len(&["012", "", "34"]));
2799         assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2800                                 String::from_str("2").as_slice(),
2801                                 String::from_str("34").as_slice(),
2802                                 String::from_str("").as_slice()]));
2803         assert_eq!(5, sum_len(&[s.as_slice()]));
2804     }
2805
2806     #[test]
2807     fn test_str_from_utf8() {
2808         let xs = b"hello";
2809         assert_eq!(from_utf8(xs), Ok("hello"));
2810
2811         let xs = "ศไทย中华Việt Nam".as_bytes();
2812         assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
2813
2814         let xs = b"hello\xFF";
2815         assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2816     }
2817 }
2818
#[cfg(test)]
mod bench {
    use super::*;
    use prelude::{SliceExt, IteratorExt, SliceConcatExt};
    use test::Bencher;
    use test::black_box;

    // `CharEq` that matches exactly one character while reporting
    // `only_ascii() == false`. Shared by `split_unicode_not_ascii` and
    // `split_not_ascii`.
    // NOTE(review): presumably this keeps `split` off any ASCII-only fast
    // path so the general path is measured; confirm against `CharEq`.
    struct NotAscii(char);
    impl CharEq for NotAscii {
        #[inline]
        fn matches(&mut self, c: char) -> bool {
            let NotAscii(cc) = *self;
            cc == c
        }
        fn only_ascii(&self) -> bool { false }
    }

    // Counts the chars of a mixed multi-byte / ASCII string.
    #[bench]
    fn char_iterator(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().count());
    }

    // Walks the same string with an explicit `for` loop, passing every char
    // through `black_box`.
    #[bench]
    fn char_iterator_for(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| {
            for ch in s.chars() { black_box(ch) }
        });
    }

    // Counts the chars of an ASCII-only, multi-line string. The indentation
    // inside the literal is part of the benchmarked text.
    #[bench]
    fn char_iterator_ascii(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb
        Mary had a little lamb, Little lamb
        Mary had a little lamb, Little lamb
        Mary had a little lamb, Little lamb
        Mary had a little lamb, Little lamb
        Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().count());
    }

    // Counts the chars of the mixed string while iterating from the back.
    #[bench]
    fn char_iterator_rev(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| s.chars().rev().count());
    }

    // Reverse iteration with an explicit `for` loop and `black_box`.
    #[bench]
    fn char_iterator_rev_for(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";

        b.iter(|| {
            for ch in s.chars().rev() { black_box(ch) }
        });
    }

    // `char_indices` must yield as many items as `chars`; the expected count
    // is computed once outside the timed closure.
    #[bench]
    fn char_indicesator(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let len = s.chars().count();

        b.iter(|| assert_eq!(s.char_indices().count(), len));
    }

    // Same as `char_indicesator`, iterating from the back.
    #[bench]
    fn char_indicesator_rev(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let len = s.chars().count();

        b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
    }

    // Splits multi-byte text on the ASCII char 'V', which occurs twice and
    // therefore yields three pieces.
    #[bench]
    fn split_unicode_ascii(b: &mut Bencher) {
        let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

        b.iter(|| assert_eq!(s.split('V').count(), 3));
    }

    // Same split as `split_unicode_ascii`, but through the `NotAscii` matcher.
    #[bench]
    fn split_unicode_not_ascii(b: &mut Bencher) {
        let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";

        b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
    }

    // Splits an ASCII sentence on spaces using a plain `char` separator.
    #[bench]
    fn split_ascii(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(' ').count(), len));
    }

    // Splits the ASCII sentence on spaces through the `NotAscii` matcher; the
    // expected piece count comes from the plain `char` split.
    #[bench]
    fn split_not_ascii(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
    }

    // Splits the ASCII sentence using a named function as the predicate.
    #[bench]
    fn split_extern_fn(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();
        fn pred(c: char) -> bool { c == ' ' }

        b.iter(|| assert_eq!(s.split(pred).count(), len));
    }

    // Splits the ASCII sentence using a closure as the predicate.
    #[bench]
    fn split_closure(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
    }

    // Splits the ASCII sentence using a one-element `&[char]` as the separator set.
    #[bench]
    fn split_slice(b: &mut Bencher) {
        let s = "Mary had a little lamb, Little lamb, little-lamb.";
        let len = s.split(' ').count();

        let c: &[char] = &[' '];
        b.iter(|| assert_eq!(s.split(c).count(), len));
    }

    // Joins ten copies of a string with a multi-byte separator and checks the
    // resulting byte length: ten copies plus nine separators.
    #[bench]
    fn bench_connect(b: &mut Bencher) {
        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
        let sep = "→";
        let v = vec![s, s, s, s, s, s, s, s, s, s];
        b.iter(|| {
            assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
        })
    }

    // Short needle that is present in a short haystack.
    #[bench]
    fn bench_contains_short_short(b: &mut Bencher) {
        let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
        let needle = "sit";

        b.iter(|| {
            assert!(haystack.contains(needle));
        })
    }

    // Short needle that is absent from a long, multi-paragraph haystack. The
    // literal starts at column 0 on purpose: after a paragraph break there is
    // no `\` continuation, so any indentation would become part of the text.
    #[bench]
    fn bench_contains_short_long(b: &mut Bencher) {
        let haystack = "\
Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
tempus vel, gravida nec quam.

In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
interdum. Curabitur ut nisi justo.

Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
Aliquam sit amet placerat lorem.

Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
cursus accumsan.

Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
malesuada sollicitudin quam eu fermentum.";
        let needle = "english";

        b.iter(|| {
            assert!(!haystack.contains(needle));
        })
    }

    // Absent needle that shares a long prefix with every window of the
    // haystack: all but its last byte match at each position.
    #[bench]
    fn bench_contains_bad_naive(b: &mut Bencher) {
        let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
        let needle = "aaaaaaaab";

        b.iter(|| {
            assert!(!haystack.contains(needle));
        })
    }

    // Needle identical to the haystack.
    #[bench]
    fn bench_contains_equal(b: &mut Bencher) {
        let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
        let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";

        b.iter(|| {
            assert!(haystack.contains(needle));
        })
    }
}