src/libcollections/str.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // ignore-lexer-test FIXME #15679
  12
  13 //! Unicode string manipulation (`str` type)
  14 //!
  15 //! # Basic Usage
  16 //!
  17 //! Rust's string type is one of the core primitive types of the language. While
  18 //! represented by the name `str`, the name `str` is not actually a valid type in
  19 //! Rust. Each string must also be decorated with a pointer. `String` is used
  20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
  21 //! `&str`.
  22 //!
  23 //! `&str` is the borrowed string type. This type of string can only be created
  24 //! from other strings, unless it is a static string (see below). As the word
  25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
  26 //! cannot be moved out of.
  27 //!
  28 //! As an example, here's some code that uses a string.
  29 //!
  30 //! ```rust
  31 //! fn main() {
  32 //!     let borrowed_string = "This string is borrowed with the 'static lifetime";
  33 //! }
  34 //! ```
  35 //!
  36 //! From the example above, you can guess that Rust's string literals have the
  37 //! `'static` lifetime. This is akin to C's concept of a static string.
  38 //! More precisely, string literals are immutable views with a 'static lifetime
  39 //! (otherwise known as the lifetime of the entire program), and thus have the
  40 //! type `&'static str`.
  41 //!
  42 //! # Representation
  43 //!
  44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
  45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
  46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
  47 //! not null-terminated and can thus contain null bytes.
  48 //!
   49 //! The actual representation of a string maps directly onto a slice: `&str`
   50 //! has the same representation as `&[u8]`.
  51
  52 #![doc(primitive = "str")]
  53 #![stable]
  54
  55 use self::RecompositionState::*;
  56 use self::DecompositionType::*;
  57
  58 use core::borrow::{BorrowFrom, ToOwned};
  59 use core::char::CharExt;
  60 use core::clone::Clone;
  61 use core::iter::AdditiveIterator;
  62 use core::iter::{range, Iterator, IteratorExt};
  63 use core::ops;
  64 use core::option::Option::{self, Some, None};
  65 use core::slice::AsSlice;
  66 use core::str as core_str;
  67 use unicode::str::{UnicodeStr, Utf16Encoder};
  68
  69 use ring_buf::RingBuf;
  70 use slice::SliceExt;
  71 use string::String;
  72 use unicode;
  73 use vec::Vec;
  74 use slice::SliceConcatExt;
  75
  76 pub use core::str::{FromStr, Utf8Error, Str};
  77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
  78 pub use core::str::{Split, SplitTerminator};
  79 pub use core::str::{SplitN, RSplitN};
  80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
  81 pub use core::str::{from_utf8_unchecked, from_c_str};
  82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
  83
  84 /*
  85 Section: Creating a string
  86 */
  87
  88 impl<S: Str> SliceConcatExt<str, String> for [S] {
  89     fn concat(&self) -> String {
  90         let s = self.as_slice();
  91
  92         if s.is_empty() {
  93             return String::new();
  94         }
  95
  96         // `len` calculation may overflow but push_str will check boundaries
  97         let len = s.iter().map(|s| s.as_slice().len()).sum();
  98         let mut result = String::with_capacity(len);
  99
 100         for s in s.iter() {
 101             result.push_str(s.as_slice())
 102         }
 103
 104         result
 105     }
 106
 107     fn connect(&self, sep: &str) -> String {
 108         let s = self.as_slice();
 109
 110         if s.is_empty() {
 111             return String::new();
 112         }
 113
 114         // concat is faster
 115         if sep.is_empty() {
 116             return s.concat();
 117         }
 118
 119         // this is wrong without the guarantee that `self` is non-empty
 120         // `len` calculation may overflow but push_str but will check boundaries
 121         let len = sep.len() * (s.len() - 1)
 122             + s.iter().map(|s| s.as_slice().len()).sum();
 123         let mut result = String::with_capacity(len);
 124         let mut first = true;
 125
 126         for s in s.iter() {
 127             if first {
 128                 first = false;
 129             } else {
 130                 result.push_str(sep);
 131             }
 132             result.push_str(s.as_slice());
 133         }
 134         result
 135     }
 136 }
 137
 138 /*
 139 Section: Iterators
 140 */
 141
 142 // Helper functions used for Unicode normalization
 143 fn canonical_sort(comb: &mut [(char, u8)]) {
 144     let len = comb.len();
 145     for i in range(0, len) {
 146         let mut swapped = false;
 147         for j in range(1, len-i) {
 148             let class_a = comb[j-1].1;
 149             let class_b = comb[j].1;
 150             if class_a != 0 && class_b != 0 && class_a > class_b {
 151                 comb.swap(j-1, j);
 152                 swapped = true;
 153             }
 154         }
 155         if !swapped { break; }
 156     }
 157 }
 158
      // Selects which decomposition routine `Decompositions` applies to each
      // input character.
  159 #[derive(Clone)]
  160 enum DecompositionType {
          // Dispatches to `unicode::char::decompose_canonical`.
  161     Canonical,
          // Dispatches to `unicode::char::decompose_compatible`.
  162     Compatible
  163 }
 164
  165 /// External iterator for a string's decomposition's characters.
  166 /// Use with the `std::iter` module.
      ///
      /// Values are created by `StrExt::nfd_chars` and `StrExt::nfkd_chars`.
  167 #[derive(Clone)]
  168 #[unstable]
  169 pub struct Decompositions<'a> {
          // Which decomposition (canonical or compatibility) is applied.
  170     kind: DecompositionType,
          // Source characters that have not been decomposed yet.
  171     iter: Chars<'a>,
          // Decomposed characters paired with their canonical combining class,
          // waiting to be yielded.
  172     buffer: Vec<(char, u8)>,
          // Set once `buffer` has been passed through `canonical_sort`; non-zero
          // classes are only yielded from the front while this is true.
  173     sorted: bool
  174 }
 175
  176 #[stable]
  177 impl<'a> Iterator for Decompositions<'a> {
  178     type Item = char;
  179
          // Yields the next decomposed character, refilling and sorting `buffer`
          // from the underlying `Chars` when the front of the buffer cannot be
          // yielded yet.
  180     #[inline]
  181     fn next(&mut self) -> Option<char> {
              // Fast path: yield straight from the front of the buffer when allowed.
  182         match self.buffer.first() {
                  // A class-0 character can always be yielded; whatever follows it
                  // has to be sorted again before it may be yielded.
  183             Some(&(c, 0)) => {
  184                 self.sorted = false;
  185                 self.buffer.remove(0);
  186                 return Some(c);
  187             }
                  // A non-zero class is only yielded while the buffer is sorted.
  188             Some(&(c, _)) if self.sorted => {
  189                 self.buffer.remove(0);
  190                 return Some(c);
  191             }
                  // Empty buffer, or an unsorted non-zero class at the front.
  192             _ => self.sorted = false
  193         }
  194
              // Decompose further source characters into the buffer until a
              // class-0 character has triggered a sort.
  195         if !self.sorted {
  196             for ch in self.iter {
  197                 let buffer = &mut self.buffer;
  198                 let sorted = &mut self.sorted;
  199                 {
                          // Handed to the decompose helper; presumably invoked once
                          // per resulting character `d` (helper is defined elsewhere).
  200                     let callback = |&mut: d| {
  201                         let class =
  202                             unicode::char::canonical_combining_class(d);
                              // First class-0 character while unsorted: sort what is
                              // buffered so far, then push the class-0 character after it.
  203                         if class == 0 && !*sorted {
  204                             canonical_sort(buffer.as_mut_slice());
  205                             *sorted = true;
  206                         }
  207                         buffer.push((d, class));
  208                     };
  209                     match self.kind {
  210                         Canonical => {
  211                             unicode::char::decompose_canonical(ch, callback)
  212                         }
  213                         Compatible => {
  214                             unicode::char::decompose_compatible(ch, callback)
  215                         }
  216                     }
  217                 }
                      // Stop reading input as soon as a sort has happened.
  218                 if *sorted {
  219                     break
  220                 }
  221             }
  222         }
  223
              // The input ran out without a class-0 character: sort what remains.
  224         if !self.sorted {
  225             canonical_sort(self.buffer.as_mut_slice());
  226             self.sorted = true;
  227         }
  228
  229         if self.buffer.is_empty() {
  230             None
  231         } else {
  232             match self.buffer.remove(0) {
                      // Yielding a class-0 character invalidates the sorted flag
                      // for whatever follows it.
  233                 (c, 0) => {
  234                     self.sorted = false;
  235                     Some(c)
  236                 }
  237                 (c, _) => Some(c),
  238             }
  239         }
  240     }
  241
          // Reports the lower bound of the underlying `Chars` and no upper bound.
  242     fn size_hint(&self) -> (uint, Option<uint>) {
  243         let (lower, _) = self.iter.size_hint();
  244         (lower, None)
  245     }
  246 }
 247
      // States of the machine that drives `Recompositions::next`.
  248 #[derive(Clone)]
  249 enum RecompositionState {
          // Reading decomposed characters and trying to compose them.
  250     Composing,
          // Draining `buffer`; returns to `Composing` once it is empty.
  251     Purging,
          // Input exhausted: drains `buffer`, then yields any pending `composee`.
  252     Finished
  253 }
 254
  255 /// External iterator for a string's recomposition's characters.
  256 /// Use with the `std::iter` module.
      ///
      /// Values are created by `StrExt::nfc_chars` and `StrExt::nfkc_chars`.
  257 #[derive(Clone)]
  258 #[unstable]
  259 pub struct Recompositions<'a> {
          // Decomposed input that is being recomposed.
  260     iter: Decompositions<'a>,
          // Current state of the recomposition state machine.
  261     state: RecompositionState,
          // Characters that could not be composed and are waiting to be yielded.
  262     buffer: RingBuf<char>,
          // Character that following characters are currently composed onto, if any.
  263     composee: Option<char>,
          // Combining class of the character most recently pushed to `buffer`;
          // reset to `None` when a new `composee` starts after a blocked class-0
          // character.
  264     last_ccc: Option<u8>
  265 }
 266
  267 #[stable]
  268 impl<'a> Iterator for Recompositions<'a> {
  269     type Item = char;
  270
          // Yields the next recomposed character by stepping the state machine
          // until some state returns a value.
  271     #[inline]
  272     fn next(&mut self) -> Option<char> {
  273         loop {
  274             match self.state {
  275                 Composing => {
  276                     for ch in self.iter {
  277                         let ch_class = unicode::char::canonical_combining_class(ch);
                              // No composee yet: a non-zero class is passed through
                              // unchanged, a class-0 character becomes the composee.
  278                         if self.composee.is_none() {
  279                             if ch_class != 0 {
  280                                 return Some(ch);
  281                             }
  282                             self.composee = Some(ch);
  283                             continue;
  284                         }
  285                         let k = self.composee.clone().unwrap();
  286
  287                         match self.last_ccc {
                                  // No combining class recorded since the composee started.
  288                             None => {
  289                                 match unicode::char::compose(k, ch) {
  290                                     Some(r) => {
  291                                         self.composee = Some(r);
  292                                         continue;
  293                                     }
  294                                     None => {
                                              // A class-0 character that does not compose
                                              // starts a new composee; the old one is yielded.
  295                                         if ch_class == 0 {
  296                                             self.composee = Some(ch);
  297                                             return Some(k);
  298                                         }
  299                                         self.buffer.push_back(ch);
  300                                         self.last_ccc = Some(ch_class);
  301                                     }
  302                                 }
  303                             }
  304                             Some(l_class) => {
  305                                 if l_class >= ch_class {
  306                                     // `ch` is blocked from `composee`
  307                                     if ch_class == 0 {
                                              // Yield the old composee now and drain the
                                              // buffered characters on the following calls.
  308                                         self.composee = Some(ch);
  309                                         self.last_ccc = None;
  310                                         self.state = Purging;
  311                                         return Some(k);
  312                                     }
  313                                     self.buffer.push_back(ch);
  314                                     self.last_ccc = Some(ch_class);
  315                                     continue;
  316                                 }
                                      // Not blocked: try to compose onto the composee.
  317                                 match unicode::char::compose(k, ch) {
  318                                     Some(r) => {
  319                                         self.composee = Some(r);
  320                                         continue;
  321                                     }
  322                                     None => {
  323                                         self.buffer.push_back(ch);
  324                                         self.last_ccc = Some(ch_class);
  325                                     }
  326                                 }
  327                             }
  328                         }
  329                     }
                          // Input exhausted: yield the pending composee first; the
                          // buffer is drained by the `Finished` state afterwards.
  330                     self.state = Finished;
  331                     if self.composee.is_some() {
  332                         return self.composee.take();
  333                     }
  334                 }
  335                 Purging => {
                          // Yield buffered characters; resume composing once empty.
  336                     match self.buffer.pop_front() {
  337                         None => self.state = Composing,
  338                         s => return s
  339                     }
  340                 }
  341                 Finished => {
                          // Yield buffered characters, then whatever `composee` holds
                          // (which is `None` once it has been taken).
  342                     match self.buffer.pop_front() {
  343                         None => return self.composee.take(),
  344                         s => return s
  345                     }
  346                 }
  347             }
  348         }
  349     }
  350 }
 351
  352 /// External iterator for a string's UTF16 codeunits.
  353 /// Use with the `std::iter` module.
  354 #[derive(Clone)]
  355 #[unstable]
  356 pub struct Utf16Units<'a> {
          // Encoder wrapping the string's `Chars` iterator; it produces the `u16`
          // values that this iterator yields.
  357     encoder: Utf16Encoder<Chars<'a>>
  358 }
 359
      // Both methods delegate directly to the wrapped `Utf16Encoder`.
  360 #[stable]
  361 impl<'a> Iterator for Utf16Units<'a> {
  362     type Item = u16;
  363
          // Next code unit, as produced by the encoder.
  364     #[inline]
  365     fn next(&mut self) -> Option<u16> { self.encoder.next() }
  366
          // Size hint exactly as reported by the encoder.
  367     #[inline]
  368     fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
  369 }
 370
 371 /*
 372 Section: Misc
 373 */
 374
  375 // Return the initial codepoint accumulator for the first byte.
  376 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
  377 // for width 3, and 3 bits for width 4
      // (`0x7F >> $width` gives the masks 0x1F, 0x0F and 0x07 for those widths).
      // The masked byte is widened to `u32`.
  378 macro_rules! utf8_first_byte {
  379     ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
  380 }
 381
  382 // return the value of $ch updated with continuation byte $byte
      // The accumulator is shifted left by 6 bits and the low 6 bits of `$byte`
      // (`$byte & 63`), widened to `u32`, are OR-ed in.
  383 macro_rules! utf8_acc_cont_byte {
  384     ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
  385 }
 386
      // Lets a `String` be borrowed as a `str`.
  387 #[unstable = "trait is unstable"]
  388 impl BorrowFrom<String> for str {
          // Borrows the whole string: `owned[]` is the full-range slice of `owned`.
  389     fn borrow_from(owned: &String) -> &str { owned[] }
  390 }
 391
      // Produces an owned `String` from a borrowed `str`.
  392 #[unstable = "trait is unstable"]
  393 impl ToOwned<String> for str {
          // Copies the bytes of `self` into a new `String`.
  394     fn to_owned(&self) -> String {
              // The bytes are taken from an existing `str`, so they are already
              // valid UTF-8 and re-validation is skipped.
  395         unsafe {
  396             String::from_utf8_unchecked(self.as_bytes().to_owned())
  397         }
  398     }
  399 }
 400
 401 /*
 402 Section: CowString
 403 */
 404
 405 /*
 406 Section: Trait implementations
 407 */
 408
 409 /// Any string that can be represented as a slice.
 410 #[stable]
 411 pub trait StrExt: ops::Slice<uint, str> {
 412     /// Escapes each char in `s` with `char::escape_default`.
 413     #[unstable = "return type may change to be an iterator"]
 414     fn escape_default(&self) -> String {
 415         self.chars().flat_map(|c| c.escape_default()).collect()
 416     }
 417
 418     /// Escapes each char in `s` with `char::escape_unicode`.
 419     #[unstable = "return type may change to be an iterator"]
 420     fn escape_unicode(&self) -> String {
 421         self.chars().flat_map(|c| c.escape_unicode()).collect()
 422     }
 423
 424     /// Replaces all occurrences of one string with another.
 425     ///
 426     /// # Arguments
 427     ///
 428     /// * `from` - The string to replace
 429     /// * `to` - The replacement string
 430     ///
 431     /// # Return value
 432     ///
 433     /// The original string with all occurrences of `from` replaced with `to`.
 434     ///
 435     /// # Examples
 436     ///
 437     /// ```rust
 438     /// let s = "Do you know the muffin man,
 439     /// The muffin man, the muffin man, ...".to_string();
 440     ///
 441     /// assert_eq!(s.replace("muffin man", "little lamb"),
 442     ///            "Do you know the little lamb,
 443     /// The little lamb, the little lamb, ...".to_string());
 444     ///
 445     /// // not found, so no change.
 446     /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
 447     /// ```
 448     #[stable]
 449     fn replace(&self, from: &str, to: &str) -> String {
 450         let mut result = String::new();
 451         let mut last_end = 0;
 452         for (start, end) in self.match_indices(from) {
 453             result.push_str(unsafe { self.slice_unchecked(last_end, start) });
 454             result.push_str(to);
 455             last_end = end;
 456         }
 457         result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
 458         result
 459     }
 460
 461     /// Returns an iterator over the string in Unicode Normalization Form D
 462     /// (canonical decomposition).
 463     #[inline]
 464     #[unstable = "this functionality may be moved to libunicode"]
  465     fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
              // Only builds the iterator state; decomposition happens in
              // `Decompositions::next`. Starts with an empty, unsorted buffer.
  466         Decompositions {
  467             iter: self[].chars(),
  468             buffer: Vec::new(),
  469             sorted: false,
  470             kind: Canonical
  471         }
  472     }
 473
 474     /// Returns an iterator over the string in Unicode Normalization Form KD
 475     /// (compatibility decomposition).
 476     #[inline]
 477     #[unstable = "this functionality may be moved to libunicode"]
  478     fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
              // Same initial state as `nfd_chars`, but selecting the compatibility
              // decomposition.
  479         Decompositions {
  480             iter: self[].chars(),
  481             buffer: Vec::new(),
  482             sorted: false,
  483             kind: Compatible
  484         }
  485     }
 486
 487     /// An Iterator over the string in Unicode Normalization Form C
 488     /// (canonical decomposition followed by canonical composition).
 489     #[inline]
 490     #[unstable = "this functionality may be moved to libunicode"]
  491     fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
              // Recomposition runs over the canonical decomposition of `self`,
              // starting in the `Composing` state with nothing buffered or pending.
  492         Recompositions {
  493             iter: self.nfd_chars(),
  494             state: Composing,
  495             buffer: RingBuf::new(),
  496             composee: None,
  497             last_ccc: None
  498         }
  499     }
 500
 501     /// An Iterator over the string in Unicode Normalization Form KC
 502     /// (compatibility decomposition followed by canonical composition).
 503     #[inline]
 504     #[unstable = "this functionality may be moved to libunicode"]
  505     fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
              // Same initial state as `nfc_chars`, but fed by the compatibility
              // decomposition of `self`.
  506         Recompositions {
  507             iter: self.nfkd_chars(),
  508             state: Composing,
  509             buffer: RingBuf::new(),
  510             composee: None,
  511             last_ccc: None
  512         }
  513     }
 514
 515     /// Returns true if a string contains a string pattern.
 516     ///
 517     /// # Arguments
 518     ///
 519     /// - pat - The string pattern to look for
 520     ///
 521     /// # Example
 522     ///
 523     /// ```rust
 524     /// assert!("bananas".contains("nana"));
 525     /// ```
 526     #[stable]
  527     fn contains(&self, pat: &str) -> bool {
              // Forwards to the `core::str` implementation; `self[]` is the
              // full-range slice of `self`.
  528         core_str::StrExt::contains(self[], pat)
  529     }
 530
 531     /// Returns true if a string contains a char pattern.
 532     ///
 533     /// # Arguments
 534     ///
 535     /// - pat - The char pattern to look for
 536     ///
 537     /// # Example
 538     ///
 539     /// ```rust
 540     /// assert!("hello".contains_char('e'));
 541     /// ```
 542     #[unstable = "might get removed in favour of a more generic contains()"]
  543     fn contains_char<P: CharEq>(&self, pat: P) -> bool {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; `pat` is passed through unchanged.
  544         core_str::StrExt::contains_char(self[], pat)
  545     }
 546
 547     /// An iterator over the characters of `self`. Note, this iterates
 548     /// over Unicode code-points, not Unicode graphemes.
 549     ///
 550     /// # Example
 551     ///
 552     /// ```rust
 553     /// let v: Vec<char> = "abc åäö".chars().collect();
 554     /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
 555     /// ```
 556     #[stable]
  557     fn chars(&self) -> Chars {
              // Forwards to the `core::str` implementation on the full slice of `self`.
  558         core_str::StrExt::chars(self[])
  559     }
 560
 561     /// An iterator over the bytes of `self`
 562     ///
 563     /// # Example
 564     ///
 565     /// ```rust
 566     /// let v: Vec<u8> = "bors".bytes().collect();
 567     /// assert_eq!(v, b"bors".to_vec());
 568     /// ```
 569     #[stable]
  570     fn bytes(&self) -> Bytes {
              // Forwards to the `core::str` implementation on the full slice of `self`.
  571         core_str::StrExt::bytes(self[])
  572     }
 573
 574     /// An iterator over the characters of `self` and their byte offsets.
 575     #[stable]
  576     fn char_indices(&self) -> CharIndices {
              // Forwards to the `core::str` implementation on the full slice of `self`.
  577         core_str::StrExt::char_indices(self[])
  578     }
 579
 580     /// An iterator over substrings of `self`, separated by characters
 581     /// matched by the pattern `pat`.
 582     ///
 583     /// # Example
 584     ///
 585     /// ```rust
 586     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
 587     /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
 588     ///
 589     /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
 590     /// assert_eq!(v, vec!["abc", "def", "ghi"]);
 591     ///
 592     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
 593     /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
 594     ///
 595     /// let v: Vec<&str> = "".split('X').collect();
 596     /// assert_eq!(v, vec![""]);
 597     /// ```
 598     #[stable]
  599     fn split<P: CharEq>(&self, pat: P) -> Split<P> {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; `pat` is passed through unchanged.
  600         core_str::StrExt::split(self[], pat)
  601     }
 602
 603     /// An iterator over substrings of `self`, separated by characters
 604     /// matched by the pattern `pat`, restricted to splitting at most `count`
 605     /// times.
 606     ///
 607     /// # Example
 608     ///
 609     /// ```rust
 610     /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
 611     /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
 612     ///
 613     /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
 614     /// assert_eq!(v, vec!["abc", "def2ghi"]);
 615     ///
 616     /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
 617     /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
 618     ///
 619     /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
 620     /// assert_eq!(v, vec!["abcXdef"]);
 621     ///
 622     /// let v: Vec<&str> = "".splitn(1, 'X').collect();
 623     /// assert_eq!(v, vec![""]);
 624     /// ```
 625     #[stable]
  626     fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; `count` and `pat` are passed through unchanged.
  627         core_str::StrExt::splitn(self[], count, pat)
  628     }
 629
 630     /// An iterator over substrings of `self`, separated by characters
 631     /// matched by the pattern `pat`.
 632     ///
 633     /// Equivalent to `split`, except that the trailing substring
 634     /// is skipped if empty (terminator semantics).
 635     ///
 636     /// # Example
 637     ///
 638     /// ```rust
 639     /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
 640     /// assert_eq!(v, vec!["A", "B"]);
 641     ///
 642     /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
 643     /// assert_eq!(v, vec!["A", "", "B", ""]);
 644     ///
 645     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
 646     /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
 647     ///
 648     /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
 649     /// assert_eq!(v, vec!["ghi", "def", "abc"]);
 650     ///
 651     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
 652     /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
 653     /// ```
 654     #[unstable = "might get removed"]
  655     fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; `pat` is passed through unchanged.
  656         core_str::StrExt::split_terminator(self[], pat)
  657     }
 658
 659     /// An iterator over substrings of `self`, separated by characters
 660     /// matched by the pattern `pat`, starting from the end of the string.
 661     /// Restricted to splitting at most `count` times.
 662     ///
 663     /// # Example
 664     ///
 665     /// ```rust
 666     /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
 667     /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
 668     ///
 669     /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
 670     /// assert_eq!(v, vec!["ghi", "abc1def"]);
 671     ///
 672     /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
 673     /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
 674     /// ```
 675     #[stable]
  676     fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; `count` and `pat` are passed through unchanged.
  677         core_str::StrExt::rsplitn(self[], count, pat)
  678     }
 679
  680     /// An iterator over the start and end indices of the disjoint
  681     /// matches of the pattern `pat` within `self`.
  682     ///
  683     /// That is, each returned value `(start, end)` satisfies
  684     /// `self.slice(start, end) == pat`. For matches of `pat` within
  685     /// `self` that overlap, only the indices corresponding to the
  686     /// first match are returned.
  687     ///
  688     /// # Example
  689     ///
  690     /// ```rust
  691     /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
  692     /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
  693     ///
  694     /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
  695     /// assert_eq!(v, vec![(1,4), (4,7)]);
  696     ///
  697     /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
  698     /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
  699     /// ```
  700     #[unstable = "might have its iterator type changed"]
  701     fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; the signature ties `self` and `pat` to the same lifetime
              // as the returned iterator.
  702         core_str::StrExt::match_indices(self[], pat)
  703     }
 704
  705     /// An iterator over the substrings of `self` separated by the pattern `pat`.
  706     ///
  707     /// # Example
  708     ///
  709     /// ```rust
  710     /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
  711     /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
  712     ///
  713     /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
  714     /// assert_eq!(v, vec!["1", "", "2"]);
  715     /// ```
  716     #[unstable = "might get removed in the future in favor of a more generic split()"]
  717     fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; the signature ties `self` and `pat` to the same lifetime
              // as the returned iterator.
  718         core_str::StrExt::split_str(self[], pat)
  719     }
 720
 721     /// An iterator over the lines of a string (subsequences separated
 722     /// by `\n`). This does not include the empty string after a
 723     /// trailing `\n`.
 724     ///
 725     /// # Example
 726     ///
 727     /// ```rust
 728     /// let four_lines = "foo\nbar\n\nbaz\n";
 729     /// let v: Vec<&str> = four_lines.lines().collect();
 730     /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
 731     /// ```
 732     #[stable]
  733     fn lines(&self) -> Lines {
              // Forwards to the `core::str` implementation on the full slice of `self`.
  734         core_str::StrExt::lines(self[])
  735     }
 736
 737     /// An iterator over the lines of a string, separated by either
 738     /// `\n` or `\r\n`. As with `.lines()`, this does not include an
 739     /// empty trailing line.
 740     ///
 741     /// # Example
 742     ///
 743     /// ```rust
 744     /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
 745     /// let v: Vec<&str> = four_lines.lines_any().collect();
 746     /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
 747     /// ```
 748     #[stable]
  749     fn lines_any(&self) -> LinesAny {
              // Forwards to the `core::str` implementation on the full slice of `self`.
  750         core_str::StrExt::lines_any(self[])
  751     }
 752
 753     /// Returns a slice of the given string from the byte range
 754     /// [`begin`..`end`).
 755     ///
 756     /// This operation is `O(1)`.
 757     ///
 758     /// Panics when `begin` and `end` do not point to valid characters
 759     /// or point beyond the last character of the string.
 760     ///
 761     /// See also `slice_to` and `slice_from` for slicing prefixes and
 762     /// suffixes of strings, and `slice_chars` for slicing based on
 763     /// code point counts.
 764     ///
 765     /// # Example
 766     ///
 767     /// ```rust
 768     /// let s = "Löwe 老虎 Léopard";
 769     /// assert_eq!(s.slice(0, 1), "L");
 770     ///
 771     /// assert_eq!(s.slice(1, 9), "öwe 老");
 772     ///
 773     /// // these will panic:
 774     /// // byte 2 lies within `ö`:
 775     /// // s.slice(2, 3);
 776     ///
 777     /// // byte 8 lies within `老`
 778     /// // s.slice(1, 8);
 779     ///
 780     /// // byte 100 is outside the string
 781     /// // s.slice(3, 100);
 782     /// ```
 783     #[unstable = "use slice notation [a..b] instead"]
  784     fn slice(&self, begin: uint, end: uint) -> &str {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; no checks are performed in this wrapper itself.
  785         core_str::StrExt::slice(self[], begin, end)
  786     }
 787
 788     /// Returns a slice of the string from `begin` to its end.
 789     ///
 790     /// Equivalent to `self.slice(begin, self.len())`.
 791     ///
 792     /// Panics when `begin` does not point to a valid character, or is
 793     /// out of bounds.
 794     ///
 795     /// See also `slice`, `slice_to` and `slice_chars`.
 796     #[unstable = "use slice notation [a..] instead"]
  797     fn slice_from(&self, begin: uint) -> &str {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; no checks are performed in this wrapper itself.
  798         core_str::StrExt::slice_from(self[], begin)
  799     }
 800
 801     /// Returns a slice of the string from the beginning to byte
 802     /// `end`.
 803     ///
 804     /// Equivalent to `self.slice(0, end)`.
 805     ///
 806     /// Panics when `end` does not point to a valid character, or is
 807     /// out of bounds.
 808     ///
 809     /// See also `slice`, `slice_from` and `slice_chars`.
 810     #[unstable = "use slice notation [0..a] instead"]
  811     fn slice_to(&self, end: uint) -> &str {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; no checks are performed in this wrapper itself.
  812         core_str::StrExt::slice_to(self[], end)
  813     }
 814
  815     /// Returns a slice of the string from the character range
  816     /// [`begin`..`end`).
  817     ///
  818     /// That is, start at the `begin`-th code point of the string and
  819     /// continue to the `end`-th code point. This does not detect or
  820     /// handle edge cases such as leaving a combining character as the
  821     /// first code point of the string.
  822     ///
  823     /// Due to the design of UTF-8, this operation is `O(end)`.
  824     /// See `slice`, `slice_to` and `slice_from` for `O(1)`
  825     /// variants that use byte indices rather than code point
  826     /// indices.
  827     ///
  828     /// Panics if `begin` > `end` or if either `begin` or `end` is
  829     /// beyond the last character of the string.
  830     ///
  831     /// # Example
  832     ///
  833     /// ```rust
  834     /// let s = "Löwe 老虎 Léopard";
  835     /// assert_eq!(s.slice_chars(0, 4), "Löwe");
  836     /// assert_eq!(s.slice_chars(5, 7), "老虎");
  837     /// ```
  838     #[unstable = "may have yet to prove its worth"]
  839     fn slice_chars(&self, begin: uint, end: uint) -> &str {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`; no checks are performed in this wrapper itself.
  840         core_str::StrExt::slice_chars(self[], begin, end)
  841     }
 842
 843     /// Takes a bytewise (not UTF-8) slice from a string.
 844     ///
 845     /// Returns the substring from [`begin`..`end`).
 846     ///
 847     /// Caller must check both UTF-8 character boundaries and the boundaries of
 848     /// the entire slice as well.
 849     #[stable]
  850     unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
              // Forwards to the `core::str` implementation on the full slice of
              // `self`. This wrapper adds no bounds or char-boundary checks, so
              // the caller's obligations carry through unchanged.
  851         core_str::StrExt::slice_unchecked(self[], begin, end)
  852     }
 853
    /// Returns true if the string slice `pat` is a prefix of the string.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert!("banana".starts_with("ba"));
    ///
    /// // the empty string is a prefix of every string
    /// assert!("abc".starts_with(""));
    ///
    /// // a prefix longer than the string never matches
    /// assert!(!"a".starts_with("abc"));
    /// ```
    #[stable]
    fn starts_with(&self, pat: &str) -> bool {
        core_str::StrExt::starts_with(self[], pat)
    }
 865
    /// Returns true if the string slice `pat` is a suffix of the string.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert!("banana".ends_with("nana"));
    ///
    /// // the empty string is a suffix of every string
    /// assert!("abc".ends_with(""));
    ///
    /// // a suffix longer than the string never matches
    /// assert!(!"a".ends_with("abc"));
    /// ```
    #[stable]
    fn ends_with(&self, pat: &str) -> bool {
        core_str::StrExt::ends_with(self[], pat)
    }
 877
    /// Returns a string with all prefixes and suffixes that match
    /// the pattern `pat` repeatedly removed.
    ///
    /// # Arguments
    ///
    /// * pat - a character matcher (any `CharEq`); the example below uses a
    ///   `char`, a slice of `char`s and a closure over `char`
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
    /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
    /// ```
    #[stable]
    fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
        core_str::StrExt::trim_matches(self[], pat)
    }
 897
    /// Returns a string with all prefixes that match
    /// the pattern `pat` repeatedly removed.
    ///
    /// # Arguments
    ///
    /// * pat - a character matcher (any `CharEq`); the example below uses a
    ///   `char`, a slice of `char`s and a closure over `char`
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
    /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
    /// ```
    #[stable]
    fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
        core_str::StrExt::trim_left_matches(self[], pat)
    }
 917
    /// Returns a string with all suffixes that match
    /// the pattern `pat` repeatedly removed.
    ///
    /// # Arguments
    ///
    /// * pat - a character matcher (any `CharEq`); the example below uses a
    ///   `char`, a slice of `char`s and a closure over `char`
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
    /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
    /// ```
    #[stable]
    fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
        core_str::StrExt::trim_right_matches(self[], pat)
    }
 937
    /// Checks whether the `index`-th byte lies at the start and/or end of a
    /// UTF-8 code point sequence.
    ///
    /// The start and end of the string (when `index == self.len()`)
    /// are considered to be boundaries.
    ///
    /// # Panics
    ///
    /// Panics if `index` is greater than `self.len()`.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    /// assert!(s.is_char_boundary(0));
    /// // start of `老`
    /// assert!(s.is_char_boundary(6));
    /// assert!(s.is_char_boundary(s.len()));
    ///
    /// // second byte of `ö`
    /// assert!(!s.is_char_boundary(2));
    ///
    /// // third byte of `老`
    /// assert!(!s.is_char_boundary(8));
    /// ```
    #[unstable = "naming is uncertain with container conventions"]
    fn is_char_boundary(&self, index: uint) -> bool {
        core_str::StrExt::is_char_boundary(self[], index)
    }
 965
    /// Pluck a character out of a string and return the index of the next
    /// character.
    ///
    /// This function can be used to iterate over the Unicode characters of a
    /// string.
    ///
    /// # Example
    ///
    /// This example manually iterates through the characters of a
    /// string; this should normally be done by `.chars()` or
    /// `.char_indices()`.
    ///
    /// ```rust
    /// use std::str::CharRange;
    ///
    /// let s = "中华Việt Nam";
    /// let mut i = 0u;
    /// while i < s.len() {
    ///     let CharRange {ch, next} = s.char_range_at(i);
    ///     println!("{}: {}", i, ch);
    ///     i = next;
    /// }
    /// ```
    ///
    /// This outputs:
    ///
    /// ```text
    /// 0: 中
    /// 3: 华
    /// 6: V
    /// 7: i
    /// 8: ệ
    /// 11: t
    /// 12:
    /// 13: N
    /// 14: a
    /// 15: m
    /// ```
    ///
    /// # Arguments
    ///
    /// * start - The byte offset of the char to extract
    ///
    /// # Return value
    ///
    /// A `CharRange {ch, next}` containing the char value and the byte
    /// index of the next Unicode character.
    ///
    /// # Panics
    ///
    /// If `start` is greater than or equal to the length of the string.
    /// If `start` is not the index of the beginning of a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_range_at(&self, start: uint) -> CharRange {
        core_str::StrExt::char_range_at(self[], start)
    }
1023
    /// Given a byte position `start`, return the previous char and its position.
    ///
    /// This function can be used to iterate over a Unicode string in reverse.
    ///
    /// Returns 0 for next index if called on start index 0.
    ///
    /// # Panics
    ///
    /// If `start` is greater than the length of the string.
    /// If `start` is not an index following a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_range_at_reverse(&self, start: uint) -> CharRange {
        core_str::StrExt::char_range_at_reverse(self[], start)
    }
1038
    /// Plucks the character starting at the `i`th byte of a string.
    ///
    /// Note that `i` is a byte offset, not a character count.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "abπc";
    /// assert_eq!(s.char_at(1), 'b');
    /// assert_eq!(s.char_at(2), 'π');
    /// // `π` starts at byte 2 and `c` at byte 4, so byte 3 is not a valid index
    /// assert_eq!(s.char_at(4), 'c');
    /// ```
    ///
    /// # Panics
    ///
    /// If `i` is greater than or equal to the length of the string.
    /// If `i` is not the index of the beginning of a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_at(&self, i: uint) -> char {
        core_str::StrExt::char_at(self[], i)
    }
1058
    /// Plucks the character ending at the `i`th byte of a string.
    ///
    /// Here `i` is a byte offset that must directly follow a complete
    /// character (see the panic conditions below).
    ///
    /// # Panics
    ///
    /// If `i` is greater than the length of the string.
    /// If `i` is not an index following a valid UTF-8 character.
    #[unstable = "naming is uncertain with container conventions"]
    fn char_at_reverse(&self, i: uint) -> char {
        core_str::StrExt::char_at_reverse(self[], i)
    }
1069
    /// Work with the byte buffer of a string as a byte slice.
    ///
    /// The returned `&[u8]` borrows from the string.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("bors".as_bytes(), b"bors");
    /// ```
    #[stable]
    fn as_bytes(&self) -> &[u8] {
        core_str::StrExt::as_bytes(self[])
    }
1081
    /// Returns the byte index of the first character of `self` that
    /// matches the pattern `pat`.
    ///
    /// # Return value
    ///
    /// `Some` containing the byte index of the first matching character
    /// or `None` if there is no match.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    ///
    /// assert_eq!(s.find('L'), Some(0));
    /// assert_eq!(s.find('é'), Some(14));
    ///
    /// // the first space
    /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
    ///
    /// // neither are found
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!(s.find(x), None);
    /// ```
    #[stable]
    fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
        core_str::StrExt::find(self[], pat)
    }
1109
    /// Returns the byte index of the last character of `self` that
    /// matches the pattern `pat`.
    ///
    /// # Return value
    ///
    /// `Some` containing the byte index of the last matching character
    /// or `None` if there is no match.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    ///
    /// // the `L` of `Léopard`, not the one of `Löwe`
    /// assert_eq!(s.rfind('L'), Some(13));
    /// assert_eq!(s.rfind('é'), Some(14));
    ///
    /// // the second space
    /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
    ///
    /// // searches for an occurrence of either `1` or `2`, but neither are found
    /// let x: &[_] = &['1', '2'];
    /// assert_eq!(s.rfind(x), None);
    /// ```
    #[stable]
    fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
        core_str::StrExt::rfind(self[], pat)
    }
1137
    /// Returns the byte index of the first substring of `self` that matches
    /// `needle`.
    ///
    /// # Arguments
    ///
    /// * `needle` - The string to search for
    ///
    /// # Return value
    ///
    /// `Some` containing the byte index of the first matching substring
    /// or `None` if there is no match.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    ///
    /// assert_eq!(s.find_str("老虎 L"), Some(6));
    /// assert_eq!(s.find_str("muffin man"), None);
    /// ```
    #[unstable = "might get removed in favor of a more generic find in the future"]
    fn find_str(&self, needle: &str) -> Option<uint> {
        core_str::StrExt::find_str(self[], needle)
    }
1161
    /// Retrieves the first character from a string slice and returns
    /// it together with the rest of the string. This does not allocate a
    /// new string; instead, it returns a slice that points one character
    /// beyond the character that was retrieved. If the string does not
    /// contain any characters, `None` is returned instead.
    ///
    /// # Example
    ///
    /// ```rust
    /// let s = "Löwe 老虎 Léopard";
    /// let (c, s1) = s.slice_shift_char().unwrap();
    /// assert_eq!(c, 'L');
    /// assert_eq!(s1, "öwe 老虎 Léopard");
    ///
    /// let (c, s2) = s1.slice_shift_char().unwrap();
    /// assert_eq!(c, 'ö');
    /// assert_eq!(s2, "we 老虎 Léopard");
    /// ```
    #[unstable = "awaiting conventions about shifting and slices"]
    fn slice_shift_char(&self) -> Option<(char, &str)> {
        core_str::StrExt::slice_shift_char(self[])
    }
1184
    /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
    ///
    /// # Panics
    ///
    /// Panics if `inner` is not a direct slice contained within self.
    ///
    /// # Example
    ///
    /// ```rust
    /// let string = "a\nb\nc";
    /// let lines: Vec<&str> = string.lines().collect();
    ///
    /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
    /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
    /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
    /// ```
    #[unstable = "awaiting convention about comparability of arbitrary slices"]
    fn subslice_offset(&self, inner: &str) -> uint {
        core_str::StrExt::subslice_offset(self[], inner)
    }
1203
    /// Return an unsafe pointer to the string's buffer.
    ///
    /// The caller must ensure that the string outlives this pointer,
    /// and that it is not reallocated (e.g. by pushing to the
    /// string).
    #[stable]
    #[inline]
    fn as_ptr(&self) -> *const u8 {
        core_str::StrExt::as_ptr(self[])
    }
1214
1215     /// Return an iterator of `u16` over the string encoded as UTF-16.
1216     #[unstable = "this functionality may only be provided by libunicode"]
1217     fn utf16_units(&self) -> Utf16Units {
1218         Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1219     }
1220
    /// Return the number of bytes in this string.
    ///
    /// This is a byte count, not a character count: in the example below
    /// `"ƒoo"` holds three characters but has length 4.
    ///
    /// # Example
    ///
    /// ```
    /// assert_eq!("foo".len(), 3);
    /// assert_eq!("ƒoo".len(), 4);
    /// ```
    #[stable]
    #[inline]
    fn len(&self) -> uint {
        core_str::StrExt::len(self[])
    }
1234
    /// Returns true if this slice contains no bytes.
    ///
    /// # Example
    ///
    /// ```
    /// assert!("".is_empty());
    /// assert!(!"a".is_empty());
    /// ```
    #[inline]
    #[stable]
    fn is_empty(&self) -> bool {
        core_str::StrExt::is_empty(self[])
    }
1247
    /// Parse this string into the specified type.
    ///
    /// The target type `F` must implement `FromStr`. As the example shows,
    /// the result is `Some` on success and `None` when the string cannot be
    /// parsed as an `F`.
    ///
    /// # Example
    ///
    /// ```
    /// assert_eq!("4".parse::<u32>(), Some(4));
    /// assert_eq!("j".parse::<u32>(), None);
    /// ```
    #[inline]
    #[unstable = "this method was just created"]
    fn parse<F: FromStr>(&self) -> Option<F> {
        core_str::StrExt::parse(self[])
    }
1261
    /// Returns an iterator over the
    /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
    /// of the string.
    ///
    /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
    /// otherwise, the iterator is over the *legacy grapheme clusters*.
    /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
    /// recommends extended grapheme cluster boundaries for general processing.
    ///
    /// Each item is a `&str` holding one cluster, as the example shows.
    ///
    /// # Example
    ///
    /// ```rust
    /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
    /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
    /// assert_eq!(gr1.as_slice(), b);
    /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
    /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
    /// assert_eq!(gr2.as_slice(), b);
    /// ```
    #[unstable = "this functionality may only be provided by libunicode"]
    fn graphemes(&self, is_extended: bool) -> Graphemes {
        UnicodeStr::graphemes(self[], is_extended)
    }
1285
    /// Returns an iterator over the grapheme clusters of self and their byte offsets.
    /// See `graphemes()` method for more information.
    ///
    /// Each item is a `(uint, &str)` pair: the byte offset at which the
    /// cluster starts, followed by the cluster itself.
    ///
    /// # Example
    ///
    /// ```rust
    /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
    /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
    /// assert_eq!(gr_inds.as_slice(), b);
    /// ```
    #[unstable = "this functionality may only be provided by libunicode"]
    fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
        UnicodeStr::grapheme_indices(self[], is_extended)
    }
1300
    /// Returns an iterator over the words of a string (subsequences
    /// separated by any sequence of whitespace). Sequences of whitespace
    /// are collapsed, so empty "words" are not included; leading whitespace
    /// does not produce an empty first word either, as the example shows.
    ///
    /// # Example
    ///
    /// ```rust
    /// let some_words = " Mary   had\ta little  \n\t lamb";
    /// let v: Vec<&str> = some_words.words().collect();
    /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
    /// ```
    #[stable]
    fn words(&self) -> Words {
        UnicodeStr::words(self[])
    }
1316
    /// Returns a string's displayed width in columns, treating control
    /// characters as zero-width.
    ///
    /// `is_cjk` determines behavior for characters in the Ambiguous category:
    /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
    /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
    /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
    /// recommends that these characters be treated as 1 column (i.e.,
    /// `is_cjk` = `false`) if the locale is unknown.
    ///
    /// # Example
    ///
    /// ```rust
    /// // control characters take up no columns
    /// assert_eq!("\0\0\0\0\0".width(false), 0);
    ///
    /// // the width of these four characters depends on `is_cjk`
    /// assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4);
    /// assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8);
    /// ```
    #[unstable = "this functionality may only be provided by libunicode"]
    fn width(&self, is_cjk: bool) -> uint {
        UnicodeStr::width(self[], is_cjk)
    }
1330
    /// Returns a string with leading and trailing whitespace removed.
    ///
    /// The result is a slice of the original string; whitespace between
    /// words is left untouched.
    // NOTE(review): what counts as whitespace is decided by `UnicodeStr::trim`,
    // which is defined outside this file; confirm there.
    #[stable]
    fn trim(&self) -> &str {
        UnicodeStr::trim(self[])
    }
1336
    /// Returns a string with leading whitespace removed.
    ///
    /// Trailing whitespace is kept.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("     blah".trim_left(), "blah");
    /// assert_eq!("   \u{3000}  wut".trim_left(), "wut");
    /// assert_eq!("hey ".trim_left(), "hey ");
    /// ```
    #[stable]
    fn trim_left(&self) -> &str {
        UnicodeStr::trim_left(self[])
    }
1342
    /// Returns a string with trailing whitespace removed.
    ///
    /// Leading whitespace is kept.
    ///
    /// # Example
    ///
    /// ```rust
    /// assert_eq!("blah     ".trim_right(), "blah");
    /// assert_eq!("    ".trim_right(), "");
    /// ```
    #[stable]
    fn trim_right(&self) -> &str {
        UnicodeStr::trim_right(self[])
    }
1348 }
1349
// The impl body is empty: `str` relies entirely on the default method
// bodies supplied by the `StrExt` trait.
#[stable]
impl StrExt for str {}
1352
1353 #[cfg(test)]
1354 mod tests {
1355     use prelude::*;
1356
1357     use core::iter::AdditiveIterator;
1358     use super::from_utf8;
1359     use super::Utf8Error;
1360
1361     #[test]
1362     fn test_le() {
1363         assert!("" <= "");
1364         assert!("" <= "foo");
1365         assert!("foo" <= "foo");
1366         assert!("foo" != "bar");
1367     }
1368
1369     #[test]
1370     fn test_len() {
1371         assert_eq!("".len(), 0u);
1372         assert_eq!("hello world".len(), 11u);
1373         assert_eq!("\x63".len(), 1u);
1374         assert_eq!("\u{a2}".len(), 2u);
1375         assert_eq!("\u{3c0}".len(), 2u);
1376         assert_eq!("\u{2620}".len(), 3u);
1377         assert_eq!("\u{1d11e}".len(), 4u);
1378
1379         assert_eq!("".chars().count(), 0u);
1380         assert_eq!("hello world".chars().count(), 11u);
1381         assert_eq!("\x63".chars().count(), 1u);
1382         assert_eq!("\u{a2}".chars().count(), 1u);
1383         assert_eq!("\u{3c0}".chars().count(), 1u);
1384         assert_eq!("\u{2620}".chars().count(), 1u);
1385         assert_eq!("\u{1d11e}".chars().count(), 1u);
1386         assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1387
1388         assert_eq!("ｈｅｌｌｏ".width(false), 10u);
1389         assert_eq!("ｈｅｌｌｏ".width(true), 10u);
1390         assert_eq!("\0\0\0\0\0".width(false), 0u);
1391         assert_eq!("\0\0\0\0\0".width(true), 0u);
1392         assert_eq!("".width(false), 0u);
1393         assert_eq!("".width(true), 0u);
1394         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1395         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1396     }
1397
1398     #[test]
1399     fn test_find() {
1400         assert_eq!("hello".find('l'), Some(2u));
1401         assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1402         assert!("hello".find('x').is_none());
1403         assert!("hello".find(|&: c:char| c == 'x').is_none());
1404         assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1405         assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1406     }
1407
1408     #[test]
1409     fn test_rfind() {
1410         assert_eq!("hello".rfind('l'), Some(3u));
1411         assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1412         assert!("hello".rfind('x').is_none());
1413         assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1414         assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1415         assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1416     }
1417
1418     #[test]
1419     fn test_collect() {
1420         let empty = String::from_str("");
1421         let s: String = empty.chars().collect();
1422         assert_eq!(empty, s);
1423         let data = String::from_str("ประเทศไทย中");
1424         let s: String = data.chars().collect();
1425         assert_eq!(data, s);
1426     }
1427
1428     #[test]
1429     fn test_into_bytes() {
1430         let data = String::from_str("asdf");
1431         let buf = data.into_bytes();
1432         assert_eq!(b"asdf", buf);
1433     }
1434
1435     #[test]
1436     fn test_find_str() {
1437         // byte positions
1438         assert_eq!("".find_str(""), Some(0u));
1439         assert!("banana".find_str("apple pie").is_none());
1440
1441         let data = "abcabc";
1442         assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1443         assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1444         assert!(data.slice(2u, 4u).find_str("ab").is_none());
1445
1446         let string = "ประเทศไทย中华Việt Nam";
1447         let mut data = String::from_str(string);
1448         data.push_str(string);
1449         assert!(data.find_str("ไท华").is_none());
1450         assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1451         assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1452
1453         assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1454         assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1455         assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1456         assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1457         assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1458
1459         assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1460         assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1461         assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1462         assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1463         assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1464     }
1465
1466     #[test]
1467     fn test_slice_chars() {
1468         fn t(a: &str, b: &str, start: uint) {
1469             assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1470         }
1471         t("", "", 0);
1472         t("hello", "llo", 2);
1473         t("hello", "el", 1);
1474         t("αβλ", "β", 1);
1475         t("αβλ", "", 3);
1476         assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1477     }
1478
1479     fn s(x: &str) -> String { x.to_string() }
1480
1481     macro_rules! test_concat {
1482         ($expected: expr, $string: expr) => {
1483             {
1484                 let s: String = $string.concat();
1485                 assert_eq!($expected, s);
1486             }
1487         }
1488     }
1489
1490     #[test]
1491     fn test_concat_for_different_types() {
1492         test_concat!("ab", vec![s("a"), s("b")]);
1493         test_concat!("ab", vec!["a", "b"]);
1494         test_concat!("ab", vec!["a", "b"].as_slice());
1495         test_concat!("ab", vec![s("a"), s("b")]);
1496     }
1497
1498     #[test]
1499     fn test_concat_for_different_lengths() {
1500         let empty: &[&str] = &[];
1501         test_concat!("", empty);
1502         test_concat!("a", ["a"]);
1503         test_concat!("ab", ["a", "b"]);
1504         test_concat!("abc", ["", "a", "bc"]);
1505     }
1506
1507     macro_rules! test_connect {
1508         ($expected: expr, $string: expr, $delim: expr) => {
1509             {
1510                 let s = $string.connect($delim);
1511                 assert_eq!($expected, s);
1512             }
1513         }
1514     }
1515
1516     #[test]
1517     fn test_connect_for_different_types() {
1518         test_connect!("a-b", ["a", "b"], "-");
1519         let hyphen = "-".to_string();
1520         test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1521         test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1522         test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1523         test_connect!("a-b", vec![s("a"), s("b")], "-");
1524     }
1525
1526     #[test]
1527     fn test_connect_for_different_lengths() {
1528         let empty: &[&str] = &[];
1529         test_connect!("", empty, "-");
1530         test_connect!("a", ["a"], "-");
1531         test_connect!("a-b", ["a", "b"], "-");
1532         test_connect!("-a-bc", ["", "a", "bc"], "-");
1533     }
1534
1535     #[test]
1536     fn test_unsafe_slice() {
1537         assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1538         assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1539         assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1540         fn a_million_letter_a() -> String {
1541             let mut i = 0u;
1542             let mut rs = String::new();
1543             while i < 100000 {
1544                 rs.push_str("aaaaaaaaaa");
1545                 i += 1;
1546             }
1547             rs
1548         }
1549         fn half_a_million_letter_a() -> String {
1550             let mut i = 0u;
1551             let mut rs = String::new();
1552             while i < 100000 {
1553                 rs.push_str("aaaaa");
1554                 i += 1;
1555             }
1556             rs
1557         }
1558         let letters = a_million_letter_a();
1559         assert!(half_a_million_letter_a() ==
1560             unsafe {String::from_str(letters.slice_unchecked(
1561                                      0u,
1562                                      500000))});
1563     }
1564
1565     #[test]
1566     fn test_starts_with() {
1567         assert!(("".starts_with("")));
1568         assert!(("abc".starts_with("")));
1569         assert!(("abc".starts_with("a")));
1570         assert!((!"a".starts_with("abc")));
1571         assert!((!"".starts_with("abc")));
1572         assert!((!"ödd".starts_with("-")));
1573         assert!(("ödd".starts_with("öd")));
1574     }
1575
1576     #[test]
1577     fn test_ends_with() {
1578         assert!(("".ends_with("")));
1579         assert!(("abc".ends_with("")));
1580         assert!(("abc".ends_with("c")));
1581         assert!((!"a".ends_with("abc")));
1582         assert!((!"".ends_with("abc")));
1583         assert!((!"ddö".ends_with("-")));
1584         assert!(("ddö".ends_with("dö")));
1585     }
1586
1587     #[test]
1588     fn test_is_empty() {
1589         assert!("".is_empty());
1590         assert!(!"a".is_empty());
1591     }
1592
1593     #[test]
1594     fn test_replace() {
1595         let a = "a";
1596         assert_eq!("".replace(a, "b"), String::from_str(""));
1597         assert_eq!("a".replace(a, "b"), String::from_str("b"));
1598         assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1599         let test = "test";
1600         assert!(" test test ".replace(test, "toast") ==
1601             String::from_str(" toast toast "));
1602         assert_eq!(" test test ".replace(test, ""), String::from_str("   "));
1603     }
1604
1605     #[test]
1606     fn test_replace_2a() {
1607         let data = "ประเทศไทย中华";
1608         let repl = "دولة الكويت";
1609
1610         let a = "ประเ";
1611         let a2 = "دولة الكويتทศไทย中华";
1612         assert_eq!(data.replace(a, repl), a2);
1613     }
1614
1615     #[test]
1616     fn test_replace_2b() {
1617         let data = "ประเทศไทย中华";
1618         let repl = "دولة الكويت";
1619
1620         let b = "ะเ";
1621         let b2 = "ปรدولة الكويتทศไทย中华";
1622         assert_eq!(data.replace(b, repl), b2);
1623     }
1624
1625     #[test]
1626     fn test_replace_2c() {
1627         let data = "ประเทศไทย中华";
1628         let repl = "دولة الكويت";
1629
1630         let c = "中华";
1631         let c2 = "ประเทศไทยدولة الكويت";
1632         assert_eq!(data.replace(c, repl), c2);
1633     }
1634
1635     #[test]
1636     fn test_replace_2d() {
1637         let data = "ประเทศไทย中华";
1638         let repl = "دولة الكويت";
1639
1640         let d = "ไท华";
1641         assert_eq!(data.replace(d, repl), data);
1642     }
1643
1644     #[test]
1645     fn test_slice() {
1646         assert_eq!("ab", "abc".slice(0, 2));
1647         assert_eq!("bc", "abc".slice(1, 3));
1648         assert_eq!("", "abc".slice(1, 1));
1649         assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1650
1651         let data = "ประเทศไทย中华";
1652         assert_eq!("ป", data.slice(0, 3));
1653         assert_eq!("ร", data.slice(3, 6));
1654         assert_eq!("", data.slice(3, 3));
1655         assert_eq!("华", data.slice(30, 33));
1656
1657         fn a_million_letter_x() -> String {
1658             let mut i = 0u;
1659             let mut rs = String::new();
1660             while i < 100000 {
1661                 rs.push_str("华华华华华华华华华华");
1662                 i += 1;
1663             }
1664             rs
1665         }
1666         fn half_a_million_letter_x() -> String {
1667             let mut i = 0u;
1668             let mut rs = String::new();
1669             while i < 100000 {
1670                 rs.push_str("华华华华华");
1671                 i += 1;
1672             }
1673             rs
1674         }
1675         let letters = a_million_letter_x();
1676         assert!(half_a_million_letter_x() ==
1677             String::from_str(letters.slice(0u, 3u * 500000u)));
1678     }
1679
1680     #[test]
1681     fn test_slice_2() {
1682         let ss = "中华Việt Nam";
1683
1684         assert_eq!("华", ss.slice(3u, 6u));
1685         assert_eq!("Việt Nam", ss.slice(6u, 16u));
1686
1687         assert_eq!("ab", "abc".slice(0u, 2u));
1688         assert_eq!("bc", "abc".slice(1u, 3u));
1689         assert_eq!("", "abc".slice(1u, 1u));
1690
1691         assert_eq!("中", ss.slice(0u, 3u));
1692         assert_eq!("华V", ss.slice(3u, 7u));
1693         assert_eq!("", ss.slice(3u, 3u));
1694         /*0: 中
1695           3: 华
1696           6: V
1697           7: i
1698           8: ệ
1699          11: t
1700          12:
1701          13: N
1702          14: a
1703          15: m */
1704     }
1705
1706     #[test]
1707     #[should_fail]
1708     fn test_slice_fail() {
1709         "中华Việt Nam".slice(0u, 2u);
1710     }
1711
1712     #[test]
1713     fn test_slice_from() {
1714         assert_eq!("abcd".slice_from(0), "abcd");
1715         assert_eq!("abcd".slice_from(2), "cd");
1716         assert_eq!("abcd".slice_from(4), "");
1717     }
1718     #[test]
1719     fn test_slice_to() {
1720         assert_eq!("abcd".slice_to(0), "");
1721         assert_eq!("abcd".slice_to(2), "ab");
1722         assert_eq!("abcd".slice_to(4), "abcd");
1723     }
1724
1725     #[test]
1726     fn test_trim_left_matches() {
1727         let v: &[char] = &[];
1728         assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1729         let chars: &[char] = &['*', ' '];
1730         assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1731         assert_eq!(" ***  *** ".trim_left_matches(chars), "");
1732         assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1733
1734         assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1735         let chars: &[char] = &['1', '2'];
1736         assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1737         assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1738     }
1739
1740     #[test]
1741     fn test_trim_right_matches() {
1742         let v: &[char] = &[];
1743         assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1744         let chars: &[char] = &['*', ' '];
1745         assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1746         assert_eq!(" ***  *** ".trim_right_matches(chars), "");
1747         assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1748
1749         assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1750         let chars: &[char] = &['1', '2'];
1751         assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1752         assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1753     }
1754
1755     #[test]
1756     fn test_trim_matches() {
1757         let v: &[char] = &[];
1758         assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1759         let chars: &[char] = &['*', ' '];
1760         assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1761         assert_eq!(" ***  *** ".trim_matches(chars), "");
1762         assert_eq!("foo".trim_matches(chars), "foo");
1763
1764         assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1765         let chars: &[char] = &['1', '2'];
1766         assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1767         assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
1768     }
1769
1770     #[test]
1771     fn test_trim_left() {
1772         assert_eq!("".trim_left(), "");
1773         assert_eq!("a".trim_left(), "a");
1774         assert_eq!("    ".trim_left(), "");
1775         assert_eq!("     blah".trim_left(), "blah");
1776         assert_eq!("   \u{3000}  wut".trim_left(), "wut");
1777         assert_eq!("hey ".trim_left(), "hey ");
1778     }
1779
1780     #[test]
1781     fn test_trim_right() {
1782         assert_eq!("".trim_right(), "");
1783         assert_eq!("a".trim_right(), "a");
1784         assert_eq!("    ".trim_right(), "");
1785         assert_eq!("blah     ".trim_right(), "blah");
1786         assert_eq!("wut   \u{3000}  ".trim_right(), "wut");
1787         assert_eq!(" hey".trim_right(), " hey");
1788     }
1789
1790     #[test]
1791     fn test_trim() {
1792         assert_eq!("".trim(), "");
1793         assert_eq!("a".trim(), "a");
1794         assert_eq!("    ".trim(), "");
1795         assert_eq!("    blah     ".trim(), "blah");
1796         assert_eq!("\nwut   \u{3000}  ".trim(), "wut");
1797         assert_eq!(" hey dude ".trim(), "hey dude");
1798     }
1799
1800     #[test]
1801     fn test_is_whitespace() {
1802         assert!("".chars().all(|c| c.is_whitespace()));
1803         assert!(" ".chars().all(|c| c.is_whitespace()));
1804         assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
1805         assert!("  \n\t   ".chars().all(|c| c.is_whitespace()));
1806         assert!(!"   _   ".chars().all(|c| c.is_whitespace()));
1807     }
1808
1809     #[test]
1810     fn test_slice_shift_char() {
1811         let data = "ประเทศไทย中";
1812         assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1813     }
1814
1815     #[test]
1816     fn test_slice_shift_char_2() {
1817         let empty = "";
1818         assert_eq!(empty.slice_shift_char(), None);
1819     }
1820
1821     #[test]
1822     fn test_is_utf8() {
1823         // deny overlong encodings
1824         assert!(from_utf8(&[0xc0, 0x80]).is_err());
1825         assert!(from_utf8(&[0xc0, 0xae]).is_err());
1826         assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1827         assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1828         assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1829         assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1830         assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1831
1832         // deny surrogates
1833         assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1834         assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1835
1836         assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1837         assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1838         assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1839         assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1840         assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1841         assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1842         assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1843         assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1844     }
1845
1846     #[test]
1847     fn test_is_utf16() {
1848         use unicode::str::is_utf16;
1849         macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1850
1851         // non-surrogates
1852         pos!(&[0x0000],
1853              &[0x0001, 0x0002],
1854              &[0xD7FF],
1855              &[0xE000]);
1856
1857         // surrogate pairs (randomly generated with Python 3's
1858         // .encode('utf-16be'))
1859         pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1860              &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1861              &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1862
1863         // mixtures (also random)
1864         pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1865              &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1866              &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1867
1868         // negative tests
1869         macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1870
1871         neg!(
1872             // surrogate + regular unit
1873             &[0xdb45, 0x0000],
1874             // surrogate + lead surrogate
1875             &[0xd900, 0xd900],
1876             // unterminated surrogate
1877             &[0xd8ff],
1878             // trail surrogate without a lead
1879             &[0xddb7]);
1880
1881         // random byte sequences that Python 3's .decode('utf-16be')
1882         // failed on
1883         neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1884              &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1885              &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1886              &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1887              &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1888              &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1889              &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1890              &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1891              &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1892              &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1893              &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1894              &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1895              &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1896              &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1897              &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1898              &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1899              &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1900              &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1901              &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1902              &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1903              &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1904     }
1905
1906     #[test]
1907     fn test_as_bytes() {
1908         // no null
1909         let v = [
1910             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1911             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1912             109
1913         ];
1914         let b: &[u8] = &[];
1915         assert_eq!("".as_bytes(), b);
1916         assert_eq!("abc".as_bytes(), b"abc");
1917         assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1918     }
1919
1920     #[test]
1921     #[should_fail]
1922     fn test_as_bytes_fail() {
1923         // Don't double free. (I'm not sure if this exercises the
1924         // original problem code path anymore.)
1925         let s = String::from_str("");
1926         let _bytes = s.as_bytes();
1927         panic!();
1928     }
1929
1930     #[test]
1931     fn test_as_ptr() {
1932         let buf = "hello".as_ptr();
1933         unsafe {
1934             assert_eq!(*buf.offset(0), b'h');
1935             assert_eq!(*buf.offset(1), b'e');
1936             assert_eq!(*buf.offset(2), b'l');
1937             assert_eq!(*buf.offset(3), b'l');
1938             assert_eq!(*buf.offset(4), b'o');
1939         }
1940     }
1941
1942     #[test]
1943     fn test_subslice_offset() {
1944         let a = "kernelsprite";
1945         let b = a.slice(7, a.len());
1946         let c = a.slice(0, a.len() - 6);
1947         assert_eq!(a.subslice_offset(b), 7);
1948         assert_eq!(a.subslice_offset(c), 0);
1949
1950         let string = "a\nb\nc";
1951         let lines: Vec<&str> = string.lines().collect();
1952         assert_eq!(string.subslice_offset(lines[0]), 0);
1953         assert_eq!(string.subslice_offset(lines[1]), 2);
1954         assert_eq!(string.subslice_offset(lines[2]), 4);
1955     }
1956
1957     #[test]
1958     #[should_fail]
1959     fn test_subslice_offset_2() {
1960         let a = "alchemiter";
1961         let b = "cruxtruder";
1962         a.subslice_offset(b);
1963     }
1964
1965     #[test]
1966     fn vec_str_conversions() {
1967         let s1: String = String::from_str("All mimsy were the borogoves");
1968
1969         let v: Vec<u8> = s1.as_bytes().to_vec();
1970         let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1971         let mut i: uint = 0u;
1972         let n1: uint = s1.len();
1973         let n2: uint = v.len();
1974         assert_eq!(n1, n2);
1975         while i < n1 {
1976             let a: u8 = s1.as_bytes()[i];
1977             let b: u8 = s2.as_bytes()[i];
1978             debug!("{}", a);
1979             debug!("{}", b);
1980             assert_eq!(a, b);
1981             i += 1u;
1982         }
1983     }
1984
1985     #[test]
1986     fn test_contains() {
1987         assert!("abcde".contains("bcd"));
1988         assert!("abcde".contains("abcd"));
1989         assert!("abcde".contains("bcde"));
1990         assert!("abcde".contains(""));
1991         assert!("".contains(""));
1992         assert!(!"abcde".contains("def"));
1993         assert!(!"".contains("a"));
1994
1995         let data = "ประเทศไทย中华Việt Nam";
1996         assert!(data.contains("ประเ"));
1997         assert!(data.contains("ะเ"));
1998         assert!(data.contains("中华"));
1999         assert!(!data.contains("ไท华"));
2000     }
2001
2002     #[test]
2003     fn test_contains_char() {
2004         assert!("abc".contains_char('b'));
2005         assert!("a".contains_char('a'));
2006         assert!(!"abc".contains_char('d'));
2007         assert!(!"".contains_char('a'));
2008     }
2009
2010     #[test]
2011     fn test_char_at() {
2012         let s = "ศไทย中华Việt Nam";
2013         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2014         let mut pos = 0;
2015         for ch in v.iter() {
2016             assert!(s.char_at(pos) == *ch);
2017             pos += ch.to_string().len();
2018         }
2019     }
2020
2021     #[test]
2022     fn test_char_at_reverse() {
2023         let s = "ศไทย中华Việt Nam";
2024         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2025         let mut pos = s.len();
2026         for ch in v.iter().rev() {
2027             assert!(s.char_at_reverse(pos) == *ch);
2028             pos -= ch.to_string().len();
2029         }
2030     }
2031
2032     #[test]
2033     fn test_escape_unicode() {
2034         assert_eq!("abc".escape_unicode(),
2035                    String::from_str("\\u{61}\\u{62}\\u{63}"));
2036         assert_eq!("a c".escape_unicode(),
2037                    String::from_str("\\u{61}\\u{20}\\u{63}"));
2038         assert_eq!("\r\n\t".escape_unicode(),
2039                    String::from_str("\\u{d}\\u{a}\\u{9}"));
2040         assert_eq!("'\"\\".escape_unicode(),
2041                    String::from_str("\\u{27}\\u{22}\\u{5c}"));
2042         assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2043                    String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2044         assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2045                    String::from_str("\\u{100}\\u{ffff}"));
2046         assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2047                    String::from_str("\\u{10000}\\u{10ffff}"));
2048         assert_eq!("ab\u{fb00}".escape_unicode(),
2049                    String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2050         assert_eq!("\u{1d4ea}\r".escape_unicode(),
2051                    String::from_str("\\u{1d4ea}\\u{d}"));
2052     }
2053
2054     #[test]
2055     fn test_escape_default() {
2056         assert_eq!("abc".escape_default(), String::from_str("abc"));
2057         assert_eq!("a c".escape_default(), String::from_str("a c"));
2058         assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2059         assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2060         assert_eq!("\u{100}\u{ffff}".escape_default(),
2061                    String::from_str("\\u{100}\\u{ffff}"));
2062         assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2063                    String::from_str("\\u{10000}\\u{10ffff}"));
2064         assert_eq!("ab\u{fb00}".escape_default(),
2065                    String::from_str("ab\\u{fb00}"));
2066         assert_eq!("\u{1d4ea}\r".escape_default(),
2067                    String::from_str("\\u{1d4ea}\\r"));
2068     }
2069
2070     #[test]
2071     fn test_total_ord() {
2072         "1234".cmp("123") == Greater;
2073         "123".cmp("1234") == Less;
2074         "1234".cmp("1234") == Equal;
2075         "12345555".cmp("123456") == Less;
2076         "22".cmp("1234") == Greater;
2077     }
2078
2079     #[test]
2080     fn test_char_range_at() {
2081         let data = "b¢€𤭢𤭢€¢b";
2082         assert_eq!('b', data.char_range_at(0).ch);
2083         assert_eq!('¢', data.char_range_at(1).ch);
2084         assert_eq!('€', data.char_range_at(3).ch);
2085         assert_eq!('𤭢', data.char_range_at(6).ch);
2086         assert_eq!('𤭢', data.char_range_at(10).ch);
2087         assert_eq!('€', data.char_range_at(14).ch);
2088         assert_eq!('¢', data.char_range_at(17).ch);
2089         assert_eq!('b', data.char_range_at(19).ch);
2090     }
2091
2092     #[test]
2093     fn test_char_range_at_reverse_underflow() {
2094         assert_eq!("abc".char_range_at_reverse(0).next, 0);
2095     }
2096
2097     #[test]
2098     fn test_iterator() {
2099         let s = "ศไทย中华Việt Nam";
2100         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2101
2102         let mut pos = 0;
2103         let mut it = s.chars();
2104
2105         for c in it {
2106             assert_eq!(c, v[pos]);
2107             pos += 1;
2108         }
2109         assert_eq!(pos, v.len());
2110     }
2111
2112     #[test]
2113     fn test_rev_iterator() {
2114         let s = "ศไทย中华Việt Nam";
2115         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2116
2117         let mut pos = 0;
2118         let mut it = s.chars().rev();
2119
2120         for c in it {
2121             assert_eq!(c, v[pos]);
2122             pos += 1;
2123         }
2124         assert_eq!(pos, v.len());
2125     }
2126
2127     #[test]
2128     fn test_chars_decoding() {
2129         let mut bytes = [0u8; 4];
2130         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2131             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2132             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2133             if Some(c) != s.chars().next() {
2134                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2135             }
2136         }
2137     }
2138
2139     #[test]
2140     fn test_chars_rev_decoding() {
2141         let mut bytes = [0u8; 4];
2142         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2143             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2144             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2145             if Some(c) != s.chars().rev().next() {
2146                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2147             }
2148         }
2149     }
2150
2151     #[test]
2152     fn test_iterator_clone() {
2153         let s = "ศไทย中华Việt Nam";
2154         let mut it = s.chars();
2155         it.next();
2156         assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2157     }
2158
2159     #[test]
2160     fn test_bytesator() {
2161         let s = "ศไทย中华Việt Nam";
2162         let v = [
2163             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2164             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2165             109
2166         ];
2167         let mut pos = 0;
2168
2169         for b in s.bytes() {
2170             assert_eq!(b, v[pos]);
2171             pos += 1;
2172         }
2173     }
2174
2175     #[test]
2176     fn test_bytes_revator() {
2177         let s = "ศไทย中华Việt Nam";
2178         let v = [
2179             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2180             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2181             109
2182         ];
2183         let mut pos = v.len();
2184
2185         for b in s.bytes().rev() {
2186             pos -= 1;
2187             assert_eq!(b, v[pos]);
2188         }
2189     }
2190
2191     #[test]
2192     fn test_char_indicesator() {
2193         let s = "ศไทย中华Việt Nam";
2194         let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2195         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2196
2197         let mut pos = 0;
2198         let mut it = s.char_indices();
2199
2200         for c in it {
2201             assert_eq!(c, (p[pos], v[pos]));
2202             pos += 1;
2203         }
2204         assert_eq!(pos, v.len());
2205         assert_eq!(pos, p.len());
2206     }
2207
2208     #[test]
2209     fn test_char_indices_revator() {
2210         let s = "ศไทย中华Việt Nam";
2211         let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2212         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2213
2214         let mut pos = 0;
2215         let mut it = s.char_indices().rev();
2216
2217         for c in it {
2218             assert_eq!(c, (p[pos], v[pos]));
2219             pos += 1;
2220         }
2221         assert_eq!(pos, v.len());
2222         assert_eq!(pos, p.len());
2223     }
2224
2225     #[test]
2226     fn test_splitn_char_iterator() {
2227         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2228
2229         let split: Vec<&str> = data.splitn(3, ' ').collect();
2230         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2231
2232         let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2233         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2234
2235         // Unicode
2236         let split: Vec<&str> = data.splitn(3, 'ä').collect();
2237         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2238
2239         let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2240         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2241     }
2242
2243     #[test]
2244     fn test_split_char_iterator_no_trailing() {
2245         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2246
2247         let split: Vec<&str> = data.split('\n').collect();
2248         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2249
2250         let split: Vec<&str> = data.split_terminator('\n').collect();
2251         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2252     }
2253
2254     #[test]
2255     fn test_words() {
2256         let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
2257         let words: Vec<&str> = data.words().collect();
2258         assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2259     }
2260
2261     #[test]
2262     fn test_nfd_chars() {
2263         macro_rules! t {
2264             ($input: expr, $expected: expr) => {
2265                 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2266             }
2267         }
2268         t!("abc", "abc");
2269         t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2270         t!("\u{2026}", "\u{2026}");
2271         t!("\u{2126}", "\u{3a9}");
2272         t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2273         t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2274         t!("a\u{301}", "a\u{301}");
2275         t!("\u{301}a", "\u{301}a");
2276         t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2277         t!("\u{ac1c}", "\u{1100}\u{1162}");
2278     }
2279
2280     #[test]
2281     fn test_nfkd_chars() {
2282         macro_rules! t {
2283             ($input: expr, $expected: expr) => {
2284                 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2285             }
2286         }
2287         t!("abc", "abc");
2288         t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2289         t!("\u{2026}", "...");
2290         t!("\u{2126}", "\u{3a9}");
2291         t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2292         t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2293         t!("a\u{301}", "a\u{301}");
2294         t!("\u{301}a", "\u{301}a");
2295         t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2296         t!("\u{ac1c}", "\u{1100}\u{1162}");
2297     }
2298
2299     #[test]
2300     fn test_nfc_chars() {
2301         macro_rules! t {
2302             ($input: expr, $expected: expr) => {
2303                 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2304             }
2305         }
2306         t!("abc", "abc");
2307         t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2308         t!("\u{2026}", "\u{2026}");
2309         t!("\u{2126}", "\u{3a9}");
2310         t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2311         t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2312         t!("a\u{301}", "\u{e1}");
2313         t!("\u{301}a", "\u{301}a");
2314         t!("\u{d4db}", "\u{d4db}");
2315         t!("\u{ac1c}", "\u{ac1c}");
2316         t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2317     }
2318
2319     #[test]
2320     fn test_nfkc_chars() {
2321         macro_rules! t {
2322             ($input: expr, $expected: expr) => {
2323                 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2324             }
2325         }
2326         t!("abc", "abc");
2327         t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2328         t!("\u{2026}", "...");
2329         t!("\u{2126}", "\u{3a9}");
2330         t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2331         t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2332         t!("a\u{301}", "\u{e1}");
2333         t!("\u{301}a", "\u{301}a");
2334         t!("\u{d4db}", "\u{d4db}");
2335         t!("\u{ac1c}", "\u{ac1c}");
2336         t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2337     }
2338
2339     #[test]
2340     fn test_lines() {
2341         let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2342         let lines: Vec<&str> = data.lines().collect();
2343         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2344
2345         let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2346         let lines: Vec<&str> = data.lines().collect();
2347         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2348     }
2349
2350     #[test]
2351     fn test_graphemes() {
2352         use core::iter::order;
2353         // official Unicode test data
2354         // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2355         let test_same: [(_, &[_]); 325] = [
2356             ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2357             ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2358             ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2359             ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2360             ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2361             ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2362             ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2363             ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2364             ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2365             ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2366             ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2367             ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2368             ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2369             ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2370             ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2371             ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2372             ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2373             ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2374             ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2375             ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2376             ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2377             ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2378             ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2379             ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2380             ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2381             ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2382             ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2383             ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2384             ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2385             ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2386             ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2387             ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2388             ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2389             ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2390             ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2391             ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2392             ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2393             ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2394             ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2395             ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2396             ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2397             ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2398             ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2399             ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2400             ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2401             ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2402             ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2403             ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2404             ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2405             ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2406             ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2407             ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2408             ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2409             ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2410             ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2411             ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2412             ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2413             ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2414             ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2415             ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2416             ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2417             ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2418             ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2419             ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2420             ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2421             ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2422             ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2423             ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2424             ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2425             ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2426             ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2427             ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2428             ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2429             ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2430             ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2431             ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2432             ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2433             ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2434             ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2435             ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2436             ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2437             ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2438             ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2439             ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2440             ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2441             ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2442             ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2443             ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2444             ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2445             ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2446             ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2447             ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2448             ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2449             ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2450             ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2451             ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2452             ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2453             ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2454             ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2455             ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2456             ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2457             ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2458             ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2459             ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2460             ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2461             ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2462             ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2463             ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2464             ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2465             ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2466             ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2467             ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2468             ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2469             ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2470             ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2471             ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2472             ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2473             ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2474             ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2475             ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2476             ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2477             ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2478             ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2479             ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2480             ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2481             ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2482             ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2483             ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2484             ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2485             ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2486             ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2487             ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2488             ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2489             ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2490             ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2491             ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2492             ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2493             ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2494             ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2495             ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2496             ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2497             ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2498             ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2499             ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2500             ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2501             ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2502             ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2503             ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2504             ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2505             ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2506             ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2507             ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2508             ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2509             ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2510             ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2511             ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2512             ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2513             ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2514             ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2515             ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2516             ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2517             ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2518             ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2519             ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2520             ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2521             ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2522             ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2523             ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2524             ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2525             ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2526             ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2527             ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2528             ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2529             ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2530             ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2531             ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2532             ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2533             ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2534             ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2535             ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2536             ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2537             ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2538             ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2539             ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2540             ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2541             ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2542             ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2543             ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2544             ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2545             ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2546             ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2547             ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2548             ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2549             ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2550             ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2551             ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2552             ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2553             ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2554             ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2555             ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2556             ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2557             ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2558             ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2559             ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2560             ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2561             ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2562             ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2563             ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2564             ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2565             ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2566             ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2567             ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2568             ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2569             ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2570             ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2571             ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2572             ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2573             ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2574             ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2575             ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2576             ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2577             ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2578             ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2579             ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2580             ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2581             ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2582             ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2583             ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2584             ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2585             ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2586             ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2587             ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2588             ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2589             ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2590             ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2591             ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2592             ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2593             ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2594             ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2595             ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2596             ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2597             ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2598             ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2599             ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2600             ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2601             ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2602             ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2603             ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2604             ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2605             ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2606             ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2607             ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2608             ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2609             ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2610             ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2611             ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2612             ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2613             ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2614             ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2615             ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2616             ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2617             ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2618             ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2619             ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2620             ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2621             ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2622             ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2623             ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2624             ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2625             ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2626             ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2627             ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2628             ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2629             ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2630             ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2631             ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2632             ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2633             ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2634             ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2635             ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2636             ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2637             ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2638             ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2639             ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2640             ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2641             ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2642             ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2643             ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2644             ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2645             ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2646             ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2647             ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2648             ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2649             ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2650             ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2651             ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2652             ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2653             ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2654             ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2655             ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2656             ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2657             ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2658             ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2659             ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2660             ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2661             ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2662             ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2663             ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2664             ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2665             ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2666             ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2667             ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2668             ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2669             ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2670             ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2671             ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2672             ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2673             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2674             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2675             &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2676             ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2677              &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2678             ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2679             ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2680              "\u{1F1E7}\u{1F1E8}"]),
2681             ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2682              &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2683             ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2684             ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2685         ];
2686
2687         let test_diff: [(_, &[_], &[_]); 23] = [
2688             ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2689             &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2690             &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2691             &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2692             &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2693             &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2694             &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2695             &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2696             &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2697             &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2698             &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2699             &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2700             &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2701             &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2702             &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2703             &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2704             &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2705             &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2706             &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2707             &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2708             &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2709             &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2710             &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2711         ];
2712
2713         for &(s, g) in test_same.iter() {
2714             // test forward iterator
2715             assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2716             assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2717
2718             // test reverse iterator
2719             assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2720             assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2721         }
2722
2723         for &(s, gt, gf) in test_diff.iter() {
2724             // test forward iterator
2725             assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2726             assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2727
2728             // test reverse iterator
2729             assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2730             assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2731         }
2732
2733         // test the indices iterators
2734         let s = "a̐éö̲\r\n";
2735         let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2736         let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2737         assert_eq!(gr_inds, b);
2738         let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2739         let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2740         assert_eq!(gr_inds, b);
2741         let mut gr_inds_iter = s.grapheme_indices(true);
2742         {
2743             let gr_inds = gr_inds_iter.by_ref();
2744             let e1 = gr_inds.size_hint();
2745             assert_eq!(e1, (1, Some(13)));
2746             let c = gr_inds.count();
2747             assert_eq!(c, 4);
2748         }
2749         let e2 = gr_inds_iter.size_hint();
2750         assert_eq!(e2, (0, Some(0)));
2751
2752         // make sure the reverse iterator does the right thing with "\n" at beginning of string
2753         let s = "\n\r\n\r";
2754         let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2755         let b: &[_] = &["\r", "\r\n", "\n"];
2756         assert_eq!(gr, b);
2757     }
2758
2759     #[test]
2760     fn test_split_strator() {
2761         fn t(s: &str, sep: &str, u: &[&str]) {
2762             let v: Vec<&str> = s.split_str(sep).collect();
2763             assert_eq!(v, u);
2764         }
2765         t("--1233345--", "12345", &["--1233345--"]);
2766         t("abc::hello::there", "::", &["abc", "hello", "there"]);
2767         t("::hello::there", "::", &["", "hello", "there"]);
2768         t("hello::there::", "::", &["hello", "there", ""]);
2769         t("::hello::there::", "::", &["", "hello", "there", ""]);
2770         t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2771         t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2772         t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2773         t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
2774         t("", ".", &[""]);
2775         t("zz", "zz", &["",""]);
2776         t("ok", "z", &["ok"]);
2777         t("zzz", "zz", &["","z"]);
2778         t("zzzzz", "zz", &["","","z"]);
2779     }
2780
2781     #[test]
2782     fn test_str_default() {
2783         use core::default::Default;
2784         fn t<S: Default + Str>() {
2785             let s: S = Default::default();
2786             assert_eq!(s.as_slice(), "");
2787         }
2788
2789         t::<&str>();
2790         t::<String>();
2791     }
2792
2793     #[test]
2794     fn test_str_container() {
2795         fn sum_len(v: &[&str]) -> uint {
2796             v.iter().map(|x| x.len()).sum()
2797         }
2798
2799         let s = String::from_str("01234");
2800         assert_eq!(5, sum_len(&["012", "", "34"]));
2801         assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2802                                 String::from_str("2").as_slice(),
2803                                 String::from_str("34").as_slice(),
2804                                 String::from_str("").as_slice()]));
2805         assert_eq!(5, sum_len(&[s.as_slice()]));
2806     }
2807
2808     #[test]
2809     fn test_str_from_utf8() {
2810         let xs = b"hello";
2811         assert_eq!(from_utf8(xs), Ok("hello"));
2812
2813         let xs = "ศไทย中华Việt Nam".as_bytes();
2814         assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
2815
2816         let xs = b"hello\xFF";
2817         assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2818     }
2819 }
2820
2821 #[cfg(test)]
2822 mod bench {
2823     use super::*;
2824     use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2825     use test::Bencher;
2826     use test::black_box;
2827
2828     #[bench]
2829     fn char_iterator(b: &mut Bencher) {
2830         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2831
2832         b.iter(|| s.chars().count());
2833     }
2834
2835     #[bench]
2836     fn char_iterator_for(b: &mut Bencher) {
2837         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2838
2839         b.iter(|| {
2840             for ch in s.chars() { black_box(ch) }
2841         });
2842     }
2843
2844     #[bench]
2845     fn char_iterator_ascii(b: &mut Bencher) {
2846         let s = "Mary had a little lamb, Little lamb
2847         Mary had a little lamb, Little lamb
2848         Mary had a little lamb, Little lamb
2849         Mary had a little lamb, Little lamb
2850         Mary had a little lamb, Little lamb
2851         Mary had a little lamb, Little lamb";
2852
2853         b.iter(|| s.chars().count());
2854     }
2855
2856     #[bench]
2857     fn char_iterator_rev(b: &mut Bencher) {
2858         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2859
2860         b.iter(|| s.chars().rev().count());
2861     }
2862
2863     #[bench]
2864     fn char_iterator_rev_for(b: &mut Bencher) {
2865         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2866
2867         b.iter(|| {
2868             for ch in s.chars().rev() { black_box(ch) }
2869         });
2870     }
2871
2872     #[bench]
2873     fn char_indicesator(b: &mut Bencher) {
2874         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2875         let len = s.chars().count();
2876
2877         b.iter(|| assert_eq!(s.char_indices().count(), len));
2878     }
2879
2880     #[bench]
2881     fn char_indicesator_rev(b: &mut Bencher) {
2882         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2883         let len = s.chars().count();
2884
2885         b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
2886     }
2887
2888     #[bench]
2889     fn split_unicode_ascii(b: &mut Bencher) {
2890         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2891
2892         b.iter(|| assert_eq!(s.split('V').count(), 3));
2893     }
2894
2895     #[bench]
2896     fn split_unicode_not_ascii(b: &mut Bencher) {
2897         struct NotAscii(char);
2898         impl CharEq for NotAscii {
2899             fn matches(&mut self, c: char) -> bool {
2900                 let NotAscii(cc) = *self;
2901                 cc == c
2902             }
2903             fn only_ascii(&self) -> bool { false }
2904         }
2905         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2906
2907         b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2908     }
2909
2910
2911     #[bench]
2912     fn split_ascii(b: &mut Bencher) {
2913         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2914         let len = s.split(' ').count();
2915
2916         b.iter(|| assert_eq!(s.split(' ').count(), len));
2917     }
2918
2919     #[bench]
2920     fn split_not_ascii(b: &mut Bencher) {
2921         struct NotAscii(char);
2922         impl CharEq for NotAscii {
2923             #[inline]
2924             fn matches(&mut self, c: char) -> bool {
2925                 let NotAscii(cc) = *self;
2926                 cc == c
2927             }
2928             fn only_ascii(&self) -> bool { false }
2929         }
2930         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2931         let len = s.split(' ').count();
2932
2933         b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2934     }
2935
2936     #[bench]
2937     fn split_extern_fn(b: &mut Bencher) {
2938         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2939         let len = s.split(' ').count();
2940         fn pred(c: char) -> bool { c == ' ' }
2941
2942         b.iter(|| assert_eq!(s.split(pred).count(), len));
2943     }
2944
2945     #[bench]
2946     fn split_closure(b: &mut Bencher) {
2947         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2948         let len = s.split(' ').count();
2949
2950         b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
2951     }
2952
2953     #[bench]
2954     fn split_slice(b: &mut Bencher) {
2955         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2956         let len = s.split(' ').count();
2957
2958         let c: &[char] = &[' '];
2959         b.iter(|| assert_eq!(s.split(c).count(), len));
2960     }
2961
2962     #[bench]
2963     fn bench_connect(b: &mut Bencher) {
2964         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2965         let sep = "→";
2966         let v = vec![s, s, s, s, s, s, s, s, s, s];
2967         b.iter(|| {
2968             assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2969         })
2970     }
2971
2972     #[bench]
2973     fn bench_contains_short_short(b: &mut Bencher) {
2974         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2975         let needle = "sit";
2976
2977         b.iter(|| {
2978             assert!(haystack.contains(needle));
2979         })
2980     }
2981
2982     #[bench]
2983     fn bench_contains_short_long(b: &mut Bencher) {
2984         let haystack = "\
2985 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2986 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2987 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2988 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2989 tempus vel, gravida nec quam.
2990
2991 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2992 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2993 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2994 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2995 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2996 interdum. Curabitur ut nisi justo.
2997
2998 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2999 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3000 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3001 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3002 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3003 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3004 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3005 Aliquam sit amet placerat lorem.
3006
3007 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3008 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3009 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3010 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3011 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3012 cursus accumsan.
3013
3014 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3015 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3016 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3017 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3018 malesuada sollicitudin quam eu fermentum.";
3019         let needle = "english";
3020
3021         b.iter(|| {
3022             assert!(!haystack.contains(needle));
3023         })
3024     }
3025
3026     #[bench]
3027     fn bench_contains_bad_naive(b: &mut Bencher) {
3028         let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3029         let needle = "aaaaaaaab";
3030
3031         b.iter(|| {
3032             assert!(!haystack.contains(needle));
3033         })
3034     }
3035
3036     #[bench]
3037     fn bench_contains_equal(b: &mut Bencher) {
3038         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3039         let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3040
3041         b.iter(|| {
3042             assert!(haystack.contains(needle));
3043         })
3044     }
3045 }