src/libcollections/str.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // ignore-lexer-test FIXME #15679
  12
  13 //! Unicode string manipulation (`str` type)
  14 //!
  15 //! # Basic Usage
  16 //!
  17 //! Rust's string type is one of the core primitive types of the language. While
  18 //! represented by the name `str`, the name `str` is not actually a valid type in
  19 //! Rust. Each string must also be decorated with a pointer. `String` is used
  20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
  21 //! `&str`.
  22 //!
  23 //! `&str` is the borrowed string type. This type of string can only be created
  24 //! from other strings, unless it is a static string (see below). As the word
  25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
  26 //! cannot be moved out of.
  27 //!
  28 //! As an example, here's some code that uses a string.
  29 //!
  30 //! ```rust
  31 //! fn main() {
  32 //!     let borrowed_string = "This string is borrowed with the 'static lifetime";
  33 //! }
  34 //! ```
  35 //!
  36 //! From the example above, you can guess that Rust's string literals have the
  37 //! `'static` lifetime. This is akin to C's concept of a static string.
  38 //! More precisely, string literals are immutable views with a 'static lifetime
  39 //! (otherwise known as the lifetime of the entire program), and thus have the
  40 //! type `&'static str`.
  41 //!
  42 //! # Representation
  43 //!
  44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
  45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
  46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
  47 //! not null-terminated and can thus contain null bytes.
  48 //!
  49 //! The actual representation of a string maps directly onto a slice: `&str`
  50 //! is the same as `&[u8]`.
  51
  52 #![doc(primitive = "str")]
  53
  54 use self::RecompositionState::*;
  55 use self::DecompositionType::*;
  56
  57 use core::borrow::{BorrowFrom, ToOwned};
  58 use core::char::Char;
  59 use core::clone::Clone;
  60 use core::iter::AdditiveIterator;
  61 use core::iter::{range, Iterator, IteratorExt};
  62 use core::kinds::Sized;
  63 use core::ops;
  64 use core::option::Option::{self, Some, None};
  65 use core::slice::AsSlice;
  66 use core::str as core_str;
  67 use unicode::str::{UnicodeStr, Utf16Encoder};
  68
  69 use ring_buf::RingBuf;
  70 use slice::SliceExt;
  71 use string::String;
  72 use unicode;
  73 use vec::Vec;
  74 use slice::SliceConcatExt;
  75
  76 pub use core::str::{FromStr, Utf8Error, Str};
  77 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
  78 pub use core::str::{Split, SplitTerminator};
  79 pub use core::str::{SplitN, RSplitN};
  80 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
  81 pub use core::str::{from_utf8_unchecked, from_c_str};
  82 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
  83
  84 /*
  85 Section: Creating a string
  86 */
  87
  88 impl<S: Str> SliceConcatExt<str, String> for [S] {
  89     fn concat(&self) -> String {
  90         let s = self.as_slice();
  91
  92         if s.is_empty() {
  93             return String::new();
  94         }
  95
  96         // `len` calculation may overflow but push_str will check boundaries
  97         let len = s.iter().map(|s| s.as_slice().len()).sum();
  98         let mut result = String::with_capacity(len);
  99
 100         for s in s.iter() {
 101             result.push_str(s.as_slice())
 102         }
 103
 104         result
 105     }
 106
 107     fn connect(&self, sep: &str) -> String {
 108         let s = self.as_slice();
 109
 110         if s.is_empty() {
 111             return String::new();
 112         }
 113
 114         // concat is faster
 115         if sep.is_empty() {
 116             return s.concat();
 117         }
 118
 119         // this is wrong without the guarantee that `self` is non-empty
 120         // `len` calculation may overflow but push_str but will check boundaries
 121         let len = sep.len() * (s.len() - 1)
 122             + s.iter().map(|s| s.as_slice().len()).sum();
 123         let mut result = String::with_capacity(len);
 124         let mut first = true;
 125
 126         for s in s.iter() {
 127             if first {
 128                 first = false;
 129             } else {
 130                 result.push_str(sep);
 131             }
 132             result.push_str(s.as_slice());
 133         }
 134         result
 135     }
 136 }
 137
 138 /*
 139 Section: Iterators
 140 */
 141
 142 // Helper functions used for Unicode normalization
 143 fn canonical_sort(comb: &mut [(char, u8)]) {
 144     let len = comb.len();
 145     for i in range(0, len) {
 146         let mut swapped = false;
 147         for j in range(1, len-i) {
 148             let class_a = comb[j-1].1;
 149             let class_b = comb[j].1;
 150             if class_a != 0 && class_b != 0 && class_a > class_b {
 151                 comb.swap(j-1, j);
 152                 swapped = true;
 153             }
 154         }
 155         if !swapped { break; }
 156     }
 157 }
 158
 159 #[derive(Clone)]
     /// Selects which decomposition `Decompositions` applies to each source
     /// character.
 160 enum DecompositionType {
         /// Characters are expanded with `unicode::char::decompose_canonical`.
 161     Canonical,
         /// Characters are expanded with `unicode::char::decompose_compatible`.
 162     Compatible
 163 }
 164
 165 /// External iterator over the characters of a string's decomposition.
 166 /// Use with the `std::iter` module.
     ///
     /// Values of this type are built by `StrExt::nfd_chars` (canonical) and
     /// `StrExt::nfkd_chars` (compatibility).
 167 #[derive(Clone)]
 168 pub struct Decompositions<'a> {
         // Which decomposition function is applied to each source character.
 169     kind: DecompositionType,
         // Source characters that have not been decomposed yet.
 170     iter: Chars<'a>,
         // Decomposed characters, each paired with its canonical combining
         // class, that have not been yielded yet.
 171     buffer: Vec<(char, u8)>,
         // True once the leading part of `buffer` has been put through
         // `canonical_sort` and may therefore be yielded from the front.
 172     sorted: bool
 173 }
 174
 175 impl<'a> Iterator for Decompositions<'a> {
 176     type Item = char;
 177
         /// Yields the next character of the decomposition.
         ///
         /// Decomposed characters are collected in `buffer`; combining marks
         /// (non-zero class) are only handed out after `canonical_sort` has
         /// ordered them, while a class-0 character at the front can be handed
         /// out immediately.
 178     #[inline]
 179     fn next(&mut self) -> Option<char> {
             // Fast path: serve the front of the buffer when that is already safe.
 180         match self.buffer.first() {
                 // A class-0 character is never moved by the sort, so it can
                 // go out at once; whatever follows it is treated as unsorted.
 181             Some(&(c, 0)) => {
 182                 self.sorted = false;
 183                 self.buffer.remove(0);
 184                 return Some(c);
 185             }
                 // A combining mark may only go out once the buffer is sorted.
 186             Some(&(c, _)) if self.sorted => {
 187                 self.buffer.remove(0);
 188                 return Some(c);
 189             }
                 // Empty buffer, or an unsorted combining mark at the front.
 190             _ => self.sorted = false
 191         }
 192
             // Refill: decompose source characters until one of them produces a
             // class-0 character, which ends the current run of marks.
             // NOTE(review): this relies on `for` advancing `self.iter` in place
             // rather than consuming a copy -- confirm for the compiler in use.
 193         if !self.sorted {
 194             for ch in self.iter {
 195                 let buffer = &mut self.buffer;
 196                 let sorted = &mut self.sorted;
 197                 {
                         // Invoked once for every character `ch` decomposes to.
 198                     let callback = |&mut: d| {
 199                         let class =
 200                             unicode::char::canonical_combining_class(d);
                             // First class-0 character of this refill: sort what
                             // was buffered before it, then append it unsorted.
 201                         if class == 0 && !*sorted {
 202                             canonical_sort(buffer.as_mut_slice());
 203                             *sorted = true;
 204                         }
 205                         buffer.push((d, class));
 206                     };
 207                     match self.kind {
 208                         Canonical => {
 209                             unicode::char::decompose_canonical(ch, callback)
 210                         }
 211                         Compatible => {
 212                             unicode::char::decompose_compatible(ch, callback)
 213                         }
 214                     }
 215                 }
                     // Stop pulling input as soon as a sorted prefix exists.
 216                 if *sorted {
 217                     break
 218                 }
 219             }
 220         }
 221
             // The source ran out before another class-0 character showed up:
             // sort whatever is left so it can be drained.
 222         if !self.sorted {
 223             canonical_sort(self.buffer.as_mut_slice());
 224             self.sorted = true;
 225         }
 226
 227         if self.buffer.is_empty() {
 228             None
 229         } else {
 230             match self.buffer.remove(0) {
                     // Handing out a class-0 character invalidates `sorted`
                     // for the characters queued behind it.
 231                 (c, 0) => {
 232                     self.sorted = false;
 233                     Some(c)
 234                 }
 235                 (c, _) => Some(c),
 236             }
 237         }
 238     }
 239
         /// Reports the lower bound of the underlying `Chars` iterator and no
         /// upper bound; characters already sitting in `buffer` are not counted.
 240     fn size_hint(&self) -> (uint, Option<uint>) {
 241         let (lower, _) = self.iter.size_hint();
 242         (lower, None)
 243     }
 244 }
 245
 246 #[derive(Clone)]
     /// Phase of the `Recompositions` state machine.
 247 enum RecompositionState {
         /// Reading decomposed characters and trying to combine them with the
         /// current composee.
 248     Composing,
         /// Emptying `buffer` from the front; switches back to `Composing` once
         /// the buffer is empty.
 249     Purging,
         /// The decomposed input is exhausted; only `buffer` and a possibly
         /// remaining composee are left to be yielded.
 250     Finished
 251 }
 252
 253 /// External iterator over the characters of a string's recomposition.
 254 /// Use with the `std::iter` module.
     ///
     /// Values of this type are built by `StrExt::nfc_chars` and
     /// `StrExt::nfkc_chars`.
 255 #[derive(Clone)]
 256 pub struct Recompositions<'a> {
         // Decomposed input that is being recomposed.
 257     iter: Decompositions<'a>,
         // Current phase of the state machine driven by `next`.
 258     state: RecompositionState,
         // Characters that could not be combined with the composee and wait to
         // be yielded after it.
 259     buffer: RingBuf<char>,
         // Character that following characters are currently composed onto.
 260     composee: Option<char>,
         // Combining class of the character most recently pushed onto `buffer`;
         // `None` while nothing has been buffered for the current composee.
 261     last_ccc: Option<u8>
 262 }
 263
 264 impl<'a> Iterator for Recompositions<'a> {
 265     type Item = char;
 266
         /// Yields the next character of the recomposition.
         ///
         /// Runs the `Composing` / `Purging` / `Finished` state machine until a
         /// character can be returned or everything has been drained.
 267     #[inline]
 268     fn next(&mut self) -> Option<char> {
 269         loop {
 270             match self.state {
 271                 Composing => {
 272                     for ch in self.iter {
 273                         let ch_class = unicode::char::canonical_combining_class(ch);
                             // No composee yet: a combining mark has nothing to
                             // attach to and is passed through; a class-0
                             // character becomes the new composee.
 274                         if self.composee.is_none() {
 275                             if ch_class != 0 {
 276                                 return Some(ch);
 277                             }
 278                             self.composee = Some(ch);
 279                             continue;
 280                         }
 281                         let k = self.composee.clone().unwrap();
 282
 283                         match self.last_ccc {
                                 // Nothing buffered since the composee was set.
 284                             None => {
 285                                 match unicode::char::compose(k, ch) {
 286                                     Some(r) => {
 287                                         self.composee = Some(r);
 288                                         continue;
 289                                     }
 290                                     None => {
                                             // A class-0 character that does not
                                             // compose replaces the composee, and
                                             // the old composee is yielded.
 291                                         if ch_class == 0 {
 292                                             self.composee = Some(ch);
 293                                             return Some(k);
 294                                         }
 295                                         self.buffer.push_back(ch);
 296                                         self.last_ccc = Some(ch_class);
 297                                     }
 298                                 }
 299                             }
                                 // `l_class` is the class of the last buffered character.
 300                             Some(l_class) => {
 301                                 if l_class >= ch_class {
 302                                     // `ch` is blocked from `composee`
 303                                     if ch_class == 0 {
                                             // Yield the finished composee now; the
                                             // `Purging` state then empties `buffer`
                                             // before composing resumes with `ch`.
 304                                         self.composee = Some(ch);
 305                                         self.last_ccc = None;
 306                                         self.state = Purging;
 307                                         return Some(k);
 308                                     }
 309                                     self.buffer.push_back(ch);
 310                                     self.last_ccc = Some(ch_class);
 311                                     continue;
 312                                 }
                                     // Not blocked: try to combine with the composee.
 313                                 match unicode::char::compose(k, ch) {
 314                                     Some(r) => {
 315                                         self.composee = Some(r);
 316                                         continue;
 317                                     }
 318                                     None => {
 319                                         self.buffer.push_back(ch);
 320                                         self.last_ccc = Some(ch_class);
 321                                     }
 322                                 }
 323                             }
 324                         }
 325                     }
                         // The decomposed input is exhausted: yield the pending
                         // composee, if any; `Finished` drains `buffer` afterwards.
 326                     self.state = Finished;
 327                     if self.composee.is_some() {
 328                         return self.composee.take();
 329                     }
 330                 }
 331                 Purging => {
 332                     match self.buffer.pop_front() {
 333                         None => self.state = Composing,
 334                         s => return s
 335                     }
 336                 }
 337                 Finished => {
 338                     match self.buffer.pop_front() {
                             // `take` leaves `None` behind, so later calls keep
                             // returning `None` once the composee has been yielded.
 339                         None => return self.composee.take(),
 340                         s => return s
 341                     }
 342                 }
 343             }
 344         }
 345     }
 346 }
 347
 348 /// External iterator over a string's UTF-16 code units.
 349 /// Use with the `std::iter` module.
 350 #[derive(Clone)]
 351 pub struct Utf16Units<'a> {
         // Encoder wrapped around the string's `Chars` iterator; every call on
         // this type is forwarded to it.
 352     encoder: Utf16Encoder<Chars<'a>>
 353 }
 354
 355 impl<'a> Iterator for Utf16Units<'a> {
 356     type Item = u16;
 357
         /// Returns the next `u16` produced by the wrapped `Utf16Encoder`.
 358     #[inline]
 359     fn next(&mut self) -> Option<u16> { self.encoder.next() }
 360
         /// Forwards the size hint of the wrapped `Utf16Encoder` unchanged.
 361     #[inline]
 362     fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
 363 }
 364
 365 /*
 366 Section: Misc
 367 */
 368
 369 // Returns the initial code point accumulator for the first byte of a sequence.
 370 // The first byte is special: the mask `0x7F >> $width` keeps only the bottom
 371 // 5 bits for width 2, 4 bits for width 3, and 3 bits for width 4.
     // The masked value is widened to `u32`.
 372 macro_rules! utf8_first_byte {
 373     ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
 374 }
 375
 376 // Returns the value of $ch updated with continuation byte $byte: the
     // accumulator is shifted left by 6 bits and the low 6 bits of the byte
     // (mask 63) are OR-ed in.
 377 macro_rules! utf8_acc_cont_byte {
 378     ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
 379 }
 380
 381 #[unstable = "trait is unstable"]
 382 impl BorrowFrom<String> for str {
         // Borrows the whole contents of the owned `String` as a `&str` by
         // slicing it with `[]`.
 383     fn borrow_from(owned: &String) -> &str { owned[] }
 384 }
 385
 386 #[unstable = "trait is unstable"]
 387 impl ToOwned<String> for str {
         // Copies the bytes of `self` into a newly owned `String`.
 388     fn to_owned(&self) -> String {
             // The bytes are taken from a `str`, which the module documentation
             // describes as always validly encoded UTF-8, so the validity check
             // is skipped.
 389         unsafe {
 390             String::from_utf8_unchecked(self.as_bytes().to_owned())
 391         }
 392     }
 393 }
 394
 395 /*
 396 Section: CowString
 397 */
 398
 399 /*
 400 Section: Trait implementations
 401 */
 402
 403 /// Any string that can be represented as a slice.
 404 pub trait StrExt for Sized?: ops::Slice<uint, str> {
 405     /// Escapes each char in `self` with `char::escape_default` and collects
         /// the escaped characters into a new `String`.
 406     #[unstable = "return type may change to be an iterator"]
 407     fn escape_default(&self) -> String {
 408         self.chars().flat_map(|c| c.escape_default()).collect()
 409     }
 410
 411     /// Escapes each char in `self` with `char::escape_unicode` and collects
         /// the escaped characters into a new `String`.
 412     #[unstable = "return type may change to be an iterator"]
 413     fn escape_unicode(&self) -> String {
 414         self.chars().flat_map(|c| c.escape_unicode()).collect()
 415     }
 416
 417     /// Replaces all occurrences of one string with another.
 418     ///
 419     /// # Arguments
 420     ///
 421     /// * `from` - The string to replace
 422     /// * `to` - The replacement string
 423     ///
 424     /// # Return value
 425     ///
 426     /// The original string with all occurrences of `from` replaced with `to`.
         /// The result is always a newly built `String`, even when nothing matched.
 427     ///
 428     /// # Examples
 429     ///
 430     /// ```rust
 431     /// let s = "Do you know the muffin man,
 432     /// The muffin man, the muffin man, ...".to_string();
 433     ///
 434     /// assert_eq!(s.replace("muffin man", "little lamb"),
 435     ///            "Do you know the little lamb,
 436     /// The little lamb, the little lamb, ...".to_string());
 437     ///
 438     /// // not found, so no change.
 439     /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
 440     /// ```
 441     #[stable]
 442     fn replace(&self, from: &str, to: &str) -> String {
 443         let mut result = String::new();
             // Offset just past the previous match, i.e. where the next
             // unmatched stretch of `self` begins.
 444         let mut last_end = 0;
 445         for (start, end) in self.match_indices(from) {
                 // Copy the text between the previous match and this one, then
                 // the replacement in place of the match itself.
                 // NOTE(review): the unchecked slices assume `match_indices`
                 // yields in-bounds offsets on character boundaries -- confirm
                 // against its implementation.
 446             result.push_str(unsafe { self.slice_unchecked(last_end, start) });
 447             result.push_str(to);
 448             last_end = end;
 449         }
             // Copy whatever follows the last match (the whole string if none).
 450         result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
 451         result
 452     }
 453
 454     /// Returns an iterator over the string in Unicode Normalization Form D
 455     /// (canonical decomposition).
         ///
         /// The iterator starts with an empty, unsorted buffer; no work is done
         /// until it is advanced.
 456     #[inline]
 457     #[unstable = "this functionality may be moved to libunicode"]
 458     fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
 459         Decompositions {
 460             iter: self[].chars(),
 461             buffer: Vec::new(),
 462             sorted: false,
 463             kind: Canonical
 464         }
 465     }
 466
 467     /// Returns an iterator over the string in Unicode Normalization Form KD
 468     /// (compatibility decomposition).
         ///
         /// Identical to `nfd_chars` except that the iterator is built with the
         /// `Compatible` decomposition kind.
 469     #[inline]
 470     #[unstable = "this functionality may be moved to libunicode"]
 471     fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
 472         Decompositions {
 473             iter: self[].chars(),
 474             buffer: Vec::new(),
 475             sorted: false,
 476             kind: Compatible
 477         }
 478     }
 479
 480     /// Returns an iterator over the string in Unicode Normalization Form C
 481     /// (canonical decomposition followed by canonical composition).
         ///
         /// The returned iterator recomposes the output of `nfd_chars`.
 482     #[inline]
 483     #[unstable = "this functionality may be moved to libunicode"]
 484     fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
 485         Recompositions {
 486             iter: self.nfd_chars(),
 487             state: Composing,
 488             buffer: RingBuf::new(),
 489             composee: None,
 490             last_ccc: None
 491         }
 492     }
 493
 494     /// Returns an iterator over the string in Unicode Normalization Form KC
 495     /// (compatibility decomposition followed by canonical composition).
         ///
         /// The returned iterator recomposes the output of `nfkd_chars`.
 496     #[inline]
 497     #[unstable = "this functionality may be moved to libunicode"]
 498     fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
 499         Recompositions {
 500             iter: self.nfkd_chars(),
 501             state: Composing,
 502             buffer: RingBuf::new(),
 503             composee: None,
 504             last_ccc: None
 505         }
 506     }
 507
 508     /// Returns true if the string contains the string pattern `pat`.
 509     ///
 510     /// # Arguments
 511     ///
 512     /// * `pat` - The string pattern to look for
 513     ///
 514     /// # Example
 515     ///
 516     /// ```rust
 517     /// assert!("bananas".contains("nana"));
 518     /// ```
 519     #[stable]
 520     fn contains(&self, pat: &str) -> bool {
             // Forwards to `core::str::StrExt::contains` with `self[]` as receiver.
 521         core_str::StrExt::contains(self[], pat)
 522     }
 523
 524     /// Returns true if the string contains a char matched by the pattern `pat`.
 525     ///
 526     /// # Arguments
 527     ///
 528     /// * `pat` - The char pattern to look for (any `CharEq` implementor)
 529     ///
 530     /// # Example
 531     ///
 532     /// ```rust
 533     /// assert!("hello".contains_char('e'));
 534     /// ```
 535     #[unstable = "might get removed in favour of a more generic contains()"]
 536     fn contains_char<P: CharEq>(&self, pat: P) -> bool {
             // Forwards to `core::str::StrExt::contains_char` with `self[]` as receiver.
 537         core_str::StrExt::contains_char(self[], pat)
 538     }
 539
 540     /// An iterator over the characters of `self`. Note that this iterates
 541     /// over Unicode code points, not Unicode graphemes.
 542     ///
 543     /// # Example
 544     ///
 545     /// ```rust
 546     /// let v: Vec<char> = "abc åäö".chars().collect();
 547     /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
 548     /// ```
 549     #[stable]
 550     fn chars(&self) -> Chars {
             // Forwards to `core::str::StrExt::chars` with `self[]` as receiver.
 551         core_str::StrExt::chars(self[])
 552     }
 553
 554     /// An iterator over the bytes of `self`.
 555     ///
 556     /// # Example
 557     ///
 558     /// ```rust
 559     /// let v: Vec<u8> = "bors".bytes().collect();
 560     /// assert_eq!(v, b"bors".to_vec());
 561     /// ```
 562     #[stable]
 563     fn bytes(&self) -> Bytes {
             // Forwards to `core::str::StrExt::bytes` with `self[]` as receiver.
 564         core_str::StrExt::bytes(self[])
 565     }
 566
 567     /// An iterator over the characters of `self` and their byte offsets.
 568     #[stable]
 569     fn char_indices(&self) -> CharIndices {
             // Forwards to `core::str::StrExt::char_indices` with `self[]` as receiver.
 570         core_str::StrExt::char_indices(self[])
 571     }
 572
 573     /// An iterator over substrings of `self`, separated by characters
 574     /// matched by the pattern `pat`.
         ///
         /// As the examples show, adjacent separators produce empty substrings
         /// and an empty string yields a single empty substring.
 575     ///
 576     /// # Example
 577     ///
 578     /// ```rust
 579     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
 580     /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
 581     ///
 582     /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
 583     /// assert_eq!(v, vec!["abc", "def", "ghi"]);
 584     ///
 585     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
 586     /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
 587     ///
 588     /// let v: Vec<&str> = "".split('X').collect();
 589     /// assert_eq!(v, vec![""]);
 590     /// ```
 591     #[stable]
 592     fn split<P: CharEq>(&self, pat: P) -> Split<P> {
             // Forwards to `core::str::StrExt::split` with `self[]` as receiver.
 593         core_str::StrExt::split(self[], pat)
 594     }
 595
 596     /// An iterator over substrings of `self`, separated by characters
 597     /// matched by the pattern `pat`, restricted to splitting at most `count`
 598     /// times.
         ///
         /// As the examples show, the final substring holds the unsplit
         /// remainder, and a `count` of 0 yields the whole string.
 599     ///
 600     /// # Example
 601     ///
 602     /// ```rust
 603     /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
 604     /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
 605     ///
 606     /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
 607     /// assert_eq!(v, vec!["abc", "def2ghi"]);
 608     ///
 609     /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
 610     /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
 611     ///
 612     /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
 613     /// assert_eq!(v, vec!["abcXdef"]);
 614     ///
 615     /// let v: Vec<&str> = "".splitn(1, 'X').collect();
 616     /// assert_eq!(v, vec![""]);
 617     /// ```
 618     #[stable]
 619     fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
             // Forwards to `core::str::StrExt::splitn` with `self[]` as receiver.
 620         core_str::StrExt::splitn(self[], count, pat)
 621     }
 622
 623     /// An iterator over substrings of `self`, separated by characters
 624     /// matched by the pattern `pat`.
 625     ///
 626     /// Equivalent to `split`, except that the trailing substring
 627     /// is skipped if empty (terminator semantics).
         ///
         /// The last three examples below call `split` (not `split_terminator`)
         /// and show its result being reversed with `rev()`.
 628     ///
 629     /// # Example
 630     ///
 631     /// ```rust
 632     /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
 633     /// assert_eq!(v, vec!["A", "B"]);
 634     ///
 635     /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
 636     /// assert_eq!(v, vec!["A", "", "B", ""]);
 637     ///
 638     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
 639     /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
 640     ///
 641     /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
 642     /// assert_eq!(v, vec!["ghi", "def", "abc"]);
 643     ///
 644     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
 645     /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
 646     /// ```
 647     #[unstable = "might get removed"]
 648     fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
             // Forwards to `core::str::StrExt::split_terminator` with `self[]` as receiver.
 649         core_str::StrExt::split_terminator(self[], pat)
 650     }
 651
 652     /// An iterator over substrings of `self`, separated by characters
 653     /// matched by the pattern `pat`, starting from the end of the string.
 654     /// Restricted to splitting at most `count` times.
         ///
         /// As the examples show, substrings are yielded last-to-first and the
         /// final item holds the unsplit front of the string.
 655     ///
 656     /// # Example
 657     ///
 658     /// ```rust
 659     /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
 660     /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
 661     ///
 662     /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
 663     /// assert_eq!(v, vec!["ghi", "abc1def"]);
 664     ///
 665     /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
 666     /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
 667     /// ```
 668     #[stable]
 669     fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
             // Forwards to `core::str::StrExt::rsplitn` with `self[]` as receiver.
 670         core_str::StrExt::rsplitn(self[], count, pat)
 671     }
 672
 673     /// An iterator over the start and end indices of the disjoint
 674     /// matches of the pattern `pat` within `self`.
 675     ///
 676     /// That is, each returned value `(start, end)` satisfies
 677     /// `self.slice(start, end) == pat`. For matches of `pat` within
 678     /// `self` that overlap, only the indices corresponding to the
 679     /// first match are returned.
 680     ///
 681     /// # Example
 682     ///
 683     /// ```rust
 684     /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
 685     /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
 686     ///
 687     /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
 688     /// assert_eq!(v, vec![(1,4), (4,7)]);
 689     ///
 690     /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
 691     /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
 692     /// ```
 693     #[unstable = "might have its iterator type changed"]
 694     fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
             // Forwards to `core::str::StrExt::match_indices` with `self[]` as receiver.
 695         core_str::StrExt::match_indices(self[], pat)
 696     }
 697
 698     /// An iterator over the substrings of `self` separated by the pattern `pat`.
         ///
         /// As the examples show, a match at either end of the string, or two
         /// adjacent matches, produce empty substrings.
 699     ///
 700     /// # Example
 701     ///
 702     /// ```rust
 703     /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
 704     /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
 705     ///
 706     /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
 707     /// assert_eq!(v, vec!["1", "", "2"]);
 708     /// ```
 709     #[unstable = "might get removed in the future in favor of a more generic split()"]
 710     fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
             // Forwards to `core::str::StrExt::split_str` with `self[]` as receiver.
 711         core_str::StrExt::split_str(self[], pat)
 712     }
 713
 714     /// An iterator over the lines of a string (subsequences separated
 715     /// by `\n`). This does not include the empty string after a
 716     /// trailing `\n`.
 717     ///
 718     /// # Example
 719     ///
 720     /// ```rust
 721     /// let four_lines = "foo\nbar\n\nbaz\n";
 722     /// let v: Vec<&str> = four_lines.lines().collect();
 723     /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
 724     /// ```
 725     #[stable]
 726     fn lines(&self) -> Lines {
             // Forwards to `core::str::StrExt::lines` with `self[]` as receiver.
 727         core_str::StrExt::lines(self[])
 728     }
 729
 730     /// An iterator over the lines of a string, separated by either
 731     /// `\n` or `\r\n`. As with `.lines()`, this does not include an
 732     /// empty trailing line.
 733     ///
 734     /// # Example
 735     ///
 736     /// ```rust
 737     /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
 738     /// let v: Vec<&str> = four_lines.lines_any().collect();
 739     /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
 740     /// ```
 741     #[stable]
 742     fn lines_any(&self) -> LinesAny {
             // Forwards to `core::str::StrExt::lines_any` with `self[]` as receiver.
 743         core_str::StrExt::lines_any(self[])
 744     }
 745
 746     /// Returns a slice of the given string from the byte range
 747     /// [`begin`..`end`).
 748     ///
 749     /// This operation is `O(1)`.
 750     ///
 751     /// Panics when `begin` or `end` does not point to a character boundary,
 752     /// or points beyond the last character of the string.
 753     ///
 754     /// See also `slice_to` and `slice_from` for slicing prefixes and
 755     /// suffixes of strings, and `slice_chars` for slicing based on
 756     /// code point counts.
 757     ///
 758     /// # Example
 759     ///
 760     /// ```rust
 761     /// let s = "Löwe 老虎 Léopard";
 762     /// assert_eq!(s.slice(0, 1), "L");
 763     ///
 764     /// assert_eq!(s.slice(1, 9), "öwe 老");
 765     ///
 766     /// // these will panic:
 767     /// // byte 2 lies within `ö`:
 768     /// // s.slice(2, 3);
 769     ///
 770     /// // byte 8 lies within `老`
 771     /// // s.slice(1, 8);
 772     ///
 773     /// // byte 100 is outside the string
 774     /// // s.slice(3, 100);
 775     /// ```
 776     #[unstable = "use slice notation [a..b] instead"]
 777     fn slice(&self, begin: uint, end: uint) -> &str {
             // Forwards to `core::str::StrExt::slice` with `self[]` as receiver.
 778         core_str::StrExt::slice(self[], begin, end)
 779     }
 780
 781     /// Returns a slice of the string from byte `begin` to its end.
 782     ///
 783     /// Equivalent to `self.slice(begin, self.len())`.
 784     ///
 785     /// Panics when `begin` does not point to a valid character, or is
 786     /// out of bounds.
 787     ///
 788     /// See also `slice`, `slice_to` and `slice_chars`.
 789     #[unstable = "use slice notation [a..] instead"]
 790     fn slice_from(&self, begin: uint) -> &str {
             // Forwards to `core::str::StrExt::slice_from` with `self[]` as receiver.
 791         core_str::StrExt::slice_from(self[], begin)
 792     }
 793
 794     /// Returns a slice of the string from the beginning to byte
 795     /// `end` (exclusive).
 796     ///
 797     /// Equivalent to `self.slice(0, end)`.
 798     ///
 799     /// Panics when `end` does not point to a valid character, or is
 800     /// out of bounds.
 801     ///
 802     /// See also `slice`, `slice_from` and `slice_chars`.
 803     #[unstable = "use slice notation [0..a] instead"]
 804     fn slice_to(&self, end: uint) -> &str {
             // Forwards to `core::str::StrExt::slice_to` with `self[]` as receiver.
 805         core_str::StrExt::slice_to(self[], end)
 806     }
 807
 808     /// Returns a slice of the string from the character range
 809     /// [`begin`..`end`).
 810     ///
 811     /// That is, start at the `begin`-th code point of the string and
 812     /// continue to the `end`-th code point. This does not detect or
 813     /// handle edge cases such as leaving a combining character as the
 814     /// first code point of the string.
 815     ///
 816     /// Due to the design of UTF-8, this operation is `O(end)`.
 817     /// See `slice`, `slice_to` and `slice_from` for `O(1)`
 818     /// variants that use byte indices rather than code point
 819     /// indices.
 820     ///
 821     /// Panics if `begin` > `end`, or if either `begin` or `end` is
 822     /// beyond the last character of the string.
 823     ///
 824     /// # Example
 825     ///
 826     /// ```rust
 827     /// let s = "Löwe 老虎 Léopard";
 828     /// assert_eq!(s.slice_chars(0, 4), "Löwe");
 829     /// assert_eq!(s.slice_chars(5, 7), "老虎");
 830     /// ```
 831     #[unstable = "may have yet to prove its worth"]
 832     fn slice_chars(&self, begin: uint, end: uint) -> &str {
             // Forwards to `core::str::StrExt::slice_chars` with `self[]` as receiver.
 833         core_str::StrExt::slice_chars(self[], begin, end)
 834     }
 835
 836     /// Takes a bytewise (not UTF-8) slice from a string.
 837     ///
 838     /// Returns the substring from [`begin`..`end`).
 839     ///
         /// # Safety
         ///
 840     /// Caller must check both UTF-8 character boundaries and the boundaries of
 841     /// the entire slice as well.
 842     #[stable]
 843     unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
             // Forwards to `core::str::StrExt::slice_unchecked` with `self[]` as receiver.
 844         core_str::StrExt::slice_unchecked(self[], begin, end)
 845     }
 846
 847     /// Returns true if the pattern `pat` is a prefix of the string.
 848     ///
 849     /// # Example
 850     ///
 851     /// ```rust
 852     /// assert!("banana".starts_with("ba"));
 853     /// ```
 854     #[stable]
 855     fn starts_with(&self, pat: &str) -> bool {
             // Forwards to `core::str::StrExt::starts_with` with `self[]` as receiver.
 856         core_str::StrExt::starts_with(self[], pat)
 857     }
 858
 859     /// Returns true if the pattern `pat` is a suffix of the string.
 860     ///
 861     /// # Example
 862     ///
 863     /// ```rust
 864     /// assert!("banana".ends_with("nana"));
 865     /// ```
 866     #[stable]
 867     fn ends_with(&self, pat: &str) -> bool {
 868         core_str::StrExt::ends_with(self[], pat)
 869     }
 870
 871     /// Returns a string with all pre- and suffixes that match
 872     /// the pattern `pat` repeatedly removed.
 873     ///
 874     /// # Arguments
 875     ///
    /// * pat - a character pattern: any `CharEq`, such as a `char`,
    ///   a slice of `char`s, or a closure over `char`
 877     ///
 878     /// # Example
 879     ///
 880     /// ```rust
 881     /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
 882     /// let x: &[_] = &['1', '2'];
 883     /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
 884     /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
 885     /// ```
 886     #[stable]
 887     fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
 888         core_str::StrExt::trim_matches(self[], pat)
 889     }
 890
 891     /// Returns a string with all prefixes that match
 892     /// the pattern `pat` repeatedly removed.
 893     ///
 894     /// # Arguments
 895     ///
    /// * pat - a character pattern: any `CharEq`, such as a `char`,
    ///   a slice of `char`s, or a closure over `char`
 897     ///
 898     /// # Example
 899     ///
 900     /// ```rust
 901     /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
 902     /// let x: &[_] = &['1', '2'];
 903     /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
 904     /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
 905     /// ```
 906     #[stable]
 907     fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
 908         core_str::StrExt::trim_left_matches(self[], pat)
 909     }
 910
 911     /// Returns a string with all suffixes that match
 912     /// the pattern `pat` repeatedly removed.
 913     ///
 914     /// # Arguments
 915     ///
    /// * pat - a character pattern: any `CharEq`, such as a `char`,
    ///   a slice of `char`s, or a closure over `char`
 917     ///
 918     /// # Example
 919     ///
 920     /// ```rust
 921     /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
 922     /// let x: &[_] = &['1', '2'];
 923     /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
 924     /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
 925     /// ```
 926     #[stable]
 927     fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
 928         core_str::StrExt::trim_right_matches(self[], pat)
 929     }
 930
 931     /// Check that `index`-th byte lies at the start and/or end of a
 932     /// UTF-8 code point sequence.
 933     ///
 934     /// The start and end of the string (when `index == self.len()`)
 935     /// are considered to be boundaries.
 936     ///
 937     /// Panics if `index` is greater than `self.len()`.
 938     ///
 939     /// # Example
 940     ///
 941     /// ```rust
 942     /// let s = "Löwe 老虎 Léopard";
 943     /// assert!(s.is_char_boundary(0));
 944     /// // start of `老`
 945     /// assert!(s.is_char_boundary(6));
 946     /// assert!(s.is_char_boundary(s.len()));
 947     ///
 948     /// // second byte of `ö`
 949     /// assert!(!s.is_char_boundary(2));
 950     ///
 951     /// // third byte of `老`
 952     /// assert!(!s.is_char_boundary(8));
 953     /// ```
 954     #[unstable = "naming is uncertain with container conventions"]
 955     fn is_char_boundary(&self, index: uint) -> bool {
 956         core_str::StrExt::is_char_boundary(self[], index)
 957     }
 958
 959     /// Pluck a character out of a string and return the index of the next
 960     /// character.
 961     ///
 962     /// This function can be used to iterate over the Unicode characters of a
 963     /// string.
 964     ///
 965     /// # Example
 966     ///
 967     /// This example manually iterates through the characters of a
 968     /// string; this should normally be done by `.chars()` or
 969     /// `.char_indices`.
 970     ///
 971     /// ```rust
 972     /// use std::str::CharRange;
 973     ///
 974     /// let s = "中华Việt Nam";
 975     /// let mut i = 0u;
 976     /// while i < s.len() {
 977     ///     let CharRange {ch, next} = s.char_range_at(i);
 978     ///     println!("{}: {}", i, ch);
 979     ///     i = next;
 980     /// }
 981     /// ```
 982     ///
 983     /// This outputs:
 984     ///
 985     /// ```text
 986     /// 0: 中
 987     /// 3: 华
 988     /// 6: V
 989     /// 7: i
 990     /// 8: ệ
 991     /// 11: t
 992     /// 12:
 993     /// 13: N
 994     /// 14: a
 995     /// 15: m
 996     /// ```
 997     ///
 998     /// # Arguments
 999     ///
    /// * self - The string
    /// * start - The byte offset of the char to extract
1002     ///
1003     /// # Return value
1004     ///
1005     /// A record {ch: char, next: uint} containing the char value and the byte
1006     /// index of the next Unicode character.
1007     ///
1008     /// # Panics
1009     ///
    /// If `start` is greater than or equal to the length of the string.
    /// If `start` is not the index of the beginning of a valid UTF-8 character.
1012     #[unstable = "naming is uncertain with container conventions"]
1013     fn char_range_at(&self, start: uint) -> CharRange {
1014         core_str::StrExt::char_range_at(self[], start)
1015     }
1016
1017     /// Given a byte position and a str, return the previous char and its position.
1018     ///
1019     /// This function can be used to iterate over a Unicode string in reverse.
1020     ///
1021     /// Returns 0 for next index if called on start index 0.
1022     ///
1023     /// # Panics
1024     ///
    /// If `start` is greater than the length of the string.
    /// If `start` is not an index following a valid UTF-8 character.
1027     #[unstable = "naming is uncertain with container conventions"]
1028     fn char_range_at_reverse(&self, start: uint) -> CharRange {
1029         core_str::StrExt::char_range_at_reverse(self[], start)
1030     }
1031
1032     /// Plucks the character starting at the `i`th byte of a string.
1033     ///
1034     /// # Example
1035     ///
1036     /// ```rust
1037     /// let s = "abπc";
1038     /// assert_eq!(s.char_at(1), 'b');
1039     /// assert_eq!(s.char_at(2), 'π');
1040     /// assert_eq!(s.char_at(4), 'c');
1041     /// ```
1042     ///
1043     /// # Panics
1044     ///
1045     /// If `i` is greater than or equal to the length of the string.
1046     /// If `i` is not the index of the beginning of a valid UTF-8 character.
1047     #[unstable = "naming is uncertain with container conventions"]
1048     fn char_at(&self, i: uint) -> char {
1049         core_str::StrExt::char_at(self[], i)
1050     }
1051
1052     /// Plucks the character ending at the `i`th byte of a string.
1053     ///
1054     /// # Panics
1055     ///
1056     /// If `i` is greater than the length of the string.
1057     /// If `i` is not an index following a valid UTF-8 character.
1058     #[unstable = "naming is uncertain with container conventions"]
1059     fn char_at_reverse(&self, i: uint) -> char {
1060         core_str::StrExt::char_at_reverse(self[], i)
1061     }
1062
1063     /// Work with the byte buffer of a string as a byte slice.
1064     ///
1065     /// # Example
1066     ///
1067     /// ```rust
1068     /// assert_eq!("bors".as_bytes(), b"bors");
1069     /// ```
1070     #[stable]
1071     fn as_bytes(&self) -> &[u8] {
1072         core_str::StrExt::as_bytes(self[])
1073     }
1074
1075     /// Returns the byte index of the first character of `self` that
1076     /// matches the pattern `pat`.
1077     ///
1078     /// # Return value
1079     ///
    /// `Some` containing the byte index of the first matching character
    /// or `None` if there is no match.
1082     ///
1083     /// # Example
1084     ///
1085     /// ```rust
1086     /// let s = "Löwe 老虎 Léopard";
1087     ///
1088     /// assert_eq!(s.find('L'), Some(0));
1089     /// assert_eq!(s.find('é'), Some(14));
1090     ///
1091     /// // the first space
1092     /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1093     ///
1094     /// // neither are found
1095     /// let x: &[_] = &['1', '2'];
1096     /// assert_eq!(s.find(x), None);
1097     /// ```
1098     #[stable]
1099     fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1100         core_str::StrExt::find(self[], pat)
1101     }
1102
1103     /// Returns the byte index of the last character of `self` that
1104     /// matches the pattern `pat`.
1105     ///
1106     /// # Return value
1107     ///
1108     /// `Some` containing the byte index of the last matching character
1109     /// or `None` if there is no match.
1110     ///
1111     /// # Example
1112     ///
1113     /// ```rust
1114     /// let s = "Löwe 老虎 Léopard";
1115     ///
1116     /// assert_eq!(s.rfind('L'), Some(13));
1117     /// assert_eq!(s.rfind('é'), Some(14));
1118     ///
1119     /// // the second space
1120     /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1121     ///
1122     /// // searches for an occurrence of either `1` or `2`, but neither are found
1123     /// let x: &[_] = &['1', '2'];
1124     /// assert_eq!(s.rfind(x), None);
1125     /// ```
1126     #[stable]
1127     fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1128         core_str::StrExt::rfind(self[], pat)
1129     }
1130
1131     /// Returns the byte index of the first matching substring
1132     ///
1133     /// # Arguments
1134     ///
1135     /// * `needle` - The string to search for
1136     ///
1137     /// # Return value
1138     ///
1139     /// `Some` containing the byte index of the first matching substring
1140     /// or `None` if there is no match.
1141     ///
1142     /// # Example
1143     ///
1144     /// ```rust
1145     /// let s = "Löwe 老虎 Léopard";
1146     ///
1147     /// assert_eq!(s.find_str("老虎 L"), Some(6));
1148     /// assert_eq!(s.find_str("muffin man"), None);
1149     /// ```
1150     #[unstable = "might get removed in favor of a more generic find in the future"]
1151     fn find_str(&self, needle: &str) -> Option<uint> {
1152         core_str::StrExt::find_str(self[], needle)
1153     }
1154
    /// Retrieves the first character from a string slice and returns
    /// it. This does not allocate a new string; instead, it returns a
    /// slice that points one character beyond the character that was
    /// shifted. If the string does not contain any characters,
    /// `None` is returned instead.
1160     ///
1161     /// # Example
1162     ///
1163     /// ```rust
1164     /// let s = "Löwe 老虎 Léopard";
1165     /// let (c, s1) = s.slice_shift_char().unwrap();
1166     /// assert_eq!(c, 'L');
1167     /// assert_eq!(s1, "öwe 老虎 Léopard");
1168     ///
1169     /// let (c, s2) = s1.slice_shift_char().unwrap();
1170     /// assert_eq!(c, 'ö');
1171     /// assert_eq!(s2, "we 老虎 Léopard");
1172     /// ```
1173     #[unstable = "awaiting conventions about shifting and slices"]
1174     fn slice_shift_char(&self) -> Option<(char, &str)> {
1175         core_str::StrExt::slice_shift_char(self[])
1176     }
1177
1178     /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1179     ///
1180     /// Panics if `inner` is not a direct slice contained within self.
1181     ///
1182     /// # Example
1183     ///
1184     /// ```rust
1185     /// let string = "a\nb\nc";
1186     /// let lines: Vec<&str> = string.lines().collect();
1187     ///
1188     /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1189     /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1190     /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1191     /// ```
1192     #[unstable = "awaiting convention about comparability of arbitrary slices"]
1193     fn subslice_offset(&self, inner: &str) -> uint {
1194         core_str::StrExt::subslice_offset(self[], inner)
1195     }
1196
    /// Return an unsafe pointer to the string's buffer.
1198     ///
1199     /// The caller must ensure that the string outlives this pointer,
1200     /// and that it is not reallocated (e.g. by pushing to the
1201     /// string).
1202     #[stable]
1203     #[inline]
1204     fn as_ptr(&self) -> *const u8 {
1205         core_str::StrExt::as_ptr(self[])
1206     }
1207
1208     /// Return an iterator of `u16` over the string encoded as UTF-16.
1209     #[unstable = "this functionality may only be provided by libunicode"]
1210     fn utf16_units(&self) -> Utf16Units {
1211         Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1212     }
1213
1214     /// Return the number of bytes in this string
1215     ///
1216     /// # Example
1217     ///
1218     /// ```
1219     /// assert_eq!("foo".len(), 3);
1220     /// assert_eq!("ƒoo".len(), 4);
1221     /// ```
1222     #[stable]
1223     #[inline]
1224     fn len(&self) -> uint {
1225         core_str::StrExt::len(self[])
1226     }
1227
1228     /// Returns true if this slice contains no bytes
1229     ///
1230     /// # Example
1231     ///
1232     /// ```
1233     /// assert!("".is_empty());
1234     /// ```
1235     #[inline]
1236     #[stable]
1237     fn is_empty(&self) -> bool {
1238         core_str::StrExt::is_empty(self[])
1239     }
1240
1241     /// Parse this string into the specified type.
1242     ///
1243     /// # Example
1244     ///
1245     /// ```
1246     /// assert_eq!("4".parse::<u32>(), Some(4));
1247     /// assert_eq!("j".parse::<u32>(), None);
1248     /// ```
1249     #[inline]
1250     #[unstable = "this method was just created"]
1251     fn parse<F: FromStr>(&self) -> Option<F> {
1252         core_str::StrExt::parse(self[])
1253     }
1254
1255     /// Returns an iterator over the
1256     /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1257     /// of the string.
1258     ///
1259     /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1260     /// otherwise, the iterator is over the *legacy grapheme clusters*.
1261     /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1262     /// recommends extended grapheme cluster boundaries for general processing.
1263     ///
1264     /// # Example
1265     ///
1266     /// ```rust
1267     /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1268     /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1269     /// assert_eq!(gr1.as_slice(), b);
1270     /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1271     /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1272     /// assert_eq!(gr2.as_slice(), b);
1273     /// ```
1274     #[unstable = "this functionality may only be provided by libunicode"]
1275     fn graphemes(&self, is_extended: bool) -> Graphemes {
1276         UnicodeStr::graphemes(self[], is_extended)
1277     }
1278
1279     /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1280     /// See `graphemes()` method for more information.
1281     ///
1282     /// # Example
1283     ///
1284     /// ```rust
1285     /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1286     /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1287     /// assert_eq!(gr_inds.as_slice(), b);
1288     /// ```
1289     #[unstable = "this functionality may only be provided by libunicode"]
1290     fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1291         UnicodeStr::grapheme_indices(self[], is_extended)
1292     }
1293
1294     /// An iterator over the words of a string (subsequences separated
1295     /// by any sequence of whitespace). Sequences of whitespace are
1296     /// collapsed, so empty "words" are not included.
1297     ///
1298     /// # Example
1299     ///
1300     /// ```rust
1301     /// let some_words = " Mary   had\ta little  \n\t lamb";
1302     /// let v: Vec<&str> = some_words.words().collect();
1303     /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1304     /// ```
1305     #[stable]
1306     fn words(&self) -> Words {
1307         UnicodeStr::words(self[])
1308     }
1309
1310     /// Returns a string's displayed width in columns, treating control
1311     /// characters as zero-width.
1312     ///
1313     /// `is_cjk` determines behavior for characters in the Ambiguous category:
1314     /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1315     /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1316     /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1317     /// recommends that these characters be treated as 1 column (i.e.,
1318     /// `is_cjk` = `false`) if the locale is unknown.
1319     #[unstable = "this functionality may only be provided by libunicode"]
1320     fn width(&self, is_cjk: bool) -> uint {
1321         UnicodeStr::width(self[], is_cjk)
1322     }
1323
1324     /// Returns a string with leading and trailing whitespace removed.
1325     #[stable]
1326     fn trim(&self) -> &str {
1327         UnicodeStr::trim(self[])
1328     }
1329
1330     /// Returns a string with leading whitespace removed.
1331     #[stable]
1332     fn trim_left(&self) -> &str {
1333         UnicodeStr::trim_left(self[])
1334     }
1335
1336     /// Returns a string with trailing whitespace removed.
1337     #[stable]
1338     fn trim_right(&self) -> &str {
1339         UnicodeStr::trim_right(self[])
1340     }
1341 }
1342
// The impl body is empty: `str` uses the trait's default method bodies as-is.
impl StrExt for str {}
1344
1345 #[cfg(test)]
1346 mod tests {
1347     use prelude::*;
1348
1349     use core::iter::AdditiveIterator;
1350     use super::from_utf8;
1351     use super::Utf8Error;
1352
1353     #[test]
1354     fn test_le() {
1355         assert!("" <= "");
1356         assert!("" <= "foo");
1357         assert!("foo" <= "foo");
1358         assert!("foo" != "bar");
1359     }
1360
1361     #[test]
1362     fn test_len() {
1363         assert_eq!("".len(), 0u);
1364         assert_eq!("hello world".len(), 11u);
1365         assert_eq!("\x63".len(), 1u);
1366         assert_eq!("\u{a2}".len(), 2u);
1367         assert_eq!("\u{3c0}".len(), 2u);
1368         assert_eq!("\u{2620}".len(), 3u);
1369         assert_eq!("\u{1d11e}".len(), 4u);
1370
1371         assert_eq!("".chars().count(), 0u);
1372         assert_eq!("hello world".chars().count(), 11u);
1373         assert_eq!("\x63".chars().count(), 1u);
1374         assert_eq!("\u{a2}".chars().count(), 1u);
1375         assert_eq!("\u{3c0}".chars().count(), 1u);
1376         assert_eq!("\u{2620}".chars().count(), 1u);
1377         assert_eq!("\u{1d11e}".chars().count(), 1u);
1378         assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1379
1380         assert_eq!("ｈｅｌｌｏ".width(false), 10u);
1381         assert_eq!("ｈｅｌｌｏ".width(true), 10u);
1382         assert_eq!("\0\0\0\0\0".width(false), 0u);
1383         assert_eq!("\0\0\0\0\0".width(true), 0u);
1384         assert_eq!("".width(false), 0u);
1385         assert_eq!("".width(true), 0u);
1386         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1387         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1388     }
1389
1390     #[test]
1391     fn test_find() {
1392         assert_eq!("hello".find('l'), Some(2u));
1393         assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1394         assert!("hello".find('x').is_none());
1395         assert!("hello".find(|&: c:char| c == 'x').is_none());
1396         assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1397         assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1398     }
1399
1400     #[test]
1401     fn test_rfind() {
1402         assert_eq!("hello".rfind('l'), Some(3u));
1403         assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1404         assert!("hello".rfind('x').is_none());
1405         assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1406         assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1407         assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1408     }
1409
1410     #[test]
1411     fn test_collect() {
1412         let empty = String::from_str("");
1413         let s: String = empty.chars().collect();
1414         assert_eq!(empty, s);
1415         let data = String::from_str("ประเทศไทย中");
1416         let s: String = data.chars().collect();
1417         assert_eq!(data, s);
1418     }
1419
1420     #[test]
1421     fn test_into_bytes() {
1422         let data = String::from_str("asdf");
1423         let buf = data.into_bytes();
1424         assert_eq!(b"asdf", buf);
1425     }
1426
1427     #[test]
1428     fn test_find_str() {
1429         // byte positions
1430         assert_eq!("".find_str(""), Some(0u));
1431         assert!("banana".find_str("apple pie").is_none());
1432
1433         let data = "abcabc";
1434         assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1435         assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1436         assert!(data.slice(2u, 4u).find_str("ab").is_none());
1437
1438         let string = "ประเทศไทย中华Việt Nam";
1439         let mut data = String::from_str(string);
1440         data.push_str(string);
1441         assert!(data.find_str("ไท华").is_none());
1442         assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1443         assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1444
1445         assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1446         assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1447         assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1448         assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1449         assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1450
1451         assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1452         assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1453         assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1454         assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1455         assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1456     }
1457
1458     #[test]
1459     fn test_slice_chars() {
1460         fn t(a: &str, b: &str, start: uint) {
1461             assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1462         }
1463         t("", "", 0);
1464         t("hello", "llo", 2);
1465         t("hello", "el", 1);
1466         t("αβλ", "β", 1);
1467         t("αβλ", "", 3);
1468         assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1469     }
1470
1471     fn s(x: &str) -> String { x.to_string() }
1472
1473     macro_rules! test_concat {
1474         ($expected: expr, $string: expr) => {
1475             {
1476                 let s: String = $string.concat();
1477                 assert_eq!($expected, s);
1478             }
1479         }
1480     }
1481
1482     #[test]
1483     fn test_concat_for_different_types() {
1484         test_concat!("ab", vec![s("a"), s("b")]);
1485         test_concat!("ab", vec!["a", "b"]);
1486         test_concat!("ab", vec!["a", "b"].as_slice());
1487         test_concat!("ab", vec![s("a"), s("b")]);
1488     }
1489
1490     #[test]
1491     fn test_concat_for_different_lengths() {
1492         let empty: &[&str] = &[];
1493         test_concat!("", empty);
1494         test_concat!("a", ["a"]);
1495         test_concat!("ab", ["a", "b"]);
1496         test_concat!("abc", ["", "a", "bc"]);
1497     }
1498
1499     macro_rules! test_connect {
1500         ($expected: expr, $string: expr, $delim: expr) => {
1501             {
1502                 let s = $string.connect($delim);
1503                 assert_eq!($expected, s);
1504             }
1505         }
1506     }
1507
1508     #[test]
1509     fn test_connect_for_different_types() {
1510         test_connect!("a-b", ["a", "b"], "-");
1511         let hyphen = "-".to_string();
1512         test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1513         test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1514         test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1515         test_connect!("a-b", vec![s("a"), s("b")], "-");
1516     }
1517
1518     #[test]
1519     fn test_connect_for_different_lengths() {
1520         let empty: &[&str] = &[];
1521         test_connect!("", empty, "-");
1522         test_connect!("a", ["a"], "-");
1523         test_connect!("a-b", ["a", "b"], "-");
1524         test_connect!("-a-bc", ["", "a", "bc"], "-");
1525     }
1526
1527     #[test]
1528     fn test_unsafe_slice() {
1529         assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1530         assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1531         assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1532         fn a_million_letter_a() -> String {
1533             let mut i = 0u;
1534             let mut rs = String::new();
1535             while i < 100000 {
1536                 rs.push_str("aaaaaaaaaa");
1537                 i += 1;
1538             }
1539             rs
1540         }
1541         fn half_a_million_letter_a() -> String {
1542             let mut i = 0u;
1543             let mut rs = String::new();
1544             while i < 100000 {
1545                 rs.push_str("aaaaa");
1546                 i += 1;
1547             }
1548             rs
1549         }
1550         let letters = a_million_letter_a();
1551         assert!(half_a_million_letter_a() ==
1552             unsafe {String::from_str(letters.slice_unchecked(
1553                                      0u,
1554                                      500000))});
1555     }
1556
1557     #[test]
1558     fn test_starts_with() {
1559         assert!(("".starts_with("")));
1560         assert!(("abc".starts_with("")));
1561         assert!(("abc".starts_with("a")));
1562         assert!((!"a".starts_with("abc")));
1563         assert!((!"".starts_with("abc")));
1564         assert!((!"ödd".starts_with("-")));
1565         assert!(("ödd".starts_with("öd")));
1566     }
1567
1568     #[test]
1569     fn test_ends_with() {
1570         assert!(("".ends_with("")));
1571         assert!(("abc".ends_with("")));
1572         assert!(("abc".ends_with("c")));
1573         assert!((!"a".ends_with("abc")));
1574         assert!((!"".ends_with("abc")));
1575         assert!((!"ddö".ends_with("-")));
1576         assert!(("ddö".ends_with("dö")));
1577     }
1578
1579     #[test]
1580     fn test_is_empty() {
1581         assert!("".is_empty());
1582         assert!(!"a".is_empty());
1583     }
1584
1585     #[test]
1586     fn test_replace() {
1587         let a = "a";
1588         assert_eq!("".replace(a, "b"), String::from_str(""));
1589         assert_eq!("a".replace(a, "b"), String::from_str("b"));
1590         assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1591         let test = "test";
1592         assert!(" test test ".replace(test, "toast") ==
1593             String::from_str(" toast toast "));
1594         assert_eq!(" test test ".replace(test, ""), String::from_str("   "));
1595     }
1596
1597     #[test]
1598     fn test_replace_2a() {
1599         let data = "ประเทศไทย中华";
1600         let repl = "دولة الكويت";
1601
1602         let a = "ประเ";
1603         let a2 = "دولة الكويتทศไทย中华";
1604         assert_eq!(data.replace(a, repl), a2);
1605     }
1606
1607     #[test]
1608     fn test_replace_2b() {
1609         let data = "ประเทศไทย中华";
1610         let repl = "دولة الكويت";
1611
1612         let b = "ะเ";
1613         let b2 = "ปรدولة الكويتทศไทย中华";
1614         assert_eq!(data.replace(b, repl), b2);
1615     }
1616
1617     #[test]
1618     fn test_replace_2c() {
1619         let data = "ประเทศไทย中华";
1620         let repl = "دولة الكويت";
1621
1622         let c = "中华";
1623         let c2 = "ประเทศไทยدولة الكويت";
1624         assert_eq!(data.replace(c, repl), c2);
1625     }
1626
1627     #[test]
1628     fn test_replace_2d() {
1629         let data = "ประเทศไทย中华";
1630         let repl = "دولة الكويت";
1631
1632         let d = "ไท华";
1633         assert_eq!(data.replace(d, repl), data);
1634     }
1635
1636     #[test]
1637     fn test_slice() {
1638         assert_eq!("ab", "abc".slice(0, 2));
1639         assert_eq!("bc", "abc".slice(1, 3));
1640         assert_eq!("", "abc".slice(1, 1));
1641         assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1642
1643         let data = "ประเทศไทย中华";
1644         assert_eq!("ป", data.slice(0, 3));
1645         assert_eq!("ร", data.slice(3, 6));
1646         assert_eq!("", data.slice(3, 3));
1647         assert_eq!("华", data.slice(30, 33));
1648
1649         fn a_million_letter_x() -> String {
1650             let mut i = 0u;
1651             let mut rs = String::new();
1652             while i < 100000 {
1653                 rs.push_str("华华华华华华华华华华");
1654                 i += 1;
1655             }
1656             rs
1657         }
1658         fn half_a_million_letter_x() -> String {
1659             let mut i = 0u;
1660             let mut rs = String::new();
1661             while i < 100000 {
1662                 rs.push_str("华华华华华");
1663                 i += 1;
1664             }
1665             rs
1666         }
1667         let letters = a_million_letter_x();
1668         assert!(half_a_million_letter_x() ==
1669             String::from_str(letters.slice(0u, 3u * 500000u)));
1670     }
1671
1672     #[test]
1673     fn test_slice_2() {
1674         let ss = "中华Việt Nam";
1675
1676         assert_eq!("华", ss.slice(3u, 6u));
1677         assert_eq!("Việt Nam", ss.slice(6u, 16u));
1678
1679         assert_eq!("ab", "abc".slice(0u, 2u));
1680         assert_eq!("bc", "abc".slice(1u, 3u));
1681         assert_eq!("", "abc".slice(1u, 1u));
1682
1683         assert_eq!("中", ss.slice(0u, 3u));
1684         assert_eq!("华V", ss.slice(3u, 7u));
1685         assert_eq!("", ss.slice(3u, 3u));
1686         /*0: 中
1687           3: 华
1688           6: V
1689           7: i
1690           8: ệ
1691          11: t
1692          12:
1693          13: N
1694          14: a
1695          15: m */
1696     }
1697
1698     #[test]
1699     #[should_fail]
1700     fn test_slice_fail() {
1701         "中华Việt Nam".slice(0u, 2u);
1702     }
1703
1704     #[test]
1705     fn test_slice_from() {
1706         assert_eq!("abcd".slice_from(0), "abcd");
1707         assert_eq!("abcd".slice_from(2), "cd");
1708         assert_eq!("abcd".slice_from(4), "");
1709     }
1710     #[test]
1711     fn test_slice_to() {
1712         assert_eq!("abcd".slice_to(0), "");
1713         assert_eq!("abcd".slice_to(2), "ab");
1714         assert_eq!("abcd".slice_to(4), "abcd");
1715     }
1716
1717     #[test]
1718     fn test_trim_left_matches() {
1719         let v: &[char] = &[];
1720         assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1721         let chars: &[char] = &['*', ' '];
1722         assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1723         assert_eq!(" ***  *** ".trim_left_matches(chars), "");
1724         assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1725
1726         assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1727         let chars: &[char] = &['1', '2'];
1728         assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1729         assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1730     }
1731
1732     #[test]
1733     fn test_trim_right_matches() {
1734         let v: &[char] = &[];
1735         assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1736         let chars: &[char] = &['*', ' '];
1737         assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1738         assert_eq!(" ***  *** ".trim_right_matches(chars), "");
1739         assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1740
1741         assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1742         let chars: &[char] = &['1', '2'];
1743         assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1744         assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1745     }
1746
1747     #[test]
1748     fn test_trim_matches() {
1749         let v: &[char] = &[];
1750         assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1751         let chars: &[char] = &['*', ' '];
1752         assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1753         assert_eq!(" ***  *** ".trim_matches(chars), "");
1754         assert_eq!("foo".trim_matches(chars), "foo");
1755
1756         assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1757         let chars: &[char] = &['1', '2'];
1758         assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1759         assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
1760     }
1761
1762     #[test]
1763     fn test_trim_left() {
1764         assert_eq!("".trim_left(), "");
1765         assert_eq!("a".trim_left(), "a");
1766         assert_eq!("    ".trim_left(), "");
1767         assert_eq!("     blah".trim_left(), "blah");
1768         assert_eq!("   \u{3000}  wut".trim_left(), "wut");
1769         assert_eq!("hey ".trim_left(), "hey ");
1770     }
1771
1772     #[test]
1773     fn test_trim_right() {
1774         assert_eq!("".trim_right(), "");
1775         assert_eq!("a".trim_right(), "a");
1776         assert_eq!("    ".trim_right(), "");
1777         assert_eq!("blah     ".trim_right(), "blah");
1778         assert_eq!("wut   \u{3000}  ".trim_right(), "wut");
1779         assert_eq!(" hey".trim_right(), " hey");
1780     }
1781
1782     #[test]
1783     fn test_trim() {
1784         assert_eq!("".trim(), "");
1785         assert_eq!("a".trim(), "a");
1786         assert_eq!("    ".trim(), "");
1787         assert_eq!("    blah     ".trim(), "blah");
1788         assert_eq!("\nwut   \u{3000}  ".trim(), "wut");
1789         assert_eq!(" hey dude ".trim(), "hey dude");
1790     }
1791
1792     #[test]
1793     fn test_is_whitespace() {
1794         assert!("".chars().all(|c| c.is_whitespace()));
1795         assert!(" ".chars().all(|c| c.is_whitespace()));
1796         assert!("\u{2009}".chars().all(|c| c.is_whitespace())); // Thin space
1797         assert!("  \n\t   ".chars().all(|c| c.is_whitespace()));
1798         assert!(!"   _   ".chars().all(|c| c.is_whitespace()));
1799     }
1800
1801     #[test]
1802     fn test_slice_shift_char() {
1803         let data = "ประเทศไทย中";
1804         assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1805     }
1806
1807     #[test]
1808     fn test_slice_shift_char_2() {
1809         let empty = "";
1810         assert_eq!(empty.slice_shift_char(), None);
1811     }
1812
1813     #[test]
1814     fn test_is_utf8() {
1815         // deny overlong encodings
1816         assert!(from_utf8(&[0xc0, 0x80]).is_err());
1817         assert!(from_utf8(&[0xc0, 0xae]).is_err());
1818         assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1819         assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1820         assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1821         assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1822         assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1823
1824         // deny surrogates
1825         assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1826         assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1827
1828         assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1829         assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1830         assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1831         assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1832         assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1833         assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1834         assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1835         assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1836     }
1837
1838     #[test]
1839     fn test_is_utf16() {
1840         use unicode::str::is_utf16;
1841         macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1842
1843         // non-surrogates
1844         pos!(&[0x0000],
1845              &[0x0001, 0x0002],
1846              &[0xD7FF],
1847              &[0xE000]);
1848
1849         // surrogate pairs (randomly generated with Python 3's
1850         // .encode('utf-16be'))
1851         pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1852              &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1853              &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1854
1855         // mixtures (also random)
1856         pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1857              &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1858              &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1859
1860         // negative tests
1861         macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1862
1863         neg!(
1864             // surrogate + regular unit
1865             &[0xdb45, 0x0000],
1866             // surrogate + lead surrogate
1867             &[0xd900, 0xd900],
1868             // unterminated surrogate
1869             &[0xd8ff],
1870             // trail surrogate without a lead
1871             &[0xddb7]);
1872
1873         // random byte sequences that Python 3's .decode('utf-16be')
1874         // failed on
1875         neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1876              &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1877              &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1878              &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1879              &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1880              &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1881              &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1882              &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1883              &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1884              &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1885              &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1886              &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1887              &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1888              &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1889              &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1890              &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1891              &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1892              &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1893              &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1894              &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1895              &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1896     }
1897
1898     #[test]
1899     fn test_as_bytes() {
1900         // no null
1901         let v = [
1902             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1903             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1904             109
1905         ];
1906         let b: &[u8] = &[];
1907         assert_eq!("".as_bytes(), b);
1908         assert_eq!("abc".as_bytes(), b"abc");
1909         assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1910     }
1911
1912     #[test]
1913     #[should_fail]
1914     fn test_as_bytes_fail() {
1915         // Don't double free. (I'm not sure if this exercises the
1916         // original problem code path anymore.)
1917         let s = String::from_str("");
1918         let _bytes = s.as_bytes();
1919         panic!();
1920     }
1921
1922     #[test]
1923     fn test_as_ptr() {
1924         let buf = "hello".as_ptr();
1925         unsafe {
1926             assert_eq!(*buf.offset(0), b'h');
1927             assert_eq!(*buf.offset(1), b'e');
1928             assert_eq!(*buf.offset(2), b'l');
1929             assert_eq!(*buf.offset(3), b'l');
1930             assert_eq!(*buf.offset(4), b'o');
1931         }
1932     }
1933
1934     #[test]
1935     fn test_subslice_offset() {
1936         let a = "kernelsprite";
1937         let b = a.slice(7, a.len());
1938         let c = a.slice(0, a.len() - 6);
1939         assert_eq!(a.subslice_offset(b), 7);
1940         assert_eq!(a.subslice_offset(c), 0);
1941
1942         let string = "a\nb\nc";
1943         let lines: Vec<&str> = string.lines().collect();
1944         assert_eq!(string.subslice_offset(lines[0]), 0);
1945         assert_eq!(string.subslice_offset(lines[1]), 2);
1946         assert_eq!(string.subslice_offset(lines[2]), 4);
1947     }
1948
1949     #[test]
1950     #[should_fail]
1951     fn test_subslice_offset_2() {
1952         let a = "alchemiter";
1953         let b = "cruxtruder";
1954         a.subslice_offset(b);
1955     }
1956
1957     #[test]
1958     fn vec_str_conversions() {
1959         let s1: String = String::from_str("All mimsy were the borogoves");
1960
1961         let v: Vec<u8> = s1.as_bytes().to_vec();
1962         let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1963         let mut i: uint = 0u;
1964         let n1: uint = s1.len();
1965         let n2: uint = v.len();
1966         assert_eq!(n1, n2);
1967         while i < n1 {
1968             let a: u8 = s1.as_bytes()[i];
1969             let b: u8 = s2.as_bytes()[i];
1970             debug!("{}", a);
1971             debug!("{}", b);
1972             assert_eq!(a, b);
1973             i += 1u;
1974         }
1975     }
1976
1977     #[test]
1978     fn test_contains() {
1979         assert!("abcde".contains("bcd"));
1980         assert!("abcde".contains("abcd"));
1981         assert!("abcde".contains("bcde"));
1982         assert!("abcde".contains(""));
1983         assert!("".contains(""));
1984         assert!(!"abcde".contains("def"));
1985         assert!(!"".contains("a"));
1986
1987         let data = "ประเทศไทย中华Việt Nam";
1988         assert!(data.contains("ประเ"));
1989         assert!(data.contains("ะเ"));
1990         assert!(data.contains("中华"));
1991         assert!(!data.contains("ไท华"));
1992     }
1993
1994     #[test]
1995     fn test_contains_char() {
1996         assert!("abc".contains_char('b'));
1997         assert!("a".contains_char('a'));
1998         assert!(!"abc".contains_char('d'));
1999         assert!(!"".contains_char('a'));
2000     }
2001
2002     #[test]
2003     fn test_char_at() {
2004         let s = "ศไทย中华Việt Nam";
2005         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2006         let mut pos = 0;
2007         for ch in v.iter() {
2008             assert!(s.char_at(pos) == *ch);
2009             pos += ch.to_string().len();
2010         }
2011     }
2012
2013     #[test]
2014     fn test_char_at_reverse() {
2015         let s = "ศไทย中华Việt Nam";
2016         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2017         let mut pos = s.len();
2018         for ch in v.iter().rev() {
2019             assert!(s.char_at_reverse(pos) == *ch);
2020             pos -= ch.to_string().len();
2021         }
2022     }
2023
2024     #[test]
2025     fn test_escape_unicode() {
2026         assert_eq!("abc".escape_unicode(),
2027                    String::from_str("\\u{61}\\u{62}\\u{63}"));
2028         assert_eq!("a c".escape_unicode(),
2029                    String::from_str("\\u{61}\\u{20}\\u{63}"));
2030         assert_eq!("\r\n\t".escape_unicode(),
2031                    String::from_str("\\u{d}\\u{a}\\u{9}"));
2032         assert_eq!("'\"\\".escape_unicode(),
2033                    String::from_str("\\u{27}\\u{22}\\u{5c}"));
2034         assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2035                    String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2036         assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2037                    String::from_str("\\u{100}\\u{ffff}"));
2038         assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2039                    String::from_str("\\u{10000}\\u{10ffff}"));
2040         assert_eq!("ab\u{fb00}".escape_unicode(),
2041                    String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2042         assert_eq!("\u{1d4ea}\r".escape_unicode(),
2043                    String::from_str("\\u{1d4ea}\\u{d}"));
2044     }
2045
2046     #[test]
2047     fn test_escape_default() {
2048         assert_eq!("abc".escape_default(), String::from_str("abc"));
2049         assert_eq!("a c".escape_default(), String::from_str("a c"));
2050         assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2051         assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2052         assert_eq!("\u{100}\u{ffff}".escape_default(),
2053                    String::from_str("\\u{100}\\u{ffff}"));
2054         assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2055                    String::from_str("\\u{10000}\\u{10ffff}"));
2056         assert_eq!("ab\u{fb00}".escape_default(),
2057                    String::from_str("ab\\u{fb00}"));
2058         assert_eq!("\u{1d4ea}\r".escape_default(),
2059                    String::from_str("\\u{1d4ea}\\r"));
2060     }
2061
2062     #[test]
2063     fn test_total_ord() {
2064         "1234".cmp("123") == Greater;
2065         "123".cmp("1234") == Less;
2066         "1234".cmp("1234") == Equal;
2067         "12345555".cmp("123456") == Less;
2068         "22".cmp("1234") == Greater;
2069     }
2070
2071     #[test]
2072     fn test_char_range_at() {
2073         let data = "b¢€𤭢𤭢€¢b";
2074         assert_eq!('b', data.char_range_at(0).ch);
2075         assert_eq!('¢', data.char_range_at(1).ch);
2076         assert_eq!('€', data.char_range_at(3).ch);
2077         assert_eq!('𤭢', data.char_range_at(6).ch);
2078         assert_eq!('𤭢', data.char_range_at(10).ch);
2079         assert_eq!('€', data.char_range_at(14).ch);
2080         assert_eq!('¢', data.char_range_at(17).ch);
2081         assert_eq!('b', data.char_range_at(19).ch);
2082     }
2083
2084     #[test]
2085     fn test_char_range_at_reverse_underflow() {
2086         assert_eq!("abc".char_range_at_reverse(0).next, 0);
2087     }
2088
2089     #[test]
2090     fn test_iterator() {
2091         let s = "ศไทย中华Việt Nam";
2092         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2093
2094         let mut pos = 0;
2095         let mut it = s.chars();
2096
2097         for c in it {
2098             assert_eq!(c, v[pos]);
2099             pos += 1;
2100         }
2101         assert_eq!(pos, v.len());
2102     }
2103
2104     #[test]
2105     fn test_rev_iterator() {
2106         let s = "ศไทย中华Việt Nam";
2107         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2108
2109         let mut pos = 0;
2110         let mut it = s.chars().rev();
2111
2112         for c in it {
2113             assert_eq!(c, v[pos]);
2114             pos += 1;
2115         }
2116         assert_eq!(pos, v.len());
2117     }
2118
2119     #[test]
2120     fn test_chars_decoding() {
2121         let mut bytes = [0u8; 4];
2122         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2123             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2124             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2125             if Some(c) != s.chars().next() {
2126                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2127             }
2128         }
2129     }
2130
2131     #[test]
2132     fn test_chars_rev_decoding() {
2133         let mut bytes = [0u8; 4];
2134         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2135             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2136             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2137             if Some(c) != s.chars().rev().next() {
2138                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2139             }
2140         }
2141     }
2142
2143     #[test]
2144     fn test_iterator_clone() {
2145         let s = "ศไทย中华Việt Nam";
2146         let mut it = s.chars();
2147         it.next();
2148         assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2149     }
2150
2151     #[test]
2152     fn test_bytesator() {
2153         let s = "ศไทย中华Việt Nam";
2154         let v = [
2155             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2156             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2157             109
2158         ];
2159         let mut pos = 0;
2160
2161         for b in s.bytes() {
2162             assert_eq!(b, v[pos]);
2163             pos += 1;
2164         }
2165     }
2166
2167     #[test]
2168     fn test_bytes_revator() {
2169         let s = "ศไทย中华Việt Nam";
2170         let v = [
2171             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2172             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2173             109
2174         ];
2175         let mut pos = v.len();
2176
2177         for b in s.bytes().rev() {
2178             pos -= 1;
2179             assert_eq!(b, v[pos]);
2180         }
2181     }
2182
2183     #[test]
2184     fn test_char_indicesator() {
2185         let s = "ศไทย中华Việt Nam";
2186         let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2187         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2188
2189         let mut pos = 0;
2190         let mut it = s.char_indices();
2191
2192         for c in it {
2193             assert_eq!(c, (p[pos], v[pos]));
2194             pos += 1;
2195         }
2196         assert_eq!(pos, v.len());
2197         assert_eq!(pos, p.len());
2198     }
2199
2200     #[test]
2201     fn test_char_indices_revator() {
2202         let s = "ศไทย中华Việt Nam";
2203         let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2204         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2205
2206         let mut pos = 0;
2207         let mut it = s.char_indices().rev();
2208
2209         for c in it {
2210             assert_eq!(c, (p[pos], v[pos]));
2211             pos += 1;
2212         }
2213         assert_eq!(pos, v.len());
2214         assert_eq!(pos, p.len());
2215     }
2216
2217     #[test]
2218     fn test_splitn_char_iterator() {
2219         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2220
2221         let split: Vec<&str> = data.splitn(3, ' ').collect();
2222         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2223
2224         let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2225         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2226
2227         // Unicode
2228         let split: Vec<&str> = data.splitn(3, 'ä').collect();
2229         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2230
2231         let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2232         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2233     }
2234
2235     #[test]
2236     fn test_split_char_iterator_no_trailing() {
2237         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2238
2239         let split: Vec<&str> = data.split('\n').collect();
2240         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2241
2242         let split: Vec<&str> = data.split_terminator('\n').collect();
2243         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2244     }
2245
2246     #[test]
2247     fn test_words() {
2248         let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
2249         let words: Vec<&str> = data.words().collect();
2250         assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2251     }
2252
2253     #[test]
2254     fn test_nfd_chars() {
2255         macro_rules! t {
2256             ($input: expr, $expected: expr) => {
2257                 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2258             }
2259         }
2260         t!("abc", "abc");
2261         t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2262         t!("\u{2026}", "\u{2026}");
2263         t!("\u{2126}", "\u{3a9}");
2264         t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2265         t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2266         t!("a\u{301}", "a\u{301}");
2267         t!("\u{301}a", "\u{301}a");
2268         t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2269         t!("\u{ac1c}", "\u{1100}\u{1162}");
2270     }
2271
2272     #[test]
2273     fn test_nfkd_chars() {
2274         macro_rules! t {
2275             ($input: expr, $expected: expr) => {
2276                 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2277             }
2278         }
2279         t!("abc", "abc");
2280         t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2281         t!("\u{2026}", "...");
2282         t!("\u{2126}", "\u{3a9}");
2283         t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2284         t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2285         t!("a\u{301}", "a\u{301}");
2286         t!("\u{301}a", "\u{301}a");
2287         t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2288         t!("\u{ac1c}", "\u{1100}\u{1162}");
2289     }
2290
2291     #[test]
2292     fn test_nfc_chars() {
2293         macro_rules! t {
2294             ($input: expr, $expected: expr) => {
2295                 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2296             }
2297         }
2298         t!("abc", "abc");
2299         t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2300         t!("\u{2026}", "\u{2026}");
2301         t!("\u{2126}", "\u{3a9}");
2302         t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2303         t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2304         t!("a\u{301}", "\u{e1}");
2305         t!("\u{301}a", "\u{301}a");
2306         t!("\u{d4db}", "\u{d4db}");
2307         t!("\u{ac1c}", "\u{ac1c}");
2308         t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2309     }
2310
2311     #[test]
2312     fn test_nfkc_chars() {
2313         macro_rules! t {
2314             ($input: expr, $expected: expr) => {
2315                 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2316             }
2317         }
2318         t!("abc", "abc");
2319         t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2320         t!("\u{2026}", "...");
2321         t!("\u{2126}", "\u{3a9}");
2322         t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2323         t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2324         t!("a\u{301}", "\u{e1}");
2325         t!("\u{301}a", "\u{301}a");
2326         t!("\u{d4db}", "\u{d4db}");
2327         t!("\u{ac1c}", "\u{ac1c}");
2328         t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2329     }
2330
2331     #[test]
2332     fn test_lines() {
2333         let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2334         let lines: Vec<&str> = data.lines().collect();
2335         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2336
2337         let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2338         let lines: Vec<&str> = data.lines().collect();
2339         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2340     }
2341
2342     #[test]
2343     fn test_graphemes() {
2344         use core::iter::order;
2345         // official Unicode test data
2346         // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2347         let test_same: [(_, &[_]); 325] = [
2348             ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2349             ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2350             ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2351             ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2352             ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2353             ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2354             ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2355             ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2356             ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2357             ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2358             ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2359             ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2360             ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2361             ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2362             ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2363             ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2364             ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2365             ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2366             ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2367             ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2368             ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2369             ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2370             ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2371             ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2372             ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2373             ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2374             ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2375             ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2376             ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2377             ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2378             ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2379             ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2380             ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2381             ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2382             ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2383             ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2384             ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2385             ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2386             ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2387             ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2388             ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2389             ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2390             ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2391             ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2392             ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2393             ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2394             ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2395             ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2396             ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2397             ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2398             ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2399             ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2400             ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2401             ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2402             ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2403             ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2404             ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2405             ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2406             ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2407             ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2408             ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2409             ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2410             ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2411             ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2412             ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2413             ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2414             ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2415             ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2416             ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2417             ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2418             ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2419             ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2420             ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2421             ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2422             ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2423             ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2424             ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2425             ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2426             ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2427             ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2428             ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2429             ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2430             ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2431             ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2432             ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2433             ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2434             ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2435             ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2436             ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2437             ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2438             ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2439             ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2440             ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2441             ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2442             ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2443             ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2444             ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2445             ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2446             ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2447             ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2448             ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2449             ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2450             ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2451             ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2452             ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2453             ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2454             ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2455             ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2456             ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2457             ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2458             ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2459             ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2460             ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2461             ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2462             ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2463             ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2464             ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2465             ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2466             ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2467             ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2468             ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2469             ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2470             ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2471             ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2472             ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2473             ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2474             ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2475             ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2476             ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2477             ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2478             ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2479             ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2480             ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2481             ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2482             ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2483             ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2484             ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2485             ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2486             ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2487             ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2488             ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2489             ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2490             ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2491             ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2492             ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2493             ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2494             ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2495             ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2496             ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2497             ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2498             ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2499             ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2500             ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2501             ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2502             ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2503             ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2504             ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2505             ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2506             ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2507             ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2508             ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2509             ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2510             ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2511             ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2512             ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2513             ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2514             ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2515             ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2516             ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2517             ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2518             ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2519             ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2520             ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2521             ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2522             ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2523             ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2524             ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2525             ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2526             ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2527             ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2528             ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2529             ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2530             ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2531             ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2532             ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2533             ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2534             ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2535             ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2536             ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2537             ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2538             ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2539             ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2540             ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2541             ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2542             ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2543             ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2544             ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2545             ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2546             ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2547             ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2548             ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2549             ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2550             ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2551             ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2552             ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2553             ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2554             ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2555             ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2556             ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2557             ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2558             ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2559             ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2560             ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2561             ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2562             ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2563             ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2564             ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2565             ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2566             ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2567             ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2568             ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2569             ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2570             ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2571             ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2572             ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2573             ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2574             ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2575             ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2576             ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2577             ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2578             ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2579             ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2580             ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2581             ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2582             ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2583             ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2584             ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2585             ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2586             ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2587             ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2588             ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2589             ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2590             ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2591             ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2592             ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2593             ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2594             ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2595             ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2596             ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2597             ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2598             ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2599             ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2600             ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2601             ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2602             ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2603             ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2604             ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2605             ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2606             ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2607             ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2608             ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2609             ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2610             ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2611             ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2612             ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2613             ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2614             ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2615             ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2616             ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2617             ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2618             ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2619             ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2620             ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2621             ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2622             ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2623             ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2624             ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2625             ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2626             ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2627             ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2628             ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2629             ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2630             ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2631             ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2632             ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2633             ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2634             ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2635             ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2636             ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2637             ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2638             ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2639             ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2640             ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2641             ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2642             ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2643             ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2644             ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2645             ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2646             ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2647             ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2648             ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2649             ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2650             ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2651             ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2652             ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2653             ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2654             ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2655             ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2656             ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2657             ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2658             ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2659             ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2660             ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2661             ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2662             ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2663             ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2664             ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2665             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2666             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2667             &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2668             ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2669              &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2670             ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2671             ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2672              "\u{1F1E7}\u{1F1E8}"]),
2673             ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2674              &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2675             ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2676             ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2677         ];
2678
2679         let test_diff: [(_, &[_], &[_]); 23] = [
2680             ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2681             &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2682             &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2683             &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2684             &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2685             &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2686             &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2687             &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2688             &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2689             &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2690             &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2691             &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2692             &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2693             &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2694             &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2695             &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2696             &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2697             &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2698             &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2699             &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2700             &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2701             &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2702             &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2703         ];
2704
2705         for &(s, g) in test_same.iter() {
2706             // test forward iterator
2707             assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2708             assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2709
2710             // test reverse iterator
2711             assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2712             assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2713         }
2714
2715         for &(s, gt, gf) in test_diff.iter() {
2716             // test forward iterator
2717             assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2718             assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2719
2720             // test reverse iterator
2721             assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2722             assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2723         }
2724
2725         // test the indices iterators
2726         let s = "a̐éö̲\r\n";
2727         let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2728         let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2729         assert_eq!(gr_inds, b);
2730         let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2731         let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2732         assert_eq!(gr_inds, b);
2733         let mut gr_inds_iter = s.grapheme_indices(true);
2734         {
2735             let gr_inds = gr_inds_iter.by_ref();
2736             let e1 = gr_inds.size_hint();
2737             assert_eq!(e1, (1, Some(13)));
2738             let c = gr_inds.count();
2739             assert_eq!(c, 4);
2740         }
2741         let e2 = gr_inds_iter.size_hint();
2742         assert_eq!(e2, (0, Some(0)));
2743
2744         // make sure the reverse iterator does the right thing with "\n" at beginning of string
2745         let s = "\n\r\n\r";
2746         let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2747         let b: &[_] = &["\r", "\r\n", "\n"];
2748         assert_eq!(gr, b);
2749     }
2750
2751     #[test]
2752     fn test_split_strator() {
2753         fn t(s: &str, sep: &str, u: &[&str]) {
2754             let v: Vec<&str> = s.split_str(sep).collect();
2755             assert_eq!(v, u);
2756         }
2757         t("--1233345--", "12345", &["--1233345--"]);
2758         t("abc::hello::there", "::", &["abc", "hello", "there"]);
2759         t("::hello::there", "::", &["", "hello", "there"]);
2760         t("hello::there::", "::", &["hello", "there", ""]);
2761         t("::hello::there::", "::", &["", "hello", "there", ""]);
2762         t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2763         t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2764         t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2765         t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
2766         t("", ".", &[""]);
2767         t("zz", "zz", &["",""]);
2768         t("ok", "z", &["ok"]);
2769         t("zzz", "zz", &["","z"]);
2770         t("zzzzz", "zz", &["","","z"]);
2771     }
2772
2773     #[test]
2774     fn test_str_default() {
2775         use core::default::Default;
2776         fn t<S: Default + Str>() {
2777             let s: S = Default::default();
2778             assert_eq!(s.as_slice(), "");
2779         }
2780
2781         t::<&str>();
2782         t::<String>();
2783     }
2784
2785     #[test]
2786     fn test_str_container() {
2787         fn sum_len(v: &[&str]) -> uint {
2788             v.iter().map(|x| x.len()).sum()
2789         }
2790
2791         let s = String::from_str("01234");
2792         assert_eq!(5, sum_len(&["012", "", "34"]));
2793         assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2794                                 String::from_str("2").as_slice(),
2795                                 String::from_str("34").as_slice(),
2796                                 String::from_str("").as_slice()]));
2797         assert_eq!(5, sum_len(&[s.as_slice()]));
2798     }
2799
2800     #[test]
2801     fn test_str_from_utf8() {
2802         let xs = b"hello";
2803         assert_eq!(from_utf8(xs), Ok("hello"));
2804
2805         let xs = "ศไทย中华Việt Nam".as_bytes();
2806         assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
2807
2808         let xs = b"hello\xFF";
2809         assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2810     }
2811 }
2812
2813 #[cfg(test)]
2814 mod bench {
2815     use super::*;
2816     use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2817     use test::Bencher;
2818     use test::black_box;
2819
2820     #[bench]
2821     fn char_iterator(b: &mut Bencher) {
2822         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2823
2824         b.iter(|| s.chars().count());
2825     }
2826
2827     #[bench]
2828     fn char_iterator_for(b: &mut Bencher) {
2829         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2830
2831         b.iter(|| {
2832             for ch in s.chars() { black_box(ch) }
2833         });
2834     }
2835
2836     #[bench]
2837     fn char_iterator_ascii(b: &mut Bencher) {
2838         let s = "Mary had a little lamb, Little lamb
2839         Mary had a little lamb, Little lamb
2840         Mary had a little lamb, Little lamb
2841         Mary had a little lamb, Little lamb
2842         Mary had a little lamb, Little lamb
2843         Mary had a little lamb, Little lamb";
2844
2845         b.iter(|| s.chars().count());
2846     }
2847
2848     #[bench]
2849     fn char_iterator_rev(b: &mut Bencher) {
2850         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2851
2852         b.iter(|| s.chars().rev().count());
2853     }
2854
2855     #[bench]
2856     fn char_iterator_rev_for(b: &mut Bencher) {
2857         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2858
2859         b.iter(|| {
2860             for ch in s.chars().rev() { black_box(ch) }
2861         });
2862     }
2863
2864     #[bench]
2865     fn char_indicesator(b: &mut Bencher) {
2866         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2867         let len = s.chars().count();
2868
2869         b.iter(|| assert_eq!(s.char_indices().count(), len));
2870     }
2871
2872     #[bench]
2873     fn char_indicesator_rev(b: &mut Bencher) {
2874         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2875         let len = s.chars().count();
2876
2877         b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
2878     }
2879
2880     #[bench]
2881     fn split_unicode_ascii(b: &mut Bencher) {
2882         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2883
2884         b.iter(|| assert_eq!(s.split('V').count(), 3));
2885     }
2886
2887     #[bench]
2888     fn split_unicode_not_ascii(b: &mut Bencher) {
2889         struct NotAscii(char);
2890         impl CharEq for NotAscii {
2891             fn matches(&mut self, c: char) -> bool {
2892                 let NotAscii(cc) = *self;
2893                 cc == c
2894             }
2895             fn only_ascii(&self) -> bool { false }
2896         }
2897         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2898
2899         b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2900     }
2901
2902
2903     #[bench]
2904     fn split_ascii(b: &mut Bencher) {
2905         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2906         let len = s.split(' ').count();
2907
2908         b.iter(|| assert_eq!(s.split(' ').count(), len));
2909     }
2910
2911     #[bench]
2912     fn split_not_ascii(b: &mut Bencher) {
2913         struct NotAscii(char);
2914         impl CharEq for NotAscii {
2915             #[inline]
2916             fn matches(&mut self, c: char) -> bool {
2917                 let NotAscii(cc) = *self;
2918                 cc == c
2919             }
2920             fn only_ascii(&self) -> bool { false }
2921         }
2922         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2923         let len = s.split(' ').count();
2924
2925         b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2926     }
2927
2928     #[bench]
2929     fn split_extern_fn(b: &mut Bencher) {
2930         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2931         let len = s.split(' ').count();
2932         fn pred(c: char) -> bool { c == ' ' }
2933
2934         b.iter(|| assert_eq!(s.split(pred).count(), len));
2935     }
2936
2937     #[bench]
2938     fn split_closure(b: &mut Bencher) {
2939         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2940         let len = s.split(' ').count();
2941
2942         b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
2943     }
2944
2945     #[bench]
2946     fn split_slice(b: &mut Bencher) {
2947         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2948         let len = s.split(' ').count();
2949
2950         let c: &[char] = &[' '];
2951         b.iter(|| assert_eq!(s.split(c).count(), len));
2952     }
2953
2954     #[bench]
2955     fn bench_connect(b: &mut Bencher) {
2956         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2957         let sep = "→";
2958         let v = vec![s, s, s, s, s, s, s, s, s, s];
2959         b.iter(|| {
2960             assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2961         })
2962     }
2963
2964     #[bench]
2965     fn bench_contains_short_short(b: &mut Bencher) {
2966         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2967         let needle = "sit";
2968
2969         b.iter(|| {
2970             assert!(haystack.contains(needle));
2971         })
2972     }
2973
2974     #[bench]
2975     fn bench_contains_short_long(b: &mut Bencher) {
2976         let haystack = "\
2977 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2978 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2979 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2980 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2981 tempus vel, gravida nec quam.
2982
2983 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2984 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2985 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2986 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2987 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2988 interdum. Curabitur ut nisi justo.
2989
2990 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2991 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2992 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2993 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2994 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2995 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2996 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2997 Aliquam sit amet placerat lorem.
2998
2999 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3000 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3001 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3002 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3003 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3004 cursus accumsan.
3005
3006 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3007 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3008 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3009 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3010 malesuada sollicitudin quam eu fermentum.";
3011         let needle = "english";
3012
3013         b.iter(|| {
3014             assert!(!haystack.contains(needle));
3015         })
3016     }
3017
3018     #[bench]
3019     fn bench_contains_bad_naive(b: &mut Bencher) {
3020         let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3021         let needle = "aaaaaaaab";
3022
3023         b.iter(|| {
3024             assert!(!haystack.contains(needle));
3025         })
3026     }
3027
3028     #[bench]
3029     fn bench_contains_equal(b: &mut Bencher) {
3030         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3031         let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3032
3033         b.iter(|| {
3034             assert!(haystack.contains(needle));
3035         })
3036     }
3037 }