src/libcollections/str.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10 //
  11 // ignore-lexer-test FIXME #15679
  12
  13 //! Unicode string manipulation (`str` type)
  14 //!
  15 //! # Basic Usage
  16 //!
  17 //! Rust's string type is one of the core primitive types of the language. While
  18 //! represented by the name `str`, the name `str` is not actually a valid type in
  19 //! Rust. Each string must also be decorated with a pointer. `String` is used
  20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
  21 //! `&str`.
  22 //!
//! `&str` is the borrowed string type. This type of string can only be created
//! from other strings, unless it is a static string (see below). As the word
//! "borrowed" implies, this type of string is owned elsewhere, so it cannot be
//! moved out of.
  27 //!
  28 //! As an example, here's some code that uses a string.
  29 //!
  30 //! ```rust
  31 //! fn main() {
  32 //!     let borrowed_string = "This string is borrowed with the 'static lifetime";
  33 //! }
  34 //! ```
  35 //!
  36 //! From the example above, you can guess that Rust's string literals have the
  37 //! `'static` lifetime. This is akin to C's concept of a static string.
  38 //! More precisely, string literals are immutable views with a 'static lifetime
  39 //! (otherwise known as the lifetime of the entire program), and thus have the
  40 //! type `&'static str`.
  41 //!
  42 //! # Representation
  43 //!
  44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
  45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
  46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
  47 //! not null-terminated and can thus contain null bytes.
  48 //!
//! The actual representation of a string maps directly onto a slice: `&str`
//! is the same as `&[u8]`.
  51
  52 #![doc(primitive = "str")]
  53
  54 use self::MaybeOwned::*;
  55 use self::RecompositionState::*;
  56 use self::DecompositionType::*;
  57
  58 use core::borrow::{BorrowFrom, Cow, ToOwned};
  59 use core::char::Char;
  60 use core::clone::Clone;
  61 use core::cmp::{Equiv, PartialEq, Eq, PartialOrd, Ord, Ordering};
  62 use core::cmp;
  63 use core::default::Default;
  64 use core::fmt;
  65 use core::hash;
  66 use core::iter::AdditiveIterator;
  67 use core::iter::{mod, range, Iterator, IteratorExt};
  68 use core::kinds::Sized;
  69 use core::ops;
  70 use core::option::Option::{mod, Some, None};
  71 use core::slice::AsSlice;
  72 use core::str as core_str;
  73 use unicode::str::{UnicodeStr, Utf16Encoder};
  74
  75 use ring_buf::RingBuf;
  76 use slice::SliceExt;
  77 use string::String;
  78 use unicode;
  79 use vec::Vec;
  80 use slice::SliceConcatExt;
  81
  82 pub use core::str::{from_utf8, CharEq, Chars, CharIndices};
  83 pub use core::str::{Bytes, CharSplits, is_utf8};
  84 pub use core::str::{CharSplitsN, Lines, LinesAny, MatchIndices, StrSplits, SplitStr};
  85 pub use core::str::{CharRange};
  86 pub use core::str::{FromStr, from_str, Utf8Error};
  87 pub use core::str::Str;
  88 pub use core::str::{from_utf8_unchecked, from_c_str};
  89 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
  90 pub use core::str::{Split, SplitTerminator};
  91 pub use core::str::{SplitN, RSplitN};
  92
  93 /*
  94 Section: Creating a string
  95 */
  96
  97 impl<S: Str> SliceConcatExt<str, String> for [S] {
  98     fn concat(&self) -> String {
  99         let s = self.as_slice();
 100
 101         if s.is_empty() {
 102             return String::new();
 103         }
 104
 105         // `len` calculation may overflow but push_str will check boundaries
 106         let len = s.iter().map(|s| s.as_slice().len()).sum();
 107         let mut result = String::with_capacity(len);
 108
 109         for s in s.iter() {
 110             result.push_str(s.as_slice())
 111         }
 112
 113         result
 114     }
 115
 116     fn connect(&self, sep: &str) -> String {
 117         let s = self.as_slice();
 118
 119         if s.is_empty() {
 120             return String::new();
 121         }
 122
 123         // concat is faster
 124         if sep.is_empty() {
 125             return s.concat();
 126         }
 127
 128         // this is wrong without the guarantee that `self` is non-empty
 129         // `len` calculation may overflow but push_str but will check boundaries
 130         let len = sep.len() * (s.len() - 1)
 131             + s.iter().map(|s| s.as_slice().len()).sum();
 132         let mut result = String::with_capacity(len);
 133         let mut first = true;
 134
 135         for s in s.iter() {
 136             if first {
 137                 first = false;
 138             } else {
 139                 result.push_str(sep);
 140             }
 141             result.push_str(s.as_slice());
 142         }
 143         result
 144     }
 145 }
 146
 147 /*
 148 Section: Iterators
 149 */
 150
 151 // Helper functions used for Unicode normalization
 152 fn canonical_sort(comb: &mut [(char, u8)]) {
 153     let len = comb.len();
 154     for i in range(0, len) {
 155         let mut swapped = false;
 156         for j in range(1, len-i) {
 157             let class_a = comb[j-1].1;
 158             let class_b = comb[j].1;
 159             if class_a != 0 && class_b != 0 && class_a > class_b {
 160                 comb.swap(j-1, j);
 161                 swapped = true;
 162             }
 163         }
 164         if !swapped { break; }
 165     }
 166 }
 167
 168 #[deriving(Clone)]
 169 enum DecompositionType {
 170     Canonical,
 171     Compatible
 172 }
 173
/// External iterator over the characters of a string's decomposition.
/// Use with the `std::iter` module.
///
/// Characters are pulled from the underlying `Chars` iterator, decomposed
/// (canonically or by compatibility, depending on `kind`) and buffered
/// together with their canonical combining class so that they can be
/// reordered with `canonical_sort` before being handed out.
#[deriving(Clone)]
pub struct Decompositions<'a> {
    // Which decomposition routine is applied to each input character.
    kind: DecompositionType,
    // Source of the not-yet-decomposed characters.
    iter: Chars<'a>,
    // Decomposed characters waiting to be emitted, paired with the combining
    // class reported by `unicode::char::canonical_combining_class`.
    buffer: Vec<(char, u8)>,
    // Whether the front of `buffer` has already been passed through
    // `canonical_sort` and may therefore be emitted.
    sorted: bool
}

impl<'a> Iterator<char> for Decompositions<'a> {
    /// Returns the next character of the decomposition, or `None` once both
    /// the input and the internal buffer are exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        // Fast path: emit straight from the buffer when its front element is
        // known to be in its final position.
        match self.buffer.first() {
            // A class-0 character at the front can always be emitted; what
            // follows it has not been sorted yet.
            Some(&(c, 0)) => {
                self.sorted = false;
                self.buffer.remove(0);
                return Some(c);
            }
            // A character with a non-zero class may only be emitted once the
            // buffer has been sorted.
            Some(&(c, _)) if self.sorted => {
                self.buffer.remove(0);
                return Some(c);
            }
            // Empty buffer, or unsorted non-zero class at the front: refill.
            _ => self.sorted = false
        }

        if !self.sorted {
            for ch in self.iter {
                // Separate borrows of the two fields so the closure below can
                // capture them while `self.kind` is still readable.
                let buffer = &mut self.buffer;
                let sorted = &mut self.sorted;
                {
                    // Invoked by the decomposition routine for every
                    // character `d` that `ch` decomposes into.
                    let callback = |d| {
                        let class =
                            unicode::char::canonical_combining_class(d);
                        // The first class-0 character closes the current run:
                        // sort what was buffered before it, exactly once.
                        if class == 0 && !*sorted {
                            canonical_sort(buffer.as_mut_slice());
                            *sorted = true;
                        }
                        buffer.push((d, class));
                    };
                    match self.kind {
                        Canonical => {
                            unicode::char::decompose_canonical(ch, callback)
                        }
                        Compatible => {
                            unicode::char::decompose_compatible(ch, callback)
                        }
                    }
                }
                // Stop reading input as soon as a sorted run is available.
                if *sorted {
                    break
                }
            }
        }

        // The input ended without a class-0 character closing the run: sort
        // whatever is left in the buffer.
        if !self.sorted {
            canonical_sort(self.buffer.as_mut_slice());
            self.sorted = true;
        }

        if self.buffer.is_empty() {
            None
        } else {
            match self.buffer.remove(0) {
                // Emitting a class-0 character leaves the rest of the buffer
                // unsorted (see the fast path above).
                (c, 0) => {
                    self.sorted = false;
                    Some(c)
                }
                (c, _) => Some(c),
            }
        }
    }

    /// Reports the lower bound of the underlying `Chars` iterator and no
    /// upper bound.
    fn size_hint(&self) -> (uint, Option<uint>) {
        let (lower, _) = self.iter.size_hint();
        (lower, None)
    }
}
 252
 253 #[deriving(Clone)]
 254 enum RecompositionState {
 255     Composing,
 256     Purging,
 257     Finished
 258 }
 259
/// External iterator over the characters of a string's recomposition.
/// Use with the `std::iter` module.
///
/// Wraps a `Decompositions` iterator and tries to merge each decomposed
/// character into the current `composee` through `unicode::char::compose`.
#[deriving(Clone)]
pub struct Recompositions<'a> {
    // Source of decomposed characters.
    iter: Decompositions<'a>,
    // Current phase of the state machine in `next`.
    state: RecompositionState,
    // Characters that could not be composed and are waiting to be emitted
    // after the current `composee`.
    buffer: RingBuf<char>,
    // Character that following characters are currently composed onto.
    composee: Option<char>,
    // Combining class of the character most recently pushed to `buffer`, or
    // `None` when nothing has been buffered since the last reset.
    last_ccc: Option<u8>
}

impl<'a> Iterator<char> for Recompositions<'a> {
    /// Returns the next character of the recomposition, or `None` once the
    /// input, the buffer and the pending `composee` are all exhausted.
    #[inline]
    fn next(&mut self) -> Option<char> {
        loop {
            match self.state {
                Composing => {
                    for ch in self.iter {
                        let ch_class = unicode::char::canonical_combining_class(ch);
                        if self.composee.is_none() {
                            // Nothing to compose onto: a non-zero class
                            // character is emitted as is, a class-0 one
                            // becomes the new composee.
                            if ch_class != 0 {
                                return Some(ch);
                            }
                            self.composee = Some(ch);
                            continue;
                        }
                        // `composee` was checked to be `Some` just above.
                        let k = self.composee.clone().unwrap();

                        match self.last_ccc {
                            // Nothing buffered yet: `ch` directly follows the
                            // composee.
                            None => {
                                match unicode::char::compose(k, ch) {
                                    Some(r) => {
                                        self.composee = Some(r);
                                        continue;
                                    }
                                    None => {
                                        // A class-0 character that does not
                                        // compose replaces the composee; the
                                        // old one is emitted.
                                        if ch_class == 0 {
                                            self.composee = Some(ch);
                                            return Some(k);
                                        }
                                        self.buffer.push_back(ch);
                                        self.last_ccc = Some(ch_class);
                                    }
                                }
                            }
                            Some(l_class) => {
                                if l_class >= ch_class {
                                    // `ch` is blocked from `composee`
                                    if ch_class == 0 {
                                        // Emit the old composee now and drain
                                        // the buffer (`Purging`) before
                                        // composing onto the new one.
                                        self.composee = Some(ch);
                                        self.last_ccc = None;
                                        self.state = Purging;
                                        return Some(k);
                                    }
                                    self.buffer.push_back(ch);
                                    self.last_ccc = Some(ch_class);
                                    continue;
                                }
                                // Not blocked: composing is still attempted.
                                match unicode::char::compose(k, ch) {
                                    Some(r) => {
                                        self.composee = Some(r);
                                        continue;
                                    }
                                    None => {
                                        self.buffer.push_back(ch);
                                        self.last_ccc = Some(ch_class);
                                    }
                                }
                            }
                        }
                    }
                    // Input exhausted: emit the pending composee first, the
                    // buffered characters follow in the `Finished` state.
                    self.state = Finished;
                    if self.composee.is_some() {
                        return self.composee.take();
                    }
                }
                Purging => {
                    // Drain the buffer, then resume composing.
                    match self.buffer.pop_front() {
                        None => self.state = Composing,
                        s => return s
                    }
                }
                Finished => {
                    // Drain the buffer; afterwards hand out whatever is left
                    // in `composee` (`None` once it has been taken).
                    match self.buffer.pop_front() {
                        None => return self.composee.take(),
                        s => return s
                    }
                }
            }
        }
    }
}
 352
 353 /// External iterator for a string's UTF16 codeunits.
 354 /// Use with the `std::iter` module.
 355 #[deriving(Clone)]
 356 pub struct Utf16Units<'a> {
 357     encoder: Utf16Encoder<Chars<'a>>
 358 }
 359
 360 impl<'a> Iterator<u16> for Utf16Units<'a> {
 361     #[inline]
 362     fn next(&mut self) -> Option<u16> { self.encoder.next() }
 363
 364     #[inline]
 365     fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
 366 }
 367
 368 /// Replaces all occurrences of one string with another.
 369 ///
 370 /// # Arguments
 371 ///
 372 /// * s - The string containing substrings to replace
 373 /// * from - The string to replace
 374 /// * to - The replacement string
 375 ///
 376 /// # Return value
 377 ///
 378 /// The original string with all occurrences of `from` replaced with `to`.
 379 ///
 380 /// # Examples
 381 ///
 382 /// ```rust
 383 /// # #![allow(deprecated)]
 384 /// use std::str;
 385 /// let string = "orange";
 386 /// let new_string = str::replace(string, "or", "str");
 387 /// assert_eq!(new_string.as_slice(), "strange");
 388 /// ```
 389 #[deprecated = "call the inherent method instead"]
 390 pub fn replace(s: &str, from: &str, to: &str) -> String {
 391     s.replace(from, to)
 392 }
 393
 394 /*
 395 Section: Misc
 396 */
 397
// Return the initial codepoint accumulator for the first byte.
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
// for width 3, and 3 bits for width 4
//
// `$byte` is the first byte of the sequence and `$width` the total width of
// the sequence in bytes: the mask `0x7F >> $width` keeps exactly the payload
// bits listed above, and the result is widened to `u32`.
macro_rules! utf8_first_byte {
    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
}
 404
// Return the value of `$ch` updated with continuation byte `$byte`.
//
// The accumulator is shifted left by 6 bits and the low 6 bits of `$byte`
// (mask `63`) are OR-ed in after being widened to `u32`.
macro_rules! utf8_acc_cont_byte {
    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
}
 409
 410 /*
 411 Section: MaybeOwned
 412 */
 413
 414 /// A string type that can hold either a `String` or a `&str`.
 415 /// This can be useful as an optimization when an allocation is sometimes
 416 /// needed but not always.
 417 #[deprecated = "use std::string::CowString"]
 418 pub enum MaybeOwned<'a> {
 419     /// A borrowed string.
 420     Slice(&'a str),
 421     /// An owned string.
 422     Owned(String)
 423 }
 424
/// A specialization of `CowString` to be sendable.
///
/// This is `CowString` with its lifetime parameter fixed to `'static`.
#[deprecated = "use std::string::CowString<'static>"]
pub type SendStr = CowString<'static>;
 428
 429 #[deprecated = "use std::string::CowString"]
 430 impl<'a> MaybeOwned<'a> {
 431     /// Returns `true` if this `MaybeOwned` wraps an owned string.
 432     ///
 433     /// # Examples
 434     ///
 435     /// ``` ignore
 436     /// let string = String::from_str("orange");
 437     /// let maybe_owned_string = string.into_maybe_owned();
 438     /// assert_eq!(true, maybe_owned_string.is_owned());
 439     /// ```
 440     #[inline]
 441     pub fn is_owned(&self) -> bool {
 442         match *self {
 443             Slice(_) => false,
 444             Owned(_) => true
 445         }
 446     }
 447
 448     /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
 449     ///
 450     /// # Examples
 451     ///
 452     /// ``` ignore
 453     /// let string = "orange";
 454     /// let maybe_owned_string = string.as_slice().into_maybe_owned();
 455     /// assert_eq!(true, maybe_owned_string.is_slice());
 456     /// ```
 457     #[inline]
 458     pub fn is_slice(&self) -> bool {
 459         match *self {
 460             Slice(_) => true,
 461             Owned(_) => false
 462         }
 463     }
 464
 465     /// Return the number of bytes in this string.
 466     #[inline]
 467     #[allow(deprecated)]
 468     pub fn len(&self) -> uint { self.as_slice().len() }
 469
 470     /// Returns true if the string contains no bytes
 471     #[allow(deprecated)]
 472     #[inline]
 473     pub fn is_empty(&self) -> bool { self.len() == 0 }
 474 }
 475
 476 #[deprecated = "use std::borrow::IntoCow"]
 477 /// Trait for moving into a `MaybeOwned`.
 478 pub trait IntoMaybeOwned<'a> {
 479     /// Moves `self` into a `MaybeOwned`.
 480     fn into_maybe_owned(self) -> MaybeOwned<'a>;
 481 }
 482
 483 #[deprecated = "use std::borrow::IntoCow"]
 484 #[allow(deprecated)]
 485 impl<'a> IntoMaybeOwned<'a> for String {
 486     /// # Examples
 487     ///
 488     /// ``` ignore
 489     /// let owned_string = String::from_str("orange");
 490     /// let maybe_owned_string = owned_string.into_maybe_owned();
 491     /// assert_eq!(true, maybe_owned_string.is_owned());
 492     /// ```
 493     #[allow(deprecated)]
 494     #[inline]
 495     fn into_maybe_owned(self) -> MaybeOwned<'a> {
 496         Owned(self)
 497     }
 498 }
 499
 500 #[deprecated = "use std::borrow::IntoCow"]
 501 #[allow(deprecated)]
 502 impl<'a> IntoMaybeOwned<'a> for &'a str {
 503     /// # Examples
 504     ///
 505     /// ``` ignore
 506     /// let string = "orange";
 507     /// let maybe_owned_str = string.as_slice().into_maybe_owned();
 508     /// assert_eq!(false, maybe_owned_str.is_owned());
 509     /// ```
 510     #[allow(deprecated)]
 511     #[inline]
 512     fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
 513 }
 514
 515 #[allow(deprecated)]
 516 #[deprecated = "use std::borrow::IntoCow"]
 517 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
 518     /// # Examples
 519     ///
 520     /// ``` ignore
 521     /// let str = "orange";
 522     /// let maybe_owned_str = str.as_slice().into_maybe_owned();
 523     /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
 524     /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
 525     /// ```
 526     #[inline]
 527     fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
 528 }
 529
 530 #[deprecated = "use std::string::CowString"]
 531 #[allow(deprecated)]
 532 impl<'a> PartialEq for MaybeOwned<'a> {
 533     #[inline]
 534     fn eq(&self, other: &MaybeOwned) -> bool {
 535         self.as_slice() == other.as_slice()
 536     }
 537 }
 538
// Marker impl with an empty body; the comparison itself is provided by the
// `PartialEq` impl for `MaybeOwned`.
#[deprecated = "use std::string::CowString"]
impl<'a> Eq for MaybeOwned<'a> {}
 541
 542 #[deprecated = "use std::string::CowString"]
 543 impl<'a> PartialOrd for MaybeOwned<'a> {
 544     #[inline]
 545     fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
 546         Some(self.cmp(other))
 547     }
 548 }
 549
 550 #[deprecated = "use std::string::CowString"]
 551 impl<'a> Ord for MaybeOwned<'a> {
 552     #[inline]
 553     #[allow(deprecated)]
 554     fn cmp(&self, other: &MaybeOwned) -> Ordering {
 555         self.as_slice().cmp(other.as_slice())
 556     }
 557 }
 558
 559 #[allow(deprecated)]
 560 #[deprecated = "use std::string::CowString"]
 561 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
 562     #[inline]
 563     fn equiv(&self, other: &S) -> bool {
 564         self.as_slice() == other.as_slice()
 565     }
 566 }
 567
 568 #[deprecated = "use std::string::CowString"]
 569 #[allow(deprecated)]
 570 impl<'a> Str for MaybeOwned<'a> {
 571     #[inline]
 572     fn as_slice<'b>(&'b self) -> &'b str {
 573         match *self {
 574             Slice(s) => s,
 575             Owned(ref s) => s.as_slice()
 576         }
 577     }
 578 }
 579
 580 #[deprecated = "use std::string::CowString"]
 581 impl<'a> Clone for MaybeOwned<'a> {
 582     #[allow(deprecated)]
 583     #[inline]
 584     fn clone(&self) -> MaybeOwned<'a> {
 585         match *self {
 586             Slice(s) => Slice(s),
 587             Owned(ref s) => Owned(String::from_str(s.as_slice()))
 588         }
 589     }
 590 }
 591
 592 #[deprecated = "use std::string::CowString"]
 593 impl<'a> Default for MaybeOwned<'a> {
 594     #[allow(deprecated)]
 595     #[inline]
 596     fn default() -> MaybeOwned<'a> { Slice("") }
 597 }
 598
 599 #[deprecated = "use std::string::CowString"]
 600 #[allow(deprecated)]
 601 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
 602     #[inline]
 603     fn hash(&self, hasher: &mut H) {
 604         self.as_slice().hash(hasher)
 605     }
 606 }
 607
 608 #[deprecated = "use std::string::CowString"]
 609 impl<'a> fmt::Show for MaybeOwned<'a> {
 610     #[inline]
 611     fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
 612         match *self {
 613             Slice(ref s) => s.fmt(f),
 614             Owned(ref s) => s.fmt(f)
 615         }
 616     }
 617 }
 618
 619 #[unstable = "trait is unstable"]
 620 impl BorrowFrom<String> for str {
 621     fn borrow_from(owned: &String) -> &str { owned[] }
 622 }
 623
 624 #[unstable = "trait is unstable"]
 625 impl ToOwned<String> for str {
 626     fn to_owned(&self) -> String {
 627         unsafe {
 628             String::from_utf8_unchecked(self.as_bytes().to_owned())
 629         }
 630     }
 631 }
 632
 633 /// Unsafe string operations.
 634 #[deprecated]
 635 pub mod raw {
 636     pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
 637     pub use core::str::raw::{slice_unchecked};
 638 }
 639
 640 /*
 641 Section: CowString
 642 */
 643
/// A clone-on-write string
///
/// Alias for `Cow` with `String` as the owned form and `str` as the borrowed
/// form.
#[deprecated = "use std::string::CowString instead"]
pub type CowString<'a> = Cow<'a, String, str>;
 647
 648 /*
 649 Section: Trait implementations
 650 */
 651
 652 /// Any string that can be represented as a slice.
 653 pub trait StrExt for Sized?: ops::Slice<uint, str> {
 654     /// Escapes each char in `s` with `char::escape_default`.
 655     #[unstable = "return type may change to be an iterator"]
 656     fn escape_default(&self) -> String {
 657         self.chars().flat_map(|c| c.escape_default()).collect()
 658     }
 659
 660     /// Escapes each char in `s` with `char::escape_unicode`.
 661     #[unstable = "return type may change to be an iterator"]
 662     fn escape_unicode(&self) -> String {
 663         self.chars().flat_map(|c| c.escape_unicode()).collect()
 664     }
 665
 666     /// Replaces all occurrences of one string with another.
 667     ///
 668     /// # Arguments
 669     ///
 670     /// * `from` - The string to replace
 671     /// * `to` - The replacement string
 672     ///
 673     /// # Return value
 674     ///
 675     /// The original string with all occurrences of `from` replaced with `to`.
 676     ///
 677     /// # Examples
 678     ///
 679     /// ```rust
 680     /// let s = "Do you know the muffin man,
 681     /// The muffin man, the muffin man, ...".to_string();
 682     ///
 683     /// assert_eq!(s.replace("muffin man", "little lamb"),
 684     ///            "Do you know the little lamb,
 685     /// The little lamb, the little lamb, ...".to_string());
 686     ///
 687     /// // not found, so no change.
 688     /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
 689     /// ```
 690     #[stable]
 691     fn replace(&self, from: &str, to: &str) -> String {
 692         let mut result = String::new();
 693         let mut last_end = 0;
 694         for (start, end) in self.match_indices(from) {
 695             result.push_str(unsafe { self.slice_unchecked(last_end, start) });
 696             result.push_str(to);
 697             last_end = end;
 698         }
 699         result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
 700         result
 701     }
 702
 703     /// Given a string, makes a new string with repeated copies of it.
 704     #[deprecated = "use repeat(self).take(n).collect() instead"]
 705     fn repeat(&self, nn: uint) -> String {
 706         iter::repeat(self[]).take(nn).collect()
 707     }
 708
 709     /// Returns the Levenshtein Distance between two strings.
 710     #[deprecated = "this function will be removed"]
 711     fn lev_distance(&self, t: &str) -> uint {
 712         let me = self[];
 713         if me.is_empty() { return t.chars().count(); }
 714         if t.is_empty() { return me.chars().count(); }
 715
 716         let mut dcol: Vec<_> = range(0, t.len() + 1).collect();
 717         let mut t_last = 0;
 718
 719         for (i, sc) in me.chars().enumerate() {
 720
 721             let mut current = i;
 722             dcol[0] = current + 1;
 723
 724             for (j, tc) in t.chars().enumerate() {
 725
 726                 let next = dcol[j + 1];
 727
 728                 if sc == tc {
 729                     dcol[j + 1] = current;
 730                 } else {
 731                     dcol[j + 1] = cmp::min(current, next);
 732                     dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
 733                 }
 734
 735                 current = next;
 736                 t_last = j;
 737             }
 738         }
 739
 740         dcol[t_last + 1]
 741     }
 742
 743     /// Returns an iterator over the string in Unicode Normalization Form D
 744     /// (canonical decomposition).
 745     #[inline]
 746     #[unstable = "this functionality may be moved to libunicode"]
 747     fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
 748         Decompositions {
 749             iter: self[].chars(),
 750             buffer: Vec::new(),
 751             sorted: false,
 752             kind: Canonical
 753         }
 754     }
 755
 756     /// Returns an iterator over the string in Unicode Normalization Form KD
 757     /// (compatibility decomposition).
 758     #[inline]
 759     #[unstable = "this functionality may be moved to libunicode"]
 760     fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
 761         Decompositions {
 762             iter: self[].chars(),
 763             buffer: Vec::new(),
 764             sorted: false,
 765             kind: Compatible
 766         }
 767     }
 768
 769     /// An Iterator over the string in Unicode Normalization Form C
 770     /// (canonical decomposition followed by canonical composition).
 771     #[inline]
 772     #[unstable = "this functionality may be moved to libunicode"]
 773     fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
 774         Recompositions {
 775             iter: self.nfd_chars(),
 776             state: Composing,
 777             buffer: RingBuf::new(),
 778             composee: None,
 779             last_ccc: None
 780         }
 781     }
 782
 783     /// An Iterator over the string in Unicode Normalization Form KC
 784     /// (compatibility decomposition followed by canonical composition).
 785     #[inline]
 786     #[unstable = "this functionality may be moved to libunicode"]
 787     fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
 788         Recompositions {
 789             iter: self.nfkd_chars(),
 790             state: Composing,
 791             buffer: RingBuf::new(),
 792             composee: None,
 793             last_ccc: None
 794         }
 795     }
 796
 797     /// Returns true if a string contains a string pattern.
 798     ///
 799     /// # Arguments
 800     ///
 801     /// - pat - The string pattern to look for
 802     ///
 803     /// # Example
 804     ///
 805     /// ```rust
 806     /// assert!("bananas".contains("nana"));
 807     /// ```
 808     #[stable]
 809     fn contains(&self, pat: &str) -> bool {
 810         core_str::StrExt::contains(self[], pat)
 811     }
 812
 813     /// Returns true if a string contains a char pattern.
 814     ///
 815     /// # Arguments
 816     ///
 817     /// - pat - The char pattern to look for
 818     ///
 819     /// # Example
 820     ///
 821     /// ```rust
 822     /// assert!("hello".contains_char('e'));
 823     /// ```
 824     #[unstable = "might get removed in favour of a more generic contains()"]
 825     fn contains_char<P: CharEq>(&self, pat: P) -> bool {
 826         core_str::StrExt::contains_char(self[], pat)
 827     }
 828
 829     /// An iterator over the characters of `self`. Note, this iterates
 830     /// over Unicode code-points, not Unicode graphemes.
 831     ///
 832     /// # Example
 833     ///
 834     /// ```rust
 835     /// let v: Vec<char> = "abc åäö".chars().collect();
 836     /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
 837     /// ```
 838     #[stable]
 839     fn chars(&self) -> Chars {
 840         core_str::StrExt::chars(self[])
 841     }
 842
 843     /// An iterator over the bytes of `self`
 844     ///
 845     /// # Example
 846     ///
 847     /// ```rust
 848     /// let v: Vec<u8> = "bors".bytes().collect();
 849     /// assert_eq!(v, b"bors".to_vec());
 850     /// ```
 851     #[stable]
 852     fn bytes(&self) -> Bytes {
 853         core_str::StrExt::bytes(self[])
 854     }
 855
 856     /// An iterator over the characters of `self` and their byte offsets.
 857     #[stable]
 858     fn char_indices(&self) -> CharIndices {
 859         core_str::StrExt::char_indices(self[])
 860     }
 861
 862     /// An iterator over substrings of `self`, separated by characters
 863     /// matched by the pattern `pat`.
 864     ///
 865     /// # Example
 866     ///
 867     /// ```rust
 868     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
 869     /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
 870     ///
 871     /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
 872     /// assert_eq!(v, vec!["abc", "def", "ghi"]);
 873     ///
 874     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
 875     /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
 876     ///
 877     /// let v: Vec<&str> = "".split('X').collect();
 878     /// assert_eq!(v, vec![""]);
 879     /// ```
 880     #[stable]
 881     fn split<P: CharEq>(&self, pat: P) -> Split<P> {
 882         core_str::StrExt::split(self[], pat)
 883     }
 884
 885     /// Returns an iterator over substrings of `self`, separated by characters
 886     /// matched by the pattern `pat`, restricted to splitting at most `count`
 887     /// times; the final item is the unsplit remainder of the string.
 888     ///
 889     /// # Example
 890     ///
 891     /// ```rust
 892     /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
 893     /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
 894     ///
 895     /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
 896     /// assert_eq!(v, vec!["abc", "def2ghi"]);
 897     ///
 898     /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
 899     /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
 900     /// // a `count` of zero performs no split at all
 901     /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
 902     /// assert_eq!(v, vec!["abcXdef"]);
 903     /// // the empty string still yields one (empty) substring
 904     /// let v: Vec<&str> = "".splitn(1, 'X').collect();
 905     /// assert_eq!(v, vec![""]);
 906     /// ```
 907     #[stable]
 908     fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
 909         core_str::StrExt::splitn(self[], count, pat)
 910     }
 911
 912     /// Returns an iterator over substrings of `self`, separated by characters
 913     /// matched by the pattern `pat`.
 914     ///
 915     /// Equivalent to `split`, except that the trailing substring
 916     /// is skipped if empty (terminator semantics).
 917     ///
 918     /// # Example
 919     ///
 920     /// ```rust
 921     /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
 922     /// assert_eq!(v, vec!["A", "B"]);
 923     /// // only the final empty substring is dropped; inner empty ones are kept
 924     /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
 925     /// assert_eq!(v, vec!["A", "", "B", ""]);
 926     /// // the remaining examples use plain `split` (not `split_terminator`) with `rev()`
 927     /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
 928     /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
 929     ///
 930     /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
 931     /// assert_eq!(v, vec!["ghi", "def", "abc"]);
 932     ///
 933     /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
 934     /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
 935     /// ```
 936     #[unstable = "might get removed"]
 937     fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
 938         core_str::StrExt::split_terminator(self[], pat)
 939     }
 940
 941     /// Returns an iterator over substrings of `self`, separated by characters
 942     /// matched by the pattern `pat`, starting from the end of the string.
 943     /// Restricted to splitting at most `count` times; the final item is the unsplit front remainder.
 944     ///
 945     /// # Example
 946     ///
 947     /// ```rust
 948     /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
 949     /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
 950     ///
 951     /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
 952     /// assert_eq!(v, vec!["ghi", "abc1def"]);
 953     /// // with the split budget exhausted, the remaining `X` stays in the last item
 954     /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
 955     /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
 956     /// ```
 957     #[stable]
 958     fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
 959         core_str::StrExt::rsplitn(self[], count, pat)
 960     }
 961
 962     /// Returns an iterator over the start and end byte indices of the disjoint
 963     /// matches of the pattern `pat` within `self`.
 964     ///
 965     /// That is, each returned value `(start, end)` satisfies
 966     /// `self.slice(start, end) == pat`. For matches of `pat` within
 967     /// `self` that overlap, only the indices corresponding to the
 968     /// first match are returned.
 969     ///
 970     /// # Example
 971     ///
 972     /// ```rust
 973     /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
 974     /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
 975     /// // back-to-back matches are both reported
 976     /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
 977     /// assert_eq!(v, vec![(1,4), (4,7)]);
 978     ///
 979     /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
 980     /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
 981     /// ```
 982     #[unstable = "might have its iterator type changed"]
 983     fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
 984         core_str::StrExt::match_indices(self[], pat)
 985     }
 986
 987     /// Returns an iterator over the substrings of `self` separated by occurrences of the string `pat`.
 988     ///
 989     /// # Example
 990     ///
 991     /// ```rust
 992     /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
 993     /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
 994     /// // adjacent occurrences of `pat` yield an empty substring between them
 995     /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
 996     /// assert_eq!(v, vec!["1", "", "2"]);
 997     /// ```
 998     #[unstable = "might get removed in the future in favor of a more generic split()"]
 999     fn split_str<'a>(&'a self, pat: &'a str) -> StrSplits<'a> {
1000         core_str::StrExt::split_str(self[], pat)
1001     }
1002
1003     /// Returns an iterator over the lines of a string (subsequences
1004     /// separated by `\n`). This does not include the empty string after a
1005     /// trailing `\n`; empty lines in the middle are kept.
1006     ///
1007     /// # Example
1008     ///
1009     /// ```rust
1010     /// let four_lines = "foo\nbar\n\nbaz\n";
1011     /// let v: Vec<&str> = four_lines.lines().collect();
1012     /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1013     /// ```
1014     #[stable]
1015     fn lines(&self) -> Lines {
1016         core_str::StrExt::lines(self[])
1017     }
1018
1019     /// Returns an iterator over the lines of a string, separated by either
1020     /// `\n` or `\r\n` (both may be mixed in one string). As with `.lines()`,
1021     /// this does not include an empty trailing line.
1022     ///
1023     /// # Example
1024     ///
1025     /// ```rust
1026     /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
1027     /// let v: Vec<&str> = four_lines.lines_any().collect();
1028     /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1029     /// ```
1030     #[stable]
1031     fn lines_any(&self) -> LinesAny {
1032         core_str::StrExt::lines_any(self[])
1033     }
1034
1035     /// Returns the number of Unicode code points (`char`) that a
1036     /// string holds.
1037     ///
1038     /// This does not perform any normalization, and is `O(n)`, since
1039     /// UTF-8 is a variable width encoding of code points.
1040     ///
1041     /// *Warning*: The number of code points in a string does not directly
1042     /// correspond to the number of visible characters or width of the
1043     /// visible text due to composing characters, and double- and
1044     /// zero-width ones.
1045     ///
1046     /// See also `.len()` for the byte length. Deprecated: call `.chars().count()` instead.
1047     ///
1048     /// # Example
1049     ///
1050     /// ```rust
1051     /// # #![allow(deprecated)]
1052     /// // composed forms of `ö` and `é`
1053     /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
1054     /// // decomposed forms of `ö` and `é`
1055     /// let d = "Lo\u{0308}we 老虎 Le\u{0301}opard";
1056     /// // code point counts differ between the composed and decomposed forms
1057     /// assert_eq!(c.char_len(), 15);
1058     /// assert_eq!(d.char_len(), 17);
1059     /// // and so do the byte lengths
1060     /// assert_eq!(c.len(), 21);
1061     /// assert_eq!(d.len(), 23);
1062     ///
1063     /// // the two strings *look* the same
1064     /// println!("{}", c);
1065     /// println!("{}", d);
1066     /// ```
1067     #[deprecated = "call .chars().count() instead"]
1068     fn char_len(&self) -> uint {
1069         core_str::StrExt::char_len(self[])
1070     }
1071
1072     /// Returns a slice of the given string from the byte range
1073     /// [`begin`..`end`).
1074     ///
1075     /// This operation is `O(1)`.
1076     ///
1077     /// Panics when `begin` or `end` does not point to a valid character
1078     /// boundary, or points beyond the last character of the string.
1079     ///
1080     /// See also `slice_to` and `slice_from` for slicing prefixes and
1081     /// suffixes of strings, and `slice_chars` for slicing based on
1082     /// code point counts.
1083     ///
1084     /// # Example
1085     ///
1086     /// ```rust
1087     /// let s = "Löwe 老虎 Léopard";
1088     /// assert_eq!(s.slice(0, 1), "L");
1089     /// // indices are byte offsets: `ö` occupies bytes 1-2 and `老` bytes 6-8
1090     /// assert_eq!(s.slice(1, 9), "öwe 老");
1091     ///
1092     /// // these will panic:
1093     /// // byte 2 lies within `ö`:
1094     /// // s.slice(2, 3);
1095     ///
1096     /// // byte 8 lies within `老`
1097     /// // s.slice(1, 8);
1098     ///
1099     /// // byte 100 is outside the string
1100     /// // s.slice(3, 100);
1101     /// ```
1102     #[unstable = "use slice notation [a..b] instead"]
1103     fn slice(&self, begin: uint, end: uint) -> &str {
1104         core_str::StrExt::slice(self[], begin, end)
1105     }
1106
1107     /// Returns a slice of the string from byte `begin` to its end.
1108     ///
1109     /// Equivalent to `self.slice(begin, self.len())`.
1110     ///
1111     /// Panics when `begin` does not point to a valid character, or is
1112     /// out of bounds.
1113     ///
1114     /// See also `slice`, `slice_to` and `slice_chars`.
1115     #[unstable = "use slice notation [a..] instead"]
1116     fn slice_from(&self, begin: uint) -> &str {
1117         core_str::StrExt::slice_from(self[], begin)
1118     }
1119
1120     /// Returns a slice of the string from the beginning up to (but not
1121     /// including) byte `end`.
1122     ///
1123     /// Equivalent to `self.slice(0, end)`.
1124     ///
1125     /// Panics when `end` does not point to a valid character, or is
1126     /// out of bounds.
1127     ///
1128     /// See also `slice`, `slice_from` and `slice_chars`.
1129     #[unstable = "use slice notation [0..a] instead"]
1130     fn slice_to(&self, end: uint) -> &str {
1131         core_str::StrExt::slice_to(self[], end)
1132     }
1133
1134     /// Returns a slice of the string from the character range
1135     /// [`begin`..`end`).
1136     ///
1137     /// That is, start at the `begin`-th code point of the string and
1138     /// continue to the `end`-th code point. This does not detect or
1139     /// handle edge cases such as leaving a combining character as the
1140     /// first code point of the string.
1141     ///
1142     /// Due to the design of UTF-8, this operation is `O(end)`.
1143     /// See `slice`, `slice_to` and `slice_from` for `O(1)`
1144     /// variants that use byte indices rather than code point
1145     /// indices.
1146     ///
1147     /// Panics if `begin` > `end` or if either `begin` or `end` is
1148     /// beyond the last character of the string.
1149     ///
1150     /// # Example
1151     ///
1152     /// ```rust
1153     /// let s = "Löwe 老虎 Léopard";
1154     /// assert_eq!(s.slice_chars(0, 4), "Löwe");
1155     /// assert_eq!(s.slice_chars(5, 7), "老虎");
1156     /// ```
1157     #[unstable = "may have yet to prove its worth"]
1158     fn slice_chars(&self, begin: uint, end: uint) -> &str {
1159         core_str::StrExt::slice_chars(self[], begin, end)
1160     }
1161
1162     /// Takes a bytewise (not UTF-8) slice from a string.
1163     ///
1164     /// Returns the substring from the byte range [`begin`..`end`).
1165     ///
1166     /// The caller must ensure that `begin` and `end` lie on UTF-8 character
1167     /// boundaries and within the boundaries of the entire slice.
1168     #[stable]
1169     unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
1170         core_str::StrExt::slice_unchecked(self[], begin, end)
1171     }
1172
1173     /// Returns `true` if the string slice `pat` is a prefix of this string.
1174     ///
1175     /// # Example
1176     ///
1177     /// ```rust
1178     /// assert!("banana".starts_with("ba"));
1179     /// ```
1180     #[stable]
1181     fn starts_with(&self, pat: &str) -> bool {
1182         core_str::StrExt::starts_with(self[], pat)
1183     }
1184
1185     /// Returns `true` if the string slice `pat` is a suffix of this string.
1186     ///
1187     /// # Example
1188     ///
1189     /// ```rust
1190     /// assert!("banana".ends_with("nana"));
1191     /// ```
1192     #[stable]
1193     fn ends_with(&self, pat: &str) -> bool {
1194         core_str::StrExt::ends_with(self[], pat)
1195     }
1196
1197     /// Returns a string with all pre- and suffixes that match
1198     /// the pattern `pat` repeatedly removed.
1199     ///
1200     /// # Arguments
1201     ///
1202     /// * pat - a character pattern (`CharEq`): a `char`, a slice of `char`s, or a closure
1203     ///
1204     /// # Example
1205     ///
1206     /// ```rust
1207     /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1208     /// let x: &[_] = &['1', '2'];
1209     /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
1210     /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
1211     /// ```
1212     #[stable]
1213     fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
1214         core_str::StrExt::trim_matches(self[], pat)
1215     }
1216
1217     /// Deprecated alias for `trim_matches`; forwards `to_trim` to it unchanged.
1218     #[deprecated = "Replaced by `trim_matches`"]
1219     fn trim_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1220         self.trim_matches(to_trim)
1221     }
1222
1223     /// Returns a string with all prefixes that match
1224     /// the pattern `pat` repeatedly removed.
1225     ///
1226     /// # Arguments
1227     ///
1228     /// * pat - a character pattern (`CharEq`): a `char`, a slice of `char`s, or a closure
1229     ///
1230     /// # Example
1231     ///
1232     /// ```rust
1233     /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1234     /// let x: &[_] = &['1', '2'];
1235     /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
1236     /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1237     /// ```
1238     #[stable]
1239     fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
1240         core_str::StrExt::trim_left_matches(self[], pat)
1241     }
1242
1243     /// Deprecated alias for `trim_left_matches`; forwards `to_trim` to it unchanged.
1244     #[deprecated = "Replaced by `trim_left_matches`"]
1245     fn trim_left_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1246         self.trim_left_matches(to_trim)
1247     }
1248
1249     /// Returns a string with all suffixes that match
1250     /// the pattern `pat` repeatedly removed.
1251     ///
1252     /// # Arguments
1253     ///
1254     /// * pat - a character pattern (`CharEq`): a `char`, a slice of `char`s, or a closure
1255     ///
1256     /// # Example
1257     ///
1258     /// ```rust
1259     /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1260     /// let x: &[_] = &['1', '2'];
1261     /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
1262     /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1263     /// ```
1264     #[stable]
1265     fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
1266         core_str::StrExt::trim_right_matches(self[], pat)
1267     }
1268
1269     /// Deprecated alias for `trim_right_matches`; forwards `to_trim` to it unchanged.
1270     #[deprecated = "Replaced by `trim_right_matches`"]
1271     fn trim_right_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1272         self.trim_right_matches(to_trim)
1273     }
1274
1275     /// Checks whether the `index`-th byte lies at the start and/or end of a
1276     /// UTF-8 code point sequence.
1277     ///
1278     /// The start and end of the string (when `index == self.len()`)
1279     /// are considered to be boundaries.
1280     ///
1281     /// Panics if `index` is greater than `self.len()`.
1282     ///
1283     /// # Example
1284     ///
1285     /// ```rust
1286     /// let s = "Löwe 老虎 Léopard";
1287     /// assert!(s.is_char_boundary(0));
1288     /// // start of `老`
1289     /// assert!(s.is_char_boundary(6));
1290     /// assert!(s.is_char_boundary(s.len()));
1291     ///
1292     /// // second byte of `ö`
1293     /// assert!(!s.is_char_boundary(2));
1294     ///
1295     /// // third byte of `老`
1296     /// assert!(!s.is_char_boundary(8));
1297     /// ```
1298     #[unstable = "naming is uncertain with container conventions"]
1299     fn is_char_boundary(&self, index: uint) -> bool {
1300         core_str::StrExt::is_char_boundary(self[], index)
1301     }
1302
1303     /// Pluck a character out of a string and return the index of the next
1304     /// character.
1305     ///
1306     /// This function can be used to iterate over the Unicode characters of a
1307     /// string.
1308     ///
1309     /// # Example
1310     ///
1311     /// This example manually iterates through the characters of a
1312     /// string; this should normally be done by `.chars()` or
1313     /// `.char_indices()`.
1314     ///
1315     /// ```rust
1316     /// use std::str::CharRange;
1317     ///
1318     /// let s = "中华Việt Nam";
1319     /// let mut i = 0u;
1320     /// while i < s.len() {
1321     ///     let CharRange {ch, next} = s.char_range_at(i);
1322     ///     println!("{}: {}", i, ch);
1323     ///     i = next;
1324     /// }
1325     /// ```
1326     ///
1327     /// This outputs:
1328     ///
1329     /// ```text
1330     /// 0: 中
1331     /// 3: 华
1332     /// 6: V
1333     /// 7: i
1334     /// 8: ệ
1335     /// 11: t
1336     /// 12:
1337     /// 13: N
1338     /// 14: a
1339     /// 15: m
1340     /// ```
1341     ///
1342     /// # Arguments
1343     ///
1344     /// * self - The string
1345     /// * start - The byte offset of the char to extract
1346     ///
1347     /// # Return value
1348     ///
1349     /// A record {ch: char, next: uint} containing the char value and the byte
1350     /// index of the next Unicode character.
1351     ///
1352     /// # Panics
1353     ///
1354     /// If `start` is greater than or equal to the length of the string.
1355     /// If `start` is not the index of the beginning of a valid UTF-8 character.
1356     #[unstable = "naming is uncertain with container conventions"]
1357     fn char_range_at(&self, start: uint) -> CharRange {
1358         core_str::StrExt::char_range_at(self[], start)
1359     }
1360
1361     /// Given a byte position `start` in a str, return the previous char and its position.
1362     ///
1363     /// This function can be used to iterate over a Unicode string in reverse.
1364     ///
1365     /// Returns 0 for next index if called on start index 0.
1366     ///
1367     /// # Panics
1368     ///
1369     /// If `start` is greater than the length of the string.
1370     /// If `start` is not an index following a valid UTF-8 character.
1371     #[unstable = "naming is uncertain with container conventions"]
1372     fn char_range_at_reverse(&self, start: uint) -> CharRange {
1373         core_str::StrExt::char_range_at_reverse(self[], start)
1374     }
1375
1376     /// Plucks the character starting at the `i`th byte (not the `i`th character) of a string.
1377     ///
1378     /// # Example
1379     ///
1380     /// ```rust
1381     /// let s = "abπc";
1382     /// assert_eq!(s.char_at(1), 'b');
1383     /// assert_eq!(s.char_at(2), 'π');
1384     /// assert_eq!(s.char_at(4), 'c');
1385     /// ```
1386     ///
1387     /// # Panics
1388     ///
1389     /// If `i` is greater than or equal to the length of the string.
1390     /// If `i` is not the index of the beginning of a valid UTF-8 character.
1391     #[unstable = "naming is uncertain with container conventions"]
1392     fn char_at(&self, i: uint) -> char {
1393         core_str::StrExt::char_at(self[], i)
1394     }
1395
1396     /// Plucks the character ending at the `i`th byte of a string (`i` is a byte offset).
1397     ///
1398     /// # Panics
1399     ///
1400     /// If `i` is greater than the length of the string.
1401     /// If `i` is not an index following a valid UTF-8 character.
1402     #[unstable = "naming is uncertain with container conventions"]
1403     fn char_at_reverse(&self, i: uint) -> char {
1404         core_str::StrExt::char_at_reverse(self[], i)
1405     }
1406
1407     /// Returns the byte buffer of a string as a borrowed byte slice.
1408     ///
1409     /// # Example
1410     ///
1411     /// ```rust
1412     /// assert_eq!("bors".as_bytes(), b"bors");
1413     /// ```
1414     #[stable]
1415     fn as_bytes(&self) -> &[u8] {
1416         core_str::StrExt::as_bytes(self[])
1417     }
1418
1419     /// Returns the byte index of the first character of `self` that
1420     /// matches the pattern `pat`.
1421     ///
1422     /// # Return value
1423     ///
1424     /// `Some` containing the byte index of the first matching character
1425     /// or `None` if there is no match.
1426     ///
1427     /// # Example
1428     ///
1429     /// ```rust
1430     /// let s = "Löwe 老虎 Léopard";
1431     ///
1432     /// assert_eq!(s.find('L'), Some(0));
1433     /// assert_eq!(s.find('é'), Some(14));
1434     ///
1435     /// // the first space
1436     /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1437     ///
1438     /// // searches for an occurrence of either `1` or `2`, but neither are found
1439     /// let x: &[_] = &['1', '2'];
1440     /// assert_eq!(s.find(x), None);
1441     /// ```
1442     #[stable]
1443     fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1444         core_str::StrExt::find(self[], pat)
1445     }
1446
1447     /// Returns the byte index of the last character of `self` that
1448     /// matches the pattern `pat`.
1449     ///
1450     /// # Return value
1451     ///
1452     /// `Some` containing the byte index of the last matching character
1453     /// or `None` if there is no match.
1454     ///
1455     /// # Example
1456     ///
1457     /// ```rust
1458     /// let s = "Löwe 老虎 Léopard";
1459     /// // the second `L` (the one in `Léopard`), not the one at index 0
1460     /// assert_eq!(s.rfind('L'), Some(13));
1461     /// assert_eq!(s.rfind('é'), Some(14));
1462     ///
1463     /// // the second space
1464     /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1465     ///
1466     /// // searches for an occurrence of either `1` or `2`, but neither are found
1467     /// let x: &[_] = &['1', '2'];
1468     /// assert_eq!(s.rfind(x), None);
1469     /// ```
1470     #[stable]
1471     fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1472         core_str::StrExt::rfind(self[], pat)
1473     }
1474
1475     /// Returns the byte index of the first substring of `self` that matches `needle`.
1476     ///
1477     /// # Arguments
1478     ///
1479     /// * `needle` - The string to search for
1480     ///
1481     /// # Return value
1482     ///
1483     /// `Some` containing the byte index of the first matching substring
1484     /// or `None` if there is no match.
1485     ///
1486     /// # Example
1487     ///
1488     /// ```rust
1489     /// let s = "Löwe 老虎 Léopard";
1490     ///
1491     /// assert_eq!(s.find_str("老虎 L"), Some(6));
1492     /// assert_eq!(s.find_str("muffin man"), None);
1493     /// ```
1494     #[unstable = "might get removed in favor of a more generic find in the future"]
1495     fn find_str(&self, needle: &str) -> Option<uint> {
1496         core_str::StrExt::find_str(self[], needle)
1497     }
1498
1499     /// Retrieves the first character from a string slice and returns
1500     /// it. This does not allocate a new string; instead, it returns a
1501     /// slice that points one character beyond the character that was
1502     /// shifted. If the string does not contain any characters,
1503     /// `None` is returned instead.
1504     ///
1505     /// # Example
1506     ///
1507     /// ```rust
1508     /// let s = "Löwe 老虎 Léopard";
1509     /// let (c, s1) = s.slice_shift_char().unwrap();
1510     /// assert_eq!(c, 'L');
1511     /// assert_eq!(s1, "öwe 老虎 Léopard");
1512     /// // a multi-byte character is shifted off as a whole
1513     /// let (c, s2) = s1.slice_shift_char().unwrap();
1514     /// assert_eq!(c, 'ö');
1515     /// assert_eq!(s2, "we 老虎 Léopard");
1516     /// ```
1517     #[unstable = "awaiting conventions about shifting and slices"]
1518     fn slice_shift_char(&self) -> Option<(char, &str)> {
1519         core_str::StrExt::slice_shift_char(self[])
1520     }
1521
1522     /// Returns the byte offset of the slice `inner` relative to the enclosing outer slice `self`.
1523     ///
1524     /// Panics if `inner` is not a direct slice contained within `self`.
1525     ///
1526     /// # Example
1527     ///
1528     /// ```rust
1529     /// let string = "a\nb\nc";
1530     /// let lines: Vec<&str> = string.lines().collect();
1531     ///
1532     /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1533     /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1534     /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1535     /// ```
1536     #[unstable = "awaiting convention about comparability of arbitrary slices"]
1537     fn subslice_offset(&self, inner: &str) -> uint {
1538         core_str::StrExt::subslice_offset(self[], inner)
1539     }
1540
1541     /// Return an unsafe pointer to the string's buffer.
1542     ///
1543     /// The caller must ensure that the string outlives this pointer,
1544     /// and that it is not reallocated (e.g. by pushing to the
1545     /// string).
1546     #[stable]
1547     #[inline]
1548     fn as_ptr(&self) -> *const u8 {
1549         core_str::StrExt::as_ptr(self[])
1550     }
1551
1552     /// Return an iterator of `u16` over the string encoded as UTF-16, built from its `chars()`.
1553     #[unstable = "this functionality may only be provided by libunicode"]
1554     fn utf16_units(&self) -> Utf16Units {
1555         Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1556     }
1557
1558     /// Return the number of bytes (not characters) in this string.
1559     ///
1560     /// # Example
1561     ///
1562     /// ```
1563     /// assert_eq!("foo".len(), 3);
1564     /// assert_eq!("ƒoo".len(), 4);
1565     /// ```
1566     #[stable]
1567     #[inline]
1568     fn len(&self) -> uint {
1569         core_str::StrExt::len(self[])
1570     }
1571
1572     /// Returns `true` if this slice contains no bytes.
1573     ///
1574     /// # Example
1575     ///
1576     /// ```
1577     /// assert!("".is_empty());
1578     /// ```
1579     #[inline]
1580     #[stable]
1581     fn is_empty(&self) -> bool {
1582         core_str::StrExt::is_empty(self[])
1583     }
1584
1585     /// Parse this string into the specified type via `FromStr`, yielding `None` when parsing fails.
1586     ///
1587     /// # Example
1588     ///
1589     /// ```
1590     /// assert_eq!("4".parse::<u32>(), Some(4));
1591     /// assert_eq!("j".parse::<u32>(), None);
1592     /// ```
1593     #[inline]
1594     #[unstable = "this method was just created"]
1595     fn parse<F: FromStr>(&self) -> Option<F> {
1596         FromStr::from_str(self[])
1597     }
1598
1599     /// Returns an iterator over the
1600     /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1601     /// of the string.
1602     ///
1603     /// If `is_extended` is `true`, the iterator is over the *extended grapheme clusters*;
1604     /// otherwise, the iterator is over the *legacy grapheme clusters*.
1605     /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1606     /// recommends extended grapheme cluster boundaries for general processing.
1607     ///
1608     /// # Example
1609     ///
1610     /// ```rust
1611     /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1612     /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1613     /// assert_eq!(gr1.as_slice(), b);
1614     /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1615     /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1616     /// assert_eq!(gr2.as_slice(), b);
1617     /// ```
1618     #[unstable = "this functionality may only be provided by libunicode"]
1619     fn graphemes(&self, is_extended: bool) -> Graphemes {
1620         UnicodeStr::graphemes(self[], is_extended)
1621     }
1622
1623     /// Returns an iterator over the grapheme clusters of `self` and their byte offsets.
1624     /// See the `graphemes()` method for more information, including the meaning of `is_extended`.
1625     ///
1626     /// # Example
1627     ///
1628     /// ```rust
1629     /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1630     /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1631     /// assert_eq!(gr_inds.as_slice(), b);
1632     /// ```
1633     #[unstable = "this functionality may only be provided by libunicode"]
1634     fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1635         UnicodeStr::grapheme_indices(self[], is_extended)
1636     }
1637
1638     /// Returns an iterator over the words of a string (subsequences separated
1639     /// by any sequence of whitespace). Sequences of whitespace are
1640     /// collapsed, so empty "words" are not included.
1641     ///
1642     /// # Example
1643     ///
1644     /// ```rust
1645     /// let some_words = " Mary   had\ta little  \n\t lamb";
1646     /// let v: Vec<&str> = some_words.words().collect();
1647     /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1648     /// ```
1649     #[stable]
1650     fn words(&self) -> Words {
1651         UnicodeStr::words(self[])
1652     }
1653
1654     /// Returns `true` if the string contains only whitespace (the empty string counts).
1655     ///
1656     /// Whitespace characters are determined by `char::is_whitespace`.
1657     ///
1658     /// # Example
1659     ///
1660     /// ```rust
1661     /// # #![allow(deprecated)]
1662     /// assert!(" \t\n".is_whitespace());
1663     /// assert!("".is_whitespace());
1664     ///
1665     /// assert!( !"abc".is_whitespace());
1666     /// ```
1667     #[deprecated = "use .chars().all(|c| c.is_whitespace())"]
1668     fn is_whitespace(&self) -> bool {
1669         UnicodeStr::is_whitespace(self[])
1670     }
1671
1672     /// Returns `true` if the string contains only alphanumeric code
1673     /// points (the empty string counts).
1674     ///
1675     /// Alphanumeric characters are determined by `char::is_alphanumeric`.
1676     ///
1677     /// # Example
1678     ///
1679     /// ```rust
1680     /// # #![allow(deprecated)]
1681     /// assert!("Löwe老虎Léopard123".is_alphanumeric());
1682     /// assert!("".is_alphanumeric());
1683     ///
1684     /// assert!( !" &*~".is_alphanumeric());
1685     /// ```
1686     #[deprecated = "use .chars().all(|c| c.is_alphanumeric())"]
1687     fn is_alphanumeric(&self) -> bool {
1688         UnicodeStr::is_alphanumeric(self[])
1689     }
1690
1691     /// Returns this string's displayed width in columns, treating control
1692     /// characters as zero-width.
1693     ///
1694     /// `is_cjk` determines behavior for characters in the Ambiguous category:
1695     /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1696     /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1697     /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1698     /// recommends that these characters be treated as 1 column (i.e.,
1699     /// `is_cjk` = `false`) if the locale is unknown.
1700     #[unstable = "this functionality may only be provided by libunicode"]
1701     fn width(&self, is_cjk: bool) -> uint {
1702         UnicodeStr::width(self[], is_cjk)
1703     }
1704
1705     /// Returns a slice of this string with leading and trailing whitespace removed.
1706     #[stable]
1707     fn trim(&self) -> &str {
1708         UnicodeStr::trim(self[])
1709     }
1710
1711     /// Returns a slice of this string with leading whitespace removed.
1712     #[stable]
1713     fn trim_left(&self) -> &str {
1714         UnicodeStr::trim_left(self[])
1715     }
1716
1717     /// Returns a slice of this string with trailing whitespace removed.
1718     #[stable]
1719     fn trim_right(&self) -> &str {
1720         UnicodeStr::trim_right(self[])
1721     }
1722
1723     /// Deprecated: converts this slice into an owned `String`. Call `.to_owned()`
1724     /// from the `std::borrow::ToOwned` trait instead.
1725     #[deprecated = "call `.to_owned()` on `std::borrow::ToOwned` instead"]
1726     fn into_string(&self) -> String {
1727         self[].to_owned()
1728     }
1729 }
1730
1731 impl StrExt for str {} // empty body: `str` relies entirely on the trait's default methods
1732
1733 #[cfg(test)]
1734 mod tests {
1735     use prelude::*;
1736
1737     use core::default::Default;
1738     use core::iter::AdditiveIterator;
1739     use super::{from_utf8, is_utf8, raw};
1740     use super::MaybeOwned::{Owned, Slice};
1741     use super::Utf8Error;
1742
1743     #[test]
1744     fn test_le() { // exercises `<=` and `!=` between string literals
1745         assert!("" <= ""); // `<=` holds for equal (empty) strings
1746         assert!("" <= "foo"); // the empty string is not greater than "foo"
1747         assert!("foo" <= "foo");
1748         assert!("foo" != "bar");
1749     }
1750
1751     #[test]
1752     fn test_len() {
1753         assert_eq!("".len(), 0u);
1754         assert_eq!("hello world".len(), 11u);
1755         assert_eq!("\x63".len(), 1u);
1756         assert_eq!("\u{a2}".len(), 2u);
1757         assert_eq!("\u{3c0}".len(), 2u);
1758         assert_eq!("\u{2620}".len(), 3u);
1759         assert_eq!("\u{1d11e}".len(), 4u);
1760
1761         assert_eq!("".char_len(), 0u);
1762         assert_eq!("hello world".char_len(), 11u);
1763         assert_eq!("\x63".char_len(), 1u);
1764         assert_eq!("\u{a2}".char_len(), 1u);
1765         assert_eq!("\u{3c0}".char_len(), 1u);
1766         assert_eq!("\u{2620}".char_len(), 1u);
1767         assert_eq!("\u{1d11e}".char_len(), 1u);
1768         assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
1769
1770         assert_eq!("ｈｅｌｌｏ".width(false), 10u);
1771         assert_eq!("ｈｅｌｌｏ".width(true), 10u);
1772         assert_eq!("\0\0\0\0\0".width(false), 0u);
1773         assert_eq!("\0\0\0\0\0".width(true), 0u);
1774         assert_eq!("".width(false), 0u);
1775         assert_eq!("".width(true), 0u);
1776         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1777         assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1778     }
1779
1780     #[test]
1781     fn test_find() {
1782         assert_eq!("hello".find('l'), Some(2u));
1783         assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1784         assert!("hello".find('x').is_none());
1785         assert!("hello".find(|&: c:char| c == 'x').is_none());
1786         assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1787         assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1788     }
1789
1790     #[test]
1791     fn test_rfind() {
1792         assert_eq!("hello".rfind('l'), Some(3u));
1793         assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1794         assert!("hello".rfind('x').is_none());
1795         assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1796         assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1797         assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1798     }
1799
1800     #[test]
1801     fn test_collect() {
1802         let empty = String::from_str("");
1803         let s: String = empty.chars().collect();
1804         assert_eq!(empty, s);
1805         let data = String::from_str("ประเทศไทย中");
1806         let s: String = data.chars().collect();
1807         assert_eq!(data, s);
1808     }
1809
1810     #[test]
1811     fn test_into_bytes() {
1812         let data = String::from_str("asdf");
1813         let buf = data.into_bytes();
1814         assert_eq!(b"asdf", buf);
1815     }
1816
1817     #[test]
1818     fn test_find_str() {
1819         // byte positions
1820         assert_eq!("".find_str(""), Some(0u));
1821         assert!("banana".find_str("apple pie").is_none());
1822
1823         let data = "abcabc";
1824         assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1825         assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1826         assert!(data.slice(2u, 4u).find_str("ab").is_none());
1827
1828         let string = "ประเทศไทย中华Việt Nam";
1829         let mut data = String::from_str(string);
1830         data.push_str(string);
1831         assert!(data.find_str("ไท华").is_none());
1832         assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1833         assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1834
1835         assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1836         assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1837         assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1838         assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1839         assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1840
1841         assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1842         assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1843         assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1844         assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1845         assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1846     }
1847
1848     #[test]
1849     fn test_slice_chars() {
1850         fn t(a: &str, b: &str, start: uint) {
1851             assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1852         }
1853         t("", "", 0);
1854         t("hello", "llo", 2);
1855         t("hello", "el", 1);
1856         t("αβλ", "β", 1);
1857         t("αβλ", "", 3);
1858         assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1859     }
1860
1861     fn s(x: &str) -> String { x.into_string() }
1862
    // Asserts that calling `.concat()` on `$string` yields `$expected`.
    // The result is bound with an explicit `String` annotation before the
    // comparison.
    macro_rules! test_concat {
        ($expected: expr, $string: expr) => {
            {
                let s: String = $string.concat();
                assert_eq!($expected, s);
            }
        }
    }
1871
1872     #[test]
1873     fn test_concat_for_different_types() {
1874         test_concat!("ab", vec![s("a"), s("b")]);
1875         test_concat!("ab", vec!["a", "b"]);
1876         test_concat!("ab", vec!["a", "b"].as_slice());
1877         test_concat!("ab", vec![s("a"), s("b")]);
1878     }
1879
1880     #[test]
1881     fn test_concat_for_different_lengths() {
1882         let empty: &[&str] = &[];
1883         test_concat!("", empty);
1884         test_concat!("a", ["a"]);
1885         test_concat!("ab", ["a", "b"]);
1886         test_concat!("abc", ["", "a", "bc"]);
1887     }
1888
    // Asserts that calling `.connect($delim)` on `$string` yields
    // `$expected`.
    macro_rules! test_connect {
        ($expected: expr, $string: expr, $delim: expr) => {
            {
                let s = $string.connect($delim);
                assert_eq!($expected, s);
            }
        }
    }
1897
1898     #[test]
1899     fn test_connect_for_different_types() {
1900         test_connect!("a-b", ["a", "b"], "-");
1901         let hyphen = "-".into_string();
1902         test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1903         test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1904         test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1905         test_connect!("a-b", vec![s("a"), s("b")], "-");
1906     }
1907
1908     #[test]
1909     fn test_connect_for_different_lengths() {
1910         let empty: &[&str] = &[];
1911         test_connect!("", empty, "-");
1912         test_connect!("a", ["a"], "-");
1913         test_connect!("a-b", ["a", "b"], "-");
1914         test_connect!("-a-bc", ["", "a", "bc"], "-");
1915     }
1916
1917     #[test]
1918     fn test_repeat() {
1919         assert_eq!("x".repeat(4), String::from_str("xxxx"));
1920         assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1921         assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1922         assert_eq!("".repeat(4), String::from_str(""));
1923         assert_eq!("hi".repeat(0), String::from_str(""));
1924     }
1925
1926     #[test]
1927     fn test_unsafe_slice() {
1928         assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1929         assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1930         assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1931         fn a_million_letter_a() -> String {
1932             let mut i = 0u;
1933             let mut rs = String::new();
1934             while i < 100000 {
1935                 rs.push_str("aaaaaaaaaa");
1936                 i += 1;
1937             }
1938             rs
1939         }
1940         fn half_a_million_letter_a() -> String {
1941             let mut i = 0u;
1942             let mut rs = String::new();
1943             while i < 100000 {
1944                 rs.push_str("aaaaa");
1945                 i += 1;
1946             }
1947             rs
1948         }
1949         let letters = a_million_letter_a();
1950         assert!(half_a_million_letter_a() ==
1951             unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1952                                      0u,
1953                                      500000))});
1954     }
1955
1956     #[test]
1957     fn test_starts_with() {
1958         assert!(("".starts_with("")));
1959         assert!(("abc".starts_with("")));
1960         assert!(("abc".starts_with("a")));
1961         assert!((!"a".starts_with("abc")));
1962         assert!((!"".starts_with("abc")));
1963         assert!((!"ödd".starts_with("-")));
1964         assert!(("ödd".starts_with("öd")));
1965     }
1966
1967     #[test]
1968     fn test_ends_with() {
1969         assert!(("".ends_with("")));
1970         assert!(("abc".ends_with("")));
1971         assert!(("abc".ends_with("c")));
1972         assert!((!"a".ends_with("abc")));
1973         assert!((!"".ends_with("abc")));
1974         assert!((!"ddö".ends_with("-")));
1975         assert!(("ddö".ends_with("dö")));
1976     }
1977
1978     #[test]
1979     fn test_is_empty() {
1980         assert!("".is_empty());
1981         assert!(!"a".is_empty());
1982     }
1983
1984     #[test]
1985     fn test_replace() {
1986         let a = "a";
1987         assert_eq!("".replace(a, "b"), String::from_str(""));
1988         assert_eq!("a".replace(a, "b"), String::from_str("b"));
1989         assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1990         let test = "test";
1991         assert!(" test test ".replace(test, "toast") ==
1992             String::from_str(" toast toast "));
1993         assert_eq!(" test test ".replace(test, ""), String::from_str("   "));
1994     }
1995
1996     #[test]
1997     fn test_replace_2a() {
1998         let data = "ประเทศไทย中华";
1999         let repl = "دولة الكويت";
2000
2001         let a = "ประเ";
2002         let a2 = "دولة الكويتทศไทย中华";
2003         assert_eq!(data.replace(a, repl), a2);
2004     }
2005
2006     #[test]
2007     fn test_replace_2b() {
2008         let data = "ประเทศไทย中华";
2009         let repl = "دولة الكويت";
2010
2011         let b = "ะเ";
2012         let b2 = "ปรدولة الكويتทศไทย中华";
2013         assert_eq!(data.replace(b, repl), b2);
2014     }
2015
2016     #[test]
2017     fn test_replace_2c() {
2018         let data = "ประเทศไทย中华";
2019         let repl = "دولة الكويت";
2020
2021         let c = "中华";
2022         let c2 = "ประเทศไทยدولة الكويت";
2023         assert_eq!(data.replace(c, repl), c2);
2024     }
2025
2026     #[test]
2027     fn test_replace_2d() {
2028         let data = "ประเทศไทย中华";
2029         let repl = "دولة الكويت";
2030
2031         let d = "ไท华";
2032         assert_eq!(data.replace(d, repl), data);
2033     }
2034
2035     #[test]
2036     fn test_slice() {
2037         assert_eq!("ab", "abc".slice(0, 2));
2038         assert_eq!("bc", "abc".slice(1, 3));
2039         assert_eq!("", "abc".slice(1, 1));
2040         assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
2041
2042         let data = "ประเทศไทย中华";
2043         assert_eq!("ป", data.slice(0, 3));
2044         assert_eq!("ร", data.slice(3, 6));
2045         assert_eq!("", data.slice(3, 3));
2046         assert_eq!("华", data.slice(30, 33));
2047
2048         fn a_million_letter_x() -> String {
2049             let mut i = 0u;
2050             let mut rs = String::new();
2051             while i < 100000 {
2052                 rs.push_str("华华华华华华华华华华");
2053                 i += 1;
2054             }
2055             rs
2056         }
2057         fn half_a_million_letter_x() -> String {
2058             let mut i = 0u;
2059             let mut rs = String::new();
2060             while i < 100000 {
2061                 rs.push_str("华华华华华");
2062                 i += 1;
2063             }
2064             rs
2065         }
2066         let letters = a_million_letter_x();
2067         assert!(half_a_million_letter_x() ==
2068             String::from_str(letters.slice(0u, 3u * 500000u)));
2069     }
2070
2071     #[test]
2072     fn test_slice_2() {
2073         let ss = "中华Việt Nam";
2074
2075         assert_eq!("华", ss.slice(3u, 6u));
2076         assert_eq!("Việt Nam", ss.slice(6u, 16u));
2077
2078         assert_eq!("ab", "abc".slice(0u, 2u));
2079         assert_eq!("bc", "abc".slice(1u, 3u));
2080         assert_eq!("", "abc".slice(1u, 1u));
2081
2082         assert_eq!("中", ss.slice(0u, 3u));
2083         assert_eq!("华V", ss.slice(3u, 7u));
2084         assert_eq!("", ss.slice(3u, 3u));
2085         /*0: 中
2086           3: 华
2087           6: V
2088           7: i
2089           8: ệ
2090          11: t
2091          12:
2092          13: N
2093          14: a
2094          15: m */
2095     }
2096
2097     #[test]
2098     #[should_fail]
2099     fn test_slice_fail() {
2100         "中华Việt Nam".slice(0u, 2u);
2101     }
2102
2103     #[test]
2104     fn test_slice_from() {
2105         assert_eq!("abcd".slice_from(0), "abcd");
2106         assert_eq!("abcd".slice_from(2), "cd");
2107         assert_eq!("abcd".slice_from(4), "");
2108     }
2109     #[test]
2110     fn test_slice_to() {
2111         assert_eq!("abcd".slice_to(0), "");
2112         assert_eq!("abcd".slice_to(2), "ab");
2113         assert_eq!("abcd".slice_to(4), "abcd");
2114     }
2115
2116     #[test]
2117     fn test_trim_left_chars() {
2118         let v: &[char] = &[];
2119         assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
2120         let chars: &[char] = &['*', ' '];
2121         assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
2122         assert_eq!(" ***  *** ".trim_left_chars(chars), "");
2123         assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
2124
2125         assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
2126         let chars: &[char] = &['1', '2'];
2127         assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
2128         assert_eq!("123foo1bar123".trim_left_chars(|&: c: char| c.is_numeric()), "foo1bar123");
2129     }
2130
2131     #[test]
2132     fn test_trim_right_chars() {
2133         let v: &[char] = &[];
2134         assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
2135         let chars: &[char] = &['*', ' '];
2136         assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
2137         assert_eq!(" ***  *** ".trim_right_chars(chars), "");
2138         assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
2139
2140         assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
2141         let chars: &[char] = &['1', '2'];
2142         assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
2143         assert_eq!("123foo1bar123".trim_right_chars(|&: c: char| c.is_numeric()), "123foo1bar");
2144     }
2145
2146     #[test]
2147     fn test_trim_chars() {
2148         let v: &[char] = &[];
2149         assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
2150         let chars: &[char] = &['*', ' '];
2151         assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
2152         assert_eq!(" ***  *** ".trim_chars(chars), "");
2153         assert_eq!("foo".trim_chars(chars), "foo");
2154
2155         assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
2156         let chars: &[char] = &['1', '2'];
2157         assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
2158         assert_eq!("123foo1bar123".trim_chars(|&: c: char| c.is_numeric()), "foo1bar");
2159     }
2160
2161     #[test]
2162     fn test_trim_left() {
2163         assert_eq!("".trim_left(), "");
2164         assert_eq!("a".trim_left(), "a");
2165         assert_eq!("    ".trim_left(), "");
2166         assert_eq!("     blah".trim_left(), "blah");
2167         assert_eq!("   \u{3000}  wut".trim_left(), "wut");
2168         assert_eq!("hey ".trim_left(), "hey ");
2169     }
2170
2171     #[test]
2172     fn test_trim_right() {
2173         assert_eq!("".trim_right(), "");
2174         assert_eq!("a".trim_right(), "a");
2175         assert_eq!("    ".trim_right(), "");
2176         assert_eq!("blah     ".trim_right(), "blah");
2177         assert_eq!("wut   \u{3000}  ".trim_right(), "wut");
2178         assert_eq!(" hey".trim_right(), " hey");
2179     }
2180
2181     #[test]
2182     fn test_trim() {
2183         assert_eq!("".trim(), "");
2184         assert_eq!("a".trim(), "a");
2185         assert_eq!("    ".trim(), "");
2186         assert_eq!("    blah     ".trim(), "blah");
2187         assert_eq!("\nwut   \u{3000}  ".trim(), "wut");
2188         assert_eq!(" hey dude ".trim(), "hey dude");
2189     }
2190
2191     #[test]
2192     fn test_is_whitespace() {
2193         assert!("".is_whitespace());
2194         assert!(" ".is_whitespace());
2195         assert!("\u{2009}".is_whitespace()); // Thin space
2196         assert!("  \n\t   ".is_whitespace());
2197         assert!(!"   _   ".is_whitespace());
2198     }
2199
2200     #[test]
2201     fn test_slice_shift_char() {
2202         let data = "ประเทศไทย中";
2203         assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
2204     }
2205
2206     #[test]
2207     fn test_slice_shift_char_2() {
2208         let empty = "";
2209         assert_eq!(empty.slice_shift_char(), None);
2210     }
2211
2212     #[test]
2213     fn test_is_utf8() {
2214         // deny overlong encodings
2215         assert!(!is_utf8(&[0xc0, 0x80]));
2216         assert!(!is_utf8(&[0xc0, 0xae]));
2217         assert!(!is_utf8(&[0xe0, 0x80, 0x80]));
2218         assert!(!is_utf8(&[0xe0, 0x80, 0xaf]));
2219         assert!(!is_utf8(&[0xe0, 0x81, 0x81]));
2220         assert!(!is_utf8(&[0xf0, 0x82, 0x82, 0xac]));
2221         assert!(!is_utf8(&[0xf4, 0x90, 0x80, 0x80]));
2222
2223         // deny surrogates
2224         assert!(!is_utf8(&[0xED, 0xA0, 0x80]));
2225         assert!(!is_utf8(&[0xED, 0xBF, 0xBF]));
2226
2227         assert!(is_utf8(&[0xC2, 0x80]));
2228         assert!(is_utf8(&[0xDF, 0xBF]));
2229         assert!(is_utf8(&[0xE0, 0xA0, 0x80]));
2230         assert!(is_utf8(&[0xED, 0x9F, 0xBF]));
2231         assert!(is_utf8(&[0xEE, 0x80, 0x80]));
2232         assert!(is_utf8(&[0xEF, 0xBF, 0xBF]));
2233         assert!(is_utf8(&[0xF0, 0x90, 0x80, 0x80]));
2234         assert!(is_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]));
2235     }
2236
    // Feeds `unicode::str::is_utf16` a set of inputs that must be accepted
    // (via the local `pos!` macro) and a set that must be rejected (via the
    // local `neg!` macro).
    #[test]
    fn test_is_utf16() {
        use unicode::str::is_utf16;
        // `pos!` asserts that every listed input passes `is_utf16`.
        macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });

        // non-surrogates
        pos!(&[0x0000],
             &[0x0001, 0x0002],
             &[0xD7FF],
             &[0xE000]);

        // surrogate pairs (randomly generated with Python 3's
        // .encode('utf-16be'))
        pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
             &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
             &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

        // mixtures (also random)
        pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
             &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
             &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

        // negative tests
        // `neg!` asserts that every listed input fails `is_utf16`.
        macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });

        neg!(
            // surrogate + regular unit
            &[0xdb45, 0x0000],
            // surrogate + lead surrogate
            &[0xd900, 0xd900],
            // unterminated surrogate
            &[0xd8ff],
            // trail surrogate without a lead
            &[0xddb7]);

        // random byte sequences that Python 3's .decode('utf-16be')
        // failed on
        neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
             &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
             &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
             &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
             &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
             &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
             &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
             &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
             &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
             &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
             &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
             &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
             &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
             &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
             &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
             &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
             &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
             &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
             &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
             &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
             &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
    }
2296
2297     #[test]
2298     fn test_as_bytes() {
2299         // no null
2300         let v = [
2301             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2302             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2303             109
2304         ];
2305         let b: &[u8] = &[];
2306         assert_eq!("".as_bytes(), b);
2307         assert_eq!("abc".as_bytes(), b"abc");
2308         assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
2309     }
2310
2311     #[test]
2312     #[should_fail]
2313     fn test_as_bytes_fail() {
2314         // Don't double free. (I'm not sure if this exercises the
2315         // original problem code path anymore.)
2316         let s = String::from_str("");
2317         let _bytes = s.as_bytes();
2318         panic!();
2319     }
2320
2321     #[test]
2322     fn test_as_ptr() {
2323         let buf = "hello".as_ptr();
2324         unsafe {
2325             assert_eq!(*buf.offset(0), b'h');
2326             assert_eq!(*buf.offset(1), b'e');
2327             assert_eq!(*buf.offset(2), b'l');
2328             assert_eq!(*buf.offset(3), b'l');
2329             assert_eq!(*buf.offset(4), b'o');
2330         }
2331     }
2332
2333     #[test]
2334     fn test_subslice_offset() {
2335         let a = "kernelsprite";
2336         let b = a.slice(7, a.len());
2337         let c = a.slice(0, a.len() - 6);
2338         assert_eq!(a.subslice_offset(b), 7);
2339         assert_eq!(a.subslice_offset(c), 0);
2340
2341         let string = "a\nb\nc";
2342         let lines: Vec<&str> = string.lines().collect();
2343         assert_eq!(string.subslice_offset(lines[0]), 0);
2344         assert_eq!(string.subslice_offset(lines[1]), 2);
2345         assert_eq!(string.subslice_offset(lines[2]), 4);
2346     }
2347
2348     #[test]
2349     #[should_fail]
2350     fn test_subslice_offset_2() {
2351         let a = "alchemiter";
2352         let b = "cruxtruder";
2353         a.subslice_offset(b);
2354     }
2355
2356     #[test]
2357     fn vec_str_conversions() {
2358         let s1: String = String::from_str("All mimsy were the borogoves");
2359
2360         let v: Vec<u8> = s1.as_bytes().to_vec();
2361         let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
2362         let mut i: uint = 0u;
2363         let n1: uint = s1.len();
2364         let n2: uint = v.len();
2365         assert_eq!(n1, n2);
2366         while i < n1 {
2367             let a: u8 = s1.as_bytes()[i];
2368             let b: u8 = s2.as_bytes()[i];
2369             debug!("{}", a);
2370             debug!("{}", b);
2371             assert_eq!(a, b);
2372             i += 1u;
2373         }
2374     }
2375
2376     #[test]
2377     fn test_contains() {
2378         assert!("abcde".contains("bcd"));
2379         assert!("abcde".contains("abcd"));
2380         assert!("abcde".contains("bcde"));
2381         assert!("abcde".contains(""));
2382         assert!("".contains(""));
2383         assert!(!"abcde".contains("def"));
2384         assert!(!"".contains("a"));
2385
2386         let data = "ประเทศไทย中华Việt Nam";
2387         assert!(data.contains("ประเ"));
2388         assert!(data.contains("ะเ"));
2389         assert!(data.contains("中华"));
2390         assert!(!data.contains("ไท华"));
2391     }
2392
2393     #[test]
2394     fn test_contains_char() {
2395         assert!("abc".contains_char('b'));
2396         assert!("a".contains_char('a'));
2397         assert!(!"abc".contains_char('d'));
2398         assert!(!"".contains_char('a'));
2399     }
2400
2401     #[test]
2402     fn test_char_at() {
2403         let s = "ศไทย中华Việt Nam";
2404         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2405         let mut pos = 0;
2406         for ch in v.iter() {
2407             assert!(s.char_at(pos) == *ch);
2408             pos += String::from_char(1, *ch).len();
2409         }
2410     }
2411
2412     #[test]
2413     fn test_char_at_reverse() {
2414         let s = "ศไทย中华Việt Nam";
2415         let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2416         let mut pos = s.len();
2417         for ch in v.iter().rev() {
2418             assert!(s.char_at_reverse(pos) == *ch);
2419             pos -= String::from_char(1, *ch).len();
2420         }
2421     }
2422
2423     #[test]
2424     fn test_escape_unicode() {
2425         assert_eq!("abc".escape_unicode(),
2426                    String::from_str("\\u{61}\\u{62}\\u{63}"));
2427         assert_eq!("a c".escape_unicode(),
2428                    String::from_str("\\u{61}\\u{20}\\u{63}"));
2429         assert_eq!("\r\n\t".escape_unicode(),
2430                    String::from_str("\\u{d}\\u{a}\\u{9}"));
2431         assert_eq!("'\"\\".escape_unicode(),
2432                    String::from_str("\\u{27}\\u{22}\\u{5c}"));
2433         assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2434                    String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2435         assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2436                    String::from_str("\\u{100}\\u{ffff}"));
2437         assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2438                    String::from_str("\\u{10000}\\u{10ffff}"));
2439         assert_eq!("ab\u{fb00}".escape_unicode(),
2440                    String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2441         assert_eq!("\u{1d4ea}\r".escape_unicode(),
2442                    String::from_str("\\u{1d4ea}\\u{d}"));
2443     }
2444
2445     #[test]
2446     fn test_escape_default() {
2447         assert_eq!("abc".escape_default(), String::from_str("abc"));
2448         assert_eq!("a c".escape_default(), String::from_str("a c"));
2449         assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2450         assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2451         assert_eq!("\u{100}\u{ffff}".escape_default(),
2452                    String::from_str("\\u{100}\\u{ffff}"));
2453         assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2454                    String::from_str("\\u{10000}\\u{10ffff}"));
2455         assert_eq!("ab\u{fb00}".escape_default(),
2456                    String::from_str("ab\\u{fb00}"));
2457         assert_eq!("\u{1d4ea}\r".escape_default(),
2458                    String::from_str("\\u{1d4ea}\\r"));
2459     }
2460
2461     #[test]
2462     fn test_total_ord() {
2463         "1234".cmp("123") == Greater;
2464         "123".cmp("1234") == Less;
2465         "1234".cmp("1234") == Equal;
2466         "12345555".cmp("123456") == Less;
2467         "22".cmp("1234") == Greater;
2468     }
2469
2470     #[test]
2471     fn test_char_range_at() {
2472         let data = "b¢€𤭢𤭢€¢b";
2473         assert_eq!('b', data.char_range_at(0).ch);
2474         assert_eq!('¢', data.char_range_at(1).ch);
2475         assert_eq!('€', data.char_range_at(3).ch);
2476         assert_eq!('𤭢', data.char_range_at(6).ch);
2477         assert_eq!('𤭢', data.char_range_at(10).ch);
2478         assert_eq!('€', data.char_range_at(14).ch);
2479         assert_eq!('¢', data.char_range_at(17).ch);
2480         assert_eq!('b', data.char_range_at(19).ch);
2481     }
2482
2483     #[test]
2484     fn test_char_range_at_reverse_underflow() {
2485         assert_eq!("abc".char_range_at_reverse(0).next, 0);
2486     }
2487
2488     #[test]
2489     fn test_iterator() {
2490         let s = "ศไทย中华Việt Nam";
2491         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2492
2493         let mut pos = 0;
2494         let mut it = s.chars();
2495
2496         for c in it {
2497             assert_eq!(c, v[pos]);
2498             pos += 1;
2499         }
2500         assert_eq!(pos, v.len());
2501     }
2502
2503     #[test]
2504     fn test_rev_iterator() {
2505         let s = "ศไทย中华Việt Nam";
2506         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2507
2508         let mut pos = 0;
2509         let mut it = s.chars().rev();
2510
2511         for c in it {
2512             assert_eq!(c, v[pos]);
2513             pos += 1;
2514         }
2515         assert_eq!(pos, v.len());
2516     }
2517
2518     #[test]
2519     fn test_chars_decoding() {
2520         let mut bytes = [0u8, ..4];
2521         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2522             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2523             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2524             if Some(c) != s.chars().next() {
2525                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2526             }
2527         }
2528     }
2529
2530     #[test]
2531     fn test_chars_rev_decoding() {
2532         let mut bytes = [0u8, ..4];
2533         for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2534             let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2535             let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2536             if Some(c) != s.chars().rev().next() {
2537                 panic!("character {:x}={} does not decode correctly", c as u32, c);
2538             }
2539         }
2540     }
2541
2542     #[test]
2543     fn test_iterator_clone() {
2544         let s = "ศไทย中华Việt Nam";
2545         let mut it = s.chars();
2546         it.next();
2547         assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2548     }
2549
2550     #[test]
2551     fn test_bytesator() {
2552         let s = "ศไทย中华Việt Nam";
2553         let v = [
2554             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2555             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2556             109
2557         ];
2558         let mut pos = 0;
2559
2560         for b in s.bytes() {
2561             assert_eq!(b, v[pos]);
2562             pos += 1;
2563         }
2564     }
2565
2566     #[test]
2567     fn test_bytes_revator() {
2568         let s = "ศไทย中华Việt Nam";
2569         let v = [
2570             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2571             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2572             109
2573         ];
2574         let mut pos = v.len();
2575
2576         for b in s.bytes().rev() {
2577             pos -= 1;
2578             assert_eq!(b, v[pos]);
2579         }
2580     }
2581
2582     #[test]
2583     fn test_char_indicesator() {
2584         let s = "ศไทย中华Việt Nam";
2585         let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2586         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2587
2588         let mut pos = 0;
2589         let mut it = s.char_indices();
2590
2591         for c in it {
2592             assert_eq!(c, (p[pos], v[pos]));
2593             pos += 1;
2594         }
2595         assert_eq!(pos, v.len());
2596         assert_eq!(pos, p.len());
2597     }
2598
2599     #[test]
2600     fn test_char_indices_revator() {
2601         let s = "ศไทย中华Việt Nam";
2602         let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2603         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2604
2605         let mut pos = 0;
2606         let mut it = s.char_indices().rev();
2607
2608         for c in it {
2609             assert_eq!(c, (p[pos], v[pos]));
2610             pos += 1;
2611         }
2612         assert_eq!(pos, v.len());
2613         assert_eq!(pos, p.len());
2614     }
2615
2616     #[test]
2617     fn test_splitn_char_iterator() {
2618         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2619
2620         let split: Vec<&str> = data.splitn(3, ' ').collect();
2621         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2622
2623         let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2624         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2625
2626         // Unicode
2627         let split: Vec<&str> = data.splitn(3, 'ä').collect();
2628         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2629
2630         let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2631         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2632     }
2633
// `split` keeps the empty piece after a trailing separator, whereas
// `split_terminator` leaves it out.
#[test]
fn test_split_char_iterator_no_trailing() {
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing = text.split('\n').collect::<Vec<&str>>();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let terminated = text.split_terminator('\n').collect::<Vec<&str>>();
    assert_eq!(terminated, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
2644
// `words()` must return only the non-whitespace runs of an input that mixes
// spaces, tabs and newlines, including leading and repeated whitespace.
#[test]
fn test_words() {
    let text = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
    let expected = vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"];

    let found = text.words().collect::<Vec<&str>>();
    assert_eq!(found, expected);
}
2651
// Table-driven check of `nfd_chars()`: each case pairs an input string with
// the string its `nfd_chars()` output must collect into.
#[test]
fn test_nfd_chars() {
    let cases = [
        ("abc", "abc"),
        ("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}"),
        ("\u{2026}", "\u{2026}"),
        ("\u{2126}", "\u{3a9}"),
        ("\u{1e0b}\u{323}", "d\u{323}\u{307}"),
        ("\u{1e0d}\u{307}", "d\u{323}\u{307}"),
        ("a\u{301}", "a\u{301}"),
        ("\u{301}a", "\u{301}a"),
        ("\u{d4db}", "\u{1111}\u{1171}\u{11b6}"),
        ("\u{ac1c}", "\u{1100}\u{1162}"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(input.nfd_chars().collect::<String>(), expected);
    }
}
2670
// Table-driven check of `nfkd_chars()`: each case pairs an input string with
// the string its `nfkd_chars()` output must collect into.
#[test]
fn test_nfkd_chars() {
    let cases = [
        ("abc", "abc"),
        ("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}"),
        ("\u{2026}", "..."),
        ("\u{2126}", "\u{3a9}"),
        ("\u{1e0b}\u{323}", "d\u{323}\u{307}"),
        ("\u{1e0d}\u{307}", "d\u{323}\u{307}"),
        ("a\u{301}", "a\u{301}"),
        ("\u{301}a", "\u{301}a"),
        ("\u{d4db}", "\u{1111}\u{1171}\u{11b6}"),
        ("\u{ac1c}", "\u{1100}\u{1162}"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(input.nfkd_chars().collect::<String>(), expected);
    }
}
2689
// Table-driven check of `nfc_chars()`: each case pairs an input string with
// the string its `nfc_chars()` output must collect into.
#[test]
fn test_nfc_chars() {
    let cases = [
        ("abc", "abc"),
        ("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}"),
        ("\u{2026}", "\u{2026}"),
        ("\u{2126}", "\u{3a9}"),
        ("\u{1e0b}\u{323}", "\u{1e0d}\u{307}"),
        ("\u{1e0d}\u{307}", "\u{1e0d}\u{307}"),
        ("a\u{301}", "\u{e1}"),
        ("\u{301}a", "\u{301}a"),
        ("\u{d4db}", "\u{d4db}"),
        ("\u{ac1c}", "\u{ac1c}"),
        ("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(input.nfc_chars().collect::<String>(), expected);
    }
}
2709
// Table-driven check of `nfkc_chars()`: each case pairs an input string with
// the string its `nfkc_chars()` output must collect into.
#[test]
fn test_nfkc_chars() {
    let cases = [
        ("abc", "abc"),
        ("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}"),
        ("\u{2026}", "..."),
        ("\u{2126}", "\u{3a9}"),
        ("\u{1e0b}\u{323}", "\u{1e0d}\u{307}"),
        ("\u{1e0d}\u{307}", "\u{1e0d}\u{307}"),
        ("a\u{301}", "\u{e1}"),
        ("\u{301}a", "\u{301}a"),
        ("\u{d4db}", "\u{d4db}"),
        ("\u{ac1c}", "\u{ac1c}"),
        ("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b"),
    ];

    for &(input, expected) in cases.iter() {
        assert_eq!(input.nfkc_chars().collect::<String>(), expected);
    }
}
2729
// `lines()` must produce the same lines whether or not the input ends with a
// final newline; empty lines in the middle are preserved.
#[test]
fn test_lines() {
    let expected = vec!["", "Märy häd ä little lämb", "", "Little lämb"];
    let inputs = [
        "\nMäry häd ä little lämb\n\nLittle lämb\n",
        "\nMäry häd ä little lämb\n\nLittle lämb", // no trailing \n
    ];

    for &data in inputs.iter() {
        let lines: Vec<&str> = data.lines().collect();
        assert_eq!(lines, expected);
    }
}
2740
2741     #[test]
2742     fn test_graphemes() {
2743         use core::iter::order;
2744         // official Unicode test data
2745         // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2746         let test_same: [(_, &[_]), .. 325] = [
2747             ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2748             ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2749             ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2750             ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2751             ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2752             ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2753             ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2754             ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2755             ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2756             ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2757             ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2758             ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2759             ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2760             ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2761             ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2762             ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2763             ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2764             ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2765             ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2766             ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2767             ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2768             ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2769             ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2770             ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2771             ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2772             ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2773             ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2774             ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2775             ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2776             ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2777             ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2778             ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2779             ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2780             ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2781             ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2782             ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2783             ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2784             ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2785             ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2786             ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2787             ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2788             ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2789             ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2790             ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2791             ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2792             ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2793             ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2794             ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2795             ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2796             ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2797             ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2798             ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2799             ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2800             ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2801             ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2802             ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2803             ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2804             ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2805             ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2806             ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2807             ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2808             ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2809             ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2810             ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2811             ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2812             ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2813             ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2814             ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2815             ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2816             ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2817             ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2818             ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2819             ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2820             ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2821             ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2822             ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2823             ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2824             ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2825             ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2826             ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2827             ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2828             ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2829             ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2830             ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2831             ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2832             ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2833             ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2834             ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2835             ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2836             ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2837             ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2838             ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2839             ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2840             ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2841             ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2842             ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2843             ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2844             ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2845             ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2846             ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2847             ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2848             ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2849             ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2850             ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2851             ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2852             ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2853             ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2854             ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2855             ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2856             ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2857             ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2858             ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2859             ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2860             ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2861             ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2862             ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2863             ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2864             ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2865             ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2866             ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2867             ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2868             ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2869             ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2870             ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2871             ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2872             ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2873             ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2874             ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2875             ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2876             ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2877             ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2878             ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2879             ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2880             ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2881             ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2882             ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2883             ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2884             ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2885             ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2886             ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2887             ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2888             ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2889             ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2890             ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2891             ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2892             ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2893             ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2894             ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2895             ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2896             ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2897             ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2898             ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2899             ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2900             ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2901             ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2902             ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2903             ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2904             ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2905             ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2906             ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2907             ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2908             ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2909             ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2910             ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2911             ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2912             ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2913             ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2914             ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2915             ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2916             ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2917             ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2918             ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2919             ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2920             ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2921             ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2922             ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2923             ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2924             ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2925             ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2926             ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2927             ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2928             ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2929             ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2930             ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2931             ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2932             ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2933             ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2934             ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2935             ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2936             ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2937             ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2938             ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2939             ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2940             ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2941             ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2942             ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2943             ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2944             ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2945             ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2946             ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2947             ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2948             ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2949             ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2950             ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2951             ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2952             ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2953             ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2954             ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2955             ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2956             ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2957             ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2958             ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2959             ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2960             ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2961             ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2962             ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2963             ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2964             ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2965             ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2966             ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2967             ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2968             ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2969             ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2970             ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2971             ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2972             ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2973             ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2974             ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2975             ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2976             ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2977             ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2978             ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2979             ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2980             ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2981             ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2982             ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2983             ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2984             ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2985             ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2986             ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2987             ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2988             ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2989             ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2990             ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2991             ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2992             ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2993             ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2994             ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2995             ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2996             ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2997             ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2998             ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2999             ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
3000             ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
3001             ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
3002             ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
3003             ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
3004             ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
3005             ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
3006             ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
3007             ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
3008             ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
3009             ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
3010             ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
3011             ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
3012             ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
3013             ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
3014             ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
3015             ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
3016             ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
3017             ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
3018             ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
3019             ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
3020             ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
3021             ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
3022             ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
3023             ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
3024             ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
3025             ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
3026             ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
3027             ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
3028             ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
3029             ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
3030             ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
3031             ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
3032             ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
3033             ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
3034             ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
3035             ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
3036             ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
3037             ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
3038             ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
3039             ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
3040             ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
3041             ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
3042             ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
3043             ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
3044             ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
3045             ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
3046             ("\u{378}\u{300}", &["\u{378}\u{300}"]),
3047             ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
3048             ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
3049             ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
3050             ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
3051             ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
3052             ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
3053             ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
3054             ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
3055             ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
3056             ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
3057             ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
3058             ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
3059             ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
3060             ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
3061             ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
3062             ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
3063             ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
3064             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
3065             ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
3066             &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
3067             ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
3068              &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
3069             ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
3070             ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
3071              "\u{1F1E7}\u{1F1E8}"]),
3072             ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
3073              &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
3074             ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
3075             ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
3076         ];
3077
3078         let test_diff: [(_, &[_], &[_]), .. 23] = [
3079             ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
3080             &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
3081             &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
3082             &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
3083             &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
3084             &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
3085             &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
3086             &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
3087             &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
3088             &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
3089             &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
3090             &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
3091             &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
3092             &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
3093             &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
3094             &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
3095             &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
3096             &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
3097             &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
3098             &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
3099             &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
3100             &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
3101             &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
3102         ];
3103
3104         for &(s, g) in test_same.iter() {
3105             // test forward iterator
3106             assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
3107             assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
3108
3109             // test reverse iterator
3110             assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
3111             assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
3112         }
3113
3114         for &(s, gt, gf) in test_diff.iter() {
3115             // test forward iterator
3116             assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
3117             assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
3118
3119             // test reverse iterator
3120             assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
3121             assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
3122         }
3123
3124         // test the indices iterators
3125         let s = "a̐éö̲\r\n";
3126         let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
3127         let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
3128         assert_eq!(gr_inds, b);
3129         let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
3130         let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
3131         assert_eq!(gr_inds, b);
3132         let mut gr_inds_iter = s.grapheme_indices(true);
3133         {
3134             let gr_inds = gr_inds_iter.by_ref();
3135             let e1 = gr_inds.size_hint();
3136             assert_eq!(e1, (1, Some(13)));
3137             let c = gr_inds.count();
3138             assert_eq!(c, 4);
3139         }
3140         let e2 = gr_inds_iter.size_hint();
3141         assert_eq!(e2, (0, Some(0)));
3142
3143         // make sure the reverse iterator does the right thing with "\n" at beginning of string
3144         let s = "\n\r\n\r";
3145         let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
3146         let b: &[_] = &["\r", "\r\n", "\n"];
3147         assert_eq!(gr, b);
3148     }
3149
3150     #[test]
3151     fn test_split_strator() {
3152         fn t(s: &str, sep: &str, u: &[&str]) {
3153             let v: Vec<&str> = s.split_str(sep).collect();
3154             assert_eq!(v, u);
3155         }
3156         t("--1233345--", "12345", &["--1233345--"]);
3157         t("abc::hello::there", "::", &["abc", "hello", "there"]);
3158         t("::hello::there", "::", &["", "hello", "there"]);
3159         t("hello::there::", "::", &["hello", "there", ""]);
3160         t("::hello::there::", "::", &["", "hello", "there", ""]);
3161         t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
3162         t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
3163         t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
3164         t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
3165         t("", ".", &[""]);
3166         t("zz", "zz", &["",""]);
3167         t("ok", "z", &["ok"]);
3168         t("zzz", "zz", &["","z"]);
3169         t("zzzzz", "zz", &["","","z"]);
3170     }
3171
3172     #[test]
3173     fn test_str_default() {
3174         use core::default::Default;
3175         fn t<S: Default + Str>() {
3176             let s: S = Default::default();
3177             assert_eq!(s.as_slice(), "");
3178         }
3179
3180         t::<&str>();
3181         t::<String>();
3182     }
3183
3184     #[test]
3185     fn test_str_container() {
3186         fn sum_len(v: &[&str]) -> uint {
3187             v.iter().map(|x| x.len()).sum()
3188         }
3189
3190         let s = String::from_str("01234");
3191         assert_eq!(5, sum_len(&["012", "", "34"]));
3192         assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
3193                                 String::from_str("2").as_slice(),
3194                                 String::from_str("34").as_slice(),
3195                                 String::from_str("").as_slice()]));
3196         assert_eq!(5, sum_len(&[s.as_slice()]));
3197     }
3198
3199     #[test]
3200     fn test_str_from_utf8() {
3201         let xs = b"hello";
3202         assert_eq!(from_utf8(xs), Ok("hello"));
3203
3204         let xs = "ศไทย中华Việt Nam".as_bytes();
3205         assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
3206
3207         let xs = b"hello\xFF";
3208         assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
3209     }
3210
3211     #[test]
3212     fn test_maybe_owned_traits() {
3213         let s = Slice("abcde");
3214         assert_eq!(s.len(), 5);
3215         assert_eq!(s.as_slice(), "abcde");
3216         assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
3217         assert_eq!(format!("{}", s).as_slice(), "abcde");
3218         assert!(s.lt(&Owned(String::from_str("bcdef"))));
3219         assert_eq!(Slice(""), Default::default());
3220
3221         let o = Owned(String::from_str("abcde"));
3222         assert_eq!(o.len(), 5);
3223         assert_eq!(o.as_slice(), "abcde");
3224         assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
3225         assert_eq!(format!("{}", o).as_slice(), "abcde");
3226         assert!(o.lt(&Slice("bcdef")));
3227         assert_eq!(Owned(String::from_str("")), Default::default());
3228
3229         assert!(s.cmp(&o) == Equal);
3230         assert!(s.equiv(&o));
3231
3232         assert!(o.cmp(&s) == Equal);
3233         assert!(o.equiv(&s));
3234     }
3235
3236     #[test]
3237     fn test_maybe_owned_methods() {
3238         let s = Slice("abcde");
3239         assert!(s.is_slice());
3240         assert!(!s.is_owned());
3241
3242         let o = Owned(String::from_str("abcde"));
3243         assert!(!o.is_slice());
3244         assert!(o.is_owned());
3245     }
3246
3247     #[test]
3248     fn test_maybe_owned_clone() {
3249         assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
3250         assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
3251         assert_eq!(Slice("abcde"), Slice("abcde").clone());
3252         assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
3253     }
3254
3255     #[test]
3256     fn test_maybe_owned_into_string() {
3257         assert_eq!(Slice("abcde").to_string(), String::from_str("abcde"));
3258         assert_eq!(Owned(String::from_str("abcde")).to_string(),
3259                    String::from_str("abcde"));
3260     }
3261
3262     #[test]
3263     fn test_into_maybe_owned() {
3264         assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
3265         assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
3266         assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
3267         assert_eq!((String::from_str("abcde")).into_maybe_owned(),
3268                    Owned(String::from_str("abcde")));
3269     }
3270 }
3271
3272 #[cfg(test)]
3273 mod bench {
3274     use super::*;
3275     use prelude::{SliceExt, IteratorExt, DoubleEndedIteratorExt, SliceConcatExt};
3276     use test::Bencher;
3277     use test::black_box;
3278
3279     #[bench]
3280     fn char_iterator(b: &mut Bencher) {
3281         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3282
3283         b.iter(|| s.chars().count());
3284     }
3285
3286     #[bench]
3287     fn char_iterator_for(b: &mut Bencher) {
3288         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3289
3290         b.iter(|| {
3291             for ch in s.chars() { black_box(ch) }
3292         });
3293     }
3294
3295     #[bench]
3296     fn char_iterator_ascii(b: &mut Bencher) {
3297         let s = "Mary had a little lamb, Little lamb
3298         Mary had a little lamb, Little lamb
3299         Mary had a little lamb, Little lamb
3300         Mary had a little lamb, Little lamb
3301         Mary had a little lamb, Little lamb
3302         Mary had a little lamb, Little lamb";
3303
3304         b.iter(|| s.chars().count());
3305     }
3306
3307     #[bench]
3308     fn char_iterator_rev(b: &mut Bencher) {
3309         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3310
3311         b.iter(|| s.chars().rev().count());
3312     }
3313
3314     #[bench]
3315     fn char_iterator_rev_for(b: &mut Bencher) {
3316         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3317
3318         b.iter(|| {
3319             for ch in s.chars().rev() { black_box(ch) }
3320         });
3321     }
3322
3323     #[bench]
3324     fn char_indicesator(b: &mut Bencher) {
3325         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3326         let len = s.char_len();
3327
3328         b.iter(|| assert_eq!(s.char_indices().count(), len));
3329     }
3330
3331     #[bench]
3332     fn char_indicesator_rev(b: &mut Bencher) {
3333         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3334         let len = s.char_len();
3335
3336         b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
3337     }
3338
3339     #[bench]
3340     fn split_unicode_ascii(b: &mut Bencher) {
3341         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3342
3343         b.iter(|| assert_eq!(s.split('V').count(), 3));
3344     }
3345
3346     #[bench]
3347     fn split_unicode_not_ascii(b: &mut Bencher) {
3348         struct NotAscii(char);
3349         impl CharEq for NotAscii {
3350             fn matches(&mut self, c: char) -> bool {
3351                 let NotAscii(cc) = *self;
3352                 cc == c
3353             }
3354             fn only_ascii(&self) -> bool { false }
3355         }
3356         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3357
3358         b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
3359     }
3360
3361
3362     #[bench]
3363     fn split_ascii(b: &mut Bencher) {
3364         let s = "Mary had a little lamb, Little lamb, little-lamb.";
3365         let len = s.split(' ').count();
3366
3367         b.iter(|| assert_eq!(s.split(' ').count(), len));
3368     }
3369
3370     #[bench]
3371     fn split_not_ascii(b: &mut Bencher) {
3372         struct NotAscii(char);
3373         impl CharEq for NotAscii {
3374             #[inline]
3375             fn matches(&mut self, c: char) -> bool {
3376                 let NotAscii(cc) = *self;
3377                 cc == c
3378             }
3379             fn only_ascii(&self) -> bool { false }
3380         }
3381         let s = "Mary had a little lamb, Little lamb, little-lamb.";
3382         let len = s.split(' ').count();
3383
3384         b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
3385     }
3386
3387     #[bench]
3388     fn split_extern_fn(b: &mut Bencher) {
3389         let s = "Mary had a little lamb, Little lamb, little-lamb.";
3390         let len = s.split(' ').count();
3391         fn pred(c: char) -> bool { c == ' ' }
3392
3393         b.iter(|| assert_eq!(s.split(pred).count(), len));
3394     }
3395
3396     #[bench]
3397     fn split_closure(b: &mut Bencher) {
3398         let s = "Mary had a little lamb, Little lamb, little-lamb.";
3399         let len = s.split(' ').count();
3400
3401         b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
3402     }
3403
3404     #[bench]
3405     fn split_slice(b: &mut Bencher) {
3406         let s = "Mary had a little lamb, Little lamb, little-lamb.";
3407         let len = s.split(' ').count();
3408
3409         let c: &[char] = &[' '];
3410         b.iter(|| assert_eq!(s.split(c).count(), len));
3411     }
3412
3413     #[bench]
3414     fn is_utf8_100_ascii(b: &mut Bencher) {
3415
3416         let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
3417                   Lorem ipsum dolor sit amet, consectetur. ";
3418
3419         assert_eq!(100, s.len());
3420         b.iter(|| {
3421             is_utf8(s)
3422         });
3423     }
3424
3425     #[bench]
3426     fn is_utf8_100_multibyte(b: &mut Bencher) {
3427         let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
3428         assert_eq!(100, s.len());
3429         b.iter(|| {
3430             is_utf8(s)
3431         });
3432     }
3433
3434     #[bench]
3435     fn bench_connect(b: &mut Bencher) {
3436         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3437         let sep = "→";
3438         let v = vec![s, s, s, s, s, s, s, s, s, s];
3439         b.iter(|| {
3440             assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
3441         })
3442     }
3443
3444     #[bench]
3445     fn bench_contains_short_short(b: &mut Bencher) {
3446         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3447         let needle = "sit";
3448
3449         b.iter(|| {
3450             assert!(haystack.contains(needle));
3451         })
3452     }
3453
3454     #[bench]
3455     fn bench_contains_short_long(b: &mut Bencher) {
3456         let haystack = "\
3457 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
3458 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
3459 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
3460 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
3461 tempus vel, gravida nec quam.
3462
3463 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
3464 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
3465 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
3466 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
3467 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
3468 interdum. Curabitur ut nisi justo.
3469
3470 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
3471 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3472 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3473 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3474 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3475 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3476 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3477 Aliquam sit amet placerat lorem.
3478
3479 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3480 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3481 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3482 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3483 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3484 cursus accumsan.
3485
3486 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3487 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3488 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3489 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3490 malesuada sollicitudin quam eu fermentum.";
3491         let needle = "english";
3492
3493         b.iter(|| {
3494             assert!(!haystack.contains(needle));
3495         })
3496     }
3497
3498     #[bench]
3499     fn bench_contains_bad_naive(b: &mut Bencher) {
3500         let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3501         let needle = "aaaaaaaab";
3502
3503         b.iter(|| {
3504             assert!(!haystack.contains(needle));
3505         })
3506     }
3507
3508     #[bench]
3509     fn bench_contains_equal(b: &mut Bencher) {
3510         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3511         let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3512
3513         b.iter(|| {
3514             assert!(haystack.contains(needle));
3515         })
3516     }
3517 }