src/libstd/str.rs

   1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12
  13 Unicode string manipulation (`str` type)
  14
  15 # Basic Usage
  16
  17 Rust's string type is one of the core primitive types of the language. While
  18 represented by the name `str`, the name `str` is not actually a valid type in
  19 Rust. Each string must also be decorated with its ownership. This means that
  20 there are two common kinds of strings in Rust:
  21
  22 * `~str` - This is an owned string. This type obeys all of the normal semantics
  23            of the `Box<T>` types, meaning that it has one, and only one,
  24            owner. This type cannot be implicitly copied, and is moved out of
  25            when passed to other functions.
  26
  27 * `&str` - This is the borrowed string type. This type of string can only be
  28            created from the other kind of string. As the name "borrowed"
  29            implies, this type of string is owned elsewhere, and this string
  30            cannot be moved out of.
  31
As an example, here are a few different kinds of strings.
  33
  34 ```rust
  35 fn main() {
  36     let owned_string = "I am an owned string".to_owned();
  37     let borrowed_string1 = "This string is borrowed with the 'static lifetime";
  38     let borrowed_string2: &str = owned_string;   // owned strings can be borrowed
  39 }
  40 ```
  41
  42 From the example above, you can see that Rust has 2 different kinds of string
  43 literals. The owned literals correspond to the owned string types, but the
  44 "borrowed literal" is actually more akin to C's concept of a static string.
  45
  46 When a string is declared without a `~` sigil, then the string is allocated
  47 statically in the rodata of the executable/library. The string then has the
  48 type `&'static str` meaning that the string is valid for the `'static`
  49 lifetime, otherwise known as the lifetime of the entire program. As can be
  50 inferred from the type, these static strings are not mutable.
  51
  52 # Mutability
  53
Many languages have immutable strings by default, and Rust has its own
flavor of this idea. As with the rest of Rust's types, strings are immutable by
  56 default. If a string is declared as `mut`, however, it may be mutated. This
  57 works the same way as the rest of Rust's type system in the sense that if
  58 there's a mutable reference to a string, there may only be one mutable reference
  59 to that string. With these guarantees, strings can easily transition between
  60 being mutable/immutable with the same benefits of having mutable strings in
  61 other languages.
  62
  63 # Representation
  64
  65 Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
  66 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
  67 encoded UTF-8 sequences. Additionally, strings are not null-terminated
  68 and can contain null codepoints.
  69
The actual representation of strings has a direct mapping to vectors:
  71
  72 * `~str` is the same as `~[u8]`
  73 * `&str` is the same as `&[u8]`
  74
  75 */
  76
  77 use char::Char;
  78 use char;
  79 use clone::Clone;
  80 use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
  81 use container::Container;
  82 use default::Default;
  83 use fmt;
  84 use from_str::FromStr;
  85 use io::Writer;
  86 use iter::{Iterator, range, AdditiveIterator};
  87 use mem::transmute;
  88 use mem;
  89 use option::{None, Option, Some};
  90 use result::{Result, Ok, Err};
  91 use slice::Vector;
  92 use slice::{ImmutableVector, MutableVector, CloneableVector};
  93 use strbuf::StrBuf;
  94 use vec::Vec;
  95
  96 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
  97 pub use core::str::{Bytes, CharSplits};
  98 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
  99 pub use core::str::{eq_slice, eq, is_utf8, is_utf16, UTF16Items};
 100 pub use core::str::{UTF16Item, ScalarValue, LoneSurrogate, utf16_items};
 101 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
 102 pub use core::str::{Str, StrSlice};
 103
 104 /*
 105 Section: Creating a string
 106 */
 107
 108 /// Consumes a vector of bytes to create a new utf-8 string.
 109 ///
 110 /// Returns `Err` with the original vector if the vector contains invalid
 111 /// UTF-8.
 112 pub fn from_utf8_owned(vv: ~[u8]) -> Result<~str, ~[u8]> {
 113     if is_utf8(vv) {
 114         Ok(unsafe { raw::from_utf8_owned(vv) })
 115     } else {
 116         Err(vv)
 117     }
 118 }
 119
 120 impl FromStr for ~str {
 121     #[inline]
 122     fn from_str(s: &str) -> Option<~str> { Some(s.to_owned()) }
 123 }
 124
 125 /// Convert a byte to a UTF-8 string
 126 ///
 127 /// # Failure
 128 ///
 129 /// Fails if invalid UTF-8
 130 pub fn from_byte(b: u8) -> ~str {
 131     assert!(b < 128u8);
 132     unsafe { ::mem::transmute(box [b]) }
 133 }
 134
 135 /// Convert a char to a string
 136 pub fn from_char(ch: char) -> ~str {
 137     let mut buf = StrBuf::new();
 138     buf.push_char(ch);
 139     buf.into_owned()
 140 }
 141
 142 /// Convert a vector of chars to a string
 143 pub fn from_chars(chs: &[char]) -> ~str {
 144     chs.iter().map(|c| *c).collect()
 145 }
 146
/// Methods for vectors of strings
pub trait StrVector {
    /// Concatenate a vector of strings.
    ///
    /// Returns a newly allocated string holding every element back-to-back.
    fn concat(&self) -> ~str;

    /// Concatenate a vector of strings, placing a given separator between each.
    ///
    /// The separator `sep` only appears between elements, never before the
    /// first or after the last one.
    fn connect(&self, sep: &str) -> ~str;
}
 155
 156 impl<'a, S: Str> StrVector for &'a [S] {
 157     fn concat(&self) -> ~str {
 158         if self.is_empty() { return "".to_owned(); }
 159
 160         // `len` calculation may overflow but push_str but will check boundaries
 161         let len = self.iter().map(|s| s.as_slice().len()).sum();
 162
 163         let mut result = StrBuf::with_capacity(len);
 164
 165         for s in self.iter() {
 166             result.push_str(s.as_slice())
 167         }
 168
 169         result.into_owned()
 170     }
 171
 172     fn connect(&self, sep: &str) -> ~str {
 173         if self.is_empty() { return "".to_owned(); }
 174
 175         // concat is faster
 176         if sep.is_empty() { return self.concat(); }
 177
 178         // this is wrong without the guarantee that `self` is non-empty
 179         // `len` calculation may overflow but push_str but will check boundaries
 180         let len = sep.len() * (self.len() - 1)
 181             + self.iter().map(|s| s.as_slice().len()).sum();
 182         let mut result = StrBuf::with_capacity(len);
 183         let mut first = true;
 184
 185         for s in self.iter() {
 186             if first {
 187                 first = false;
 188             } else {
 189                 result.push_str(sep);
 190             }
 191             result.push_str(s.as_slice());
 192         }
 193         result.into_owned()
 194     }
 195 }
 196
 197 impl<'a, S: Str> StrVector for Vec<S> {
 198     #[inline]
 199     fn concat(&self) -> ~str {
 200         self.as_slice().concat()
 201     }
 202
 203     #[inline]
 204     fn connect(&self, sep: &str) -> ~str {
 205         self.as_slice().connect(sep)
 206     }
 207 }
 208
 209 /*
 210 Section: Iterators
 211 */
 212
 213 // Helper functions used for Unicode normalization
 214 fn canonical_sort(comb: &mut [(char, u8)]) {
 215     use iter::range;
 216     use tuple::Tuple2;
 217
 218     let len = comb.len();
 219     for i in range(0, len) {
 220         let mut swapped = false;
 221         for j in range(1, len-i) {
 222             let class_a = *comb[j-1].ref1();
 223             let class_b = *comb[j].ref1();
 224             if class_a != 0 && class_b != 0 && class_a > class_b {
 225                 comb.swap(j-1, j);
 226                 swapped = true;
 227             }
 228         }
 229         if !swapped { break; }
 230     }
 231 }
 232
// Selects which decomposition function a `Decompositions` iterator applies
// to each input character.
#[deriving(Clone)]
enum DecompositionType {
    // Use `char::decompose_canonical` (selected by `nfd_chars`).
    Canonical,
    // Use `char::decompose_compatible` (selected by `nfkd_chars`).
    Compatible
}
 238
/// External iterator for a string's decomposition's characters.
/// Use with the `std::iter` module.
#[deriving(Clone)]
pub struct Decompositions<'a> {
    // Which decomposition function to apply to each source character.
    kind: DecompositionType,
    // The remaining characters of the source string.
    iter: Chars<'a>,
    // Decomposed characters not yet yielded, each paired with its canonical
    // combining class.
    buffer: Vec<(char, u8)>,
    // Whether `buffer` has been passed through `canonical_sort` and may be
    // drained from the front.
    sorted: bool
}
 248
impl<'a> Iterator<char> for Decompositions<'a> {
    /// Yields the next decomposed character, refilling and sorting the
    /// internal buffer from the source string when needed.
    #[inline]
    fn next(&mut self) -> Option<char> {
        use unicode::normalization::canonical_combining_class;

        // Fast path: try to serve the character at the front of the buffer.
        match self.buffer.as_slice().head() {
            // A class-0 character can be emitted immediately; whatever
            // follows it has not been sorted yet.
            Some(&(c, 0)) => {
                self.sorted = false;
                self.buffer.shift();
                return Some(c);
            }
            // A character with non-zero class may only be emitted once the
            // buffer has been sorted.
            Some(&(c, _)) if self.sorted => {
                self.buffer.shift();
                return Some(c);
            }
            // Empty buffer, or an unsorted run: more input is needed.
            _ => self.sorted = false
        }

        let decomposer = match self.kind {
            Canonical => char::decompose_canonical,
            Compatible => char::decompose_compatible
        };

        if !self.sorted {
            // Pull source characters and append their decompositions to the
            // buffer until a class-0 character is seen.
            for ch in self.iter {
                let buffer = &mut self.buffer;
                let sorted = &mut self.sorted;
                decomposer(ch, |d| {
                    let class = canonical_combining_class(d);
                    // The first class-0 character ends the current run: sort
                    // what was buffered before it, then append it unsorted.
                    if class == 0 && !*sorted {
                        canonical_sort(buffer.as_mut_slice());
                        *sorted = true;
                    }
                    buffer.push((d, class));
                });
                // Stop after the source character that completed the run.
                if *sorted { break }
            }
        }

        // The source ran out without a class-0 character: sort what is left.
        if !self.sorted {
            canonical_sort(self.buffer.as_mut_slice());
            self.sorted = true;
        }

        match self.buffer.shift() {
            // After a class-0 character the rest of the buffer must be
            // treated as unsorted again.
            Some((c, 0)) => {
                self.sorted = false;
                Some(c)
            }
            Some((c, _)) => Some(c),
            None => None
        }
    }

    /// Reports the lower bound of the underlying character iterator and no
    /// upper bound.
    fn size_hint(&self) -> (uint, Option<uint>) {
        let (lower, _) = self.iter.size_hint();
        (lower, None)
    }
}
 308
 309 /// Replace all occurrences of one string with another
 310 ///
 311 /// # Arguments
 312 ///
 313 /// * s - The string containing substrings to replace
 314 /// * from - The string to replace
 315 /// * to - The replacement string
 316 ///
 317 /// # Return value
 318 ///
 319 /// The original string with all occurrences of `from` replaced with `to`
 320 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
 321     let mut result = StrBuf::new();
 322     let mut last_end = 0;
 323     for (start, end) in s.match_indices(from) {
 324         result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
 325         result.push_str(to);
 326         last_end = end;
 327     }
 328     result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
 329     result.into_owned()
 330 }
 331
 332 /*
 333 Section: Misc
 334 */
 335
 336 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
 337 /// if `v` contains any invalid data.
 338 ///
 339 /// # Example
 340 ///
 341 /// ```rust
 342 /// use std::str;
 343 ///
 344 /// // 𝄞music
 345 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
 346 ///              0x0073, 0x0069, 0x0063];
 347 /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_owned()));
 348 ///
 349 /// // 𝄞mu<invalid>ic
 350 /// v[4] = 0xD800;
 351 /// assert_eq!(str::from_utf16(v), None);
 352 /// ```
 353 pub fn from_utf16(v: &[u16]) -> Option<~str> {
 354     let mut s = StrBuf::with_capacity(v.len() / 2);
 355     for c in utf16_items(v) {
 356         match c {
 357             ScalarValue(c) => s.push_char(c),
 358             LoneSurrogate(_) => return None
 359         }
 360     }
 361     Some(s.into_owned())
 362 }
 363
 364 /// Decode a UTF-16 encoded vector `v` into a string, replacing
 365 /// invalid data with the replacement character (U+FFFD).
 366 ///
 367 /// # Example
 368 /// ```rust
 369 /// use std::str;
 370 ///
 371 /// // 𝄞mus<invalid>ic<invalid>
 372 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
 373 ///          0x0073, 0xDD1E, 0x0069, 0x0063,
 374 ///          0xD834];
 375 ///
 376 /// assert_eq!(str::from_utf16_lossy(v),
 377 ///            "𝄞mus\uFFFDic\uFFFD".to_owned());
 378 /// ```
 379 pub fn from_utf16_lossy(v: &[u16]) -> ~str {
 380     utf16_items(v).map(|c| c.to_char_lossy()).collect()
 381 }
 382
// Return the initial codepoint accumulator for the first byte.
// The first byte is special, only want bottom 5 bits for width 2, 4 bits
// for width 3, and 3 bits for width 4.
// The mask is `0x7F >> $width`: 0x1F for width 2, 0x0F for width 3 and 0x07
// for width 4. The masked byte is widened to `u32`.
macro_rules! utf8_first_byte(
    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
)
 389
// Return the value of $ch updated with continuation byte $byte: the
// accumulator is shifted left by 6 bits and the low 6 bits of the byte
// (`$byte & 63`) are OR-ed in.
macro_rules! utf8_acc_cont_byte(
    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
)
 394
// Value that `byte & 192` must equal for `byte` to be accepted as a UTF-8
// continuation byte (bit pattern `10xxxxxx`); used by `from_utf8_lossy`.
static TAG_CONT_U8: u8 = 128u8;
 396
 397 /// Converts a vector of bytes to a new utf-8 string.
 398 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
 399 ///
 400 /// # Example
 401 ///
 402 /// ```rust
 403 /// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
 404 /// let output = std::str::from_utf8_lossy(input);
 405 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
 406 /// ```
 407 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
 408     if is_utf8(v) {
 409         return Slice(unsafe { mem::transmute(v) })
 410     }
 411
 412     static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
 413     let mut i = 0;
 414     let total = v.len();
 415     fn unsafe_get(xs: &[u8], i: uint) -> u8 {
 416         unsafe { *xs.unsafe_ref(i) }
 417     }
 418     fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
 419         if i >= total {
 420             0
 421         } else {
 422             unsafe_get(xs, i)
 423         }
 424     }
 425
 426     let mut res = StrBuf::with_capacity(total);
 427
 428     if i > 0 {
 429         unsafe {
 430             res.push_bytes(v.slice_to(i))
 431         };
 432     }
 433
 434     // subseqidx is the index of the first byte of the subsequence we're looking at.
 435     // It's used to copy a bunch of contiguous good codepoints at once instead of copying
 436     // them one by one.
 437     let mut subseqidx = 0;
 438
 439     while i < total {
 440         let i_ = i;
 441         let byte = unsafe_get(v, i);
 442         i += 1;
 443
 444         macro_rules! error(() => ({
 445             unsafe {
 446                 if subseqidx != i_ {
 447                     res.push_bytes(v.slice(subseqidx, i_));
 448                 }
 449                 subseqidx = i;
 450                 res.push_bytes(REPLACEMENT);
 451             }
 452         }))
 453
 454         if byte < 128u8 {
 455             // subseqidx handles this
 456         } else {
 457             let w = utf8_char_width(byte);
 458
 459             match w {
 460                 2 => {
 461                     if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
 462                         error!();
 463                         continue;
 464                     }
 465                     i += 1;
 466                 }
 467                 3 => {
 468                     match (byte, safe_get(v, i, total)) {
 469                         (0xE0        , 0xA0 .. 0xBF) => (),
 470                         (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
 471                         (0xED        , 0x80 .. 0x9F) => (),
 472                         (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
 473                         _ => {
 474                             error!();
 475                             continue;
 476                         }
 477                     }
 478                     i += 1;
 479                     if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
 480                         error!();
 481                         continue;
 482                     }
 483                     i += 1;
 484                 }
 485                 4 => {
 486                     match (byte, safe_get(v, i, total)) {
 487                         (0xF0        , 0x90 .. 0xBF) => (),
 488                         (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
 489                         (0xF4        , 0x80 .. 0x8F) => (),
 490                         _ => {
 491                             error!();
 492                             continue;
 493                         }
 494                     }
 495                     i += 1;
 496                     if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
 497                         error!();
 498                         continue;
 499                     }
 500                     i += 1;
 501                     if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
 502                         error!();
 503                         continue;
 504                     }
 505                     i += 1;
 506                 }
 507                 _ => {
 508                     error!();
 509                     continue;
 510                 }
 511             }
 512         }
 513     }
 514     if subseqidx < total {
 515         unsafe {
 516             res.push_bytes(v.slice(subseqidx, total))
 517         };
 518     }
 519     Owned(res.into_owned())
 520 }
 521
 522 /*
 523 Section: MaybeOwned
 524 */
 525
/// A MaybeOwned is a string that can hold either a ~str or a &str.
/// This can be useful as an optimization when an allocation is sometimes
/// needed but not always.
///
/// For example, `from_utf8_lossy` returns `Slice` when its input needs no
/// repair and `Owned` when it has to build a new string.
pub enum MaybeOwned<'a> {
    /// A borrowed string
    Slice(&'a str),
    /// An owned string
    Owned(~str)
}
 535
/// SendStr is a specialization of `MaybeOwned` to be sendable: its borrowed
/// variant is restricted to the `'static` lifetime.
pub type SendStr = MaybeOwned<'static>;
 538
 539 impl<'a> MaybeOwned<'a> {
 540     /// Returns `true` if this `MaybeOwned` wraps an owned string
 541     #[inline]
 542     pub fn is_owned(&self) -> bool {
 543         match *self {
 544             Slice(_) => false,
 545             Owned(_) => true
 546         }
 547     }
 548
 549     /// Returns `true` if this `MaybeOwned` wraps a borrowed string
 550     #[inline]
 551     pub fn is_slice(&self) -> bool {
 552         match *self {
 553             Slice(_) => true,
 554             Owned(_) => false
 555         }
 556     }
 557 }
 558
/// Trait for moving into a `MaybeOwned`
pub trait IntoMaybeOwned<'a> {
    /// Moves self into a `MaybeOwned`
    ///
    /// The value is consumed by the conversion.
    fn into_maybe_owned(self) -> MaybeOwned<'a>;
}
 564
// An owned string is wrapped as `Owned` without copying.
impl<'a> IntoMaybeOwned<'a> for ~str {
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> { Owned(self) }
}
 569
// A `StrBuf` is first converted with `into_owned` and then wrapped as `Owned`.
impl<'a> IntoMaybeOwned<'a> for StrBuf {
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> { Owned(self.into_owned()) }
}
 574
// A borrowed string is wrapped as `Slice`, keeping its lifetime.
impl<'a> IntoMaybeOwned<'a> for &'a str {
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
}
 579
// Converting a `MaybeOwned` into itself is the identity.
impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
    #[inline]
    fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
}
 584
 585 impl<'a> Eq for MaybeOwned<'a> {
 586     #[inline]
 587     fn eq(&self, other: &MaybeOwned) -> bool {
 588         self.as_slice() == other.as_slice()
 589     }
 590 }
 591
// Marker impl with no methods of its own; equality itself is defined by the
// `Eq` impl for `MaybeOwned`.
impl<'a> TotalEq for MaybeOwned<'a> {}
 593
 594 impl<'a> Ord for MaybeOwned<'a> {
 595     #[inline]
 596     fn lt(&self, other: &MaybeOwned) -> bool {
 597         self.as_slice().lt(&other.as_slice())
 598     }
 599 }
 600
 601 impl<'a> TotalOrd for MaybeOwned<'a> {
 602     #[inline]
 603     fn cmp(&self, other: &MaybeOwned) -> Ordering {
 604         self.as_slice().cmp(&other.as_slice())
 605     }
 606 }
 607
 608 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
 609     #[inline]
 610     fn equiv(&self, other: &S) -> bool {
 611         self.as_slice() == other.as_slice()
 612     }
 613 }
 614
 615 impl<'a> Str for MaybeOwned<'a> {
 616     #[inline]
 617     fn as_slice<'b>(&'b self) -> &'b str {
 618         match *self {
 619             Slice(s) => s,
 620             Owned(ref s) => s.as_slice()
 621         }
 622     }
 623 }
 624
 625 impl<'a> StrAllocating for MaybeOwned<'a> {
 626     #[inline]
 627     fn into_owned(self) -> ~str {
 628         match self {
 629             Slice(s) => s.to_owned(),
 630             Owned(s) => s
 631         }
 632     }
 633 }
 634
 635 impl<'a> Container for MaybeOwned<'a> {
 636     #[inline]
 637     fn len(&self) -> uint { self.as_slice().len() }
 638 }
 639
 640 impl<'a> Clone for MaybeOwned<'a> {
 641     #[inline]
 642     fn clone(&self) -> MaybeOwned<'a> {
 643         match *self {
 644             Slice(s) => Slice(s),
 645             Owned(ref s) => Owned(s.to_owned())
 646         }
 647     }
 648 }
 649
// The default value is a borrowed empty string literal.
impl<'a> Default for MaybeOwned<'a> {
    #[inline]
    fn default() -> MaybeOwned<'a> { Slice("") }
}
 654
// Hashing forwards to the wrapped string in either variant.
// NOTE(review): this presumably hashes `&str` and `~str` identically, which
// the content-only `Eq` impl relies on — confirm against the `hash` module.
impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
    #[inline]
    fn hash(&self, hasher: &mut H) {
        match *self {
            Slice(s) => s.hash(hasher),
            Owned(ref s) => s.hash(hasher),
        }
    }
}
 664
// Formatting forwards to the wrapped string's own `fmt` in either variant.
impl<'a> fmt::Show for MaybeOwned<'a> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Slice(ref s) => s.fmt(f),
            Owned(ref s) => s.fmt(f)
        }
    }
}
 674
 675 /// Unsafe operations
 676 pub mod raw {
 677     use libc;
 678     use mem;
 679     use ptr::RawPtr;
 680     use raw::Slice;
 681     use slice::CloneableVector;
 682     use str::{is_utf8, StrAllocating};
 683
 684     pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
 685     pub use core::str::raw::{slice_unchecked};
 686
 687     /// Create a Rust string from a *u8 buffer of the given length
 688     pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
 689         let v = Slice { data: buf, len: len };
 690         let bytes: &[u8] = ::mem::transmute(v);
 691         assert!(is_utf8(bytes));
 692         let s: &str = ::mem::transmute(bytes);
 693         s.to_owned()
 694     }
 695
    // Registered as the `strdup_uniq` lang item (outside of test builds);
    // simply forwards to `from_buf_len`.
    #[lang="strdup_uniq"]
    #[cfg(not(test))]
    #[inline]
    unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
        from_buf_len(ptr, len)
    }
 702
 703     /// Create a Rust string from a null-terminated C string
 704     pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
 705         let mut curr = buf;
 706         let mut i = 0;
 707         while *curr != 0 {
 708             i += 1;
 709             curr = buf.offset(i);
 710         }
 711         from_buf_len(buf as *u8, i as uint)
 712     }
 713
    /// Converts an owned vector of bytes to a new owned string. This assumes
    /// that the utf-8-ness of the vector has already been validated
    ///
    /// No check and no copy is performed: the vector is reinterpreted as a
    /// string via `transmute`.
    #[inline]
    pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
        mem::transmute(v)
    }
 720
    /// Converts a byte to a string.
    ///
    /// The byte is not validated; the caller must make sure it forms valid
    /// UTF-8 on its own.
    pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
 723
    /// Access the str in its vector representation.
    /// The caller must preserve the valid UTF-8 property when modifying.
    ///
    /// The returned reference aliases the string's own storage (it is a
    /// `transmute` of the incoming reference), so writes go straight through.
    #[inline]
    pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
        mem::transmute(s)
    }
 730
 731     /// Sets the length of a string
 732     ///
 733     /// This will explicitly set the size of the string, without actually
 734     /// modifying its buffers, so it is up to the caller to ensure that
 735     /// the string is actually the specified size.
 736     #[test]
 737     fn test_from_buf_len() {
 738         use slice::ImmutableVector;
 739         use str::StrAllocating;
 740
 741         unsafe {
 742             let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
 743             let b = a.as_ptr();
 744             let c = from_buf_len(b, 3u);
 745             assert_eq!(c, "AAA".to_owned());
 746         }
 747     }
 748 }
 749
 750 /*
 751 Section: Trait implementations
 752 */
 753
 754 /// Any string that can be represented as a slice
 755 pub trait StrAllocating: Str {
    /// Convert `self` into a ~str, not making a copy if possible.
    ///
    /// Implementations that already own their data return it as is; borrowed
    /// implementations copy.
    fn into_owned(self) -> ~str;
 758
    /// Convert `self` into a `StrBuf`.
    ///
    /// `self` is only borrowed; the new buffer is built from its slice with
    /// `StrBuf::from_str`.
    #[inline]
    fn to_strbuf(&self) -> StrBuf {
        StrBuf::from_str(self.as_slice())
    }
 764
    /// Convert `self` into a `StrBuf`, not making a copy if possible.
    ///
    /// `self` is consumed: it is first turned into a `~str` with
    /// `into_owned`, which is then handed to `StrBuf::from_owned_str`.
    #[inline]
    fn into_strbuf(self) -> StrBuf {
        StrBuf::from_owned_str(self.into_owned())
    }
 770
 771     /// Escape each char in `s` with `char::escape_default`.
 772     fn escape_default(&self) -> ~str {
 773         let me = self.as_slice();
 774         let mut out = StrBuf::with_capacity(me.len());
 775         for c in me.chars() {
 776             c.escape_default(|c| out.push_char(c));
 777         }
 778         out.into_owned()
 779     }
 780
 781     /// Escape each char in `s` with `char::escape_unicode`.
 782     fn escape_unicode(&self) -> ~str {
 783         let me = self.as_slice();
 784         let mut out = StrBuf::with_capacity(me.len());
 785         for c in me.chars() {
 786             c.escape_unicode(|c| out.push_char(c));
 787         }
 788         out.into_owned()
 789     }
 790
 791     /// Replace all occurrences of one string with another.
 792     ///
 793     /// # Arguments
 794     ///
 795     /// * `from` - The string to replace
 796     /// * `to` - The replacement string
 797     ///
 798     /// # Return value
 799     ///
 800     /// The original string with all occurrences of `from` replaced with `to`.
 801     ///
 802     /// # Example
 803     ///
 804     /// ```rust
 805     /// let s = "Do you know the muffin man,
 806     /// The muffin man, the muffin man, ...".to_owned();
 807     ///
 808     /// assert_eq!(s.replace("muffin man", "little lamb"),
 809     ///            "Do you know the little lamb,
 810     /// The little lamb, the little lamb, ...".to_owned());
 811     ///
 812     /// // not found, so no change.
 813     /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
 814     /// ```
 815     fn replace(&self, from: &str, to: &str) -> ~str {
 816         let me = self.as_slice();
 817         let mut result = StrBuf::new();
 818         let mut last_end = 0;
 819         for (start, end) in me.match_indices(from) {
 820             result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
 821             result.push_str(to);
 822             last_end = end;
 823         }
 824         result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
 825         result.into_owned()
 826     }
 827
 828     /// Copy a slice into a new owned str.
 829     #[inline]
 830     fn to_owned(&self) -> ~str {
 831         use slice::Vector;
 832
 833         unsafe {
 834             ::mem::transmute(self.as_slice().as_bytes().to_owned())
 835         }
 836     }
 837
 838     /// Converts to a vector of `u16` encoded as UTF-16.
 839     fn to_utf16(&self) -> Vec<u16> {
 840         let me = self.as_slice();
 841         let mut u = Vec::new();
 842         for ch in me.chars() {
 843             let mut buf = [0u16, ..2];
 844             let n = ch.encode_utf16(buf /* as mut slice! */);
 845             u.push_all(buf.slice_to(n));
 846         }
 847         u
 848     }
 849
 850     /// Given a string, make a new string with repeated copies of it.
 851     fn repeat(&self, nn: uint) -> ~str {
 852         let me = self.as_slice();
 853         let mut ret = StrBuf::with_capacity(nn * me.len());
 854         for _ in range(0, nn) {
 855             ret.push_str(me);
 856         }
 857         ret.into_owned()
 858     }
 859
 860     /// Levenshtein Distance between two strings.
 861     fn lev_distance(&self, t: &str) -> uint {
 862         let me = self.as_slice();
 863         let slen = me.len();
 864         let tlen = t.len();
 865
 866         if slen == 0 { return tlen; }
 867         if tlen == 0 { return slen; }
 868
 869         let mut dcol = Vec::from_fn(tlen + 1, |x| x);
 870
 871         for (i, sc) in me.chars().enumerate() {
 872
 873             let mut current = i;
 874             *dcol.get_mut(0) = current + 1;
 875
 876             for (j, tc) in t.chars().enumerate() {
 877
 878                 let next = *dcol.get(j + 1);
 879
 880                 if sc == tc {
 881                     *dcol.get_mut(j + 1) = current;
 882                 } else {
 883                     *dcol.get_mut(j + 1) = ::cmp::min(current, next);
 884                     *dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
 885                                                       *dcol.get(j)) + 1;
 886                 }
 887
 888                 current = next;
 889             }
 890         }
 891
 892         return *dcol.get(tlen);
 893     }
 894
 895     /// An Iterator over the string in Unicode Normalization Form D
 896     /// (canonical decomposition).
 897     #[inline]
 898     fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
 899         Decompositions {
 900             iter: self.as_slice().chars(),
 901             buffer: Vec::new(),
 902             sorted: false,
 903             kind: Canonical
 904         }
 905     }
 906
 907     /// An Iterator over the string in Unicode Normalization Form KD
 908     /// (compatibility decomposition).
 909     #[inline]
 910     fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
 911         Decompositions {
 912             iter: self.as_slice().chars(),
 913             buffer: Vec::new(),
 914             sorted: false,
 915             kind: Compatible
 916         }
 917     }
 918 }
 919
// A borrowed string has to be copied to become owned.
impl<'a> StrAllocating for &'a str {
    #[inline]
    fn into_owned(self) -> ~str { self.to_owned() }
}
 924
// An owned string is already owned: it is returned unchanged, without a copy.
impl<'a> StrAllocating for ~str {
    #[inline]
    fn into_owned(self) -> ~str { self }
}
 929
/// Methods for owned strings
pub trait OwnedStr {
    /// Consumes the string, returning the underlying byte buffer.
    ///
    /// The buffer does not have a null terminator.
    fn into_bytes(self) -> ~[u8];

    /// Pushes the given string onto this string, returning the concatenation of the two strings.
    ///
    /// `self` is consumed by the call.
    fn append(self, rhs: &str) -> ~str;
}
 940
 941 impl OwnedStr for ~str {
 942     #[inline]
 943     fn into_bytes(self) -> ~[u8] {
 944         unsafe { mem::transmute(self) }
 945     }
 946
 947     #[inline]
 948     fn append(self, rhs: &str) -> ~str {
 949         let mut new_str = StrBuf::from_owned_str(self);
 950         new_str.push_str(rhs);
 951         new_str.into_owned()
 952     }
 953 }
 954
 955 #[cfg(test)]
 956 mod tests {
 957     use iter::AdditiveIterator;
 958     use default::Default;
 959     use prelude::*;
 960     use str::*;
 961     use strbuf::StrBuf;
 962
 963     #[test]
 964     fn test_eq() {
 965         assert!((eq(&"".to_owned(), &"".to_owned())));
 966         assert!((eq(&"foo".to_owned(), &"foo".to_owned())));
 967         assert!((!eq(&"foo".to_owned(), &"bar".to_owned())));
 968     }
 969
 970     #[test]
 971     fn test_eq_slice() {
 972         assert!((eq_slice("foobar".slice(0, 3), "foo")));
 973         assert!((eq_slice("barfoo".slice(3, 6), "foo")));
 974         assert!((!eq_slice("foo1", "foo2")));
 975     }
 976
 977     #[test]
 978     fn test_le() {
 979         assert!("" <= "");
 980         assert!("" <= "foo");
 981         assert!("foo" <= "foo");
 982         assert!("foo" != "bar");
 983     }
 984
 985     #[test]
 986     fn test_len() {
 987         assert_eq!("".len(), 0u);
 988         assert_eq!("hello world".len(), 11u);
 989         assert_eq!("\x63".len(), 1u);
 990         assert_eq!("\xa2".len(), 2u);
 991         assert_eq!("\u03c0".len(), 2u);
 992         assert_eq!("\u2620".len(), 3u);
 993         assert_eq!("\U0001d11e".len(), 4u);
 994
 995         assert_eq!("".char_len(), 0u);
 996         assert_eq!("hello world".char_len(), 11u);
 997         assert_eq!("\x63".char_len(), 1u);
 998         assert_eq!("\xa2".char_len(), 1u);
 999         assert_eq!("\u03c0".char_len(), 1u);
1000         assert_eq!("\u2620".char_len(), 1u);
1001         assert_eq!("\U0001d11e".char_len(), 1u);
1002         assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
1003     }
1004
1005     #[test]
1006     fn test_find() {
1007         assert_eq!("hello".find('l'), Some(2u));
1008         assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
1009         assert!("hello".find('x').is_none());
1010         assert!("hello".find(|c:char| c == 'x').is_none());
1011         assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1012         assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
1013     }
1014
1015     #[test]
1016     fn test_rfind() {
1017         assert_eq!("hello".rfind('l'), Some(3u));
1018         assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
1019         assert!("hello".rfind('x').is_none());
1020         assert!("hello".rfind(|c:char| c == 'x').is_none());
1021         assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1022         assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
1023     }
1024
1025     #[test]
1026     fn test_collect() {
1027         let empty = "".to_owned();
1028         let s: ~str = empty.chars().collect();
1029         assert_eq!(empty, s);
1030         let data = "ประเทศไทย中".to_owned();
1031         let s: ~str = data.chars().collect();
1032         assert_eq!(data, s);
1033     }
1034
1035     #[test]
1036     fn test_into_bytes() {
1037         let data = "asdf".to_owned();
1038         let buf = data.into_bytes();
1039         assert_eq!(bytes!("asdf"), buf.as_slice());
1040     }
1041
1042     #[test]
1043     fn test_find_str() {
1044         // byte positions
1045         assert_eq!("".find_str(""), Some(0u));
1046         assert!("banana".find_str("apple pie").is_none());
1047
1048         let data = "abcabc";
1049         assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1050         assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1051         assert!(data.slice(2u, 4u).find_str("ab").is_none());
1052
1053         let mut data = "ประเทศไทย中华Việt Nam".to_owned();
1054         data = data + data;
1055         assert!(data.find_str("ไท华").is_none());
1056         assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1057         assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1058
1059         assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1060         assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1061         assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1062         assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1063         assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1064
1065         assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1066         assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1067         assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1068         assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1069         assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1070     }
1071
1072     #[test]
1073     fn test_slice_chars() {
1074         fn t(a: &str, b: &str, start: uint) {
1075             assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1076         }
1077         t("", "", 0);
1078         t("hello", "llo", 2);
1079         t("hello", "el", 1);
1080         t("αβλ", "β", 1);
1081         t("αβλ", "", 3);
1082         assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1083     }
1084
1085     #[test]
1086     fn test_concat() {
1087         fn t(v: &[~str], s: &str) {
1088             assert_eq!(v.concat(), s.to_str());
1089         }
1090         t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1091           "no".to_owned(), "good".to_owned()], "youknowI'mnogood");
1092         let v: &[~str] = [];
1093         t(v, "");
1094         t(["hi".to_owned()], "hi");
1095     }
1096
1097     #[test]
1098     fn test_connect() {
1099         fn t(v: &[~str], sep: &str, s: &str) {
1100             assert_eq!(v.connect(sep), s.to_str());
1101         }
1102         t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1103            "no".to_owned(), "good".to_owned()],
1104           " ", "you know I'm no good");
1105         let v: &[~str] = [];
1106         t(v, " ", "");
1107         t(["hi".to_owned()], " ", "hi");
1108     }
1109
1110     #[test]
1111     fn test_concat_slices() {
1112         fn t(v: &[&str], s: &str) {
1113             assert_eq!(v.concat(), s.to_str());
1114         }
1115         t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1116         let v: &[&str] = [];
1117         t(v, "");
1118         t(["hi"], "hi");
1119     }
1120
1121     #[test]
1122     fn test_connect_slices() {
1123         fn t(v: &[&str], sep: &str, s: &str) {
1124             assert_eq!(v.connect(sep), s.to_str());
1125         }
1126         t(["you", "know", "I'm", "no", "good"],
1127           " ", "you know I'm no good");
1128         t([], " ", "");
1129         t(["hi"], " ", "hi");
1130     }
1131
1132     #[test]
1133     fn test_repeat() {
1134         assert_eq!("x".repeat(4), "xxxx".to_owned());
1135         assert_eq!("hi".repeat(4), "hihihihi".to_owned());
1136         assert_eq!("ไท华".repeat(3), "ไท华ไท华ไท华".to_owned());
1137         assert_eq!("".repeat(4), "".to_owned());
1138         assert_eq!("hi".repeat(0), "".to_owned());
1139     }
1140
1141     #[test]
1142     fn test_unsafe_slice() {
1143         assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1144         assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1145         assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1146         fn a_million_letter_a() -> ~str {
1147             let mut i = 0;
1148             let mut rs = StrBuf::new();
1149             while i < 100000 {
1150                 rs.push_str("aaaaaaaaaa");
1151                 i += 1;
1152             }
1153             rs.into_owned()
1154         }
1155         fn half_a_million_letter_a() -> ~str {
1156             let mut i = 0;
1157             let mut rs = StrBuf::new();
1158             while i < 100000 {
1159                 rs.push_str("aaaaa");
1160                 i += 1;
1161             }
1162             rs.into_owned()
1163         }
1164         let letters = a_million_letter_a();
1165         assert!(half_a_million_letter_a() ==
1166             unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
1167     }
1168
1169     #[test]
1170     fn test_starts_with() {
1171         assert!(("".starts_with("")));
1172         assert!(("abc".starts_with("")));
1173         assert!(("abc".starts_with("a")));
1174         assert!((!"a".starts_with("abc")));
1175         assert!((!"".starts_with("abc")));
1176         assert!((!"ödd".starts_with("-")));
1177         assert!(("ödd".starts_with("öd")));
1178     }
1179
1180     #[test]
1181     fn test_ends_with() {
1182         assert!(("".ends_with("")));
1183         assert!(("abc".ends_with("")));
1184         assert!(("abc".ends_with("c")));
1185         assert!((!"a".ends_with("abc")));
1186         assert!((!"".ends_with("abc")));
1187         assert!((!"ddö".ends_with("-")));
1188         assert!(("ddö".ends_with("dö")));
1189     }
1190
1191     #[test]
1192     fn test_is_empty() {
1193         assert!("".is_empty());
1194         assert!(!"a".is_empty());
1195     }
1196
1197     #[test]
1198     fn test_replace() {
1199         let a = "a";
1200         assert_eq!("".replace(a, "b"), "".to_owned());
1201         assert_eq!("a".replace(a, "b"), "b".to_owned());
1202         assert_eq!("ab".replace(a, "b"), "bb".to_owned());
1203         let test = "test";
1204         assert!(" test test ".replace(test, "toast") ==
1205             " toast toast ".to_owned());
1206         assert_eq!(" test test ".replace(test, ""), "   ".to_owned());
1207     }
1208
1209     #[test]
1210     fn test_replace_2a() {
1211         let data = "ประเทศไทย中华".to_owned();
1212         let repl = "دولة الكويت".to_owned();
1213
1214         let a = "ประเ".to_owned();
1215         let a2 = "دولة الكويتทศไทย中华".to_owned();
1216         assert_eq!(data.replace(a, repl), a2);
1217     }
1218
1219     #[test]
1220     fn test_replace_2b() {
1221         let data = "ประเทศไทย中华".to_owned();
1222         let repl = "دولة الكويت".to_owned();
1223
1224         let b = "ะเ".to_owned();
1225         let b2 = "ปรدولة الكويتทศไทย中华".to_owned();
1226         assert_eq!(data.replace(b, repl), b2);
1227     }
1228
1229     #[test]
1230     fn test_replace_2c() {
1231         let data = "ประเทศไทย中华".to_owned();
1232         let repl = "دولة الكويت".to_owned();
1233
1234         let c = "中华".to_owned();
1235         let c2 = "ประเทศไทยدولة الكويت".to_owned();
1236         assert_eq!(data.replace(c, repl), c2);
1237     }
1238
1239     #[test]
1240     fn test_replace_2d() {
1241         let data = "ประเทศไทย中华".to_owned();
1242         let repl = "دولة الكويت".to_owned();
1243
1244         let d = "ไท华".to_owned();
1245         assert_eq!(data.replace(d, repl), data);
1246     }
1247
1248     #[test]
1249     fn test_slice() {
1250         assert_eq!("ab", "abc".slice(0, 2));
1251         assert_eq!("bc", "abc".slice(1, 3));
1252         assert_eq!("", "abc".slice(1, 1));
1253         assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1254
1255         let data = "ประเทศไทย中华";
1256         assert_eq!("ป", data.slice(0, 3));
1257         assert_eq!("ร", data.slice(3, 6));
1258         assert_eq!("", data.slice(3, 3));
1259         assert_eq!("华", data.slice(30, 33));
1260
1261         fn a_million_letter_X() -> ~str {
1262             let mut i = 0;
1263             let mut rs = StrBuf::new();
1264             while i < 100000 {
1265                 rs.push_str("华华华华华华华华华华");
1266                 i += 1;
1267             }
1268             rs.into_owned()
1269         }
1270         fn half_a_million_letter_X() -> ~str {
1271             let mut i = 0;
1272             let mut rs = StrBuf::new();
1273             while i < 100000 {
1274                 rs.push_str("华华华华华");
1275                 i += 1;
1276             }
1277             rs.into_owned()
1278         }
1279         let letters = a_million_letter_X();
1280         assert!(half_a_million_letter_X() ==
1281             letters.slice(0u, 3u * 500000u).to_owned());
1282     }
1283
1284     #[test]
1285     fn test_slice_2() {
1286         let ss = "中华Việt Nam";
1287
1288         assert_eq!("华", ss.slice(3u, 6u));
1289         assert_eq!("Việt Nam", ss.slice(6u, 16u));
1290
1291         assert_eq!("ab", "abc".slice(0u, 2u));
1292         assert_eq!("bc", "abc".slice(1u, 3u));
1293         assert_eq!("", "abc".slice(1u, 1u));
1294
1295         assert_eq!("中", ss.slice(0u, 3u));
1296         assert_eq!("华V", ss.slice(3u, 7u));
1297         assert_eq!("", ss.slice(3u, 3u));
1298         /*0: 中
1299           3: 华
1300           6: V
1301           7: i
1302           8: ệ
1303          11: t
1304          12:
1305          13: N
1306          14: a
1307          15: m */
1308     }
1309
1310     #[test]
1311     #[should_fail]
1312     fn test_slice_fail() {
1313         "中华Việt Nam".slice(0u, 2u);
1314     }
1315
1316     #[test]
1317     fn test_slice_from() {
1318         assert_eq!("abcd".slice_from(0), "abcd");
1319         assert_eq!("abcd".slice_from(2), "cd");
1320         assert_eq!("abcd".slice_from(4), "");
1321     }
1322     #[test]
1323     fn test_slice_to() {
1324         assert_eq!("abcd".slice_to(0), "");
1325         assert_eq!("abcd".slice_to(2), "ab");
1326         assert_eq!("abcd".slice_to(4), "abcd");
1327     }
1328
1329     #[test]
1330     fn test_trim_left_chars() {
1331         let v: &[char] = &[];
1332         assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1333         assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1334         assert_eq!(" ***  *** ".trim_left_chars(&['*', ' ']), "");
1335         assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1336
1337         assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1338         assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1339         assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1340     }
1341
1342     #[test]
1343     fn test_trim_right_chars() {
1344         let v: &[char] = &[];
1345         assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1346         assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1347         assert_eq!(" ***  *** ".trim_right_chars(&['*', ' ']), "");
1348         assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1349
1350         assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1351         assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1352         assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1353     }
1354
1355     #[test]
1356     fn test_trim_chars() {
1357         let v: &[char] = &[];
1358         assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1359         assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1360         assert_eq!(" ***  *** ".trim_chars(&['*', ' ']), "");
1361         assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1362
1363         assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1364         assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1365         assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1366     }
1367
1368     #[test]
1369     fn test_trim_left() {
1370         assert_eq!("".trim_left(), "");
1371         assert_eq!("a".trim_left(), "a");
1372         assert_eq!("    ".trim_left(), "");
1373         assert_eq!("     blah".trim_left(), "blah");
1374         assert_eq!("   \u3000  wut".trim_left(), "wut");
1375         assert_eq!("hey ".trim_left(), "hey ");
1376     }
1377
1378     #[test]
1379     fn test_trim_right() {
1380         assert_eq!("".trim_right(), "");
1381         assert_eq!("a".trim_right(), "a");
1382         assert_eq!("    ".trim_right(), "");
1383         assert_eq!("blah     ".trim_right(), "blah");
1384         assert_eq!("wut   \u3000  ".trim_right(), "wut");
1385         assert_eq!(" hey".trim_right(), " hey");
1386     }
1387
1388     #[test]
1389     fn test_trim() {
1390         assert_eq!("".trim(), "");
1391         assert_eq!("a".trim(), "a");
1392         assert_eq!("    ".trim(), "");
1393         assert_eq!("    blah     ".trim(), "blah");
1394         assert_eq!("\nwut   \u3000  ".trim(), "wut");
1395         assert_eq!(" hey dude ".trim(), "hey dude");
1396     }
1397
1398     #[test]
1399     fn test_is_whitespace() {
1400         assert!("".is_whitespace());
1401         assert!(" ".is_whitespace());
1402         assert!("\u2009".is_whitespace()); // Thin space
1403         assert!("  \n\t   ".is_whitespace());
1404         assert!(!"   _   ".is_whitespace());
1405     }
1406
1407     #[test]
1408     fn test_slice_shift_char() {
1409         let data = "ประเทศไทย中";
1410         assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1411     }
1412
1413     #[test]
1414     fn test_slice_shift_char_2() {
1415         let empty = "";
1416         assert_eq!(empty.slice_shift_char(), (None, ""));
1417     }
1418
1419     #[test]
1420     fn test_is_utf8() {
1421         // deny overlong encodings
1422         assert!(!is_utf8([0xc0, 0x80]));
1423         assert!(!is_utf8([0xc0, 0xae]));
1424         assert!(!is_utf8([0xe0, 0x80, 0x80]));
1425         assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1426         assert!(!is_utf8([0xe0, 0x81, 0x81]));
1427         assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1428         assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1429
1430         // deny surrogates
1431         assert!(!is_utf8([0xED, 0xA0, 0x80]));
1432         assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1433
1434         assert!(is_utf8([0xC2, 0x80]));
1435         assert!(is_utf8([0xDF, 0xBF]));
1436         assert!(is_utf8([0xE0, 0xA0, 0x80]));
1437         assert!(is_utf8([0xED, 0x9F, 0xBF]));
1438         assert!(is_utf8([0xEE, 0x80, 0x80]));
1439         assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1440         assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1441         assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1442     }
1443
    /// Checks `is_utf16` against code-unit sequences it must accept and
    /// sequences it must reject.
    #[test]
    fn test_is_utf16() {
        // `pos!` asserts that every listed sequence is accepted.
        macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });

        // non-surrogates
        pos!([0x0000],
             [0x0001, 0x0002],
             [0xD7FF],
             [0xE000]);

        // surrogate pairs (randomly generated with Python 3's
        // .encode('utf-16be'))
        pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
             [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
             [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);

        // mixtures (also random)
        pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
             [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
             [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);

        // negative tests
        // `neg!` asserts that every listed sequence is rejected.
        macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });

        neg!(
            // surrogate + regular unit
            [0xdb45, 0x0000],
            // surrogate + lead surrogate
            [0xd900, 0xd900],
            // unterminated surrogate
            [0xd8ff],
            // trail surrogate without a lead
            [0xddb7]);

        // random byte sequences that Python 3's .decode('utf-16be')
        // failed on
        neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
             [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
             [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
             [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
             [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
             [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
             [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
             [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
             [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
             [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
             [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
             [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
             [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
             [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
             [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
             [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
             [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
             [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
             [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
             [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
             [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
    }
1502
1503     #[test]
1504     fn test_raw_from_c_str() {
1505         unsafe {
1506             let a = box [65, 65, 65, 65, 65, 65, 65, 0];
1507             let b = a.as_ptr();
1508             let c = raw::from_c_str(b);
1509             assert_eq!(c, "AAAAAAA".to_owned());
1510         }
1511     }
1512
1513     #[test]
1514     fn test_as_bytes() {
1515         // no null
1516         let v = [
1517             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1518             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1519             109
1520         ];
1521         assert_eq!("".as_bytes(), &[]);
1522         assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1523         assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1524     }
1525
1526     #[test]
1527     #[should_fail]
1528     fn test_as_bytes_fail() {
1529         // Don't double free. (I'm not sure if this exercises the
1530         // original problem code path anymore.)
1531         let s = "".to_owned();
1532         let _bytes = s.as_bytes();
1533         fail!();
1534     }
1535
1536     #[test]
1537     fn test_as_ptr() {
1538         let buf = "hello".as_ptr();
1539         unsafe {
1540             assert_eq!(*buf.offset(0), 'h' as u8);
1541             assert_eq!(*buf.offset(1), 'e' as u8);
1542             assert_eq!(*buf.offset(2), 'l' as u8);
1543             assert_eq!(*buf.offset(3), 'l' as u8);
1544             assert_eq!(*buf.offset(4), 'o' as u8);
1545         }
1546     }
1547
1548     #[test]
1549     fn test_subslice_offset() {
1550         let a = "kernelsprite";
1551         let b = a.slice(7, a.len());
1552         let c = a.slice(0, a.len() - 6);
1553         assert_eq!(a.subslice_offset(b), 7);
1554         assert_eq!(a.subslice_offset(c), 0);
1555
1556         let string = "a\nb\nc";
1557         let lines: Vec<&str> = string.lines().collect();
1558         let lines = lines.as_slice();
1559         assert_eq!(string.subslice_offset(lines[0]), 0);
1560         assert_eq!(string.subslice_offset(lines[1]), 2);
1561         assert_eq!(string.subslice_offset(lines[2]), 4);
1562     }
1563
1564     #[test]
1565     #[should_fail]
1566     fn test_subslice_offset_2() {
1567         let a = "alchemiter";
1568         let b = "cruxtruder";
1569         a.subslice_offset(b);
1570     }
1571
1572     #[test]
1573     fn vec_str_conversions() {
1574         let s1: ~str = "All mimsy were the borogoves".to_owned();
1575
1576         let v: ~[u8] = s1.as_bytes().to_owned();
1577         let s2: ~str = from_utf8(v).unwrap().to_owned();
1578         let mut i: uint = 0u;
1579         let n1: uint = s1.len();
1580         let n2: uint = v.len();
1581         assert_eq!(n1, n2);
1582         while i < n1 {
1583             let a: u8 = s1[i];
1584             let b: u8 = s2[i];
1585             debug!("{}", a);
1586             debug!("{}", b);
1587             assert_eq!(a, b);
1588             i += 1u;
1589         }
1590     }
1591
1592     #[test]
1593     fn test_contains() {
1594         assert!("abcde".contains("bcd"));
1595         assert!("abcde".contains("abcd"));
1596         assert!("abcde".contains("bcde"));
1597         assert!("abcde".contains(""));
1598         assert!("".contains(""));
1599         assert!(!"abcde".contains("def"));
1600         assert!(!"".contains("a"));
1601
1602         let data = "ประเทศไทย中华Việt Nam".to_owned();
1603         assert!(data.contains("ประเ"));
1604         assert!(data.contains("ะเ"));
1605         assert!(data.contains("中华"));
1606         assert!(!data.contains("ไท华"));
1607     }
1608
1609     #[test]
1610     fn test_contains_char() {
1611         assert!("abc".contains_char('b'));
1612         assert!("a".contains_char('a'));
1613         assert!(!"abc".contains_char('d'));
1614         assert!(!"".contains_char('a'));
1615     }
1616
    /// Round-trips strings through UTF-16 in both directions.
    #[test]
    fn test_utf16() {
        // Each pair is a string and the UTF-16 code units it must encode to.
        let pairs =
            [("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_owned(),
              vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
                0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
                0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
                0xd800_u16, 0xdf30_u16, 0x000a_u16]),

             ("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_owned(),
              vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
                0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
                0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
                0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
                0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
                0x000a_u16]),

             ("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_owned(),
              vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
                0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
                0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
                0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
                0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
                0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
                0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),

             ("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_owned(),
              vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
                0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
                0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
                0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
                0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
                0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
                0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
                0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
                0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
                0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
                0x000a_u16 ]),
             // Issue #12318, even-numbered non-BMP planes
             ("\U00020000".to_owned(),
              vec![0xD840, 0xDC00])];

        for p in pairs.iter() {
            // Work on a copy so the table entry itself is left untouched.
            let (s, u) = (*p).clone();
            // The expected units must themselves be valid UTF-16 ...
            assert!(is_utf16(u.as_slice()));
            // ... and encoding the string must produce exactly them.
            assert_eq!(s.to_utf16(), u);

            // Decoding, strictly or lossily, must give the string back.
            assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
            assert_eq!(from_utf16_lossy(u.as_slice()), s);

            // Full round trips in both directions.
            assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
            assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
        }
    }
1671
1672     #[test]
1673     fn test_utf16_invalid() {
1674         // completely positive cases tested above.
1675         // lead + eof
1676         assert_eq!(from_utf16([0xD800]), None);
1677         // lead + lead
1678         assert_eq!(from_utf16([0xD800, 0xD800]), None);
1679
1680         // isolated trail
1681         assert_eq!(from_utf16([0x0061, 0xDC00]), None);
1682
1683         // general
1684         assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
1685     }
1686
1687     #[test]
1688     fn test_utf16_lossy() {
1689         // completely positive cases tested above.
1690         // lead + eof
1691         assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_owned());
1692         // lead + lead
1693         assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_owned());
1694
1695         // isolated trail
1696         assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_owned());
1697
1698         // general
1699         assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), "\uFFFD𐒋\uFFFD".to_owned());
1700     }
1701
1702     #[test]
1703     fn test_truncate_utf16_at_nul() {
1704         let v = [];
1705         assert_eq!(truncate_utf16_at_nul(v), &[]);
1706
1707         let v = [0, 2, 3];
1708         assert_eq!(truncate_utf16_at_nul(v), &[]);
1709
1710         let v = [1, 0, 3];
1711         assert_eq!(truncate_utf16_at_nul(v), &[1]);
1712
1713         let v = [1, 2, 0];
1714         assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1715
1716         let v = [1, 2, 3];
1717         assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1718     }
1719
1720     #[test]
1721     fn test_char_at() {
1722         let s = "ศไทย中华Việt Nam".to_owned();
1723         let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1724         let mut pos = 0;
1725         for ch in v.iter() {
1726             assert!(s.char_at(pos) == *ch);
1727             pos += from_char(*ch).len();
1728         }
1729     }
1730
1731     #[test]
1732     fn test_char_at_reverse() {
1733         let s = "ศไทย中华Việt Nam".to_owned();
1734         let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1735         let mut pos = s.len();
1736         for ch in v.iter().rev() {
1737             assert!(s.char_at_reverse(pos) == *ch);
1738             pos -= from_char(*ch).len();
1739         }
1740     }
1741
1742     #[test]
1743     fn test_escape_unicode() {
1744         assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_owned());
1745         assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_owned());
1746         assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_owned());
1747         assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_owned());
1748         assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_owned());
1749         assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_owned());
1750         assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_owned());
1751         assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_owned());
1752         assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_owned());
1753     }
1754
1755     #[test]
1756     fn test_escape_default() {
1757         assert_eq!("abc".escape_default(), "abc".to_owned());
1758         assert_eq!("a c".escape_default(), "a c".to_owned());
1759         assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_owned());
1760         assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_owned());
1761         assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_owned());
1762         assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_owned());
1763         assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_owned());
1764         assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_owned());
1765     }
1766
1767     #[test]
1768     fn test_total_ord() {
1769         "1234".cmp(&("123")) == Greater;
1770         "123".cmp(&("1234")) == Less;
1771         "1234".cmp(&("1234")) == Equal;
1772         "12345555".cmp(&("123456")) == Less;
1773         "22".cmp(&("1234")) == Greater;
1774     }
1775
1776     #[test]
1777     fn test_char_range_at() {
1778         let data = "b¢€𤭢𤭢€¢b".to_owned();
1779         assert_eq!('b', data.char_range_at(0).ch);
1780         assert_eq!('¢', data.char_range_at(1).ch);
1781         assert_eq!('€', data.char_range_at(3).ch);
1782         assert_eq!('𤭢', data.char_range_at(6).ch);
1783         assert_eq!('𤭢', data.char_range_at(10).ch);
1784         assert_eq!('€', data.char_range_at(14).ch);
1785         assert_eq!('¢', data.char_range_at(17).ch);
1786         assert_eq!('b', data.char_range_at(19).ch);
1787     }
1788
1789     #[test]
1790     fn test_char_range_at_reverse_underflow() {
1791         assert_eq!("abc".char_range_at_reverse(0).next, 0);
1792     }
1793
1794     #[test]
1795     fn test_add() {
1796         #![allow(unnecessary_allocation)]
1797         macro_rules! t (
1798             ($s1:expr, $s2:expr, $e:expr) => { {
1799                 let s1 = $s1;
1800                 let s2 = $s2;
1801                 let e = $e;
1802                 assert_eq!(s1 + s2, e.to_owned());
1803                 assert_eq!(s1.to_owned() + s2, e.to_owned());
1804             } }
1805         );
1806
1807         t!("foo",  "bar", "foobar");
1808         t!("foo", "bar".to_owned(), "foobar");
1809         t!("ศไทย中",  "华Việt Nam", "ศไทย中华Việt Nam");
1810         t!("ศไทย中", "华Việt Nam".to_owned(), "ศไทย中华Việt Nam");
1811     }
1812
1813     #[test]
1814     fn test_iterator() {
1815         use iter::*;
1816         let s = "ศไทย中华Việt Nam".to_owned();
1817         let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1818
1819         let mut pos = 0;
1820         let mut it = s.chars();
1821
1822         for c in it {
1823             assert_eq!(c, v[pos]);
1824             pos += 1;
1825         }
1826         assert_eq!(pos, v.len());
1827     }
1828
1829     #[test]
1830     fn test_rev_iterator() {
1831         use iter::*;
1832         let s = "ศไทย中华Việt Nam".to_owned();
1833         let v = box ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1834
1835         let mut pos = 0;
1836         let mut it = s.chars().rev();
1837
1838         for c in it {
1839             assert_eq!(c, v[pos]);
1840             pos += 1;
1841         }
1842         assert_eq!(pos, v.len());
1843     }
1844
1845     #[test]
1846     fn test_iterator_clone() {
1847         let s = "ศไทย中华Việt Nam";
1848         let mut it = s.chars();
1849         it.next();
1850         assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1851     }
1852
1853     #[test]
1854     fn test_bytesator() {
1855         let s = "ศไทย中华Việt Nam".to_owned();
1856         let v = [
1857             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1858             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1859             109
1860         ];
1861         let mut pos = 0;
1862
1863         for b in s.bytes() {
1864             assert_eq!(b, v[pos]);
1865             pos += 1;
1866         }
1867     }
1868
1869     #[test]
1870     fn test_bytes_revator() {
1871         let s = "ศไทย中华Việt Nam".to_owned();
1872         let v = [
1873             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1874             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1875             109
1876         ];
1877         let mut pos = v.len();
1878
1879         for b in s.bytes().rev() {
1880             pos -= 1;
1881             assert_eq!(b, v[pos]);
1882         }
1883     }
1884
1885     #[test]
1886     fn test_char_indicesator() {
1887         use iter::*;
1888         let s = "ศไทย中华Việt Nam";
1889         let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1890         let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1891
1892         let mut pos = 0;
1893         let mut it = s.char_indices();
1894
1895         for c in it {
1896             assert_eq!(c, (p[pos], v[pos]));
1897             pos += 1;
1898         }
1899         assert_eq!(pos, v.len());
1900         assert_eq!(pos, p.len());
1901     }
1902
1903     #[test]
1904     fn test_char_indices_revator() {
1905         use iter::*;
1906         let s = "ศไทย中华Việt Nam";
1907         let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1908         let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1909
1910         let mut pos = 0;
1911         let mut it = s.char_indices().rev();
1912
1913         for c in it {
1914             assert_eq!(c, (p[pos], v[pos]));
1915             pos += 1;
1916         }
1917         assert_eq!(pos, v.len());
1918         assert_eq!(pos, p.len());
1919     }
1920
1921     #[test]
1922     fn test_split_char_iterator() {
1923         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1924
1925         let split: Vec<&str> = data.split(' ').collect();
1926         assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1927
1928         let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1929         rsplit.reverse();
1930         assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1931
1932         let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1933         assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1934
1935         let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1936         rsplit.reverse();
1937         assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1938
1939         // Unicode
1940         let split: Vec<&str> = data.split('ä').collect();
1941         assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1942
1943         let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1944         rsplit.reverse();
1945         assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1946
1947         let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1948         assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1949
1950         let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1951         rsplit.reverse();
1952         assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1953     }
1954
1955     #[test]
1956     fn test_splitn_char_iterator() {
1957         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1958
1959         let split: Vec<&str> = data.splitn(' ', 3).collect();
1960         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1961
1962         let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1963         assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1964
1965         // Unicode
1966         let split: Vec<&str> = data.splitn('ä', 3).collect();
1967         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1968
1969         let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1970         assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1971     }
1972
1973     #[test]
1974     fn test_rsplitn_char_iterator() {
1975         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1976
1977         let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1978         split.reverse();
1979         assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1980
1981         let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1982         split.reverse();
1983         assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1984
1985         // Unicode
1986         let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1987         split.reverse();
1988         assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1989
1990         let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1991         split.reverse();
1992         assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1993     }
1994
1995     #[test]
1996     fn test_split_char_iterator_no_trailing() {
1997         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1998
1999         let split: Vec<&str> = data.split('\n').collect();
2000         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2001
2002         let split: Vec<&str> = data.split_terminator('\n').collect();
2003         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2004     }
2005
2006     #[test]
2007     fn test_rev_split_char_iterator_no_trailing() {
2008         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2009
2010         let mut split: Vec<&str> = data.split('\n').rev().collect();
2011         split.reverse();
2012         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2013
2014         let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
2015         split.reverse();
2016         assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2017     }
2018
2019     #[test]
2020     fn test_words() {
2021         let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
2022         let words: Vec<&str> = data.words().collect();
2023         assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2024     }
2025
2026     #[test]
2027     fn test_nfd_chars() {
2028         assert_eq!("abc".nfd_chars().collect::<~str>(), "abc".to_owned());
2029         assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<~str>(), "d\u0307\u01c4".to_owned());
2030         assert_eq!("\u2026".nfd_chars().collect::<~str>(), "\u2026".to_owned());
2031         assert_eq!("\u2126".nfd_chars().collect::<~str>(), "\u03a9".to_owned());
2032         assert_eq!("\u1e0b\u0323".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2033         assert_eq!("\u1e0d\u0307".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2034         assert_eq!("a\u0301".nfd_chars().collect::<~str>(), "a\u0301".to_owned());
2035         assert_eq!("\u0301a".nfd_chars().collect::<~str>(), "\u0301a".to_owned());
2036         assert_eq!("\ud4db".nfd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2037         assert_eq!("\uac1c".nfd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2038     }
2039
2040     #[test]
2041     fn test_nfkd_chars() {
2042         assert_eq!("abc".nfkd_chars().collect::<~str>(), "abc".to_owned());
2043         assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), "d\u0307DZ\u030c".to_owned());
2044         assert_eq!("\u2026".nfkd_chars().collect::<~str>(), "...".to_owned());
2045         assert_eq!("\u2126".nfkd_chars().collect::<~str>(), "\u03a9".to_owned());
2046         assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2047         assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2048         assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), "a\u0301".to_owned());
2049         assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), "\u0301a".to_owned());
2050         assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2051         assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2052     }
2053
2054     #[test]
2055     fn test_lines() {
2056         let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2057         let lines: Vec<&str> = data.lines().collect();
2058         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2059
2060         let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2061         let lines: Vec<&str> = data.lines().collect();
2062         assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2063     }
2064
2065     #[test]
2066     fn test_split_strator() {
2067         fn t(s: &str, sep: &str, u: &[&str]) {
2068             let v: Vec<&str> = s.split_str(sep).collect();
2069             assert_eq!(v.as_slice(), u.as_slice());
2070         }
2071         t("--1233345--", "12345", ["--1233345--"]);
2072         t("abc::hello::there", "::", ["abc", "hello", "there"]);
2073         t("::hello::there", "::", ["", "hello", "there"]);
2074         t("hello::there::", "::", ["hello", "there", ""]);
2075         t("::hello::there::", "::", ["", "hello", "there", ""]);
2076         t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2077         t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2078         t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2079         t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
2080         t("", ".", [""]);
2081         t("zz", "zz", ["",""]);
2082         t("ok", "z", ["ok"]);
2083         t("zzz", "zz", ["","z"]);
2084         t("zzzzz", "zz", ["","","z"]);
2085     }
2086
2087     #[test]
2088     fn test_str_default() {
2089         use default::Default;
2090         fn t<S: Default + Str>() {
2091             let s: S = Default::default();
2092             assert_eq!(s.as_slice(), "");
2093         }
2094
2095         t::<&str>();
2096         t::<~str>();
2097     }
2098
2099     #[test]
2100     fn test_str_container() {
2101         fn sum_len<S: Container>(v: &[S]) -> uint {
2102             v.iter().map(|x| x.len()).sum()
2103         }
2104
2105         let s = "01234".to_owned();
2106         assert_eq!(5, sum_len(["012", "", "34"]));
2107         assert_eq!(5, sum_len(["01".to_owned(), "2".to_owned(), "34".to_owned(), "".to_owned()]));
2108         assert_eq!(5, sum_len([s.as_slice()]));
2109     }
2110
2111     #[test]
2112     fn test_str_from_utf8() {
2113         let xs = bytes!("hello");
2114         assert_eq!(from_utf8(xs), Some("hello"));
2115
2116         let xs = bytes!("ศไทย中华Việt Nam");
2117         assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
2118
2119         let xs = bytes!("hello", 0xff);
2120         assert_eq!(from_utf8(xs), None);
2121     }
2122
2123     #[test]
2124     fn test_str_from_utf8_owned() {
2125         let xs = bytes!("hello").to_owned();
2126         assert_eq!(from_utf8_owned(xs), Ok("hello".to_owned()));
2127
2128         let xs = bytes!("ศไทย中华Việt Nam").to_owned();
2129         assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_owned()));
2130
2131         let xs = bytes!("hello", 0xff).to_owned();
2132         assert_eq!(from_utf8_owned(xs), Err(bytes!("hello", 0xff).to_owned()));
2133     }
2134
2135     #[test]
2136     fn test_str_from_utf8_lossy() {
2137         let xs = bytes!("hello");
2138         assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2139
2140         let xs = bytes!("ศไทย中华Việt Nam");
2141         assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
2142
2143         let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
2144         assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_owned()));
2145
2146         let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2147         assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_owned()));
2148
2149         let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
2150         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_owned()));
2151
2152         let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
2153         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_owned()));
2154
2155         let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
2156         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_owned()));
2157
2158         let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
2159         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2160                                                foo\U00010000bar".to_owned()));
2161
2162         // surrogates
2163         let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
2164         assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2165                                                \uFFFD\uFFFD\uFFFDbar".to_owned()));
2166     }
2167
2168     #[test]
2169     fn test_from_str() {
2170       let owned: Option<~str> = from_str("string");
2171       assert_eq!(owned, Some("string".to_owned()));
2172     }
2173
2174     #[test]
2175     fn test_maybe_owned_traits() {
2176         let s = Slice("abcde");
2177         assert_eq!(s.len(), 5);
2178         assert_eq!(s.as_slice(), "abcde");
2179         assert_eq!(s.to_str(), "abcde".to_owned());
2180         assert_eq!(format!("{}", s), "abcde".to_owned());
2181         assert!(s.lt(&Owned("bcdef".to_owned())));
2182         assert_eq!(Slice(""), Default::default());
2183
2184         let o = Owned("abcde".to_owned());
2185         assert_eq!(o.len(), 5);
2186         assert_eq!(o.as_slice(), "abcde");
2187         assert_eq!(o.to_str(), "abcde".to_owned());
2188         assert_eq!(format!("{}", o), "abcde".to_owned());
2189         assert!(o.lt(&Slice("bcdef")));
2190         assert_eq!(Owned("".to_owned()), Default::default());
2191
2192         assert!(s.cmp(&o) == Equal);
2193         assert!(s.equiv(&o));
2194
2195         assert!(o.cmp(&s) == Equal);
2196         assert!(o.equiv(&s));
2197     }
2198
2199     #[test]
2200     fn test_maybe_owned_methods() {
2201         let s = Slice("abcde");
2202         assert!(s.is_slice());
2203         assert!(!s.is_owned());
2204
2205         let o = Owned("abcde".to_owned());
2206         assert!(!o.is_slice());
2207         assert!(o.is_owned());
2208     }
2209
2210     #[test]
2211     fn test_maybe_owned_clone() {
2212         assert_eq!(Owned("abcde".to_owned()), Slice("abcde").clone());
2213         assert_eq!(Owned("abcde".to_owned()), Owned("abcde".to_owned()).clone());
2214         assert_eq!(Slice("abcde"), Slice("abcde").clone());
2215         assert_eq!(Slice("abcde"), Owned("abcde".to_owned()).clone());
2216     }
2217
2218     #[test]
2219     fn test_maybe_owned_into_owned() {
2220         assert_eq!(Slice("abcde").into_owned(), "abcde".to_owned());
2221         assert_eq!(Owned("abcde".to_owned()).into_owned(), "abcde".to_owned());
2222     }
2223
2224     #[test]
2225     fn test_into_maybe_owned() {
2226         assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2227         assert_eq!(("abcde".to_owned()).into_maybe_owned(), Slice("abcde"));
2228         assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_owned()));
2229         assert_eq!(("abcde".to_owned()).into_maybe_owned(), Owned("abcde".to_owned()));
2230     }
2231 }
2232
2233 #[cfg(test)]
2234 mod bench {
2235     extern crate test;
2236     use self::test::Bencher;
2237     use super::*;
2238     use prelude::*;
2239
2240     #[bench]
2241     fn char_iterator(b: &mut Bencher) {
2242         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2243         let len = s.char_len();
2244
2245         b.iter(|| assert_eq!(s.chars().len(), len));
2246     }
2247
2248     #[bench]
2249     fn char_iterator_ascii(b: &mut Bencher) {
2250         let s = "Mary had a little lamb, Little lamb
2251         Mary had a little lamb, Little lamb
2252         Mary had a little lamb, Little lamb
2253         Mary had a little lamb, Little lamb
2254         Mary had a little lamb, Little lamb
2255         Mary had a little lamb, Little lamb";
2256         let len = s.char_len();
2257
2258         b.iter(|| assert_eq!(s.chars().len(), len));
2259     }
2260
2261     #[bench]
2262     fn char_iterator_rev(b: &mut Bencher) {
2263         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2264         let len = s.char_len();
2265
2266         b.iter(|| assert_eq!(s.chars().rev().len(), len));
2267     }
2268
2269     #[bench]
2270     fn char_indicesator(b: &mut Bencher) {
2271         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2272         let len = s.char_len();
2273
2274         b.iter(|| assert_eq!(s.char_indices().len(), len));
2275     }
2276
2277     #[bench]
2278     fn char_indicesator_rev(b: &mut Bencher) {
2279         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2280         let len = s.char_len();
2281
2282         b.iter(|| assert_eq!(s.char_indices().rev().len(), len));
2283     }
2284
2285     #[bench]
2286     fn split_unicode_ascii(b: &mut Bencher) {
2287         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2288
2289         b.iter(|| assert_eq!(s.split('V').len(), 3));
2290     }
2291
2292     #[bench]
2293     fn split_unicode_not_ascii(b: &mut Bencher) {
2294         struct NotAscii(char);
2295         impl CharEq for NotAscii {
2296             fn matches(&mut self, c: char) -> bool {
2297                 let NotAscii(cc) = *self;
2298                 cc == c
2299             }
2300             fn only_ascii(&self) -> bool { false }
2301         }
2302         let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2303
2304         b.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
2305     }
2306
2307
2308     #[bench]
2309     fn split_ascii(b: &mut Bencher) {
2310         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2311         let len = s.split(' ').len();
2312
2313         b.iter(|| assert_eq!(s.split(' ').len(), len));
2314     }
2315
2316     #[bench]
2317     fn split_not_ascii(b: &mut Bencher) {
2318         struct NotAscii(char);
2319         impl CharEq for NotAscii {
2320             #[inline]
2321             fn matches(&mut self, c: char) -> bool {
2322                 let NotAscii(cc) = *self;
2323                 cc == c
2324             }
2325             fn only_ascii(&self) -> bool { false }
2326         }
2327         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2328         let len = s.split(' ').len();
2329
2330         b.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
2331     }
2332
2333     #[bench]
2334     fn split_extern_fn(b: &mut Bencher) {
2335         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2336         let len = s.split(' ').len();
2337         fn pred(c: char) -> bool { c == ' ' }
2338
2339         b.iter(|| assert_eq!(s.split(pred).len(), len));
2340     }
2341
2342     #[bench]
2343     fn split_closure(b: &mut Bencher) {
2344         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2345         let len = s.split(' ').len();
2346
2347         b.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
2348     }
2349
2350     #[bench]
2351     fn split_slice(b: &mut Bencher) {
2352         let s = "Mary had a little lamb, Little lamb, little-lamb.";
2353         let len = s.split(' ').len();
2354
2355         b.iter(|| assert_eq!(s.split(&[' ']).len(), len));
2356     }
2357
2358     #[bench]
2359     fn is_utf8_100_ascii(b: &mut Bencher) {
2360
2361         let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2362                         Lorem ipsum dolor sit amet, consectetur. ");
2363
2364         assert_eq!(100, s.len());
2365         b.iter(|| {
2366             is_utf8(s)
2367         });
2368     }
2369
2370     #[bench]
2371     fn is_utf8_100_multibyte(b: &mut Bencher) {
2372         let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2373         assert_eq!(100, s.len());
2374         b.iter(|| {
2375             is_utf8(s)
2376         });
2377     }
2378
2379     #[bench]
2380     fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2381         let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2382                         Lorem ipsum dolor sit amet, consectetur. ");
2383
2384         assert_eq!(100, s.len());
2385         b.iter(|| {
2386             let _ = from_utf8_lossy(s);
2387         });
2388     }
2389
2390     #[bench]
2391     fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2392         let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2393         assert_eq!(100, s.len());
2394         b.iter(|| {
2395             let _ = from_utf8_lossy(s);
2396         });
2397     }
2398
2399     #[bench]
2400     fn from_utf8_lossy_invalid(b: &mut Bencher) {
2401         let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2402         b.iter(|| {
2403             let _ = from_utf8_lossy(s);
2404         });
2405     }
2406
2407     #[bench]
2408     fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2409         let s = Vec::from_elem(100, 0xF5u8);
2410         b.iter(|| {
2411             let _ = from_utf8_lossy(s.as_slice());
2412         });
2413     }
2414
2415     #[bench]
2416     fn bench_connect(b: &mut Bencher) {
2417         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2418         let sep = "→";
2419         let v = [s, s, s, s, s, s, s, s, s, s];
2420         b.iter(|| {
2421             assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2422         })
2423     }
2424
2425     #[bench]
2426     fn bench_contains_short_short(b: &mut Bencher) {
2427         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2428         let needle = "sit";
2429
2430         b.iter(|| {
2431             assert!(haystack.contains(needle));
2432         })
2433     }
2434
2435     #[bench]
2436     fn bench_contains_short_long(b: &mut Bencher) {
2437         let haystack = "\
2438 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2439 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2440 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2441 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2442 tempus vel, gravida nec quam.
2443
2444 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2445 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2446 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2447 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2448 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2449 interdum. Curabitur ut nisi justo.
2450
2451 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2452 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2453 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2454 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2455 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2456 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2457 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2458 Aliquam sit amet placerat lorem.
2459
2460 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2461 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2462 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2463 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2464 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2465 cursus accumsan.
2466
2467 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2468 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2469 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2470 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2471 malesuada sollicitudin quam eu fermentum.";
2472         let needle = "english";
2473
2474         b.iter(|| {
2475             assert!(!haystack.contains(needle));
2476         })
2477     }
2478
2479     #[bench]
2480     fn bench_contains_bad_naive(b: &mut Bencher) {
2481         let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2482         let needle = "aaaaaaaab";
2483
2484         b.iter(|| {
2485             assert!(!haystack.contains(needle));
2486         })
2487     }
2488
2489     #[bench]
2490     fn bench_contains_equal(b: &mut Bencher) {
2491         let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2492         let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2493
2494         b.iter(|| {
2495             assert!(haystack.contains(needle));
2496         })
2497     }
2498 }