src/libstd/str.rs

   1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
   2 // file at the top-level directory of this distribution and at
   3 // http://rust-lang.org/COPYRIGHT.
   4 //
   5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
   6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
   7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
   8 // option. This file may not be copied, modified, or distributed
   9 // except according to those terms.
  10
  11 /*!
  12  * String manipulation
  13  *
  14  * Strings are a packed UTF-8 representation of text, stored as null
  15  * terminated buffers of u8 bytes.  Strings should be indexed in bytes,
  16  * for efficiency, but UTF-8 unsafe operations should be avoided.
  17  */
  18
  19 use at_vec;
  20 use cast::transmute;
  21 use cast;
  22 use char;
  23 use char::Char;
  24 use clone::Clone;
  25 use container::{Container, Mutable};
  26 use iter::Times;
  27 use iterator::{Iterator, IteratorUtil, FilterIterator, AdditiveIterator, MapIterator};
  28 use libc;
  29 use num::Zero;
  30 use option::{None, Option, Some};
  31 use ptr;
  32 use ptr::RawPtr;
  33 use to_str::ToStr;
  34 use uint;
  35 use vec;
  36 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector};
  37
  38 /*
  39 Section: Conditions
  40 */
  41 condition! {
         // Raised by `from_bytes` and `from_bytes_owned` with a `~str` diagnostic
         // message; the `~str` produced by `raise` becomes their return value.
  42     not_utf8: (~str) -> ~str;
  43 }
  44
  45 /*
  46 Section: Creating a string
  47 */
  48
  49 /**
  50  * Convert a vector of bytes to a new UTF-8 string
  51  *
  52  * # Failure
  53  *
  54  * Raises the `not_utf8` condition if invalid UTF-8
  55  */
  56 pub fn from_bytes(vv: &[u8]) -> ~str {
  57     use str::not_utf8::cond;
  58
  59     if !is_utf8(vv) {
  60         let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
  61         cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
  62                         first_bad_byte as uint))
  63     }
  64     else {
  65         return unsafe { raw::from_bytes(vv) }
  66     }
  67 }
  68
  69 /**
  70  * Consumes a vector of bytes to create a new utf-8 string
  71  *
  72  * # Failure
  73  *
  74  * Raises the `not_utf8` condition if invalid UTF-8
  75  */
  76 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
  77     use str::not_utf8::cond;
  78
  79     if !is_utf8(vv) {
  80         let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
  81         cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
  82                         first_bad_byte as uint))
  83     } else {
  84         return unsafe { raw::from_bytes_owned(vv) }
  85     }
  86 }
  87
  88 /**
  89  * Convert a vector of bytes to a UTF-8 string.
  90  * The vector needs to be one byte longer than the string, and end with a 0 byte.
  91  *
  92  * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
  93  *
  94  * # Failure
  95  *
  96  * Fails if invalid UTF-8
  97  * Fails if not null terminated
  98  */
  99 pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
 100     assert_eq!(vv[vv.len() - 1], 0);
 101     assert!(is_utf8(vv));
 102     return unsafe { raw::from_bytes_with_null(vv) };
 103 }
 104
 105 /**
 106  * Converts a vector to a string slice without performing any allocations.
 107  *
 108  * Once the slice has been validated as utf-8, it is transmuted in-place and
 109  * returned as a '&str' instead of a '&[u8]'
 110  *
 111  * # Failure
 112  *
 113  * Fails if invalid UTF-8
 114  */
 115 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
 116     unsafe {
 117         assert!(is_utf8(vector));
 118         let (ptr, len): (*u8, uint) = ::cast::transmute(vector);
 119         let string: &'a str = ::cast::transmute((ptr, len + 1));
 120         string
 121     }
 122 }
 123
 124 /// Copy a slice into a new unique str
 125 #[inline]
 126 pub fn to_owned(s: &str) -> ~str {
 127     unsafe { raw::slice_bytes_owned(s, 0, s.len()) }
 128 }
 129
 130 impl ToStr for ~str {
 131     #[inline]
 132     fn to_str(&self) -> ~str { to_owned(*self) }
 133 }
 134 impl<'self> ToStr for &'self str {
 135     #[inline]
 136     fn to_str(&self) -> ~str { to_owned(*self) }
 137 }
 138 impl ToStr for @str {
 139     #[inline]
 140     fn to_str(&self) -> ~str { to_owned(*self) }
 141 }
 142
 143 /**
 144  * Convert a byte to a UTF-8 string
 145  *
 146  * # Failure
 147  *
 148  * Fails if invalid UTF-8
 149  */
 150 pub fn from_byte(b: u8) -> ~str {
 151     assert!(b < 128u8);
 152     unsafe { ::cast::transmute(~[b, 0u8]) }
 153 }
 154
 155 /// Convert a char to a string
 156 pub fn from_char(ch: char) -> ~str {
 157     let mut buf = ~"";
 158     buf.push_char(ch);
 159     buf
 160 }
 161
 162 /// Convert a vector of chars to a string
 163 pub fn from_chars(chs: &[char]) -> ~str {
 164     let mut buf = ~"";
 165     buf.reserve(chs.len());
 166     for chs.iter().advance |ch| {
 167         buf.push_char(*ch)
 168     }
 169     buf
 170 }
 171
 172 #[doc(hidden)]
     // Free-function form of the `push_str` method: appends `rhs` to `lhs` in place.
 173 pub fn push_str(lhs: &mut ~str, rhs: &str) {
 174     lhs.push_str(rhs)
 175 }
 176
 177 #[allow(missing_doc)]
     // Operations on a vector of strings, implemented below for `&[S]` where `S: Str`.
 178 pub trait StrVector {
         // Join all elements into one new string with nothing between them.
 179     pub fn concat(&self) -> ~str;
         // Join all elements into one new string with `sep` between neighbours.
 180     pub fn connect(&self, sep: &str) -> ~str;
 181 }
 182
 183 impl<'self, S: Str> StrVector for &'self [S] {
 184     /// Concatenate a vector of strings into one newly allocated string.
         ///
         /// Returns the empty string for an empty vector.
 185     pub fn concat(&self) -> ~str {
 186         if self.is_empty() { return ~""; }
 187
 188         let len = self.iter().transform(|s| s.as_slice().len()).sum(); // total bytes to copy
 189
 190         let mut s = ~"";
 191
 192         s.reserve(len);
 193
 194         unsafe {
 195             do as_buf(s) |buf, _| {
                     // `buf` is the write cursor into the reserved storage of `s`
 196                 let mut buf = ::cast::transmute_mut_unsafe(buf);
 197                 for self.iter().advance |ss| {
 198                     do as_buf(ss.as_slice()) |ssbuf, sslen| {
                             // as_buf reports one byte more than the text; drop it
 199                         let sslen = sslen - 1;
 200                         ptr::copy_memory(buf, ssbuf, sslen);
 201                         buf = buf.offset(sslen);
 202                     }
 203                 }
 204             }
                 // the bytes were written behind the string's back; record the new length
 205             raw::set_len(&mut s, len);
 206         }
 207         s
 208     }
 209
 210     /// Concatenate a vector of strings, placing a given separator between each.
         ///
         /// Returns the empty string for an empty vector and defers to `concat`
         /// when `sep` is empty.
 211     pub fn connect(&self, sep: &str) -> ~str {
 212         if self.is_empty() { return ~""; }
 213
 214         // concat is faster
 215         if sep.is_empty() { return self.concat(); }
 216
 217         // this is wrong without the guarantee that `self` is non-empty
             // (`self.len() - 1` separators plus the bytes of every element)
 218         let len = sep.len() * (self.len() - 1)
 219             + self.iter().transform(|s| s.as_slice().len()).sum();
 220         let mut s = ~"";
 221         let mut first = true; // no separator is written before the first element
 222
 223         s.reserve(len);
 224
 225         unsafe {
 226             do as_buf(s) |buf, _| {
 227                 do as_buf(sep) |sepbuf, seplen| {
                         // as_buf reports one byte more than the text; drop it
 228                     let seplen = seplen - 1;
                         // `buf` is the write cursor into the reserved storage of `s`
 229                     let mut buf = ::cast::transmute_mut_unsafe(buf);
 230                     for self.iter().advance |ss| {
 231                         do as_buf(ss.as_slice()) |ssbuf, sslen| {
 232                             let sslen = sslen - 1;
 233                             if first {
 234                                 first = false;
 235                             } else {
 236                                 ptr::copy_memory(buf, sepbuf, seplen);
 237                                 buf = buf.offset(seplen);
 238                             }
 239                             ptr::copy_memory(buf, ssbuf, sslen);
 240                             buf = buf.offset(sslen);
 241                         }
 242                     }
 243                 }
 244             }
                 // the bytes were written behind the string's back; record the new length
 245             raw::set_len(&mut s, len);
 246         }
 247         s
 248     }
 249 }
 250
 251 /// Something that can be tested against a character; used as the separator
     /// type of `StrCharSplitIterator`.
 252 pub trait CharEq {
 253     /// Determine if the splitter should split at the given character
 254     fn matches(&self, char) -> bool;
 255     /// Indicate if this is only concerned about ASCII characters,
 256     /// which can allow for a faster implementation (a byte-by-byte scan).
 257     fn only_ascii(&self) -> bool;
 258 }
 259 impl CharEq for char {
 260     #[inline]
 261     fn matches(&self, c: char) -> bool { *self == c }
 262
 263     fn only_ascii(&self) -> bool { (*self as uint) < 128 }
 264 }
 265 impl<'self> CharEq for &'self fn(char) -> bool {
 266     #[inline]
 267     fn matches(&self, c: char) -> bool { (*self)(c) }
 268
 269     fn only_ascii(&self) -> bool { false }
 270 }
 271 impl CharEq for extern "Rust" fn(char) -> bool {
 272     #[inline]
 273     fn matches(&self, c: char) -> bool { (*self)(c) }
 274
 275     fn only_ascii(&self) -> bool { false }
 276 }
 277
 278 impl<'self, C: CharEq> CharEq for &'self [C] {
 279     #[inline]
 280     fn matches(&self, c: char) -> bool {
 281         self.iter().any(|m| m.matches(c))
 282     }
 283
 284     fn only_ascii(&self) -> bool {
 285         self.iter().all(|m| m.only_ascii())
 286     }
 287 }
 288
 289
 290 /// An iterator over the substrings of a string, separated by `sep`.
 291 #[deriving(Clone)]
 292 pub struct StrCharSplitIterator<'self,Sep> {
 293     priv string: &'self str, // the string being split
 294     priv position: uint, // byte index where the next scan starts
 295     priv sep: Sep,
 296     /// The number of splits remaining
 297     priv count: uint,
 298     /// Whether an empty string at the end is allowed
 299     priv allow_trailing_empty: bool,
 300     priv finished: bool, // set once the final piece has been produced
 301     priv only_ascii: bool // selects the byte-wise scan in `next`
 302 }
 303
 304 /// An iterator over the words of a string, separated by a sequence of whitespace
 305 pub type WordIterator<'self> =
 306     FilterIterator<'self, &'self str,
 307              StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
 308
 309 /// An iterator over the lines of a string, separated by either `\n` or `\r\n`.
 310 pub type AnyLineIterator<'self> =
 311     MapIterator<'self, &'self str, &'self str, StrCharSplitIterator<'self, char>>;
 312
 313 impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
 314     #[inline]
 315     fn next(&mut self) -> Option<&'self str> {
 316         if self.finished { return None }
 317
 318         let l = self.string.len();
 319         let start = self.position;
 320
 321         if self.only_ascii {
 322             // this gives a *huge* speed up for splitting on ASCII
 323             // characters (e.g. '\n' or ' ')
 324             while self.position < l && self.count > 0 {
 325                 let byte = self.string[self.position];
 326
 327                 if self.sep.matches(byte as char) {
 328                     let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
 329                     self.position += 1;
 330                     self.count -= 1;
 331                     return Some(slice);
 332                 }
 333                 self.position += 1;
 334             }
 335         } else {
 336             while self.position < l && self.count > 0 {
 337                 let CharRange {ch, next} = self.string.char_range_at(self.position);
 338
 339                 if self.sep.matches(ch) {
 340                     let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
 341                     self.position = next;
 342                     self.count -= 1;
 343                     return Some(slice);
 344                 }
 345                 self.position = next;
 346             }
 347         }
 348         self.finished = true;
 349         if self.allow_trailing_empty || start < l {
 350             Some(unsafe { raw::slice_bytes(self.string, start, l) })
 351         } else {
 352             None
 353         }
 354     }
 355 }
 356
 357 /// An iterator over the start and end indices of the matches of a
 358 /// substring within a larger string
 359 #[deriving(Clone)]
 360 pub struct StrMatchesIndexIterator<'self> {
 361     priv haystack: &'self str, // the string searched in
 362     priv needle: &'self str, // the substring searched for
 363     priv position: uint, // byte index in `haystack` where the search resumes
 364 }
 365
 366 /// An iterator over the substrings of a string separated by a given
 367 /// search string
 368 #[deriving(Clone)]
 369 pub struct StrStrSplitIterator<'self> {
 370     priv it: StrMatchesIndexIterator<'self>, // finds the separators
 371     priv last_end: uint, // end of the previous separator; start of the next piece
 372     priv finished: bool // set once the final piece has been produced
 373 }
 374
 375 impl<'self> Iterator<(uint, uint)> for StrMatchesIndexIterator<'self> {
         /// Finds the next occurrence of `needle` at or after `position` and
         /// returns its (start, end) byte indices, leaving `position` at the end
         /// of the match so that successive matches never overlap. Returns `None`
         /// once the end of the haystack is reached without a complete match.
         ///
         /// NOTE(review): with an empty needle and a non-empty haystack the first
         /// comparison indexes `self.needle[0]` — confirm callers never pass one.
 376     #[inline]
 377     fn next(&mut self) -> Option<(uint, uint)> {
 378         // See Issue #1932 for why this is a naive search
 379         let (h_len, n_len) = (self.haystack.len(), self.needle.len());
 380         let mut match_start = 0; // where the current partial match began
 381         let mut match_i = 0; // number of needle bytes matched so far
 382
 383         while self.position < h_len {
 384             if self.haystack[self.position] == self.needle[match_i] {
 385                 if match_i == 0 { match_start = self.position; }
 386                 match_i += 1;
 387                 self.position += 1;
 388
 389                 if match_i == n_len {
 390                     // found a match!
 391                     return Some((match_start, self.position));
 392                 }
 393             } else {
 394                 // failed match, backtrack
                     // (a partial match restarts one byte after where it began)
 395                 if match_i > 0 {
 396                     match_i = 0;
 397                     self.position = match_start;
 398                 }
 399                 self.position += 1;
 400             }
 401         }
 402         None
 403     }
 404 }
 405
 406 impl<'self> Iterator<&'self str> for StrStrSplitIterator<'self> {
 407     #[inline]
 408     fn next(&mut self) -> Option<&'self str> {
 409         if self.finished { return None; }
 410
 411         match self.it.next() {
 412             Some((from, to)) => {
 413                 let ret = Some(self.it.haystack.slice(self.last_end, from));
 414                 self.last_end = to;
 415                 ret
 416             }
 417             None => {
 418                 self.finished = true;
 419                 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
 420             }
 421         }
 422     }
 423 }
 424
 425 /** Splits a string into substrings with possibly internal whitespace,
 426  *  each of them at most `lim` bytes long. The substrings have leading and trailing
 427  *  whitespace removed, and are only cut at whitespace boundaries.
 428  *
      *  Each substring is passed to `it`; iteration stops early when `it` returns
      *  false, and that last result is also the return value of this function.
      *
 429  *  # Failure
 430  *
 431  *  Fails during iteration if the string contains a non-whitespace
 432  *  sequence longer than the limit.
 433  */
 434 pub fn each_split_within<'a>(ss: &'a str,
 435                               lim: uint,
 436                               it: &fn(&'a str) -> bool) -> bool {
 437     // Just for fun, let's write this as a state machine:
 438
 439     enum SplitWithinState {
 440         A,  // leading whitespace, initial state
 441         B,  // words
 442         C,  // internal and trailing whitespace
 443     }
 444     enum Whitespace {
 445         Ws, // current char is whitespace
 446         Cr  // current char is not whitespace
 447     }
 448     enum LengthLimit {
 449         UnderLim, // current char makes current substring still fit in limit
 450         OverLim   // current char makes current substring no longer fit in limit
 451     }
 452
 453     let mut slice_start = 0; // start of the substring being built
 454     let mut last_start = 0; // start of the most recent word
 455     let mut last_end = 0; // end of the last complete word
 456     let mut state = A;
 457     let mut fake_i = ss.len(); // index given to the padding spaces fed in at the end
 458     let mut lim = lim;
 459
 460     let mut cont = true; // becomes false once `it` asks to stop
         // emits the substring built so far
 461     let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
 462
 463     // if the limit is larger than the string, lower it to save cycles
 464     if (lim >= fake_i) {
 465         lim = fake_i;
 466     }
 467
         // One step of the automaton for the character `c` at index `i`; returns `cont`.
         // NOTE(review): `i` comes from `enumerate()` over `ss.iter()` but is used as an
         // argument to `ss.slice`; if `iter()` yields chars these are char counts rather
         // than byte offsets — confirm the behaviour for non-ASCII input.
 468     let machine: &fn((uint, char)) -> bool = |(i, c)| {
 469         let whitespace = if char::is_whitespace(c)       { Ws }       else { Cr };
 470         let limit      = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
 471
 472         state = match (state, whitespace, limit) {
 473             (A, Ws, _)        => { A }
 474             (A, Cr, _)        => { slice_start = i; last_start = i; B }
 475
 476             (B, Cr, UnderLim) => { B }
                 // the current word alone already exceeds the limit
 477             (B, Cr, OverLim)  if (i - last_start + 1) > lim
 478                               => fail!("word starting with %? longer than limit!",
 479                                        ss.slice(last_start, i + 1)),
                 // emit what came before the current word and start over from that word
 480             (B, Cr, OverLim)  => { slice(); slice_start = last_start; B }
 481             (B, Ws, UnderLim) => { last_end = i; C }
 482             (B, Ws, OverLim)  => { last_end = i; slice(); A }
 483
 484             (C, Cr, UnderLim) => { last_start = i; B }
 485             (C, Cr, OverLim)  => { slice(); slice_start = i; last_start = i; last_end = i; B }
 486             (C, Ws, OverLim)  => { slice(); A }
 487             (C, Ws, UnderLim) => { C }
 488         };
 489
 490         cont
 491     };
 492
 493     ss.iter().enumerate().advance(|x| machine(x));
 494
 495     // Let the automaton 'run out' by supplying trailing whitespace
 496     while cont && match state { B | C => true, A => false } {
 497         machine((fake_i, ' '));
 498         fake_i += 1;
 499     }
 500     return cont;
 501 }
 502
 503 /**
 504  * Replace all occurrences of one string with another
 505  *
 506  * # Arguments
 507  *
 508  * * s - The string containing substrings to replace
 509  * * from - The string to replace
 510  * * to - The replacement string
 511  *
 512  * # Return value
 513  *
 514  * The original string with all occurances of `from` replaced with `to`
 515  */
 516 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
 517     let mut result = ~"";
 518     let mut last_end = 0;
 519     for s.matches_index_iter(from).advance |(start, end)| {
 520         result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
 521         result.push_str(to);
 522         last_end = end;
 523     }
 524     result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
 525     result
 526 }
 527
 528 /*
 529 Section: Comparing strings
 530 */
 531
 532 /// Bytewise slice equality
 533 #[cfg(not(test))]
 534 #[lang="str_eq"]
 535 #[inline]
 536 pub fn eq_slice(a: &str, b: &str) -> bool {
 537     do as_buf(a) |ap, alen| {
 538         do as_buf(b) |bp, blen| {
 539             if (alen != blen) { false }
 540             else {
 541                 unsafe {
 542                     libc::memcmp(ap as *libc::c_void,
 543                                  bp as *libc::c_void,
 544                                  (alen - 1) as libc::size_t) == 0
 545                 }
 546             }
 547         }
 548     }
 549 }
 550
 551 #[cfg(test)]
 552 #[inline]
 553 pub fn eq_slice(a: &str, b: &str) -> bool {
 554     do as_buf(a) |ap, alen| {
 555         do as_buf(b) |bp, blen| {
 556             if (alen != blen) { false }
 557             else {
 558                 unsafe {
 559                     libc::memcmp(ap as *libc::c_void,
 560                                  bp as *libc::c_void,
 561                                  (alen - 1) as libc::size_t) == 0
 562                 }
 563             }
 564         }
 565     }
 566 }
 567
 568 /// Bytewise equality of two owned strings; delegates to `eq_slice`.
     /// Non-test build, registered as the `uniq_str_eq` lang item.
 569 #[cfg(not(test))]
 570 #[lang="uniq_str_eq"]
 571 #[inline]
 572 pub fn eq(a: &~str, b: &~str) -> bool {
 573     eq_slice(*a, *b)
 574 }
 575
 576 #[cfg(test)]
     // Test-build twin of `eq`: same body, without the lang-item attribute.
 577 #[inline]
 578 pub fn eq(a: &~str, b: &~str) -> bool {
 579     eq_slice(*a, *b)
 580 }
 581
 582 /*
 583 Section: Searching
 584 */
 585
 586 // Utility used by various searching functions
 587 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
 588     let mut i = at;
 589     for needle.bytes_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
 590     return true;
 591 }
 592
 593 /*
 594 Section: Misc
 595 */
 596
 597 /// Determines if a vector of bytes contains valid UTF-8
 598 pub fn is_utf8(v: &[u8]) -> bool {
 599     let mut i = 0u;
 600     let total = v.len();
 601     while i < total {
 602         if v[i] < 128u8 {
 603             i += 1u;
 604         } else {
 605             let w = utf8_char_width(v[i]);
 606             if w == 0u { return false; }
 607
 608             let nexti = i + w;
 609             if nexti > total { return false; }
 610
 611             if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
 612             if w > 2 {
 613                 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
 614                 if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; }
 615             }
 616
 617             i = nexti;
 618         }
 619     }
 620     true
 621 }
 622
 623 /// Determines if a vector of `u16` contains valid UTF-16
 624 pub fn is_utf16(v: &[u16]) -> bool {
 625     let len = v.len();
 626     let mut i = 0u;
 627     while (i < len) {
 628         let u = v[i];
 629
 630         if  u <= 0xD7FF_u16 || u >= 0xE000_u16 {
 631             i += 1u;
 632
 633         } else {
 634             if i+1u < len { return false; }
 635             let u2 = v[i+1u];
 636             if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; }
 637             if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
 638             i += 2u;
 639         }
 640     }
 641     return true;
 642 }
 643
 644 /// Iterates over the utf-16 characters in the specified slice, yielding each
 645 /// decoded unicode character to the function provided.
 646 ///
 647 /// # Failures
 648 ///
 649 /// * Fails on invalid utf-16 data
 650 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
 651     let len = v.len();
 652     let mut i = 0u;
 653     while (i < len && v[i] != 0u16) {
 654         let u = v[i];
 655
 656         if  u <= 0xD7FF_u16 || u >= 0xE000_u16 {
 657             f(u as char);
 658             i += 1u;
 659
 660         } else {
 661             let u2 = v[i+1u];
 662             assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
 663             assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
 664             let mut c = (u - 0xD800_u16) as char;
 665             c = c << 10;
 666             c |= (u2 - 0xDC00_u16) as char;
 667             c |= 0x1_0000_u32 as char;
 668             f(c);
 669             i += 2u;
 670         }
 671     }
 672 }
 673
 674 /**
 675  * Allocates a new string from the utf-16 slice provided
 676  */
 677 pub fn from_utf16(v: &[u16]) -> ~str {
 678     let mut buf = ~"";
 679     buf.reserve(v.len());
 680     utf16_chars(v, |ch| buf.push_char(ch));
 681     buf
 682 }
 683
 684 /**
 685  * Allocates a new string with the specified capacity. The string returned is
 686  * the empty string, but has capacity for much more.
 687  */
 688 pub fn with_capacity(capacity: uint) -> ~str {
 689     let mut buf = ~"";
 690     buf.reserve(capacity);
 691     buf
 692 }
 693
 694 /**
 695  * As char_len but for a slice of a string
 696  *
 697  * # Arguments
 698  *
 699  * * s - A valid string
 700  * * start - The position inside `s` where to start counting in bytes
 701  * * end - The position where to stop counting
 702  *
 703  * # Return value
 704  *
 705  * The number of Unicode characters in `s` between the given indices.
 706  */
 707 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
 708     assert!(s.is_char_boundary(start));
 709     assert!(s.is_char_boundary(end));
 710     let mut i = start;
 711     let mut len = 0u;
 712     while i < end {
 713         let next = s.char_range_at(i).next;
 714         len += 1u;
 715         i = next;
 716     }
 717     return len;
 718 }
 719
 720 /// Counts the number of bytes taken by the first `n` chars in `s`
 721 /// starting from `start`.
 722 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
 723     assert!(s.is_char_boundary(start));
 724     let mut end = start;
 725     let mut cnt = n;
 726     let l = s.len();
 727     while cnt > 0u {
 728         assert!(end < l);
 729         let next = s.char_range_at(end).next;
 730         cnt -= 1u;
 731         end = next;
 732     }
 733     end - start
 734 }
 735
 736 // https://tools.ietf.org/html/rfc3629
     // Indexed by the first byte of a sequence: the number of bytes in the whole
     // sequence, or 0 for a byte that cannot start one (0x80-0xBF and 0xF5-0xFF).
     // NOTE(review): 0xC0 and 0xC1 are listed as 2 here although RFC 3629 never
     // allows them as lead bytes — confirm whether callers depend on that.
 737 static UTF8_CHAR_WIDTH: [u8, ..256] = [
 738 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 739 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
 740 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 741 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
 742 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 743 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
 744 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 745 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
 746 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 747 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
 748 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 749 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
 750 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
 751 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
 752 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
 753 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
 754 ];
 755
 756 /// Given a first byte, determine how many bytes are in this UTF-8 character
 757 pub fn utf8_char_width(b: u8) -> uint {
 758     return UTF8_CHAR_WIDTH[b] as uint;
 759 }
 760
 761 #[allow(missing_doc)]
     // Result of `char_range_at`, as destructured by the iterators in this file.
 762 pub struct CharRange {
 763     ch: char, // the character found at the queried position
 764     next: uint // position from which callers in this file continue scanning
 765 }
 766
 767 // UTF-8 tags and ranges
     // Only `TAG_CONT_U8` is used in the part of the file reviewed here (by
     // `is_utf8`); the others are presumably used by the encoding and decoding
     // code further down — verify there.
 768 static TAG_CONT_U8: u8 = 128u8; // 0b1000_0000, compared against `byte & 192`
 769 static TAG_CONT: uint = 128u; // 0b1000_0000
 770 static MAX_ONE_B: uint = 128u; // 1 << 7
 771 static TAG_TWO_B: uint = 192u; // 0b1100_0000
 772 static MAX_TWO_B: uint = 2048u; // 1 << 11
 773 static TAG_THREE_B: uint = 224u; // 0b1110_0000
 774 static MAX_THREE_B: uint = 65536u; // 1 << 16
 775 static TAG_FOUR_B: uint = 240u; // 0b1111_0000
 776
 777 /**
 778  * A dummy trait to hold all the utility methods that we implement on strings.
 779  */
 780 pub trait StrUtil {
 781     /**
 782      * Work with the byte buffer of a string as a null-terminated C string.
 783      *
 784      * Allows for unsafe manipulation of strings, which is useful for foreign
 785      * interop. This is similar to `str::as_buf`, but guarantees null-termination.
 786      * If the given slice is not already null-terminated, this function will
 787      * allocate a temporary, copy the slice, null terminate it, and pass
 788      * that instead.
 789      *
          * The value returned by `f` is passed through as the result.
          *
 790      * # Example
 791      *
 792      * ~~~ {.rust}
 793      * let s = "PATH".as_c_str(|path| libc::getenv(path));
 794      * ~~~
 795      */
 796     fn as_c_str<T>(self, f: &fn(*libc::c_char) -> T) -> T;
 797 }
 798
 799 impl<'self> StrUtil for &'self str {
 800     #[inline]
 801     fn as_c_str<T>(self, f: &fn(*libc::c_char) -> T) -> T {
 802         do as_buf(self) |buf, len| {
 803             // NB: len includes the trailing null.
 804             assert!(len > 0);
 805             if unsafe { *(ptr::offset(buf,len-1)) != 0 } {
 806                 to_owned(self).as_c_str(|s| f(s))
 807             } else {
 808                 f(buf as *libc::c_char)
 809             }
 810         }
 811     }
 812 }
 813
 814 /**
 815  * Deprecated. Use the `as_c_str` method on strings instead.
      *
      * Forwards to that method and returns its result.
 816  */
 817 #[inline]
 818 pub fn as_c_str<T>(s: &str, f: &fn(*libc::c_char) -> T) -> T {
 819     s.as_c_str(f)
 820 }
 821
 822 /**
 823  * Work with the byte buffer and length of a slice.
 824  *
 825  * The given length is one byte longer than the 'official' indexable
 826  * length of the string. This is to permit probing the byte past the
 827  * indexable area for a null byte, as is the case in slices pointing
 828  * to full strings, or suffixes of them.
 829  */
 830 #[inline]
 831 pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
 832     unsafe {
 833         let v : *(*u8,uint) = transmute(&s);
 834         let (buf,len) = *v;
 835         f(buf, len)
 836     }
 837 }
 838
 839 /// Unsafe operations
 840 pub mod raw {
 841     use cast;
 842     use libc;
 843     use ptr;
 844     use str::raw;
 845     use str::{as_buf, is_utf8};
 846     use vec;
 847     use vec::MutableVector;
 848
 849     /// Create a Rust string from a null-terminated *u8 buffer
 850     pub unsafe fn from_buf(buf: *u8) -> ~str {
 851         let mut curr = buf;
 852         let mut i = 0u;
 853         while *curr != 0u8 {
 854             i += 1u;
 855             curr = ptr::offset(buf, i);
 856         }
 857         return from_buf_len(buf, i);
 858     }
 859
 860     /// Create a Rust string from a *u8 buffer of the given length
 861     pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
 862         let mut v: ~[u8] = vec::with_capacity(len + 1);
 863         v.as_mut_buf(|vbuf, _len| {
 864             ptr::copy_memory(vbuf, buf as *u8, len)
 865         });
 866         vec::raw::set_len(&mut v, len);
 867         v.push(0u8);
 868
 869         assert!(is_utf8(v));
 870         return ::cast::transmute(v);
 871     }
 872
 873     /// Create a Rust string from a null-terminated C string
 874     pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
 875         from_buf(::cast::transmute(c_str))
 876     }
 877
 878     /// Create a Rust string from a `*c_char` buffer of the given length
 879     pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
 880         from_buf_len(::cast::transmute(c_str), len)
 881     }
 882
 883     /// Converts a vector of bytes to a new owned string.
 884     pub unsafe fn from_bytes(v: &[u8]) -> ~str {
 885         do v.as_imm_buf |buf, len| {
 886             from_buf_len(buf, len)
 887         }
 888     }
 889
 890 /// Converts an owned vector of bytes to a new owned string. This assumes
 891 /// that the utf-8-ness of the vector has already been validated; nothing is
     /// checked here.
 892 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
 893     v.push(0u8); // append the terminating 0 before reinterpreting
 894     cast::transmute(v)
 895 }
 896
 897 /// Converts a vector of bytes to a string without copying or checking.
 898 /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
 899 /// the string, if possible ending in a 0 byte.
 900 pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
 901     cast::transmute(v)
 902 }
 903
 904 /// Converts a byte to a string by passing a one-element vector to `raw::from_bytes`.
 905     pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
 906
 907     /// Form a slice from a C string. Unsafe because the caller must ensure the
 908     /// C string has the static lifetime, or else the return value may be
 909     /// invalidated later.
 910     pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
 911         let s = s as *u8;
 912         let mut curr = s;
 913         let mut len = 0u;
 914         while *curr != 0u8 {
 915             len += 1u;
 916             curr = ptr::offset(s, len);
 917         }
 918         let v = (s, len + 1);
 919         assert!(is_utf8(::cast::transmute(v)));
 920         ::cast::transmute(v)
 921     }
 922
 923     /**
 924      * Takes a bytewise (not UTF-8) slice from a string.
 925      *
 926      * Returns the substring from [`begin`..`end`).
 927      *
 928      * # Failure
 929      *
 930      * If begin is greater than end.
 931      * If end is greater than the length of the string.
 932      */
 933     pub unsafe fn slice_bytes_owned(s: &str, begin: uint, end: uint) -> ~str {
 934         do as_buf(s) |sbuf, n| {
 935             assert!((begin <= end));
 936             assert!((end <= n));
 937
 938             let mut v = vec::with_capacity(end - begin + 1u);
 939             do v.as_imm_buf |vbuf, _vlen| {
 940                 let vbuf = ::cast::transmute_mut_unsafe(vbuf);
 941                 let src = ptr::offset(sbuf, begin);
 942                 ptr::copy_memory(vbuf, src, end - begin);
 943             }
 944             vec::raw::set_len(&mut v, end - begin);
 945             v.push(0u8);
 946             ::cast::transmute(v)
 947         }
 948     }
 949
 950     /**
 951      * Takes a bytewise (not UTF-8) slice from a string.
 952      *
 953      * Returns the substring from [`begin`..`end`).
 954      *
 955      * # Failure
 956      *
 957      * If begin is greater than end.
 958      * If end is greater than the length of the string.
 959      */
 960     #[inline]
 961     pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
 962         do as_buf(s) |sbuf, n| {
 963              assert!((begin <= end));
 964              assert!((end <= n));
 965
 966              let tuple = (ptr::offset(sbuf, begin), end - begin + 1);
 967              ::cast::transmute(tuple)
 968         }
 969     }
 970
 971     /// Appends a byte to a string. (Not UTF-8 safe).
 972     pub unsafe fn push_byte(s: &mut ~str, b: u8) {
 973         let new_len = s.len() + 1;
 974         s.reserve_at_least(new_len);
 975         do as_buf(*s) |buf, len| {
 976             let buf: *mut u8 = ::cast::transmute(buf);
 977             *ptr::mut_offset(buf, len) = b;
 978         }
 979         set_len(&mut *s, new_len);
 980     }
 981
 982     /// Appends a vector of bytes to a string. (Not UTF-8 safe).
 983     unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
 984         let new_len = s.len() + bytes.len();
 985         s.reserve_at_least(new_len);
 986         for bytes.iter().advance |byte| { push_byte(&mut *s, *byte); }
 987     }
 988
 989     /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
 990     pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
 991         let len = s.len();
 992         assert!((len > 0u));
 993         let b = s[len - 1u];
 994         set_len(s, len - 1u);
 995         return b;
 996     }
 997
 998     /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
 999     pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
1000         let len = s.len();
1001         assert!((len > 0u));
1002         let b = s[0];
1003         *s = raw::slice_bytes_owned(*s, 1u, len);
1004         return b;
1005     }
1006
1007     /// Sets the length of the string and adds the null terminator
         ///
         /// `new_len` is the length without the terminator. Nothing is
         /// reallocated here, so the buffer must already have room for
         /// `new_len + 1` bytes.
1008     #[inline]
1009     pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
             // Reach the raw vector representation behind the owned string.
1010         let v: **mut vec::UnboxedVecRepr = cast::transmute(v);
1011         let repr: *mut vec::UnboxedVecRepr = *v;
             // The stored fill is one more than `new_len`: it covers the null.
1012         (*repr).fill = new_len + 1u;
             // Write the terminator right after the last content byte.
1013         let null = ptr::mut_offset(cast::transmute(&((*repr).data)),
1014                                    new_len);
1015         *null = 0u8;
1016     }
1017
1018     #[test]
         // Checks that `from_buf_len` takes exactly the requested number of
         // bytes (3 of the 7 'A's) rather than reading up to the null.
1019     fn test_from_buf_len() {
1020         unsafe {
1021             let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
1022             let b = vec::raw::to_ptr(a);
1023             let c = from_buf_len(b, 3u);
1024             assert_eq!(c, ~"AAA");
1025         }
1026     }
1027
1028 }
1029
1030 #[cfg(not(test))]
     // Operator and comparison trait implementations for `&str`, `~str` and
     // `@str`. Every comparison below bottoms out either in `eq_slice` or in
     // the bytewise `cmp` defined for `&str`.
1031 pub mod traits {
1032     use ops::Add;
1033     use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
1034     use super::{Str, eq_slice};
1035
         // `a + b` on slices: copies `self` into a new owned string and appends
         // `rhs` to it.
1036     impl<'self> Add<&'self str,~str> for &'self str {
1037         #[inline]
1038         fn add(&self, rhs: & &'self str) -> ~str {
1039             let mut ret = self.to_owned();
1040             ret.push_str(*rhs);
1041             ret
1042         }
1043     }
1044
         // Total ordering on slices, decided byte by byte.
1045     impl<'self> TotalOrd for &'self str {
1046         #[inline]
1047         fn cmp(&self, other: & &'self str) -> Ordering {
                 // Walk the common prefix; the first differing byte decides.
1048             for self.bytes_iter().zip(other.bytes_iter()).advance |(s_b, o_b)| {
1049                 match s_b.cmp(&o_b) {
1050                     Greater => return Greater,
1051                     Less => return Less,
1052                     Equal => ()
1053                 }
1054             }
1055
                 // Identical over the common prefix: compare the byte lengths.
1056             self.len().cmp(&other.len())
1057         }
1058     }
1059
         // Owned and managed strings defer to the slice ordering above.
1060     impl TotalOrd for ~str {
1061         #[inline]
1062         fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1063     }
1064
1065     impl TotalOrd for @str {
1066         #[inline]
1067         fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1068     }
1069
         // Equality for all three kinds goes through `eq_slice`; `ne` is the
         // negation of `eq`.
1070     impl<'self> Eq for &'self str {
1071         #[inline]
1072         fn eq(&self, other: & &'self str) -> bool {
1073             eq_slice((*self), (*other))
1074         }
1075         #[inline]
1076         fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
1077     }
1078
1079     impl Eq for ~str {
1080         #[inline]
1081         fn eq(&self, other: &~str) -> bool {
1082             eq_slice((*self), (*other))
1083         }
1084         #[inline]
1085         fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
1086     }
1087
1088     impl Eq for @str {
1089         #[inline]
1090         fn eq(&self, other: &@str) -> bool {
1091             eq_slice((*self), (*other))
1092         }
1093         #[inline]
1094         fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
1095     }
1096
         // `equals` uses the same `eq_slice` comparison as `eq`.
1097     impl<'self> TotalEq for &'self str {
1098         #[inline]
1099         fn equals(&self, other: & &'self str) -> bool {
1100             eq_slice((*self), (*other))
1101         }
1102     }
1103
1104     impl TotalEq for ~str {
1105         #[inline]
1106         fn equals(&self, other: &~str) -> bool {
1107             eq_slice((*self), (*other))
1108         }
1109     }
1110
1111     impl TotalEq for @str {
1112         #[inline]
1113         fn equals(&self, other: &@str) -> bool {
1114             eq_slice((*self), (*other))
1115         }
1116     }
1117
         // The four relational operators are each expressed in terms of `cmp`.
1118     impl<'self> Ord for &'self str {
1119         #[inline]
1120         fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
1121         #[inline]
1122         fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
1123         #[inline]
1124         fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
1125         #[inline]
1126         fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1127     }
1128
1129     impl Ord for ~str {
1130         #[inline]
1131         fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1132         #[inline]
1133         fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1134         #[inline]
1135         fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1136         #[inline]
1137         fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1138     }
1139
1140     impl Ord for @str {
1141         #[inline]
1142         fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1143         #[inline]
1144         fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1145         #[inline]
1146         fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1147         #[inline]
1148         fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1149     }
1150
         // `equiv` compares against any `Str` implementor by viewing it as a
         // slice first, so different string kinds can be compared.
1151     impl<'self, S: Str> Equiv<S> for &'self str {
1152         #[inline]
1153         fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1154     }
1155
1156     impl<'self, S: Str> Equiv<S> for @str {
1157         #[inline]
1158         fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1159     }
1160
1161     impl<'self, S: Str> Equiv<S> for ~str {
1162         #[inline]
1163         fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1164     }
1165 }
1166
1167 #[cfg(test)]
     // NOTE(review): in test builds `traits` is deliberately empty, so none of
     // the impls from the non-test module are compiled here -- presumably they
     // are supplied from elsewhere in that configuration; confirm before
     // relying on it.
1168 pub mod traits {}
1169
1170 /// Any string that can be represented as a slice
     ///
     /// Implemented in this file for `&str`, `~str` and `@str`, which lets
     /// generic code accept any of the three and read it as a `&str`.
1171 pub trait Str {
1172     /// Work with `self` as a slice.
1173     fn as_slice<'a>(&'a self) -> &'a str;
1174 }
1175
1176 impl<'self> Str for &'self str {
         /// A slice is already a slice: hands back `*self`.
1177     #[inline]
1178     fn as_slice<'a>(&'a self) -> &'a str { *self }
1179 }
1180 impl<'self> Str for ~str {
         /// Borrows the owned string as a slice.
1181     #[inline]
1182     fn as_slice<'a>(&'a self) -> &'a str {
             // The annotated binding is what gives the borrow the lifetime `'a`.
1183         let s: &'a str = *self; s
1184     }
1185 }
1186 impl<'self> Str for @str {
         /// Borrows the managed string as a slice.
1187     #[inline]
1188     fn as_slice<'a>(&'a self) -> &'a str {
             // The annotated binding is what gives the borrow the lifetime `'a`.
1189         let s: &'a str = *self; s
1190     }
1191 }
1192
1193 impl<'self> Container for &'self str {
1194     #[inline]
1195     fn len(&self) -> uint {
1196         do as_buf(*self) |_p, n| { n - 1u }
1197     }
1198     #[inline]
1199     fn is_empty(&self) -> bool {
1200         self.len() == 0
1201     }
1202 }
1203
1204 impl Container for ~str {
         /// Length in bytes, as reported by the slice view of this string.
1205     #[inline]
1206     fn len(&self) -> uint { self.as_slice().len() }
         /// True when the length is zero.
1207     #[inline]
1208     fn is_empty(&self) -> bool { self.len() == 0 }
1209 }
1210
1211 impl Container for @str {
         /// Length in bytes, as reported by the slice view of this string.
1212     #[inline]
1213     fn len(&self) -> uint { self.as_slice().len() }
         /// True when the length is zero.
1214     #[inline]
1215     fn is_empty(&self) -> bool { self.len() == 0 }
1216 }
1217
1218 impl Mutable for ~str {
1219     /// Remove all content, make the string empty
         ///
         /// Done by setting the length to 0 through `raw::set_len`, which also
         /// writes the terminator at the start of the buffer.
1220     #[inline]
1221     fn clear(&mut self) {
1222         unsafe {
1223             raw::set_len(self, 0)
1224         }
1225     }
1226 }
1227
1228
1229 #[allow(missing_doc)]
     /// Methods available on string slices.
     ///
     /// Only the signatures are listed here; each method is documented on the
     /// implementation for `&'self str`. All indices and offsets are in bytes
     /// unless a method name says `char`/`chars`.
1230 pub trait StrSlice<'self> {
         // Containment tests and iterators.
1231     fn contains<'a>(&self, needle: &'a str) -> bool;
1232     fn contains_char(&self, needle: char) -> bool;
1233     fn iter(&self) -> StrCharIterator<'self>;
1234     fn rev_iter(&self) -> StrCharRevIterator<'self>;
1235     fn bytes_iter(&self) -> StrBytesIterator<'self>;
1236     fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
1237     fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
1238     fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep>;
1239     fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1240         -> StrCharSplitIterator<'self, Sep>;
1241     fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self>;
1242     fn split_str_iter(&self, &'self str) -> StrStrSplitIterator<'self>;
1243     fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
1244     fn any_line_iter(&self) -> AnyLineIterator<'self>;
1245     fn word_iter(&self) -> WordIterator<'self>;
1246     fn ends_with(&self, needle: &str) -> bool;
1247     fn is_whitespace(&self) -> bool;
1248     fn is_alphanumeric(&self) -> bool;
1249     fn char_len(&self) -> uint;
1250
         // Slicing by byte range.
1251     fn slice(&self, begin: uint, end: uint) -> &'self str;
1252     fn slice_from(&self, begin: uint) -> &'self str;
1253     fn slice_to(&self, end: uint) -> &'self str;
1254
         // Slicing by character range.
1255     fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1256
         // Prefix test, escaping, trimming, conversion and char access.
1257     fn starts_with(&self, needle: &str) -> bool;
1258     fn escape_default(&self) -> ~str;
1259     fn escape_unicode(&self) -> ~str;
1260     fn trim(&self) -> &'self str;
1261     fn trim_left(&self) -> &'self str;
1262     fn trim_right(&self) -> &'self str;
1263     fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1264     fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1265     fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1266     fn replace(&self, from: &str, to: &str) -> ~str;
1267     fn to_owned(&self) -> ~str;
1268     fn to_managed(&self) -> @str;
1269     fn to_utf16(&self) -> ~[u16];
1270     fn is_char_boundary(&self, index: uint) -> bool;
1271     fn char_range_at(&self, start: uint) -> CharRange;
1272     fn char_at(&self, i: uint) -> char;
1273     fn char_range_at_reverse(&self, start: uint) -> CharRange;
1274     fn char_at_reverse(&self, i: uint) -> char;
1275     fn as_bytes(&self) -> &'self [u8];
1276
         // Searching; results are byte indices.
1277     fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1278     fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1279     fn find_str(&self, &str) -> Option<uint>;
1280
1281     fn repeat(&self, nn: uint) -> ~str;
1282
1283     fn slice_shift_char(&self) -> (char, &'self str);
1284
1285     fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1286
1287     fn lev_distance(&self, t: &str) -> uint;
1288
1289     fn subslice_offset(&self, inner: &str) -> uint;
1290 }
1291
1292 /// Extension methods for strings
1293 impl<'self> StrSlice<'self> for &'self str {
1294     /**
1295      * Returns true if one string contains another
1296      *
          * Implemented as "does `find_str` locate `needle`?".
          *
1297      * # Arguments
1298      *
1299      * * needle - The string to look for
1300      */
1301     #[inline]
1302     fn contains<'a>(&self, needle: &'a str) -> bool {
1303         self.find_str(needle).is_some()
1304     }
1305     /**
1306      * Returns true if a string contains a char.
1307      *
          * Implemented as "does `find` locate `needle`?".
          *
1308      * # Arguments
1309      *
1310      * * needle - The char to look for
1311      */
1312     #[inline]
1313     fn contains_char(&self, needle: char) -> bool {
1314         self.find(needle).is_some()
1315     }
1316     /// An iterator over the characters of `self`. Note, this iterates
1317     /// over unicode code-points, not unicode graphemes.
1318     ///
         /// The iterator starts at byte index 0 of the string.
         ///
1319     /// # Example
1320     ///
1321     /// ~~~ {.rust}
1322     /// let v: ~[char] = "abc åäö".iter().collect();
1323     /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1324     /// ~~~
1325     #[inline]
1326     fn iter(&self) -> StrCharIterator<'self> {
1327         StrCharIterator {
1328             index: 0,
1329             string: *self
1330         }
1331     }
1332     /// An iterator over the characters of `self`, in reverse order.
         ///
         /// The iterator starts at the byte index one past the last byte.
1333     #[inline]
1334     fn rev_iter(&self) -> StrCharRevIterator<'self> {
1335         StrCharRevIterator {
1336             index: self.len(),
1337             string: *self
1338         }
1339     }
1340
1341     /// An iterator over the bytes of `self`
         ///
         /// Wraps the iterator of `as_bytes()`, so the null terminator is not
         /// among the bytes produced.
1342     #[inline]
1343     fn bytes_iter(&self) -> StrBytesIterator<'self> {
1344         StrBytesIterator { it: self.as_bytes().iter() }
1345     }
1346     /// An iterator over the bytes of `self`, in reverse order
         ///
         /// Wraps the reverse iterator of `as_bytes()`, so the null terminator
         /// is not among the bytes produced.
1347     #[inline]
1348     fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self> {
1349         StrBytesRevIterator { it: self.as_bytes().rev_iter() }
1350     }
1351
1352     /// An iterator over substrings of `self`, separated by characters
1353     /// matched by `sep`.
1354     ///
         /// Delegates to `split_options_iter` with a split count of
         /// `self.len()` and with the trailing empty substring allowed.
         ///
1355     /// # Example
1356     ///
1357     /// ~~~ {.rust}
1358     /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1359     /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1360     ///
1361     /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1362     /// assert_eq!(v, ~["abc", "def", "ghi"]);
1363     /// ~~~
1364     #[inline]
1365     fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
1366         self.split_options_iter(sep, self.len(), true)
1367     }
1368
1369     /// An iterator over substrings of `self`, separated by characters
1370     /// matched by `sep`, restricted to splitting at most `count`
1371     /// times.
         ///
         /// Delegates to `split_options_iter` with the trailing empty
         /// substring allowed.
1372     #[inline]
1373     fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep> {
1374         self.split_options_iter(sep, count, true)
1375     }
1376
1377     /// An iterator over substrings of `self`, separated by characters
1378     /// matched by `sep`, splitting at most `count` times, and
1379     /// possibly not including the trailing empty substring, if it
1380     /// exists.
         ///
         /// This only builds the iterator state; no scanning happens here.
1381     #[inline]
1382     fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1383         -> StrCharSplitIterator<'self, Sep> {
             // Ask the separator once, up front, whether it is ASCII-only, and
             // store the answer next to it in the iterator.
1384         let only_ascii = sep.only_ascii();
1385         StrCharSplitIterator {
1386             string: *self,
1387             position: 0,
1388             sep: sep,
1389             count: count,
1390             allow_trailing_empty: allow_trailing_empty,
1391             finished: false,
1392             only_ascii: only_ascii
1393         }
1394     }
1395     /// An iterator over the start and end indices of each match of
1396     /// `sep` within `self`.
         ///
         /// # Failure
         ///
         /// Fails if `sep` is empty.
1397     #[inline]
1398     fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self> {
1399         assert!(!sep.is_empty())
1400         StrMatchesIndexIterator {
1401             haystack: *self,
1402             needle: sep,
1403             position: 0
1404         }
1405     }
1406     /**
1407      * An iterator over the substrings of `self` separated by `sep`.
1408      *
          * Built on `matches_index_iter`, so it fails if `sep` is empty.
          *
1409      * # Example
1410      *
1411      * ~~~ {.rust}
1412      * let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect();
1413      * assert_eq!(v, ~["", "XXX", "YYY", ""]);
1414      * ~~~
1415      */
1416     #[inline]
1417     fn split_str_iter(&self, sep: &'self str) -> StrStrSplitIterator<'self> {
1418         StrStrSplitIterator {
1419             it: self.matches_index_iter(sep),
1420             last_end: 0,
1421             finished: false
1422         }
1423     }
1424
1425     /// An iterator over the lines of a string (subsequences separated
1426     /// by `\n`).
         ///
         /// Splits on `'\n'` with the trailing empty substring disallowed.
1427     #[inline]
1428     fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
1429         self.split_options_iter('\n', self.len(), false)
1430     }
1431
1432     /// An iterator over the lines of a string, separated by either
1433     /// `\n` or (`\r\n`).
1434     fn any_line_iter(&self) -> AnyLineIterator<'self> {
1435         do self.line_iter().transform |line| {
1436             let l = line.len();
1437             if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1438             else { line }
1439         }
1440     }
1441
1442     /// An iterator over the words of a string (subsequences separated
1443     /// by any sequence of whitespace).
         ///
         /// Splits on every whitespace char and then filters out the empty
         /// pieces that runs of whitespace leave behind.
1444     #[inline]
1445     fn word_iter(&self) -> WordIterator<'self> {
1446         self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1447     }
1448
1449     /**
1450      * Returns true if the string contains only whitespace
1451      *
1452      * Whitespace characters are determined by `char::is_whitespace`,
          * which is applied to every char of the string.
1453      */
1454     #[inline]
1455     fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1456     /**
1457      * Returns true if the string contains only alphanumerics
1458      *
1459      * Alphanumeric characters are determined by `char::is_alphanumeric`,
          * which is applied to every char of the string.
1460      */
1461     #[inline]
1462     fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1463     /// Returns the number of characters that a string holds
         ///
         /// Counted by walking the char iterator, so this is a count of code
         /// points and not of bytes (see `len` for the byte length).
1464     #[inline]
1465     fn char_len(&self) -> uint { self.iter().len_() }
1466
1467     /**
1468      * Returns a slice of the given string from the byte range
1469      * [`begin`..`end`)
1470      *
1471      * Fails when `begin` and `end` do not point to valid characters or
1472      * beyond the last character of the string
1473      */
1474     #[inline]
1475     fn slice(&self, begin: uint, end: uint) -> &'self str {
             // Both ends must sit on character boundaries before the raw,
             // unchecked-for-UTF-8 byte slicing is used.
1476         assert!(self.is_char_boundary(begin));
1477         assert!(self.is_char_boundary(end));
1478         unsafe { raw::slice_bytes(*self, begin, end) }
1479     }
1480     /// Returns a slice of the string from `begin` to its end.
1481     ///
1482     /// Fails when `begin` does not point to a valid character, or is
1483     /// out of bounds.
         ///
         /// Equivalent to `slice(begin, self.len())`.
1484     #[inline]
1485     fn slice_from(&self, begin: uint) -> &'self str {
1486         self.slice(begin, self.len())
1487     }
1488     /// Returns a slice of the string from the beginning to byte
1489     /// `end`.
1490     ///
1491     /// Fails when `end` does not point to a valid character, or is
1492     /// out of bounds.
         ///
         /// Equivalent to `slice(0, end)`.
1493     #[inline]
1494     fn slice_to(&self, end: uint) -> &'self str {
1495         self.slice(0, end)
1496     }
1497
1498     /// Returns a slice of the string from the char range
1499     /// [`begin`..`end`).
1500     ///
1501     /// Fails if `begin` > `end` or the either `begin` or `end` are
1502     /// beyond the last character of the string.
1503     fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1504         assert!(begin <= end);
1505         // not sure how to use the iterators for this nicely.
1506         let mut position = 0;
1507         let mut count = 0;
1508         let l = self.len();
1509         while count < begin && position < l {
1510             position = self.char_range_at(position).next;
1511             count += 1;
1512         }
1513         if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1514         let start_byte = position;
1515         while count < end && position < l {
1516             position = self.char_range_at(position).next;
1517             count += 1;
1518         }
1519         if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1520
1521         self.slice(start_byte, position)
1522     }
1523
1524     /// Returns true if `needle` is a prefix of the string.
1525     fn starts_with<'a>(&self, needle: &'a str) -> bool {
1526         let (self_len, needle_len) = (self.len(), needle.len());
1527         if needle_len == 0u { true }
1528         else if needle_len > self_len { false }
1529         else { match_at(*self, needle, 0u) }
1530     }
1531     /// Returns true if `needle` is a suffix of the string.
1532     fn ends_with(&self, needle: &str) -> bool {
1533         let (self_len, needle_len) = (self.len(), needle.len());
1534         if needle_len == 0u { true }
1535         else if needle_len > self_len { false }
1536         else { match_at(*self, needle, self_len - needle_len) }
1537     }
1538
1539     /// Escape each char in `s` with char::escape_default.
1540     fn escape_default(&self) -> ~str {
1541         let mut out: ~str = ~"";
1542         out.reserve_at_least(self.len());
1543         for self.iter().advance |c| {
1544             do c.escape_default |c| {
1545                 out.push_char(c);
1546             }
1547         }
1548         out
1549     }
1550
1551     /// Escape each char in `s` with char::escape_unicode.
1552     fn escape_unicode(&self) -> ~str {
1553         let mut out: ~str = ~"";
1554         out.reserve_at_least(self.len());
1555         for self.iter().advance |c| {
1556             do c.escape_unicode |c| {
1557                 out.push_char(c);
1558             }
1559         }
1560         out
1561     }
1562
1563     /// Returns a string with leading and trailing whitespace removed
         ///
         /// The result is a sub-slice of `self`; nothing is copied.
1564     #[inline]
1565     fn trim(&self) -> &'self str {
1566         self.trim_left().trim_right()
1567     }
1568     /// Returns a string with leading whitespace removed
         ///
         /// Whitespace is whatever `char::is_whitespace` accepts.
1569     #[inline]
1570     fn trim_left(&self) -> &'self str {
1571         self.trim_left_chars(&char::is_whitespace)
1572     }
1573     /// Returns a string with trailing whitespace removed
         ///
         /// Whitespace is whatever `char::is_whitespace` accepts.
1574     #[inline]
1575     fn trim_right(&self) -> &'self str {
1576         self.trim_right_chars(&char::is_whitespace)
1577     }
1578
1579     /**
1580      * Returns a string with characters that match `to_trim` removed
          * from both ends (leading matches first, then trailing ones).
1581      *
1582      * # Arguments
1583      *
1584      * * to_trim - a character matcher
1585      *
1586      * # Example
1587      *
1588      * ~~~ {.rust}
1589      * assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1590      * assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1591      * assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1592      * ~~~
1593      */
1594     #[inline]
1595     fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1596         self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1597     }
1598     /**
1599      * Returns a string with leading characters that match `to_trim` removed.
          * If every character matches, the empty string is returned.
1600      *
1601      * # Arguments
1602      *
1603      * * to_trim - a character matcher
1604      *
1605      * # Example
1606      *
1607      * ~~~ {.rust}
1608      * assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1609      * assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1610      * assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1611      * ~~~
1612      */
1613     #[inline]
1614     fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
             // Locate the first char that is NOT to be trimmed and keep
             // everything from there on.
1615         match self.find(|c: char| !to_trim.matches(c)) {
1616             None => "",
1617             Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1618         }
1619     }
1620     /**
1621      * Returns a string with trailing characters that match `to_trim` removed.
          * If every character matches, the empty string is returned.
1622      *
1623      * # Arguments
1624      *
1625      * * to_trim - a character matcher
1626      *
1627      * # Example
1628      *
1629      * ~~~ {.rust}
1630      * assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1631      * assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1632      * assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1633      * ~~~
1634      */
1635     #[inline]
1636     fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
             // Locate the last char that is NOT to be trimmed; the slice must
             // end after that whole char, hence the lookup of its `next` index.
1637         match self.rfind(|c: char| !to_trim.matches(c)) {
1638             None => "",
1639             Some(last) => {
1640                 let next = self.char_range_at(last).next;
1641                 unsafe { raw::slice_bytes(*self, 0u, next) }
1642             }
1643         }
1644     }
1645
1646     /**
1647      * Replace all occurrences of one string with another
1648      *
1649      * # Arguments
1650      *
1651      * * from - The string to replace
1652      * * to - The replacement string
1653      *
1654      * # Return value
1655      *
1656      * The original string with all occurances of `from` replaced with `to`
1657      */
1658     pub fn replace(&self, from: &str, to: &str) -> ~str {
1659         let mut result = ~"";
1660         let mut last_end = 0;
1661         for self.matches_index_iter(from).advance |(start, end)| {
1662             result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1663             result.push_str(to);
1664             last_end = end;
1665         }
1666         result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1667         result
1668     }
1669
1670     /// Copy a slice into a new unique str
         ///
         /// Forwards to the free function `to_owned` of this module.
1671     #[inline]
1672     fn to_owned(&self) -> ~str { to_owned(*self) }
1673
1674     #[inline]
1675     fn to_managed(&self) -> @str {
1676         let v = at_vec::from_fn(self.len() + 1, |i| {
1677             if i == self.len() { 0 } else { self[i] }
1678         });
1679         unsafe { ::cast::transmute(v) }
1680     }
1681
1682     /// Converts to a vector of `u16` encoded as UTF-16.
1683     fn to_utf16(&self) -> ~[u16] {
1684         let mut u = ~[];
1685         for self.iter().advance |ch| {
1686             // Arithmetic with u32 literals is easier on the eyes than chars.
1687             let mut ch = ch as u32;
1688
1689             if (ch & 0xFFFF_u32) == ch {
1690                 // The BMP falls through (assuming non-surrogate, as it
1691                 // should)
1692                 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1693                 u.push(ch as u16)
1694             } else {
1695                 // Supplementary planes break into surrogates.
1696                 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1697                 ch -= 0x1_0000_u32;
1698                 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1699                 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1700                 u.push_all([w1, w2])
1701             }
1702         }
1703         u
1704     }
1705
1706     /**
1707      * Returns false if the index points into the middle of a multi-byte
1708      * character sequence.
1709      */
1710     fn is_char_boundary(&self, index: uint) -> bool {
1711         if index == self.len() { return true; }
1712         let b = self[index];
1713         return b < 128u8 || b >= 192u8;
1714     }
1715
1716     /**
1717      * Pluck a character out of a string and return the index of the next
1718      * character.
1719      *
1720      * This function can be used to iterate over the unicode characters of a
1721      * string.
1722      *
1723      * # Example
1724      *
1725      * ~~~ {.rust}
1726      * let s = "中华Việt Nam";
1727      * let mut i = 0u;
1728      * while i < s.len() {
1729      *     let CharRange {ch, next} = s.char_range_at(i);
1730      *     std::io::println(fmt!("%u: %c",i,ch));
1731      *     i = next;
1732      * }
1733      * ~~~
1734      *
1735      * # Example output
1736      *
1737      * ~~~
1738      * 0: 中
1739      * 3: 华
1740      * 6: V
1741      * 7: i
1742      * 8: ệ
1743      * 11: t
1744      * 12:
1745      * 13: N
1746      * 14: a
1747      * 15: m
1748      * ~~~
1749      *
1750      * # Arguments
1751      *
1752      * * self - The string
1753      * * i - The byte offset of the char to extract
1754      *
1755      * # Return value
1756      *
1757      * A record {ch: char, next: uint} containing the char value and the byte
1758      * index of the next unicode character.
1759      *
1760      * # Failure
1761      *
1762      * If `i` is greater than or equal to the length of the string.
1763      * If `i` is not the index of the beginning of a valid UTF-8 character.
1764      */
1765     #[inline]
1766     fn char_range_at(&self, i: uint) -> CharRange {
             // Fast path: a byte below 128 is a complete one-byte character.
1767         if (self[i] < 128u8) {
1768             return CharRange {ch: self[i] as char, next: i + 1 };
1769         }
1770
1771         // Multibyte case is a fn to allow char_range_at to inline cleanly
1772         fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1773             let mut val = s[i] as uint;
                 // The table gives the sequence width for this first byte; a
                 // width of 0 is rejected by the assertion.
1774             let w = UTF8_CHAR_WIDTH[val] as uint;
1775             assert!((w != 0));
1776
1777             // First byte is special, only want bottom 5 bits for width 2, 4 bits
1778             // for width 3, and 3 bits for width 4
1779             val &= 0x7Fu >> w;
                 // Each following byte contributes its low 6 bits.
1780             val = (val << 6) | (s[i + 1] & 63u8) as uint;
1781             if w > 2 { val = (val << 6) | (s[i + 2] & 63u8) as uint; }
1782             if w > 3 { val = (val << 6) | (s[i + 3] & 63u8) as uint; }
1783
1784             return CharRange {ch: val as char, next: i + w};
1785         }
1786
1787         return multibyte_char_range_at(*self, i);
1788     }
1789
1790     /// Plucks the character starting at the `i`th byte of a string
         ///
         /// Same lookup as `char_range_at`, keeping only the char.
1791     #[inline]
1792     fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1793
1794     /**
1795      * Given a byte position and a str, return the previous char and its position.
1796      *
1797      * This function can be used to iterate over a unicode string in reverse.
1798      *
1799      * Returns 0 for next index if called on start index 0.
1800      */
1801     fn char_range_at_reverse(&self, start: uint) -> CharRange {
1802         let mut prev = start;
1803
1804         // while there is a previous byte == 10......
1805         while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1806             prev -= 1u;
1807         }
1808
1809         // now refer to the initial byte of previous char
1810         if prev > 0u {
1811             prev -= 1u;
1812         } else {
1813             prev = 0u;
1814         }
1815
1816
1817         let ch = self.char_at(prev);
1818         return CharRange {ch:ch, next:prev};
1819     }
1820
1821     /// Plucks the character ending at the `i`th byte of a string
         ///
         /// Same lookup as `char_range_at_reverse`, keeping only the char.
1822     #[inline]
1823     fn char_at_reverse(&self, i: uint) -> char {
1824         self.char_range_at_reverse(i).ch
1825     }
1826
1827     /**
1828      * Work with the byte buffer of a string as a byte slice.
1829      *
1830      * The byte slice does not include the null terminator.
          * Nothing is copied: the result points at the string's own bytes.
1831      */
1832     fn as_bytes(&self) -> &'self [u8] {
1833         unsafe {
                 // Take the string slice apart into pointer and raw length, then
                 // rebuild it as a byte slice that is one shorter, which drops
                 // the terminator.
1834             let (ptr, len): (*u8, uint) = ::cast::transmute(*self);
1835             let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
1836             ::cast::transmute(outgoing_tuple)
1837         }
1838     }
1839
1840     /**
1841      * Returns the byte index of the first character of `self` that matches `search`
1842      *
1843      * # Return value
1844      *
1845      * `Some` containing the byte index of the first matching character
1846      * or `None` if there is no match
1847      */
1848     fn find<C: CharEq>(&self, search: C) -> Option<uint> {
1849         if search.only_ascii() {
                 // The matcher says it only cares about ASCII, so scan the raw
                 // bytes; the enumeration index is then the byte index.
1850             for self.bytes_iter().enumerate().advance |(i, b)| {
1851                 if search.matches(b as char) { return Some(i) }
1852             }
1853         } else {
                 // Otherwise go char by char, keeping a running byte offset.
1854             let mut index = 0;
1855             for self.iter().advance |c| {
1856                 if search.matches(c) { return Some(index); }
1857                 index += c.len_utf8_bytes();
1858             }
1859         }
1860
1861         None
1862     }
1863     /**
1864      * Returns the byte index of the last character of `self` that matches `search`
1865      *
1866      * # Return value
1867      *
1868      * `Some` containing the byte index of the last matching character
1869      * or `None` if there is no match
1870      */
1871     fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
1872         let mut index = self.len();
1873         if search.only_ascii() {
1874             for self.bytes_rev_iter().advance |b| {
1875                 index -= 1;
1876                 if search.matches(b as char) { return Some(index); }
1877             }
1878         } else {
1879             for self.rev_iter().advance |c| {
1880                 index -= c.len_utf8_bytes();
1881                 if search.matches(c) { return Some(index); }
1882             }
1883         }
1884
1885         None
1886     }
1887
1888     /**
1889      * Returns the byte index of the first matching substring
1890      *
1891      * # Arguments
1892      *
1893      * * `needle` - The string to search for
1894      *
1895      * # Return value
1896      *
1897      * `Some` containing the byte index of the first matching substring
1898      * or `None` if there is no match
1899      */
1900     fn find_str(&self, needle: &str) -> Option<uint> {
1901         if needle.is_empty() {
1902             Some(0)
1903         } else {
1904             self.matches_index_iter(needle)
1905                 .next()
1906                 .map_consume(|(start, _end)| start)
1907         }
1908     }
1909
    /// Given a string, make a new string with repeated copies of it.
    ///
    /// Returns a newly built string holding `nn` back-to-back copies of
    /// `self`.
    fn repeat(&self, nn: uint) -> ~str {
        do as_buf(*self) |buf, len| {
            let mut ret = ~"";
            // ignore the NULL terminator
            let len = len - 1;
            // Reserve room for every copy up front; the raw copies below
            // write straight into this buffer.
            ret.reserve(nn * len);

            unsafe {
                do as_buf(ret) |rbuf, _len| {
                    // Cast to a mutable pointer so the buffer can be written
                    // through (presumably `as_buf` yields a const pointer).
                    let mut rbuf = ::cast::transmute_mut_unsafe(rbuf);

                    // One copy of `self` per repetition, advancing the write
                    // cursor by `len` bytes each time.
                    for nn.times {
                        ptr::copy_memory(rbuf, buf, len);
                        rbuf = rbuf.offset(len);
                    }
                }
                // Record the total number of bytes written as the new length.
                raw::set_len(&mut ret, nn * len);
            }
            ret
        }
    }
1932
1933     /**
1934      * Retrieves the first character from a string slice and returns
1935      * it. This does not allocate a new string; instead, it returns a
1936      * slice that point one character beyond the character that was
1937      * shifted.
1938      *
1939      * # Failure
1940      *
1941      * If the string does not contain any characters
1942      */
1943     #[inline]
1944     fn slice_shift_char(&self) -> (char, &'self str) {
1945         let CharRange {ch, next} = self.char_range_at(0u);
1946         let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1947         return (ch, next_s);
1948     }
1949
1950
1951     /// Apply a function to each character.
1952     fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
1953         let mut result = with_capacity(self.len());
1954         for self.iter().advance |cc| {
1955             result.push_char(ff(cc));
1956         }
1957         result
1958     }
1959
1960     /// Levenshtein Distance between two strings.
1961     fn lev_distance(&self, t: &str) -> uint {
1962         let slen = self.len();
1963         let tlen = t.len();
1964
1965         if slen == 0 { return tlen; }
1966         if tlen == 0 { return slen; }
1967
1968         let mut dcol = vec::from_fn(tlen + 1, |x| x);
1969
1970         for self.iter().enumerate().advance |(i, sc)| {
1971
1972             let mut current = i;
1973             dcol[0] = current + 1;
1974
1975             for t.iter().enumerate().advance |(j, tc)| {
1976
1977                 let next = dcol[j + 1];
1978
1979                 if sc == tc {
1980                     dcol[j + 1] = current;
1981                 } else {
1982                     dcol[j + 1] = ::cmp::min(current, next);
1983                     dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
1984                 }
1985
1986                 current = next;
1987             }
1988         }
1989
1990         return dcol[tlen];
1991     }
1992
1993
    /**
     * Returns the byte offset of an inner slice relative to an enclosing outer slice.
     *
     * Fails if `inner` is not a direct slice contained within self.
     *
     * # Example
     *
     * ~~~ {.rust}
     * let string = "a\nb\nc";
     * let mut lines = ~[];
     * for string.line_iter().advance |line| { lines.push(line) }
     *
     * assert!(string.subslice_offset(lines[0]) == 0); // &"a"
     * assert!(string.subslice_offset(lines[1]) == 2); // &"b"
     * assert!(string.subslice_offset(lines[2]) == 4); // &"c"
     * ~~~
     */
    #[inline]
    fn subslice_offset(&self, inner: &str) -> uint {
        do as_buf(*self) |a, a_len| {
            do as_buf(inner) |b, b_len| {
                // Start/end addresses of both buffers as plain integers.
                let a_start: uint;
                let a_end: uint;
                let b_start: uint;
                let b_end: uint;
                unsafe {
                    // Reinterpret the raw pointers as addresses; each end is
                    // start + the length reported by `as_buf`.
                    a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
                    b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
                }
                // `inner` must lie entirely within `self`'s address range.
                assert!(a_start <= b_start);
                assert!(b_end <= a_end);
                b_start - a_start
            }
        }
    }
2029
2030 }
2031
// Strings whose buffer can be viewed as bytes together with the trailing
// null terminator. Implemented below for `~str` and `@str`.
#[allow(missing_doc)]
pub trait NullTerminatedStr {
    // Borrow the string's bytes, terminator included.
    fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
}
2036
2037 impl NullTerminatedStr for ~str {
2038     /**
2039      * Work with the byte buffer of a string as a byte slice.
2040      *
2041      * The byte slice does include the null terminator.
2042      */
2043     #[inline]
2044     fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2045         let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
2046         let slice: &'a [u8] = *ptr;
2047         slice
2048     }
2049 }
2050 impl NullTerminatedStr for @str {
2051     /**
2052      * Work with the byte buffer of a string as a byte slice.
2053      *
2054      * The byte slice does include the null terminator.
2055      */
2056     #[inline]
2057     fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2058         let ptr: &'a @[u8] = unsafe { ::cast::transmute(self) };
2059         let slice: &'a [u8] = *ptr;
2060         slice
2061     }
2062 }
2063
// Operations that need an owned, growable string. Implemented below for `~str`.
#[allow(missing_doc)]
pub trait OwnedStr {
    // Append `rhs`, reserving exactly the space required.
    fn push_str_no_overallocate(&mut self, rhs: &str);
    // Append `rhs`, reserving via `reserve_at_least`.
    fn push_str(&mut self, rhs: &str);
    // Append a single character, UTF-8 encoded.
    fn push_char(&mut self, c: char);
    // Remove and return the last character; fails on an empty string.
    fn pop_char(&mut self) -> char;
    // Remove and return the first character.
    fn shift_char(&mut self) -> char;
    // Insert a character at the front.
    fn unshift_char(&mut self, ch: char);
    // Return a new string made of `self` followed by `rhs`.
    fn append(&self, rhs: &str) -> ~str; // FIXME #4850: this should consume self.
    // Ensure room for exactly `n` bytes (plus the null terminator).
    fn reserve(&mut self, n: uint);
    // Ensure room for at least `n` bytes, rounding the request up.
    fn reserve_at_least(&mut self, n: uint);
    // Number of bytes the string can hold without reallocating.
    fn capacity(&self) -> uint;

    // Turn the string into its byte vector, null terminator included.
    fn as_bytes_with_null_consume(self) -> ~[u8];
}
2079
2080 impl OwnedStr for ~str {
2081     /// Appends a string slice to the back of a string, without overallocating
2082     #[inline]
2083     fn push_str_no_overallocate(&mut self, rhs: &str) {
2084         unsafe {
2085             let llen = self.len();
2086             let rlen = rhs.len();
2087             self.reserve(llen + rlen);
2088             do as_buf(*self) |lbuf, _llen| {
2089                 do as_buf(rhs) |rbuf, _rlen| {
2090                     let dst = ptr::offset(lbuf, llen);
2091                     let dst = ::cast::transmute_mut_unsafe(dst);
2092                     ptr::copy_memory(dst, rbuf, rlen);
2093                 }
2094             }
2095             raw::set_len(self, llen + rlen);
2096         }
2097     }
2098
2099     /// Appends a string slice to the back of a string
2100     #[inline]
2101     fn push_str(&mut self, rhs: &str) {
2102         unsafe {
2103             let llen = self.len();
2104             let rlen = rhs.len();
2105             self.reserve_at_least(llen + rlen);
2106             do as_buf(*self) |lbuf, _llen| {
2107                 do as_buf(rhs) |rbuf, _rlen| {
2108                     let dst = ptr::offset(lbuf, llen);
2109                     let dst = ::cast::transmute_mut_unsafe(dst);
2110                     ptr::copy_memory(dst, rbuf, rlen);
2111                 }
2112             }
2113             raw::set_len(self, llen + rlen);
2114         }
2115     }
2116     /// Appends a character to the back of a string
2117     #[inline]
2118     fn push_char(&mut self, c: char) {
2119         assert!(c as uint <= 0x10ffff); // FIXME: #7609: should be enforced on all `char`
2120         unsafe {
2121             let code = c as uint;
2122             let nb = if code < MAX_ONE_B { 1u }
2123             else if code < MAX_TWO_B { 2u }
2124             else if code < MAX_THREE_B { 3u }
2125             else { 4u };
2126             let len = self.len();
2127             let new_len = len + nb;
2128             self.reserve_at_least(new_len);
2129             let off = len;
2130             do as_buf(*self) |buf, _len| {
2131                 let buf: *mut u8 = ::cast::transmute(buf);
2132                 match nb {
2133                     1u => {
2134                         *ptr::mut_offset(buf, off) = code as u8;
2135                     }
2136                     2u => {
2137                         *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2138                         *ptr::mut_offset(buf, off + 1u) = (code & 63u | TAG_CONT) as u8;
2139                     }
2140                     3u => {
2141                         *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2142                         *ptr::mut_offset(buf, off + 1u) = (code >> 6u & 63u | TAG_CONT) as u8;
2143                         *ptr::mut_offset(buf, off + 2u) = (code & 63u | TAG_CONT) as u8;
2144                     }
2145                     4u => {
2146                         *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2147                         *ptr::mut_offset(buf, off + 1u) = (code >> 12u & 63u | TAG_CONT) as u8;
2148                         *ptr::mut_offset(buf, off + 2u) = (code >> 6u & 63u | TAG_CONT) as u8;
2149                         *ptr::mut_offset(buf, off + 3u) = (code & 63u | TAG_CONT) as u8;
2150                     }
2151                     _ => {}
2152                 }
2153             }
2154             raw::set_len(self, new_len);
2155         }
2156     }
2157     /**
2158      * Remove the final character from a string and return it
2159      *
2160      * # Failure
2161      *
2162      * If the string does not contain any characters
2163      */
2164     fn pop_char(&mut self) -> char {
2165         let end = self.len();
2166         assert!(end > 0u);
2167         let CharRange {ch, next} = self.char_range_at_reverse(end);
2168         unsafe { raw::set_len(self, next); }
2169         return ch;
2170     }
2171
2172     /**
2173      * Remove the first character from a string and return it
2174      *
2175      * # Failure
2176      *
2177      * If the string does not contain any characters
2178      */
2179     fn shift_char(&mut self) -> char {
2180         let CharRange {ch, next} = self.char_range_at(0u);
2181         *self = unsafe { raw::slice_bytes_owned(*self, next, self.len()) };
2182         return ch;
2183     }
2184
2185     /// Prepend a char to a string
2186     fn unshift_char(&mut self, ch: char) {
2187         // This could be more efficient.
2188         let mut new_str = ~"";
2189         new_str.push_char(ch);
2190         new_str.push_str(*self);
2191         *self = new_str;
2192     }
2193
2194     /// Concatenate two strings together.
2195     #[inline]
2196     fn append(&self, rhs: &str) -> ~str {
2197         // FIXME #4850: this should consume self, but that causes segfaults
2198         let mut v = self.clone();
2199         v.push_str_no_overallocate(rhs);
2200         v
2201     }
2202
2203     /**
2204      * Reserves capacity for exactly `n` bytes in the given string, not including
2205      * the null terminator.
2206      *
2207      * Assuming single-byte characters, the resulting string will be large
2208      * enough to hold a string of length `n`. To account for the null terminator,
2209      * the underlying buffer will have the size `n` + 1.
2210      *
2211      * If the capacity for `s` is already equal to or greater than the requested
2212      * capacity, then no action is taken.
2213      *
2214      * # Arguments
2215      *
2216      * * s - A string
2217      * * n - The number of bytes to reserve space for
2218      */
2219     #[inline]
2220     pub fn reserve(&mut self, n: uint) {
2221         unsafe {
2222             let v: *mut ~[u8] = cast::transmute(self);
2223             (*v).reserve(n + 1);
2224         }
2225     }
2226
2227     /**
2228      * Reserves capacity for at least `n` bytes in the given string, not including
2229      * the null terminator.
2230      *
2231      * Assuming single-byte characters, the resulting string will be large
2232      * enough to hold a string of length `n`. To account for the null terminator,
2233      * the underlying buffer will have the size `n` + 1.
2234      *
2235      * This function will over-allocate in order to amortize the allocation costs
2236      * in scenarios where the caller may need to repeatedly reserve additional
2237      * space.
2238      *
2239      * If the capacity for `s` is already equal to or greater than the requested
2240      * capacity, then no action is taken.
2241      *
2242      * # Arguments
2243      *
2244      * * s - A string
2245      * * n - The number of bytes to reserve space for
2246      */
2247     #[inline]
2248     fn reserve_at_least(&mut self, n: uint) {
2249         self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2250     }
2251
2252     /**
2253      * Returns the number of single-byte characters the string can hold without
2254      * reallocating
2255      */
2256     fn capacity(&self) -> uint {
2257         let buf: &~[u8] = unsafe { cast::transmute(self) };
2258         let vcap = buf.capacity();
2259         assert!(vcap > 0u);
2260         vcap - 1u
2261     }
2262
2263     /// Convert to a vector of bytes. This does not allocate a new
2264     /// string, and includes the null terminator.
2265     #[inline]
2266     fn as_bytes_with_null_consume(self) -> ~[u8] {
2267         unsafe { ::cast::transmute(self) }
2268     }
2269 }
2270
2271 impl Clone for ~str {
2272     #[inline]
2273     fn clone(&self) -> ~str {
2274         to_owned(*self)
2275     }
2276 }
2277
2278 impl Clone for @str {
2279     #[inline]
2280     fn clone(&self) -> @str {
2281         *self
2282     }
2283 }
2284
/// External iterator for a string's characters. Use with the `std::iterator`
/// module.
#[deriving(Clone)]
pub struct StrCharIterator<'self> {
    // Byte offset of the next character to decode; iteration stops once
    // this reaches `string.len()`.
    priv index: uint,
    // The string being iterated over.
    priv string: &'self str,
}
2292
2293 impl<'self> Iterator<char> for StrCharIterator<'self> {
2294     #[inline]
2295     fn next(&mut self) -> Option<char> {
2296         if self.index < self.string.len() {
2297             let CharRange {ch, next} = self.string.char_range_at(self.index);
2298             self.index = next;
2299             Some(ch)
2300         } else {
2301             None
2302         }
2303     }
2304 }
/// External iterator for a string's characters in reverse order. Use
/// with the `std::iterator` module.
#[deriving(Clone)]
pub struct StrCharRevIterator<'self> {
    // Byte offset just past the next character to yield; iteration stops
    // once this reaches 0.
    priv index: uint,
    // The string being iterated over.
    priv string: &'self str,
}
2312
2313 impl<'self> Iterator<char> for StrCharRevIterator<'self> {
2314     #[inline]
2315     fn next(&mut self) -> Option<char> {
2316         if self.index > 0 {
2317             let CharRange {ch, next} = self.string.char_range_at_reverse(self.index);
2318             self.index = next;
2319             Some(ch)
2320         } else {
2321             None
2322         }
2323     }
2324 }
2325
/// External iterator for a string's bytes. Use with the `std::iterator`
/// module.
#[deriving(Clone)]
pub struct StrBytesIterator<'self> {
    // Underlying vector iterator; its `&u8` items are copied out by `next`.
    priv it: vec::VecIterator<'self, u8>
}
2332
2333 impl<'self> Iterator<u8> for StrBytesIterator<'self> {
2334     #[inline]
2335     fn next(&mut self) -> Option<u8> {
2336         self.it.next().map_consume(|&x| x)
2337     }
2338 }
2339
/// External iterator for a string's bytes in reverse order. Use with
/// the `std::iterator` module.
#[deriving(Clone)]
pub struct StrBytesRevIterator<'self> {
    // Underlying reverse vector iterator; its `&u8` items are copied out by `next`.
    priv it: vec::VecRevIterator<'self, u8>
}
2346
2347 impl<'self> Iterator<u8> for StrBytesRevIterator<'self> {
2348     #[inline]
2349     fn next(&mut self) -> Option<u8> {
2350         self.it.next().map_consume(|&x| x)
2351     }
2352 }
2353
2354 // This works because every lifetime is a sub-lifetime of 'static
2355 impl<'self> Zero for &'self str {
2356     fn zero() -> &'self str { "" }
2357     fn is_zero(&self) -> bool { self.is_empty() }
2358 }
2359
2360 impl Zero for ~str {
2361     fn zero() -> ~str { ~"" }
2362     fn is_zero(&self) -> bool { self.len() == 0 }
2363 }
2364
2365 impl Zero for @str {
2366     fn zero() -> @str { @"" }
2367     fn is_zero(&self) -> bool { self.len() == 0 }
2368 }
2369
2370 #[cfg(test)]
2371 mod tests {
2372     use iterator::IteratorUtil;
2373     use container::Container;
2374     use option::Some;
2375     use libc::c_char;
2376     use libc;
2377     use ptr;
2378     use str::*;
2379     use uint;
2380     use vec;
2381     use vec::{ImmutableVector, CopyableVector};
2382     use cmp::{TotalOrd, Less, Equal, Greater};
2383
    // `eq` on owned strings: equal contents compare equal (including two
    // empty strings), different contents do not.
    #[test]
    fn test_eq() {
        assert!((eq(&~"", &~"")));
        assert!((eq(&~"foo", &~"foo")));
        assert!((!eq(&~"foo", &~"bar")));
    }
2390
    // `eq_slice` compares by content: a sub-slice taken from the front or
    // the back of a longer string equals a literal with the same text.
    #[test]
    fn test_eq_slice() {
        assert!((eq_slice("foobar".slice(0, 3), "foo")));
        assert!((eq_slice("barfoo".slice(3, 6), "foo")));
        assert!((!eq_slice("foo1", "foo2")));
    }
2397
    // `<=` on string slices: the empty string is <= everything, and a
    // string is <= itself.
    #[test]
    fn test_le() {
        assert!("" <= "");
        assert!("" <= "foo");
        assert!("foo" <= "foo");
        // NOTE(review): this checks inequality rather than ordering.
        assert!("foo" != "bar");
    }
2405
    // `len` counts bytes while `char_len` counts characters; the same inputs
    // are run through both, covering 1- to 4-byte encodings.
    #[test]
    fn test_len() {
        // Byte lengths.
        assert_eq!("".len(), 0u);
        assert_eq!("hello world".len(), 11u);
        assert_eq!("\x63".len(), 1u);
        assert_eq!("\xa2".len(), 2u);
        assert_eq!("\u03c0".len(), 2u);
        assert_eq!("\u2620".len(), 3u);
        assert_eq!("\U0001d11e".len(), 4u);

        // Character counts for the same inputs.
        assert_eq!("".char_len(), 0u);
        assert_eq!("hello world".char_len(), 11u);
        assert_eq!("\x63".char_len(), 1u);
        assert_eq!("\xa2".char_len(), 1u);
        assert_eq!("\u03c0".char_len(), 1u);
        assert_eq!("\u2620".char_len(), 1u);
        assert_eq!("\U0001d11e".char_len(), 1u);
        assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
    }
2425
2426     #[test]
2427     fn test_find() {
2428         assert_eq!("hello".find('l'), Some(2u));
2429         assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2430         assert!("hello".find('x').is_none());
2431         assert!("hello".find(|c:char| c == 'x').is_none());
2432         assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2433         assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2434     }
2435
2436     #[test]
2437     fn test_rfind() {
2438         assert_eq!("hello".rfind('l'), Some(3u));
2439         assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2440         assert!("hello".rfind('x').is_none());
2441         assert!("hello".rfind(|c:char| c == 'x').is_none());
2442         assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2443         assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2444     }
2445
2446     #[test]
2447     fn test_push_str() {
2448         let mut s = ~"";
2449         s.push_str("");
2450         assert_eq!(s.slice_from(0), "");
2451         s.push_str("abc");
2452         assert_eq!(s.slice_from(0), "abc");
2453         s.push_str("ประเทศไทย中华Việt Nam");
2454         assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2455     }
2456     #[test]
2457     fn test_append() {
2458         let mut s = ~"";
2459         s = s.append("");
2460         assert_eq!(s.slice_from(0), "");
2461         s = s.append("abc");
2462         assert_eq!(s.slice_from(0), "abc");
2463         s = s.append("ประเทศไทย中华Việt Nam");
2464         assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2465     }
2466
2467     #[test]
2468     fn test_pop_char() {
2469         let mut data = ~"ประเทศไทย中华";
2470         let cc = data.pop_char();
2471         assert_eq!(~"ประเทศไทย中", data);
2472         assert_eq!('华', cc);
2473     }
2474
2475     #[test]
2476     fn test_pop_char_2() {
2477         let mut data2 = ~"华";
2478         let cc2 = data2.pop_char();
2479         assert_eq!(~"", data2);
2480         assert_eq!('华', cc2);
2481     }
2482
2483     #[test]
2484     #[should_fail]
2485     #[ignore(cfg(windows))]
2486     fn test_pop_char_fail() {
2487         let mut data = ~"";
2488         let _cc3 = data.pop_char();
2489     }
2490
2491     #[test]
2492     fn test_push_char() {
2493         let mut data = ~"ประเทศไทย中";
2494         data.push_char('华');
2495         data.push_char('b'); // 1 byte
2496         data.push_char('¢'); // 2 byte
2497         data.push_char('€'); // 3 byte
2498         data.push_char('𤭢'); // 4 byte
2499         assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2500     }
2501
2502     #[test]
2503     fn test_shift_char() {
2504         let mut data = ~"ประเทศไทย中";
2505         let cc = data.shift_char();
2506         assert_eq!(~"ระเทศไทย中", data);
2507         assert_eq!('ป', cc);
2508     }
2509
2510     #[test]
2511     fn test_unshift_char() {
2512         let mut data = ~"ประเทศไทย中";
2513         data.unshift_char('华');
2514         assert_eq!(~"华ประเทศไทย中", data);
2515     }
2516
2517     #[test]
2518     fn test_clear() {
2519         let mut empty = ~"";
2520         empty.clear();
2521         assert_eq!("", empty.as_slice());
2522         let mut data = ~"ประเทศไทย中";
2523         data.clear();
2524         assert_eq!("", data.as_slice());
2525         data.push_char('华');
2526         assert_eq!("华", data.as_slice());
2527     }
2528
    // `each_split_within` breaks text into pieces of at most `i` characters.
    #[test]
    fn test_split_within() {
        // Collects the pieces produced for `s` with limit `i` and compares
        // them pairwise against `u`.
        // NOTE(review): `zip` stops at the shorter sequence, so a result with
        // too few or too many pieces would still pass - confirm whether a
        // length check is wanted.
        fn t(s: &str, i: uint, u: &[~str]) {
            let mut v = ~[];
            for each_split_within(s, i) |s| { v.push(s.to_owned()) }
            assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
        }
        t("", 0, []);
        t("", 15, []);
        t("hello", 15, [~"hello"]);
        t("\nMary had a little lamb\nLittle lamb\n", 15,
            [~"Mary had a", ~"little lamb", ~"Little lamb"]);
        t("\nMary had a little lamb\nLittle lamb\n", uint::max_value,
            [~"Mary had a little lamb\nLittle lamb"]);
    }
2544
    // `find_str` returns byte offsets relative to the slice being searched.
    #[test]
    fn test_find_str() {
        // byte positions
        assert_eq!("".find_str(""), Some(0u));
        assert!("banana".find_str("apple pie").is_none());

        // Offsets are relative to the start of the sub-slice, not the parent.
        let data = "abcabc";
        assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
        assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
        assert!(data.slice(2u, 4u).find_str("ab").is_none());

        // Multi-byte text, doubled so every needle occurs in both halves.
        let mut data = ~"ประเทศไทย中华Việt Nam";
        data = data + data;
        assert!(data.find_str("ไท华").is_none());
        assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
        assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));

        // First half.
        assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
        assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
        assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
        assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
        assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));

        // Second half: same needles, offsets written as (absolute - 43).
        assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
        assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
        assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
        assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
        assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
    }
2574
2575     #[test]
2576     fn test_slice_chars() {
2577         fn t(a: &str, b: &str, start: uint) {
2578             assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2579         }
2580         t("hello", "llo", 2);
2581         t("hello", "el", 1);
2582         assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2583     }
2584
2585     #[test]
2586     fn test_concat() {
2587         fn t(v: &[~str], s: &str) {
2588             assert_eq!(v.concat(), s.to_str());
2589         }
2590         t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2591         let v: &[~str] = [];
2592         t(v, "");
2593         t([~"hi"], "hi");
2594     }
2595
2596     #[test]
2597     fn test_connect() {
2598         fn t(v: &[~str], sep: &str, s: &str) {
2599             assert_eq!(v.connect(sep), s.to_str());
2600         }
2601         t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2602           " ", "you know I'm no good");
2603         let v: &[~str] = [];
2604         t(v, " ", "");
2605         t([~"hi"], " ", "hi");
2606     }
2607
2608     #[test]
2609     fn test_concat_slices() {
2610         fn t(v: &[&str], s: &str) {
2611             assert_eq!(v.concat(), s.to_str());
2612         }
2613         t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2614         let v: &[&str] = [];
2615         t(v, "");
2616         t(["hi"], "hi");
2617     }
2618
2619     #[test]
2620     fn test_connect_slices() {
2621         fn t(v: &[&str], sep: &str, s: &str) {
2622             assert_eq!(v.connect(sep), s.to_str());
2623         }
2624         t(["you", "know", "I'm", "no", "good"],
2625           " ", "you know I'm no good");
2626         t([], " ", "");
2627         t(["hi"], " ", "hi");
2628     }
2629
2630     #[test]
2631     fn test_repeat() {
2632         assert_eq!("x".repeat(4), ~"xxxx");
2633         assert_eq!("hi".repeat(4), ~"hihihihi");
2634         assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2635         assert_eq!("".repeat(4), ~"");
2636         assert_eq!("hi".repeat(0), ~"");
2637     }
2638
2639     #[test]
2640     fn test_unsafe_slice() {
2641         assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2642         assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2643         assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2644         fn a_million_letter_a() -> ~str {
2645             let mut i = 0;
2646             let mut rs = ~"";
2647             while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2648             rs
2649         }
2650         fn half_a_million_letter_a() -> ~str {
2651             let mut i = 0;
2652             let mut rs = ~"";
2653             while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2654             rs
2655         }
2656         let letters = a_million_letter_a();
2657         assert!(half_a_million_letter_a() ==
2658             unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
2659     }
2660
    // `starts_with`: the empty prefix always matches; a prefix longer than
    // the string never does.
    #[test]
    fn test_starts_with() {
        assert!(("".starts_with("")));
        assert!(("abc".starts_with("")));
        assert!(("abc".starts_with("a")));
        assert!((!"a".starts_with("abc")));
        assert!((!"".starts_with("abc")));
    }
2669
    // `ends_with`: the empty suffix always matches; a suffix longer than
    // the string never does.
    #[test]
    fn test_ends_with() {
        assert!(("".ends_with("")));
        assert!(("abc".ends_with("")));
        assert!(("abc".ends_with("c")));
        assert!((!"a".ends_with("abc")));
        assert!((!"".ends_with("abc")));
    }
2678
    // `is_empty` is true for "" and false for a one-character string.
    #[test]
    fn test_is_empty() {
        assert!("".is_empty());
        assert!(!"a".is_empty());
    }
2684
2685     #[test]
2686     fn test_replace() {
2687         let a = "a";
2688         assert_eq!("".replace(a, "b"), ~"");
2689         assert_eq!("a".replace(a, "b"), ~"b");
2690         assert_eq!("ab".replace(a, "b"), ~"bb");
2691         let test = "test";
2692         assert!(" test test ".replace(test, "toast") ==
2693             ~" toast toast ");
2694         assert_eq!(" test test ".replace(test, ""), ~"   ");
2695     }
2696
2697     #[test]
2698     fn test_replace_2a() {
2699         let data = ~"ประเทศไทย中华";
2700         let repl = ~"دولة الكويت";
2701
2702         let a = ~"ประเ";
2703         let A = ~"دولة الكويتทศไทย中华";
2704         assert_eq!(data.replace(a, repl), A);
2705     }
2706
2707     #[test]
2708     fn test_replace_2b() {
2709         let data = ~"ประเทศไทย中华";
2710         let repl = ~"دولة الكويت";
2711
2712         let b = ~"ะเ";
2713         let B = ~"ปรدولة الكويتทศไทย中华";
2714         assert_eq!(data.replace(b,   repl), B);
2715     }
2716
2717     #[test]
2718     fn test_replace_2c() {
2719         let data = ~"ประเทศไทย中华";
2720         let repl = ~"دولة الكويت";
2721
2722         let c = ~"中华";
2723         let C = ~"ประเทศไทยدولة الكويت";
2724         assert_eq!(data.replace(c, repl), C);
2725     }
2726
2727     #[test]
2728     fn test_replace_2d() {
2729         let data = ~"ประเทศไทย中华";
2730         let repl = ~"دولة الكويت";
2731
2732         let d = ~"ไท华";
2733         assert_eq!(data.replace(d, repl), data);
2734     }
2735
2736     #[test]
2737     fn test_slice() {
2738         assert_eq!("ab", "abc".slice(0, 2));
2739         assert_eq!("bc", "abc".slice(1, 3));
2740         assert_eq!("", "abc".slice(1, 1));
2741         assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2742
2743         let data = "ประเทศไทย中华";
2744         assert_eq!("ป", data.slice(0, 3));
2745         assert_eq!("ร", data.slice(3, 6));
2746         assert_eq!("", data.slice(3, 3));
2747         assert_eq!("华", data.slice(30, 33));
2748
2749         fn a_million_letter_X() -> ~str {
2750             let mut i = 0;
2751             let mut rs = ~"";
2752             while i < 100000 {
2753                 push_str(&mut rs, "华华华华华华华华华华");
2754                 i += 1;
2755             }
2756             rs
2757         }
2758         fn half_a_million_letter_X() -> ~str {
2759             let mut i = 0;
2760             let mut rs = ~"";
2761             while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2762             rs
2763         }
2764         let letters = a_million_letter_X();
2765         assert!(half_a_million_letter_X() ==
2766             letters.slice(0u, 3u * 500000u).to_owned());
2767     }
2768
2769     #[test]
2770     fn test_slice_2() {
2771         let ss = "中华Việt Nam";
2772
2773         assert_eq!("华", ss.slice(3u, 6u));
2774         assert_eq!("Việt Nam", ss.slice(6u, 16u));
2775
2776         assert_eq!("ab", "abc".slice(0u, 2u));
2777         assert_eq!("bc", "abc".slice(1u, 3u));
2778         assert_eq!("", "abc".slice(1u, 1u));
2779
2780         assert_eq!("中", ss.slice(0u, 3u));
2781         assert_eq!("华V", ss.slice(3u, 7u));
2782         assert_eq!("", ss.slice(3u, 3u));
2783         /*0: 中
2784           3: 华
2785           6: V
2786           7: i
2787           8: ệ
2788          11: t
2789          12:
2790          13: N
2791          14: a
2792          15: m */
2793     }
2794
2795     #[test]
2796     #[should_fail]
2797     #[ignore(cfg(windows))]
2798     fn test_slice_fail() {
2799         "中华Việt Nam".slice(0u, 2u);
2800     }
2801
2802     #[test]
2803     fn test_slice_from() {
2804         assert_eq!("abcd".slice_from(0), "abcd");
2805         assert_eq!("abcd".slice_from(2), "cd");
2806         assert_eq!("abcd".slice_from(4), "");
2807     }
2808     #[test]
2809     fn test_slice_to() {
2810         assert_eq!("abcd".slice_to(0), "");
2811         assert_eq!("abcd".slice_to(2), "ab");
2812         assert_eq!("abcd".slice_to(4), "abcd");
2813     }
2814
2815     #[test]
2816     fn test_trim_left_chars() {
2817         let v: &[char] = &[];
2818         assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
2819         assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2820         assert_eq!(" ***  *** ".trim_left_chars(& &['*', ' ']), "");
2821         assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2822
2823         assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2824         assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2825         assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
2826     }
2827
2828     #[test]
2829     fn test_trim_right_chars() {
2830         let v: &[char] = &[];
2831         assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
2832         assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
2833         assert_eq!(" ***  *** ".trim_right_chars(& &['*', ' ']), "");
2834         assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
2835
2836         assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
2837         assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
2838         assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
2839     }
2840
2841     #[test]
2842     fn test_trim_chars() {
2843         let v: &[char] = &[];
2844         assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
2845         assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
2846         assert_eq!(" ***  *** ".trim_chars(& &['*', ' ']), "");
2847         assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
2848
2849         assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
2850         assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
2851         assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
2852     }
2853
2854     #[test]
2855     fn test_trim_left() {
2856         assert_eq!("".trim_left(), "");
2857         assert_eq!("a".trim_left(), "a");
2858         assert_eq!("    ".trim_left(), "");
2859         assert_eq!("     blah".trim_left(), "blah");
2860         assert_eq!("   \u3000  wut".trim_left(), "wut");
2861         assert_eq!("hey ".trim_left(), "hey ");
2862     }
2863
2864     #[test]
2865     fn test_trim_right() {
2866         assert_eq!("".trim_right(), "");
2867         assert_eq!("a".trim_right(), "a");
2868         assert_eq!("    ".trim_right(), "");
2869         assert_eq!("blah     ".trim_right(), "blah");
2870         assert_eq!("wut   \u3000  ".trim_right(), "wut");
2871         assert_eq!(" hey".trim_right(), " hey");
2872     }
2873
2874     #[test]
2875     fn test_trim() {
2876         assert_eq!("".trim(), "");
2877         assert_eq!("a".trim(), "a");
2878         assert_eq!("    ".trim(), "");
2879         assert_eq!("    blah     ".trim(), "blah");
2880         assert_eq!("\nwut   \u3000  ".trim(), "wut");
2881         assert_eq!(" hey dude ".trim(), "hey dude");
2882     }
2883
2884     #[test]
2885     fn test_is_whitespace() {
2886         assert!("".is_whitespace());
2887         assert!(" ".is_whitespace());
2888         assert!("\u2009".is_whitespace()); // Thin space
2889         assert!("  \n\t   ".is_whitespace());
2890         assert!(!"   _   ".is_whitespace());
2891     }
2892
2893     #[test]
2894     fn test_shift_byte() {
2895         let mut s = ~"ABC";
2896         let b = unsafe{raw::shift_byte(&mut s)};
2897         assert_eq!(s, ~"BC");
2898         assert_eq!(b, 65u8);
2899     }
2900
2901     #[test]
2902     fn test_pop_byte() {
2903         let mut s = ~"ABC";
2904         let b = unsafe{raw::pop_byte(&mut s)};
2905         assert_eq!(s, ~"AB");
2906         assert_eq!(b, 67u8);
2907     }
2908
2909     #[test]
2910     fn test_unsafe_from_bytes() {
2911         let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2912         let b = unsafe { raw::from_bytes(a) };
2913         assert_eq!(b, ~"AAAAAAA");
2914     }
2915
2916     #[test]
2917     fn test_from_bytes() {
2918         let ss = ~"ศไทย中华Việt Nam";
2919         let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2920                   0xe0_u8, 0xb9_u8, 0x84_u8,
2921                   0xe0_u8, 0xb8_u8, 0x97_u8,
2922                   0xe0_u8, 0xb8_u8, 0xa2_u8,
2923                   0xe4_u8, 0xb8_u8, 0xad_u8,
2924                   0xe5_u8, 0x8d_u8, 0x8e_u8,
2925                   0x56_u8, 0x69_u8, 0xe1_u8,
2926                   0xbb_u8, 0x87_u8, 0x74_u8,
2927                   0x20_u8, 0x4e_u8, 0x61_u8,
2928                   0x6d_u8];
2929
2930         assert_eq!(ss, from_bytes(bb));
2931     }
2932
2933     #[test]
2934     #[ignore(cfg(windows))]
2935     fn test_from_bytes_fail() {
2936         use str::not_utf8::cond;
2937
2938         let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2939                   0xe0_u8, 0xb9_u8, 0x84_u8,
2940                   0xe0_u8, 0xb8_u8, 0x97_u8,
2941                   0xe0_u8, 0xb8_u8, 0xa2_u8,
2942                   0xe4_u8, 0xb8_u8, 0xad_u8,
2943                   0xe5_u8, 0x8d_u8, 0x8e_u8,
2944                   0x56_u8, 0x69_u8, 0xe1_u8,
2945                   0xbb_u8, 0x87_u8, 0x74_u8,
2946                   0x20_u8, 0x4e_u8, 0x61_u8,
2947                   0x6d_u8];
2948
2949         let mut error_happened = false;
2950         let _x = do cond.trap(|err| {
2951             assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
2952             error_happened = true;
2953             ~""
2954         }).in {
2955             from_bytes(bb)
2956         };
2957         assert!(error_happened);
2958     }
2959
2960     #[test]
2961     fn test_unsafe_from_bytes_with_null() {
2962         let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2963         let b = unsafe { raw::from_bytes_with_null(a) };
2964         assert_eq!(b, "AAAAAAA");
2965     }
2966
2967     #[test]
2968     fn test_from_bytes_with_null() {
2969         let ss = "ศไทย中华Việt Nam";
2970         let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2971                   0xe0_u8, 0xb9_u8, 0x84_u8,
2972                   0xe0_u8, 0xb8_u8, 0x97_u8,
2973                   0xe0_u8, 0xb8_u8, 0xa2_u8,
2974                   0xe4_u8, 0xb8_u8, 0xad_u8,
2975                   0xe5_u8, 0x8d_u8, 0x8e_u8,
2976                   0x56_u8, 0x69_u8, 0xe1_u8,
2977                   0xbb_u8, 0x87_u8, 0x74_u8,
2978                   0x20_u8, 0x4e_u8, 0x61_u8,
2979                   0x6d_u8, 0x0_u8];
2980
2981         assert_eq!(ss, from_bytes_with_null(bb));
2982     }
2983
2984     #[test]
2985     #[should_fail]
2986     #[ignore(cfg(windows))]
2987     fn test_from_bytes_with_null_fail() {
2988         let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2989                   0xe0_u8, 0xb9_u8, 0x84_u8,
2990                   0xe0_u8, 0xb8_u8, 0x97_u8,
2991                   0xe0_u8, 0xb8_u8, 0xa2_u8,
2992                   0xe4_u8, 0xb8_u8, 0xad_u8,
2993                   0xe5_u8, 0x8d_u8, 0x8e_u8,
2994                   0x56_u8, 0x69_u8, 0xe1_u8,
2995                   0xbb_u8, 0x87_u8, 0x74_u8,
2996                   0x20_u8, 0x4e_u8, 0x61_u8,
2997                   0x6d_u8, 0x0_u8];
2998
2999          let _x = from_bytes_with_null(bb);
3000     }
3001
3002     #[test]
3003     #[should_fail]
3004     #[ignore(cfg(windows))]
3005     fn test_from_bytes_with_null_fail_2() {
3006         let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
3007                   0xe0_u8, 0xb9_u8, 0x84_u8,
3008                   0xe0_u8, 0xb8_u8, 0x97_u8,
3009                   0xe0_u8, 0xb8_u8, 0xa2_u8,
3010                   0xe4_u8, 0xb8_u8, 0xad_u8,
3011                   0xe5_u8, 0x8d_u8, 0x8e_u8,
3012                   0x56_u8, 0x69_u8, 0xe1_u8,
3013                   0xbb_u8, 0x87_u8, 0x74_u8,
3014                   0x20_u8, 0x4e_u8, 0x61_u8,
3015                   0x6d_u8, 0x60_u8];
3016
3017          let _x = from_bytes_with_null(bb);
3018     }
3019
3020     #[test]
3021     fn test_from_buf() {
3022         unsafe {
3023             let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
3024             let b = vec::raw::to_ptr(a);
3025             let c = raw::from_buf(b);
3026             assert_eq!(c, ~"AAAAAAA");
3027         }
3028     }
3029
3030     #[test]
3031     fn test_as_bytes() {
3032         // no null
3033         let v = [
3034             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3035             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3036             109
3037         ];
3038         assert_eq!("".as_bytes(), &[]);
3039         assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3040         assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
3041     }
3042
3043     #[test]
3044     fn test_as_bytes_with_null() {
3045         // has null
3046         let v = [
3047             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3048             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3049             109, 0
3050         ];
3051
3052         let s1 = @"";
3053         let s2 = @"abc";
3054         let s3 = @"ศไทย中华Việt Nam";
3055         assert_eq!(s1.as_bytes_with_null(), &[0]);
3056         assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3057         assert_eq!(s3.as_bytes_with_null(), v);
3058
3059         let s1 = ~"";
3060         let s2 = ~"abc";
3061         let s3 = ~"ศไทย中华Việt Nam";
3062         assert_eq!(s1.as_bytes_with_null(), &[0]);
3063         assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3064         assert_eq!(s3.as_bytes_with_null(), v);
3065     }
3066
3067     #[test]
3068     fn test_as_bytes_with_null_consume() {
3069         let s = ~"ศไทย中华Việt Nam";
3070         let v = ~[
3071             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3072             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3073             109, 0
3074         ];
3075         assert_eq!((~"").as_bytes_with_null_consume(), ~[0]);
3076         assert_eq!((~"abc").as_bytes_with_null_consume(),
3077                    ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3078         assert_eq!(s.as_bytes_with_null_consume(), v);
3079     }
3080
3081     #[test]
3082     #[ignore(cfg(windows))]
3083     #[should_fail]
3084     fn test_as_bytes_fail() {
3085         // Don't double free. (I'm not sure if this exercises the
3086         // original problem code path anymore.)
3087         let s = ~"";
3088         let _bytes = s.as_bytes_with_null();
3089         fail!();
3090     }
3091
3092     #[test]
3093     fn test_as_buf() {
3094         let a = "Abcdefg";
3095         let b = as_buf(a, |buf, _l| {
3096             assert_eq!(unsafe { *buf }, 65u8);
3097             100
3098         });
3099         assert_eq!(b, 100);
3100     }
3101
3102     #[test]
3103     fn test_as_buf_small() {
3104         let a = "A";
3105         let b = as_buf(a, |buf, _l| {
3106             assert_eq!(unsafe { *buf }, 65u8);
3107             100
3108         });
3109         assert_eq!(b, 100);
3110     }
3111
3112     #[test]
3113     fn test_as_buf2() {
3114         unsafe {
3115             let s = ~"hello";
3116             let sb = as_buf(s, |b, _l| b);
3117             let s_cstr = raw::from_buf(sb);
3118             assert_eq!(s_cstr, s);
3119         }
3120     }
3121
3122     #[test]
3123     fn test_as_buf_3() {
3124         let a = ~"hello";
3125         do as_buf(a) |buf, len| {
3126             unsafe {
3127                 assert_eq!(a[0], 'h' as u8);
3128                 assert_eq!(*buf, 'h' as u8);
3129                 assert_eq!(len, 6u);
3130                 assert_eq!(*ptr::offset(buf,4u), 'o' as u8);
3131                 assert_eq!(*ptr::offset(buf,5u), 0u8);
3132             }
3133         }
3134     }
3135
3136     #[test]
3137     fn test_subslice_offset() {
3138         let a = "kernelsprite";
3139         let b = a.slice(7, a.len());
3140         let c = a.slice(0, a.len() - 6);
3141         assert_eq!(a.subslice_offset(b), 7);
3142         assert_eq!(a.subslice_offset(c), 0);
3143
3144         let string = "a\nb\nc";
3145         let mut lines = ~[];
3146         for string.line_iter().advance |line| { lines.push(line) }
3147         assert_eq!(string.subslice_offset(lines[0]), 0);
3148         assert_eq!(string.subslice_offset(lines[1]), 2);
3149         assert_eq!(string.subslice_offset(lines[2]), 4);
3150     }
3151
3152     #[test]
3153     #[should_fail]
3154     fn test_subslice_offset_2() {
3155         let a = "alchemiter";
3156         let b = "cruxtruder";
3157         a.subslice_offset(b);
3158     }
3159
3160     #[test]
3161     fn vec_str_conversions() {
3162         let s1: ~str = ~"All mimsy were the borogoves";
3163
3164         let v: ~[u8] = s1.as_bytes().to_owned();
3165         let s2: ~str = from_bytes(v);
3166         let mut i: uint = 0u;
3167         let n1: uint = s1.len();
3168         let n2: uint = v.len();
3169         assert_eq!(n1, n2);
3170         while i < n1 {
3171             let a: u8 = s1[i];
3172             let b: u8 = s2[i];
3173             debug!(a);
3174             debug!(b);
3175             assert_eq!(a, b);
3176             i += 1u;
3177         }
3178     }
3179
3180     #[test]
3181     fn test_contains() {
3182         assert!("abcde".contains("bcd"));
3183         assert!("abcde".contains("abcd"));
3184         assert!("abcde".contains("bcde"));
3185         assert!("abcde".contains(""));
3186         assert!("".contains(""));
3187         assert!(!"abcde".contains("def"));
3188         assert!(!"".contains("a"));
3189
3190         let data = ~"ประเทศไทย中华Việt Nam";
3191         assert!(data.contains("ประเ"));
3192         assert!(data.contains("ะเ"));
3193         assert!(data.contains("中华"));
3194         assert!(!data.contains("ไท华"));
3195     }
3196
3197     #[test]
3198     fn test_contains_char() {
3199         assert!("abc".contains_char('b'));
3200         assert!("a".contains_char('a'));
3201         assert!(!"abc".contains_char('d'));
3202         assert!(!"".contains_char('a'));
3203     }
3204
3205     #[test]
3206     fn test_map() {
3207         assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3208         assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3209     }
3210
3211     #[test]
3212     fn test_utf16() {
3213         let pairs =
3214             [(~"𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n",
3215               ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3216                 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3217                 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3218                 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3219
3220              (~"𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n",
3221               ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3222                 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3223                 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3224                 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3225                 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3226                 0x000a_u16]),
3227
3228              (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3229               ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3230                 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3231                 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3232                 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3233                 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3234                 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3235                 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3236
3237              (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3238               ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3239                 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3240                 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3241                 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3242                 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3243                 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3244                 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3245                 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3246                 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3247                 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3248                 0x000a_u16 ]) ];
3249
3250         for pairs.iter().advance |p| {
3251             let (s, u) = (*p).clone();
3252             assert!(s.to_utf16() == u);
3253             assert!(from_utf16(u) == s);
3254             assert!(from_utf16(s.to_utf16()) == s);
3255             assert!(from_utf16(u).to_utf16() == u);
3256         }
3257     }
3258
3259     #[test]
3260     fn test_char_at() {
3261         let s = ~"ศไทย中华Việt Nam";
3262         let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3263         let mut pos = 0;
3264         for v.iter().advance |ch| {
3265             assert!(s.char_at(pos) == *ch);
3266             pos += from_char(*ch).len();
3267         }
3268     }
3269
3270     #[test]
3271     fn test_char_at_reverse() {
3272         let s = ~"ศไทย中华Việt Nam";
3273         let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3274         let mut pos = s.len();
3275         for v.rev_iter().advance |ch| {
3276             assert!(s.char_at_reverse(pos) == *ch);
3277             pos -= from_char(*ch).len();
3278         }
3279     }
3280
3281     #[test]
3282     fn test_escape_unicode() {
3283         assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3284         assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3285         assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3286         assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3287         assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3288         assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3289         assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3290         assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3291         assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
3292     }
3293
3294     #[test]
3295     fn test_escape_default() {
3296         assert_eq!("abc".escape_default(), ~"abc");
3297         assert_eq!("a c".escape_default(), ~"a c");
3298         assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3299         assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3300         assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3301         assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3302         assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3303         assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
3304     }
3305
3306     #[test]
3307     fn test_to_managed() {
3308         assert_eq!("abc".to_managed(), @"abc");
3309         assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
3310     }
3311
3312     #[test]
3313     fn test_total_ord() {
3314         "1234".cmp(& &"123") == Greater;
3315         "123".cmp(& &"1234") == Less;
3316         "1234".cmp(& &"1234") == Equal;
3317         "12345555".cmp(& &"123456") == Less;
3318         "22".cmp(& &"1234") == Greater;
3319     }
3320
3321     #[test]
3322     fn test_char_range_at() {
3323         let data = ~"b¢€𤭢𤭢€¢b";
3324         assert_eq!('b', data.char_range_at(0).ch);
3325         assert_eq!('¢', data.char_range_at(1).ch);
3326         assert_eq!('€', data.char_range_at(3).ch);
3327         assert_eq!('𤭢', data.char_range_at(6).ch);
3328         assert_eq!('𤭢', data.char_range_at(10).ch);
3329         assert_eq!('€', data.char_range_at(14).ch);
3330         assert_eq!('¢', data.char_range_at(17).ch);
3331         assert_eq!('b', data.char_range_at(19).ch);
3332     }
3333
3334     #[test]
3335     fn test_char_range_at_reverse_underflow() {
3336         assert_eq!("abc".char_range_at_reverse(0).next, 0);
3337     }
3338
3339     #[test]
3340     fn test_add() {
3341         #[allow(unnecessary_allocation)];
3342         macro_rules! t (
3343             ($s1:expr, $s2:expr, $e:expr) => {
3344                 assert_eq!($s1 + $s2, $e);
3345                 assert_eq!($s1.to_owned() + $s2, $e);
3346                 assert_eq!($s1.to_managed() + $s2, $e);
3347             }
3348         );
3349
3350         t!("foo",  "bar", ~"foobar");
3351         t!("foo", @"bar", ~"foobar");
3352         t!("foo", ~"bar", ~"foobar");
3353         t!("ศไทย中",  "华Việt Nam", ~"ศไทย中华Việt Nam");
3354         t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3355         t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
3356     }
3357
3358     #[test]
3359     fn test_iterator() {
3360         use iterator::*;
3361         let s = ~"ศไทย中华Việt Nam";
3362         let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3363
3364         let mut pos = 0;
3365         let mut it = s.iter();
3366
3367         for it.advance |c| {
3368             assert_eq!(c, v[pos]);
3369             pos += 1;
3370         }
3371         assert_eq!(pos, v.len());
3372     }
3373
3374     #[test]
3375     fn test_rev_iterator() {
3376         use iterator::*;
3377         let s = ~"ศไทย中华Việt Nam";
3378         let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3379
3380         let mut pos = 0;
3381         let mut it = s.rev_iter();
3382
3383         for it.advance |c| {
3384             assert_eq!(c, v[pos]);
3385             pos += 1;
3386         }
3387         assert_eq!(pos, v.len());
3388     }
3389
3390     #[test]
3391     fn test_bytes_iterator() {
3392         let s = ~"ศไทย中华Việt Nam";
3393         let v = [
3394             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3395             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3396             109
3397         ];
3398         let mut pos = 0;
3399
3400         for s.bytes_iter().advance |b| {
3401             assert_eq!(b, v[pos]);
3402             pos += 1;
3403         }
3404     }
3405
3406     #[test]
3407     fn test_bytes_rev_iterator() {
3408         let s = ~"ศไทย中华Việt Nam";
3409         let v = [
3410             224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3411             184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3412             109
3413         ];
3414         let mut pos = v.len();
3415
3416         for s.bytes_rev_iter().advance |b| {
3417             pos -= 1;
3418             assert_eq!(b, v[pos]);
3419         }
3420     }
3421
3422     #[test]
3423     fn test_split_char_iterator() {
3424         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3425
3426         let split: ~[&str] = data.split_iter(' ').collect();
3427         assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3428
3429         let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3430         assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3431
3432         // Unicode
3433         let split: ~[&str] = data.split_iter('ä').collect();
3434         assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3435
3436         let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3437         assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3438     }
3439     #[test]
3440     fn test_splitn_char_iterator() {
3441         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3442
3443         let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3444         assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3445
3446         let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3447         assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3448
3449         // Unicode
3450         let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3451         assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3452
3453         let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3454         assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3455     }
3456
3457     #[test]
3458     fn test_split_char_iterator_no_trailing() {
3459         let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3460
3461         let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3462         assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3463
3464         let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3465         assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
3466     }
3467
3468     #[test]
3469     fn test_word_iter() {
3470         let data = "\n \tMäry   häd\tä  little lämb\nLittle lämb\n";
3471         let words: ~[&str] = data.word_iter().collect();
3472         assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
3473     }
3474
3475     #[test]
3476     fn test_line_iter() {
3477         let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3478         let lines: ~[&str] = data.line_iter().collect();
3479         assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3480
3481         let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3482         let lines: ~[&str] = data.line_iter().collect();
3483         assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3484     }
3485
3486     #[test]
3487     fn test_split_str_iterator() {
3488         fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3489             let v: ~[&str] = s.split_str_iter(sep).collect();
3490             assert_eq!(v, u);
3491         }
3492         t("--1233345--", "12345", ~["--1233345--"]);
3493         t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3494         t("::hello::there", "::", ~["", "hello", "there"]);
3495         t("hello::there::", "::", ~["hello", "there", ""]);
3496         t("::hello::there::", "::", ~["", "hello", "there", ""]);
3497         t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3498         t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3499         t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3500         t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3501         t("", ".", ~[""]);
3502         t("zz", "zz", ~["",""]);
3503         t("ok", "z", ~["ok"]);
3504         t("zzz", "zz", ~["","z"]);
3505         t("zzzzz", "zz", ~["","","z"]);
3506     }
3507
3508     #[test]
3509     fn test_str_zero() {
3510         use num::Zero;
3511         fn t<S: Zero + Str>() {
3512             let s: S = Zero::zero();
3513             assert_eq!(s.as_slice(), "");
3514             assert!(s.is_zero());
3515         }
3516
3517         t::<&str>();
3518         t::<@str>();
3519         t::<~str>();
3520     }
3521
3522     #[test]
3523     fn test_str_container() {
3524         fn sum_len<S: Container>(v: &[S]) -> uint {
3525             v.iter().transform(|x| x.len()).sum()
3526         }
3527
3528         let s = ~"01234";
3529         assert_eq!(5, sum_len(["012", "", "34"]));
3530         assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3531         assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3532         assert_eq!(5, sum_len([s.as_slice()]));
3533     }
3534 }
3535
// Micro-benchmarks for UTF-8 validation and per-character mapping.
#[cfg(test)]
mod bench {
    use extra::test::BenchHarness;
    use str;

    // Validation of a 100-byte input made up of ASCII only.
    #[bench]
    fn is_utf8_100_ascii(bh: &mut BenchHarness) {
        let input = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
                        Lorem ipsum dolor sit amet, consectetur. ");
        assert_eq!(100, input.len());

        bh.iter(|| {
            str::is_utf8(input);
        });
    }

    // Validation of a 100-byte input containing multi-byte sequences.
    #[bench]
    fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
        let input = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
        assert_eq!(100, input.len());

        bh.iter(|| {
            str::is_utf8(input);
        });
    }

    // Maps every character of a 100-character ASCII string to the next
    // code point.
    #[bench]
    fn map_chars_100_ascii(bh: &mut BenchHarness) {
        let text = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\
                 HelloHelloHelloHelloHelloHelloHelloHelloHelloHello";

        bh.iter(|| {
            text.map_chars(|c| ((c as uint) + 1) as char);
        });
    }

    // The same mapping over a 100-character string of multi-byte characters.
    #[bench]
    fn map_chars_100_multibytes(bh: &mut BenchHarness) {
        let text = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
                 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑";

        bh.iter(|| {
            text.map_chars(|c| ((c as uint) + 1) as char);
        });
    }
}