1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided.
25 use container::{Container, Mutable};
27 use iterator::{Iterator, IteratorUtil, FilterIterator, AdditiveIterator, MapIterator};
30 use option::{None, Option, Some};
36 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector};
42 not_utf8: (~str) -> ~str;
46 Section: Creating a string
50 * Convert a vector of bytes to a new UTF-8 string
54 * Raises the `not_utf8` condition if invalid UTF-8
56 pub fn from_bytes(vv: &[u8]) -> ~str {
57 use str::not_utf8::cond;
60 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
61 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
62 first_bad_byte as uint))
65 return unsafe { raw::from_bytes(vv) }
70 * Consumes a vector of bytes to create a new utf-8 string
74 * Raises the `not_utf8` condition if invalid UTF-8
76 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
77 use str::not_utf8::cond;
80 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
81 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
82 first_bad_byte as uint))
84 return unsafe { raw::from_bytes_owned(vv) }
89 * Convert a vector of bytes to a UTF-8 string.
90 * The vector needs to be one byte longer than the string, and end with a 0 byte.
92 * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
96 * Fails if invalid UTF-8
97 * Fails if not null terminated
99 pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str { // borrows `vv` as a string slice; the result carries the same lifetime as the input
100 assert_eq!(vv[vv.len() - 1], 0); // the last byte must be the 0 terminator; NOTE(review): an empty `vv` evaluates `vv.len() - 1` with a length of 0 -- confirm that failure mode is acceptable
101 assert!(is_utf8(vv)); // validates the whole vector, terminator included
102 return unsafe { raw::from_bytes_with_null(vv) }; // both preconditions were checked above, so hand off to the unchecked raw conversion
106 * Converts a vector to a string slice without performing any allocations.
108 * Once the slice has been validated as utf-8, it is transmuted in-place and
109 * returned as a '&str' instead of a '&[u8]'
113 * Fails if invalid UTF-8
115 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str { // reinterprets `vector` in place; the result carries the same lifetime as the input
117 assert!(is_utf8(vector)); // fail before any reinterpretation if the bytes are not UTF-8
118 let (ptr, len): (*u8, uint) = ::cast::transmute(vector); // view the byte slice as its raw (pointer, length) pair
119 let string: &'a str = ::cast::transmute((ptr, len + 1)); // rebuild the pair as a `&str` whose stored length is `len + 1`; NOTE(review): unlike `from_bytes_with_null`, nothing visible here requires a byte to exist after the slice, so the extra counted byte may lie outside `vector` -- confirm
124 /// Copy a slice into a new unique str
126 pub fn to_owned(s: &str) -> ~str {
127 unsafe { raw::slice_bytes_owned(s, 0, s.len()) }
130 impl ToStr for ~str {
132 fn to_str(&self) -> ~str { to_owned(*self) }
134 impl<'self> ToStr for &'self str {
136 fn to_str(&self) -> ~str { to_owned(*self) }
138 impl ToStr for @str {
140 fn to_str(&self) -> ~str { to_owned(*self) }
144 * Convert a byte to a UTF-8 string
148 * Fails if invalid UTF-8
150 pub fn from_byte(b: u8) -> ~str {
152 unsafe { ::cast::transmute(~[b, 0u8]) }
155 /// Convert a char to a string
156 pub fn from_char(ch: char) -> ~str {
162 /// Convert a vector of chars to a string
163 pub fn from_chars(chs: &[char]) -> ~str {
165 buf.reserve(chs.len());
166 for chs.iter().advance |ch| {
173 pub fn push_str(lhs: &mut ~str, rhs: &str) {
177 #[allow(missing_doc)]
178 pub trait StrVector {
179 pub fn concat(&self) -> ~str;
180 pub fn connect(&self, sep: &str) -> ~str;
183 impl<'self, S: Str> StrVector for &'self [S] {
184 /// Concatenate a vector of strings. An empty vector yields the empty string.
185 pub fn concat(&self) -> ~str {
186 if self.is_empty() { return ~""; } // nothing to copy
188 let len = self.iter().transform(|s| s.as_slice().len()).sum(); // length of the result: the sum of every element's length
195 do as_buf(s) |buf, _| { // work on the raw buffer of `s`, the string whose length is set below
196 let mut buf = ::cast::transmute_mut_unsafe(buf); // cast to a mutable pointer so it can serve as a moving write cursor
197 for self.iter().advance |ss| {
198 do as_buf(ss.as_slice()) |ssbuf, sslen| {
199 let sslen = sslen - 1; // as_buf reports a length one byte longer than the indexable string; drop that byte
200 ptr::copy_memory(buf, ssbuf, sslen); // write this element's bytes at the cursor
201 buf = buf.offset(sslen); // advance the cursor past what was just written
205 raw::set_len(&mut s, len); // record the final length (set_len also adds the null terminator)
210 /// Concatenate a vector of strings, placing a given separator between each.
211 pub fn connect(&self, sep: &str) -> ~str {
212 if self.is_empty() { return ~""; } // no elements: nothing to join
215 if sep.is_empty() { return self.concat(); } // an empty separator reduces to plain concatenation
217 // `self.len() - 1` below is wrong without the guarantee that `self` is non-empty; the early return above provides it
218 let len = sep.len() * (self.len() - 1) // one separator between each adjacent pair of elements...
219 + self.iter().transform(|s| s.as_slice().len()).sum(); // ...plus the length of every element
221 let mut first = true; // NOTE(review): presumably used to skip the separator before the first element -- the lines that read it are not visible here
226 do as_buf(s) |buf, _| { // destination buffer
227 do as_buf(sep) |sepbuf, seplen| { // separator buffer
228 let seplen = seplen - 1; // ignore the extra trailing byte that as_buf counts
229 let mut buf = ::cast::transmute_mut_unsafe(buf); // mutable write cursor into the destination
230 for self.iter().advance |ss| {
231 do as_buf(ss.as_slice()) |ssbuf, sslen| {
232 let sslen = sslen - 1; // same adjustment for the element's length
236 ptr::copy_memory(buf, sepbuf, seplen); // write the separator at the cursor
237 buf = buf.offset(seplen);
239 ptr::copy_memory(buf, ssbuf, sslen); // then the element itself
240 buf = buf.offset(sslen);
245 raw::set_len(&mut s, len); // record the final length (set_len also adds the null terminator)
251 /// Something that can be used to compare against a character
253 /// Determine if the splitter should split at the given character
254 fn matches(&self, char) -> bool;
255 /// Indicate if this is only concerned about ASCII characters,
256 /// which can allow for a faster implementation.
257 fn only_ascii(&self) -> bool;
259 impl CharEq for char {
261 fn matches(&self, c: char) -> bool { *self == c }
263 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
265 impl<'self> CharEq for &'self fn(char) -> bool {
267 fn matches(&self, c: char) -> bool { (*self)(c) }
269 fn only_ascii(&self) -> bool { false }
271 impl CharEq for extern "Rust" fn(char) -> bool {
273 fn matches(&self, c: char) -> bool { (*self)(c) }
275 fn only_ascii(&self) -> bool { false }
278 impl<'self, C: CharEq> CharEq for &'self [C] {
280 fn matches(&self, c: char) -> bool {
281 self.iter().any(|m| m.matches(c))
284 fn only_ascii(&self) -> bool {
285 self.iter().all(|m| m.only_ascii())
290 /// An iterator over the substrings of a string, separated by `sep`.
292 pub struct StrCharSplitIterator<'self,Sep> {
293 priv string: &'self str,
296 /// The number of splits remaining
298 /// Whether an empty string at the end is allowed
299 priv allow_trailing_empty: bool,
301 priv only_ascii: bool
304 /// An iterator over the words of a string, separated by a sequence of whitespace
305 pub type WordIterator<'self> =
306 FilterIterator<'self, &'self str,
307 StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
309 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
310 pub type AnyLineIterator<'self> =
311 MapIterator<'self, &'self str, &'self str, StrCharSplitIterator<'self, char>>;
313 impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
315 fn next(&mut self) -> Option<&'self str> { // yields the next piece of `string`, or None once the iterator has finished
316 if self.finished { return None }
318 let l = self.string.len();
319 let start = self.position; // the piece being produced begins where the previous call stopped
322 // this gives a *huge* speed up for splitting on ASCII
323 // characters (e.g. '\n' or ' ')
324 while self.position < l && self.count > 0 { // byte-at-a-time scan; stops when the string or the split budget (`count`) runs out
325 let byte = self.string[self.position];
327 if self.sep.matches(byte as char) { // each byte is tested as if it were a whole character
328 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) }; // the piece is everything from `start` up to, not including, the separator
336 while self.position < l && self.count > 0 { // same loop shape, but decoding one full character per step
337 let CharRange {ch, next} = self.string.char_range_at(self.position); // `next` is the position following the decoded character
339 if self.sep.matches(ch) {
340 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) }; // piece ends just before the separator character
341 self.position = next; // resume after the separator on the following call
345 self.position = next; // move on to the following character
348 self.finished = true; // neither loop returned a piece: whatever remains is the last one
349 if self.allow_trailing_empty || start < l { // an empty final piece is only produced when trailing empties are allowed
350 Some(unsafe { raw::slice_bytes(self.string, start, l) })
357 /// An iterator over the start and end indices of the matches of a
358 /// substring within a larger string
360 pub struct StrMatchesIndexIterator<'self> {
361 priv haystack: &'self str,
362 priv needle: &'self str,
366 /// An iterator over the substrings of a string separated by a given
369 pub struct StrStrSplitIterator<'self> {
370 priv it: StrMatchesIndexIterator<'self>,
375 impl<'self> Iterator<(uint, uint)> for StrMatchesIndexIterator<'self> {
377 fn next(&mut self) -> Option<(uint, uint)> { // yields the (start, end) positions of the next match of `needle` in `haystack`
378 // See Issue #1932 for why this is a naive search
379 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
380 let mut match_start = 0; // where the candidate match currently being tested began
383 while self.position < h_len { // `position` persists in the iterator, so each call continues from the previous one
384 if self.haystack[self.position] == self.needle[match_i] { // compare the haystack byte at the cursor with the needle byte at `match_i`
385 if match_i == 0 { match_start = self.position; } // first needle byte matched: remember where this candidate began
389 if match_i == n_len { // every needle byte has matched
391 return Some((match_start, self.position));
394 // failed match, backtrack
397 self.position = match_start; // rewind the cursor to where the failed candidate started
406 impl<'self> Iterator<&'self str> for StrStrSplitIterator<'self> {
408 fn next(&mut self) -> Option<&'self str> {
409 if self.finished { return None; }
411 match self.it.next() {
412 Some((from, to)) => {
413 let ret = Some(self.it.haystack.slice(self.last_end, from));
418 self.finished = true;
419 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
425 /** Splits a string into substrings with possibly internal whitespace,
426 * each of them at most `lim` bytes long. The substrings have leading and trailing
427 * whitespace removed, and are only cut at whitespace boundaries.
431 * Fails during iteration if the string contains a non-whitespace
432 * sequence longer than the limit.
434 pub fn each_split_within<'a>(ss: &'a str,
436 it: &fn(&'a str) -> bool) -> bool { // `it` receives each piece; its return value is stored in `cont` and gates the run-out loop at the end
437 // Just for fun, let's write this as a state machine:
439 enum SplitWithinState {
440 A, // leading whitespace, initial state
442 C, // internal and trailing whitespace
445 Ws, // current char is whitespace
446 Cr // current char is not whitespace
449 UnderLim, // current char makes current substring still fit in limit
450 OverLim // current char makes current substring no longer fit in limit
453 let mut slice_start = 0; // start of the piece currently being accumulated
454 let mut last_start = 0; // start of the most recent word (run of non-whitespace)
455 let mut last_end = 0; // end of the most recent word; a piece is always cut here
457 let mut fake_i = ss.len(); // index handed to the machine for the synthetic trailing whitespace
461 let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) }; // emit the current piece and remember whether the callback wants more
463 // if the limit is larger than the string, lower it to save cycles
468 let machine: &fn((uint, char)) -> bool = |(i, c)| { // one state transition per (index, char) pair
469 let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
470 let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim }; // would the piece still fit if it extended through index `i`?
472 state = match (state, whitespace, limit) {
474 (A, Cr, _) => { slice_start = i; last_start = i; B } // first non-whitespace char: a new piece and a new word both start here
476 (B, Cr, UnderLim) => { B } // still inside a word that fits
477 (B, Cr, OverLim) if (i - last_start + 1) > lim
478 => fail!("word starting with %? longer than limit!",
479 ss.slice(last_start, i + 1)),
480 (B, Cr, OverLim) => { slice(); slice_start = last_start; B } // piece is full but the current word fits on its own: emit up to the previous word, restart the piece at this word
481 (B, Ws, UnderLim) => { last_end = i; C } // a word just ended and the piece still fits
482 (B, Ws, OverLim) => { last_end = i; slice(); A } // a word just ended on the limit: emit and start over
484 (C, Cr, UnderLim) => { last_start = i; B } // another word begins inside the same piece
485 (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B } // the new word would not fit: emit, then start a fresh piece at it
486 (C, Ws, OverLim) => { slice(); A } // whitespace ran past the limit: emit what was collected
487 (C, Ws, UnderLim) => { C } // more whitespace between words
493 ss.iter().enumerate().advance(|x| machine(x)); // NOTE(review): the enumerate() index of each char is later passed to `ss.slice`, which takes byte positions -- the two only coincide for one-byte characters; confirm
495 // Let the automaton 'run out' by supplying trailing whitespace
496 while cont && match state { B | C => true, A => false } { // states B and C still hold an unemitted piece
497 machine((fake_i, ' '));
504 * Replace all occurrences of one string with another
508 * * s - The string containing substrings to replace
509 * * from - The string to replace
510 * * to - The replacement string
514 * The original string with all occurrences of `from` replaced with `to`
516 pub fn replace(s: &str, from: &str, to: &str) -> ~str { // builds a new string; `s` itself is not modified
517 let mut result = ~"";
518 let mut last_end = 0; // start of the text in `s` that has not been copied yet
519 for s.matches_index_iter(from).advance |(start, end)| { // `matches_index_iter` asserts its argument is non-empty, so an empty `from` fails here
520 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)}); // copy the untouched text that precedes this match
524 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())}); // copy whatever follows the final match
529 Section: Comparing strings
532 /// Bytewise slice equality
536 pub fn eq_slice(a: &str, b: &str) -> bool {
537 do as_buf(a) |ap, alen| {
538 do as_buf(b) |bp, blen| {
539 if (alen != blen) { false }
542 libc::memcmp(ap as *libc::c_void,
544 (alen - 1) as libc::size_t) == 0
553 pub fn eq_slice(a: &str, b: &str) -> bool {
554 do as_buf(a) |ap, alen| {
555 do as_buf(b) |bp, blen| {
556 if (alen != blen) { false }
559 libc::memcmp(ap as *libc::c_void,
561 (alen - 1) as libc::size_t) == 0
568 /// Bytewise string equality
570 #[lang="uniq_str_eq"]
572 pub fn eq(a: &~str, b: &~str) -> bool {
578 pub fn eq(a: &~str, b: &~str) -> bool {
586 // Utility used by various searching functions
587 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
589 for needle.bytes_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
597 /// Determines if a vector of bytes contains valid UTF-8
598 pub fn is_utf8(v: &[u8]) -> bool {
605 let w = utf8_char_width(v[i]); // length of the character that starts at byte `i`, looked up from its first byte
606 if w == 0u { return false; } // width 0 marks a byte that cannot start a character
609 if nexti > total { return false; } // the character would run past the end of the input
611 if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; } // following bytes must look like 10xxxxxx: mask the top two bits (192) and compare with 128
613 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; } // same check for the third byte
614 if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; } // and for the fourth byte when the width is 4; NOTE(review): the lines visible here check only the lead-byte width and continuation tags, not the decoded value -- confirm whether overlong forms are meant to pass
623 /// Determines if a vector of `u16` contains valid UTF-16: every surrogate unit must be part of a high/low pair
624 pub fn is_utf16(v: &[u16]) -> bool {
630 if u <= 0xD7FF_u16 || u >= 0xE000_u16 { // not a surrogate: a complete character in a single unit
634 if i+1u >= len { return false; } // a surrogate with no unit after it can never form a pair (the previous `<` test rejected every surrogate that DID have a following unit)
636 if u < 0xD800_u16 || u > 0xDBFF_u16 { return false; } // the first unit of a pair must be a high surrogate (0xD800..0xDBFF, the same range utf16_chars asserts)
637 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; } // and the second must be a low surrogate
644 /// Iterates over the utf-16 characters in the specified slice, yielding each
645 /// decoded unicode character to the function provided.
649 /// * Fails on invalid utf-16 data
650 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
653 while (i < len && v[i] != 0u16) { // stops at the first 0 unit as well as at the end of the slice
656 if u <= 0xD7FF_u16 || u >= 0xE000_u16 { // not a surrogate: the unit is the character
662 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16); // first unit of a pair must be a high surrogate
663 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16); // second unit must be a low surrogate
664 let mut c = (u - 0xD800_u16) as char; // payload bits carried by the high surrogate
666 c |= (u2 - 0xDC00_u16) as char; // merge in the payload bits carried by the low surrogate
667 c |= 0x1_0000_u32 as char; // NOTE(review): surrogate-pair decoding adds 0x10000; OR-ing gives the same value only while bit 16 of `c` is clear -- confirm
675 * Allocates a new string from the utf-16 slice provided
677 pub fn from_utf16(v: &[u16]) -> ~str {
679 buf.reserve(v.len());
680 utf16_chars(v, |ch| buf.push_char(ch));
685 * Allocates a new string with the specified capacity. The string returned is
686 * the empty string, but has capacity for much more.
688 pub fn with_capacity(capacity: uint) -> ~str {
690 buf.reserve(capacity);
695 * As char_len but for a slice of a string
699 * * s - A valid string
700 * * start - The position inside `s` where to start counting in bytes
701 * * end - The position where to stop counting
705 * The number of Unicode characters in `s` between the given indices.
707 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
708 assert!(s.is_char_boundary(start));
709 assert!(s.is_char_boundary(end));
713 let next = s.char_range_at(i).next;
720 /// Counts the number of bytes taken by the first `n` chars in `s`
721 /// starting from `start`.
722 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
723 assert!(s.is_char_boundary(start));
729 let next = s.char_range_at(end).next;
736 // https://tools.ietf.org/html/rfc3629
737 static UTF8_CHAR_WIDTH: [u8, ..256] = [ // indexed by a character's first byte; the entry is that character's length in bytes, 0 for a byte that cannot start a character
738 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
739 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
740 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
741 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
742 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
743 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
744 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
745 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
746 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x80..0xBF have the 10xxxxxx continuation form, so they get width 0
747 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
748 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
749 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
750 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // NOTE(review): RFC 3629 excludes the bytes 0xC0 and 0xC1 (they could only begin overlong forms), yet the first two entries of this row are 2 -- confirm whether they should be 0
751 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
752 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
753 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF (0xF0..0xF4 have width 4; 0xF5 and above have width 0)
756 /// Given a first byte, determine how many bytes are in this UTF-8 character
757 pub fn utf8_char_width(b: u8) -> uint {
758 return UTF8_CHAR_WIDTH[b] as uint;
761 #[allow(missing_doc)]
762 pub struct CharRange {
767 // UTF-8 tags and ranges
768 static TAG_CONT_U8: u8 = 128u8;
769 static TAG_CONT: uint = 128u;
770 static MAX_ONE_B: uint = 128u;
771 static TAG_TWO_B: uint = 192u;
772 static MAX_TWO_B: uint = 2048u;
773 static TAG_THREE_B: uint = 224u;
774 static MAX_THREE_B: uint = 65536u;
775 static TAG_FOUR_B: uint = 240u;
778 * A dummy trait to hold all the utility methods that we implement on strings.
782 * Work with the byte buffer of a string as a null-terminated C string.
784 * Allows for unsafe manipulation of strings, which is useful for foreign
785 * interop. This is similar to `str::as_buf`, but guarantees null-termination.
786 * If the given slice is not already null-terminated, this function will
787 * allocate a temporary, copy the slice, null terminate it, and pass
793 * let s = "PATH".as_c_str(|path| libc::getenv(path));
796 fn as_c_str<T>(self, f: &fn(*libc::c_char) -> T) -> T;
799 impl<'self> StrUtil for &'self str {
801 fn as_c_str<T>(self, f: &fn(*libc::c_char) -> T) -> T { // calls `f` with a pointer to the string's bytes and returns whatever `f` returns
802 do as_buf(self) |buf, len| {
803 // NB: len includes the trailing null.
805 if unsafe { *(ptr::offset(buf,len-1)) != 0 } { // probe the byte in the terminator position, one past the indexable contents
806 to_owned(self).as_c_str(|s| f(s)) // not terminated: make an owned copy and retry on it
808 f(buf as *libc::c_char) // already terminated: pass the existing buffer straight through, no copy
815 * Deprecated. Use the `as_c_str` method on strings instead.
818 pub fn as_c_str<T>(s: &str, f: &fn(*libc::c_char) -> T) -> T {
823 * Work with the byte buffer and length of a slice.
825 * The given length is one byte longer than the 'official' indexable
826 * length of the string. This is to permit probing the byte past the
827 * indexable area for a null byte, as is the case in slices pointing
828 * to full strings, or suffixes of them.
831 pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
833 let v : *(*u8,uint) = transmute(&s);
839 /// Unsafe operations
845 use str::{as_buf, is_utf8};
847 use vec::MutableVector;
849 /// Create a Rust string from a null-terminated *u8 buffer
850 pub unsafe fn from_buf(buf: *u8) -> ~str {
855 curr = ptr::offset(buf, i);
857 return from_buf_len(buf, i);
860 /// Create a Rust string from a *u8 buffer of the given length
861 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
862 let mut v: ~[u8] = vec::with_capacity(len + 1);
863 v.as_mut_buf(|vbuf, _len| {
864 ptr::copy_memory(vbuf, buf as *u8, len)
866 vec::raw::set_len(&mut v, len);
870 return ::cast::transmute(v);
873 /// Create a Rust string from a null-terminated C string
874 pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
875 from_buf(::cast::transmute(c_str))
878 /// Create a Rust string from a `*c_char` buffer of the given length
879 pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
880 from_buf_len(::cast::transmute(c_str), len)
883 /// Converts a vector of bytes to a new owned string.
884 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
885 do v.as_imm_buf |buf, len| {
886 from_buf_len(buf, len)
890 /// Converts an owned vector of bytes to a new owned string. This assumes
891 /// that the utf-8-ness of the vector has already been validated
892 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
897 /// Converts a vector of bytes to a string.
898 /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
899 /// the string, if possible ending in a 0 byte.
900 pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
904 /// Converts a byte to a string.
905 pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
907 /// Form a slice from a C string. Unsafe because the caller must ensure the
908 /// C string has the static lifetime, or else the return value may be
909 /// invalidated later.
910 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
916 curr = ptr::offset(s, len);
918 let v = (s, len + 1);
919 assert!(is_utf8(::cast::transmute(v)));
924 * Takes a bytewise (not UTF-8) slice from a string.
926 * Returns the substring from [`begin`..`end`).
930 * If begin is greater than end.
931 * If end is greater than the length of the string.
933 pub unsafe fn slice_bytes_owned(s: &str, begin: uint, end: uint) -> ~str {
934 do as_buf(s) |sbuf, n| {
935 assert!((begin <= end));
938 let mut v = vec::with_capacity(end - begin + 1u);
939 do v.as_imm_buf |vbuf, _vlen| {
940 let vbuf = ::cast::transmute_mut_unsafe(vbuf);
941 let src = ptr::offset(sbuf, begin);
942 ptr::copy_memory(vbuf, src, end - begin);
944 vec::raw::set_len(&mut v, end - begin);
951 * Takes a bytewise (not UTF-8) slice from a string.
953 * Returns the substring from [`begin`..`end`).
957 * If begin is greater than end.
958 * If end is greater than the length of the string.
961 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str { // no copy is made: the result points into `s`
962 do as_buf(s) |sbuf, n| {
963 assert!((begin <= end));
966 let tuple = (ptr::offset(sbuf, begin), end - begin + 1); // (pointer to the first byte, stored length); the `+ 1` follows the convention that a slice's stored length is one more than its indexable length
967 ::cast::transmute(tuple) // reinterpret the pair as a `&str`
971 /// Appends a byte to a string. (Not UTF-8 safe).
972 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
973 let new_len = s.len() + 1;
974 s.reserve_at_least(new_len); // make room before writing through the raw pointer
975 do as_buf(*s) |buf, len| {
976 let buf: *mut u8 = ::cast::transmute(buf); // as_buf hands out a `*u8`; cast it so the buffer can be written
977 *ptr::mut_offset(buf, len) = b; // NOTE(review): as_buf documents `len` as one more than the string's indexable length, which would put `b` one slot past the old terminator rather than on it -- confirm the intended offset
979 set_len(&mut *s, new_len); // record the new length (set_len also adds the null terminator)
982 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
983 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
984 let new_len = s.len() + bytes.len();
985 s.reserve_at_least(new_len);
986 for bytes.iter().advance |byte| { push_byte(&mut *s, *byte); }
989 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
990 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
994 set_len(s, len - 1u);
998 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
999 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
1001 assert!((len > 0u));
1003 *s = raw::slice_bytes_owned(*s, 1u, len);
1007 /// Sets the length of the string and adds the null terminator
1009 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
1010 let v: **mut vec::UnboxedVecRepr = cast::transmute(v);
1011 let repr: *mut vec::UnboxedVecRepr = *v;
1012 (*repr).fill = new_len + 1u;
1013 let null = ptr::mut_offset(cast::transmute(&((*repr).data)),
1019 fn test_from_buf_len() {
1021 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
1022 let b = vec::raw::to_ptr(a);
1023 let c = from_buf_len(b, 3u);
1024 assert_eq!(c, ~"AAA");
1033 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
1034 use super::{Str, eq_slice};
1036 impl<'self> Add<&'self str,~str> for &'self str {
1038 fn add(&self, rhs: & &'self str) -> ~str {
1039 let mut ret = self.to_owned();
1045 impl<'self> TotalOrd for &'self str {
1047 fn cmp(&self, other: & &'self str) -> Ordering { // byte-wise lexicographic ordering
1048 for self.bytes_iter().zip(other.bytes_iter()).advance |(s_b, o_b)| { // walk both strings in step; zip pairs up bytes at the same position
1049 match s_b.cmp(&o_b) {
1050 Greater => return Greater, // the first differing byte decides the result
1051 Less => return Less,
1056 self.len().cmp(&other.len()) // no byte pair decided the result, so the lengths do
1060 impl TotalOrd for ~str {
1062 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1065 impl TotalOrd for @str {
1067 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1070 impl<'self> Eq for &'self str {
1072 fn eq(&self, other: & &'self str) -> bool {
1073 eq_slice((*self), (*other))
1076 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
1081 fn eq(&self, other: &~str) -> bool {
1082 eq_slice((*self), (*other))
1085 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
1090 fn eq(&self, other: &@str) -> bool {
1091 eq_slice((*self), (*other))
1094 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
1097 impl<'self> TotalEq for &'self str {
1099 fn equals(&self, other: & &'self str) -> bool {
1100 eq_slice((*self), (*other))
1104 impl TotalEq for ~str {
1106 fn equals(&self, other: &~str) -> bool {
1107 eq_slice((*self), (*other))
1111 impl TotalEq for @str {
1113 fn equals(&self, other: &@str) -> bool {
1114 eq_slice((*self), (*other))
1118 impl<'self> Ord for &'self str {
1120 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
1122 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
1124 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
1126 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1131 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1133 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1135 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1137 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1142 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1144 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1146 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1148 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1151 impl<'self, S: Str> Equiv<S> for &'self str {
1153 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1156 impl<'self, S: Str> Equiv<S> for @str {
1158 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1161 impl<'self, S: Str> Equiv<S> for ~str {
1163 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1170 /// Any string that can be represented as a slice
1172 /// Work with `self` as a slice.
1173 fn as_slice<'a>(&'a self) -> &'a str;
1176 impl<'self> Str for &'self str {
1178 fn as_slice<'a>(&'a self) -> &'a str { *self }
1180 impl<'self> Str for ~str {
1182 fn as_slice<'a>(&'a self) -> &'a str {
1183 let s: &'a str = *self; s
1186 impl<'self> Str for @str {
1188 fn as_slice<'a>(&'a self) -> &'a str {
1189 let s: &'a str = *self; s
1193 impl<'self> Container for &'self str {
1195 fn len(&self) -> uint {
1196 do as_buf(*self) |_p, n| { n - 1u }
1199 fn is_empty(&self) -> bool {
1204 impl Container for ~str {
1206 fn len(&self) -> uint { self.as_slice().len() }
1208 fn is_empty(&self) -> bool { self.len() == 0 }
1211 impl Container for @str {
1213 fn len(&self) -> uint { self.as_slice().len() }
1215 fn is_empty(&self) -> bool { self.len() == 0 }
1218 impl Mutable for ~str {
1219 /// Remove all content, make the string empty
1221 fn clear(&mut self) {
1223 raw::set_len(self, 0)
1229 #[allow(missing_doc)]
1230 pub trait StrSlice<'self> {
1231 fn contains<'a>(&self, needle: &'a str) -> bool;
1232 fn contains_char(&self, needle: char) -> bool;
1233 fn iter(&self) -> StrCharIterator<'self>;
1234 fn rev_iter(&self) -> StrCharRevIterator<'self>;
1235 fn bytes_iter(&self) -> StrBytesIterator<'self>;
1236 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
1237 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
1238 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep>;
1239 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1240 -> StrCharSplitIterator<'self, Sep>;
1241 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self>;
1242 fn split_str_iter(&self, &'self str) -> StrStrSplitIterator<'self>;
1243 fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
1244 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1245 fn word_iter(&self) -> WordIterator<'self>;
1246 fn ends_with(&self, needle: &str) -> bool;
1247 fn is_whitespace(&self) -> bool;
1248 fn is_alphanumeric(&self) -> bool;
1249 fn char_len(&self) -> uint;
1251 fn slice(&self, begin: uint, end: uint) -> &'self str;
1252 fn slice_from(&self, begin: uint) -> &'self str;
1253 fn slice_to(&self, end: uint) -> &'self str;
1255 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1257 fn starts_with(&self, needle: &str) -> bool;
1258 fn escape_default(&self) -> ~str;
1259 fn escape_unicode(&self) -> ~str;
1260 fn trim(&self) -> &'self str;
1261 fn trim_left(&self) -> &'self str;
1262 fn trim_right(&self) -> &'self str;
1263 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1264 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1265 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1266 fn replace(&self, from: &str, to: &str) -> ~str;
1267 fn to_owned(&self) -> ~str;
1268 fn to_managed(&self) -> @str;
1269 fn to_utf16(&self) -> ~[u16];
1270 fn is_char_boundary(&self, index: uint) -> bool;
1271 fn char_range_at(&self, start: uint) -> CharRange;
1272 fn char_at(&self, i: uint) -> char;
1273 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1274 fn char_at_reverse(&self, i: uint) -> char;
1275 fn as_bytes(&self) -> &'self [u8];
1277 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1278 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1279 fn find_str(&self, &str) -> Option<uint>;
1281 fn repeat(&self, nn: uint) -> ~str;
1283 fn slice_shift_char(&self) -> (char, &'self str);
1285 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1287 fn lev_distance(&self, t: &str) -> uint;
1289 fn subslice_offset(&self, inner: &str) -> uint;
1292 /// Extension methods for strings
1293 impl<'self> StrSlice<'self> for &'self str {
1295 * Returns true if one string contains another
1299 * * needle - The string to look for
1302 fn contains<'a>(&self, needle: &'a str) -> bool {
1303 self.find_str(needle).is_some()
1306 * Returns true if a string contains a char.
1310 * * needle - The char to look for
1313 fn contains_char(&self, needle: char) -> bool {
1314 self.find(needle).is_some()
1316 /// An iterator over the characters of `self`. Note, this iterates
1317 /// over unicode code-points, not unicode graphemes.
1322 /// let v: ~[char] = "abc åäö".iter().collect();
1323 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1326 fn iter(&self) -> StrCharIterator<'self> {
1332 /// An iterator over the characters of `self`, in reverse order.
1334 fn rev_iter(&self) -> StrCharRevIterator<'self> {
1335 StrCharRevIterator {
1341 /// An iterator over the bytes of `self`
1343 fn bytes_iter(&self) -> StrBytesIterator<'self> {
1344 StrBytesIterator { it: self.as_bytes().iter() }
1346 /// An iterator over the bytes of `self`, in reverse order
1348 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self> {
1349 StrBytesRevIterator { it: self.as_bytes().rev_iter() }
1352 /// An iterator over substrings of `self`, separated by characters
1353 /// matched by `sep`.
1358 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1359 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1361 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1362 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1365 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
1366 self.split_options_iter(sep, self.len(), true)
1369 /// An iterator over substrings of `self`, separated by characters
1370 /// matched by `sep`, restricted to splitting at most `count`
1373 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep> {
1374 self.split_options_iter(sep, count, true)
1377 /// An iterator over substrings of `self`, separated by characters
1378 /// matched by `sep`, splitting at most `count` times, and
1379 /// possibly not including the trailing empty substring, if it
1382 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1383 -> StrCharSplitIterator<'self, Sep> {
1384 let only_ascii = sep.only_ascii();
1385 StrCharSplitIterator {
1390 allow_trailing_empty: allow_trailing_empty,
1392 only_ascii: only_ascii
1395 /// An iterator over the start and end indices of each match of
1396 /// `sep` within `self`.
1398 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self> {
1399 assert!(!sep.is_empty())
1400 StrMatchesIndexIterator {
1407 * An iterator over the substrings of `self` separated by `sep`.
1412 * let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
1413 * assert_eq!(v, ["", "XXX", "YYY", ""]);
1417 fn split_str_iter(&self, sep: &'self str) -> StrStrSplitIterator<'self> {
1418 StrStrSplitIterator {
1419 it: self.matches_index_iter(sep),
1425 /// An iterator over the lines of a string (subsequences separated
1428 fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
1429 self.split_options_iter('\n', self.len(), false)
1432 /// An iterator over the lines of a string, separated by either
1433 /// `\n` or (`\r\n`).
1434 fn any_line_iter(&self) -> AnyLineIterator<'self> {
1435 do self.line_iter().transform |line| {
1437 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1442 /// An iterator over the words of a string (subsequences separated
1443 /// by any sequence of whitespace).
1445 fn word_iter(&self) -> WordIterator<'self> {
1446 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1450 * Returns true if the string contains only whitespace
1452 * Whitespace characters are determined by `char::is_whitespace`
1455 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1457 * Returns true if the string contains only alphanumerics
1459 * Alphanumeric characters are determined by `char::is_alphanumeric`
1462 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1463 /// Returns the number of characters that a string holds
1465 fn char_len(&self) -> uint { self.iter().len_() }
1468 * Returns a slice of the given string from the byte range
1471 * Fails when `begin` and `end` do not point to valid characters or
1472 * beyond the last character of the string
1475 fn slice(&self, begin: uint, end: uint) -> &'self str {
1476 assert!(self.is_char_boundary(begin));
1477 assert!(self.is_char_boundary(end));
1478 unsafe { raw::slice_bytes(*self, begin, end) }
1480 /// Returns a slice of the string from `begin` to its end.
1482 /// Fails when `begin` does not point to a valid character, or is
1485 fn slice_from(&self, begin: uint) -> &'self str {
1486 self.slice(begin, self.len())
1488 /// Returns a slice of the string from the beginning to byte
1491 /// Fails when `end` does not point to a valid character, or is
1494 fn slice_to(&self, end: uint) -> &'self str {
1498 /// Returns a slice of the string from the char range
1499 /// [`begin`..`end`).
1501 /// Fails if `begin` > `end` or if either `begin` or `end` is
1502 /// beyond the last character of the string.
1503 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1504 assert!(begin <= end);
1505 // not sure how to use the iterators for this nicely.
1506 let mut position = 0;
1509 while count < begin && position < l {
1510 position = self.char_range_at(position).next;
1513 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1514 let start_byte = position;
1515 while count < end && position < l {
1516 position = self.char_range_at(position).next;
1519 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1521 self.slice(start_byte, position)
1524 /// Returns true if `needle` is a prefix of the string.
1525 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1526 let (self_len, needle_len) = (self.len(), needle.len());
1527 if needle_len == 0u { true }
1528 else if needle_len > self_len { false }
1529 else { match_at(*self, needle, 0u) }
1531 /// Returns true if `needle` is a suffix of the string.
1532 fn ends_with(&self, needle: &str) -> bool {
1533 let (self_len, needle_len) = (self.len(), needle.len());
1534 if needle_len == 0u { true }
1535 else if needle_len > self_len { false }
1536 else { match_at(*self, needle, self_len - needle_len) }
1539 /// Escape each char in `s` with char::escape_default.
1540 fn escape_default(&self) -> ~str {
1541 let mut out: ~str = ~"";
1542 out.reserve_at_least(self.len());
1543 for self.iter().advance |c| {
1544 do c.escape_default |c| {
1551 /// Escape each char in `s` with char::escape_unicode.
1552 fn escape_unicode(&self) -> ~str {
1553 let mut out: ~str = ~"";
1554 out.reserve_at_least(self.len());
1555 for self.iter().advance |c| {
1556 do c.escape_unicode |c| {
1563 /// Returns a string with leading and trailing whitespace removed
1565 fn trim(&self) -> &'self str {
1566 self.trim_left().trim_right()
1568 /// Returns a string with leading whitespace removed
1570 fn trim_left(&self) -> &'self str {
1571 self.trim_left_chars(&char::is_whitespace)
1573 /// Returns a string with trailing whitespace removed
1575 fn trim_right(&self) -> &'self str {
1576 self.trim_right_chars(&char::is_whitespace)
1580 * Returns a string with characters that match `to_trim` removed.
1584 * * to_trim - a character matcher
1589 * assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1590 * assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1591 * assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1595 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1596 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1599 * Returns a string with leading characters matching `to_trim` removed.
1603 * * to_trim - a character matcher
1608 * assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1609 * assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1610 * assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1614 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1615 match self.find(|c: char| !to_trim.matches(c)) {
1617 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1621 * Returns a string with trailing characters matching `to_trim` removed.
1625 * * to_trim - a character matcher
1630 * assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1631 * assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1632 * assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1636 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1637 match self.rfind(|c: char| !to_trim.matches(c)) {
1640 let next = self.char_range_at(last).next;
1641 unsafe { raw::slice_bytes(*self, 0u, next) }
1647 * Replace all occurrences of one string with another
1651 * * from - The string to replace
1652 * * to - The replacement string
1656 * The original string with all occurrences of `from` replaced with `to`
1658 pub fn replace(&self, from: &str, to: &str) -> ~str {
1659 let mut result = ~"";
1660 let mut last_end = 0;
1661 for self.matches_index_iter(from).advance |(start, end)| {
1662 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1663 result.push_str(to);
1666 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1670 /// Copy a slice into a new unique str
1672 fn to_owned(&self) -> ~str { to_owned(*self) }
1675 fn to_managed(&self) -> @str {
1676 let v = at_vec::from_fn(self.len() + 1, |i| {
1677 if i == self.len() { 0 } else { self[i] }
1679 unsafe { ::cast::transmute(v) }
1682 /// Converts to a vector of `u16` encoded as UTF-16.
1683 fn to_utf16(&self) -> ~[u16] {
1685 for self.iter().advance |ch| {
1686 // Arithmetic with u32 literals is easier on the eyes than chars.
1687 let mut ch = ch as u32;
1689 if (ch & 0xFFFF_u32) == ch {
1690 // The BMP falls through (assuming non-surrogate, as it
1692 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1695 // Supplementary planes break into surrogates.
1696 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1698 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1699 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1700 u.push_all([w1, w2])
1707 * Returns false if the index points into the middle of a multi-byte
1708 * character sequence.
1710 fn is_char_boundary(&self, index: uint) -> bool {
1711 if index == self.len() { return true; }
1712 let b = self[index];
1713 return b < 128u8 || b >= 192u8;
1717 * Pluck a character out of a string and return the index of the next
1720 * This function can be used to iterate over the unicode characters of a
1726 * let s = "中华Việt Nam";
1728 * while i < s.len() {
1729 * let CharRange {ch, next} = s.char_range_at(i);
1730 * std::io::println(fmt!("%u: %c",i,ch));
1753 * * i - The byte offset of the char to extract
1757 * A record {ch: char, next: uint} containing the char value and the byte
1758 * index of the next unicode character.
1762 * If `i` is greater than or equal to the length of the string.
1763 * If `i` is not the index of the beginning of a valid UTF-8 character.
1766 fn char_range_at(&self, i: uint) -> CharRange {
1767 if (self[i] < 128u8) {
1768 return CharRange {ch: self[i] as char, next: i + 1 };
1771 // Multibyte case is a fn to allow char_range_at to inline cleanly
1772 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1773 let mut val = s[i] as uint;
1774 let w = UTF8_CHAR_WIDTH[val] as uint;
1777 // First byte is special, only want bottom 5 bits for width 2, 4 bits
1778 // for width 3, and 3 bits for width 4
1780 val = (val << 6) | (s[i + 1] & 63u8) as uint;
1781 if w > 2 { val = (val << 6) | (s[i + 2] & 63u8) as uint; }
1782 if w > 3 { val = (val << 6) | (s[i + 3] & 63u8) as uint; }
1784 return CharRange {ch: val as char, next: i + w};
1787 return multibyte_char_range_at(*self, i);
1790 /// Plucks the character starting at the `i`th byte of a string
1792 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1795 * Given a byte position and a str, return the previous char and its position.
1797 * This function can be used to iterate over a unicode string in reverse.
1799 * Returns 0 for next index if called on start index 0.
1801 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1802 let mut prev = start;
1804 // while there is a previous byte == 10......
1805 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1809 // now refer to the initial byte of previous char
1817 let ch = self.char_at(prev);
1818 return CharRange {ch:ch, next:prev};
1821 /// Plucks the character ending at the `i`th byte of a string
1823 fn char_at_reverse(&self, i: uint) -> char {
1824 self.char_range_at_reverse(i).ch
1828 * Work with the byte buffer of a string as a byte slice.
1830 * The byte slice does not include the null terminator.
1832 fn as_bytes(&self) -> &'self [u8] {
1834 let (ptr, len): (*u8, uint) = ::cast::transmute(*self);
1835 let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
1836 ::cast::transmute(outgoing_tuple)
1841 * Returns the byte index of the first character of `self` that matches `search`
1845 * `Some` containing the byte index of the first matching character
1846 * or `None` if there is no match
1848 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
1849 if search.only_ascii() {
1850 for self.bytes_iter().enumerate().advance |(i, b)| {
1851 if search.matches(b as char) { return Some(i) }
1855 for self.iter().advance |c| {
1856 if search.matches(c) { return Some(index); }
1857 index += c.len_utf8_bytes();
1864 * Returns the byte index of the last character of `self` that matches `search`
1868 * `Some` containing the byte index of the last matching character
1869 * or `None` if there is no match
1871 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
1872 let mut index = self.len();
1873 if search.only_ascii() {
1874 for self.bytes_rev_iter().advance |b| {
1876 if search.matches(b as char) { return Some(index); }
1879 for self.rev_iter().advance |c| {
1880 index -= c.len_utf8_bytes();
1881 if search.matches(c) { return Some(index); }
1889 * Returns the byte index of the first matching substring
1893 * * `needle` - The string to search for
1897 * `Some` containing the byte index of the first matching substring
1898 * or `None` if there is no match
1900 fn find_str(&self, needle: &str) -> Option<uint> {
1901 if needle.is_empty() {
1904 self.matches_index_iter(needle)
1906 .map_consume(|(start, _end)| start)
1910 /// Given a string, make a new string with repeated copies of it.
1911 fn repeat(&self, nn: uint) -> ~str {
1912 do as_buf(*self) |buf, len| {
1914 // ignore the NULL terminator
1916 ret.reserve(nn * len);
1919 do as_buf(ret) |rbuf, _len| {
1920 let mut rbuf = ::cast::transmute_mut_unsafe(rbuf);
1923 ptr::copy_memory(rbuf, buf, len);
1924 rbuf = rbuf.offset(len);
1927 raw::set_len(&mut ret, nn * len);
1934 * Retrieves the first character from a string slice and returns
1935 * it. This does not allocate a new string; instead, it returns a
1936 * slice that point one character beyond the character that was
1941 * If the string does not contain any characters
1944 fn slice_shift_char(&self) -> (char, &'self str) {
1945 let CharRange {ch, next} = self.char_range_at(0u);
1946 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1947 return (ch, next_s);
1951 /// Apply a function to each character.
1952 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
1953 let mut result = with_capacity(self.len());
1954 for self.iter().advance |cc| {
1955 result.push_char(ff(cc));
1960 /// Levenshtein Distance between two strings.
1961 fn lev_distance(&self, t: &str) -> uint {
1962 let slen = self.len();
1965 if slen == 0 { return tlen; }
1966 if tlen == 0 { return slen; }
1968 let mut dcol = vec::from_fn(tlen + 1, |x| x);
1970 for self.iter().enumerate().advance |(i, sc)| {
1972 let mut current = i;
1973 dcol[0] = current + 1;
1975 for t.iter().enumerate().advance |(j, tc)| {
1977 let next = dcol[j + 1];
1980 dcol[j + 1] = current;
1982 dcol[j + 1] = ::cmp::min(current, next);
1983 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
1995 * Returns the byte offset of an inner slice relative to an enclosing outer slice.
1997 * Fails if `inner` is not a direct slice contained within self.
2002 * let string = "a\nb\nc";
2003 * let mut lines = ~[];
2004 * for string.line_iter().advance |line| { lines.push(line) }
2006 * assert!(string.subslice_offset(lines[0]) == 0); // &"a"
2007 * assert!(string.subslice_offset(lines[1]) == 2); // &"b"
2008 * assert!(string.subslice_offset(lines[2]) == 4); // &"c"
2012 fn subslice_offset(&self, inner: &str) -> uint {
2013 do as_buf(*self) |a, a_len| {
2014 do as_buf(inner) |b, b_len| {
2020 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
2021 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
2023 assert!(a_start <= b_start);
2024 assert!(b_end <= a_end);
/// Access to the bytes of a string type together with its null
/// terminator.
pub trait NullTerminatedStr {
    /// Work with the byte buffer of a string as a byte slice.
    ///
    /// The byte slice does include the null terminator.
    fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
}
2037 impl NullTerminatedStr for ~str {
2039 * Work with the byte buffer of a string as a byte slice.
2041 * The byte slice does include the null terminator.
2044 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2045 let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
2046 let slice: &'a [u8] = *ptr;
2050 impl NullTerminatedStr for @str {
2052 * Work with the byte buffer of a string as a byte slice.
2054 * The byte slice does include the null terminator.
2057 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2058 let ptr: &'a @[u8] = unsafe { ::cast::transmute(self) };
2059 let slice: &'a [u8] = *ptr;
2064 #[allow(missing_doc)]
2065 pub trait OwnedStr {
2066 fn push_str_no_overallocate(&mut self, rhs: &str);
2067 fn push_str(&mut self, rhs: &str);
2068 fn push_char(&mut self, c: char);
2069 fn pop_char(&mut self) -> char;
2070 fn shift_char(&mut self) -> char;
2071 fn unshift_char(&mut self, ch: char);
2072 fn append(&self, rhs: &str) -> ~str; // FIXME #4850: this should consume self.
2073 fn reserve(&mut self, n: uint);
2074 fn reserve_at_least(&mut self, n: uint);
2075 fn capacity(&self) -> uint;
2076 fn to_bytes_with_null(self) -> ~[u8];
2079 impl OwnedStr for ~str {
2080 /// Appends a string slice to the back of a string, without overallocating
2082 fn push_str_no_overallocate(&mut self, rhs: &str) {
2084 let llen = self.len();
2085 let rlen = rhs.len();
2086 self.reserve(llen + rlen);
2087 do as_buf(*self) |lbuf, _llen| {
2088 do as_buf(rhs) |rbuf, _rlen| {
2089 let dst = ptr::offset(lbuf, llen);
2090 let dst = ::cast::transmute_mut_unsafe(dst);
2091 ptr::copy_memory(dst, rbuf, rlen);
2094 raw::set_len(self, llen + rlen);
2098 /// Appends a string slice to the back of a string
2100 fn push_str(&mut self, rhs: &str) {
2102 let llen = self.len();
2103 let rlen = rhs.len();
2104 self.reserve_at_least(llen + rlen);
2105 do as_buf(*self) |lbuf, _llen| {
2106 do as_buf(rhs) |rbuf, _rlen| {
2107 let dst = ptr::offset(lbuf, llen);
2108 let dst = ::cast::transmute_mut_unsafe(dst);
2109 ptr::copy_memory(dst, rbuf, rlen);
2112 raw::set_len(self, llen + rlen);
2115 /// Appends a character to the back of a string
2117 fn push_char(&mut self, c: char) {
2118 assert!(c as uint <= 0x10ffff); // FIXME: #7609: should be enforced on all `char`
2120 let code = c as uint;
2121 let nb = if code < MAX_ONE_B { 1u }
2122 else if code < MAX_TWO_B { 2u }
2123 else if code < MAX_THREE_B { 3u }
2125 let len = self.len();
2126 let new_len = len + nb;
2127 self.reserve_at_least(new_len);
2129 do as_buf(*self) |buf, _len| {
2130 let buf: *mut u8 = ::cast::transmute(buf);
2133 *ptr::mut_offset(buf, off) = code as u8;
2136 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2137 *ptr::mut_offset(buf, off + 1u) = (code & 63u | TAG_CONT) as u8;
2140 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2141 *ptr::mut_offset(buf, off + 1u) = (code >> 6u & 63u | TAG_CONT) as u8;
2142 *ptr::mut_offset(buf, off + 2u) = (code & 63u | TAG_CONT) as u8;
2145 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2146 *ptr::mut_offset(buf, off + 1u) = (code >> 12u & 63u | TAG_CONT) as u8;
2147 *ptr::mut_offset(buf, off + 2u) = (code >> 6u & 63u | TAG_CONT) as u8;
2148 *ptr::mut_offset(buf, off + 3u) = (code & 63u | TAG_CONT) as u8;
2153 raw::set_len(self, new_len);
2157 * Remove the final character from a string and return it
2161 * If the string does not contain any characters
2163 fn pop_char(&mut self) -> char {
2164 let end = self.len();
2166 let CharRange {ch, next} = self.char_range_at_reverse(end);
2167 unsafe { raw::set_len(self, next); }
2172 * Remove the first character from a string and return it
2176 * If the string does not contain any characters
2178 fn shift_char(&mut self) -> char {
2179 let CharRange {ch, next} = self.char_range_at(0u);
2180 *self = unsafe { raw::slice_bytes_owned(*self, next, self.len()) };
2184 /// Prepend a char to a string
2185 fn unshift_char(&mut self, ch: char) {
2186 // This could be more efficient.
2187 let mut new_str = ~"";
2188 new_str.push_char(ch);
2189 new_str.push_str(*self);
2193 /// Concatenate two strings together.
2195 fn append(&self, rhs: &str) -> ~str {
2196 // FIXME #4850: this should consume self, but that causes segfaults
2197 let mut v = self.clone();
2198 v.push_str_no_overallocate(rhs);
2203 * Reserves capacity for exactly `n` bytes in the given string, not including
2204 * the null terminator.
2206 * Assuming single-byte characters, the resulting string will be large
2207 * enough to hold a string of length `n`. To account for the null terminator,
2208 * the underlying buffer will have the size `n` + 1.
2210 * If the capacity for `s` is already equal to or greater than the requested
2211 * capacity, then no action is taken.
2216 * * n - The number of bytes to reserve space for
2219 pub fn reserve(&mut self, n: uint) {
2221 let v: *mut ~[u8] = cast::transmute(self);
2222 (*v).reserve(n + 1);
2227 * Reserves capacity for at least `n` bytes in the given string, not including
2228 * the null terminator.
2230 * Assuming single-byte characters, the resulting string will be large
2231 * enough to hold a string of length `n`. To account for the null terminator,
2232 * the underlying buffer will have the size `n` + 1.
2234 * This function will over-allocate in order to amortize the allocation costs
2235 * in scenarios where the caller may need to repeatedly reserve additional
2238 * If the capacity for `s` is already equal to or greater than the requested
2239 * capacity, then no action is taken.
2244 * * n - The number of bytes to reserve space for
2247 fn reserve_at_least(&mut self, n: uint) {
2248 self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2252 * Returns the number of single-byte characters the string can hold without
2255 fn capacity(&self) -> uint {
2256 let buf: &~[u8] = unsafe { cast::transmute(self) };
2257 let vcap = buf.capacity();
2262 /// Convert to a vector of bytes. This does not allocate a new
2263 /// string, and includes the null terminator.
2265 fn to_bytes_with_null(self) -> ~[u8] {
2266 unsafe { ::cast::transmute(self) }
2270 impl Clone for ~str {
2272 fn clone(&self) -> ~str {
2277 impl Clone for @str {
2279 fn clone(&self) -> @str {
2284 /// External iterator for a string's characters. Use with the `std::iterator`
2287 pub struct StrCharIterator<'self> {
2289 priv string: &'self str,
2292 impl<'self> Iterator<char> for StrCharIterator<'self> {
2294 fn next(&mut self) -> Option<char> {
2295 if self.index < self.string.len() {
2296 let CharRange {ch, next} = self.string.char_range_at(self.index);
2304 /// External iterator for a string's characters in reverse order. Use
2305 /// with the `std::iterator` module.
2307 pub struct StrCharRevIterator<'self> {
2309 priv string: &'self str,
2312 impl<'self> Iterator<char> for StrCharRevIterator<'self> {
2314 fn next(&mut self) -> Option<char> {
2316 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index);
2325 /// External iterator for a string's bytes. Use with the `std::iterator`
2328 pub struct StrBytesIterator<'self> {
2329 priv it: vec::VecIterator<'self, u8>
2332 impl<'self> Iterator<u8> for StrBytesIterator<'self> {
2334 fn next(&mut self) -> Option<u8> {
2335 self.it.next().map_consume(|&x| x)
2339 /// External iterator for a string's bytes in reverse order. Use with
2340 /// the `std::iterator` module.
2342 pub struct StrBytesRevIterator<'self> {
2343 priv it: vec::VecRevIterator<'self, u8>
2346 impl<'self> Iterator<u8> for StrBytesRevIterator<'self> {
2348 fn next(&mut self) -> Option<u8> {
2349 self.it.next().map_consume(|&x| x)
2353 // This works because every lifetime is a sub-lifetime of 'static
2354 impl<'self> Zero for &'self str {
2355 fn zero() -> &'self str { "" }
2356 fn is_zero(&self) -> bool { self.is_empty() }
2359 impl Zero for ~str {
2360 fn zero() -> ~str { ~"" }
2361 fn is_zero(&self) -> bool { self.len() == 0 }
2364 impl Zero for @str {
2365 fn zero() -> @str { @"" }
2366 fn is_zero(&self) -> bool { self.len() == 0 }
2371 use iterator::IteratorUtil;
2372 use container::Container;
2380 use vec::{ImmutableVector, CopyableVector};
2381 use cmp::{TotalOrd, Less, Equal, Greater};
2385 assert!((eq(&~"", &~"")));
2386 assert!((eq(&~"foo", &~"foo")));
2387 assert!((!eq(&~"foo", &~"bar")));
2391 fn test_eq_slice() {
2392 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2393 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2394 assert!((!eq_slice("foo1", "foo2")));
2400 assert!("" <= "foo");
2401 assert!("foo" <= "foo");
2402 assert!("foo" != "bar");
2407 assert_eq!("".len(), 0u);
2408 assert_eq!("hello world".len(), 11u);
2409 assert_eq!("\x63".len(), 1u);
2410 assert_eq!("\xa2".len(), 2u);
2411 assert_eq!("\u03c0".len(), 2u);
2412 assert_eq!("\u2620".len(), 3u);
2413 assert_eq!("\U0001d11e".len(), 4u);
2415 assert_eq!("".char_len(), 0u);
2416 assert_eq!("hello world".char_len(), 11u);
2417 assert_eq!("\x63".char_len(), 1u);
2418 assert_eq!("\xa2".char_len(), 1u);
2419 assert_eq!("\u03c0".char_len(), 1u);
2420 assert_eq!("\u2620".char_len(), 1u);
2421 assert_eq!("\U0001d11e".char_len(), 1u);
2422 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2427 assert_eq!("hello".find('l'), Some(2u));
2428 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2429 assert!("hello".find('x').is_none());
2430 assert!("hello".find(|c:char| c == 'x').is_none());
2431 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2432 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2437 assert_eq!("hello".rfind('l'), Some(3u));
2438 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2439 assert!("hello".rfind('x').is_none());
2440 assert!("hello".rfind(|c:char| c == 'x').is_none());
2441 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2442 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2446 fn test_push_str() {
2449 assert_eq!(s.slice_from(0), "");
2451 assert_eq!(s.slice_from(0), "abc");
2452 s.push_str("ประเทศไทย中华Việt Nam");
2453 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2459 assert_eq!(s.slice_from(0), "");
2460 s = s.append("abc");
2461 assert_eq!(s.slice_from(0), "abc");
2462 s = s.append("ประเทศไทย中华Việt Nam");
2463 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2467 fn test_pop_char() {
2468 let mut data = ~"ประเทศไทย中华";
2469 let cc = data.pop_char();
2470 assert_eq!(~"ประเทศไทย中", data);
2471 assert_eq!('华', cc);
2475 fn test_pop_char_2() {
2476 let mut data2 = ~"华";
2477 let cc2 = data2.pop_char();
2478 assert_eq!(~"", data2);
2479 assert_eq!('华', cc2);
2484 #[ignore(cfg(windows))]
2485 fn test_pop_char_fail() {
2487 let _cc3 = data.pop_char();
2491 fn test_push_char() {
2492 let mut data = ~"ประเทศไทย中";
2493 data.push_char('华');
2494 data.push_char('b'); // 1 byte
2495 data.push_char('¢'); // 2 byte
2496 data.push_char('€'); // 3 byte
2497 data.push_char('𤭢'); // 4 byte
2498 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2502 fn test_shift_char() {
2503 let mut data = ~"ประเทศไทย中";
2504 let cc = data.shift_char();
2505 assert_eq!(~"ระเทศไทย中", data);
2506 assert_eq!('ป', cc);
2510 fn test_unshift_char() {
2511 let mut data = ~"ประเทศไทย中";
2512 data.unshift_char('华');
2513 assert_eq!(~"华ประเทศไทย中", data);
2518 let mut empty = ~"";
2520 assert_eq!("", empty.as_slice());
2521 let mut data = ~"ประเทศไทย中";
2523 assert_eq!("", data.as_slice());
2524 data.push_char('华');
2525 assert_eq!("华", data.as_slice());
2529 fn test_split_within() {
2530 fn t(s: &str, i: uint, u: &[~str]) {
2532 for each_split_within(s, i) |s| { v.push(s.to_owned()) }
2533 assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
2537 t("hello", 15, [~"hello"]);
2538 t("\nMary had a little lamb\nLittle lamb\n", 15,
2539 [~"Mary had a", ~"little lamb", ~"Little lamb"]);
2540 t("\nMary had a little lamb\nLittle lamb\n", uint::max_value,
2541 [~"Mary had a little lamb\nLittle lamb"]);
2545 fn test_find_str() {
2547 assert_eq!("".find_str(""), Some(0u));
2548 assert!("banana".find_str("apple pie").is_none());
2550 let data = "abcabc";
2551 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2552 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2553 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2555 let mut data = ~"ประเทศไทย中华Việt Nam";
2557 assert!(data.find_str("ไท华").is_none());
2558 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2559 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2561 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2562 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2563 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2564 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2565 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2567 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2568 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2569 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2570 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2571 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
2575 fn test_slice_chars() {
2576 fn t(a: &str, b: &str, start: uint) {
2577 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2579 t("hello", "llo", 2);
2580 t("hello", "el", 1);
2581 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2586 fn t(v: &[~str], s: &str) {
2587 assert_eq!(v.concat(), s.to_str());
2589 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2590 let v: &[~str] = [];
2597 fn t(v: &[~str], sep: &str, s: &str) {
2598 assert_eq!(v.connect(sep), s.to_str());
2600 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2601 " ", "you know I'm no good");
2602 let v: &[~str] = [];
2604 t([~"hi"], " ", "hi");
2608 fn test_concat_slices() {
2609 fn t(v: &[&str], s: &str) {
2610 assert_eq!(v.concat(), s.to_str());
2612 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2613 let v: &[&str] = [];
2619 fn test_connect_slices() {
2620 fn t(v: &[&str], sep: &str, s: &str) {
2621 assert_eq!(v.connect(sep), s.to_str());
2623 t(["you", "know", "I'm", "no", "good"],
2624 " ", "you know I'm no good");
2626 t(["hi"], " ", "hi");
2631 assert_eq!("x".repeat(4), ~"xxxx");
2632 assert_eq!("hi".repeat(4), ~"hihihihi");
2633 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2634 assert_eq!("".repeat(4), ~"");
2635 assert_eq!("hi".repeat(0), ~"");
2639 fn test_unsafe_slice() {
2640 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2641 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2642 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2643 fn a_million_letter_a() -> ~str {
2646 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2649 fn half_a_million_letter_a() -> ~str {
2652 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2655 let letters = a_million_letter_a();
2656 assert!(half_a_million_letter_a() ==
2657 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
2661 fn test_starts_with() {
2662 assert!(("".starts_with("")));
2663 assert!(("abc".starts_with("")));
2664 assert!(("abc".starts_with("a")));
2665 assert!((!"a".starts_with("abc")));
2666 assert!((!"".starts_with("abc")));
2670 fn test_ends_with() {
2671 assert!(("".ends_with("")));
2672 assert!(("abc".ends_with("")));
2673 assert!(("abc".ends_with("c")));
2674 assert!((!"a".ends_with("abc")));
2675 assert!((!"".ends_with("abc")));
2679 fn test_is_empty() {
2680 assert!("".is_empty());
2681 assert!(!"a".is_empty());
2687 assert_eq!("".replace(a, "b"), ~"");
2688 assert_eq!("a".replace(a, "b"), ~"b");
2689 assert_eq!("ab".replace(a, "b"), ~"bb");
2691 assert!(" test test ".replace(test, "toast") ==
2693 assert_eq!(" test test ".replace(test, ""), ~" ");
2697 fn test_replace_2a() {
2698 let data = ~"ประเทศไทย中华";
2699 let repl = ~"دولة الكويت";
2702 let A = ~"دولة الكويتทศไทย中华";
2703 assert_eq!(data.replace(a, repl), A);
2707 fn test_replace_2b() {
2708 let data = ~"ประเทศไทย中华";
2709 let repl = ~"دولة الكويت";
2712 let B = ~"ปรدولة الكويتทศไทย中华";
2713 assert_eq!(data.replace(b, repl), B);
2717 fn test_replace_2c() {
2718 let data = ~"ประเทศไทย中华";
2719 let repl = ~"دولة الكويت";
2722 let C = ~"ประเทศไทยدولة الكويت";
2723 assert_eq!(data.replace(c, repl), C);
2727 fn test_replace_2d() {
2728 let data = ~"ประเทศไทย中华";
2729 let repl = ~"دولة الكويت";
2732 assert_eq!(data.replace(d, repl), data);
2737 assert_eq!("ab", "abc".slice(0, 2));
2738 assert_eq!("bc", "abc".slice(1, 3));
2739 assert_eq!("", "abc".slice(1, 1));
2740 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2742 let data = "ประเทศไทย中华";
2743 assert_eq!("ป", data.slice(0, 3));
2744 assert_eq!("ร", data.slice(3, 6));
2745 assert_eq!("", data.slice(3, 3));
2746 assert_eq!("华", data.slice(30, 33));
2748 fn a_million_letter_X() -> ~str {
2752 push_str(&mut rs, "华华华华华华华华华华");
2757 fn half_a_million_letter_X() -> ~str {
2760 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2763 let letters = a_million_letter_X();
2764 assert!(half_a_million_letter_X() ==
2765 letters.slice(0u, 3u * 500000u).to_owned());
2770 let ss = "中华Việt Nam";
2772 assert_eq!("华", ss.slice(3u, 6u));
2773 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2775 assert_eq!("ab", "abc".slice(0u, 2u));
2776 assert_eq!("bc", "abc".slice(1u, 3u));
2777 assert_eq!("", "abc".slice(1u, 1u));
2779 assert_eq!("中", ss.slice(0u, 3u));
2780 assert_eq!("华V", ss.slice(3u, 7u));
2781 assert_eq!("", ss.slice(3u, 3u));
2796 #[ignore(cfg(windows))]
2797 fn test_slice_fail() {
2798 "中华Việt Nam".slice(0u, 2u);
2802 fn test_slice_from() {
2803 assert_eq!("abcd".slice_from(0), "abcd");
2804 assert_eq!("abcd".slice_from(2), "cd");
2805 assert_eq!("abcd".slice_from(4), "");
2808 fn test_slice_to() {
2809 assert_eq!("abcd".slice_to(0), "");
2810 assert_eq!("abcd".slice_to(2), "ab");
2811 assert_eq!("abcd".slice_to(4), "abcd");
// Checks `trim_left_chars`, which strips matching characters from the start only.
// The matcher is passed by reference and may be an empty char slice (no-op),
// a slice of chars, a single char, or a closure predicate.
2815 fn test_trim_left_chars() {
2816 let v: &[char] = &[];
2817 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
2818 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2819 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
2820 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2822 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2823 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2824 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
// Checks `trim_right_chars`, which strips matching characters from the end only.
// Uses the same matcher kinds as the left-hand variant: empty slice (no-op),
// char slice, single char, and closure predicate.
2828 fn test_trim_right_chars() {
2829 let v: &[char] = &[];
2830 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
2831 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
2832 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
2833 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
2835 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
2836 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
2837 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
// Checks `trim_chars`, which strips matching characters from both ends while
// leaving interior matches (e.g. the middle '1' in "foo1bar") untouched.
2841 fn test_trim_chars() {
2842 let v: &[char] = &[];
2843 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
2844 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
2845 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
2846 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
2848 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
2849 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
2850 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
// Checks `trim_left`: leading whitespace is removed, including the non-ASCII
// U+3000, while trailing whitespace is preserved.
2854 fn test_trim_left() {
2855 assert_eq!("".trim_left(), "");
2856 assert_eq!("a".trim_left(), "a");
2857 assert_eq!(" ".trim_left(), "");
2858 assert_eq!(" blah".trim_left(), "blah");
2859 assert_eq!(" \u3000 wut".trim_left(), "wut");
2860 assert_eq!("hey ".trim_left(), "hey ");
// Checks `trim_right`: trailing whitespace is removed, including the non-ASCII
// U+3000, while leading whitespace is preserved.
2864 fn test_trim_right() {
2865 assert_eq!("".trim_right(), "");
2866 assert_eq!("a".trim_right(), "a");
2867 assert_eq!(" ".trim_right(), "");
2868 assert_eq!("blah ".trim_right(), "blah");
2869 assert_eq!("wut \u3000 ".trim_right(), "wut");
2870 assert_eq!(" hey".trim_right(), " hey");
// `trim` checks (enclosing `fn` header not visible in this extract): whitespace
// is removed from both ends, including '\n' and U+3000, and interior spaces
// are kept.
2875 assert_eq!("".trim(), "");
2876 assert_eq!("a".trim(), "a");
2877 assert_eq!(" ".trim(), "");
2878 assert_eq!(" blah ".trim(), "blah");
2879 assert_eq!("\nwut \u3000 ".trim(), "wut");
2880 assert_eq!(" hey dude ".trim(), "hey dude");
// Checks `is_whitespace`: true for the empty string and for strings made only of
// whitespace (including Unicode thin space), false once any other character appears.
2884 fn test_is_whitespace() {
2885 assert!("".is_whitespace());
2886 assert!(" ".is_whitespace());
2887 assert!("\u2009".is_whitespace()); // Thin space
2888 assert!(" \n\t ".is_whitespace());
2889 assert!(!" _ ".is_whitespace());
// Checks the unsafe `raw::shift_byte`: it returns the first byte (65, 'A') and
// leaves the remainder "BC" in `s`.
// NOTE(review): the declaration of `s` is not visible in this extract.
2893 fn test_shift_byte() {
2895 let b = unsafe{raw::shift_byte(&mut s)};
2896 assert_eq!(s, ~"BC");
2897 assert_eq!(b, 65u8);
// Checks the unsafe `raw::pop_byte`: it returns the last byte (67, 'C') and
// leaves the remainder "AB" in `s`.
// NOTE(review): the declaration of `s` is not visible in this extract.
2901 fn test_pop_byte() {
2903 let b = unsafe{raw::pop_byte(&mut s)};
2904 assert_eq!(s, ~"AB");
2905 assert_eq!(b, 67u8);
// Checks the unsafe `raw::from_bytes`: seven bytes of value 65 become the owned
// string "AAAAAAA".
2909 fn test_unsafe_from_bytes() {
2910 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2911 let b = unsafe { raw::from_bytes(a) };
2912 assert_eq!(b, ~"AAAAAAA");
// Checks that `from_bytes` turns the UTF-8 byte vector `bb` into the
// mixed-script string `ss`.
// NOTE(review): the final row(s) of the byte vector are not visible in this extract.
2916 fn test_from_bytes() {
2917 let ss = ~"ศไทย中华Việt Nam";
2918 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2919 0xe0_u8, 0xb9_u8, 0x84_u8,
2920 0xe0_u8, 0xb8_u8, 0x97_u8,
2921 0xe0_u8, 0xb8_u8, 0xa2_u8,
2922 0xe4_u8, 0xb8_u8, 0xad_u8,
2923 0xe5_u8, 0x8d_u8, 0x8e_u8,
2924 0x56_u8, 0x69_u8, 0xe1_u8,
2925 0xbb_u8, 0x87_u8, 0x74_u8,
2926 0x20_u8, 0x4e_u8, 0x61_u8,
2929 assert_eq!(ss, from_bytes(bb));
// Checks the error path for invalid UTF-8. The byte vector starts with 0xff,
// and a trap installed on the `not_utf8` condition asserts the exact message
// (first bad byte 255) and records that it ran; the flag is checked afterwards.
// Ignored on Windows.
// NOTE(review): the body run under the trap (presumably a `from_bytes(bb)` call)
// and the end of the byte vector are not visible in this extract.
2933 #[ignore(cfg(windows))]
2934 fn test_from_bytes_fail() {
2935 use str::not_utf8::cond;
2937 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2938 0xe0_u8, 0xb9_u8, 0x84_u8,
2939 0xe0_u8, 0xb8_u8, 0x97_u8,
2940 0xe0_u8, 0xb8_u8, 0xa2_u8,
2941 0xe4_u8, 0xb8_u8, 0xad_u8,
2942 0xe5_u8, 0x8d_u8, 0x8e_u8,
2943 0x56_u8, 0x69_u8, 0xe1_u8,
2944 0xbb_u8, 0x87_u8, 0x74_u8,
2945 0x20_u8, 0x4e_u8, 0x61_u8,
2948 let mut error_happened = false;
2949 let _x = do cond.trap(|err| {
2950 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
2951 error_happened = true;
2956 assert!(error_happened);
// Checks the unsafe `raw::from_bytes_with_null`: a fixed byte array ending in a
// 0 byte compares equal to "AAAAAAA" (the terminator is not part of the string).
2960 fn test_unsafe_from_bytes_with_null() {
2961 let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2962 let b = unsafe { raw::from_bytes_with_null(a) };
2963 assert_eq!(b, "AAAAAAA");
// Checks that the safe `from_bytes_with_null` turns the UTF-8 byte array `bb`
// into the mixed-script string `ss`.
// NOTE(review): the final row(s) of the array, presumably including the 0
// terminator, are not visible in this extract.
2967 fn test_from_bytes_with_null() {
2968 let ss = "ศไทย中华Việt Nam";
2969 let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2970 0xe0_u8, 0xb9_u8, 0x84_u8,
2971 0xe0_u8, 0xb8_u8, 0x97_u8,
2972 0xe0_u8, 0xb8_u8, 0xa2_u8,
2973 0xe4_u8, 0xb8_u8, 0xad_u8,
2974 0xe5_u8, 0x8d_u8, 0x8e_u8,
2975 0x56_u8, 0x69_u8, 0xe1_u8,
2976 0xbb_u8, 0x87_u8, 0x74_u8,
2977 0x20_u8, 0x4e_u8, 0x61_u8,
2980 assert_eq!(ss, from_bytes_with_null(bb));
// Calls `from_bytes_with_null` on an array whose first byte is 0xff and
// discards the result. Ignored on Windows.
// NOTE(review): presumably an expected-failure test for invalid UTF-8; that
// attribute and the tail of the array are not visible in this extract.
2985 #[ignore(cfg(windows))]
2986 fn test_from_bytes_with_null_fail() {
2987 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2988 0xe0_u8, 0xb9_u8, 0x84_u8,
2989 0xe0_u8, 0xb8_u8, 0x97_u8,
2990 0xe0_u8, 0xb8_u8, 0xa2_u8,
2991 0xe4_u8, 0xb8_u8, 0xad_u8,
2992 0xe5_u8, 0x8d_u8, 0x8e_u8,
2993 0x56_u8, 0x69_u8, 0xe1_u8,
2994 0xbb_u8, 0x87_u8, 0x74_u8,
2995 0x20_u8, 0x4e_u8, 0x61_u8,
2998 let _x = from_bytes_with_null(bb);
// Second failure case for `from_bytes_with_null`; the visible rows match the
// previous test. Ignored on Windows.
// NOTE(review): whatever distinguishes this case (presumably the final row of
// the array, e.g. a missing 0 terminator) is not visible in this extract.
3003 #[ignore(cfg(windows))]
3004 fn test_from_bytes_with_null_fail_2() {
3005 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
3006 0xe0_u8, 0xb9_u8, 0x84_u8,
3007 0xe0_u8, 0xb8_u8, 0x97_u8,
3008 0xe0_u8, 0xb8_u8, 0xa2_u8,
3009 0xe4_u8, 0xb8_u8, 0xad_u8,
3010 0xe5_u8, 0x8d_u8, 0x8e_u8,
3011 0x56_u8, 0x69_u8, 0xe1_u8,
3012 0xbb_u8, 0x87_u8, 0x74_u8,
3013 0x20_u8, 0x4e_u8, 0x61_u8,
3016 let _x = from_bytes_with_null(bb);
// Checks `raw::from_buf`: given a raw pointer to a 0-terminated byte vector it
// yields the owned string "AAAAAAA".
// NOTE(review): the enclosing `unsafe` block is not visible in this extract.
3020 fn test_from_buf() {
3022 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
3023 let b = vec::raw::to_ptr(a);
3024 let c = raw::from_buf(b);
3025 assert_eq!(c, ~"AAAAAAA");
// Checks `as_bytes`: the empty string gives an empty slice, ASCII gives its
// byte values, and the mixed-script string gives the UTF-8 bytes listed in `v`.
// NOTE(review): the opening and closing lines of the `v` literal are not
// visible in this extract.
3030 fn test_as_bytes() {
3033 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3034 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3037 assert_eq!("".as_bytes(), &[]);
3038 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3039 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
// Checks `as_bytes_with_null` in two rounds (`s3` is first a managed `@` string,
// then an owned `~` string): the empty case yields just [0], and the other
// results carry a trailing 0 byte.
// NOTE(review): the bindings of `s1`/`s2` and the delimiters of the `v` literal
// are not visible in this extract.
3043 fn test_as_bytes_with_null() {
3046 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3047 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3053 let s3 = @"ศไทย中华Việt Nam";
3054 assert_eq!(s1.as_bytes_with_null(), &[0]);
3055 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3056 assert_eq!(s3.as_bytes_with_null(), v);
3060 let s3 = ~"ศไทย中华Việt Nam";
3061 assert_eq!(s1.as_bytes_with_null(), &[0]);
3062 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3063 assert_eq!(s3.as_bytes_with_null(), v);
// Checks `to_bytes_with_null` on owned strings: the result is an owned byte
// vector with a trailing 0 (just [0] for the empty string).
// NOTE(review): the delimiters of the `v` literal are not visible in this extract.
3067 fn test_to_bytes_with_null() {
3068 let s = ~"ศไทย中华Việt Nam";
3070 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3071 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3074 assert_eq!((~"").to_bytes_with_null(), ~[0]);
3075 assert_eq!((~"abc").to_bytes_with_null(),
3076 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3077 assert_eq!(s.to_bytes_with_null(), v);
// Regression check around a past double free: takes `as_bytes_with_null` of `s`
// and binds the result. Ignored on Windows.
// NOTE(review): the declaration of `s` and whatever follows the call
// (presumably a deliberate failure) are not visible in this extract.
3081 #[ignore(cfg(windows))]
3083 fn test_as_bytes_fail() {
3084 // Don't double free. (I'm not sure if this exercises the
3085 // original problem code path anymore.)
3087 let _bytes = s.as_bytes_with_null();
// `as_buf` fragment (enclosing `fn` header and the binding of `a` not visible in
// this extract): inside the callback the first byte behind the buffer pointer
// must be 65 ('A'); the length argument is ignored.
3094 let b = as_buf(a, |buf, _l| {
3095 assert_eq!(unsafe { *buf }, 65u8);
// Checks `as_buf`: the first byte behind the buffer pointer must be 65 ('A').
// NOTE(review): the binding of `a` (presumably a short string, per the test
// name) and the rest of the closure are not visible in this extract.
3102 fn test_as_buf_small() {
3104 let b = as_buf(a, |buf, _l| {
3105 assert_eq!(unsafe { *buf }, 65u8);
// Round-trip fragment (enclosing `fn` header and the binding of `s` not visible
// in this extract): obtain the raw buffer pointer via `as_buf`, rebuild a string
// from it with `raw::from_buf`, and require it to equal the original.
3115 let sb = as_buf(s, |b, _l| b);
3116 let s_cstr = raw::from_buf(sb);
3117 assert_eq!(s_cstr, s);
// Checks the pointer and length handed to the `as_buf` callback: the first byte
// is 'h', the reported length is 6, the byte at offset 4 is 'o', and the byte
// at offset 5 is the 0 terminator (so the length counts the terminator).
// NOTE(review): the binding of `a` (presumably "hello") and the enclosing
// `unsafe` block are not visible in this extract.
3122 fn test_as_buf_3() {
3124 do as_buf(a) |buf, len| {
3126 assert_eq!(a[0], 'h' as u8);
3127 assert_eq!(*buf, 'h' as u8);
3128 assert_eq!(len, 6u);
3129 assert_eq!(*ptr::offset(buf,4u), 'o' as u8);
3130 assert_eq!(*ptr::offset(buf,5u), 0u8);
// Checks `subslice_offset`: for a slice taken from a string it returns the byte
// offset of that slice within the string. Verified for direct `slice` results
// and for the slices yielded by `line_iter`.
3136 fn test_subslice_offset() {
3137 let a = "kernelsprite";
3138 let b = a.slice(7, a.len());
3139 let c = a.slice(0, a.len() - 6);
3140 assert_eq!(a.subslice_offset(b), 7);
3141 assert_eq!(a.subslice_offset(c), 0);
3143 let string = "a\nb\nc";
3144 let mut lines = ~[];
3145 for string.line_iter().advance |line| { lines.push(line) }
3146 assert_eq!(string.subslice_offset(lines[0]), 0);
3147 assert_eq!(string.subslice_offset(lines[1]), 2);
3148 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with a string that is not a slice of the receiver.
// NOTE(review): presumably an expected-failure test; that attribute is not
// visible in this extract.
3153 fn test_subslice_offset_2() {
3154 let a = "alchemiter";
3155 let b = "cruxtruder";
3156 a.subslice_offset(b);
// Round-trips an owned string through an owned byte vector (`as_bytes` +
// `to_owned`) and back via `from_bytes`, then sets up a counter and both lengths.
// NOTE(review): the comparisons that use `i`, `n1` and `n2` are not visible in
// this extract.
3160 fn vec_str_conversions() {
3161 let s1: ~str = ~"All mimsy were the borogoves";
3163 let v: ~[u8] = s1.as_bytes().to_owned();
3164 let s2: ~str = from_bytes(v);
3165 let mut i: uint = 0u;
3166 let n1: uint = s1.len();
3167 let n2: uint = v.len();
// Checks substring search with `contains`: matches at the start, middle and end,
// the empty needle (always found, even in an empty haystack), misses, and
// multi-byte needles in a mixed-script haystack.
3180 fn test_contains() {
3181 assert!("abcde".contains("bcd"));
3182 assert!("abcde".contains("abcd"));
3183 assert!("abcde".contains("bcde"));
3184 assert!("abcde".contains(""));
3185 assert!("".contains(""));
3186 assert!(!"abcde".contains("def"));
3187 assert!(!"".contains("a"));
3189 let data = ~"ประเทศไทย中华Việt Nam";
3190 assert!(data.contains("ประเ"));
3191 assert!(data.contains("ะเ"));
3192 assert!(data.contains("中华"));
3193 assert!(!data.contains("ไท华"));
// Checks `contains_char`: true when the character occurs in the string, false
// for an absent character and for the empty string.
3197 fn test_contains_char() {
3198 assert!("abc".contains_char('b'));
3199 assert!("a".contains_char('a'));
3200 assert!(!"abc".contains_char('d'));
3201 assert!(!"".contains_char('a'));
// `map_chars` checks (enclosing `fn` header not visible in this extract): mapping
// every character through libc's `toupper` leaves "" empty and turns "ymca"
// into "YMCA".
3206 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3207 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
// UTF-16 round-trip table and checks (enclosing `fn` header, the start of the
// `pairs` literal and some string entries are not visible in this extract).
// Each visible entry pairs a string of supplementary-plane characters with its
// expected UTF-16 code units (surrogate pairs). The loop then checks both
// conversion directions and both round trips.
3214 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3215 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3216 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3217 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3220 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3221 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3222 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3223 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3224 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3227 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3228 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3229 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3230 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3231 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3232 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3233 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3234 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3236 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3237 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3238 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3239 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3240 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3241 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3242 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3243 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3244 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3245 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3246 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3249 for pairs.iter().advance |p| {
3250 let (s, u) = (*p).clone();
3251 assert!(s.to_utf16() == u);
3252 assert!(from_utf16(u) == s);
3253 assert!(from_utf16(s.to_utf16()) == s);
3254 assert!(from_utf16(u).to_utf16() == u);
// `char_at` walk (enclosing `fn` header and the initialisation of `pos` not
// visible in this extract): for each expected character, the character at byte
// offset `pos` must match, after which `pos` advances by that character's
// encoded length.
3260 let s = ~"ศไทย中华Việt Nam";
3261 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3263 for v.iter().advance |ch| {
3264 assert!(s.char_at(pos) == *ch);
3265 pos += from_char(*ch).len();
// Checks `char_at_reverse` by walking the string backwards: starting from the
// byte length, the character ending at `pos` must match the expected characters
// in reverse order, and `pos` retreats by each character's encoded length.
3270 fn test_char_at_reverse() {
3271 let s = ~"ศไทย中华Việt Nam";
3272 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3273 let mut pos = s.len();
3274 for v.rev_iter().advance |ch| {
3275 assert!(s.char_at_reverse(pos) == *ch);
3276 pos -= from_char(*ch).len();
// Checks `escape_unicode`: every character is escaped, using `\xNN` for code
// points up to 0xff, `\uNNNN` up to 0xffff, and `\UNNNNNNNN` above that.
3281 fn test_escape_unicode() {
3282 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3283 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3284 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3285 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3286 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3287 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3288 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3289 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3290 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
// Checks `escape_default`: letters and spaces pass through unchanged, control
// characters and quotes/backslash get short backslash escapes, and the
// non-ASCII characters tested become `\uNNNN` / `\UNNNNNNNN` escapes.
3294 fn test_escape_default() {
3295 assert_eq!("abc".escape_default(), ~"abc");
3296 assert_eq!("a c".escape_default(), ~"a c");
3297 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3298 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3299 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3300 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3301 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3302 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
// Checks `to_managed`: a string slice (whole literal or a sub-slice) converts to
// an equal managed `@` string.
3306 fn test_to_managed() {
3307 assert_eq!("abc".to_managed(), @"abc");
3308 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
// Checks the total ordering of string slices via `cmp`: a longer string with an
// equal prefix is Greater, its prefix is Less, identical strings are Equal, and
// otherwise the first differing byte decides regardless of length.
// Each comparison is wrapped in `assert_eq!`; a bare `cmp(..) == Ordering;`
// statement discards its boolean result, so the test could never fail.
3312 fn test_total_ord() {
3313 assert_eq!("1234".cmp(& &"123"), Greater);
3314 assert_eq!("123".cmp(& &"1234"), Less);
3315 assert_eq!("1234".cmp(& &"1234"), Equal);
3316 assert_eq!("12345555".cmp(& &"123456"), Less);
3317 assert_eq!("22".cmp(& &"1234"), Greater);
// Checks `char_range_at(i).ch` at the starting byte offset of every character in
// a string whose characters are 1, 2, 3, 4, 4, 3, 2 and 1 bytes wide (hence the
// offsets 0, 1, 3, 6, 10, 14, 17, 19).
3321 fn test_char_range_at() {
3322 let data = ~"b¢€𤭢𤭢€¢b";
3323 assert_eq!('b', data.char_range_at(0).ch);
3324 assert_eq!('¢', data.char_range_at(1).ch);
3325 assert_eq!('€', data.char_range_at(3).ch);
3326 assert_eq!('𤭢', data.char_range_at(6).ch);
3327 assert_eq!('𤭢', data.char_range_at(10).ch);
3328 assert_eq!('€', data.char_range_at(14).ch);
3329 assert_eq!('¢', data.char_range_at(17).ch);
3330 assert_eq!('b', data.char_range_at(19).ch);
// Checks that `char_range_at_reverse(0)` reports `next == 0` rather than
// stepping below the start of the string.
3334 fn test_char_range_at_reverse_underflow() {
3335 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// String concatenation checks (enclosing `fn` header and the `macro_rules!`
// opener for `t!` are not visible in this extract). The macro arm asserts that
// `$s1 + $s2` equals `$e` with the left operand used as given, converted with
// `to_owned()`, and converted with `to_managed()`. The invocations vary the
// right operand across slice, managed and owned strings, for ASCII and
// multi-byte text. The lint allowance covers the deliberate `~"…"`/`@"…"`
// allocations.
3340 #[allow(unnecessary_allocation)];
3342 ($s1:expr, $s2:expr, $e:expr) => {
3343 assert_eq!($s1 + $s2, $e);
3344 assert_eq!($s1.to_owned() + $s2, $e);
3345 assert_eq!($s1.to_managed() + $s2, $e);
3349 t!("foo", "bar", ~"foobar");
3350 t!("foo", @"bar", ~"foobar");
3351 t!("foo", ~"bar", ~"foobar");
3352 t!("ศไทย中", "华Việt Nam", ~"ศไทย中华Việt Nam");
3353 t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3354 t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
// Checks the forward character iterator: each yielded char must equal the
// expected char at `pos`, and the number of chars seen must equal `v.len()`.
// NOTE(review): the declaration and increment of `pos` are not visible in this
// extract.
3358 fn test_iterator() {
3360 let s = ~"ศไทย中华Việt Nam";
3361 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3364 let mut it = s.iter();
3366 for it.advance |c| {
3367 assert_eq!(c, v[pos]);
3370 assert_eq!(pos, v.len());
// Checks the reverse character iterator against the expected characters listed
// back-to-front; the number of chars seen must equal `v.len()`.
// NOTE(review): the declaration and increment of `pos` are not visible in this
// extract.
3374 fn test_rev_iterator() {
3376 let s = ~"ศไทย中华Việt Nam";
3377 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3380 let mut it = s.rev_iter();
3382 for it.advance |c| {
3383 assert_eq!(c, v[pos]);
3386 assert_eq!(pos, v.len());
// Checks `bytes_iter`: each yielded byte must equal the expected UTF-8 byte at
// `pos` in `v`.
// NOTE(review): the delimiters of the `v` literal and the handling of `pos` are
// not visible in this extract.
3390 fn test_bytes_iterator() {
3391 let s = ~"ศไทย中华Việt Nam";
3393 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3394 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3399 for s.bytes_iter().advance |b| {
3400 assert_eq!(b, v[pos]);
// Checks `bytes_rev_iter`: starting with `pos` at `v.len()`, each yielded byte
// must equal `v[pos]`.
// NOTE(review): `pos` is presumably decremented before each comparison; that
// line and the delimiters of the `v` literal are not visible in this extract.
3406 fn test_bytes_rev_iterator() {
3407 let s = ~"ศไทย中华Việt Nam";
3409 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3410 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3413 let mut pos = v.len();
3415 for s.bytes_rev_iter().advance |b| {
3417 assert_eq!(b, v[pos]);
// Checks `split_iter` with a char separator and with an equivalent closure
// predicate, for both an ASCII separator (' ') and a multi-byte one ('ä').
// Newlines are not separators here and stay inside the pieces.
3422 fn test_split_char_iterator() {
3423 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3425 let split: ~[&str] = data.split_iter(' ').collect();
3426 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3428 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3429 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3432 let split: ~[&str] = data.split_iter('ä').collect();
3433 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3435 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3436 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn_iter(sep, 3)`: at most three splits are performed, so the
// result has four pieces and the last piece is the unsplit remainder. Covered
// for char and closure separators, ASCII and multi-byte.
3439 fn test_splitn_char_iterator() {
3440 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3442 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3443 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3445 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3446 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3449 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3450 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3452 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3453 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Checks the third argument of `split_options_iter`: with `true` the empty piece
// after the final separator is yielded, with `false` it is dropped. The leading
// empty piece is yielded in both cases.
3457 fn test_split_char_iterator_no_trailing() {
3458 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3460 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3461 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3463 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3464 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
// Checks `word_iter`: the text is split on runs of whitespace (spaces, tabs,
// newlines) and no empty words are produced for leading or trailing whitespace.
3468 fn test_word_iter() {
3469 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3470 let words: ~[&str] = data.word_iter().collect();
3471 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `line_iter`: empty lines are kept (leading and interior), and the
// result is the same whether or not the text ends with a final '\n'.
3475 fn test_line_iter() {
3476 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3477 let lines: ~[&str] = data.line_iter().collect();
3478 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3480 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3481 let lines: ~[&str] = data.line_iter().collect();
3482 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
// Checks `split_str_iter` (splitting on a string separator) through the local
// helper `t(s, sep, u)`, which collects the pieces of `s` split on `sep`.
// NOTE(review): the comparison against `u` inside `t` is not visible in this
// extract.
// Cases cover: separator absent, leading/trailing separators (empty pieces),
// multi-byte separators, and overlapping candidates ("zzz" on "zz" gives
// ["", "z"], i.e. matches do not overlap).
3486 fn test_split_str_iterator() {
3487 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3488 let v: ~[&str] = s.split_str_iter(sep).collect();
3491 t("--1233345--", "12345", ~["--1233345--"]);
3492 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3493 t("::hello::there", "::", ~["", "hello", "there"]);
3494 t("hello::there::", "::", ~["hello", "there", ""]);
3495 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3496 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3497 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3498 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3499 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3501 t("zz", "zz", ~["",""]);
3502 t("ok", "z", ~["ok"]);
3503 t("zzz", "zz", ~["","z"]);
3504 t("zzzzz", "zz", ~["","","z"]);
// Checks the `Zero` implementation for string types via the generic helper `t`:
// the zero value must be the empty string and must report `is_zero()`.
// NOTE(review): the calls that instantiate `t` for concrete string types are not
// visible in this extract.
3508 fn test_str_zero() {
3510 fn t<S: Zero + Str>() {
3511 let s: S = Zero::zero();
3512 assert_eq!(s.as_slice(), "");
3513 assert!(s.is_zero());
// Checks that string types implement `Container`: the generic helper `sum_len`
// adds up `len()` over a slice of containers, and the total must be 5 for
// slice, managed and owned strings alike.
// NOTE(review): the binding of `s` is not visible in this extract.
3522 fn test_str_container() {
3523 fn sum_len<S: Container>(v: &[S]) -> uint {
3524 v.iter().transform(|x| x.len()).sum()
3528 assert_eq!(5, sum_len(["012", "", "34"]));
3529 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3530 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3531 assert_eq!(5, sum_len([s.as_slice()]));