1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided.
25 use container::{Container, Mutable};
27 use iterator::{Iterator, IteratorUtil, FilterIterator, AdditiveIterator, MapIterator};
30 use option::{None, Option, Some};
36 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector};
42 not_utf8: (~str) -> ~str;
46 Section: Creating a string
50 * Convert a vector of bytes to a new UTF-8 string
54 * Raises the `not_utf8` condition if invalid UTF-8
56 pub fn from_bytes(vv: &[u8]) -> ~str {
57 use str::not_utf8::cond;
// NOTE(review): every byte is tested on its own via `is_utf8([*b])`, so the
// byte reported below is the first one that is not valid UTF-8 in isolation,
// which is not necessarily where the input actually stops being valid UTF-8.
60 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
61 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
62 first_bad_byte as uint))
// `raw::from_bytes` copies the bytes into a new owned string and performs no
// validation of its own.
65 return unsafe { raw::from_bytes(vv) }
70 * Consumes a vector of bytes to create a new UTF-8 string
74 * Raises the `not_utf8` condition if invalid UTF-8
76 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
77 use str::not_utf8::cond;
// NOTE(review): every byte is tested on its own via `is_utf8([*b])`, so the
// byte reported below is the first one that is not valid UTF-8 in isolation,
// which is not necessarily where the input actually stops being valid UTF-8.
80 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
// NOTE(review): the message is prefixed "from_bytes:" although this function
// is `from_bytes_owned` -- confirm whether that is intentional.
81 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
82 first_bad_byte as uint))
// Hands ownership of the vector to `raw::from_bytes_owned`.
84 return unsafe { raw::from_bytes_owned(vv) }
89 * Convert a vector of bytes to a UTF-8 string.
90 * The vector needs to be one byte longer than the string, and end with a 0 byte.
92 * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
96 * Fails if invalid UTF-8
97 * Fails if not null terminated
99 pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
100 assert_eq!(vv[vv.len() - 1], 0);
101 assert!(is_utf8(vv));
102 return unsafe { raw::from_bytes_with_null(vv) };
106 * Converts a vector to a string slice without performing any allocations.
108 * Once the slice has been validated as utf-8, it is transmuted in-place and
109 * returned as a '&str' instead of a '&[u8]'
113 * Fails if invalid UTF-8
115 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
117 assert!(is_utf8(vector));
118 let (ptr, len): (*u8, uint) = ::cast::transmute(vector);
119 let string: &'a str = ::cast::transmute((ptr, len + 1));
124 /// Copy a slice into a new unique str
126 pub fn to_owned(s: &str) -> ~str {
127 unsafe { raw::slice_bytes_owned(s, 0, s.len()) }
130 impl ToStr for ~str {
132 fn to_str(&self) -> ~str { to_owned(*self) }
134 impl<'self> ToStr for &'self str {
136 fn to_str(&self) -> ~str { to_owned(*self) }
138 impl ToStr for @str {
140 fn to_str(&self) -> ~str { to_owned(*self) }
144 * Convert a byte to a UTF-8 string
148 * Fails if invalid UTF-8
150 pub fn from_byte(b: u8) -> ~str {
152 unsafe { ::cast::transmute(~[b, 0u8]) }
155 /// Convert a char to a string
156 pub fn from_char(ch: char) -> ~str {
162 /// Convert a vector of chars to a string
163 pub fn from_chars(chs: &[char]) -> ~str {
165 buf.reserve(chs.len());
166 for chs.iter().advance |ch| {
173 pub fn push_str(lhs: &mut ~str, rhs: &str) {
177 #[allow(missing_doc)]
178 pub trait StrVector {
179 pub fn concat(&self) -> ~str;
180 pub fn connect(&self, sep: &str) -> ~str;
183 impl<'self, S: Str> StrVector for &'self [S] {
184 /// Concatenate a vector of strings.
185 pub fn concat(&self) -> ~str {
186 if self.is_empty() { return ~""; }
188 let len = self.iter().transform(|s| s.as_slice().len()).sum();
195 do as_buf(s) |buf, _| {
196 let mut buf = ::cast::transmute_mut_unsafe(buf);
197 for self.iter().advance |ss| {
198 do as_buf(ss.as_slice()) |ssbuf, sslen| {
199 let sslen = sslen - 1;
200 ptr::copy_memory(buf, ssbuf, sslen);
201 buf = buf.offset(sslen);
205 raw::set_len(&mut s, len);
210 /// Concatenate a vector of strings, placing a given separator between each.
211 pub fn connect(&self, sep: &str) -> ~str {
212 if self.is_empty() { return ~""; }
215 if sep.is_empty() { return self.concat(); }
217 // this is wrong without the guarantee that `self` is non-empty
218 let len = sep.len() * (self.len() - 1)
219 + self.iter().transform(|s| s.as_slice().len()).sum();
221 let mut first = true;
226 do as_buf(s) |buf, _| {
227 do as_buf(sep) |sepbuf, seplen| {
228 let seplen = seplen - 1;
229 let mut buf = ::cast::transmute_mut_unsafe(buf);
230 for self.iter().advance |ss| {
231 do as_buf(ss.as_slice()) |ssbuf, sslen| {
232 let sslen = sslen - 1;
236 ptr::copy_memory(buf, sepbuf, seplen);
237 buf = buf.offset(seplen);
239 ptr::copy_memory(buf, ssbuf, sslen);
240 buf = buf.offset(sslen);
245 raw::set_len(&mut s, len);
251 /// Something that can be used to compare against a character
253 /// Determine if the splitter should split at the given character
254 fn matches(&self, char) -> bool;
255 /// Indicate if this is only concerned about ASCII characters,
256 /// which can allow for a faster implementation.
257 fn only_ascii(&self) -> bool;
259 impl CharEq for char {
261 fn matches(&self, c: char) -> bool { *self == c }
// A `char` separator is ASCII-only exactly when its code point is below 128.
263 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
265 impl<'self> CharEq for &'self fn(char) -> bool {
267 fn matches(&self, c: char) -> bool { (*self)(c) }
269 fn only_ascii(&self) -> bool { false }
271 impl CharEq for extern "Rust" fn(char) -> bool {
273 fn matches(&self, c: char) -> bool { (*self)(c) }
275 fn only_ascii(&self) -> bool { false }
278 impl<'self, C: CharEq> CharEq for &'self [C] {
280 fn matches(&self, c: char) -> bool {
281 self.iter().any(|m| m.matches(c))
284 fn only_ascii(&self) -> bool {
285 self.iter().all(|m| m.only_ascii())
290 /// An iterator over the substrings of a string, separated by `sep`.
292 pub struct StrCharSplitIterator<'self,Sep> {
293 priv string: &'self str,
296 /// The number of splits remaining
298 /// Whether an empty string at the end is allowed
299 priv allow_trailing_empty: bool,
301 priv only_ascii: bool
304 /// An iterator over the words of a string, separated by a sequence of whitespace
305 pub type WordIterator<'self> =
306 FilterIterator<'self, &'self str,
307 StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
309 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
310 pub type AnyLineIterator<'self> =
311 MapIterator<'self, &'self str, &'self str, StrCharSplitIterator<'self, char>>;
313 impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
315 fn next(&mut self) -> Option<&'self str> {
316 if self.finished { return None }
318 let l = self.string.len();
319 let start = self.position;
322 // this gives a *huge* speed up for splitting on ASCII
323 // characters (e.g. '\n' or ' ')
324 while self.position < l && self.count > 0 {
325 let byte = self.string[self.position];
327 if self.sep.matches(byte as char) {
328 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
336 while self.position < l && self.count > 0 {
337 let CharRange {ch, next} = self.string.char_range_at(self.position);
339 if self.sep.matches(ch) {
340 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
341 self.position = next;
345 self.position = next;
348 self.finished = true;
349 if self.allow_trailing_empty || start < l {
350 Some(unsafe { raw::slice_bytes(self.string, start, l) })
357 /// An iterator over the start and end indices of the matches of a
358 /// substring within a larger string
360 pub struct StrMatchesIndexIterator<'self> {
361 priv haystack: &'self str,
362 priv needle: &'self str,
366 /// An iterator over the substrings of a string separated by a given
369 pub struct StrStrSplitIterator<'self> {
370 priv it: StrMatchesIndexIterator<'self>,
375 impl<'self> Iterator<(uint, uint)> for StrMatchesIndexIterator<'self> {
377 fn next(&mut self) -> Option<(uint, uint)> {
378 // See Issue #1932 for why this is a naive search
379 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
380 let mut match_start = 0;
383 while self.position < h_len {
384 if self.haystack[self.position] == self.needle[match_i] {
385 if match_i == 0 { match_start = self.position; }
389 if match_i == n_len {
391 return Some((match_start, self.position));
394 // failed match, backtrack
397 self.position = match_start;
406 impl<'self> Iterator<&'self str> for StrStrSplitIterator<'self> {
408 fn next(&mut self) -> Option<&'self str> {
409 if self.finished { return None; }
411 match self.it.next() {
412 Some((from, to)) => {
413 let ret = Some(self.it.haystack.slice(self.last_end, from));
418 self.finished = true;
419 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
425 /** Splits a string into substrings with possibly internal whitespace,
426 * each of them at most `lim` bytes long. The substrings have leading and trailing
427 * whitespace removed, and are only cut at whitespace boundaries.
431 * Fails during iteration if the string contains a non-whitespace
432 * sequence longer than the limit.
434 pub fn each_split_within<'a>(ss: &'a str,
436 it: &fn(&'a str) -> bool) -> bool {
437 // Just for fun, let's write this as a state machine:
439 enum SplitWithinState {
440 A, // leading whitespace, initial state
442 C, // internal and trailing whitespace
445 Ws, // current char is whitespace
446 Cr // current char is not whitespace
449 UnderLim, // current char makes current substring still fit in limit
450 OverLim // current char makes current substring no longer fit in limit
453 let mut slice_start = 0;
454 let mut last_start = 0;
455 let mut last_end = 0;
457 let mut fake_i = ss.len();
461 let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
463 // if the limit is larger than the string, lower it to save cycles
468 let machine: &fn((uint, char)) -> bool = |(i, c)| {
469 let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
470 let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
472 state = match (state, whitespace, limit) {
474 (A, Cr, _) => { slice_start = i; last_start = i; B }
476 (B, Cr, UnderLim) => { B }
477 (B, Cr, OverLim) if (i - last_start + 1) > lim
478 => fail!("word starting with %? longer than limit!",
479 ss.slice(last_start, i + 1)),
480 (B, Cr, OverLim) => { slice(); slice_start = last_start; B }
481 (B, Ws, UnderLim) => { last_end = i; C }
482 (B, Ws, OverLim) => { last_end = i; slice(); A }
484 (C, Cr, UnderLim) => { last_start = i; B }
485 (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
486 (C, Ws, OverLim) => { slice(); A }
487 (C, Ws, UnderLim) => { C }
493 ss.iter().enumerate().advance(|x| machine(x));
495 // Let the automaton 'run out' by supplying trailing whitespace
496 while cont && match state { B | C => true, A => false } {
497 machine((fake_i, ' '));
504 * Replace all occurrences of one string with another
508 * * s - The string containing substrings to replace
509 * * from - The string to replace
510 * * to - The replacement string
514 * The original string with all occurrences of `from` replaced with `to`
516 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
517 let mut result = ~"";
518 let mut last_end = 0;
519 for s.matches_index_iter(from).advance |(start, end)| {
520 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
524 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
529 Section: Comparing strings
532 /// Bytewise slice equality
536 pub fn eq_slice(a: &str, b: &str) -> bool {
537 do as_buf(a) |ap, alen| {
538 do as_buf(b) |bp, blen| {
539 if (alen != blen) { false }
542 libc::memcmp(ap as *libc::c_void,
544 (alen - 1) as libc::size_t) == 0
553 pub fn eq_slice(a: &str, b: &str) -> bool {
554 do as_buf(a) |ap, alen| {
555 do as_buf(b) |bp, blen| {
556 if (alen != blen) { false }
559 libc::memcmp(ap as *libc::c_void,
561 (alen - 1) as libc::size_t) == 0
568 /// Bytewise string equality
570 #[lang="uniq_str_eq"]
572 pub fn eq(a: &~str, b: &~str) -> bool {
578 pub fn eq(a: &~str, b: &~str) -> bool {
586 // Utility used by various searching functions
587 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
589 for needle.bytes_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
597 /// Determines if a vector of bytes contains valid UTF-8
598 pub fn is_utf8(v: &[u8]) -> bool {
605 let w = utf8_char_width(v[i]);
606 if w == 0u { return false; }
609 if nexti > total { return false; }
611 if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
613 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
614 if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; }
623 /// Determines if a vector of `u16` contains valid UTF-16
624 pub fn is_utf16(v: &[u16]) -> bool {
// Code units outside 0xD800..0xDFFF are not surrogates.
630 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// NOTE(review): this rejects the input whenever another unit follows the
// current one, which looks inverted -- a surrogate needs a partner unit, so
// the check presumably should be `i+1u >= len`. TODO confirm and fix.
634 if i+1u < len { return false; }
// NOTE(review): 0xD7FF is not a surrogate; `utf16_chars` asserts that the
// leading unit lies in 0xD800..0xDBFF, so the lower bound here is presumably
// meant to be 0xD800. TODO confirm and fix.
636 if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; }
// The second unit of the pair must lie in 0xDC00..0xDFFF.
637 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
644 /// Iterates over the utf-16 characters in the specified slice, yielding each
645 /// decoded unicode character to the function provided.
649 /// * Fails on invalid utf-16 data
650 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
653 while (i < len && v[i] != 0u16) {
656 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
662 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
663 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
664 let mut c = (u - 0xD800_u16) as char;
666 c |= (u2 - 0xDC00_u16) as char;
667 c |= 0x1_0000_u32 as char;
675 * Allocates a new string from the utf-16 slice provided
677 pub fn from_utf16(v: &[u16]) -> ~str {
679 buf.reserve(v.len());
680 utf16_chars(v, |ch| buf.push_char(ch));
685 * Allocates a new string with the specified capacity. The string returned is
686 * the empty string, but has capacity for much more.
688 pub fn with_capacity(capacity: uint) -> ~str {
690 buf.reserve(capacity);
695 * As char_len but for a slice of a string
699 * * s - A valid string
700 * * start - The position inside `s` where to start counting in bytes
701 * * end - The position where to stop counting
705 * The number of Unicode characters in `s` between the given indices.
707 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
708 assert!(s.is_char_boundary(start));
709 assert!(s.is_char_boundary(end));
713 let next = s.char_range_at(i).next;
720 /// Counts the number of bytes taken by the first `n` chars in `s`
721 /// starting from `start`.
722 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
723 assert!(s.is_char_boundary(start));
729 let next = s.char_range_at(end).next;
736 // https://tools.ietf.org/html/rfc3629
737 static UTF8_CHAR_WIDTH: [u8, ..256] = [
738 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
739 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
740 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
741 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
742 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
743 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
744 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
745 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
746 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
747 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
748 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
749 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
750 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
751 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
752 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
753 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
756 /// Given a first byte, determine how many bytes are in this UTF-8 character
757 pub fn utf8_char_width(b: u8) -> uint {
758 return UTF8_CHAR_WIDTH[b] as uint;
761 #[allow(missing_doc)]
762 pub struct CharRange {
767 // UTF-8 tags and ranges
768 static TAG_CONT_U8: u8 = 128u8; // 0b1000_0000; `is_utf8` compares `byte & 192u8` of each trailing byte against this
769 static TAG_CONT: uint = 128u; // same value as TAG_CONT_U8, typed as uint
770 static MAX_ONE_B: uint = 128u; // 2^7; presumably the exclusive code point limit for a one-byte encoding -- usage not shown here
771 static TAG_TWO_B: uint = 192u; // 0b1100_0000; presumably the leading-byte tag of a two-byte sequence
772 static MAX_TWO_B: uint = 2048u; // 2^11; presumably the exclusive code point limit for a two-byte encoding
773 static TAG_THREE_B: uint = 224u; // 0b1110_0000; presumably the leading-byte tag of a three-byte sequence
774 static MAX_THREE_B: uint = 65536u; // 2^16; presumably the exclusive code point limit for a three-byte encoding
775 static TAG_FOUR_B: uint = 240u; // 0b1111_0000; presumably the leading-byte tag of a four-byte sequence
778 * A dummy trait to hold all the utility methods that we implement on strings.
782 * Work with the byte buffer of a string as a null-terminated C string.
784 * Allows for unsafe manipulation of strings, which is useful for foreign
785 * interop. This is similar to `str::as_buf`, but guarantees null-termination.
786 * If the given slice is not already null-terminated, this function will
787 * allocate a temporary, copy the slice, null terminate it, and pass
793 * let s = "PATH".as_c_str(|path| libc::getenv(path));
796 fn as_c_str<T>(self, f: &fn(*libc::c_char) -> T) -> T;
799 impl<'self> StrUtil for &'self str {
801 fn as_c_str<T>(self, f: &fn(*libc::c_char) -> T) -> T {
802 do as_buf(self) |buf, len| {
803 // NB: len includes the trailing null.
805 if unsafe { *(ptr::offset(buf,len-1)) != 0 } {
806 to_owned(self).as_c_str(|s| f(s))
808 f(buf as *libc::c_char)
815 * Deprecated. Use the `as_c_str` method on strings instead.
818 pub fn as_c_str<T>(s: &str, f: &fn(*libc::c_char) -> T) -> T {
823 * Work with the byte buffer and length of a slice.
825 * The given length is one byte longer than the 'official' indexable
826 * length of the string. This is to permit probing the byte past the
827 * indexable area for a null byte, as is the case in slices pointing
828 * to full strings, or suffixes of them.
831 pub fn as_buf<T>(s: &str, f: &fn(*u8, uint) -> T) -> T {
833 let v : *(*u8,uint) = transmute(&s);
839 /// Unsafe operations
845 use str::{as_buf, is_utf8};
847 use vec::MutableVector;
849 /// Create a Rust string from a null-terminated *u8 buffer
850 pub unsafe fn from_buf(buf: *u8) -> ~str {
855 curr = ptr::offset(buf, i);
857 return from_buf_len(buf, i);
860 /// Create a Rust string from a *u8 buffer of the given length
861 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
862 let mut v: ~[u8] = vec::with_capacity(len + 1);
863 v.as_mut_buf(|vbuf, _len| {
864 ptr::copy_memory(vbuf, buf as *u8, len)
866 vec::raw::set_len(&mut v, len);
870 return ::cast::transmute(v);
873 /// Create a Rust string from a null-terminated C string
874 pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
875 from_buf(::cast::transmute(c_str))
878 /// Create a Rust string from a `*c_char` buffer of the given length
879 pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
880 from_buf_len(::cast::transmute(c_str), len)
883 /// Converts a vector of bytes to a new owned string.
884 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
885 do v.as_imm_buf |buf, len| {
886 from_buf_len(buf, len)
890 /// Converts an owned vector of bytes to a new owned string. This assumes
891 /// that the utf-8-ness of the vector has already been validated
892 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
897 /// Converts a vector of bytes to a string.
898 /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
899 /// the string, if possible ending in a 0 byte.
900 pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
904 /// Converts a byte to a string.
///
/// The byte is wrapped in a one-element vector and handed to
/// `raw::from_bytes`; nothing in this function checks that it is valid UTF-8.
905 pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
907 /// Form a slice from a C string. Unsafe because the caller must ensure the
908 /// C string has the static lifetime, or else the return value may be
909 /// invalidated later.
910 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
916 curr = ptr::offset(s, len);
918 let v = (s, len + 1);
919 assert!(is_utf8(::cast::transmute(v)));
924 * Takes a bytewise (not UTF-8) slice from a string.
926 * Returns the substring from [`begin`..`end`).
930 * If begin is greater than end.
931 * If end is greater than the length of the string.
933 pub unsafe fn slice_bytes_owned(s: &str, begin: uint, end: uint) -> ~str {
934 do as_buf(s) |sbuf, n| {
935 assert!((begin <= end));
938 let mut v = vec::with_capacity(end - begin + 1u);
939 do v.as_imm_buf |vbuf, _vlen| {
940 let vbuf = ::cast::transmute_mut_unsafe(vbuf);
941 let src = ptr::offset(sbuf, begin);
942 ptr::copy_memory(vbuf, src, end - begin);
944 vec::raw::set_len(&mut v, end - begin);
951 * Takes a bytewise (not UTF-8) slice from a string.
953 * Returns the substring from [`begin`..`end`).
957 * If begin is greater than end.
958 * If end is greater than the length of the string.
961 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
962 do as_buf(s) |sbuf, n| {
963 assert!((begin <= end));
966 let tuple = (ptr::offset(sbuf, begin), end - begin + 1);
967 ::cast::transmute(tuple)
971 /// Appends a byte to a string. (Not UTF-8 safe).
972 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
973 let new_len = s.len() + 1;
974 s.reserve_at_least(new_len);
975 do as_buf(*s) |buf, len| {
976 let buf: *mut u8 = ::cast::transmute(buf);
977 *ptr::mut_offset(buf, len) = b;
979 set_len(&mut *s, new_len);
982 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
983 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
984 let new_len = s.len() + bytes.len();
985 s.reserve_at_least(new_len);
986 for bytes.iter().advance |byte| { push_byte(&mut *s, *byte); }
989 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
990 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
994 set_len(s, len - 1u);
998 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
999 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
1001 assert!((len > 0u));
1003 *s = raw::slice_bytes_owned(*s, 1u, len);
1007 /// Sets the length of the string and adds the null terminator
1009 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
1010 let v: **mut vec::UnboxedVecRepr = cast::transmute(v);
1011 let repr: *mut vec::UnboxedVecRepr = *v;
1012 (*repr).fill = new_len + 1u;
1013 let null = ptr::mut_offset(cast::transmute(&((*repr).data)),
1019 fn test_from_buf_len() {
1021 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
1022 let b = vec::raw::to_ptr(a);
1023 let c = from_buf_len(b, 3u);
1024 assert_eq!(c, ~"AAA");
1033 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
1034 use super::{Str, eq_slice};
1036 impl<'self> Add<&'self str,~str> for &'self str {
1038 fn add(&self, rhs: & &'self str) -> ~str {
1039 let mut ret = self.to_owned();
1045 impl<'self> TotalOrd for &'self str {
1047 fn cmp(&self, other: & &'self str) -> Ordering {
1048 for self.bytes_iter().zip(other.bytes_iter()).advance |(s_b, o_b)| {
1049 match s_b.cmp(&o_b) {
1050 Greater => return Greater,
1051 Less => return Less,
1056 self.len().cmp(&other.len())
1060 impl TotalOrd for ~str {
// Orders owned strings by comparing their `&str` slices.
1062 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1065 impl TotalOrd for @str {
// Orders managed strings by comparing their `&str` slices.
1067 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1070 impl<'self> Eq for &'self str {
1072 fn eq(&self, other: & &'self str) -> bool {
1073 eq_slice((*self), (*other))
1076 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
1081 fn eq(&self, other: &~str) -> bool {
1082 eq_slice((*self), (*other))
1085 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
1090 fn eq(&self, other: &@str) -> bool {
1091 eq_slice((*self), (*other))
1094 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
1097 impl<'self> TotalEq for &'self str {
1099 fn equals(&self, other: & &'self str) -> bool {
1100 eq_slice((*self), (*other))
1104 impl TotalEq for ~str {
1106 fn equals(&self, other: &~str) -> bool {
1107 eq_slice((*self), (*other))
1111 impl TotalEq for @str {
1113 fn equals(&self, other: &@str) -> bool {
1114 eq_slice((*self), (*other))
// Every ordering operator on string slices is expressed through `cmp`.
1118 impl<'self> Ord for &'self str {
// Strictly less: `cmp` yields `Less`.
1120 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
// Less or equal: anything but `Greater`.
1122 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
// Greater or equal: anything but `Less`.
1124 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
// Strictly greater: `cmp` yields `Greater`.
1126 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1131 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1133 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1135 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1137 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1142 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1144 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1146 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1148 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1151 impl<'self, S: Str> Equiv<S> for &'self str {
// Equivalent when `self` and `other`, viewed as slices, are equal under `eq_slice`.
1153 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1156 impl<'self, S: Str> Equiv<S> for @str {
1158 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1161 impl<'self, S: Str> Equiv<S> for ~str {
1163 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1170 /// Any string that can be represented as a slice
1172 /// Work with `self` as a slice.
1173 fn as_slice<'a>(&'a self) -> &'a str;
1176 impl<'self> Str for &'self str {
1178 fn as_slice<'a>(&'a self) -> &'a str { *self }
1180 impl<'self> Str for ~str {
1182 fn as_slice<'a>(&'a self) -> &'a str {
1183 let s: &'a str = *self; s
1186 impl<'self> Str for @str {
1188 fn as_slice<'a>(&'a self) -> &'a str {
1189 let s: &'a str = *self; s
1193 impl<'self> Container for &'self str {
1195 fn len(&self) -> uint {
1196 do as_buf(*self) |_p, n| { n - 1u }
1199 fn is_empty(&self) -> bool {
1204 impl Container for ~str {
1206 fn len(&self) -> uint { self.as_slice().len() }
1208 fn is_empty(&self) -> bool { self.len() == 0 }
1211 impl Container for @str {
1213 fn len(&self) -> uint { self.as_slice().len() }
1215 fn is_empty(&self) -> bool { self.len() == 0 }
1218 impl Mutable for ~str {
1219 /// Remove all content, make the string empty
1221 fn clear(&mut self) {
1223 raw::set_len(self, 0)
1229 #[allow(missing_doc)]
1230 pub trait StrSlice<'self> {
1231 fn contains<'a>(&self, needle: &'a str) -> bool;
1232 fn contains_char(&self, needle: char) -> bool;
1233 fn iter(&self) -> StrCharIterator<'self>;
1234 fn rev_iter(&self) -> StrCharRevIterator<'self>;
1235 fn bytes_iter(&self) -> StrBytesIterator<'self>;
1236 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
1237 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
1238 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep>;
1239 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1240 -> StrCharSplitIterator<'self, Sep>;
1241 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self>;
1242 fn split_str_iter(&self, &'self str) -> StrStrSplitIterator<'self>;
1243 fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
1244 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1245 fn word_iter(&self) -> WordIterator<'self>;
1246 fn ends_with(&self, needle: &str) -> bool;
1247 fn is_whitespace(&self) -> bool;
1248 fn is_alphanumeric(&self) -> bool;
1249 fn char_len(&self) -> uint;
1251 fn slice(&self, begin: uint, end: uint) -> &'self str;
1252 fn slice_from(&self, begin: uint) -> &'self str;
1253 fn slice_to(&self, end: uint) -> &'self str;
1255 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1257 fn starts_with(&self, needle: &str) -> bool;
1258 fn escape_default(&self) -> ~str;
1259 fn escape_unicode(&self) -> ~str;
1260 fn trim(&self) -> &'self str;
1261 fn trim_left(&self) -> &'self str;
1262 fn trim_right(&self) -> &'self str;
1263 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1264 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1265 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1266 fn replace(&self, from: &str, to: &str) -> ~str;
1267 fn to_owned(&self) -> ~str;
1268 fn to_managed(&self) -> @str;
1269 fn to_utf16(&self) -> ~[u16];
1270 fn is_char_boundary(&self, index: uint) -> bool;
1271 fn char_range_at(&self, start: uint) -> CharRange;
1272 fn char_at(&self, i: uint) -> char;
1273 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1274 fn char_at_reverse(&self, i: uint) -> char;
1275 fn as_bytes(&self) -> &'self [u8];
1277 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1278 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1279 fn find_str(&self, &str) -> Option<uint>;
1281 fn repeat(&self, nn: uint) -> ~str;
1283 fn slice_shift_char(&self) -> (char, &'self str);
1285 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1287 fn lev_distance(&self, t: &str) -> uint;
1289 fn subslice_offset(&self, inner: &str) -> uint;
1292 /// Extension methods for strings
1293 impl<'self> StrSlice<'self> for &'self str {
1295 * Returns true if one string contains another
1299 * * needle - The string to look for
1302 fn contains<'a>(&self, needle: &'a str) -> bool {
1303 self.find_str(needle).is_some()
1306 * Returns true if a string contains a char.
1310 * * needle - The char to look for
1313 fn contains_char(&self, needle: char) -> bool {
1314 self.find(needle).is_some()
1316 /// An iterator over the characters of `self`. Note, this iterates
1317 /// over unicode code-points, not unicode graphemes.
1322 /// let v: ~[char] = "abc åäö".iter().collect();
1323 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1326 fn iter(&self) -> StrCharIterator<'self> {
1332 /// An iterator over the characters of `self`, in reverse order.
1334 fn rev_iter(&self) -> StrCharRevIterator<'self> {
1335 StrCharRevIterator {
1341 /// An iterator over the bytes of `self`
1343 fn bytes_iter(&self) -> StrBytesIterator<'self> {
1344 StrBytesIterator { it: self.as_bytes().iter() }
1346 /// An iterator over the bytes of `self`, in reverse order
1348 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self> {
1349 StrBytesRevIterator { it: self.as_bytes().rev_iter() }
1352 /// An iterator over substrings of `self`, separated by characters
1353 /// matched by `sep`.
1358 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1359 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1361 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1362 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1365 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
1366 self.split_options_iter(sep, self.len(), true)
1369 /// An iterator over substrings of `self`, separated by characters
1370 /// matched by `sep`, restricted to splitting at most `count`
1373 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep> {
1374 self.split_options_iter(sep, count, true)
1377 /// An iterator over substrings of `self`, separated by characters
1378 /// matched by `sep`, splitting at most `count` times, and
1379 /// possibly not including the trailing empty substring, if it
1382 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1383 -> StrCharSplitIterator<'self, Sep> {
1384 let only_ascii = sep.only_ascii();
1385 StrCharSplitIterator {
1390 allow_trailing_empty: allow_trailing_empty,
1392 only_ascii: only_ascii
1395 /// An iterator over the start and end indices of each match of
1396 /// `sep` within `self`.
1398 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self> {
1399 assert!(!sep.is_empty())
1400 StrMatchesIndexIterator {
1407 * An iterator over the substrings of `self` separated by `sep`.
1412 * let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
1413 * assert_eq!(v, ["", "XXX", "YYY", ""]);
1417 fn split_str_iter(&self, sep: &'self str) -> StrStrSplitIterator<'self> {
1418 StrStrSplitIterator {
1419 it: self.matches_index_iter(sep),
1425 /// An iterator over the lines of a string (subsequences separated
1428 fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
1429 self.split_options_iter('\n', self.len(), false)
1432 /// An iterator over the lines of a string, separated by either
1433 /// `\n` or (`\r\n`).
// Post-processes every item of `line_iter()`: when the line's last byte is
// '\r', the slice without that final byte is produced instead.
// NOTE(review): the binding of `l` and the else branch are not visible in this
// excerpt; `l` is presumably `line.len()` — confirm.
1434 fn any_line_iter(&self) -> AnyLineIterator<'self> {
1435 do self.line_iter().transform |line| {
1437 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1442 /// An iterator over the words of a string (subsequences separated
1443 /// by any sequence of whitespace).
// Splits on `char::is_whitespace` and then filters out the empty pieces that
// the split yields.
1445 fn word_iter(&self) -> WordIterator<'self> {
1446 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1450 * Returns true if the string contains only whitespace
1452 * Whitespace characters are determined by `char::is_whitespace`
// Applies `char::is_whitespace` to every item of `self.iter()` via `all`.
1455 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1457 * Returns true if the string contains only alphanumerics
1459 * Alphanumeric characters are determined by `char::is_alphanumeric`
// Applies `char::is_alphanumeric` to every item of `self.iter()` via `all`.
1462 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1463 /// Returns the number of characters that a string holds
// Counts the items produced by `self.iter()`; this walks the whole string.
1465 fn char_len(&self) -> uint { self.iter().len_() }
1468 * Returns a slice of the given string from the byte range
1471 * Fails when `begin` and `end` do not point to valid characters or
1472 * beyond the last character of the string
// Checked byte-range slice: both endpoints must pass `is_char_boundary`
// before the unchecked `raw::slice_bytes` call is made.
1475 fn slice(&self, begin: uint, end: uint) -> &'self str {
1476 assert!(self.is_char_boundary(begin));
1477 assert!(self.is_char_boundary(end));
1478 unsafe { raw::slice_bytes(*self, begin, end) }
1480 /// Returns a slice of the string from `begin` to its end.
1482 /// Fails when `begin` does not point to a valid character, or is
// Same as `self.slice(begin, self.len())`, so the checks done by `slice` apply.
1485 fn slice_from(&self, begin: uint) -> &'self str {
1486 self.slice(begin, self.len())
1488 /// Returns a slice of the string from the beginning to byte
1491 /// Fails when `end` does not point to a valid character, or is
1494 fn slice_to(&self, end: uint) -> &'self str {
1498 /// Returns a slice of the string from the char range
1499 /// [`begin`..`end`).
1501 /// Fails if `begin` > `end` or if either `begin` or `end` is
1502 /// beyond the last character of the string.
// Translates a char range into a byte range by stepping through the string
// with `char_range_at`, then delegates to `slice`.
// NOTE(review): the declaration and increments of `count` and the binding of
// `l` are not visible in this excerpt.
1503 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1504 assert!(begin <= end);
1505 // not sure how to use the iterators for this nicely.
// `position` is a byte offset into the string.
1506 let mut position = 0;
// Phase 1: advance until `begin` chars have been passed or the string ends.
1509 while count < begin && position < l {
1510 position = self.char_range_at(position).next;
1513 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1514 let start_byte = position;
// Phase 2: continue from there until `end` chars have been passed.
1515 while count < end && position < l {
1516 position = self.char_range_at(position).next;
1519 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1521 self.slice(start_byte, position)
1524 /// Returns true if `needle` is a prefix of the string.
// An empty needle always matches; a needle longer than `self` never does;
// otherwise the comparison is done by `match_at` at byte offset 0.
1525 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1526 let (self_len, needle_len) = (self.len(), needle.len());
1527 if needle_len == 0u { true }
1528 else if needle_len > self_len { false }
1529 else { match_at(*self, needle, 0u) }
1531 /// Returns true if `needle` is a suffix of the string.
// An empty needle always matches; a needle longer than `self` never does;
// otherwise `match_at` compares at byte offset `self_len - needle_len`.
1532 fn ends_with(&self, needle: &str) -> bool {
1533 let (self_len, needle_len) = (self.len(), needle.len());
1534 if needle_len == 0u { true }
1535 else if needle_len > self_len { false }
1536 else { match_at(*self, needle, self_len - needle_len) }
1539 /// Escape each char in `self` with char::escape_default.
// Builds the escaped string in `out`, visiting each char of `self` and passing
// it through the char-level `escape_default` callback API.
// NOTE(review): the callback body and the return are not visible in this excerpt.
1540 fn escape_default(&self) -> ~str {
1541 let mut out: ~str = ~"";
// Reserve at least the input's byte length up front.
1542 out.reserve_at_least(self.len());
1543 for self.iter().advance |c| {
1544 do c.escape_default |c| {
1551 /// Escape each char in `self` with char::escape_unicode.
// Builds the escaped string in `out`, visiting each char of `self` and passing
// it through the char-level `escape_unicode` callback API.
// NOTE(review): the callback body and the return are not visible in this excerpt.
1552 fn escape_unicode(&self) -> ~str {
1553 let mut out: ~str = ~"";
// Reserve at least the input's byte length up front.
1554 out.reserve_at_least(self.len());
1555 for self.iter().advance |c| {
1556 do c.escape_unicode |c| {
1563 /// Returns a string with leading and trailing whitespace removed
// Composition of `trim_left` followed by `trim_right`.
1565 fn trim(&self) -> &'self str {
1566 self.trim_left().trim_right()
1568 /// Returns a string with leading whitespace removed
// `trim_left_chars` with `char::is_whitespace` as the matcher.
1570 fn trim_left(&self) -> &'self str {
1571 self.trim_left_chars(&char::is_whitespace)
1573 /// Returns a string with trailing whitespace removed
// `trim_right_chars` with `char::is_whitespace` as the matcher.
1575 fn trim_right(&self) -> &'self str {
1576 self.trim_right_chars(&char::is_whitespace)
1580 * Returns a string with characters that match `to_trim` removed.
1584 * * to_trim - a character matcher
1589 * assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1590 * assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1591 * assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
// Trims the left side first, then the right side of the result, using the
// same matcher for both.
1595 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1596 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1599 * Returns a string with leading characters that match `to_trim` removed.
1603 * * to_trim - a character matcher
1608 * assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1609 * assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1610 * assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
// Looks for the first char that does NOT match `to_trim` and keeps everything
// from its byte index to the end of the string.
// NOTE(review): the `None` arm (no non-matching char found) is not visible in
// this excerpt.
1614 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1615 match self.find(|c: char| !to_trim.matches(c)) {
// `first` comes from `find`, so the unchecked slice starts at a found char.
1617 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1621 * Returns a string with trailing characters that match `to_trim` removed.
1625 * * to_trim - a character matcher
1630 * assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1631 * assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1632 * assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
// Looks for the last char that does NOT match `to_trim` and keeps everything
// up to and including it.
// NOTE(review): the `None` arm and the pattern binding `last` are not visible
// in this excerpt.
1636 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1637 match self.rfind(|c: char| !to_trim.matches(c)) {
// `next` is the byte index just past the char starting at `last`; it becomes
// the exclusive end of the result.
1640 let next = self.char_range_at(last).next;
1641 unsafe { raw::slice_bytes(*self, 0u, next) }
1647 * Replace all occurrences of one string with another
1651 * * from - The string to replace
1652 * * to - The replacement string
1656 * The original string with all occurrences of `from` replaced with `to`
// Walks the (start, end) byte ranges reported by `matches_index_iter(from)`.
// For each match it copies the untouched text since the previous match and
// then `to`; after the loop it copies the remaining tail.
// `matches_index_iter` asserts that its argument is non-empty, so an empty
// `from` fails.
// NOTE(review): the update of `last_end` inside the loop and the final return
// are not visible in this excerpt.
1658 pub fn replace(&self, from: &str, to: &str) -> ~str {
1659 let mut result = ~"";
1660 let mut last_end = 0;
1661 for self.matches_index_iter(from).advance |(start, end)| {
1662 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1663 result.push_str(to);
1666 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1670 /// Copy a slice into a new unique str
// Delegates to the free function `to_owned`, passing the dereferenced slice.
1672 fn to_owned(&self) -> ~str { to_owned(*self) }
// Copies the string into a managed (`@`) string.
// A managed byte vector of `self.len() + 1` elements is built: the string's
// bytes followed by a single 0 at the final index. It is then transmuted to
// `@str`.
1675 fn to_managed(&self) -> @str {
1676 let v = at_vec::from_fn(self.len() + 1, |i| {
1677 if i == self.len() { 0 } else { self[i] }
1679 unsafe { ::cast::transmute(v) }
1682 /// Converts to a vector of `u16` encoded as UTF-16.
// Encodes each char either as one 16-bit unit (value fits in 16 bits) or as a
// surrogate pair (supplementary planes).
// NOTE(review): the declaration of `u`, the single-unit push, the adjustment
// of `ch` before the shifts, and the return are not visible in this excerpt.
1683 fn to_utf16(&self) -> ~[u16] {
1685 for self.iter().advance |ch| {
1686 // Arithmetic with u32 literals is easier on the eyes than chars.
1687 let mut ch = ch as u32;
// True exactly when `ch` fits in the low 16 bits.
1689 if (ch & 0xFFFF_u32) == ch {
1690 // The BMP falls through (assuming non-surrogate, as it
1692 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1695 // Supplementary planes break into surrogates.
1696 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
// `w1` carries the bits above the low 10 (tagged 0xD800), `w2` the low 10
// bits (tagged 0xDC00).
1698 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1699 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1700 u.push_all([w1, w2])
1707 * Returns false if the index points into the middle of a multi-byte
1708 * character sequence.
// The end of the string (`index == self.len()`) always counts as a boundary.
// Otherwise the byte at `index` is inspected: values in 128..191 are rejected,
// everything else is accepted.
1710 fn is_char_boundary(&self, index: uint) -> bool {
1711 if index == self.len() { return true; }
1712 let b = self[index];
1713 return b < 128u8 || b >= 192u8;
1717 * Pluck a character out of a string and return the index of the next
1720 * This function can be used to iterate over the unicode characters of a
1726 * let s = "中华Việt Nam";
1728 * while i < s.len() {
1729 * let CharRange {ch, next} = s.char_range_at(i);
1730 * std::io::println(fmt!("%u: %c",i,ch));
1753 * * i - The byte offset of the char to extract
1757 * A record {ch: char, next: uint} containing the char value and the byte
1758 * index of the next unicode character.
1762 * If `i` is greater than or equal to the length of the string.
1763 * If `i` is not the index of the beginning of a valid UTF-8 character.
// Decodes the char starting at byte `i` and reports where the next one begins.
1766 fn char_range_at(&self, i: uint) -> CharRange {
// Fast path: a first byte below 128 is a complete one-byte char.
1767 if (self[i] < 128u8) {
1768 return CharRange {ch: self[i] as char, next: i + 1 };
1771 // Multibyte case is a fn to allow char_range_at to inline cleanly
1772 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1773 let mut val = s[i] as uint;
// The sequence width is looked up from the first byte.
1774 let w = UTF8_CHAR_WIDTH[val] as uint;
1777 // First byte is special, only want bottom 5 bits for width 2, 4 bits
1778 // for width 3, and 3 bits for width 4
// NOTE(review): the statement that masks the first byte is not visible in
// this excerpt.
// Each following byte contributes its low 6 bits (`& 63`).
1780 val = (val << 6) | (s[i + 1] & 63u8) as uint;
1781 if w > 2 { val = (val << 6) | (s[i + 2] & 63u8) as uint; }
1782 if w > 3 { val = (val << 6) | (s[i + 3] & 63u8) as uint; }
1784 return CharRange {ch: val as char, next: i + w};
1787 return multibyte_char_range_at(*self, i);
1790 /// Plucks the character starting at the `i`th byte of a string
// Convenience wrapper: the `ch` field of `char_range_at(i)`.
1792 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1795 * Given a byte position and a str, return the previous char and its position.
1797 * This function can be used to iterate over a unicode string in reverse.
1799 * Returns 0 for next index if called on start index 0.
// Finds the char that ends at byte `start` by moving `prev` backwards, then
// decodes it with `char_at`. The returned `next` is the byte index of that
// char, i.e. the position to continue a reverse walk from.
// NOTE(review): the loop body and the statements between the loop and the
// final `char_at` call are not visible in this excerpt.
1801 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1802 let mut prev = start;
1804 // while there is a previous byte == 10......
1805 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1809 // now refer to the initial byte of previous char
1817 let ch = self.char_at(prev);
1818 return CharRange {ch:ch, next:prev};
1821 /// Plucks the character ending at the `i`th byte of a string
// Convenience wrapper: the `ch` field of `char_range_at_reverse(i)`.
1823 fn char_at_reverse(&self, i: uint) -> char {
1824 self.char_range_at_reverse(i).ch
1828 * Work with the byte buffer of a string as a byte slice.
1830 * The byte slice does not include the null terminator.
// Reinterprets the string slice as a (pointer, length) pair, subtracts one
// from the length, and reinterprets the result as a byte slice.
// NOTE(review): this assumes the slice's stored length counts the trailing
// null byte — confirm against the representation used by this module.
1832 fn as_bytes(&self) -> &'self [u8] {
1834 let (ptr, len): (*u8, uint) = ::cast::transmute(*self);
1835 let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
1836 ::cast::transmute(outgoing_tuple)
1841 * Returns the byte index of the first character of `self` that matches `search`
1845 * `Some` containing the byte index of the first matching character
1846 * or `None` if there is no match
// Forward search with two strategies.
// NOTE(review): the initialisation of `index` and the `None` return are not
// visible in this excerpt.
1848 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
// Matcher only accepts ASCII: scan raw bytes, where the enumeration index is
// already the byte index.
1849 if search.only_ascii() {
1850 for self.bytes_iter().enumerate().advance |(i, b)| {
1851 if search.matches(b as char) { return Some(i) }
// General case: iterate chars and track the byte offset by adding each
// char's UTF-8 length.
1855 for self.iter().advance |c| {
1856 if search.matches(c) { return Some(index); }
1857 index += c.len_utf8_bytes();
1864 * Returns the byte index of the last character of `self` that matches `search`
1868 * `Some` containing the byte index of the last matching character
1869 * or `None` if there is no match
// Backward search; `index` starts at the byte length and is moved back before
// each candidate is tested.
// NOTE(review): the decrement in the byte branch and the `None` return are not
// visible in this excerpt.
1871 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
1872 let mut index = self.len();
// Matcher only accepts ASCII: scan raw bytes from the end.
1873 if search.only_ascii() {
1874 for self.bytes_rev_iter().advance |b| {
1876 if search.matches(b as char) { return Some(index); }
// General case: step back by each char's UTF-8 length so `index` is the
// start of the char being tested.
1879 for self.rev_iter().advance |c| {
1880 index -= c.len_utf8_bytes();
1881 if search.matches(c) { return Some(index); }
1889 * Returns the byte index of the first matching substring
1893 * * `needle` - The string to search for
1897 * `Some` containing the byte index of the first matching substring
1898 * or `None` if there is no match
// The empty needle is special-cased because `matches_index_iter` asserts a
// non-empty argument. Otherwise a (start, end) pair from `matches_index_iter`
// is reduced to its start index.
// NOTE(review): the body of the empty-needle branch and the call between
// `matches_index_iter` and `map_consume` are not visible in this excerpt.
1900 fn find_str(&self, needle: &str) -> Option<uint> {
1901 if needle.is_empty() {
1904 self.matches_index_iter(needle)
1906 .map_consume(|(start, _end)| start)
1910 /// Given a string, make a new string with repeated copies of it.
// Builds the result by raw memory copies: capacity for `nn * len` bytes is
// reserved, `len` bytes are copied from `buf` at an advancing destination
// pointer, and the length is finally set to `nn * len`.
// NOTE(review): the declaration of `ret`, the adjustment of `len` described by
// the "ignore the NULL terminator" comment, and the loop around the copy are
// not visible in this excerpt.
1911 fn repeat(&self, nn: uint) -> ~str {
1912 do as_buf(*self) |buf, len| {
1914 // ignore the NULL terminator
1916 ret.reserve(nn * len);
1919 do as_buf(ret) |rbuf, _len| {
1920 let mut rbuf = ::cast::transmute_mut_unsafe(rbuf);
1923 ptr::copy_memory(rbuf, buf, len);
1924 rbuf = rbuf.offset(len);
1927 raw::set_len(&mut ret, nn * len);
1934 * Retrieves the first character from a string slice and returns
1935 * it. This does not allocate a new string; instead, it returns a
1936 * slice that points one character beyond the character that was
1941 * If the string does not contain any characters
// Decodes the char at byte 0 and returns it together with the slice that
// starts at `next` (the byte index right after that char).
1944 fn slice_shift_char(&self) -> (char, &'self str) {
1945 let CharRange {ch, next} = self.char_range_at(0u);
1946 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1947 return (ch, next_s);
1951 /// Apply a function to each character.
// Pushes `ff(cc)` for every char `cc` of the input into a string created with
// capacity `self.len()`.
// NOTE(review): the return of `result` is not visible in this excerpt.
1952 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
1953 let mut result = with_capacity(self.len());
1954 for self.iter().advance |cc| {
1955 result.push_char(ff(cc));
1960 /// Levenshtein Distance between two strings.
// Single-column dynamic-programming edit distance: `dcol` holds one column of
// the distance table and is updated in place for each char of `self`.
// NOTE(review): the binding of `tlen`, the char comparison inside the inner
// loop and the final return are not visible in this excerpt.
// NOTE(review): `slen` is a byte length (`self.len()`), while both loops
// iterate by char (`iter()`); confirm the result is as intended for non-ASCII
// input.
1961 fn lev_distance(&self, t: &str) -> uint {
1962 let slen = self.len();
// If either string is empty the distance is the other one's length.
1965 if slen == 0 { return tlen; }
1966 if tlen == 0 { return slen; }
// Initial column: 0, 1, ..., tlen.
1968 let mut dcol = vec::from_fn(tlen + 1, |x| x);
1970 for self.iter().enumerate().advance |(i, sc)| {
1972 let mut current = i;
1973 dcol[0] = current + 1;
1975 for t.iter().enumerate().advance |(j, tc)| {
1977 let next = dcol[j + 1];
1980 dcol[j + 1] = current;
1982 dcol[j + 1] = ::cmp::min(current, next);
1983 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
1995 * Returns the byte offset of an inner slice relative to an enclosing outer slice.
1997 * Fails if `inner` is not a direct slice contained within self.
2002 * let string = "a\nb\nc";
2003 * let mut lines = ~[];
2004 * for string.line_iter().advance |line| { lines.push(line) }
2006 * assert!(string.subslice_offset(lines[0]) == 0); // &"a"
2007 * assert!(string.subslice_offset(lines[1]) == 2); // &"b"
2008 * assert!(string.subslice_offset(lines[2]) == 4); // &"c"
// Compares the raw buffer addresses of the two slices: `a_*` bound the outer
// string (`self`), `b_*` bound `inner`. The asserts require `inner`'s range to
// lie within `self`'s.
// NOTE(review): the declarations of the four bounds and the returned
// expression are not visible in this excerpt.
2012 fn subslice_offset(&self, inner: &str) -> uint {
2013 do as_buf(*self) |a, a_len| {
2014 do as_buf(inner) |b, b_len| {
2020 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
2021 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
2023 assert!(a_start <= b_start);
2024 assert!(b_end <= a_end);
// Trait for string types that can expose their byte buffer as a borrowed
// `&[u8]` through `as_bytes_with_null`; the visible impls are for `~str` and
// `@str`.
2032 #[allow(missing_doc)]
2033 pub trait NullTerminatedStr {
2034 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
// `as_bytes_with_null` for owned strings: the reference to the `~str` is
// transmuted to a reference to `~[u8]` and then borrowed as a slice, so no
// bytes are copied.
// NOTE(review): the final expression returning `slice` is not visible in this
// excerpt.
2037 impl NullTerminatedStr for ~str {
2039 * Work with the byte buffer of a string as a byte slice.
2041 * The byte slice does include the null terminator.
2044 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2045 let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
2046 let slice: &'a [u8] = *ptr;
// `as_bytes_with_null` for managed strings: the reference to the `@str` is
// transmuted to a reference to `@[u8]` and then borrowed as a slice, so no
// bytes are copied.
// NOTE(review): the final expression returning `slice` is not visible in this
// excerpt.
2050 impl NullTerminatedStr for @str {
2052 * Work with the byte buffer of a string as a byte slice.
2054 * The byte slice does include the null terminator.
2057 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2058 let ptr: &'a @[u8] = unsafe { ::cast::transmute(self) };
2059 let slice: &'a [u8] = *ptr;
// Operations on owned strings: appending (`push_*`, `append`), removing or
// inserting a char at either end (`pop_char`, `shift_char`, `unshift_char`),
// capacity management (`reserve*`, `capacity`) and conversion to a byte vector.
// All mutating methods take `&mut self`; `append` takes `&self` and returns a
// new string.
2064 #[allow(missing_doc)]
2065 pub trait OwnedStr {
2066 fn push_str_no_overallocate(&mut self, rhs: &str);
2067 fn push_str(&mut self, rhs: &str);
2068 fn push_char(&mut self, c: char);
2069 fn pop_char(&mut self) -> char;
2070 fn shift_char(&mut self) -> char;
2071 fn unshift_char(&mut self, ch: char);
2072 fn append(&self, rhs: &str) -> ~str; // FIXME #4850: this should consume self.
2073 fn reserve(&mut self, n: uint);
2074 fn reserve_at_least(&mut self, n: uint);
2075 fn capacity(&self) -> uint;
2077 fn as_bytes_with_null_consume(self) -> ~[u8];
// `OwnedStr` for `~str`. Most methods manipulate the underlying buffer
// directly (raw pointer copies followed by `raw::set_len`).
// NOTE(review): several interior lines (unsafe block openers, branch headers,
// return expressions, closing delimiters) are not visible in this excerpt.
2080 impl OwnedStr for ~str {
2081 /// Appends a string slice to the back of a string, without overallocating
2083 fn push_str_no_overallocate(&mut self, rhs: &str) {
// Uses the exact `reserve`, then copies `rlen` bytes of `rhs` to offset `llen`
// of the buffer and sets the new length.
2085 let llen = self.len();
2086 let rlen = rhs.len();
2087 self.reserve(llen + rlen);
2088 do as_buf(*self) |lbuf, _llen| {
2089 do as_buf(rhs) |rbuf, _rlen| {
2090 let dst = ptr::offset(lbuf, llen);
2091 let dst = ::cast::transmute_mut_unsafe(dst);
2092 ptr::copy_memory(dst, rbuf, rlen);
2095 raw::set_len(self, llen + rlen);
2099 /// Appends a string slice to the back of a string
2101 fn push_str(&mut self, rhs: &str) {
// Same copy as `push_str_no_overallocate`, but capacity is obtained through
// `reserve_at_least`.
2103 let llen = self.len();
2104 let rlen = rhs.len();
2105 self.reserve_at_least(llen + rlen);
2106 do as_buf(*self) |lbuf, _llen| {
2107 do as_buf(rhs) |rbuf, _rlen| {
2108 let dst = ptr::offset(lbuf, llen);
2109 let dst = ::cast::transmute_mut_unsafe(dst);
2110 ptr::copy_memory(dst, rbuf, rlen);
2113 raw::set_len(self, llen + rlen);
2116 /// Appends a character to the back of a string
2118 fn push_char(&mut self, c: char) {
2119 assert!(c as uint <= 0x10ffff); // FIXME: #7609: should be enforced on all `char`
2121 let code = c as uint;
// `nb` is the number of bytes the encoded char occupies, chosen by comparing
// the code point against the MAX_*_B thresholds.
2122 let nb = if code < MAX_ONE_B { 1u }
2123 else if code < MAX_TWO_B { 2u }
2124 else if code < MAX_THREE_B { 3u }
2126 let len = self.len();
2127 let new_len = len + nb;
2128 self.reserve_at_least(new_len);
2130 do as_buf(*self) |buf, _len| {
2131 let buf: *mut u8 = ::cast::transmute(buf);
// The four groups of writes below are the 1-, 2-, 3- and 4-byte encodings:
// a tagged lead byte followed by TAG_CONT bytes carrying 6 bits each.
// NOTE(review): the binding of `off` and the branch selecting a group by `nb`
// are not visible in this excerpt.
2134 *ptr::mut_offset(buf, off) = code as u8;
2137 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2138 *ptr::mut_offset(buf, off + 1u) = (code & 63u | TAG_CONT) as u8;
2141 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2142 *ptr::mut_offset(buf, off + 1u) = (code >> 6u & 63u | TAG_CONT) as u8;
2143 *ptr::mut_offset(buf, off + 2u) = (code & 63u | TAG_CONT) as u8;
2146 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2147 *ptr::mut_offset(buf, off + 1u) = (code >> 12u & 63u | TAG_CONT) as u8;
2148 *ptr::mut_offset(buf, off + 2u) = (code >> 6u & 63u | TAG_CONT) as u8;
2149 *ptr::mut_offset(buf, off + 3u) = (code & 63u | TAG_CONT) as u8;
2154 raw::set_len(self, new_len);
2158 * Remove the final character from a string and return it
2162 * If the string does not contain any characters
2164 fn pop_char(&mut self) -> char {
2165 let end = self.len();
// `next` is the byte index where the last char starts; truncating the length
// to it drops that char.
2167 let CharRange {ch, next} = self.char_range_at_reverse(end);
2168 unsafe { raw::set_len(self, next); }
2173 * Remove the first character from a string and return it
2177 * If the string does not contain any characters
2179 fn shift_char(&mut self) -> char {
// Decodes the first char, then replaces the whole string with the owned
// remainder starting at `next`.
2180 let CharRange {ch, next} = self.char_range_at(0u);
2181 *self = unsafe { raw::slice_bytes_owned(*self, next, self.len()) };
2185 /// Prepend a char to a string
2186 fn unshift_char(&mut self, ch: char) {
2187 // This could be more efficient.
// Builds a fresh string: the new char first, then the old contents.
2188 let mut new_str = ~"";
2189 new_str.push_char(ch);
2190 new_str.push_str(*self);
2194 /// Concatenate two strings together.
2196 fn append(&self, rhs: &str) -> ~str {
2197 // FIXME #4850: this should consume self, but that causes segfaults
// Works on a clone, so `self` is left unchanged.
2198 let mut v = self.clone();
2199 v.push_str_no_overallocate(rhs);
2204 * Reserves capacity for exactly `n` bytes in the given string, not including
2205 * the null terminator.
2207 * Assuming single-byte characters, the resulting string will be large
2208 * enough to hold a string of length `n`. To account for the null terminator,
2209 * the underlying buffer will have the size `n` + 1.
2211 * If the capacity for `s` is already equal to or greater than the requested
2212 * capacity, then no action is taken.
2217 * * n - The number of bytes to reserve space for
2220 pub fn reserve(&mut self, n: uint) {
// Views the string as a byte vector and reserves `n + 1` on it (the extra
// byte is for the null terminator, per the doc text above).
2222 let v: *mut ~[u8] = cast::transmute(self);
2223 (*v).reserve(n + 1);
2228 * Reserves capacity for at least `n` bytes in the given string, not including
2229 * the null terminator.
2231 * Assuming single-byte characters, the resulting string will be large
2232 * enough to hold a string of length `n`. To account for the null terminator,
2233 * the underlying buffer will have the size `n` + 1.
2235 * This function will over-allocate in order to amortize the allocation costs
2236 * in scenarios where the caller may need to repeatedly reserve additional
2239 * If the capacity for `s` is already equal to or greater than the requested
2240 * capacity, then no action is taken.
2245 * * n - The number of bytes to reserve space for
2248 fn reserve_at_least(&mut self, n: uint) {
// `n + 1` is rounded up to a power of two; the 1 is subtracted again because
// `reserve` adds it back itself.
2249 self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2253 * Returns the number of single-byte characters the string can hold without
2256 fn capacity(&self) -> uint {
// Reads the capacity of the string viewed as a byte vector.
// NOTE(review): the expression derived from `vcap` that is returned is not
// visible in this excerpt.
2257 let buf: &~[u8] = unsafe { cast::transmute(self) };
2258 let vcap = buf.capacity();
2263 /// Convert to a vector of bytes. This does not allocate a new
2264 /// string, and includes the null terminator.
2266 fn as_bytes_with_null_consume(self) -> ~[u8] {
// Pure reinterpretation of the owned string as an owned byte vector.
2267 unsafe { ::cast::transmute(self) }
2271 impl Clone for ~str {
2273 fn clone(&self) -> ~str {
2278 impl Clone for @str {
2280 fn clone(&self) -> @str {
2285 /// External iterator for a string's characters. Use with the `std::iterator`
// Forward char iterator state. `string` is the slice being walked.
// NOTE(review): the `index` field used by the `Iterator` impl is not visible
// in this excerpt.
2288 pub struct StrCharIterator<'self> {
2290 priv string: &'self str,
// While `index` is inside the string, decodes the char at `index` with
// `char_range_at`.
// NOTE(review): the update of `index` and the returned values are not visible
// in this excerpt.
2293 impl<'self> Iterator<char> for StrCharIterator<'self> {
2295 fn next(&mut self) -> Option<char> {
2296 if self.index < self.string.len() {
2297 let CharRange {ch, next} = self.string.char_range_at(self.index);
2305 /// External iterator for a string's characters in reverse order. Use
2306 /// with the `std::iterator` module.
// Reverse char iterator state. `string` is the slice being walked.
// NOTE(review): the `index` field used by the `Iterator` impl is not visible
// in this excerpt.
2308 pub struct StrCharRevIterator<'self> {
2310 priv string: &'self str,
// Decodes the char that ends at `index` with `char_range_at_reverse`.
// NOTE(review): the guard condition, the update of `index` and the returned
// values are not visible in this excerpt.
2313 impl<'self> Iterator<char> for StrCharRevIterator<'self> {
2315 fn next(&mut self) -> Option<char> {
2317 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index);
2326 /// External iterator for a string's bytes. Use with the `std::iterator`
// Newtype around a `vec::VecIterator` over `u8`.
2329 pub struct StrBytesIterator<'self> {
2330 priv it: vec::VecIterator<'self, u8>
// Advances the wrapped vector iterator and turns the `&u8` it yields into a
// `u8` by value.
2333 impl<'self> Iterator<u8> for StrBytesIterator<'self> {
2335 fn next(&mut self) -> Option<u8> {
2336 self.it.next().map_consume(|&x| x)
2340 /// External iterator for a string's bytes in reverse order. Use with
2341 /// the `std::iterator` module.
// Newtype around a `vec::VecRevIterator` over `u8`.
2343 pub struct StrBytesRevIterator<'self> {
2344 priv it: vec::VecRevIterator<'self, u8>
// Advances the wrapped reverse vector iterator and turns the `&u8` it yields
// into a `u8` by value.
2347 impl<'self> Iterator<u8> for StrBytesRevIterator<'self> {
2349 fn next(&mut self) -> Option<u8> {
2350 self.it.next().map_consume(|&x| x)
2354 // This works because every lifetime is a sub-lifetime of 'static
// The zero value of a string slice is the empty string literal; `is_zero`
// is `is_empty`.
2355 impl<'self> Zero for &'self str {
2356 fn zero() -> &'self str { "" }
2357 fn is_zero(&self) -> bool { self.is_empty() }
// The zero value of an owned string is a new empty string; `is_zero` tests
// for a length of 0.
2360 impl Zero for ~str {
2361 fn zero() -> ~str { ~"" }
2362 fn is_zero(&self) -> bool { self.len() == 0 }
// The zero value of a managed string is an empty managed string; `is_zero`
// tests for a length of 0.
2365 impl Zero for @str {
2366 fn zero() -> @str { @"" }
2367 fn is_zero(&self) -> bool { self.len() == 0 }
2372 use iterator::IteratorUtil;
2373 use container::Container;
2381 use vec::{ImmutableVector, CopyableVector};
2382 use cmp::{TotalOrd, Less, Equal, Greater};
2386 assert!((eq(&~"", &~"")));
2387 assert!((eq(&~"foo", &~"foo")));
2388 assert!((!eq(&~"foo", &~"bar")));
// `eq_slice` on sub-slices taken from the front and the back of a longer
// string, plus one unequal pair.
2392 fn test_eq_slice() {
2393 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2394 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2395 assert!((!eq_slice("foo1", "foo2")));
2401 assert!("" <= "foo");
2402 assert!("foo" <= "foo");
2403 assert!("foo" != "bar");
2408 assert_eq!("".len(), 0u);
2409 assert_eq!("hello world".len(), 11u);
2410 assert_eq!("\x63".len(), 1u);
2411 assert_eq!("\xa2".len(), 2u);
2412 assert_eq!("\u03c0".len(), 2u);
2413 assert_eq!("\u2620".len(), 3u);
2414 assert_eq!("\U0001d11e".len(), 4u);
2416 assert_eq!("".char_len(), 0u);
2417 assert_eq!("hello world".char_len(), 11u);
2418 assert_eq!("\x63".char_len(), 1u);
2419 assert_eq!("\xa2".char_len(), 1u);
2420 assert_eq!("\u03c0".char_len(), 1u);
2421 assert_eq!("\u2620".char_len(), 1u);
2422 assert_eq!("\U0001d11e".char_len(), 1u);
2423 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2428 assert_eq!("hello".find('l'), Some(2u));
2429 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2430 assert!("hello".find('x').is_none());
2431 assert!("hello".find(|c:char| c == 'x').is_none());
2432 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2433 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2438 assert_eq!("hello".rfind('l'), Some(3u));
2439 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2440 assert!("hello".rfind('x').is_none());
2441 assert!("hello".rfind(|c:char| c == 'x').is_none());
2442 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2443 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
// Checks the contents of `s` after successive `push_str` calls, including a
// multi-byte payload.
// NOTE(review): the declaration of `s` and the earlier pushes are not visible
// in this excerpt.
2447 fn test_push_str() {
2450 assert_eq!(s.slice_from(0), "");
2452 assert_eq!(s.slice_from(0), "abc");
2453 s.push_str("ประเทศไทย中华Việt Nam");
2454 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2460 assert_eq!(s.slice_from(0), "");
2461 s = s.append("abc");
2462 assert_eq!(s.slice_from(0), "abc");
2463 s = s.append("ประเทศไทย中华Việt Nam");
2464 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
// `pop_char` removes and returns the final multi-byte char, leaving the rest
// of the string intact.
2468 fn test_pop_char() {
2469 let mut data = ~"ประเทศไทย中华";
2470 let cc = data.pop_char();
2471 assert_eq!(~"ประเทศไทย中", data);
2472 assert_eq!('华', cc);
// `pop_char` on a single-char string returns that char and leaves the string
// empty.
2476 fn test_pop_char_2() {
2477 let mut data2 = ~"华";
2478 let cc2 = data2.pop_char();
2479 assert_eq!(~"", data2);
2480 assert_eq!('华', cc2);
// Calls `pop_char` and discards the result; ignored on Windows.
// NOTE(review): the binding of `data` and any other attributes are not visible
// in this excerpt; the name suggests the call is expected to fail — confirm.
2485 #[ignore(cfg(windows))]
2486 fn test_pop_char_fail() {
2488 let _cc3 = data.pop_char();
// Appends chars of every UTF-8 encoded width (1 to 4 bytes, as annotated) and
// checks the resulting string.
2492 fn test_push_char() {
2493 let mut data = ~"ประเทศไทย中";
2494 data.push_char('华');
2495 data.push_char('b'); // 1 byte
2496 data.push_char('¢'); // 2 byte
2497 data.push_char('€'); // 3 byte
2498 data.push_char('𤭢'); // 4 byte
2499 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
// `shift_char` removes and returns the first multi-byte char.
2503 fn test_shift_char() {
2504 let mut data = ~"ประเทศไทย中";
2505 let cc = data.shift_char();
2506 assert_eq!(~"ระเทศไทย中", data);
2507 assert_eq!('ป', cc);
// `unshift_char` places the new char in front of the existing contents.
2511 fn test_unshift_char() {
2512 let mut data = ~"ประเทศไทย中";
2513 data.unshift_char('华');
2514 assert_eq!(~"华ประเทศไทย中", data);
2519 let mut empty = ~"";
2521 assert_eq!("", empty.as_slice());
2522 let mut data = ~"ประเทศไทย中";
2524 assert_eq!("", data.as_slice());
2525 data.push_char('华');
2526 assert_eq!("华", data.as_slice());
// Drives `each_split_within` with several width limits and compares the
// collected pieces against the expected list.
// NOTE(review): the declaration of `v` is not visible in this excerpt.
2530 fn test_split_within() {
2531 fn t(s: &str, i: uint, u: &[~str]) {
2533 for each_split_within(s, i) |s| { v.push(s.to_owned()) }
// NOTE(review): `zip` only pairs up elements, so this assertion by itself
// does not compare the lengths of `v` and `u`.
2534 assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
2538 t("hello", 15, [~"hello"]);
2539 t("\nMary had a little lamb\nLittle lamb\n", 15,
2540 [~"Mary had a", ~"little lamb", ~"Little lamb"]);
2541 t("\nMary had a little lamb\nLittle lamb\n", uint::max_value,
2542 [~"Mary had a little lamb\nLittle lamb"]);
// `find_str` with empty needles, absent needles, sub-slices and multi-byte
// text. Expected values written as `x - y` are offsets relative to the start
// of the sub-slice being searched.
2546 fn test_find_str() {
2548 assert_eq!("".find_str(""), Some(0u));
2549 assert!("banana".find_str("apple pie").is_none());
2551 let data = "abcabc";
2552 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2553 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2554 assert!(data.slice(2u, 4u).find_str("ab").is_none());
// NOTE(review): the slices up to byte 86 below imply `data` is extended on a
// line that is not visible in this excerpt.
2556 let mut data = ~"ประเทศไทย中华Việt Nam";
2558 assert!(data.find_str("ไท华").is_none());
2559 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2560 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2562 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2563 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2564 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2565 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2566 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2568 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2569 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2570 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2571 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2572 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
// `slice_chars` addressed by char positions; the helper `t` derives the end
// position from the expected result's `char_len`.
2576 fn test_slice_chars() {
2577 fn t(a: &str, b: &str, start: uint) {
2578 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2580 t("hello", "llo", 2);
2581 t("hello", "el", 1);
2582 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2587 fn t(v: &[~str], s: &str) {
2588 assert_eq!(v.concat(), s.to_str());
2590 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2591 let v: &[~str] = [];
2598 fn t(v: &[~str], sep: &str, s: &str) {
2599 assert_eq!(v.connect(sep), s.to_str());
2601 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2602 " ", "you know I'm no good");
2603 let v: &[~str] = [];
2605 t([~"hi"], " ", "hi");
// `concat` on vectors of borrowed string slices.
// NOTE(review): the call that uses the empty vector `v` is not visible in this
// excerpt.
2609 fn test_concat_slices() {
2610 fn t(v: &[&str], s: &str) {
2611 assert_eq!(v.concat(), s.to_str());
2613 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2614 let v: &[&str] = [];
// `connect` on vectors of borrowed string slices, including a single-element
// vector (no separator expected in the output).
2620 fn test_connect_slices() {
2621 fn t(v: &[&str], sep: &str, s: &str) {
2622 assert_eq!(v.connect(sep), s.to_str());
2624 t(["you", "know", "I'm", "no", "good"],
2625 " ", "you know I'm no good");
2627 t(["hi"], " ", "hi");
2632 assert_eq!("x".repeat(4), ~"xxxx");
2633 assert_eq!("hi".repeat(4), ~"hihihihi");
2634 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2635 assert_eq!("".repeat(4), ~"");
2636 assert_eq!("hi".repeat(0), ~"");
// `raw::slice_bytes` on small literals and on a large generated string: the
// first 500000 bytes of the longer string must equal the shorter one.
// NOTE(review): the declarations of `rs`/`i` and the returns of the two
// helpers are not visible in this excerpt.
2640 fn test_unsafe_slice() {
2641 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2642 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2643 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2644 fn a_million_letter_a() -> ~str {
// 100000 iterations of 10 chars.
2647 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2650 fn half_a_million_letter_a() -> ~str {
// 100000 iterations of 5 chars.
2653 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2656 let letters = a_million_letter_a();
2657 assert!(half_a_million_letter_a() ==
2658 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
// `starts_with`: empty needle, proper prefix, needle longer than the string,
// and non-empty needle on an empty string.
2662 fn test_starts_with() {
2663 assert!(("".starts_with("")));
2664 assert!(("abc".starts_with("")));
2665 assert!(("abc".starts_with("a")));
2666 assert!((!"a".starts_with("abc")));
2667 assert!((!"".starts_with("abc")));
// `ends_with`: empty needle, proper suffix, needle longer than the string,
// and non-empty needle on an empty string.
2671 fn test_ends_with() {
2672 assert!(("".ends_with("")));
2673 assert!(("abc".ends_with("")));
2674 assert!(("abc".ends_with("c")));
2675 assert!((!"a".ends_with("abc")));
2676 assert!((!"".ends_with("abc")));
// `is_empty` is true for "" and false for a one-char string.
2680 fn test_is_empty() {
2681 assert!("".is_empty());
2682 assert!(!"a".is_empty());
2688 assert_eq!("".replace(a, "b"), ~"");
2689 assert_eq!("a".replace(a, "b"), ~"b");
2690 assert_eq!("ab".replace(a, "b"), ~"bb");
2692 assert!(" test test ".replace(test, "toast") ==
2694 assert_eq!(" test test ".replace(test, ""), ~" ");
// `replace` on multi-byte text; per the expected value `A`, the replaced span
// is at the start of `data`.
// NOTE(review): the binding of the search string `a` is not visible in this
// excerpt.
2698 fn test_replace_2a() {
2699 let data = ~"ประเทศไทย中华";
2700 let repl = ~"دولة الكويت";
2703 let A = ~"دولة الكويتทศไทย中华";
2704 assert_eq!(data.replace(a, repl), A);
// `replace` on multi-byte text; per the expected value `B`, the replaced span
// is in the interior of `data`.
// NOTE(review): the binding of the search string `b` is not visible in this
// excerpt.
2708 fn test_replace_2b() {
2709 let data = ~"ประเทศไทย中华";
2710 let repl = ~"دولة الكويت";
2713 let B = ~"ปรدولة الكويتทศไทย中华";
2714 assert_eq!(data.replace(b, repl), B);
// `replace` on multi-byte text; per the expected value `C`, the replaced span
// is at the end of `data`.
// NOTE(review): the binding of the search string `c` is not visible in this
// excerpt.
2718 fn test_replace_2c() {
2719 let data = ~"ประเทศไทย中华";
2720 let repl = ~"دولة الكويت";
2723 let C = ~"ประเทศไทยدولة الكويت";
2724 assert_eq!(data.replace(c, repl), C);
// `replace` must return the input unchanged when the search string `d` does
// not produce a replacement.
// NOTE(review): the binding of `d` is not visible in this excerpt.
2728 fn test_replace_2d() {
2729 let data = ~"ประเทศไทย中华";
2730 let repl = ~"دولة الكويت";
2733 assert_eq!(data.replace(d, repl), data);
2738 assert_eq!("ab", "abc".slice(0, 2));
2739 assert_eq!("bc", "abc".slice(1, 3));
2740 assert_eq!("", "abc".slice(1, 1));
2741 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2743 let data = "ประเทศไทย中华";
2744 assert_eq!("ป", data.slice(0, 3));
2745 assert_eq!("ร", data.slice(3, 6));
2746 assert_eq!("", data.slice(3, 3));
2747 assert_eq!("华", data.slice(30, 33));
2749 fn a_million_letter_X() -> ~str {
2753 push_str(&mut rs, "华华华华华华华华华华");
2758 fn half_a_million_letter_X() -> ~str {
2761 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2764 let letters = a_million_letter_X();
2765 assert!(half_a_million_letter_X() ==
2766 letters.slice(0u, 3u * 500000u).to_owned());
2771 let ss = "中华Việt Nam";
2773 assert_eq!("华", ss.slice(3u, 6u));
2774 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2776 assert_eq!("ab", "abc".slice(0u, 2u));
2777 assert_eq!("bc", "abc".slice(1u, 3u));
2778 assert_eq!("", "abc".slice(1u, 1u));
2780 assert_eq!("中", ss.slice(0u, 3u));
2781 assert_eq!("华V", ss.slice(3u, 7u));
2782 assert_eq!("", ss.slice(3u, 3u));
// Slices to byte 2, which lies inside the first char of this string (the
// first char spans bytes 0..3), so the `is_char_boundary` assertion in `slice`
// rejects it. Ignored on Windows.
// NOTE(review): the attributes that mark this as an expected failure are not
// visible in this excerpt.
2797 #[ignore(cfg(windows))]
2798 fn test_slice_fail() {
2799 "中华Việt Nam".slice(0u, 2u);
// `slice_from` at the start, in the middle, and at the very end (empty result).
2803 fn test_slice_from() {
2804 assert_eq!("abcd".slice_from(0), "abcd");
2805 assert_eq!("abcd".slice_from(2), "cd");
2806 assert_eq!("abcd".slice_from(4), "");
// `slice_to` at the start (empty result), in the middle, and at the very end.
2809 fn test_slice_to() {
2810 assert_eq!("abcd".slice_to(0), "");
2811 assert_eq!("abcd".slice_to(2), "ab");
2812 assert_eq!("abcd".slice_to(4), "abcd");
// `trim_left_chars` with each matcher kind used here: an empty char slice
// (nothing trimmed), a char slice, a single char and a closure.
2816 fn test_trim_left_chars() {
2817 let v: &[char] = &[];
2818 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
2819 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2820 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
2821 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2823 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2824 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2825 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
// Checks `trim_right_chars`, which strips matching characters from the end
// of a string only. The matcher is exercised in four forms: an empty char
// slice (nothing is trimmed), a char slice, a single char, and a closure
// predicate. Matching characters at the start or in the middle are kept.
2829 fn test_trim_right_chars() {
2830 let v: &[char] = &[];
2831 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
2832 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
2833 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
2834 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
2836 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
2837 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
2838 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
// Checks `trim_chars`, which strips matching characters from both ends of a
// string. The matcher is exercised in four forms: an empty char slice
// (nothing is trimmed), a char slice, a single char, and a closure predicate.
// Matching characters in the interior ("foo1bar") are kept.
2842 fn test_trim_chars() {
2843 let v: &[char] = &[];
2844 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
2845 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
2846 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
2847 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
2849 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
2850 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
2851 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
// Checks `trim_left`: leading whitespace is removed (including the non-ASCII
// U+3000), trailing whitespace is kept, and empty or whitespace-free input
// comes back unchanged.
2855 fn test_trim_left() {
2856 assert_eq!("".trim_left(), "");
2857 assert_eq!("a".trim_left(), "a");
2858 assert_eq!(" ".trim_left(), "");
2859 assert_eq!(" blah".trim_left(), "blah");
2860 assert_eq!(" \u3000 wut".trim_left(), "wut");
2861 assert_eq!("hey ".trim_left(), "hey ");
// Checks `trim_right`: trailing whitespace is removed (including the
// non-ASCII U+3000), leading whitespace is kept, and empty or whitespace-free
// input comes back unchanged.
2865 fn test_trim_right() {
2866 assert_eq!("".trim_right(), "");
2867 assert_eq!("a".trim_right(), "a");
2868 assert_eq!(" ".trim_right(), "");
2869 assert_eq!("blah ".trim_right(), "blah");
2870 assert_eq!("wut \u3000 ".trim_right(), "wut");
2871 assert_eq!(" hey".trim_right(), " hey");
2876 assert_eq!("".trim(), "");
2877 assert_eq!("a".trim(), "a");
2878 assert_eq!(" ".trim(), "");
2879 assert_eq!(" blah ".trim(), "blah");
2880 assert_eq!("\nwut \u3000 ".trim(), "wut");
2881 assert_eq!(" hey dude ".trim(), "hey dude");
// Checks `is_whitespace` on whole strings: it holds for the empty string and
// for strings made only of whitespace (ASCII or U+2009), and fails as soon as
// one non-whitespace character is present.
2885 fn test_is_whitespace() {
2886 assert!("".is_whitespace());
2887 assert!(" ".is_whitespace());
2888 assert!("\u2009".is_whitespace()); // Thin space
2889 assert!(" \n\t ".is_whitespace());
2890 assert!(!" _ ".is_whitespace());
// Checks `raw::shift_byte`: after the call `s` is "BC" and the returned byte
// is 65 (ASCII 'A').
// NOTE(review): the initial binding of `s` is not visible in this view;
// presumably it is "ABC", making this a remove-first-byte check -- confirm.
2894 fn test_shift_byte() {
2896 let b = unsafe{raw::shift_byte(&mut s)};
2897 assert_eq!(s, ~"BC");
2898 assert_eq!(b, 65u8);
// Checks `raw::pop_byte`: after the call `s` is "AB" and the returned byte is
// 67 (ASCII 'C').
// NOTE(review): the initial binding of `s` is not visible in this view;
// presumably it is "ABC", making this a remove-last-byte check -- confirm.
2902 fn test_pop_byte() {
2904 let b = unsafe{raw::pop_byte(&mut s)};
2905 assert_eq!(s, ~"AB");
2906 assert_eq!(b, 67u8);
// Checks the unsafe `raw::from_bytes` conversion: seven bytes of value 65
// (ASCII 'A') become the owned string "AAAAAAA".
2910 fn test_unsafe_from_bytes() {
2911 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2912 let b = unsafe { raw::from_bytes(a) };
2913 assert_eq!(b, ~"AAAAAAA");
// Checks that `from_bytes` decodes the UTF-8 byte vector `bb` into the
// multi-script string `ss`.
// NOTE(review): the last row(s) and the closing of the `bb` literal are not
// visible in this view.
2917 fn test_from_bytes() {
2918 let ss = ~"ศไทย中华Việt Nam";
2919 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2920 0xe0_u8, 0xb9_u8, 0x84_u8,
2921 0xe0_u8, 0xb8_u8, 0x97_u8,
2922 0xe0_u8, 0xb8_u8, 0xa2_u8,
2923 0xe4_u8, 0xb8_u8, 0xad_u8,
2924 0xe5_u8, 0x8d_u8, 0x8e_u8,
2925 0x56_u8, 0x69_u8, 0xe1_u8,
2926 0xbb_u8, 0x87_u8, 0x74_u8,
2927 0x20_u8, 0x4e_u8, 0x61_u8,
2930 assert_eq!(ss, from_bytes(bb));
// Exercises the `not_utf8` condition. The byte vector `bb` starts with 0xff.
// A handler is installed with `cond.trap`; it checks that the error message
// names byte 255 as the first bad byte and records that it ran. The final
// assert verifies the handler was actually invoked.
// NOTE(review): the call made while the trap is active is not visible in this
// view; presumably it is `from_bytes(bb)` -- confirm.
// The `ignore` attribute below skips the test on Windows.
2934 #[ignore(cfg(windows))]
2935 fn test_from_bytes_fail() {
2936 use str::not_utf8::cond;
2938 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2939 0xe0_u8, 0xb9_u8, 0x84_u8,
2940 0xe0_u8, 0xb8_u8, 0x97_u8,
2941 0xe0_u8, 0xb8_u8, 0xa2_u8,
2942 0xe4_u8, 0xb8_u8, 0xad_u8,
2943 0xe5_u8, 0x8d_u8, 0x8e_u8,
2944 0x56_u8, 0x69_u8, 0xe1_u8,
2945 0xbb_u8, 0x87_u8, 0x74_u8,
2946 0x20_u8, 0x4e_u8, 0x61_u8,
2949 let mut error_happened = false;
2950 let _x = do cond.trap(|err| {
2951 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
2952 error_happened = true;
2957 assert!(error_happened);
// Checks the unsafe `raw::from_bytes_with_null` conversion: seven bytes of
// value 65 (ASCII 'A') followed by a 0 byte compare equal to "AAAAAAA", i.e.
// the trailing 0 is not part of the string contents.
2961 fn test_unsafe_from_bytes_with_null() {
2962 let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2963 let b = unsafe { raw::from_bytes_with_null(a) };
2964 assert_eq!(b, "AAAAAAA");
// Checks that `from_bytes_with_null` turns the UTF-8 byte array `bb` into the
// multi-script string `ss`.
// NOTE(review): the last row(s) of `bb`, presumably including the terminating
// 0 byte, are not visible in this view -- confirm.
2968 fn test_from_bytes_with_null() {
2969 let ss = "ศไทย中华Việt Nam";
2970 let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2971 0xe0_u8, 0xb9_u8, 0x84_u8,
2972 0xe0_u8, 0xb8_u8, 0x97_u8,
2973 0xe0_u8, 0xb8_u8, 0xa2_u8,
2974 0xe4_u8, 0xb8_u8, 0xad_u8,
2975 0xe5_u8, 0x8d_u8, 0x8e_u8,
2976 0x56_u8, 0x69_u8, 0xe1_u8,
2977 0xbb_u8, 0x87_u8, 0x74_u8,
2978 0x20_u8, 0x4e_u8, 0x61_u8,
2981 assert_eq!(ss, from_bytes_with_null(bb));
// Calls `from_bytes_with_null` on a byte array whose first byte is 0xff and
// discards the result.
// NOTE(review): the test name implies the call is expected to fail; the
// attribute asserting that and the end of the `bb` literal are not visible in
// this view -- confirm.
// The `ignore` attribute below skips the test on Windows.
2986 #[ignore(cfg(windows))]
2987 fn test_from_bytes_with_null_fail() {
2988 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2989 0xe0_u8, 0xb9_u8, 0x84_u8,
2990 0xe0_u8, 0xb8_u8, 0x97_u8,
2991 0xe0_u8, 0xb8_u8, 0xa2_u8,
2992 0xe4_u8, 0xb8_u8, 0xad_u8,
2993 0xe5_u8, 0x8d_u8, 0x8e_u8,
2994 0x56_u8, 0x69_u8, 0xe1_u8,
2995 0xbb_u8, 0x87_u8, 0x74_u8,
2996 0x20_u8, 0x4e_u8, 0x61_u8,
2999 let _x = from_bytes_with_null(bb);
// Second failure case for `from_bytes_with_null`: the byte array again starts
// with 0xff and the result is discarded.
// NOTE(review): the rows visible here are the same as in
// `test_from_bytes_with_null_fail`; whatever distinguishes this case
// (presumably the tail of `bb`, e.g. a missing 0 terminator) is on lines not
// visible in this view -- confirm.
// The `ignore` attribute below skips the test on Windows.
3004 #[ignore(cfg(windows))]
3005 fn test_from_bytes_with_null_fail_2() {
3006 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
3007 0xe0_u8, 0xb9_u8, 0x84_u8,
3008 0xe0_u8, 0xb8_u8, 0x97_u8,
3009 0xe0_u8, 0xb8_u8, 0xa2_u8,
3010 0xe4_u8, 0xb8_u8, 0xad_u8,
3011 0xe5_u8, 0x8d_u8, 0x8e_u8,
3012 0x56_u8, 0x69_u8, 0xe1_u8,
3013 0xbb_u8, 0x87_u8, 0x74_u8,
3014 0x20_u8, 0x4e_u8, 0x61_u8,
3017 let _x = from_bytes_with_null(bb);
// Checks `raw::from_buf`: given the raw pointer to a byte vector holding
// seven 'A' bytes (65) followed by a 0 byte, it produces the owned string
// "AAAAAAA".
3021 fn test_from_buf() {
3023 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
3024 let b = vec::raw::to_ptr(a);
3025 let c = raw::from_buf(b);
3026 assert_eq!(c, ~"AAAAAAA");
// Checks `as_bytes` on an empty string (empty byte slice), an ASCII string
// (one byte per character) and a multi-script string compared against `v`.
// NOTE(review): the numeric rows below appear to be the body of the expected
// byte table `v`, whose opening and closing lines are not visible in this
// view -- confirm.
3031 fn test_as_bytes() {
3034 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3035 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3038 assert_eq!("".as_bytes(), &[]);
3039 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3040 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
// Checks `as_bytes_with_null`, whose result includes a trailing 0 byte: the
// expected slices are `[0]` for `s1` and 'a', 'b', 'c', 0 for `s2`. The same
// three assertions are run twice, once with a managed (`@`) `s3` and once
// with an owned (`~`) `s3`.
// NOTE(review): the bindings of `s1`, `s2` and `v` are on lines not visible
// in this view; the numeric rows appear to be part of `v` -- confirm.
3044 fn test_as_bytes_with_null() {
3047 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3048 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3054 let s3 = @"ศไทย中华Việt Nam";
3055 assert_eq!(s1.as_bytes_with_null(), &[0]);
3056 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3057 assert_eq!(s3.as_bytes_with_null(), v);
3061 let s3 = ~"ศไทย中华Việt Nam";
3062 assert_eq!(s1.as_bytes_with_null(), &[0]);
3063 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3064 assert_eq!(s3.as_bytes_with_null(), v);
// Checks `as_bytes_with_null_consume` on owned strings: the result is an
// owned byte vector that ends with a 0 byte (`~[0]` for the empty string).
// NOTE(review): the numeric rows appear to be part of the expected vector
// `v`, whose opening and closing lines are not visible in this view --
// confirm.
3068 fn test_as_bytes_with_null_consume() {
3069 let s = ~"ศไทย中华Việt Nam";
3071 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3072 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3075 assert_eq!((~"").as_bytes_with_null_consume(), ~[0]);
3076 assert_eq!((~"abc").as_bytes_with_null_consume(),
3077 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3078 assert_eq!(s.as_bytes_with_null_consume(), v);
// Regression test guarding against a double free (see the comment in the
// body): it borrows the bytes of `s` via `as_bytes_with_null`.
// NOTE(review): the binding of `s` and whatever follows the borrow are not
// visible in this view; the test name implies a failure is expected --
// confirm.
// The `ignore` attribute below skips the test on Windows.
3082 #[ignore(cfg(windows))]
3084 fn test_as_bytes_fail() {
3085 // Don't double free. (I'm not sure if this exercises the
3086 // original problem code path anymore.)
3088 let _bytes = s.as_bytes_with_null();
3095 let b = as_buf(a, |buf, _l| {
3096 assert_eq!(unsafe { *buf }, 65u8);
// Checks `as_buf`: inside the closure, the first byte behind the buffer
// pointer must be 65 (ASCII 'A'). The length argument is ignored.
// NOTE(review): the binding of `a` and the rest of the closure are not
// visible in this view.
3103 fn test_as_buf_small() {
3105 let b = as_buf(a, |buf, _l| {
3106 assert_eq!(unsafe { *buf }, 65u8);
3116 let sb = as_buf(s, |b, _l| b);
3117 let s_cstr = raw::from_buf(sb);
3118 assert_eq!(s_cstr, s);
// Checks the pointer and length that `as_buf` hands to its closure: the
// first byte is 'h', the byte at offset 4 is 'o', the byte at offset 5 is 0,
// and the reported length is 6 -- so the length counts the trailing 0 byte.
// NOTE(review): the binding of `a` is not visible in this view; the asserted
// bytes are consistent with "hello" -- confirm.
3123 fn test_as_buf_3() {
3125 do as_buf(a) |buf, len| {
3127 assert_eq!(a[0], 'h' as u8);
3128 assert_eq!(*buf, 'h' as u8);
3129 assert_eq!(len, 6u);
3130 assert_eq!(*ptr::offset(buf,4u), 'o' as u8);
3131 assert_eq!(*ptr::offset(buf,5u), 0u8);
// Checks `subslice_offset`, which reports the byte offset at which a slice
// starts inside the string it was taken from. Covered: a suffix slice
// (offset 7), a prefix slice (offset 0), and the slices produced by
// `line_iter` on "a\nb\nc" (offsets 0, 2 and 4).
3137 fn test_subslice_offset() {
3138 let a = "kernelsprite";
3139 let b = a.slice(7, a.len());
3140 let c = a.slice(0, a.len() - 6);
3141 assert_eq!(a.subslice_offset(b), 7);
3142 assert_eq!(a.subslice_offset(c), 0);
3144 let string = "a\nb\nc";
3145 let mut lines = ~[];
3146 for string.line_iter().advance |line| { lines.push(line) }
3147 assert_eq!(string.subslice_offset(lines[0]), 0);
3148 assert_eq!(string.subslice_offset(lines[1]), 2);
3149 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with two independent string literals, so `b` was
// not obtained by slicing `a`; the result is discarded.
// NOTE(review): presumably this call is expected to fail; the attribute
// asserting that is not visible in this view -- confirm.
3154 fn test_subslice_offset_2() {
3155 let a = "alchemiter";
3156 let b = "cruxtruder";
3157 a.subslice_offset(b);
// Round-trips a string through a byte vector: `s1` is copied out with
// `as_bytes().to_owned()` and rebuilt as `s2` with `from_bytes`. `n1` and
// `n2` capture the string length and the byte-vector length, and `i` starts
// at 0.
// NOTE(review): the code that uses `i`, `n1`, `n2` and `s2` is not visible in
// this view; presumably it compares lengths and bytes one by one -- confirm.
3161 fn vec_str_conversions() {
3162 let s1: ~str = ~"All mimsy were the borogoves";
3164 let v: ~[u8] = s1.as_bytes().to_owned();
3165 let s2: ~str = from_bytes(v);
3166 let mut i: uint = 0u;
3167 let n1: uint = s1.len();
3168 let n2: uint = v.len();
// Checks substring search with `contains`: matches in the middle, at the
// start and at the end; the empty needle is contained in every string
// (including the empty one); absent needles are rejected. The second group
// repeats this on multi-byte text, including a needle ("ไท华") whose
// characters all occur in `data` but not adjacently.
3181 fn test_contains() {
3182 assert!("abcde".contains("bcd"));
3183 assert!("abcde".contains("abcd"));
3184 assert!("abcde".contains("bcde"));
3185 assert!("abcde".contains(""));
3186 assert!("".contains(""));
3187 assert!(!"abcde".contains("def"));
3188 assert!(!"".contains("a"));
3190 let data = ~"ประเทศไทย中华Việt Nam";
3191 assert!(data.contains("ประเ"));
3192 assert!(data.contains("ะเ"));
3193 assert!(data.contains("中华"));
3194 assert!(!data.contains("ไท华"));
// Checks single-character search with `contains_char`: present characters
// are found, absent ones are not, and the empty string contains nothing.
3198 fn test_contains_char() {
3199 assert!("abc".contains_char('b'));
3200 assert!("a".contains_char('a'));
3201 assert!(!"abc".contains_char('d'));
3202 assert!(!"".contains_char('a'));
3207 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3208 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3215 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3216 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3217 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3218 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3221 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3222 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3223 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3224 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3225 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3228 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3229 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3230 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3231 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3232 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3233 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3234 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3235 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3237 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3238 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3239 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3240 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3241 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3242 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3243 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3244 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3245 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3246 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3247 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3250 for pairs.iter().advance |p| {
3251 let (s, u) = (*p).clone();
3252 assert!(s.to_utf16() == u);
3253 assert!(from_utf16(u) == s);
3254 assert!(from_utf16(s.to_utf16()) == s);
3255 assert!(from_utf16(u).to_utf16() == u);
3261 let s = ~"ศไทย中华Việt Nam";
3262 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3264 for v.iter().advance |ch| {
3265 assert!(s.char_at(pos) == *ch);
3266 pos += from_char(*ch).len();
// Checks `char_at_reverse` by walking the string backwards: `pos` starts at
// the byte length of `s`, and for each expected character (taken from `v` in
// reverse order) `char_at_reverse(pos)` must return it. `pos` is then moved
// back by that character's encoded length.
3271 fn test_char_at_reverse() {
3272 let s = ~"ศไทย中华Việt Nam";
3273 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3274 let mut pos = s.len();
3275 for v.rev_iter().advance |ch| {
3276 assert!(s.char_at_reverse(pos) == *ch);
3277 pos -= from_char(*ch).len();
// Checks `escape_unicode`, which escapes every character numerically. As the
// cases show, characters up to \xff become `\xNN`, characters up to \uffff
// become `\uNNNN`, and characters above that become `\UNNNNNNNN` -- including
// printable ASCII, whitespace and quote characters.
3282 fn test_escape_unicode() {
3283 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3284 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3285 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3286 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3287 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3288 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3289 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3290 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3291 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
// Checks `escape_default`. As the cases show, letters and the space pass
// through unchanged; \r, \n, \t, quotes and the backslash get their
// backslash escapes; and the non-ASCII characters tested are written as
// `\uNNNN` or `\UNNNNNNNN`.
3295 fn test_escape_default() {
3296 assert_eq!("abc".escape_default(), ~"abc");
3297 assert_eq!("a c".escape_default(), ~"a c");
3298 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3299 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3300 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3301 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3302 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3303 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
// Checks `to_managed`: converting a string slice (a whole literal, and a
// sub-slice of one) yields a managed `@str` with the same contents.
3307 fn test_to_managed() {
3308 assert_eq!("abc".to_managed(), @"abc");
3309 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
// Checks the total ordering (`cmp`) of string slices:
//   * a string sorts after its own proper prefix and before its extensions;
//   * identical strings compare `Equal`;
//   * otherwise the first differing byte decides, regardless of length.
//
// Every comparison is wrapped in `assert_eq!`. A bare statement of the form
// `a.cmp(b) == Greater;` only evaluates the comparison and throws the boolean
// away, so a test written that way passes no matter what `cmp` returns.
fn test_total_ord() {
    // Proper prefix vs. longer string, in both directions.
    assert_eq!("1234".cmp(& &"123"), Greater);
    assert_eq!("123".cmp(& &"1234"), Less);
    assert_eq!("1234".cmp(& &"1234"), Equal);
    // First difference is at index 5 ('5' < '6'); the longer string still loses.
    assert_eq!("12345555".cmp(& &"123456"), Less);
    // First byte decides ('2' > '1'); the shorter string still wins.
    assert_eq!("22".cmp(& &"1234"), Greater);
}
// Checks the `ch` field returned by `char_range_at` at each character's
// starting byte offset. The string is a palindrome of characters whose
// encodings are 1, 2, 3, 4, 4, 3, 2 and 1 bytes long, which is why the
// probed offsets are 0, 1, 3, 6, 10, 14, 17 and 19.
3322 fn test_char_range_at() {
3323 let data = ~"b¢€𤭢𤭢€¢b";
3324 assert_eq!('b', data.char_range_at(0).ch);
3325 assert_eq!('¢', data.char_range_at(1).ch);
3326 assert_eq!('€', data.char_range_at(3).ch);
3327 assert_eq!('𤭢', data.char_range_at(6).ch);
3328 assert_eq!('𤭢', data.char_range_at(10).ch);
3329 assert_eq!('€', data.char_range_at(14).ch);
3330 assert_eq!('¢', data.char_range_at(17).ch);
3331 assert_eq!('b', data.char_range_at(19).ch);
// Boundary check for `char_range_at_reverse`: asked for the character before
// byte offset 0, the returned `next` index must be 0 rather than a value
// produced by subtracting below zero.
3335 fn test_char_range_at_reverse_underflow() {
3336 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3341 #[allow(unnecessary_allocation)];
3343 ($s1:expr, $s2:expr, $e:expr) => {
3344 assert_eq!($s1 + $s2, $e);
3345 assert_eq!($s1.to_owned() + $s2, $e);
3346 assert_eq!($s1.to_managed() + $s2, $e);
3350 t!("foo", "bar", ~"foobar");
3351 t!("foo", @"bar", ~"foobar");
3352 t!("foo", ~"bar", ~"foobar");
3353 t!("ศไทย中", "华Việt Nam", ~"ศไทย中华Việt Nam");
3354 t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3355 t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
// Checks forward character iteration: each character yielded by `s.iter()`
// must equal `v[pos]`, and after the loop `pos` must equal `v.len()`, i.e.
// exactly as many characters were yielded as expected.
// NOTE(review): the declaration and increment of `pos` are on lines not
// visible in this view.
3359 fn test_iterator() {
3361 let s = ~"ศไทย中华Việt Nam";
3362 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3365 let mut it = s.iter();
3367 for it.advance |c| {
3368 assert_eq!(c, v[pos]);
3371 assert_eq!(pos, v.len());
// Checks reverse character iteration: `v` lists the characters of `s` from
// last to first, each character yielded by `s.rev_iter()` must equal
// `v[pos]`, and after the loop `pos` must equal `v.len()`.
// NOTE(review): the declaration and increment of `pos` are on lines not
// visible in this view.
3375 fn test_rev_iterator() {
3377 let s = ~"ศไทย中华Việt Nam";
3378 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3381 let mut it = s.rev_iter();
3383 for it.advance |c| {
3384 assert_eq!(c, v[pos]);
3387 assert_eq!(pos, v.len());
// Checks forward byte iteration: each byte yielded by `s.bytes_iter()` must
// equal `v[pos]`.
// NOTE(review): the numeric rows appear to be the body of the expected byte
// table `v`; its opening/closing lines and the handling of `pos` are not
// visible in this view -- confirm.
3391 fn test_bytes_iterator() {
3392 let s = ~"ศไทย中华Việt Nam";
3394 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3395 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3400 for s.bytes_iter().advance |b| {
3401 assert_eq!(b, v[pos]);
// Checks reverse byte iteration: `pos` starts at `v.len()` and each byte
// yielded by `s.bytes_rev_iter()` must equal `v[pos]`.
// NOTE(review): the numeric rows appear to be the body of the expected byte
// table `v` (kept in forward order); its opening/closing lines and the
// decrement of `pos` are not visible in this view -- confirm.
3407 fn test_bytes_rev_iterator() {
3408 let s = ~"ศไทย中华Việt Nam";
3410 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3411 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3414 let mut pos = v.len();
3416 for s.bytes_rev_iter().advance |b| {
3418 assert_eq!(b, v[pos]);
// Checks `split_iter` with a separator given either as a `char` or as a
// closure predicate; both forms must produce the same pieces. Covered: an
// ASCII separator (' ') and a multi-byte separator ('ä'). Separators are not
// included in the pieces, and newlines are ordinary characters here.
3423 fn test_split_char_iterator() {
3424 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3426 let split: ~[&str] = data.split_iter(' ').collect();
3427 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3429 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3430 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3433 let split: ~[&str] = data.split_iter('ä').collect();
3434 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3436 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3437 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn_iter` with a count of 3: at most three splits are made, so
// the result has four pieces and the last one holds the unsplit remainder
// (further separators included). Both the `char` and the closure form of the
// separator are covered, for ' ' and for the multi-byte 'ä'.
3440 fn test_splitn_char_iterator() {
3441 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3443 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3444 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3446 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3447 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3450 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3451 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3453 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3454 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Checks the third argument of `split_options_iter` on input that ends with
// the separator: with `true` the final empty piece after the last '\n' is
// kept, with `false` it is dropped. The leading empty piece is kept in both
// cases. The count of 1000 is far above the number of separators present.
3458 fn test_split_char_iterator_no_trailing() {
3459 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3461 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3462 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3464 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3465 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
// Checks `word_iter`: the input is split on runs of whitespace (spaces, tabs
// and newlines), and no empty words are produced for leading, trailing or
// repeated whitespace.
3469 fn test_word_iter() {
3470 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3471 let words: ~[&str] = data.word_iter().collect();
3472 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `line_iter`: the input is split on '\n', and empty lines (leading
// or in the middle) are reported as empty strings. A trailing '\n' does not
// produce an extra empty line: the input with and without the final newline
// yields the same four lines.
3476 fn test_line_iter() {
3477 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3478 let lines: ~[&str] = data.line_iter().collect();
3479 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3481 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3482 let lines: ~[&str] = data.line_iter().collect();
3483 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
// Table of cases for `split_str_iter`, driven through the local helper
// `t(s, sep, u)`, which collects the pieces of `s` split on the string `sep`;
// `u` is the expected result. Cases include: separator absent, separator at
// the start and/or end (empty pieces are expected), a multi-byte separator,
// and runs of the separator's first character ("zzz", "zzzzz").
// NOTE(review): the line in `t` that compares `v` with `u` is not visible in
// this view.
3487 fn test_split_str_iterator() {
3488 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3489 let v: ~[&str] = s.split_str_iter(sep).collect();
3492 t("--1233345--", "12345", ~["--1233345--"]);
3493 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3494 t("::hello::there", "::", ~["", "hello", "there"]);
3495 t("hello::there::", "::", ~["hello", "there", ""]);
3496 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3497 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3498 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3499 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3500 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3502 t("zz", "zz", ~["",""]);
3503 t("ok", "z", ~["ok"]);
3504 t("zzz", "zz", ~["","z"]);
3505 t("zzzzz", "zz", ~["","","z"]);
// Checks the `Zero` implementation for string types through the generic
// helper `t<S>`: the value from `Zero::zero()` must view as the empty string
// and must report `is_zero()`.
// NOTE(review): the calls that instantiate `t` for concrete string types are
// on lines not visible in this view.
3509 fn test_str_zero() {
3511 fn t<S: Zero + Str>() {
3512 let s: S = Zero::zero();
3513 assert_eq!(s.as_slice(), "");
3514 assert!(s.is_zero());
// Checks that string types work through the `Container` trait: the generic
// helper `sum_len` adds up `len()` over a slice of any `Container`, and must
// give 5 for slices of `&str`, `@str` and `~str` values.
// NOTE(review): the binding of `s` used in the last assertion is not visible
// in this view.
3523 fn test_str_container() {
3524 fn sum_len<S: Container>(v: &[S]) -> uint {
3525 v.iter().transform(|x| x.len()).sum()
3529 assert_eq!(5, sum_len(["012", "", "34"]));
3530 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3531 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3532 assert_eq!(5, sum_len([s.as_slice()]));
3538 use extra::test::BenchHarness;
// Benchmark setup: builds an ASCII-only byte buffer with `bytes!` and asserts
// it is exactly 100 bytes long, so the measured input size is fixed.
// NOTE(review): the timed body is not visible in this view; the name
// suggests it runs `is_utf8` on `s` -- confirm.
3542 fn is_utf8_100_ascii(bh: &mut BenchHarness) {
3544 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
3545 Lorem ipsum dolor sit amet, consectetur. ");
3547 assert_eq!(100, s.len());
// Benchmark setup: builds a byte buffer from multi-script, multi-byte text
// with `bytes!` and asserts it is exactly 100 bytes long, so the measured
// input size is fixed.
// NOTE(review): the timed body is not visible in this view; the name
// suggests it runs `is_utf8` on `s` -- confirm.
3554 fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
3555 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
3556 assert_eq!(100, s.len());
// Benchmark for `map_chars` on ASCII input: `s` is "Hello" repeated twenty
// times (the `\` at the end of the first literal line continues the string),
// and the mapped closure shifts every character up by one code point.
// NOTE(review): the harness call that wraps the `map_chars` line is not
// visible in this view.
3563 fn map_chars_100_ascii(bh: &mut BenchHarness) {
3564 let s = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\
3565 HelloHelloHelloHelloHelloHelloHelloHelloHelloHello";
3567 s.map_chars(|c| ((c as uint) + 1) as char);
3572 fn map_chars_100_multibytes(bh: &mut BenchHarness) {
3573 let s = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3574 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑";
3576 s.map_chars(|c| ((c as uint) + 1) as char);