1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided.
24 use container::{Container, Mutable};
26 use iterator::{Iterator, IteratorUtil, FilterIterator, AdditiveIterator, MapIterator};
29 use option::{None, Option, Some};
35 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
41 not_utf8: (~str) -> ~str;
45 Section: Creating a string
49 * Convert a vector of bytes to a new UTF-8 string
53 * Raises the `not_utf8` condition if invalid UTF-8
55 pub fn from_bytes(vv: &[u8]) -> ~str {
56 use str::not_utf8::cond;
59 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
60 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
61 first_bad_byte as uint))
64 return unsafe { raw::from_bytes(vv) }
69 * Consumes a vector of bytes to create a new utf-8 string
73 * Raises the `not_utf8` condition if invalid UTF-8
75 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
76 use str::not_utf8::cond;
79 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
80 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
81 first_bad_byte as uint))
83 return unsafe { raw::from_bytes_owned(vv) }
88 * Convert a vector of bytes to a UTF-8 string.
89 * The vector needs to be one byte longer than the string, and end with a 0 byte.
91 * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
95 * Fails if invalid UTF-8
96 * Fails if not null terminated
98 pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
99 assert_eq!(vv[vv.len() - 1], 0);
100 assert!(is_utf8(vv));
101 return unsafe { raw::from_bytes_with_null(vv) };
105 * Converts a vector to a string slice without performing any allocations.
107 * Once the slice has been validated as utf-8, it is transmuted in-place and
108 * returned as a '&str' instead of a '&[u8]'
112 * Fails if invalid UTF-8
114 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
116 assert!(is_utf8(vector));
117 let (ptr, len): (*u8, uint) = ::cast::transmute(vector);
118 let string: &'a str = ::cast::transmute((ptr, len + 1));
123 impl ToStr for ~str {
125 fn to_str(&self) -> ~str { self.to_owned() }
127 impl<'self> ToStr for &'self str {
129 fn to_str(&self) -> ~str { self.to_owned() }
131 impl ToStr for @str {
133 fn to_str(&self) -> ~str { self.to_owned() }
137 * Convert a byte to a UTF-8 string
141 * Fails if invalid UTF-8
143 pub fn from_byte(b: u8) -> ~str {
145 unsafe { ::cast::transmute(~[b, 0u8]) }
148 /// Convert a char to a string
149 pub fn from_char(ch: char) -> ~str {
155 /// Convert a vector of chars to a string
156 pub fn from_chars(chs: &[char]) -> ~str {
158 buf.reserve(chs.len());
159 for chs.iter().advance |ch| {
166 pub fn push_str(lhs: &mut ~str, rhs: &str) {
170 #[allow(missing_doc)]
171 pub trait StrVector {
172 pub fn concat(&self) -> ~str;
173 pub fn connect(&self, sep: &str) -> ~str;
176 impl<'self, S: Str> StrVector for &'self [S] {
177 /// Concatenate a vector of strings.
178 pub fn concat(&self) -> ~str {
179 if self.is_empty() { return ~""; }
181 let len = self.iter().transform(|s| s.as_slice().len()).sum();
183 let mut s = with_capacity(len);
186 do s.as_mut_buf |buf, _| {
188 for self.iter().advance |ss| {
189 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
190 let sslen = sslen - 1;
191 ptr::copy_memory(buf, ssbuf, sslen);
192 buf = buf.offset(sslen);
196 raw::set_len(&mut s, len);
201 /// Concatenate a vector of strings, placing a given separator between each.
202 pub fn connect(&self, sep: &str) -> ~str {
203 if self.is_empty() { return ~""; }
206 if sep.is_empty() { return self.concat(); }
208 // this is wrong without the guarantee that `self` is non-empty
209 let len = sep.len() * (self.len() - 1)
210 + self.iter().transform(|s| s.as_slice().len()).sum();
212 let mut first = true;
217 do s.as_mut_buf |buf, _| {
218 do sep.as_imm_buf |sepbuf, seplen| {
219 let seplen = seplen - 1;
220 let mut buf = ::cast::transmute_mut_unsafe(buf);
221 for self.iter().advance |ss| {
222 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
223 let sslen = sslen - 1;
227 ptr::copy_memory(buf, sepbuf, seplen);
228 buf = buf.offset(seplen);
230 ptr::copy_memory(buf, ssbuf, sslen);
231 buf = buf.offset(sslen);
236 raw::set_len(&mut s, len);
242 /// Something that can be used to compare against a character
244 /// Determine if the splitter should split at the given character
245 fn matches(&self, char) -> bool;
246 /// Indicate if this is only concerned about ASCII characters,
247 /// which can allow for a faster implementation.
248 fn only_ascii(&self) -> bool;
250 impl CharEq for char {
252 fn matches(&self, c: char) -> bool { *self == c }
254 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
256 impl<'self> CharEq for &'self fn(char) -> bool {
258 fn matches(&self, c: char) -> bool { (*self)(c) }
260 fn only_ascii(&self) -> bool { false }
262 impl CharEq for extern "Rust" fn(char) -> bool {
264 fn matches(&self, c: char) -> bool { (*self)(c) }
266 fn only_ascii(&self) -> bool { false }
269 impl<'self, C: CharEq> CharEq for &'self [C] {
271 fn matches(&self, c: char) -> bool {
272 self.iter().any(|m| m.matches(c))
275 fn only_ascii(&self) -> bool {
276 self.iter().all(|m| m.only_ascii())
281 /// An iterator over the substrings of a string, separated by `sep`.
283 pub struct StrCharSplitIterator<'self,Sep> {
284 priv string: &'self str,
287 /// The number of splits remaining
289 /// Whether an empty string at the end is allowed
290 priv allow_trailing_empty: bool,
292 priv only_ascii: bool
295 /// An iterator over the words of a string, separated by a sequence of whitespace
296 pub type WordIterator<'self> =
297 FilterIterator<'self, &'self str,
298 StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
300 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
301 pub type AnyLineIterator<'self> =
302 MapIterator<'self, &'self str, &'self str, StrCharSplitIterator<'self, char>>;
304 impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
306 fn next(&mut self) -> Option<&'self str> {
307 if self.finished { return None }
309 let l = self.string.len();
310 let start = self.position;
313 // this gives a *huge* speed up for splitting on ASCII
314 // characters (e.g. '\n' or ' ')
315 while self.position < l && self.count > 0 {
316 let byte = self.string[self.position];
318 if self.sep.matches(byte as char) {
319 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
327 while self.position < l && self.count > 0 {
328 let CharRange {ch, next} = self.string.char_range_at(self.position);
330 if self.sep.matches(ch) {
331 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
332 self.position = next;
336 self.position = next;
339 self.finished = true;
340 if self.allow_trailing_empty || start < l {
341 Some(unsafe { raw::slice_bytes(self.string, start, l) })
348 /// An iterator over the start and end indices of the matches of a
349 /// substring within a larger string
351 pub struct StrMatchesIndexIterator<'self> {
352 priv haystack: &'self str,
353 priv needle: &'self str,
357 /// An iterator over the substrings of a string separated by a given
360 pub struct StrStrSplitIterator<'self> {
361 priv it: StrMatchesIndexIterator<'self>,
366 impl<'self> Iterator<(uint, uint)> for StrMatchesIndexIterator<'self> {
368 fn next(&mut self) -> Option<(uint, uint)> {
369 // See Issue #1932 for why this is a naive search
370 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
371 let mut match_start = 0;
374 while self.position < h_len {
375 if self.haystack[self.position] == self.needle[match_i] {
376 if match_i == 0 { match_start = self.position; }
380 if match_i == n_len {
382 return Some((match_start, self.position));
385 // failed match, backtrack
388 self.position = match_start;
397 impl<'self> Iterator<&'self str> for StrStrSplitIterator<'self> {
399 fn next(&mut self) -> Option<&'self str> {
400 if self.finished { return None; }
402 match self.it.next() {
403 Some((from, to)) => {
404 let ret = Some(self.it.haystack.slice(self.last_end, from));
409 self.finished = true;
410 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
416 /** Splits a string into substrings with possibly internal whitespace,
417 * each of them at most `lim` bytes long. The substrings have leading and trailing
418 * whitespace removed, and are only cut at whitespace boundaries.
422 * Fails during iteration if the string contains a non-whitespace
423 * sequence longer than the limit.
425 pub fn each_split_within<'a>(ss: &'a str,
427 it: &fn(&'a str) -> bool) -> bool {
428 // Just for fun, let's write this as a state machine:
430 enum SplitWithinState {
431 A, // leading whitespace, initial state
433 C, // internal and trailing whitespace
436 Ws, // current char is whitespace
437 Cr // current char is not whitespace
440 UnderLim, // current char makes current substring still fit in limit
441 OverLim // current char makes current substring no longer fit in limit
444 let mut slice_start = 0;
445 let mut last_start = 0;
446 let mut last_end = 0;
448 let mut fake_i = ss.len();
452 let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
454 // if the limit is larger than the string, lower it to save cycles
459 let machine: &fn((uint, char)) -> bool = |(i, c)| {
460 let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
461 let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
463 state = match (state, whitespace, limit) {
465 (A, Cr, _) => { slice_start = i; last_start = i; B }
467 (B, Cr, UnderLim) => { B }
468 (B, Cr, OverLim) if (i - last_start + 1) > lim
469 => fail!("word starting with %? longer than limit!",
470 ss.slice(last_start, i + 1)),
471 (B, Cr, OverLim) => { slice(); slice_start = last_start; B }
472 (B, Ws, UnderLim) => { last_end = i; C }
473 (B, Ws, OverLim) => { last_end = i; slice(); A }
475 (C, Cr, UnderLim) => { last_start = i; B }
476 (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
477 (C, Ws, OverLim) => { slice(); A }
478 (C, Ws, UnderLim) => { C }
484 ss.iter().enumerate().advance(|x| machine(x));
486 // Let the automaton 'run out' by supplying trailing whitespace
487 while cont && match state { B | C => true, A => false } {
488 machine((fake_i, ' '));
495 * Replace all occurrences of one string with another
499 * * s - The string containing substrings to replace
500 * * from - The string to replace
501 * * to - The replacement string
505 * The original string with all occurrences of `from` replaced with `to`
507 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
508 let mut result = ~"";
509 let mut last_end = 0;
510 for s.matches_index_iter(from).advance |(start, end)| {
511 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
515 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
520 Section: Comparing strings
523 /// Bytewise slice equality
527 pub fn eq_slice(a: &str, b: &str) -> bool {
528 do a.as_imm_buf |ap, alen| {
529 do b.as_imm_buf |bp, blen| {
530 if (alen != blen) { false }
533 libc::memcmp(ap as *libc::c_void,
535 (alen - 1) as libc::size_t) == 0
544 pub fn eq_slice(a: &str, b: &str) -> bool {
545 do a.as_imm_buf |ap, alen| {
546 do b.as_imm_buf |bp, blen| {
547 if (alen != blen) { false }
550 libc::memcmp(ap as *libc::c_void,
552 (alen - 1) as libc::size_t) == 0
559 /// Bytewise string equality
561 #[lang="uniq_str_eq"]
563 pub fn eq(a: &~str, b: &~str) -> bool {
569 pub fn eq(a: &~str, b: &~str) -> bool {
577 // Utility used by various searching functions
578 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
580 for needle.bytes_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
588 /// Determines if a vector of bytes contains valid UTF-8
589 pub fn is_utf8(v: &[u8]) -> bool {
596 let w = utf8_char_width(v[i]);
597 if w == 0u { return false; }
600 if nexti > total { return false; }
602 if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
604 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
605 if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; }
614 /// Determines if a vector of `u16` contains valid UTF-16
///
/// A unit outside the surrogate range (`<= 0xD7FF` or `>= 0xE000`) is valid
/// on its own. Any other unit must be a lead surrogate (`0xD800..0xDBFF`)
/// immediately followed by a trail surrogate (`0xDC00..0xDFFF`); these are
/// the same ranges `utf16_chars` asserts while decoding.
615 pub fn is_utf16(v: &[u16]) -> bool {
621 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// `u` is a surrogate, so a second unit must exist. The comparison was
// previously inverted (`<`), which rejected every pair that was not at the
// very end of the input and let a lone trailing surrogate through to the
// pair checks below.
625 if i+1u >= len { return false; }
// `u` must be a lead (high) surrogate; the range starts at 0xD800, not 0xD7FF.
627 if u < 0xD800_u16 || u > 0xDBFF_u16 { return false; }
// `u2` must be a trail (low) surrogate.
// NOTE(review): `u2` is presumably `v[i+1u]`; its definition is not visible
// in this excerpt -- confirm.
628 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
635 /// Iterates over the utf-16 characters in the specified slice, yielding each
636 /// decoded unicode character to the function provided.
640 /// * Fails on invalid utf-16 data
641 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
644 while (i < len && v[i] != 0u16) {
647 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
653 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
654 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
655 let mut c = (u - 0xD800_u16) as char;
657 c |= (u2 - 0xDC00_u16) as char;
658 c |= 0x1_0000_u32 as char;
666 * Allocates a new string from the utf-16 slice provided
668 pub fn from_utf16(v: &[u16]) -> ~str {
670 buf.reserve(v.len());
671 utf16_chars(v, |ch| buf.push_char(ch));
676 * Allocates a new string with the specified capacity. The string returned is
677 * the empty string, but has capacity for much more.
680 pub fn with_capacity(capacity: uint) -> ~str {
682 buf.reserve(capacity);
687 * As char_len but for a slice of a string
691 * * s - A valid string
692 * * start - The position inside `s` where to start counting in bytes
693 * * end - The position where to stop counting
697 * The number of Unicode characters in `s` between the given indices.
699 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
700 assert!(s.is_char_boundary(start));
701 assert!(s.is_char_boundary(end));
705 let next = s.char_range_at(i).next;
712 /// Counts the number of bytes taken by the first `n` chars in `s`
713 /// starting from `start`.
714 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
715 assert!(s.is_char_boundary(start));
721 let next = s.char_range_at(end).next;
728 // https://tools.ietf.org/html/rfc3629
// Lookup table indexed by the first byte of a UTF-8 sequence, giving the
// total length in bytes of the sequence that byte introduces. A width of 0
// marks a byte that can never start a sequence; `is_utf8` rejects input as
// soon as it sees one in lead position.
729 static UTF8_CHAR_WIDTH: [u8, ..256] = [
// 0x00..0x7F: ASCII, one byte each.
730 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
731 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
732 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
733 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
734 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
735 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
736 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
737 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
// 0x80..0xBF: continuation bytes, never valid as a first byte.
738 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
739 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
740 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
741 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
// 0xC0 and 0xC1 could only begin overlong two-byte encodings of ASCII and
// are forbidden by RFC 3629, so they get width 0 like other invalid leads.
// 0xC2..0xDF start two-byte sequences.
742 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
743 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
// 0xE0..0xEF start three-byte sequences.
744 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
// 0xF0..0xF4 start four-byte sequences; 0xF5..0xFF are never valid.
745 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
748 /// Given a first byte, determine how many bytes are in this UTF-8 character
749 pub fn utf8_char_width(b: u8) -> uint {
750 return UTF8_CHAR_WIDTH[b] as uint;
753 #[allow(missing_doc)]
754 pub struct CharRange {
759 // UTF-8 tags and ranges
// Bit pattern of a continuation byte (0b1000_0000). `is_utf8` masks each
// trailing byte with 192 (0b1100_0000) and compares the result to this.
760 static TAG_CONT_U8: u8 = 128u8;
// Same continuation tag as above, as a `uint`.
761 static TAG_CONT: uint = 128u;
// NOTE(review): the remaining constants are not used in the visible part of
// this file. The MAX_* values look like exclusive upper bounds on the code
// points that fit in one, two and three bytes (2^7, 2^11, 2^16), and the
// TAG_* values like the lead-byte tags 0b1100_0000, 0b1110_0000 and
// 0b1111_0000 -- confirm against the encoder/decoder that uses them.
762 static MAX_ONE_B: uint = 128u;
763 static TAG_TWO_B: uint = 192u;
764 static MAX_TWO_B: uint = 2048u;
765 static TAG_THREE_B: uint = 224u;
766 static MAX_THREE_B: uint = 65536u;
767 static TAG_FOUR_B: uint = 240u;
769 /// Unsafe operations
777 use vec::MutableVector;
779 /// Create a Rust string from a null-terminated *u8 buffer
780 pub unsafe fn from_buf(buf: *u8) -> ~str {
785 curr = ptr::offset(buf, i);
787 return from_buf_len(buf, i);
790 /// Create a Rust string from a *u8 buffer of the given length
791 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
792 let mut v: ~[u8] = vec::with_capacity(len + 1);
793 v.as_mut_buf(|vbuf, _len| {
794 ptr::copy_memory(vbuf, buf as *u8, len)
796 vec::raw::set_len(&mut v, len);
800 return ::cast::transmute(v);
803 /// Create a Rust string from a null-terminated C string
804 pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
805 from_buf(::cast::transmute(c_str))
808 /// Create a Rust string from a `*c_char` buffer of the given length
809 pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
810 from_buf_len(::cast::transmute(c_str), len)
813 /// Converts a vector of bytes to a new owned string.
814 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
815 do v.as_imm_buf |buf, len| {
816 from_buf_len(buf, len)
820 /// Converts an owned vector of bytes to a new owned string. This assumes
821 /// that the utf-8-ness of the vector has already been validated
822 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
827 /// Converts a vector of bytes to a string.
828 /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
829 /// the string, if possible ending in a 0 byte.
830 pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
834 /// Converts a byte to a string.
835 pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
837 /// Form a slice from a C string. Unsafe because the caller must ensure the
838 /// C string has the static lifetime, or else the return value may be
839 /// invalidated later.
840 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
846 curr = ptr::offset(s, len);
848 let v = (s, len + 1);
849 assert!(is_utf8(::cast::transmute(v)));
854 * Takes a bytewise (not UTF-8) slice from a string.
856 * Returns the substring from [`begin`..`end`).
860 * If begin is greater than end.
861 * If end is greater than the length of the string.
864 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
865 do s.as_imm_buf |sbuf, n| {
866 assert!((begin <= end));
869 let tuple = (ptr::offset(sbuf, begin), end - begin + 1);
870 ::cast::transmute(tuple)
874 /// Appends a byte to a string. (Not UTF-8 safe).
875 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
876 let new_len = s.len() + 1;
877 s.reserve_at_least(new_len);
878 do s.as_mut_buf |buf, len| {
879 *ptr::mut_offset(buf, len) = b;
881 set_len(&mut *s, new_len);
884 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
885 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
886 let new_len = s.len() + bytes.len();
887 s.reserve_at_least(new_len);
888 for bytes.iter().advance |byte| { push_byte(&mut *s, *byte); }
891 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
892 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
896 set_len(s, len - 1u);
900 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
901 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
905 *s = s.slice(1, len).to_owned();
909 /// Sets the length of the string and adds the null terminator
911 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
912 let v: **mut vec::UnboxedVecRepr = cast::transmute(v);
913 let repr: *mut vec::UnboxedVecRepr = *v;
914 (*repr).fill = new_len + 1u;
915 let null = ptr::mut_offset(cast::transmute(&((*repr).data)),
921 fn test_from_buf_len() {
923 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
924 let b = vec::raw::to_ptr(a);
925 let c = from_buf_len(b, 3u);
926 assert_eq!(c, ~"AAA");
935 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
936 use super::{Str, eq_slice};
938 impl<'self> Add<&'self str,~str> for &'self str {
940 fn add(&self, rhs: & &'self str) -> ~str {
941 let mut ret = self.to_owned();
947 impl<'self> TotalOrd for &'self str {
949 fn cmp(&self, other: & &'self str) -> Ordering {
950 for self.bytes_iter().zip(other.bytes_iter()).advance |(s_b, o_b)| {
951 match s_b.cmp(&o_b) {
952 Greater => return Greater,
958 self.len().cmp(&other.len())
962 impl TotalOrd for ~str {
964 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
967 impl TotalOrd for @str {
969 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
972 impl<'self> Eq for &'self str {
974 fn eq(&self, other: & &'self str) -> bool {
975 eq_slice((*self), (*other))
978 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
983 fn eq(&self, other: &~str) -> bool {
984 eq_slice((*self), (*other))
987 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
992 fn eq(&self, other: &@str) -> bool {
993 eq_slice((*self), (*other))
996 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
999 impl<'self> TotalEq for &'self str {
1001 fn equals(&self, other: & &'self str) -> bool {
1002 eq_slice((*self), (*other))
1006 impl TotalEq for ~str {
1008 fn equals(&self, other: &~str) -> bool {
1009 eq_slice((*self), (*other))
1013 impl TotalEq for @str {
1015 fn equals(&self, other: &@str) -> bool {
1016 eq_slice((*self), (*other))
1020 impl<'self> Ord for &'self str {
1022 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
1024 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
1026 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
1028 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1033 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1035 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1037 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1039 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1044 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1046 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1048 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1050 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1053 impl<'self, S: Str> Equiv<S> for &'self str {
1055 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1058 impl<'self, S: Str> Equiv<S> for @str {
1060 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1063 impl<'self, S: Str> Equiv<S> for ~str {
1065 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1072 /// Any string that can be represented as a slice
1074 /// Work with `self` as a slice.
1075 fn as_slice<'a>(&'a self) -> &'a str;
1078 impl<'self> Str for &'self str {
1080 fn as_slice<'a>(&'a self) -> &'a str { *self }
1082 impl<'self> Str for ~str {
1084 fn as_slice<'a>(&'a self) -> &'a str {
1085 let s: &'a str = *self; s
1088 impl<'self> Str for @str {
1090 fn as_slice<'a>(&'a self) -> &'a str {
1091 let s: &'a str = *self; s
1095 impl<'self> Container for &'self str {
1097 fn len(&self) -> uint {
1098 do self.as_imm_buf |_p, n| { n - 1u }
1101 fn is_empty(&self) -> bool {
1106 impl Container for ~str {
1108 fn len(&self) -> uint { self.as_slice().len() }
1110 fn is_empty(&self) -> bool { self.len() == 0 }
1113 impl Container for @str {
1115 fn len(&self) -> uint { self.as_slice().len() }
1117 fn is_empty(&self) -> bool { self.len() == 0 }
1120 impl Mutable for ~str {
1121 /// Remove all content, make the string empty
1123 fn clear(&mut self) {
1125 raw::set_len(self, 0)
1131 #[allow(missing_doc)]
1132 pub trait StrSlice<'self> {
1133 fn contains<'a>(&self, needle: &'a str) -> bool;
1134 fn contains_char(&self, needle: char) -> bool;
1135 fn iter(&self) -> StrCharIterator<'self>;
1136 fn rev_iter(&self) -> StrCharRevIterator<'self>;
1137 fn bytes_iter(&self) -> StrBytesIterator<'self>;
1138 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
1139 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
1140 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep>;
1141 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1142 -> StrCharSplitIterator<'self, Sep>;
1143 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self>;
1144 fn split_str_iter(&self, &'self str) -> StrStrSplitIterator<'self>;
1145 fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
1146 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1147 fn word_iter(&self) -> WordIterator<'self>;
1148 fn ends_with(&self, needle: &str) -> bool;
1149 fn is_whitespace(&self) -> bool;
1150 fn is_alphanumeric(&self) -> bool;
1151 fn char_len(&self) -> uint;
1153 fn slice(&self, begin: uint, end: uint) -> &'self str;
1154 fn slice_from(&self, begin: uint) -> &'self str;
1155 fn slice_to(&self, end: uint) -> &'self str;
1157 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1159 fn starts_with(&self, needle: &str) -> bool;
1160 fn escape_default(&self) -> ~str;
1161 fn escape_unicode(&self) -> ~str;
1162 fn trim(&self) -> &'self str;
1163 fn trim_left(&self) -> &'self str;
1164 fn trim_right(&self) -> &'self str;
1165 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1166 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1167 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1168 fn replace(&self, from: &str, to: &str) -> ~str;
1169 fn to_owned(&self) -> ~str;
1170 fn to_managed(&self) -> @str;
1171 fn to_utf16(&self) -> ~[u16];
1172 fn is_char_boundary(&self, index: uint) -> bool;
1173 fn char_range_at(&self, start: uint) -> CharRange;
1174 fn char_at(&self, i: uint) -> char;
1175 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1176 fn char_at_reverse(&self, i: uint) -> char;
1177 fn as_bytes(&self) -> &'self [u8];
1179 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1180 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1181 fn find_str(&self, &str) -> Option<uint>;
1183 fn repeat(&self, nn: uint) -> ~str;
1185 fn slice_shift_char(&self) -> (char, &'self str);
1187 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1189 fn lev_distance(&self, t: &str) -> uint;
1191 fn subslice_offset(&self, inner: &str) -> uint;
1193 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T;
1194 fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T;
1197 /// Extension methods for strings
1198 impl<'self> StrSlice<'self> for &'self str {
1200 * Returns true if one string contains another
1204 * * needle - The string to look for
1207 fn contains<'a>(&self, needle: &'a str) -> bool {
1208 self.find_str(needle).is_some()
1211 * Returns true if a string contains a char.
1215 * * needle - The char to look for
1218 fn contains_char(&self, needle: char) -> bool {
1219 self.find(needle).is_some()
1221 /// An iterator over the characters of `self`. Note, this iterates
1222 /// over unicode code-points, not unicode graphemes.
1227 /// let v: ~[char] = "abc åäö".iter().collect();
1228 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1231 fn iter(&self) -> StrCharIterator<'self> {
1237 /// An iterator over the characters of `self`, in reverse order.
1239 fn rev_iter(&self) -> StrCharRevIterator<'self> {
1240 StrCharRevIterator {
1246 /// An iterator over the bytes of `self`
1248 fn bytes_iter(&self) -> StrBytesIterator<'self> {
1249 StrBytesIterator { it: self.as_bytes().iter() }
1251 /// An iterator over the bytes of `self`, in reverse order
1253 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self> {
1254 StrBytesRevIterator { it: self.as_bytes().rev_iter() }
1257 /// An iterator over substrings of `self`, separated by characters
1258 /// matched by `sep`.
1263 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1264 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1266 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1267 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1270 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
1271 self.split_options_iter(sep, self.len(), true)
1274 /// An iterator over substrings of `self`, separated by characters
1275 /// matched by `sep`, restricted to splitting at most `count`
1278 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep> {
1279 self.split_options_iter(sep, count, true)
1282 /// An iterator over substrings of `self`, separated by characters
1283 /// matched by `sep`, splitting at most `count` times, and
1284 /// possibly not including the trailing empty substring, if it
1287 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1288 -> StrCharSplitIterator<'self, Sep> {
1289 let only_ascii = sep.only_ascii();
1290 StrCharSplitIterator {
1295 allow_trailing_empty: allow_trailing_empty,
1297 only_ascii: only_ascii
1300 /// An iterator over the start and end indices of each match of
1301 /// `sep` within `self`.
1303 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self> {
1304 assert!(!sep.is_empty())
1305 StrMatchesIndexIterator {
1312 * An iterator over the substrings of `self` separated by `sep`.
1317 * let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
1318 * assert_eq!(v, ["", "XXX", "YYY", ""]);
1322 fn split_str_iter(&self, sep: &'self str) -> StrStrSplitIterator<'self> {
1323 StrStrSplitIterator {
1324 it: self.matches_index_iter(sep),
1330 /// An iterator over the lines of a string (subsequences separated
1333 fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
1334 self.split_options_iter('\n', self.len(), false)
1337 /// An iterator over the lines of a string, separated by either
1338 /// `\n` or (`\r\n`).
1339 fn any_line_iter(&self) -> AnyLineIterator<'self> {
1340 do self.line_iter().transform |line| {
1342 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1347 /// An iterator over the words of a string (subsequences separated
1348 /// by any sequence of whitespace).
1350 fn word_iter(&self) -> WordIterator<'self> {
1351 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1355 * Returns true if the string contains only whitespace
1357 * Whitespace characters are determined by `char::is_whitespace`
1360 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1362 * Returns true if the string contains only alphanumerics
1364 * Alphanumeric characters are determined by `char::is_alphanumeric`
1367 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1368 /// Returns the number of characters that a string holds
1370 fn char_len(&self) -> uint { self.iter().len_() }
1373 * Returns a slice of the given string from the byte range
1376 * Fails when `begin` and `end` do not point to valid characters or
1377 * beyond the last character of the string
1380 fn slice(&self, begin: uint, end: uint) -> &'self str {
1381 assert!(self.is_char_boundary(begin));
1382 assert!(self.is_char_boundary(end));
1383 unsafe { raw::slice_bytes(*self, begin, end) }
1385 /// Returns a slice of the string from `begin` to its end.
1387 /// Fails when `begin` does not point to a valid character, or is
1390 fn slice_from(&self, begin: uint) -> &'self str {
1391 self.slice(begin, self.len())
1393 /// Returns a slice of the string from the beginning to byte
1396 /// Fails when `end` does not point to a valid character, or is
1399 fn slice_to(&self, end: uint) -> &'self str {
1403 /// Returns a slice of the string from the char range
1404 /// [`begin`..`end`).
1406 /// Fails if `begin` > `end` or if either `begin` or `end` is
1407 /// beyond the last character of the string.
1408 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1409 assert!(begin <= end);
1410 // not sure how to use the iterators for this nicely.
1411 let mut position = 0;
1414 while count < begin && position < l {
1415 position = self.char_range_at(position).next;
1418 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1419 let start_byte = position;
1420 while count < end && position < l {
1421 position = self.char_range_at(position).next;
1424 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1426 self.slice(start_byte, position)
1429 /// Returns true if `needle` is a prefix of the string.
1430 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1431 let (self_len, needle_len) = (self.len(), needle.len());
1432 if needle_len == 0u { true }
1433 else if needle_len > self_len { false }
1434 else { match_at(*self, needle, 0u) }
1436 /// Returns true if `needle` is a suffix of the string.
1437 fn ends_with(&self, needle: &str) -> bool {
1438 let (self_len, needle_len) = (self.len(), needle.len());
1439 if needle_len == 0u { true }
1440 else if needle_len > self_len { false }
1441 else { match_at(*self, needle, self_len - needle_len) }
1444 /// Escape each char in `s` with char::escape_default.
1445 fn escape_default(&self) -> ~str {
1446 let mut out: ~str = ~"";
1447 out.reserve_at_least(self.len());
1448 for self.iter().advance |c| {
1449 do c.escape_default |c| {
1456 /// Escape each char in `s` with char::escape_unicode.
1457 fn escape_unicode(&self) -> ~str {
1458 let mut out: ~str = ~"";
1459 out.reserve_at_least(self.len());
1460 for self.iter().advance |c| {
1461 do c.escape_unicode |c| {
1468 /// Returns a string with leading and trailing whitespace removed
1470 fn trim(&self) -> &'self str {
1471 self.trim_left().trim_right()
1473 /// Returns a string with leading whitespace removed
1475 fn trim_left(&self) -> &'self str {
1476 self.trim_left_chars(&char::is_whitespace)
1478 /// Returns a string with trailing whitespace removed
1480 fn trim_right(&self) -> &'self str {
1481 self.trim_right_chars(&char::is_whitespace)
1485 * Returns a string with characters that match `to_trim` removed.
1489 * * to_trim - a character matcher
1494 * assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1495 * assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1496 * assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1500 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1501 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1504 * Returns a string with leading `chars_to_trim` removed.
1508 * * to_trim - a character matcher
1513 * assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1514 * assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1515 * assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1519 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1520 match self.find(|c: char| !to_trim.matches(c)) {
1522 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1526 * Returns a string with trailing `chars_to_trim` removed.
1530 * * to_trim - a character matcher
1535 * assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1536 * assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1537 * assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1541 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1542 match self.rfind(|c: char| !to_trim.matches(c)) {
1545 let next = self.char_range_at(last).next;
1546 unsafe { raw::slice_bytes(*self, 0u, next) }
1552 * Replace all occurrences of one string with another
1556 * * from - The string to replace
1557 * * to - The replacement string
1561 * The original string with all occurances of `from` replaced with `to`
1563 pub fn replace(&self, from: &str, to: &str) -> ~str {
1564 let mut result = ~"";
1565 let mut last_end = 0;
1566 for self.matches_index_iter(from).advance |(start, end)| {
1567 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1568 result.push_str(to);
1571 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1575 /// Copy a slice into a new unique str
1577 fn to_owned(&self) -> ~str {
1578 do self.as_imm_buf |src, len| {
1581 let mut v = vec::with_capacity(len);
1583 do v.as_mut_buf |dst, _| {
1584 ptr::copy_memory(dst, src, len - 1);
1586 vec::raw::set_len(&mut v, len - 1);
1588 ::cast::transmute(v)
1594 fn to_managed(&self) -> @str {
1595 let v = at_vec::from_fn(self.len() + 1, |i| {
1596 if i == self.len() { 0 } else { self[i] }
1598 unsafe { ::cast::transmute(v) }
1601 /// Converts to a vector of `u16` encoded as UTF-16.
1602 fn to_utf16(&self) -> ~[u16] {
1604 for self.iter().advance |ch| {
1605 // Arithmetic with u32 literals is easier on the eyes than chars.
1606 let mut ch = ch as u32;
1608 if (ch & 0xFFFF_u32) == ch {
1609 // The BMP falls through (assuming non-surrogate, as it
1611 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1614 // Supplementary planes break into surrogates.
1615 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1617 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1618 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1619 u.push_all([w1, w2])
1626 * Returns false if the index points into the middle of a multi-byte
1627 * character sequence.
1629 fn is_char_boundary(&self, index: uint) -> bool {
1630 if index == self.len() { return true; }
1631 let b = self[index];
1632 return b < 128u8 || b >= 192u8;
1636 * Pluck a character out of a string and return the index of the next
1639 * This function can be used to iterate over the unicode characters of a
1645 * let s = "中华Việt Nam";
1647 * while i < s.len() {
1648 * let CharRange {ch, next} = s.char_range_at(i);
1649 * std::io::println(fmt!("%u: %c",i,ch));
1672 * * i - The byte offset of the char to extract
1676 * A record {ch: char, next: uint} containing the char value and the byte
1677 * index of the next unicode character.
1681 * If `i` is greater than or equal to the length of the string.
1682 * If `i` is not the index of the beginning of a valid UTF-8 character.
1685 fn char_range_at(&self, i: uint) -> CharRange {
1686 if (self[i] < 128u8) {
1687 return CharRange {ch: self[i] as char, next: i + 1 };
1690 // Multibyte case is a fn to allow char_range_at to inline cleanly
1691 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1692 let mut val = s[i] as uint;
1693 let w = UTF8_CHAR_WIDTH[val] as uint;
1696 // First byte is special, only want bottom 5 bits for width 2, 4 bits
1697 // for width 3, and 3 bits for width 4
1699 val = (val << 6) | (s[i + 1] & 63u8) as uint;
1700 if w > 2 { val = (val << 6) | (s[i + 2] & 63u8) as uint; }
1701 if w > 3 { val = (val << 6) | (s[i + 3] & 63u8) as uint; }
1703 return CharRange {ch: val as char, next: i + w};
1706 return multibyte_char_range_at(*self, i);
1709 /// Plucks the character starting at the `i`th byte of a string
1711 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1714 * Given a byte position and a str, return the previous char and its position.
1716 * This function can be used to iterate over a unicode string in reverse.
1718 * Returns 0 for next index if called on start index 0.
1720 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1721 let mut prev = start;
1723 // while there is a previous byte == 10......
1724 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1728 // now refer to the initial byte of previous char
1736 let ch = self.char_at(prev);
1737 return CharRange {ch:ch, next:prev};
1740 /// Plucks the character ending at the `i`th byte of a string
1742 fn char_at_reverse(&self, i: uint) -> char {
1743 self.char_range_at_reverse(i).ch
1747 * Work with the byte buffer of a string as a byte slice.
1749 * The byte slice does not include the null terminator.
1751 fn as_bytes(&self) -> &'self [u8] {
1753 let (ptr, len): (*u8, uint) = ::cast::transmute(*self);
1754 let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
1755 ::cast::transmute(outgoing_tuple)
1760 * Returns the byte index of the first character of `self` that matches `search`
1764 * `Some` containing the byte index of the last matching character
1765 * or `None` if there is no match
1767 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
1768 if search.only_ascii() {
1769 for self.bytes_iter().enumerate().advance |(i, b)| {
1770 if search.matches(b as char) { return Some(i) }
1774 for self.iter().advance |c| {
1775 if search.matches(c) { return Some(index); }
1776 index += c.len_utf8_bytes();
1783 * Returns the byte index of the last character of `self` that matches `search`
1787 * `Some` containing the byte index of the last matching character
1788 * or `None` if there is no match
1790 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
1791 let mut index = self.len();
1792 if search.only_ascii() {
1793 for self.bytes_rev_iter().advance |b| {
1795 if search.matches(b as char) { return Some(index); }
1798 for self.rev_iter().advance |c| {
1799 index -= c.len_utf8_bytes();
1800 if search.matches(c) { return Some(index); }
1808 * Returns the byte index of the first matching substring
1812 * * `needle` - The string to search for
1816 * `Some` containing the byte index of the first matching substring
1817 * or `None` if there is no match
1819 fn find_str(&self, needle: &str) -> Option<uint> {
1820 if needle.is_empty() {
1823 self.matches_index_iter(needle)
1825 .map_consume(|(start, _end)| start)
1829 /// Given a string, make a new string with repeated copies of it.
1830 fn repeat(&self, nn: uint) -> ~str {
1831 do self.as_imm_buf |buf, len| {
1832 // ignore the NULL terminator
1834 let mut ret = with_capacity(nn * len);
1837 do ret.as_mut_buf |rbuf, _len| {
1838 let mut rbuf = rbuf;
1841 ptr::copy_memory(rbuf, buf, len);
1842 rbuf = rbuf.offset(len);
1845 raw::set_len(&mut ret, nn * len);
1852 * Retrieves the first character from a string slice and returns
1853 * it. This does not allocate a new string; instead, it returns a
1854 * slice that point one character beyond the character that was
1859 * If the string does not contain any characters
1862 fn slice_shift_char(&self) -> (char, &'self str) {
1863 let CharRange {ch, next} = self.char_range_at(0u);
1864 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1865 return (ch, next_s);
1869 /// Apply a function to each character.
1870 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
1871 let mut result = with_capacity(self.len());
1872 for self.iter().advance |cc| {
1873 result.push_char(ff(cc));
1878 /// Levenshtein Distance between two strings.
1879 fn lev_distance(&self, t: &str) -> uint {
1880 let slen = self.len();
1883 if slen == 0 { return tlen; }
1884 if tlen == 0 { return slen; }
1886 let mut dcol = vec::from_fn(tlen + 1, |x| x);
1888 for self.iter().enumerate().advance |(i, sc)| {
1890 let mut current = i;
1891 dcol[0] = current + 1;
1893 for t.iter().enumerate().advance |(j, tc)| {
1895 let next = dcol[j + 1];
1898 dcol[j + 1] = current;
1900 dcol[j + 1] = ::cmp::min(current, next);
1901 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
1913 * Returns the byte offset of an inner slice relative to an enclosing outer slice.
1915 * Fails if `inner` is not a direct slice contained within self.
1920 * let string = "a\nb\nc";
1921 * let mut lines = ~[];
1922 * for string.line_iter().advance |line| { lines.push(line) }
1924 * assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1925 * assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1926 * assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1930 fn subslice_offset(&self, inner: &str) -> uint {
1931 do self.as_imm_buf |a, a_len| {
1932 do inner.as_imm_buf |b, b_len| {
1938 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
1939 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
1941 assert!(a_start <= b_start);
1942 assert!(b_end <= a_end);
1949 * Work with the byte buffer and length of a slice.
1951 * The given length is one byte longer than the 'official' indexable
1952 * length of the string. This is to permit probing the byte past the
1953 * indexable area for a null byte, as is the case in slices pointing
1954 * to full strings, or suffixes of them.
1957 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T {
1958 let v: &[u8] = unsafe { cast::transmute(*self) };
1963 * Work with the byte buffer of a string as a null-terminated C string.
1965 * Allows for unsafe manipulation of strings, which is useful for foreign
1966 * interop. This is similar to `str::as_buf`, but guarantees null-termination.
1967 * If the given slice is not already null-terminated, this function will
1968 * allocate a temporary, copy the slice, null terminate it, and pass
1974 * let s = "PATH".as_c_str(|path| libc::getenv(path));
1978 fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T {
1979 do self.as_imm_buf |buf, len| {
1980 // NB: len includes the trailing null.
1982 if unsafe { *(ptr::offset(buf, len - 1)) != 0 } {
1983 self.to_owned().as_c_str(|s| f(s))
1985 f(buf as *libc::c_char)
/// Byte-level access for string types whose buffer carries a trailing
/// null byte.
#[allow(missing_doc)]
pub trait NullTerminatedStr {
    /// Borrows the string's bytes as a slice that includes the null
    /// terminator.
    fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
}
1996 impl NullTerminatedStr for ~str {
1998 * Work with the byte buffer of a string as a byte slice.
2000 * The byte slice does include the null terminator.
2003 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2004 let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
2005 let slice: &'a [u8] = *ptr;
2009 impl NullTerminatedStr for @str {
2011 * Work with the byte buffer of a string as a byte slice.
2013 * The byte slice does include the null terminator.
2016 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2017 let ptr: &'a @[u8] = unsafe { ::cast::transmute(self) };
2018 let slice: &'a [u8] = *ptr;
2023 #[allow(missing_doc)]
2024 pub trait OwnedStr {
2025 fn push_str_no_overallocate(&mut self, rhs: &str);
2026 fn push_str(&mut self, rhs: &str);
2027 fn push_char(&mut self, c: char);
2028 fn pop_char(&mut self) -> char;
2029 fn shift_char(&mut self) -> char;
2030 fn unshift_char(&mut self, ch: char);
2031 fn append(&self, rhs: &str) -> ~str; // FIXME #4850: this should consume self.
2032 fn reserve(&mut self, n: uint);
2033 fn reserve_at_least(&mut self, n: uint);
2034 fn capacity(&self) -> uint;
2035 fn to_bytes_with_null(self) -> ~[u8];
2038 * Work with the mutable byte buffer and length of a slice.
2040 * The given length is one byte longer than the 'official' indexable
2041 * length of the string. This is to permit probing the byte past the
2042 * indexable area for a null byte, as is the case in slices pointing
2043 * to full strings, or suffixes of them.
2045 * Make sure any mutations to this buffer keep this string valid UTF8.
2047 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T;
2050 impl OwnedStr for ~str {
2051 /// Appends a string slice to the back of a string, without overallocating
2053 fn push_str_no_overallocate(&mut self, rhs: &str) {
2055 let llen = self.len();
2056 let rlen = rhs.len();
2057 self.reserve(llen + rlen);
2058 do self.as_imm_buf |lbuf, _llen| {
2059 do rhs.as_imm_buf |rbuf, _rlen| {
2060 let dst = ptr::offset(lbuf, llen);
2061 let dst = ::cast::transmute_mut_unsafe(dst);
2062 ptr::copy_memory(dst, rbuf, rlen);
2065 raw::set_len(self, llen + rlen);
2069 /// Appends a string slice to the back of a string
2071 fn push_str(&mut self, rhs: &str) {
2073 let llen = self.len();
2074 let rlen = rhs.len();
2075 self.reserve_at_least(llen + rlen);
2076 do self.as_imm_buf |lbuf, _llen| {
2077 do rhs.as_imm_buf |rbuf, _rlen| {
2078 let dst = ptr::offset(lbuf, llen);
2079 let dst = ::cast::transmute_mut_unsafe(dst);
2080 ptr::copy_memory(dst, rbuf, rlen);
2083 raw::set_len(self, llen + rlen);
2086 /// Appends a character to the back of a string
2088 fn push_char(&mut self, c: char) {
2089 assert!(c as uint <= 0x10ffff); // FIXME: #7609: should be enforced on all `char`
2091 let code = c as uint;
2092 let nb = if code < MAX_ONE_B { 1u }
2093 else if code < MAX_TWO_B { 2u }
2094 else if code < MAX_THREE_B { 3u }
2096 let len = self.len();
2097 let new_len = len + nb;
2098 self.reserve_at_least(new_len);
2100 do self.as_mut_buf |buf, _len| {
2103 *ptr::mut_offset(buf, off) = code as u8;
2106 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2107 *ptr::mut_offset(buf, off + 1u) = (code & 63u | TAG_CONT) as u8;
2110 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2111 *ptr::mut_offset(buf, off + 1u) = (code >> 6u & 63u | TAG_CONT) as u8;
2112 *ptr::mut_offset(buf, off + 2u) = (code & 63u | TAG_CONT) as u8;
2115 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2116 *ptr::mut_offset(buf, off + 1u) = (code >> 12u & 63u | TAG_CONT) as u8;
2117 *ptr::mut_offset(buf, off + 2u) = (code >> 6u & 63u | TAG_CONT) as u8;
2118 *ptr::mut_offset(buf, off + 3u) = (code & 63u | TAG_CONT) as u8;
2123 raw::set_len(self, new_len);
2127 * Remove the final character from a string and return it
2131 * If the string does not contain any characters
2133 fn pop_char(&mut self) -> char {
2134 let end = self.len();
2136 let CharRange {ch, next} = self.char_range_at_reverse(end);
2137 unsafe { raw::set_len(self, next); }
2142 * Remove the first character from a string and return it
2146 * If the string does not contain any characters
2148 fn shift_char(&mut self) -> char {
2149 let CharRange {ch, next} = self.char_range_at(0u);
2150 *self = self.slice(next, self.len()).to_owned();
2154 /// Prepend a char to a string
2155 fn unshift_char(&mut self, ch: char) {
2156 // This could be more efficient.
2157 let mut new_str = ~"";
2158 new_str.push_char(ch);
2159 new_str.push_str(*self);
2163 /// Concatenate two strings together.
2165 fn append(&self, rhs: &str) -> ~str {
2166 // FIXME #4850: this should consume self, but that causes segfaults
2167 let mut v = self.clone();
2168 v.push_str_no_overallocate(rhs);
2173 * Reserves capacity for exactly `n` bytes in the given string, not including
2174 * the null terminator.
2176 * Assuming single-byte characters, the resulting string will be large
2177 * enough to hold a string of length `n`. To account for the null terminator,
2178 * the underlying buffer will have the size `n` + 1.
2180 * If the capacity for `s` is already equal to or greater than the requested
2181 * capacity, then no action is taken.
2186 * * n - The number of bytes to reserve space for
2189 pub fn reserve(&mut self, n: uint) {
2191 let v: *mut ~[u8] = cast::transmute(self);
2192 (*v).reserve(n + 1);
2197 * Reserves capacity for at least `n` bytes in the given string, not including
2198 * the null terminator.
2200 * Assuming single-byte characters, the resulting string will be large
2201 * enough to hold a string of length `n`. To account for the null terminator,
2202 * the underlying buffer will have the size `n` + 1.
2204 * This function will over-allocate in order to amortize the allocation costs
2205 * in scenarios where the caller may need to repeatedly reserve additional
2208 * If the capacity for `s` is already equal to or greater than the requested
2209 * capacity, then no action is taken.
2214 * * n - The number of bytes to reserve space for
2217 fn reserve_at_least(&mut self, n: uint) {
2218 self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2222 * Returns the number of single-byte characters the string can hold without
2225 fn capacity(&self) -> uint {
2226 let buf: &~[u8] = unsafe { cast::transmute(self) };
2227 let vcap = buf.capacity();
2232 /// Convert to a vector of bytes. This does not allocate a new
2233 /// string, and includes the null terminator.
2235 fn to_bytes_with_null(self) -> ~[u8] {
2236 unsafe { ::cast::transmute(self) }
2240 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2241 let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2246 impl Clone for ~str {
2248 fn clone(&self) -> ~str {
2253 impl Clone for @str {
2255 fn clone(&self) -> @str {
2260 /// External iterator for a string's characters. Use with the `std::iterator`
2263 pub struct StrCharIterator<'self> {
2265 priv string: &'self str,
2268 impl<'self> Iterator<char> for StrCharIterator<'self> {
2270 fn next(&mut self) -> Option<char> {
2271 if self.index < self.string.len() {
2272 let CharRange {ch, next} = self.string.char_range_at(self.index);
2280 /// External iterator for a string's characters in reverse order. Use
2281 /// with the `std::iterator` module.
2283 pub struct StrCharRevIterator<'self> {
2285 priv string: &'self str,
2288 impl<'self> Iterator<char> for StrCharRevIterator<'self> {
2290 fn next(&mut self) -> Option<char> {
2292 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index);
2301 /// External iterator for a string's bytes. Use with the `std::iterator`
2304 pub struct StrBytesIterator<'self> {
2305 priv it: vec::VecIterator<'self, u8>
2308 impl<'self> Iterator<u8> for StrBytesIterator<'self> {
2310 fn next(&mut self) -> Option<u8> {
2311 self.it.next().map_consume(|&x| x)
2315 /// External iterator for a string's bytes in reverse order. Use with
2316 /// the `std::iterator` module.
2318 pub struct StrBytesRevIterator<'self> {
2319 priv it: vec::VecRevIterator<'self, u8>
2322 impl<'self> Iterator<u8> for StrBytesRevIterator<'self> {
2324 fn next(&mut self) -> Option<u8> {
2325 self.it.next().map_consume(|&x| x)
2329 // This works because every lifetime is a sub-lifetime of 'static
2330 impl<'self> Zero for &'self str {
2331 fn zero() -> &'self str { "" }
2332 fn is_zero(&self) -> bool { self.is_empty() }
2335 impl Zero for ~str {
2336 fn zero() -> ~str { ~"" }
2337 fn is_zero(&self) -> bool { self.len() == 0 }
2340 impl Zero for @str {
2341 fn zero() -> @str { @"" }
2342 fn is_zero(&self) -> bool { self.len() == 0 }
2347 use iterator::IteratorUtil;
2348 use container::Container;
2356 use vec::{ImmutableVector, CopyableVector};
2357 use cmp::{TotalOrd, Less, Equal, Greater};
2361 assert!((eq(&~"", &~"")));
2362 assert!((eq(&~"foo", &~"foo")));
2363 assert!((!eq(&~"foo", &~"bar")));
// `eq_slice` compares contents, so equal text taken from different
// positions of different literals must compare equal.
#[test]
fn test_eq_slice() {
    let front = "foobar".slice(0, 3);
    let back = "barfoo".slice(3, 6);
    assert!(eq_slice(front, "foo"));
    assert!(eq_slice(back, "foo"));
    assert!(!eq_slice("foo1", "foo2"));
}
2376 assert!("" <= "foo");
2377 assert!("foo" <= "foo");
2378 assert!("foo" != "bar");
2383 assert_eq!("".len(), 0u);
2384 assert_eq!("hello world".len(), 11u);
2385 assert_eq!("\x63".len(), 1u);
2386 assert_eq!("\xa2".len(), 2u);
2387 assert_eq!("\u03c0".len(), 2u);
2388 assert_eq!("\u2620".len(), 3u);
2389 assert_eq!("\U0001d11e".len(), 4u);
2391 assert_eq!("".char_len(), 0u);
2392 assert_eq!("hello world".char_len(), 11u);
2393 assert_eq!("\x63".char_len(), 1u);
2394 assert_eq!("\xa2".char_len(), 1u);
2395 assert_eq!("\u03c0".char_len(), 1u);
2396 assert_eq!("\u2620".char_len(), 1u);
2397 assert_eq!("\U0001d11e".char_len(), 1u);
2398 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2403 assert_eq!("hello".find('l'), Some(2u));
2404 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2405 assert!("hello".find('x').is_none());
2406 assert!("hello".find(|c:char| c == 'x').is_none());
2407 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2408 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2413 assert_eq!("hello".rfind('l'), Some(3u));
2414 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2415 assert!("hello".rfind('x').is_none());
2416 assert!("hello".rfind(|c:char| c == 'x').is_none());
2417 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2418 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2422 fn test_push_str() {
2425 assert_eq!(s.slice_from(0), "");
2427 assert_eq!(s.slice_from(0), "abc");
2428 s.push_str("ประเทศไทย中华Việt Nam");
2429 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2435 assert_eq!(s.slice_from(0), "");
2436 s = s.append("abc");
2437 assert_eq!(s.slice_from(0), "abc");
2438 s = s.append("ประเทศไทย中华Việt Nam");
2439 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2443 fn test_pop_char() {
2444 let mut data = ~"ประเทศไทย中华";
2445 let cc = data.pop_char();
2446 assert_eq!(~"ประเทศไทย中", data);
2447 assert_eq!('华', cc);
2451 fn test_pop_char_2() {
2452 let mut data2 = ~"华";
2453 let cc2 = data2.pop_char();
2454 assert_eq!(~"", data2);
2455 assert_eq!('华', cc2);
2460 #[ignore(cfg(windows))]
2461 fn test_pop_char_fail() {
2463 let _cc3 = data.pop_char();
2467 fn test_push_char() {
2468 let mut data = ~"ประเทศไทย中";
2469 data.push_char('华');
2470 data.push_char('b'); // 1 byte
2471 data.push_char('¢'); // 2 byte
2472 data.push_char('€'); // 3 byte
2473 data.push_char('𤭢'); // 4 byte
2474 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2478 fn test_shift_char() {
2479 let mut data = ~"ประเทศไทย中";
2480 let cc = data.shift_char();
2481 assert_eq!(~"ระเทศไทย中", data);
2482 assert_eq!('ป', cc);
2486 fn test_unshift_char() {
2487 let mut data = ~"ประเทศไทย中";
2488 data.unshift_char('华');
2489 assert_eq!(~"华ประเทศไทย中", data);
2494 let mut empty = ~"";
2496 assert_eq!("", empty.as_slice());
2497 let mut data = ~"ประเทศไทย中";
2499 assert_eq!("", data.as_slice());
2500 data.push_char('华');
2501 assert_eq!("华", data.as_slice());
2505 fn test_split_within() {
2506 fn t(s: &str, i: uint, u: &[~str]) {
2508 for each_split_within(s, i) |s| { v.push(s.to_owned()) }
2509 assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
2513 t("hello", 15, [~"hello"]);
2514 t("\nMary had a little lamb\nLittle lamb\n", 15,
2515 [~"Mary had a", ~"little lamb", ~"Little lamb"]);
2516 t("\nMary had a little lamb\nLittle lamb\n", uint::max_value,
2517 [~"Mary had a little lamb\nLittle lamb"]);
2521 fn test_find_str() {
2523 assert_eq!("".find_str(""), Some(0u));
2524 assert!("banana".find_str("apple pie").is_none());
2526 let data = "abcabc";
2527 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2528 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2529 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2531 let mut data = ~"ประเทศไทย中华Việt Nam";
2533 assert!(data.find_str("ไท华").is_none());
2534 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2535 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2537 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2538 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2539 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2540 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2541 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2543 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2544 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2545 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2546 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2547 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
#[test]
fn test_slice_chars() {
    // Checks that `b` is what `a` holds from character `start` onwards,
    // for as many characters as `b` has.
    fn t(a: &str, b: &str, start: uint) {
        let end = start + b.char_len();
        let got = a.slice_chars(start, end);
        assert_eq!(got, b);
    }
    t("hello", "llo", 2);
    t("hello", "el", 1);
    // Indices count characters, not bytes, on multi-byte text.
    assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
}
2562 fn t(v: &[~str], s: &str) {
2563 assert_eq!(v.concat(), s.to_str());
2565 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2566 let v: &[~str] = [];
2573 fn t(v: &[~str], sep: &str, s: &str) {
2574 assert_eq!(v.connect(sep), s.to_str());
2576 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2577 " ", "you know I'm no good");
2578 let v: &[~str] = [];
2580 t([~"hi"], " ", "hi");
2584 fn test_concat_slices() {
2585 fn t(v: &[&str], s: &str) {
2586 assert_eq!(v.concat(), s.to_str());
2588 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2589 let v: &[&str] = [];
2595 fn test_connect_slices() {
2596 fn t(v: &[&str], sep: &str, s: &str) {
2597 assert_eq!(v.connect(sep), s.to_str());
2599 t(["you", "know", "I'm", "no", "good"],
2600 " ", "you know I'm no good");
2602 t(["hi"], " ", "hi");
2607 assert_eq!("x".repeat(4), ~"xxxx");
2608 assert_eq!("hi".repeat(4), ~"hihihihi");
2609 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2610 assert_eq!("".repeat(4), ~"");
2611 assert_eq!("hi".repeat(0), ~"");
2615 fn test_unsafe_slice() {
2616 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2617 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2618 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2619 fn a_million_letter_a() -> ~str {
2622 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2625 fn half_a_million_letter_a() -> ~str {
2628 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2631 let letters = a_million_letter_a();
2632 assert!(half_a_million_letter_a() ==
2633 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
// The empty needle is a prefix of everything; a needle longer than the
// string never matches.
#[test]
fn test_starts_with() {
    assert!("".starts_with(""));
    assert!("abc".starts_with(""));
    assert!("abc".starts_with("a"));
    assert!(!"a".starts_with("abc"));
    assert!(!"".starts_with("abc"));
}

// The empty needle is a suffix of everything; a needle longer than the
// string never matches.
#[test]
fn test_ends_with() {
    assert!("".ends_with(""));
    assert!("abc".ends_with(""));
    assert!("abc".ends_with("c"));
    assert!(!"a".ends_with("abc"));
    assert!(!"".ends_with("abc"));
}

#[test]
fn test_is_empty() {
    let (nothing, something) = ("", "a");
    assert!(nothing.is_empty());
    assert!(!something.is_empty());
}
2663 assert_eq!("".replace(a, "b"), ~"");
2664 assert_eq!("a".replace(a, "b"), ~"b");
2665 assert_eq!("ab".replace(a, "b"), ~"bb");
2667 assert!(" test test ".replace(test, "toast") ==
2669 assert_eq!(" test test ".replace(test, ""), ~" ");
2673 fn test_replace_2a() {
2674 let data = ~"ประเทศไทย中华";
2675 let repl = ~"دولة الكويت";
2678 let A = ~"دولة الكويتทศไทย中华";
2679 assert_eq!(data.replace(a, repl), A);
2683 fn test_replace_2b() {
2684 let data = ~"ประเทศไทย中华";
2685 let repl = ~"دولة الكويت";
2688 let B = ~"ปรدولة الكويتทศไทย中华";
2689 assert_eq!(data.replace(b, repl), B);
2693 fn test_replace_2c() {
2694 let data = ~"ประเทศไทย中华";
2695 let repl = ~"دولة الكويت";
2698 let C = ~"ประเทศไทยدولة الكويت";
2699 assert_eq!(data.replace(c, repl), C);
2703 fn test_replace_2d() {
2704 let data = ~"ประเทศไทย中华";
2705 let repl = ~"دولة الكويت";
2708 assert_eq!(data.replace(d, repl), data);
2713 assert_eq!("ab", "abc".slice(0, 2));
2714 assert_eq!("bc", "abc".slice(1, 3));
2715 assert_eq!("", "abc".slice(1, 1));
2716 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2718 let data = "ประเทศไทย中华";
2719 assert_eq!("ป", data.slice(0, 3));
2720 assert_eq!("ร", data.slice(3, 6));
2721 assert_eq!("", data.slice(3, 3));
2722 assert_eq!("华", data.slice(30, 33));
2724 fn a_million_letter_X() -> ~str {
2728 push_str(&mut rs, "华华华华华华华华华华");
2733 fn half_a_million_letter_X() -> ~str {
2736 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2739 let letters = a_million_letter_X();
2740 assert!(half_a_million_letter_X() ==
2741 letters.slice(0u, 3u * 500000u).to_owned());
2746 let ss = "中华Việt Nam";
2748 assert_eq!("华", ss.slice(3u, 6u));
2749 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2751 assert_eq!("ab", "abc".slice(0u, 2u));
2752 assert_eq!("bc", "abc".slice(1u, 3u));
2753 assert_eq!("", "abc".slice(1u, 1u));
2755 assert_eq!("中", ss.slice(0u, 3u));
2756 assert_eq!("华V", ss.slice(3u, 7u));
2757 assert_eq!("", ss.slice(3u, 3u));
2772 #[ignore(cfg(windows))]
2773 fn test_slice_fail() {
2774 "中华Việt Nam".slice(0u, 2u);
// `slice_from` keeps everything from the given byte to the end.
#[test]
fn test_slice_from() {
    let text = "abcd";
    assert_eq!(text.slice_from(0), "abcd");
    assert_eq!(text.slice_from(2), "cd");
    assert_eq!(text.slice_from(4), "");
}

// `slice_to` keeps everything before the given byte.
#[test]
fn test_slice_to() {
    let text = "abcd";
    assert_eq!(text.slice_to(0), "");
    assert_eq!(text.slice_to(2), "ab");
    assert_eq!(text.slice_to(4), "abcd");
}
// Exercises `trim_left_chars`, which strips matching characters from the
// front of a string only: matches in the middle or at the end are kept.
2791 fn test_trim_left_chars() {
// An empty set of characters trims nothing.
2792 let v: &[char] = &[];
2793 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
// A slice of characters: any of them is trimmed; a string made up only of
// matching characters trims down to "".
2794 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2795 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
2796 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
// The matcher may also be a single char or a `|char| -> bool` closure.
2798 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2799 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2800 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
// Exercises `trim_right_chars`, which strips matching characters from the
// end of a string only: matches at the front or in the middle are kept.
2804 fn test_trim_right_chars() {
// An empty set of characters trims nothing.
2805 let v: &[char] = &[];
2806 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
// A slice of characters: any of them is trimmed; a string made up only of
// matching characters trims down to "".
2807 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
2808 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
2809 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
// The matcher may also be a single char or a `|char| -> bool` closure.
2811 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
2812 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
2813 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
// Exercises `trim_chars`, which strips matching characters from both ends of
// a string while leaving matches in the interior (the `1` in "foo1bar")
// untouched.
2817 fn test_trim_chars() {
// An empty set of characters trims nothing.
2818 let v: &[char] = &[];
2819 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
// A slice of characters: any of them is trimmed; a string made up only of
// matching characters trims down to "", and one without matches is unchanged.
2820 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
2821 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
2822 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
// The matcher may also be a single char or a `|char| -> bool` closure.
2824 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
2825 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
2826 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
// Checks `trim_left`: leading whitespace is removed, trailing whitespace is
// kept. Covers the empty string, a string without whitespace, a
// whitespace-only string, and leading whitespace that includes the
// non-ASCII U+3000.
2830 fn test_trim_left() {
2831 assert_eq!("".trim_left(), "");
2832 assert_eq!("a".trim_left(), "a");
2833 assert_eq!(" ".trim_left(), "");
2834 assert_eq!(" blah".trim_left(), "blah");
2835 assert_eq!(" \u3000 wut".trim_left(), "wut");
2836 assert_eq!("hey ".trim_left(), "hey ");
// Checks `trim_right`: trailing whitespace is removed, leading whitespace is
// kept. Covers the empty string, a string without whitespace, a
// whitespace-only string, and trailing whitespace that includes the
// non-ASCII U+3000.
2840 fn test_trim_right() {
2841 assert_eq!("".trim_right(), "");
2842 assert_eq!("a".trim_right(), "a");
2843 assert_eq!(" ".trim_right(), "");
2844 assert_eq!("blah ".trim_right(), "blah");
2845 assert_eq!("wut \u3000 ".trim_right(), "wut");
2846 assert_eq!(" hey".trim_right(), " hey");
2851 assert_eq!("".trim(), "");
2852 assert_eq!("a".trim(), "a");
2853 assert_eq!(" ".trim(), "");
2854 assert_eq!(" blah ".trim(), "blah");
2855 assert_eq!("\nwut \u3000 ".trim(), "wut");
2856 assert_eq!(" hey dude ".trim(), "hey dude");
// Checks `is_whitespace` on whole strings: it holds for the empty string,
// for ASCII spaces/newlines/tabs and for the non-ASCII U+2009, and fails as
// soon as the string contains a non-whitespace character such as `_`.
2860 fn test_is_whitespace() {
2861 assert!("".is_whitespace());
2862 assert!(" ".is_whitespace());
2863 assert!("\u2009".is_whitespace()); // Thin space
2864 assert!(" \n\t ".is_whitespace());
2865 assert!(!" _ ".is_whitespace());
2869 fn test_shift_byte() {
2871 let b = unsafe{raw::shift_byte(&mut s)};
2872 assert_eq!(s, ~"BC");
2873 assert_eq!(b, 65u8);
2877 fn test_pop_byte() {
2879 let b = unsafe{raw::pop_byte(&mut s)};
2880 assert_eq!(s, ~"AB");
2881 assert_eq!(b, 67u8);
// Builds an owned string from seven bytes of value 65 (ASCII 'A') through
// the unsafe `raw::from_bytes` and checks the result is "AAAAAAA".
2885 fn test_unsafe_from_bytes() {
2886 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2887 let b = unsafe { raw::from_bytes(a) };
2888 assert_eq!(b, ~"AAAAAAA");
2892 fn test_from_bytes() {
2893 let ss = ~"ศไทย中华Việt Nam";
2894 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2895 0xe0_u8, 0xb9_u8, 0x84_u8,
2896 0xe0_u8, 0xb8_u8, 0x97_u8,
2897 0xe0_u8, 0xb8_u8, 0xa2_u8,
2898 0xe4_u8, 0xb8_u8, 0xad_u8,
2899 0xe5_u8, 0x8d_u8, 0x8e_u8,
2900 0x56_u8, 0x69_u8, 0xe1_u8,
2901 0xbb_u8, 0x87_u8, 0x74_u8,
2902 0x20_u8, 0x4e_u8, 0x61_u8,
2905 assert_eq!(ss, from_bytes(bb));
2909 #[ignore(cfg(windows))]
2910 fn test_from_bytes_fail() {
2911 use str::not_utf8::cond;
2913 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2914 0xe0_u8, 0xb9_u8, 0x84_u8,
2915 0xe0_u8, 0xb8_u8, 0x97_u8,
2916 0xe0_u8, 0xb8_u8, 0xa2_u8,
2917 0xe4_u8, 0xb8_u8, 0xad_u8,
2918 0xe5_u8, 0x8d_u8, 0x8e_u8,
2919 0x56_u8, 0x69_u8, 0xe1_u8,
2920 0xbb_u8, 0x87_u8, 0x74_u8,
2921 0x20_u8, 0x4e_u8, 0x61_u8,
2924 let mut error_happened = false;
2925 let _x = do cond.trap(|err| {
2926 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
2927 error_happened = true;
2932 assert!(error_happened);
// Passes seven bytes of value 65 (ASCII 'A') followed by a terminating 0
// byte to the unsafe `raw::from_bytes_with_null` and checks that the result
// compares equal to "AAAAAAA", i.e. the trailing 0 is not part of the text.
2936 fn test_unsafe_from_bytes_with_null() {
2937 let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2938 let b = unsafe { raw::from_bytes_with_null(a) };
2939 assert_eq!(b, "AAAAAAA");
2943 fn test_from_bytes_with_null() {
2944 let ss = "ศไทย中华Việt Nam";
2945 let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2946 0xe0_u8, 0xb9_u8, 0x84_u8,
2947 0xe0_u8, 0xb8_u8, 0x97_u8,
2948 0xe0_u8, 0xb8_u8, 0xa2_u8,
2949 0xe4_u8, 0xb8_u8, 0xad_u8,
2950 0xe5_u8, 0x8d_u8, 0x8e_u8,
2951 0x56_u8, 0x69_u8, 0xe1_u8,
2952 0xbb_u8, 0x87_u8, 0x74_u8,
2953 0x20_u8, 0x4e_u8, 0x61_u8,
2956 assert_eq!(ss, from_bytes_with_null(bb));
2961 #[ignore(cfg(windows))]
2962 fn test_from_bytes_with_null_fail() {
2963 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2964 0xe0_u8, 0xb9_u8, 0x84_u8,
2965 0xe0_u8, 0xb8_u8, 0x97_u8,
2966 0xe0_u8, 0xb8_u8, 0xa2_u8,
2967 0xe4_u8, 0xb8_u8, 0xad_u8,
2968 0xe5_u8, 0x8d_u8, 0x8e_u8,
2969 0x56_u8, 0x69_u8, 0xe1_u8,
2970 0xbb_u8, 0x87_u8, 0x74_u8,
2971 0x20_u8, 0x4e_u8, 0x61_u8,
2974 let _x = from_bytes_with_null(bb);
2979 #[ignore(cfg(windows))]
2980 fn test_from_bytes_with_null_fail_2() {
2981 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2982 0xe0_u8, 0xb9_u8, 0x84_u8,
2983 0xe0_u8, 0xb8_u8, 0x97_u8,
2984 0xe0_u8, 0xb8_u8, 0xa2_u8,
2985 0xe4_u8, 0xb8_u8, 0xad_u8,
2986 0xe5_u8, 0x8d_u8, 0x8e_u8,
2987 0x56_u8, 0x69_u8, 0xe1_u8,
2988 0xbb_u8, 0x87_u8, 0x74_u8,
2989 0x20_u8, 0x4e_u8, 0x61_u8,
2992 let _x = from_bytes_with_null(bb);
2996 fn test_from_buf() {
2998 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2999 let b = vec::raw::to_ptr(a);
3000 let c = raw::from_buf(b);
3001 assert_eq!(c, ~"AAAAAAA");
3006 fn test_as_bytes() {
3009 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3010 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3013 assert_eq!("".as_bytes(), &[]);
3014 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3015 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
3019 fn test_as_bytes_with_null() {
3022 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3023 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3029 let s3 = @"ศไทย中华Việt Nam";
3030 assert_eq!(s1.as_bytes_with_null(), &[0]);
3031 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3032 assert_eq!(s3.as_bytes_with_null(), v);
3036 let s3 = ~"ศไทย中华Việt Nam";
3037 assert_eq!(s1.as_bytes_with_null(), &[0]);
3038 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3039 assert_eq!(s3.as_bytes_with_null(), v);
3043 fn test_to_bytes_with_null() {
3044 let s = ~"ศไทย中华Việt Nam";
3046 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3047 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3050 assert_eq!((~"").to_bytes_with_null(), ~[0]);
3051 assert_eq!((~"abc").to_bytes_with_null(),
3052 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3053 assert_eq!(s.to_bytes_with_null(), v);
3057 #[ignore(cfg(windows))]
3059 fn test_as_bytes_fail() {
3060 // Don't double free. (I'm not sure if this exercises the
3061 // original problem code path anymore.)
3063 let _bytes = s.as_bytes_with_null();
3068 fn test_as_imm_buf() {
3069 do "".as_imm_buf |buf, len| {
3072 assert_eq!(*ptr::offset(buf, 0), 0);
3076 do "hello".as_imm_buf |buf, len| {
3079 assert_eq!(*ptr::offset(buf, 0), 'h' as u8);
3080 assert_eq!(*ptr::offset(buf, 1), 'e' as u8);
3081 assert_eq!(*ptr::offset(buf, 2), 'l' as u8);
3082 assert_eq!(*ptr::offset(buf, 3), 'l' as u8);
3083 assert_eq!(*ptr::offset(buf, 4), 'o' as u8);
3084 assert_eq!(*ptr::offset(buf, 5), 0);
3090 fn test_as_c_str() {
3092 do a.as_c_str |buf| {
3094 assert_eq!(*ptr::offset(buf, 0), 0);
3099 do a.as_c_str |buf| {
3101 assert_eq!(*ptr::offset(buf, 0), 'h' as libc::c_char);
3102 assert_eq!(*ptr::offset(buf, 1), 'e' as libc::c_char);
3103 assert_eq!(*ptr::offset(buf, 2), 'l' as libc::c_char);
3104 assert_eq!(*ptr::offset(buf, 3), 'l' as libc::c_char);
3105 assert_eq!(*ptr::offset(buf, 4), 'o' as libc::c_char);
3106 assert_eq!(*ptr::offset(buf, 5), 0);
// Checks that `subslice_offset` reports the byte offset at which a slice
// taken from a string starts within that string.
3112 fn test_subslice_offset() {
3113 let a = "kernelsprite";
// `b` is the tail starting at byte 7; `c` is the first six bytes.
3114 let b = a.slice(7, a.len());
3115 let c = a.slice(0, a.len() - 6);
3116 assert_eq!(a.subslice_offset(b), 7);
3117 assert_eq!(a.subslice_offset(c), 0);
// Slices produced by `line_iter` also work: each one-byte line is followed
// by a one-byte "\n", so the lines start at offsets 0, 2 and 4.
3119 let string = "a\nb\nc";
3120 let mut lines = ~[];
3121 for string.line_iter().advance |line| { lines.push(line) }
3122 assert_eq!(string.subslice_offset(lines[0]), 0);
3123 assert_eq!(string.subslice_offset(lines[1]), 2);
3124 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with an argument that was not sliced out of the
// receiver: `a` and `b` are two separate string literals. The return value
// is ignored.
// NOTE(review): presumably this call is expected to fail and the test carries
// a should-fail attribute that is not visible in this listing — confirm.
3129 fn test_subslice_offset_2() {
3130 let a = "alchemiter";
3131 let b = "cruxtruder";
3132 a.subslice_offset(b);
3136 fn vec_str_conversions() {
3137 let s1: ~str = ~"All mimsy were the borogoves";
3139 let v: ~[u8] = s1.as_bytes().to_owned();
3140 let s2: ~str = from_bytes(v);
3141 let mut i: uint = 0u;
3142 let n1: uint = s1.len();
3143 let n2: uint = v.len();
// Checks substring search with `contains`.
3156 fn test_contains() {
// ASCII: matches in the middle, at the start and at the end; the empty
// needle is found in any haystack, including the empty one; "def" overlaps
// the end of "abcde" only partially and is not found.
3157 assert!("abcde".contains("bcd"));
3158 assert!("abcde".contains("abcd"));
3159 assert!("abcde".contains("bcde"));
3160 assert!("abcde".contains(""));
3161 assert!("".contains(""));
3162 assert!(!"abcde".contains("def"));
3163 assert!(!"".contains("a"));
// Multi-byte text: prefixes and interior runs are found, but "ไท华" is not,
// because its characters occur in `data` without being adjacent.
3165 let data = ~"ประเทศไทย中华Việt Nam";
3166 assert!(data.contains("ประเ"));
3167 assert!(data.contains("ะเ"));
3168 assert!(data.contains("中华"));
3169 assert!(!data.contains("ไท华"));
// Checks `contains_char`: a character present in the string is found (also
// when it is the whole string), an absent one is not, and nothing is found
// in the empty string.
3173 fn test_contains_char() {
3174 assert!("abc".contains_char('b'));
3175 assert!("a".contains_char('a'));
3176 assert!(!"abc".contains_char('d'));
3177 assert!(!"".contains_char('a'));
3182 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3183 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3190 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3191 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3192 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3193 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3196 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3197 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3198 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3199 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3200 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3203 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3204 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3205 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3206 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3207 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3208 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3209 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3210 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3212 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3213 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3214 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3215 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3216 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3217 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3218 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3219 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3220 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3221 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3222 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3225 for pairs.iter().advance |p| {
3226 let (s, u) = (*p).clone();
3227 assert!(s.to_utf16() == u);
3228 assert!(from_utf16(u) == s);
3229 assert!(from_utf16(s.to_utf16()) == s);
3230 assert!(from_utf16(u).to_utf16() == u);
3236 let s = ~"ศไทย中华Việt Nam";
3237 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3239 for v.iter().advance |ch| {
3240 assert!(s.char_at(pos) == *ch);
3241 pos += from_char(*ch).len();
// Walks a mixed-width string backwards with `char_at_reverse`. `v` lists the
// characters of `s` in order; it is traversed in reverse while `pos` starts
// at the byte length of `s`.
3246 fn test_char_at_reverse() {
3247 let s = ~"ศไทย中华Việt Nam";
3248 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3249 let mut pos = s.len();
3250 for v.rev_iter().advance |ch| {
// `pos` is the byte offset just past the expected character.
3251 assert!(s.char_at_reverse(pos) == *ch);
// Step back by the encoded byte length of the character just checked.
3252 pos -= from_char(*ch).len();
// Checks `escape_unicode`, which escapes every character, printable ASCII
// included. In the cases covered here the form depends on the code point:
// `\xNN` up to 0xff, `\uNNNN` from 0x100 to 0xffff, and `\UNNNNNNNN` above.
3257 fn test_escape_unicode() {
3258 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3259 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3260 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3261 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3262 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3263 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3264 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3265 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3266 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
// Checks `escape_default`. In the cases covered here, letters and the space
// pass through unchanged; carriage return, newline, tab, both quote
// characters and the backslash get a backslash escape; code points from
// 0x100 upwards are written as `\uNNNN` or, above 0xffff, `\UNNNNNNNN`.
3270 fn test_escape_default() {
3271 assert_eq!("abc".escape_default(), ~"abc");
3272 assert_eq!("a c".escape_default(), ~"a c");
3273 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3274 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3275 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3276 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3277 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3278 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
// Checks that `to_managed` produces a managed (`@`) string equal to the
// source text, both for a whole literal and for a sub-slice of one.
3282 fn test_to_managed() {
3283 assert_eq!("abc".to_managed(), @"abc");
3284 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
// Verifies the total ordering of string slices via `cmp`: a string sorts
// after its own proper prefix, identical strings compare `Equal`, and
// otherwise the first differing byte decides the result regardless of the
// strings' lengths.
//
// Every comparison is wrapped in `assert_eq!`. A bare
// `x.cmp(y) == Greater;` statement only evaluates the comparison and throws
// the boolean away, so a wrong ordering could never make the test fail.
fn test_total_ord() {
    // Longer string vs. its own prefix, in both directions.
    assert_eq!("1234".cmp(& &"123"), Greater);
    assert_eq!("123".cmp(& &"1234"), Less);
    assert_eq!("1234".cmp(& &"1234"), Equal);
    // The first differing byte wins even when the "smaller" string is longer.
    assert_eq!("12345555".cmp(& &"123456"), Less);
    assert_eq!("22".cmp(& &"1234"), Greater);
}
// Checks the `ch` field returned by `char_range_at` at the starting byte
// offset of every character in a string whose characters are 1, 2, 3, 4, 4,
// 3, 2 and 1 bytes wide, giving the offsets 0, 1, 3, 6, 10, 14, 17 and 19.
3297 fn test_char_range_at() {
3298 let data = ~"b¢€𤭢𤭢€¢b";
3299 assert_eq!('b', data.char_range_at(0).ch);
3300 assert_eq!('¢', data.char_range_at(1).ch);
3301 assert_eq!('€', data.char_range_at(3).ch);
3302 assert_eq!('𤭢', data.char_range_at(6).ch);
3303 assert_eq!('𤭢', data.char_range_at(10).ch);
3304 assert_eq!('€', data.char_range_at(14).ch);
3305 assert_eq!('¢', data.char_range_at(17).ch);
3306 assert_eq!('b', data.char_range_at(19).ch);
// Boundary case: asking `char_range_at_reverse` for the character before
// offset 0 must report `next == 0`, i.e. the position stays at the start of
// the string instead of wrapping below zero.
3310 fn test_char_range_at_reverse_underflow() {
3311 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3316 #[allow(unnecessary_allocation)];
3318 ($s1:expr, $s2:expr, $e:expr) => {
3319 assert_eq!($s1 + $s2, $e);
3320 assert_eq!($s1.to_owned() + $s2, $e);
3321 assert_eq!($s1.to_managed() + $s2, $e);
3325 t!("foo", "bar", ~"foobar");
3326 t!("foo", @"bar", ~"foobar");
3327 t!("foo", ~"bar", ~"foobar");
3328 t!("ศไทย中", "华Việt Nam", ~"ศไทย中华Việt Nam");
3329 t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3330 t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
3334 fn test_iterator() {
3336 let s = ~"ศไทย中华Việt Nam";
3337 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3340 let mut it = s.iter();
3342 for it.advance |c| {
3343 assert_eq!(c, v[pos]);
3346 assert_eq!(pos, v.len());
3350 fn test_rev_iterator() {
3352 let s = ~"ศไทย中华Việt Nam";
3353 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3356 let mut it = s.rev_iter();
3358 for it.advance |c| {
3359 assert_eq!(c, v[pos]);
3362 assert_eq!(pos, v.len());
3366 fn test_bytes_iterator() {
3367 let s = ~"ศไทย中华Việt Nam";
3369 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3370 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3375 for s.bytes_iter().advance |b| {
3376 assert_eq!(b, v[pos]);
3382 fn test_bytes_rev_iterator() {
3383 let s = ~"ศไทย中华Việt Nam";
3385 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3386 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3389 let mut pos = v.len();
3391 for s.bytes_rev_iter().advance |b| {
3393 assert_eq!(b, v[pos]);
// Checks `split_iter` with the separator given either as a `char` or as a
// `|char| -> bool` closure; both forms must produce the same pieces.
// Characters that are not the separator (including "\n") stay inside the
// pieces.
3398 fn test_split_char_iterator() {
3399 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
3401 let split: ~[&str] = data.split_iter(' ').collect();
3402 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3404 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3405 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Non-ASCII (multi-byte) separator.
3408 let split: ~[&str] = data.split_iter('ä').collect();
3409 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3411 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3412 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn_iter` with a count of 3: the string is split at the first
// three separators only, so four pieces come back and the last one holds the
// unsplit remainder (later separators included). The separator is given
// both as a `char` and as a closure.
3415 fn test_splitn_char_iterator() {
3416 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
3418 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3419 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3421 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3422 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Non-ASCII (multi-byte) separator.
3425 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3426 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3428 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3429 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Checks the final boolean argument of `split_options_iter` on input that
// both starts and ends with the separator "\n". As the expected vectors
// show, `true` keeps the empty piece after the last separator and `false`
// drops it; the empty piece before the first separator is kept either way.
// The count of 1000 is far above the number of separators in `data`.
3433 fn test_split_char_iterator_no_trailing() {
3434 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3436 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3437 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3439 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3440 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
// Checks `word_iter`: the input is split on runs of whitespace (spaces, tabs
// and newlines here), and no empty pieces are produced for the leading,
// trailing or repeated whitespace.
3444 fn test_word_iter() {
3445 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3446 let words: ~[&str] = data.word_iter().collect();
3447 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `line_iter`: the input is split on "\n", empty lines (including a
// leading one) are preserved, and the result is the same whether or not the
// input ends with a final "\n".
3451 fn test_line_iter() {
3452 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3453 let lines: ~[&str] = data.line_iter().collect();
3454 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3456 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3457 let lines: ~[&str] = data.line_iter().collect();
3458 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3462 fn test_split_str_iterator() {
3463 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3464 let v: ~[&str] = s.split_str_iter(sep).collect();
3467 t("--1233345--", "12345", ~["--1233345--"]);
3468 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3469 t("::hello::there", "::", ~["", "hello", "there"]);
3470 t("hello::there::", "::", ~["hello", "there", ""]);
3471 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3472 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3473 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3474 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3475 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3477 t("zz", "zz", ~["",""]);
3478 t("ok", "z", ~["ok"]);
3479 t("zzz", "zz", ~["","z"]);
3480 t("zzzzz", "zz", ~["","","z"]);
3484 fn test_str_zero() {
3486 fn t<S: Zero + Str>() {
3487 let s: S = Zero::zero();
3488 assert_eq!(s.as_slice(), "");
3489 assert!(s.is_zero());
3498 fn test_str_container() {
3499 fn sum_len<S: Container>(v: &[S]) -> uint {
3500 v.iter().transform(|x| x.len()).sum()
3504 assert_eq!(5, sum_len(["012", "", "34"]));
3505 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3506 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3507 assert_eq!(5, sum_len([s.as_slice()]));