1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided.
24 use container::{Container, Mutable};
26 use iterator::{Iterator, IteratorUtil, FilterIterator, AdditiveIterator, MapIterator};
29 use option::{None, Option, Some};
35 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
41 not_utf8: (~str) -> ~str;
45 Section: Creating a string
49 * Convert a vector of bytes to a new UTF-8 string
53 * Raises the `not_utf8` condition if invalid UTF-8
// Validating constructor: builds an owned string from a borrowed byte vector.
// NOTE(review): the check that chooses between the error path and the success
// path below is not visible in this view; presumably `is_utf8(vv)` - confirm.
55 pub fn from_bytes(vv: &[u8]) -> ~str {
56 use str::not_utf8::cond;
// Error path: find the first byte that is not valid UTF-8 when taken on its
// own (a one-element vector is handed to `is_utf8`) and report its numeric
// value through the `not_utf8` condition.
// NOTE(review): for multi-byte input this is the first byte that cannot stand
// alone, which is not necessarily the start of the malformed sequence -
// confirm this is the intended diagnostic.
59 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
60 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
61 first_bad_byte as uint))
// Success path: hand the bytes to the unchecked constructor in `raw`.
64 return unsafe { raw::from_bytes(vv) }
69 * Consumes a vector of bytes to create a new utf-8 string
73 * Raises the `not_utf8` condition if invalid UTF-8
// Validating constructor that takes ownership of the byte vector and passes
// it on to `raw::from_bytes_owned`.
// NOTE(review): the check that chooses between the error path and the success
// path below is not visible in this view; presumably `is_utf8(vv)` - confirm.
75 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
76 use str::not_utf8::cond;
// Error path: same diagnostic as `from_bytes` - the first byte that is not
// valid UTF-8 on its own is reported through the `not_utf8` condition.
// NOTE(review): the message text names `from_bytes`, not `from_bytes_owned`;
// confirm whether handlers rely on the exact text before changing it.
79 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
80 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
81 first_bad_byte as uint))
// Success path: the vector itself is handed to the unchecked constructor.
83 return unsafe { raw::from_bytes_owned(vv) }
88 * Convert a vector of bytes to a UTF-8 string.
89 * The vector needs to be one byte longer than the string, and end with a 0 byte.
91 * Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
95 * Fails if invalid UTF-8
96 * Fails if not null terminated
// Borrowing variant: the result has the same lifetime as `vv`.
98 pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
// The last byte must be the 0 terminator.
// NOTE(review): for an empty `vv`, `vv.len() - 1` is evaluated before the
// index is checked - confirm that an empty vector fails cleanly here.
99 assert_eq!(vv[vv.len() - 1], 0);
// The whole vector, terminator included, is checked for UTF-8 validity.
100 assert!(is_utf8(vv));
101 return unsafe { raw::from_bytes_with_null(vv) };
105 * Converts a vector to a string slice without performing any allocations.
107 * Once the slice has been validated as utf-8, it is transmuted in-place and
108 * returned as a '&str' instead of a '&[u8]'
112 * Fails if invalid UTF-8
114 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
// Validation happens before any reinterpretation of the slice.
116 assert!(is_utf8(vector));
// Take the slice apart into its (pointer, length) pair ...
117 let (ptr, len): (*u8, uint) = ::cast::transmute(vector);
// ... and rebuild it as a string slice whose recorded length is one larger.
// NOTE(review): string slices in this file appear to count one extra byte
// (`len()` for `&str` returns the buffer length minus one), but nothing
// visible guarantees that a byte exists after `vector` or that it is 0 -
// confirm that callers always provide one.
118 let string: &'a str = ::cast::transmute((ptr, len + 1));
123 impl ToStr for ~str {
125 fn to_str(&self) -> ~str { self.to_owned() }
127 impl<'self> ToStr for &'self str {
129 fn to_str(&self) -> ~str { self.to_owned() }
131 impl ToStr for @str {
133 fn to_str(&self) -> ~str { self.to_owned() }
137 * Convert a byte to a UTF-8 string
141 * Fails if invalid UTF-8
143 pub fn from_byte(b: u8) -> ~str {
145 unsafe { ::cast::transmute(~[b, 0u8]) }
148 /// Convert a char to a string
149 pub fn from_char(ch: char) -> ~str {
155 /// Convert a vector of chars to a string
156 pub fn from_chars(chs: &[char]) -> ~str {
158 buf.reserve(chs.len());
159 for chs.iter().advance |ch| {
166 pub fn push_str(lhs: &mut ~str, rhs: &str) {
170 #[allow(missing_doc)]
171 pub trait StrVector {
172 pub fn concat(&self) -> ~str;
173 pub fn connect(&self, sep: &str) -> ~str;
176 impl<'self, S: Str> StrVector for &'self [S] {
177 /// Concatenate a vector of strings.
178 pub fn concat(&self) -> ~str {
// An empty vector yields an empty string.
179 if self.is_empty() { return ~""; }
// Sum the byte lengths of all pieces so the result is allocated once.
181 let len = self.iter().transform(|s| s.as_slice().len()).sum();
183 let mut s = with_capacity(len);
// Copy every piece straight into the result's buffer.
186 do s.as_mut_buf |buf, _| {
188 for self.iter().advance |ss| {
189 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
// One byte fewer than the reported buffer length is copied (presumably the
// reported length includes the terminator - confirm).
190 let sslen = sslen - 1;
191 ptr::copy_memory(buf, ssbuf, sslen);
// Advance the write position past the bytes just copied.
192 buf = buf.offset(sslen);
// Record the final length now that all bytes are in place.
196 raw::set_len(&mut s, len);
201 /// Concatenate a vector of strings, placing a given separator between each.
202 pub fn connect(&self, sep: &str) -> ~str {
// An empty vector yields an empty string.
203 if self.is_empty() { return ~""; }
// With an empty separator this is plain concatenation.
206 if sep.is_empty() { return self.concat(); }
// Result length: one separator between each adjacent pair of pieces plus the
// pieces themselves. `self.len() - 1` relies on the emptiness guard above.
208 // this is wrong without the guarantee that `self` is non-empty
209 let len = sep.len() * (self.len() - 1)
210 + self.iter().transform(|s| s.as_slice().len()).sum();
// NOTE(review): the use of `first` is not visible in this view; presumably it
// suppresses the separator in front of the first piece - confirm.
212 let mut first = true;
217 do s.as_mut_buf |buf, _| {
218 do sep.as_imm_buf |sepbuf, seplen| {
// One byte fewer than the reported buffer length is copied for the separator
// and for each piece (presumably the reported length includes the
// terminator - confirm).
219 let seplen = seplen - 1;
220 let mut buf = ::cast::transmute_mut_unsafe(buf);
221 for self.iter().advance |ss| {
222 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
223 let sslen = sslen - 1;
// Separator bytes, then advance the write position.
227 ptr::copy_memory(buf, sepbuf, seplen);
228 buf = buf.offset(seplen);
// Piece bytes, then advance the write position.
230 ptr::copy_memory(buf, ssbuf, sslen);
231 buf = buf.offset(sslen);
// Record the final length now that all bytes are in place.
236 raw::set_len(&mut s, len);
242 /// Something that can be used to compare against a character
244 /// Determine if the splitter should split at the given character
245 fn matches(&self, char) -> bool;
246 /// Indicate if this is only concerned about ASCII characters,
247 /// which can allow for a faster implementation.
248 fn only_ascii(&self) -> bool;
250 impl CharEq for char {
252 fn matches(&self, c: char) -> bool { *self == c }
254 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
256 impl<'self> CharEq for &'self fn(char) -> bool {
258 fn matches(&self, c: char) -> bool { (*self)(c) }
260 fn only_ascii(&self) -> bool { false }
262 impl CharEq for extern "Rust" fn(char) -> bool {
264 fn matches(&self, c: char) -> bool { (*self)(c) }
266 fn only_ascii(&self) -> bool { false }
269 impl<'self, C: CharEq> CharEq for &'self [C] {
271 fn matches(&self, c: char) -> bool {
272 self.iter().any(|m| m.matches(c))
275 fn only_ascii(&self) -> bool {
276 self.iter().all(|m| m.only_ascii())
281 /// An iterator over the substrings of a string, separated by `sep`.
283 pub struct StrCharSplitIterator<'self,Sep> {
284 priv string: &'self str,
287 /// The number of splits remaining
289 /// Whether an empty string at the end is allowed
290 priv allow_trailing_empty: bool,
292 priv only_ascii: bool
295 /// An iterator over the words of a string, separated by a sequence of whitespace
296 pub type WordIterator<'self> =
297 FilterIterator<'self, &'self str,
298 StrCharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
300 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
301 pub type AnyLineIterator<'self> =
302 MapIterator<'self, &'self str, &'self str, StrCharSplitIterator<'self, char>>;
304 impl<'self, Sep: CharEq> Iterator<&'self str> for StrCharSplitIterator<'self, Sep> {
// Produces the next piece of the string, or `None` once the iterator has
// been marked finished.
306 fn next(&mut self) -> Option<&'self str> {
307 if self.finished { return None }
309 let l = self.string.len();
// `start` is the byte position where the piece being produced begins.
310 let start = self.position;
// Byte-wise loop: each byte is cast to a char and offered to the separator.
// NOTE(review): presumably entered only when `only_ascii` is set; the branch
// that selects between the two loops is not visible in this view - confirm.
313 // this gives a *huge* speed up for splitting on ASCII
314 // characters (e.g. '\n' or ' ')
315 while self.position < l && self.count > 0 {
316 let byte = self.string[self.position];
318 if self.sep.matches(byte as char) {
// The piece is the byte range from `start` up to the separator.
319 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
// Char-wise loop: decodes one char at a time; `next` is the byte position
// just past the decoded char.
327 while self.position < l && self.count > 0 {
328 let CharRange {ch, next} = self.string.char_range_at(self.position);
330 if self.sep.matches(ch) {
331 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
332 self.position = next;
336 self.position = next;
// The loops above stop at the end of the string or when `count` reaches 0.
// What remains is emitted as the last piece, unless it is empty and trailing
// empty pieces are not allowed.
339 self.finished = true;
340 if self.allow_trailing_empty || start < l {
341 Some(unsafe { raw::slice_bytes(self.string, start, l) })
348 /// An iterator over the start and end indices of the matches of a
349 /// substring within a larger string
351 pub struct StrMatchesIndexIterator<'self> {
352 priv haystack: &'self str,
353 priv needle: &'self str,
357 /// An iterator over the substrings of a string separated by a given
360 pub struct StrStrSplitIterator<'self> {
361 priv it: StrMatchesIndexIterator<'self>,
366 impl<'self> Iterator<(uint, uint)> for StrMatchesIndexIterator<'self> {
// Scans the haystack byte by byte from `self.position` and returns the
// (start, end) byte positions of the next occurrence of the needle.
368 fn next(&mut self) -> Option<(uint, uint)> {
369 // See Issue #1932 for why this is a naive search
370 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
// `match_start` remembers where the current candidate match began.
371 let mut match_start = 0;
374 while self.position < h_len {
// `match_i` (declared on a line not visible here) indexes into the needle.
375 if self.haystack[self.position] == self.needle[match_i] {
376 if match_i == 0 { match_start = self.position; }
// Every needle byte has matched: report the range.
380 if match_i == n_len {
382 return Some((match_start, self.position));
385 // failed match, backtrack
// Scanning is reset to where the failed candidate began.
388 self.position = match_start;
397 impl<'self> Iterator<&'self str> for StrStrSplitIterator<'self> {
399 fn next(&mut self) -> Option<&'self str> {
400 if self.finished { return None; }
402 match self.it.next() {
403 Some((from, to)) => {
404 let ret = Some(self.it.haystack.slice(self.last_end, from));
409 self.finished = true;
410 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
416 /** Splits a string into substrings with possibly internal whitespace,
417 * each of them at most `lim` bytes long. The substrings have leading and trailing
418 * whitespace removed, and are only cut at whitespace boundaries.
422 * Fails during iteration if the string contains a non-whitespace
423 * sequence longer than the limit.
// Drives a small state machine over the chars of `ss` and calls `it` with
// each finished substring; `it` returning false is stored in `cont`.
425 pub fn each_split_within<'a>(ss: &'a str,
427 it: &fn(&'a str) -> bool) -> bool {
428 // Just for fun, let's write this as a state machine:
// Machine state. State `B` is used by the transitions below; its
// declaration is not visible in this view.
430 enum SplitWithinState {
431 A, // leading whitespace, initial state
433 C, // internal and trailing whitespace
// Classification of the current char.
436 Ws, // current char is whitespace
437 Cr // current char is not whitespace
// Classification of the current substring length against `lim`.
440 UnderLim, // current char makes current substring still fit in limit
441 OverLim // current char makes current substring no longer fit in limit
// `slice_start`: where the substring being built begins.
// `last_start` / `last_end`: start of the current word / end of the last
// complete word inside that substring.
444 let mut slice_start = 0;
445 let mut last_start = 0;
446 let mut last_end = 0;
// Index used for the artificial trailing whitespace fed in at the end.
448 let mut fake_i = ss.len();
// Emits the substring [slice_start, last_end) to the callback.
452 let slice: &fn() = || { cont = it(ss.slice(slice_start, last_end)) };
454 // if the limit is larger than the string, lower it to save cycles
// One step of the machine for the char `c` at index `i`.
459 let machine: &fn((uint, char)) -> bool = |(i, c)| {
460 let whitespace = if char::is_whitespace(c) { Ws } else { Cr };
// Length of the substring if it were extended up to and including `i`.
461 let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim };
463 state = match (state, whitespace, limit) {
// First non-whitespace char: a substring and a word both start here.
465 (A, Cr, _) => { slice_start = i; last_start = i; B }
467 (B, Cr, UnderLim) => { B }
// The current word alone exceeds the limit: fail.
468 (B, Cr, OverLim) if (i - last_start + 1) > lim
469 => fail!("word starting with %? longer than limit!",
470 ss.slice(last_start, i + 1)),
// Substring is full: emit it and restart at the current word.
471 (B, Cr, OverLim) => { slice(); slice_start = last_start; B }
// A word just ended at `i`.
472 (B, Ws, UnderLim) => { last_end = i; C }
473 (B, Ws, OverLim) => { last_end = i; slice(); A }
// A new word starts after internal whitespace.
475 (C, Cr, UnderLim) => { last_start = i; B }
476 (C, Cr, OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B }
477 (C, Ws, OverLim) => { slice(); A }
478 (C, Ws, UnderLim) => { C }
// NOTE(review): `enumerate()` numbers the chars yielded by `iter()`, while
// `slice()` treats its arguments as byte positions (it asserts
// `is_char_boundary`). For non-ASCII input the two differ - confirm whether
// multi-byte strings are handled correctly.
484 ss.iter().enumerate().advance(|x| machine(x));
486 // Let the automaton 'run out' by supplying trailing whitespace
487 while cont && match state { B | C => true, A => false } {
488 machine((fake_i, ' '));
495 * Replace all occurrences of one string with another
499 * * s - The string containing substrings to replace
500 * * from - The string to replace
501 * * to - The replacement string
505 * The original string with all occurrences of `from` replaced with `to`
// Builds the result by copying the stretches of `s` that lie between matches
// of `from`. `matches_index_iter` asserts that its argument is non-empty, so
// an empty `from` fails.
507 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
508 let mut result = ~"";
// Byte position just past the previous match (0 before the first match).
509 let mut last_end = 0;
510 for s.matches_index_iter(from).advance |(start, end)| {
// Copy the unmatched stretch in front of this match.
511 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
// After the last match, copy whatever remains of `s`.
515 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
520 Section: Comparing strings
523 /// Bytewise slice equality
527 pub fn eq_slice(a: &str, b: &str) -> bool {
528 do a.as_imm_buf |ap, alen| {
529 do b.as_imm_buf |bp, blen| {
530 if (alen != blen) { false }
533 libc::memcmp(ap as *libc::c_void,
535 (alen - 1) as libc::size_t) == 0
544 pub fn eq_slice(a: &str, b: &str) -> bool {
545 do a.as_imm_buf |ap, alen| {
546 do b.as_imm_buf |bp, blen| {
547 if (alen != blen) { false }
550 libc::memcmp(ap as *libc::c_void,
552 (alen - 1) as libc::size_t) == 0
559 /// Bytewise string equality
561 #[lang="uniq_str_eq"]
563 pub fn eq(a: &~str, b: &~str) -> bool {
569 pub fn eq(a: &~str, b: &~str) -> bool {
577 // Utility used by various searching functions
// Compares the bytes of `needle` against `haystack` one by one and returns
// false at the first difference.
// NOTE(review): `i` is declared on a line not visible here (presumably
// initialised from `at`). No bound on `i` is visible in this function; the
// visible callers (`starts_with`, `ends_with`) compare lengths before
// calling - confirm all callers do.
578 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
580 for needle.bytes_iter().advance |c| { if haystack[i] != c { return false; } i += 1u; }
588 /// Determines if a vector of bytes contains valid UTF-8
589 pub fn is_utf8(v: &[u8]) -> bool {
// Width of the sequence announced by the lead byte; 0 marks a byte that
// cannot start a sequence.
596 let w = utf8_char_width(v[i]);
597 if w == 0u { return false; }
// The announced sequence must not run past the end of the input.
600 if nexti > total { return false; }
// Each following byte must carry the continuation tag in its top two bits
// (mask 192 = 0b1100_0000, tag 128 = 0b1000_0000).
// NOTE(review): the conditions guarding the second and third checks are only
// partly visible. The visible lines check tags only; nothing here rejects
// overlong forms or encoded surrogates - confirm against the hidden lines.
602 if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
604 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
605 if w > 3 && (v[i + 3] & 192u8 != TAG_CONT_U8) { return false; }
614 /// Determines if a vector of `u16` contains valid UTF-16
// A unit outside the surrogate range 0xD800..0xDFFF is valid on its own; any
// other unit must be a leading surrogate immediately followed by a trailing
// surrogate.
615 pub fn is_utf16(v: &[u16]) -> bool {
621 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// A surrogate is only valid as the first half of a pair, so another unit
// must follow it. Reject the input when it ends here; this also keeps the
// read of the second unit (`u2`) inside the slice.
625 if i+1u >= len { return false; }
// First unit must be a leading surrogate, second unit a trailing surrogate.
// NOTE(review): the lower bound 0xD7FF differs from the 0xD800 used by
// `utf16_chars`; left unchanged because the enclosing branch is not fully
// visible in this view.
627 if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; }
628 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
635 /// Iterates over the utf-16 characters in the specified slice, yielding each
636 /// decoded unicode character to the function provided.
640 /// * Fails on invalid utf-16 data
641 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
// Decoding stops at the end of the slice or at the first 0 unit, whichever
// comes first.
644 while (i < len && v[i] != 0u16) {
// Units outside the surrogate range stand for themselves.
647 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// Otherwise `u` must be a leading surrogate and `u2` a trailing surrogate.
653 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
654 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
// Combine the payload bits of the two halves ...
655 let mut c = (u - 0xD800_u16) as char;
657 c |= (u2 - 0xDC00_u16) as char;
// ... then add the 0x10000 offset of the supplementary planes. This must be
// an addition: the combined payload can already have bit 16 set (any code
// point at or above U+20000), and OR-ing would then leave it unchanged and
// decode to the wrong character.
658 c += 0x1_0000_u32 as char;
666 * Allocates a new string from the utf-16 slice provided
668 pub fn from_utf16(v: &[u16]) -> ~str {
670 buf.reserve(v.len());
671 utf16_chars(v, |ch| buf.push_char(ch));
676 * Allocates a new string with the specified capacity. The string returned is
677 * the empty string, but has capacity for much more.
680 pub fn with_capacity(capacity: uint) -> ~str {
682 buf.reserve(capacity);
687 * As char_len but for a slice of a string
691 * * s - A valid string
692 * * start - The position inside `s` where to start counting in bytes
693 * * end - The position where to stop counting
697 * The number of Unicode characters in `s` between the given indices.
699 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
700 assert!(s.is_char_boundary(start));
701 assert!(s.is_char_boundary(end));
705 let next = s.char_range_at(i).next;
712 /// Counts the number of bytes taken by the first `n` chars in `s`
713 /// starting from `start`.
714 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
715 assert!(s.is_char_boundary(start));
721 let next = s.char_range_at(end).next;
// Lookup table indexed by the first byte of a UTF-8 sequence, giving the
// number of bytes in that sequence. An entry of 0 marks a byte that cannot
// start a sequence: 0x80..0xBF and 0xF5..0xFF. The trailing comments give the
// last index covered by the row(s) above them.
// NOTE(review): 0xC0 and 0xC1 are listed with width 2, although RFC 3629
// does not allow them as lead bytes - confirm whether that is intended.
728 // https://tools.ietf.org/html/rfc3629
729 static UTF8_CHAR_WIDTH: [u8, ..256] = [
730 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
731 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
732 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
733 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
734 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
735 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
736 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
737 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
738 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
739 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
740 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
741 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
742 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
743 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
744 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
745 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
748 /// Given a first byte, determine how many bytes are in this UTF-8 character
749 pub fn utf8_char_width(b: u8) -> uint {
750 return UTF8_CHAR_WIDTH[b] as uint;
753 #[allow(missing_doc)]
754 pub struct CharRange {
759 // UTF-8 tags and ranges
760 static TAG_CONT_U8: u8 = 128u8;
761 static TAG_CONT: uint = 128u;
762 static MAX_ONE_B: uint = 128u;
763 static TAG_TWO_B: uint = 192u;
764 static MAX_TWO_B: uint = 2048u;
765 static TAG_THREE_B: uint = 224u;
766 static MAX_THREE_B: uint = 65536u;
767 static TAG_FOUR_B: uint = 240u;
769 /// Unsafe operations
777 use vec::MutableVector;
779 /// Create a Rust string from a null-terminated *u8 buffer
780 pub unsafe fn from_buf(buf: *u8) -> ~str {
785 curr = ptr::offset(buf, i);
787 return from_buf_len(buf, i);
790 /// Create a Rust string from a *u8 buffer of the given length
// The bytes are copied; no validation of their contents is visible here.
791 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
// Room for `len` bytes plus one extra (presumably for the terminator, which
// is added on a line not visible in this view - confirm).
792 let mut v: ~[u8] = vec::with_capacity(len + 1);
793 v.as_mut_buf(|vbuf, _len| {
794 ptr::copy_memory(vbuf, buf as *u8, len)
// The vector's length is set by hand after the raw copy.
796 vec::raw::set_len(&mut v, len);
// The byte vector is reinterpreted as an owned string without copying.
800 return ::cast::transmute(v);
803 /// Create a Rust string from a null-terminated C string
804 pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
805 from_buf(::cast::transmute(c_str))
808 /// Create a Rust string from a `*c_char` buffer of the given length
809 pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
810 from_buf_len(::cast::transmute(c_str), len)
813 /// Converts a vector of bytes to a new owned string.
814 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
815 do v.as_imm_buf |buf, len| {
816 from_buf_len(buf, len)
820 /// Converts an owned vector of bytes to a new owned string. This assumes
821 /// that the utf-8-ness of the vector has already been validated
822 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
827 /// Converts a vector of bytes to a string.
828 /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
829 /// the string, if possible ending in a 0 byte.
830 pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
834 /// Converts a byte to a string.
835 pub unsafe fn from_byte(u: u8) -> ~str { raw::from_bytes([u]) }
837 /// Form a slice from a C string. Unsafe because the caller must ensure the
838 /// C string has the static lifetime, or else the return value may be
839 /// invalidated later.
840 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
846 curr = ptr::offset(s, len);
848 let v = (s, len + 1);
849 assert!(is_utf8(::cast::transmute(v)));
854 * Takes a bytewise (not UTF-8) slice from a string.
856 * Returns the substring from [`begin`..`end`).
860 * If begin is greater than end.
861 * If end is greater than the length of the string.
// No char-boundary check is made here; the safe `slice` method asserts
// `is_char_boundary` on both ends before delegating to this function.
864 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
865 do s.as_imm_buf |sbuf, n| {
866 assert!((begin <= end));
// NOTE(review): the check of `end` against `n` promised by the doc comment
// is not visible in this view - confirm it is present.
// The new slice starts `begin` bytes in and records one byte more than
// `end - begin` as its length (string slices in this file appear to count
// one extra byte - confirm).
869 let tuple = (ptr::offset(sbuf, begin), end - begin + 1);
870 ::cast::transmute(tuple)
874 /// Appends a byte to a string. (Not UTF-8 safe).
875 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
876 let new_len = s.len() + 1;
877 s.reserve_at_least(new_len);
878 do s.as_mut_buf |buf, len| {
879 *ptr::mut_offset(buf, len) = b;
881 set_len(&mut *s, new_len);
884 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
885 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
// Reserve room for the combined length once, up front ...
886 let new_len = s.len() + bytes.len();
887 s.reserve_at_least(new_len);
// ... then append byte by byte. Each `push_byte` call reserves and sets the
// length again for the single byte it adds.
888 for bytes.iter().advance |byte| { push_byte(&mut *s, *byte); }
891 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
892 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
896 set_len(s, len - 1u);
900 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
901 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
905 *s = s.slice(1, len).to_owned();
909 /// Sets the length of the string and adds the null terminator
911 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
912 let v: **mut vec::UnboxedVecRepr = cast::transmute(v);
913 let repr: *mut vec::UnboxedVecRepr = *v;
914 (*repr).fill = new_len + 1u;
915 let null = ptr::mut_offset(cast::transmute(&((*repr).data)),
921 fn test_from_buf_len() {
923 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
924 let b = vec::raw::to_ptr(a);
925 let c = from_buf_len(b, 3u);
926 assert_eq!(c, ~"AAA");
935 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
936 use super::{Str, eq_slice};
938 impl<'self> Add<&'self str,~str> for &'self str {
940 fn add(&self, rhs: & &'self str) -> ~str {
941 let mut ret = self.to_owned();
947 impl<'self> TotalOrd for &'self str {
// Byte-wise ordering: walk both strings in lock step and decide on a
// differing pair of bytes; `zip` stops at the end of the shorter string.
949 fn cmp(&self, other: & &'self str) -> Ordering {
950 for self.bytes_iter().zip(other.bytes_iter()).advance |(s_b, o_b)| {
951 match s_b.cmp(&o_b) {
952 Greater => return Greater,
// Reached when the loop did not return early: the lengths decide.
958 self.len().cmp(&other.len())
962 impl TotalOrd for ~str {
964 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
967 impl TotalOrd for @str {
969 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
972 impl<'self> Eq for &'self str {
974 fn eq(&self, other: & &'self str) -> bool {
975 eq_slice((*self), (*other))
978 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
983 fn eq(&self, other: &~str) -> bool {
984 eq_slice((*self), (*other))
987 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
992 fn eq(&self, other: &@str) -> bool {
993 eq_slice((*self), (*other))
996 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
999 impl<'self> TotalEq for &'self str {
1001 fn equals(&self, other: & &'self str) -> bool {
1002 eq_slice((*self), (*other))
1006 impl TotalEq for ~str {
1008 fn equals(&self, other: &~str) -> bool {
1009 eq_slice((*self), (*other))
1013 impl TotalEq for @str {
1015 fn equals(&self, other: &@str) -> bool {
1016 eq_slice((*self), (*other))
1020 impl<'self> Ord for &'self str {
1022 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
1024 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
1026 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
1028 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1033 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1035 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1037 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1039 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1044 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1046 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1048 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1050 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1053 impl<'self, S: Str> Equiv<S> for &'self str {
1055 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1058 impl<'self, S: Str> Equiv<S> for @str {
1060 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1063 impl<'self, S: Str> Equiv<S> for ~str {
1065 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1072 /// Any string that can be represented as a slice
1074 /// Work with `self` as a slice.
1075 fn as_slice<'a>(&'a self) -> &'a str;
1078 impl<'self> Str for &'self str {
1080 fn as_slice<'a>(&'a self) -> &'a str { *self }
1082 impl<'self> Str for ~str {
1084 fn as_slice<'a>(&'a self) -> &'a str {
1085 let s: &'a str = *self; s
1088 impl<'self> Str for @str {
1090 fn as_slice<'a>(&'a self) -> &'a str {
1091 let s: &'a str = *self; s
1095 impl<'self> Container for &'self str {
1097 fn len(&self) -> uint {
1098 do self.as_imm_buf |_p, n| { n - 1u }
1101 fn is_empty(&self) -> bool {
1106 impl Container for ~str {
1108 fn len(&self) -> uint { self.as_slice().len() }
1110 fn is_empty(&self) -> bool { self.len() == 0 }
1113 impl Container for @str {
1115 fn len(&self) -> uint { self.as_slice().len() }
1117 fn is_empty(&self) -> bool { self.len() == 0 }
1120 impl Mutable for ~str {
1121 /// Remove all content, make the string empty
1123 fn clear(&mut self) {
1125 raw::set_len(self, 0)
1131 #[allow(missing_doc)]
1132 pub trait StrSlice<'self> {
1133 fn contains<'a>(&self, needle: &'a str) -> bool;
1134 fn contains_char(&self, needle: char) -> bool;
1135 fn iter(&self) -> StrCharIterator<'self>;
1136 fn rev_iter(&self) -> StrCharRevIterator<'self>;
1137 fn bytes_iter(&self) -> StrBytesIterator<'self>;
1138 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self>;
1139 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep>;
1140 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep>;
1141 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1142 -> StrCharSplitIterator<'self, Sep>;
1143 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self>;
1144 fn split_str_iter(&self, &'self str) -> StrStrSplitIterator<'self>;
1145 fn line_iter(&self) -> StrCharSplitIterator<'self, char>;
1146 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1147 fn word_iter(&self) -> WordIterator<'self>;
1148 fn ends_with(&self, needle: &str) -> bool;
1149 fn is_whitespace(&self) -> bool;
1150 fn is_alphanumeric(&self) -> bool;
1151 fn char_len(&self) -> uint;
1153 fn slice(&self, begin: uint, end: uint) -> &'self str;
1154 fn slice_from(&self, begin: uint) -> &'self str;
1155 fn slice_to(&self, end: uint) -> &'self str;
1157 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1159 fn starts_with(&self, needle: &str) -> bool;
1160 fn escape_default(&self) -> ~str;
1161 fn escape_unicode(&self) -> ~str;
1162 fn trim(&self) -> &'self str;
1163 fn trim_left(&self) -> &'self str;
1164 fn trim_right(&self) -> &'self str;
1165 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1166 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1167 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1168 fn replace(&self, from: &str, to: &str) -> ~str;
1169 fn to_owned(&self) -> ~str;
1170 fn to_managed(&self) -> @str;
1171 fn to_utf16(&self) -> ~[u16];
1172 fn is_char_boundary(&self, index: uint) -> bool;
1173 fn char_range_at(&self, start: uint) -> CharRange;
1174 fn char_at(&self, i: uint) -> char;
1175 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1176 fn char_at_reverse(&self, i: uint) -> char;
1177 fn as_bytes(&self) -> &'self [u8];
1179 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1180 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1181 fn find_str(&self, &str) -> Option<uint>;
1183 fn repeat(&self, nn: uint) -> ~str;
1185 fn slice_shift_char(&self) -> (char, &'self str);
1187 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1189 fn lev_distance(&self, t: &str) -> uint;
1191 fn subslice_offset(&self, inner: &str) -> uint;
1193 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T;
1194 fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T;
1197 /// Extension methods for strings
1198 impl<'self> StrSlice<'self> for &'self str {
1200 * Returns true if one string contains another
1204 * * needle - The string to look for
1207 fn contains<'a>(&self, needle: &'a str) -> bool {
1208 self.find_str(needle).is_some()
1211 * Returns true if a string contains a char.
1215 * * needle - The char to look for
1218 fn contains_char(&self, needle: char) -> bool {
1219 self.find(needle).is_some()
1221 /// An iterator over the characters of `self`. Note, this iterates
1222 /// over unicode code-points, not unicode graphemes.
1227 /// let v: ~[char] = "abc åäö".iter().collect();
1228 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1231 fn iter(&self) -> StrCharIterator<'self> {
1237 /// An iterator over the characters of `self`, in reverse order.
1239 fn rev_iter(&self) -> StrCharRevIterator<'self> {
1240 StrCharRevIterator {
1246 /// An iterator over the bytes of `self`
1248 fn bytes_iter(&self) -> StrBytesIterator<'self> {
1249 StrBytesIterator { it: self.as_bytes().iter() }
1251 /// An iterator over the bytes of `self`, in reverse order
1253 fn bytes_rev_iter(&self) -> StrBytesRevIterator<'self> {
1254 StrBytesRevIterator { it: self.as_bytes().rev_iter() }
1257 /// An iterator over substrings of `self`, separated by characters
1258 /// matched by `sep`.
1263 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1264 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1266 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1267 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1270 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> StrCharSplitIterator<'self, Sep> {
1271 self.split_options_iter(sep, self.len(), true)
1274 /// An iterator over substrings of `self`, separated by characters
1275 /// matched by `sep`, restricted to splitting at most `count`
1278 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> StrCharSplitIterator<'self, Sep> {
1279 self.split_options_iter(sep, count, true)
1282 /// An iterator over substrings of `self`, separated by characters
1283 /// matched by `sep`, splitting at most `count` times, and
1284 /// possibly not including the trailing empty substring, if it
1287 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1288 -> StrCharSplitIterator<'self, Sep> {
1289 let only_ascii = sep.only_ascii();
1290 StrCharSplitIterator {
1295 allow_trailing_empty: allow_trailing_empty,
1297 only_ascii: only_ascii
1300 /// An iterator over the start and end indices of each match of
1301 /// `sep` within `self`.
1303 fn matches_index_iter(&self, sep: &'self str) -> StrMatchesIndexIterator<'self> {
1304 assert!(!sep.is_empty())
1305 StrMatchesIndexIterator {
1312 * An iterator over the substrings of `self` separated by `sep`.
1317 * let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
1318 * assert_eq!(v, ["", "XXX", "YYY", ""]);
1322 fn split_str_iter(&self, sep: &'self str) -> StrStrSplitIterator<'self> {
1323 StrStrSplitIterator {
1324 it: self.matches_index_iter(sep),
1330 /// An iterator over the lines of a string (subsequences separated
1333 fn line_iter(&self) -> StrCharSplitIterator<'self, char> {
1334 self.split_options_iter('\n', self.len(), false)
1337 /// An iterator over the lines of a string, separated by either
1338 /// `\n` or (`\r\n`).
1339 fn any_line_iter(&self) -> AnyLineIterator<'self> {
// Built on `line_iter`, which splits on '\n'; each line is then adjusted.
1340 do self.line_iter().transform |line| {
// A line ending in '\r' (left over from "\r\n") has that last byte removed.
// `l` is declared on a line not visible here (presumably `line.len()`).
1342 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1347 /// An iterator over the words of a string (subsequences separated
1348 /// by any sequence of whitespace).
1350 fn word_iter(&self) -> WordIterator<'self> {
1351 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1355 * Returns true if the string contains only whitespace
1357 * Whitespace characters are determined by `char::is_whitespace`
1360 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1362 * Returns true if the string contains only alphanumerics
1364 * Alphanumeric characters are determined by `char::is_alphanumeric`
1367 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1368 /// Returns the number of characters that a string holds
1370 fn char_len(&self) -> uint { self.iter().len_() }
1373 * Returns a slice of the given string from the byte range
1376 * Fails when `begin` and `end` do not point to valid characters or
1377 * beyond the last character of the string
1380 fn slice(&self, begin: uint, end: uint) -> &'self str {
1381 assert!(self.is_char_boundary(begin));
1382 assert!(self.is_char_boundary(end));
1383 unsafe { raw::slice_bytes(*self, begin, end) }
1385 /// Returns a slice of the string from `begin` to its end.
1387 /// Fails when `begin` does not point to a valid character, or is
1390 fn slice_from(&self, begin: uint) -> &'self str {
1391 self.slice(begin, self.len())
1393 /// Returns a slice of the string from the beginning to byte
1396 /// Fails when `end` does not point to a valid character, or is
1399 fn slice_to(&self, end: uint) -> &'self str {
1403 /// Returns a slice of the string from the char range
1404 /// [`begin`..`end`).
1406 /// Fails if `begin` > `end` or if either `begin` or `end` is
1407 /// beyond the last character of the string.
1408 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1409 assert!(begin <= end);
1410 // not sure how to use the iterators for this nicely.
// `position` is a byte offset; `count` and `l` are declared on lines not
// visible here (presumably the number of chars passed so far and the byte
// length of the string).
1411 let mut position = 0;
// Step over whole chars until `begin` chars have been passed.
1414 while count < begin && position < l {
1415 position = self.char_range_at(position).next;
1418 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
// Byte offset at which the result starts.
1419 let start_byte = position;
// Keep stepping until `end` chars have been passed.
1420 while count < end && position < l {
1421 position = self.char_range_at(position).next;
1424 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
// Both offsets were produced by `char_range_at`, then sliced by byte range.
1426 self.slice(start_byte, position)
1429 /// Returns true if `needle` is a prefix of the string.
1430 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1431 let (self_len, needle_len) = (self.len(), needle.len());
1432 if needle_len == 0u { true }
1433 else if needle_len > self_len { false }
1434 else { match_at(*self, needle, 0u) }
1436 /// Returns true if `needle` is a suffix of the string.
1437 fn ends_with(&self, needle: &str) -> bool {
1438 let (self_len, needle_len) = (self.len(), needle.len());
1439 if needle_len == 0u { true }
1440 else if needle_len > self_len { false }
1441 else { match_at(*self, needle, self_len - needle_len) }
1444 /// Escape each char in `s` with char::escape_default.
1445 fn escape_default(&self) -> ~str {
1446 let mut out: ~str = ~"";
1447 out.reserve_at_least(self.len());
1448 for self.iter().advance |c| {
1449 do c.escape_default |c| {
1456 /// Escape each char in `s` with char::escape_unicode.
1457 fn escape_unicode(&self) -> ~str {
1458 let mut out: ~str = ~"";
1459 out.reserve_at_least(self.len());
1460 for self.iter().advance |c| {
1461 do c.escape_unicode |c| {
1468 /// Returns a string with leading and trailing whitespace removed
1470 fn trim(&self) -> &'self str {
1471 self.trim_left().trim_right()
1473 /// Returns a string with leading whitespace removed
1475 fn trim_left(&self) -> &'self str {
1476 self.trim_left_chars(&char::is_whitespace)
1478 /// Returns a string with trailing whitespace removed
1480 fn trim_right(&self) -> &'self str {
1481 self.trim_right_chars(&char::is_whitespace)
1485 * Returns a string with characters that match `to_trim` removed.
1489 * * to_trim - a character matcher
1494 * assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1495 * assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1496 * assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1500 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1501 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1504 * Returns a string with leading characters that match `to_trim` removed.
1508 * * to_trim - a character matcher
1513 * assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1514 * assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1515 * assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1519 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1520 match self.find(|c: char| !to_trim.matches(c)) {
1522 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1526 * Returns a string with trailing characters that match `to_trim` removed.
1530 * * to_trim - a character matcher
1535 * assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1536 * assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1537 * assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1541 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1542 match self.rfind(|c: char| !to_trim.matches(c)) {
1545 let next = self.char_range_at(last).next;
1546 unsafe { raw::slice_bytes(*self, 0u, next) }
1552 * Replace all occurrences of one string with another
1556 * * from - The string to replace
1557 * * to - The replacement string
1561 * The original string with all occurrences of `from` replaced with `to`
1563 pub fn replace(&self, from: &str, to: &str) -> ~str {
1564 let mut result = ~"";
1565 let mut last_end = 0;
1566 for self.matches_index_iter(from).advance |(start, end)| {
1567 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1568 result.push_str(to);
1571 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1575 /// Copy a slice into a new unique str
1577 fn to_owned(&self) -> ~str {
1578 do self.as_imm_buf |src, len| {
1581 let mut v = vec::with_capacity(len);
1583 do v.as_mut_buf |dst, _| {
1584 ptr::copy_memory(dst, src, len - 1);
1586 vec::raw::set_len(&mut v, len - 1);
1588 ::cast::transmute(v)
1594 fn to_managed(&self) -> @str {
1595 let v = at_vec::from_fn(self.len() + 1, |i| {
1596 if i == self.len() { 0 } else { self[i] }
1598 unsafe { ::cast::transmute(v) }
1601 /// Converts to a vector of `u16` encoded as UTF-16.
1602 fn to_utf16(&self) -> ~[u16] {
1604 for self.iter().advance |ch| {
1605 // Arithmetic with u32 literals is easier on the eyes than chars.
1606 let mut ch = ch as u32;
1608 if (ch & 0xFFFF_u32) == ch {
1609 // The BMP falls through (assuming non-surrogate, as it
1611 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1614 // Supplementary planes break into surrogates.
1615 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1617 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1618 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1619 u.push_all([w1, w2])
1626 * Returns false if the index points into the middle of a multi-byte
1627 * character sequence.
1629 fn is_char_boundary(&self, index: uint) -> bool {
1630 if index == self.len() { return true; } // the end of the string always counts as a boundary
1631 let b = self[index];
1632 return b < 128u8 || b >= 192u8; // false only for bytes 128..191 (bit pattern 10xxxxxx), i.e. UTF-8 continuation bytes
1636 * Pluck a character out of a string and return the index of the next
1639 * This function can be used to iterate over the unicode characters of a
1645 * let s = "中华Việt Nam";
1647 * while i < s.len() {
1648 * let CharRange {ch, next} = s.char_range_at(i);
1649 * printfln!("%u: %c", i, ch);
1672 * * i - The byte offset of the char to extract
1676 * A record {ch: char, next: uint} containing the char value and the byte
1677 * index of the next unicode character.
1681 * If `i` is greater than or equal to the length of the string.
1682 * If `i` is not the index of the beginning of a valid UTF-8 character.
1685 fn char_range_at(&self, i: uint) -> CharRange {
1686 if (self[i] < 128u8) {
1687 return CharRange {ch: self[i] as char, next: i + 1 };
1690 // Multibyte case is a fn to allow char_range_at to inline cleanly
1691 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1692 let mut val = s[i] as uint;
1693 let w = UTF8_CHAR_WIDTH[val] as uint;
1696 // First byte is special, only want bottom 5 bits for width 2, 4 bits
1697 // for width 3, and 3 bits for width 4
1699 val = (val << 6) | (s[i + 1] & 63u8) as uint;
1700 if w > 2 { val = (val << 6) | (s[i + 2] & 63u8) as uint; }
1701 if w > 3 { val = (val << 6) | (s[i + 3] & 63u8) as uint; }
1703 return CharRange {ch: val as char, next: i + w};
1706 return multibyte_char_range_at(*self, i);
1709 /// Plucks the character starting at the `i`th byte of a string
1711 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1714 * Given a byte position and a str, return the previous char and its position.
1716 * This function can be used to iterate over a unicode string in reverse.
1718 * Returns 0 for next index if called on start index 0.
1720 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1721 let mut prev = start;
1723 // while there is a previous byte == 10......
1724 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1728 // now refer to the initial byte of previous char
1736 let ch = self.char_at(prev);
1737 return CharRange {ch:ch, next:prev};
1740 /// Plucks the character ending at the `i`th byte of a string
1742 fn char_at_reverse(&self, i: uint) -> char {
1743 self.char_range_at_reverse(i).ch
1747 * Work with the byte buffer of a string as a byte slice.
1749 * The byte slice does not include the null terminator.
1751 fn as_bytes(&self) -> &'self [u8] {
1753 let (ptr, len): (*u8, uint) = ::cast::transmute(*self);
1754 let outgoing_tuple: (*u8, uint) = (ptr, len - 1);
1755 ::cast::transmute(outgoing_tuple)
1760 * Returns the byte index of the first character of `self` that matches `search`
1764 * `Some` containing the byte index of the first matching character
1765 * or `None` if there is no match
1767 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
1768 if search.only_ascii() {
1769 for self.bytes_iter().enumerate().advance |(i, b)| {
1770 if search.matches(b as char) { return Some(i) }
1774 for self.iter().advance |c| {
1775 if search.matches(c) { return Some(index); }
1776 index += c.len_utf8_bytes();
1783 * Returns the byte index of the last character of `self` that matches `search`
1787 * `Some` containing the byte index of the last matching character
1788 * or `None` if there is no match
1790 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
1791 let mut index = self.len();
1792 if search.only_ascii() {
1793 for self.bytes_rev_iter().advance |b| {
1795 if search.matches(b as char) { return Some(index); }
1798 for self.rev_iter().advance |c| {
1799 index -= c.len_utf8_bytes();
1800 if search.matches(c) { return Some(index); }
1808 * Returns the byte index of the first matching substring
1812 * * `needle` - The string to search for
1816 * `Some` containing the byte index of the first matching substring
1817 * or `None` if there is no match
1819 fn find_str(&self, needle: &str) -> Option<uint> {
1820 if needle.is_empty() {
1823 self.matches_index_iter(needle)
1825 .map_consume(|(start, _end)| start)
1829 /// Given a string, make a new string with repeated copies of it.
1830 fn repeat(&self, nn: uint) -> ~str {
1831 do self.as_imm_buf |buf, len| {
1832 // ignore the NULL terminator
1834 let mut ret = with_capacity(nn * len);
1837 do ret.as_mut_buf |rbuf, _len| {
1838 let mut rbuf = rbuf;
1841 ptr::copy_memory(rbuf, buf, len);
1842 rbuf = rbuf.offset(len);
1845 raw::set_len(&mut ret, nn * len);
1852 * Retrieves the first character from a string slice and returns
1853 * it. This does not allocate a new string; instead, it returns a
1854 * slice that points one character beyond the character that was
1859 * If the string does not contain any characters
1862 fn slice_shift_char(&self) -> (char, &'self str) {
1863 let CharRange {ch, next} = self.char_range_at(0u);
1864 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1865 return (ch, next_s);
1869 /// Apply a function to each character.
1870 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
1871 let mut result = with_capacity(self.len());
1872 for self.iter().advance |cc| {
1873 result.push_char(ff(cc));
1878 /// Levenshtein Distance between two strings.
1879 fn lev_distance(&self, t: &str) -> uint {
1880 let slen = self.len();
1883 if slen == 0 { return tlen; }
1884 if tlen == 0 { return slen; }
1886 let mut dcol = vec::from_fn(tlen + 1, |x| x);
1888 for self.iter().enumerate().advance |(i, sc)| {
1890 let mut current = i;
1891 dcol[0] = current + 1;
1893 for t.iter().enumerate().advance |(j, tc)| {
1895 let next = dcol[j + 1];
1898 dcol[j + 1] = current;
1900 dcol[j + 1] = ::cmp::min(current, next);
1901 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
1913 * Returns the byte offset of an inner slice relative to an enclosing outer slice.
1915 * Fails if `inner` is not a direct slice contained within self.
1920 * let string = "a\nb\nc";
1921 * let mut lines = ~[];
1922 * for string.line_iter().advance |line| { lines.push(line) }
1924 * assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1925 * assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1926 * assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1930 fn subslice_offset(&self, inner: &str) -> uint {
1931 do self.as_imm_buf |a, a_len| {
1932 do inner.as_imm_buf |b, b_len| {
1938 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
1939 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
1941 assert!(a_start <= b_start);
1942 assert!(b_end <= a_end);
1949 * Work with the byte buffer and length of a slice.
1951 * The given length is one byte longer than the 'official' indexable
1952 * length of the string. This is to permit probing the byte past the
1953 * indexable area for a null byte, as is the case in slices pointing
1954 * to full strings, or suffixes of them.
1957 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T {
1958 let v: &[u8] = unsafe { cast::transmute(*self) };
1963 * Work with the byte buffer of a string as a null-terminated C string.
1965 * Allows for unsafe manipulation of strings, which is useful for foreign
1966 * interop. This is similar to `str::as_buf`, but guarantees null-termination.
1967 * If the given slice is not already null-terminated, this function will
1968 * allocate a temporary, copy the slice, null terminate it, and pass
1974 * let s = "PATH".as_c_str(|path| libc::getenv(path));
1978 fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T {
1979 do self.as_imm_buf |buf, len| {
1980 // NB: len includes the trailing null.
1982 if unsafe { *(ptr::offset(buf, len - 1)) != 0 } {
1983 self.to_owned().as_c_str(|s| f(s))
1985 f(buf as *libc::c_char)
1991 #[allow(missing_doc)]
1992 pub trait NullTerminatedStr {
1993 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
1996 impl NullTerminatedStr for ~str {
1998 * Work with the byte buffer of a string as a byte slice.
2000 * The byte slice does include the null terminator.
2003 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2004 let ptr: &'a ~[u8] = unsafe { ::cast::transmute(self) };
2005 let slice: &'a [u8] = *ptr;
2009 impl NullTerminatedStr for @str {
2011 * Work with the byte buffer of a string as a byte slice.
2013 * The byte slice does include the null terminator.
2016 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2017 let ptr: &'a @[u8] = unsafe { ::cast::transmute(self) };
2018 let slice: &'a [u8] = *ptr;
2023 #[allow(missing_doc)]
2024 pub trait OwnedStr {
2025 fn push_str_no_overallocate(&mut self, rhs: &str);
2026 fn push_str(&mut self, rhs: &str);
2027 fn push_char(&mut self, c: char);
2028 fn pop_char(&mut self) -> char;
2029 fn shift_char(&mut self) -> char;
2030 fn unshift_char(&mut self, ch: char);
2031 fn append(self, rhs: &str) -> ~str;
2032 fn reserve(&mut self, n: uint);
2033 fn reserve_at_least(&mut self, n: uint);
2034 fn capacity(&self) -> uint;
2035 fn to_bytes_with_null(self) -> ~[u8];
2038 * Work with the mutable byte buffer and length of a slice.
2040 * The given length is one byte longer than the 'official' indexable
2041 * length of the string. This is to permit probing the byte past the
2042 * indexable area for a null byte, as is the case in slices pointing
2043 * to full strings, or suffixes of them.
2045 * Make sure any mutations to this buffer keep this string valid UTF8.
2047 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T;
2050 impl OwnedStr for ~str {
2051 /// Appends a string slice to the back of a string, without overallocating
2053 fn push_str_no_overallocate(&mut self, rhs: &str) {
2055 let llen = self.len();
2056 let rlen = rhs.len();
2057 self.reserve(llen + rlen);
2058 do self.as_imm_buf |lbuf, _llen| {
2059 do rhs.as_imm_buf |rbuf, _rlen| {
2060 let dst = ptr::offset(lbuf, llen);
2061 let dst = ::cast::transmute_mut_unsafe(dst);
2062 ptr::copy_memory(dst, rbuf, rlen);
2065 raw::set_len(self, llen + rlen);
2069 /// Appends a string slice to the back of a string
2071 fn push_str(&mut self, rhs: &str) {
2073 let llen = self.len();
2074 let rlen = rhs.len();
2075 self.reserve_at_least(llen + rlen);
2076 do self.as_imm_buf |lbuf, _llen| {
2077 do rhs.as_imm_buf |rbuf, _rlen| {
2078 let dst = ptr::offset(lbuf, llen);
2079 let dst = ::cast::transmute_mut_unsafe(dst);
2080 ptr::copy_memory(dst, rbuf, rlen);
2083 raw::set_len(self, llen + rlen);
2086 /// Appends a character to the back of a string
2088 fn push_char(&mut self, c: char) {
2089 assert!(c as uint <= 0x10ffff); // FIXME: #7609: should be enforced on all `char`
2091 let code = c as uint;
2092 let nb = if code < MAX_ONE_B { 1u }
2093 else if code < MAX_TWO_B { 2u }
2094 else if code < MAX_THREE_B { 3u }
2096 let len = self.len();
2097 let new_len = len + nb;
2098 self.reserve_at_least(new_len);
2100 do self.as_mut_buf |buf, _len| {
2103 *ptr::mut_offset(buf, off) = code as u8;
2106 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2107 *ptr::mut_offset(buf, off + 1u) = (code & 63u | TAG_CONT) as u8;
2110 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2111 *ptr::mut_offset(buf, off + 1u) = (code >> 6u & 63u | TAG_CONT) as u8;
2112 *ptr::mut_offset(buf, off + 2u) = (code & 63u | TAG_CONT) as u8;
2115 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2116 *ptr::mut_offset(buf, off + 1u) = (code >> 12u & 63u | TAG_CONT) as u8;
2117 *ptr::mut_offset(buf, off + 2u) = (code >> 6u & 63u | TAG_CONT) as u8;
2118 *ptr::mut_offset(buf, off + 3u) = (code & 63u | TAG_CONT) as u8;
2123 raw::set_len(self, new_len);
2127 * Remove the final character from a string and return it
2131 * If the string does not contain any characters
2133 fn pop_char(&mut self) -> char {
2134 let end = self.len();
2136 let CharRange {ch, next} = self.char_range_at_reverse(end);
2137 unsafe { raw::set_len(self, next); }
2142 * Remove the first character from a string and return it
2146 * If the string does not contain any characters
2148 fn shift_char(&mut self) -> char {
2149 let CharRange {ch, next} = self.char_range_at(0u);
2150 *self = self.slice(next, self.len()).to_owned();
2154 /// Prepend a char to a string
2155 fn unshift_char(&mut self, ch: char) {
2156 // This could be more efficient.
2157 let mut new_str = ~"";
2158 new_str.push_char(ch);
2159 new_str.push_str(*self);
2163 /// Concatenate two strings together.
2165 fn append(self, rhs: &str) -> ~str {
2166 let mut new_str = self;
2167 new_str.push_str_no_overallocate(rhs);
2172 * Reserves capacity for exactly `n` bytes in the given string, not including
2173 * the null terminator.
2175 * Assuming single-byte characters, the resulting string will be large
2176 * enough to hold a string of length `n`. To account for the null terminator,
2177 * the underlying buffer will have the size `n` + 1.
2179 * If the capacity for `s` is already equal to or greater than the requested
2180 * capacity, then no action is taken.
2185 * * n - The number of bytes to reserve space for
2188 pub fn reserve(&mut self, n: uint) {
2190 let v: *mut ~[u8] = cast::transmute(self);
2191 (*v).reserve(n + 1);
2196 * Reserves capacity for at least `n` bytes in the given string, not including
2197 * the null terminator.
2199 * Assuming single-byte characters, the resulting string will be large
2200 * enough to hold a string of length `n`. To account for the null terminator,
2201 * the underlying buffer will have the size `n` + 1.
2203 * This function will over-allocate in order to amortize the allocation costs
2204 * in scenarios where the caller may need to repeatedly reserve additional
2207 * If the capacity for `s` is already equal to or greater than the requested
2208 * capacity, then no action is taken.
2213 * * n - The number of bytes to reserve space for
2216 fn reserve_at_least(&mut self, n: uint) {
2217 self.reserve(uint::next_power_of_two(n + 1u) - 1u) // round n + 1 (room for the null terminator) up to a power of two; the - 1 is because `reserve` adds the terminator byte back itself
2221 * Returns the number of single-byte characters the string can hold without
2224 fn capacity(&self) -> uint {
2225 let buf: &~[u8] = unsafe { cast::transmute(self) };
2226 let vcap = buf.capacity();
2231 /// Convert to a vector of bytes. This does not allocate a new
2232 /// string, and includes the null terminator.
2234 fn to_bytes_with_null(self) -> ~[u8] {
2235 unsafe { ::cast::transmute(self) }
2239 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2240 let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2245 impl Clone for ~str {
2247 fn clone(&self) -> ~str {
2252 impl Clone for @str {
2254 fn clone(&self) -> @str {
2259 /// External iterator for a string's characters. Use with the `std::iterator`
2262 pub struct StrCharIterator<'self> {
2264 priv string: &'self str,
2267 impl<'self> Iterator<char> for StrCharIterator<'self> {
2269 fn next(&mut self) -> Option<char> {
2270 if self.index < self.string.len() {
2271 let CharRange {ch, next} = self.string.char_range_at(self.index);
2279 /// External iterator for a string's characters in reverse order. Use
2280 /// with the `std::iterator` module.
2282 pub struct StrCharRevIterator<'self> {
2284 priv string: &'self str,
2287 impl<'self> Iterator<char> for StrCharRevIterator<'self> {
2289 fn next(&mut self) -> Option<char> {
2291 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index);
2300 /// External iterator for a string's bytes. Use with the `std::iterator`
2303 pub struct StrBytesIterator<'self> {
2304 priv it: vec::VecIterator<'self, u8>
2307 impl<'self> Iterator<u8> for StrBytesIterator<'self> {
2309 fn next(&mut self) -> Option<u8> {
2310 self.it.next().map_consume(|&x| x)
2314 /// External iterator for a string's bytes in reverse order. Use with
2315 /// the `std::iterator` module.
2317 pub struct StrBytesRevIterator<'self> {
2318 priv it: vec::VecRevIterator<'self, u8>
2321 impl<'self> Iterator<u8> for StrBytesRevIterator<'self> {
2323 fn next(&mut self) -> Option<u8> {
2324 self.it.next().map_consume(|&x| x)
2328 // This works because every lifetime is a sub-lifetime of 'static
2329 impl<'self> Zero for &'self str {
2330 fn zero() -> &'self str { "" }
2331 fn is_zero(&self) -> bool { self.is_empty() }
2334 impl Zero for ~str {
2335 fn zero() -> ~str { ~"" }
2336 fn is_zero(&self) -> bool { self.len() == 0 }
2339 impl Zero for @str {
2340 fn zero() -> @str { @"" }
2341 fn is_zero(&self) -> bool { self.len() == 0 }
2346 use iterator::IteratorUtil;
2347 use container::Container;
2355 use vec::{ImmutableVector, CopyableVector};
2356 use cmp::{TotalOrd, Less, Equal, Greater};
2360 assert!((eq(&~"", &~"")));
2361 assert!((eq(&~"foo", &~"foo")));
2362 assert!((!eq(&~"foo", &~"bar")));
2366 fn test_eq_slice() {
2367 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2368 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2369 assert!((!eq_slice("foo1", "foo2")));
2375 assert!("" <= "foo");
2376 assert!("foo" <= "foo");
2377 assert!("foo" != "bar");
2382 assert_eq!("".len(), 0u);
2383 assert_eq!("hello world".len(), 11u);
2384 assert_eq!("\x63".len(), 1u);
2385 assert_eq!("\xa2".len(), 2u);
2386 assert_eq!("\u03c0".len(), 2u);
2387 assert_eq!("\u2620".len(), 3u);
2388 assert_eq!("\U0001d11e".len(), 4u);
2390 assert_eq!("".char_len(), 0u);
2391 assert_eq!("hello world".char_len(), 11u);
2392 assert_eq!("\x63".char_len(), 1u);
2393 assert_eq!("\xa2".char_len(), 1u);
2394 assert_eq!("\u03c0".char_len(), 1u);
2395 assert_eq!("\u2620".char_len(), 1u);
2396 assert_eq!("\U0001d11e".char_len(), 1u);
2397 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2402 assert_eq!("hello".find('l'), Some(2u));
2403 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2404 assert!("hello".find('x').is_none());
2405 assert!("hello".find(|c:char| c == 'x').is_none());
2406 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2407 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2412 assert_eq!("hello".rfind('l'), Some(3u));
2413 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2414 assert!("hello".rfind('x').is_none());
2415 assert!("hello".rfind(|c:char| c == 'x').is_none());
2416 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2417 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2421 fn test_push_str() {
2424 assert_eq!(s.slice_from(0), "");
2426 assert_eq!(s.slice_from(0), "abc");
2427 s.push_str("ประเทศไทย中华Việt Nam");
2428 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2434 assert_eq!(s.slice_from(0), "");
2435 s = s.append("abc");
2436 assert_eq!(s.slice_from(0), "abc");
2437 s = s.append("ประเทศไทย中华Việt Nam");
2438 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2442 fn test_pop_char() {
2443 let mut data = ~"ประเทศไทย中华";
2444 let cc = data.pop_char();
2445 assert_eq!(~"ประเทศไทย中", data);
2446 assert_eq!('华', cc);
2450 fn test_pop_char_2() {
2451 let mut data2 = ~"华";
2452 let cc2 = data2.pop_char();
2453 assert_eq!(~"", data2);
2454 assert_eq!('华', cc2);
2459 #[ignore(cfg(windows))]
2460 fn test_pop_char_fail() {
2462 let _cc3 = data.pop_char();
2466 fn test_push_char() {
2467 let mut data = ~"ประเทศไทย中";
2468 data.push_char('华');
2469 data.push_char('b'); // 1 byte
2470 data.push_char('¢'); // 2 byte
2471 data.push_char('€'); // 3 byte
2472 data.push_char('𤭢'); // 4 byte
2473 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2477 fn test_shift_char() {
2478 let mut data = ~"ประเทศไทย中";
2479 let cc = data.shift_char();
2480 assert_eq!(~"ระเทศไทย中", data);
2481 assert_eq!('ป', cc);
2485 fn test_unshift_char() {
2486 let mut data = ~"ประเทศไทย中";
2487 data.unshift_char('华');
2488 assert_eq!(~"华ประเทศไทย中", data);
2493 let mut empty = ~"";
2495 assert_eq!("", empty.as_slice());
2496 let mut data = ~"ประเทศไทย中";
2498 assert_eq!("", data.as_slice());
2499 data.push_char('华');
2500 assert_eq!("华", data.as_slice());
2504 fn test_split_within() {
2505 fn t(s: &str, i: uint, u: &[~str]) {
2507 for each_split_within(s, i) |s| { v.push(s.to_owned()) }
2508 assert!(v.iter().zip(u.iter()).all(|(a,b)| a == b));
2512 t("hello", 15, [~"hello"]);
2513 t("\nMary had a little lamb\nLittle lamb\n", 15,
2514 [~"Mary had a", ~"little lamb", ~"Little lamb"]);
2515 t("\nMary had a little lamb\nLittle lamb\n", uint::max_value,
2516 [~"Mary had a little lamb\nLittle lamb"]);
2520 fn test_find_str() {
2522 assert_eq!("".find_str(""), Some(0u));
2523 assert!("banana".find_str("apple pie").is_none());
2525 let data = "abcabc";
2526 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2527 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2528 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2530 let mut data = ~"ประเทศไทย中华Việt Nam";
2532 assert!(data.find_str("ไท华").is_none());
2533 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2534 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2536 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2537 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2538 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2539 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2540 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2542 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2543 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2544 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2545 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2546 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
2550 fn test_slice_chars() {
2551 fn t(a: &str, b: &str, start: uint) {
2552 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2554 t("hello", "llo", 2);
2555 t("hello", "el", 1);
2556 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2561 fn t(v: &[~str], s: &str) {
2562 assert_eq!(v.concat(), s.to_str());
2564 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2565 let v: &[~str] = [];
2572 fn t(v: &[~str], sep: &str, s: &str) {
2573 assert_eq!(v.connect(sep), s.to_str());
2575 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2576 " ", "you know I'm no good");
2577 let v: &[~str] = [];
2579 t([~"hi"], " ", "hi");
2583 fn test_concat_slices() {
2584 fn t(v: &[&str], s: &str) {
2585 assert_eq!(v.concat(), s.to_str());
2587 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2588 let v: &[&str] = [];
2594 fn test_connect_slices() {
2595 fn t(v: &[&str], sep: &str, s: &str) {
2596 assert_eq!(v.connect(sep), s.to_str());
2598 t(["you", "know", "I'm", "no", "good"],
2599 " ", "you know I'm no good");
2601 t(["hi"], " ", "hi");
2606 assert_eq!("x".repeat(4), ~"xxxx");
2607 assert_eq!("hi".repeat(4), ~"hihihihi");
2608 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2609 assert_eq!("".repeat(4), ~"");
2610 assert_eq!("hi".repeat(0), ~"");
2614 fn test_unsafe_slice() {
2615 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2616 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2617 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2618 fn a_million_letter_a() -> ~str {
2621 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2624 fn half_a_million_letter_a() -> ~str {
2627 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2630 let letters = a_million_letter_a();
2631 assert!(half_a_million_letter_a() ==
2632 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
2636 fn test_starts_with() {
2637 assert!(("".starts_with("")));
2638 assert!(("abc".starts_with("")));
2639 assert!(("abc".starts_with("a")));
2640 assert!((!"a".starts_with("abc")));
2641 assert!((!"".starts_with("abc")));
2645 fn test_ends_with() {
2646 assert!(("".ends_with("")));
2647 assert!(("abc".ends_with("")));
2648 assert!(("abc".ends_with("c")));
2649 assert!((!"a".ends_with("abc")));
2650 assert!((!"".ends_with("abc")));
2654 fn test_is_empty() {
2655 assert!("".is_empty());
2656 assert!(!"a".is_empty());
2662 assert_eq!("".replace(a, "b"), ~"");
2663 assert_eq!("a".replace(a, "b"), ~"b");
2664 assert_eq!("ab".replace(a, "b"), ~"bb");
2666 assert!(" test test ".replace(test, "toast") ==
2668 assert_eq!(" test test ".replace(test, ""), ~" ");
2672 fn test_replace_2a() {
2673 let data = ~"ประเทศไทย中华";
2674 let repl = ~"دولة الكويت";
2677 let A = ~"دولة الكويتทศไทย中华";
2678 assert_eq!(data.replace(a, repl), A);
2682 fn test_replace_2b() {
2683 let data = ~"ประเทศไทย中华";
2684 let repl = ~"دولة الكويت";
2687 let B = ~"ปรدولة الكويتทศไทย中华";
2688 assert_eq!(data.replace(b, repl), B);
2692 fn test_replace_2c() {
2693 let data = ~"ประเทศไทย中华";
2694 let repl = ~"دولة الكويت";
2697 let C = ~"ประเทศไทยدولة الكويت";
2698 assert_eq!(data.replace(c, repl), C);
2702 fn test_replace_2d() {
2703 let data = ~"ประเทศไทย中华";
2704 let repl = ~"دولة الكويت";
2707 assert_eq!(data.replace(d, repl), data);
2712 assert_eq!("ab", "abc".slice(0, 2));
2713 assert_eq!("bc", "abc".slice(1, 3));
2714 assert_eq!("", "abc".slice(1, 1));
2715 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2717 let data = "ประเทศไทย中华";
2718 assert_eq!("ป", data.slice(0, 3));
2719 assert_eq!("ร", data.slice(3, 6));
2720 assert_eq!("", data.slice(3, 3));
2721 assert_eq!("华", data.slice(30, 33));
2723 fn a_million_letter_X() -> ~str {
2727 push_str(&mut rs, "华华华华华华华华华华");
2732 fn half_a_million_letter_X() -> ~str {
2735 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2738 let letters = a_million_letter_X();
2739 assert!(half_a_million_letter_X() ==
2740 letters.slice(0u, 3u * 500000u).to_owned());
2745 let ss = "中华Việt Nam";
2747 assert_eq!("华", ss.slice(3u, 6u));
2748 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2750 assert_eq!("ab", "abc".slice(0u, 2u));
2751 assert_eq!("bc", "abc".slice(1u, 3u));
2752 assert_eq!("", "abc".slice(1u, 1u));
2754 assert_eq!("中", ss.slice(0u, 3u));
2755 assert_eq!("华V", ss.slice(3u, 7u));
2756 assert_eq!("", ss.slice(3u, 3u));
2771 #[ignore(cfg(windows))]
2772 fn test_slice_fail() {
2773 "中华Việt Nam".slice(0u, 2u);
2777 fn test_slice_from() {
2778 assert_eq!("abcd".slice_from(0), "abcd");
2779 assert_eq!("abcd".slice_from(2), "cd");
2780 assert_eq!("abcd".slice_from(4), "");
2783 fn test_slice_to() {
2784 assert_eq!("abcd".slice_to(0), "");
2785 assert_eq!("abcd".slice_to(2), "ab");
2786 assert_eq!("abcd".slice_to(4), "abcd");
2790 fn test_trim_left_chars() {
2791 let v: &[char] = &[];
2792 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
2793 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2794 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
2795 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2797 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2798 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2799 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
2803 fn test_trim_right_chars() {
2804 let v: &[char] = &[];
2805 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
2806 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
2807 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
2808 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
2810 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
2811 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
2812 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
2816 fn test_trim_chars() {
2817 let v: &[char] = &[];
2818 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
2819 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
2820 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
2821 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
2823 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
2824 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
2825 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
2829 fn test_trim_left() {
2830 assert_eq!("".trim_left(), "");
2831 assert_eq!("a".trim_left(), "a");
2832 assert_eq!(" ".trim_left(), "");
2833 assert_eq!(" blah".trim_left(), "blah");
2834 assert_eq!(" \u3000 wut".trim_left(), "wut");
2835 assert_eq!("hey ".trim_left(), "hey ");
2839 fn test_trim_right() {
2840 assert_eq!("".trim_right(), "");
2841 assert_eq!("a".trim_right(), "a");
2842 assert_eq!(" ".trim_right(), "");
2843 assert_eq!("blah ".trim_right(), "blah");
2844 assert_eq!("wut \u3000 ".trim_right(), "wut");
2845 assert_eq!(" hey".trim_right(), " hey");
2850 assert_eq!("".trim(), "");
2851 assert_eq!("a".trim(), "a");
2852 assert_eq!(" ".trim(), "");
2853 assert_eq!(" blah ".trim(), "blah");
2854 assert_eq!("\nwut \u3000 ".trim(), "wut");
2855 assert_eq!(" hey dude ".trim(), "hey dude");
2859 fn test_is_whitespace() {
2860 assert!("".is_whitespace());
2861 assert!(" ".is_whitespace());
2862 assert!("\u2009".is_whitespace()); // Thin space
2863 assert!(" \n\t ".is_whitespace());
2864 assert!(!" _ ".is_whitespace());
2868 fn test_shift_byte() {
2870 let b = unsafe{raw::shift_byte(&mut s)};
2871 assert_eq!(s, ~"BC");
2872 assert_eq!(b, 65u8);
2876 fn test_pop_byte() {
2878 let b = unsafe{raw::pop_byte(&mut s)};
2879 assert_eq!(s, ~"AB");
2880 assert_eq!(b, 67u8);
2884 fn test_unsafe_from_bytes() {
2885 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2886 let b = unsafe { raw::from_bytes(a) };
2887 assert_eq!(b, ~"AAAAAAA");
2891 fn test_from_bytes() {
2892 let ss = ~"ศไทย中华Việt Nam";
2893 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2894 0xe0_u8, 0xb9_u8, 0x84_u8,
2895 0xe0_u8, 0xb8_u8, 0x97_u8,
2896 0xe0_u8, 0xb8_u8, 0xa2_u8,
2897 0xe4_u8, 0xb8_u8, 0xad_u8,
2898 0xe5_u8, 0x8d_u8, 0x8e_u8,
2899 0x56_u8, 0x69_u8, 0xe1_u8,
2900 0xbb_u8, 0x87_u8, 0x74_u8,
2901 0x20_u8, 0x4e_u8, 0x61_u8,
2904 assert_eq!(ss, from_bytes(bb));
2908 #[ignore(cfg(windows))]
2909 fn test_from_bytes_fail() {
2910 use str::not_utf8::cond;
2912 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2913 0xe0_u8, 0xb9_u8, 0x84_u8,
2914 0xe0_u8, 0xb8_u8, 0x97_u8,
2915 0xe0_u8, 0xb8_u8, 0xa2_u8,
2916 0xe4_u8, 0xb8_u8, 0xad_u8,
2917 0xe5_u8, 0x8d_u8, 0x8e_u8,
2918 0x56_u8, 0x69_u8, 0xe1_u8,
2919 0xbb_u8, 0x87_u8, 0x74_u8,
2920 0x20_u8, 0x4e_u8, 0x61_u8,
2923 let mut error_happened = false;
2924 let _x = do cond.trap(|err| {
2925 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
2926 error_happened = true;
2931 assert!(error_happened);
2935 fn test_unsafe_from_bytes_with_null() {
2936 let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2937 let b = unsafe { raw::from_bytes_with_null(a) };
2938 assert_eq!(b, "AAAAAAA");
2942 fn test_from_bytes_with_null() {
2943 let ss = "ศไทย中华Việt Nam";
2944 let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2945 0xe0_u8, 0xb9_u8, 0x84_u8,
2946 0xe0_u8, 0xb8_u8, 0x97_u8,
2947 0xe0_u8, 0xb8_u8, 0xa2_u8,
2948 0xe4_u8, 0xb8_u8, 0xad_u8,
2949 0xe5_u8, 0x8d_u8, 0x8e_u8,
2950 0x56_u8, 0x69_u8, 0xe1_u8,
2951 0xbb_u8, 0x87_u8, 0x74_u8,
2952 0x20_u8, 0x4e_u8, 0x61_u8,
2955 assert_eq!(ss, from_bytes_with_null(bb));
2960 #[ignore(cfg(windows))]
2961 fn test_from_bytes_with_null_fail() {
2962 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2963 0xe0_u8, 0xb9_u8, 0x84_u8,
2964 0xe0_u8, 0xb8_u8, 0x97_u8,
2965 0xe0_u8, 0xb8_u8, 0xa2_u8,
2966 0xe4_u8, 0xb8_u8, 0xad_u8,
2967 0xe5_u8, 0x8d_u8, 0x8e_u8,
2968 0x56_u8, 0x69_u8, 0xe1_u8,
2969 0xbb_u8, 0x87_u8, 0x74_u8,
2970 0x20_u8, 0x4e_u8, 0x61_u8,
2973 let _x = from_bytes_with_null(bb);
2978 #[ignore(cfg(windows))]
2979 fn test_from_bytes_with_null_fail_2() {
2980 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2981 0xe0_u8, 0xb9_u8, 0x84_u8,
2982 0xe0_u8, 0xb8_u8, 0x97_u8,
2983 0xe0_u8, 0xb8_u8, 0xa2_u8,
2984 0xe4_u8, 0xb8_u8, 0xad_u8,
2985 0xe5_u8, 0x8d_u8, 0x8e_u8,
2986 0x56_u8, 0x69_u8, 0xe1_u8,
2987 0xbb_u8, 0x87_u8, 0x74_u8,
2988 0x20_u8, 0x4e_u8, 0x61_u8,
2991 let _x = from_bytes_with_null(bb);
2995 fn test_from_buf() {
2997 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2998 let b = vec::raw::to_ptr(a);
2999 let c = raw::from_buf(b);
3000 assert_eq!(c, ~"AAAAAAA");
3005 fn test_as_bytes() {
3008 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3009 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3012 assert_eq!("".as_bytes(), &[]);
3013 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3014 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
3018 fn test_as_bytes_with_null() {
3021 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3022 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3028 let s3 = @"ศไทย中华Việt Nam";
3029 assert_eq!(s1.as_bytes_with_null(), &[0]);
3030 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3031 assert_eq!(s3.as_bytes_with_null(), v);
3035 let s3 = ~"ศไทย中华Việt Nam";
3036 assert_eq!(s1.as_bytes_with_null(), &[0]);
3037 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3038 assert_eq!(s3.as_bytes_with_null(), v);
3042 fn test_to_bytes_with_null() {
3043 let s = ~"ศไทย中华Việt Nam";
3045 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3046 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3049 assert_eq!((~"").to_bytes_with_null(), ~[0]);
3050 assert_eq!((~"abc").to_bytes_with_null(),
3051 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3052 assert_eq!(s.to_bytes_with_null(), v);
3056 #[ignore(cfg(windows))]
3058 fn test_as_bytes_fail() {
3059 // Don't double free. (I'm not sure if this exercises the
3060 // original problem code path anymore.)
3062 let _bytes = s.as_bytes_with_null();
3067 fn test_as_imm_buf() {
3068 do "".as_imm_buf |buf, len| {
3071 assert_eq!(*ptr::offset(buf, 0), 0);
3075 do "hello".as_imm_buf |buf, len| {
3078 assert_eq!(*ptr::offset(buf, 0), 'h' as u8);
3079 assert_eq!(*ptr::offset(buf, 1), 'e' as u8);
3080 assert_eq!(*ptr::offset(buf, 2), 'l' as u8);
3081 assert_eq!(*ptr::offset(buf, 3), 'l' as u8);
3082 assert_eq!(*ptr::offset(buf, 4), 'o' as u8);
3083 assert_eq!(*ptr::offset(buf, 5), 0);
3089 fn test_as_c_str() {
3091 do a.as_c_str |buf| {
3093 assert_eq!(*ptr::offset(buf, 0), 0);
3098 do a.as_c_str |buf| {
3100 assert_eq!(*ptr::offset(buf, 0), 'h' as libc::c_char);
3101 assert_eq!(*ptr::offset(buf, 1), 'e' as libc::c_char);
3102 assert_eq!(*ptr::offset(buf, 2), 'l' as libc::c_char);
3103 assert_eq!(*ptr::offset(buf, 3), 'l' as libc::c_char);
3104 assert_eq!(*ptr::offset(buf, 4), 'o' as libc::c_char);
3105 assert_eq!(*ptr::offset(buf, 5), 0);
3111 fn test_subslice_offset() {
3112 let a = "kernelsprite";
3113 let b = a.slice(7, a.len());
3114 let c = a.slice(0, a.len() - 6);
3115 assert_eq!(a.subslice_offset(b), 7);
3116 assert_eq!(a.subslice_offset(c), 0);
3118 let string = "a\nb\nc";
3119 let mut lines = ~[];
3120 for string.line_iter().advance |line| { lines.push(line) }
3121 assert_eq!(string.subslice_offset(lines[0]), 0);
3122 assert_eq!(string.subslice_offset(lines[1]), 2);
3123 assert_eq!(string.subslice_offset(lines[2]), 4);
3128 fn test_subslice_offset_2() {
3129 let a = "alchemiter";
3130 let b = "cruxtruder";
3131 a.subslice_offset(b);
3135 fn vec_str_conversions() {
3136 let s1: ~str = ~"All mimsy were the borogoves";
3138 let v: ~[u8] = s1.as_bytes().to_owned();
3139 let s2: ~str = from_bytes(v);
3140 let mut i: uint = 0u;
3141 let n1: uint = s1.len();
3142 let n2: uint = v.len();
3155 fn test_contains() {
3156 assert!("abcde".contains("bcd"));
3157 assert!("abcde".contains("abcd"));
3158 assert!("abcde".contains("bcde"));
3159 assert!("abcde".contains(""));
3160 assert!("".contains(""));
3161 assert!(!"abcde".contains("def"));
3162 assert!(!"".contains("a"));
3164 let data = ~"ประเทศไทย中华Việt Nam";
3165 assert!(data.contains("ประเ"));
3166 assert!(data.contains("ะเ"));
3167 assert!(data.contains("中华"));
3168 assert!(!data.contains("ไท华"));
3172 fn test_contains_char() {
3173 assert!("abc".contains_char('b'));
3174 assert!("a".contains_char('a'));
3175 assert!(!"abc".contains_char('d'));
3176 assert!(!"".contains_char('a'));
3181 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3182 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3189 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3190 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3191 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3192 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3195 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3196 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3197 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3198 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3199 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3202 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3203 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3204 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3205 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3206 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3207 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3208 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3209 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3211 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3212 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3213 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3214 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3215 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3216 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3217 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3218 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3219 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3220 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3221 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3224 for pairs.iter().advance |p| {
3225 let (s, u) = (*p).clone();
3226 assert!(s.to_utf16() == u);
3227 assert!(from_utf16(u) == s);
3228 assert!(from_utf16(s.to_utf16()) == s);
3229 assert!(from_utf16(u).to_utf16() == u);
3235 let s = ~"ศไทย中华Việt Nam";
3236 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3238 for v.iter().advance |ch| {
3239 assert!(s.char_at(pos) == *ch);
3240 pos += from_char(*ch).len();
3245 fn test_char_at_reverse() {
3246 let s = ~"ศไทย中华Việt Nam";
3247 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3248 let mut pos = s.len();
3249 for v.rev_iter().advance |ch| {
3250 assert!(s.char_at_reverse(pos) == *ch);
3251 pos -= from_char(*ch).len();
3256 fn test_escape_unicode() {
3257 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3258 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3259 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3260 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3261 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3262 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3263 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3264 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3265 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
3269 fn test_escape_default() {
3270 assert_eq!("abc".escape_default(), ~"abc");
3271 assert_eq!("a c".escape_default(), ~"a c");
3272 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3273 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3274 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3275 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3276 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3277 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
3281 fn test_to_managed() {
3282 assert_eq!("abc".to_managed(), @"abc");
3283 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
// Checks the ordering of string slices through `cmp`. The cases cover a
// string against its own proper prefix (both directions), two equal
// strings, and two pairs where an earlier differing character decides the
// result even though the other string is longer or shorter.
fn test_total_ord() {
    // Each comparison must be asserted. A bare `a.cmp(b) == X;` statement
    // only computes a bool and discards it, so it could never fail the test.
    assert_eq!("1234".cmp(& &"123"), Greater);
    assert_eq!("123".cmp(& &"1234"), Less);
    assert_eq!("1234".cmp(& &"1234"), Equal);
    assert_eq!("12345555".cmp(& &"123456"), Less);
    assert_eq!("22".cmp(& &"1234"), Greater);
}
3296 fn test_char_range_at() {
3297 let data = ~"b¢€𤭢𤭢€¢b";
3298 assert_eq!('b', data.char_range_at(0).ch);
3299 assert_eq!('¢', data.char_range_at(1).ch);
3300 assert_eq!('€', data.char_range_at(3).ch);
3301 assert_eq!('𤭢', data.char_range_at(6).ch);
3302 assert_eq!('𤭢', data.char_range_at(10).ch);
3303 assert_eq!('€', data.char_range_at(14).ch);
3304 assert_eq!('¢', data.char_range_at(17).ch);
3305 assert_eq!('b', data.char_range_at(19).ch);
3309 fn test_char_range_at_reverse_underflow() {
3310 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3315 #[allow(unnecessary_allocation)];
3317 ($s1:expr, $s2:expr, $e:expr) => {
3318 assert_eq!($s1 + $s2, $e);
3319 assert_eq!($s1.to_owned() + $s2, $e);
3320 assert_eq!($s1.to_managed() + $s2, $e);
3324 t!("foo", "bar", ~"foobar");
3325 t!("foo", @"bar", ~"foobar");
3326 t!("foo", ~"bar", ~"foobar");
3327 t!("ศไทย中", "华Việt Nam", ~"ศไทย中华Việt Nam");
3328 t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3329 t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
3333 fn test_iterator() {
3335 let s = ~"ศไทย中华Việt Nam";
3336 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3339 let mut it = s.iter();
3341 for it.advance |c| {
3342 assert_eq!(c, v[pos]);
3345 assert_eq!(pos, v.len());
3349 fn test_rev_iterator() {
3351 let s = ~"ศไทย中华Việt Nam";
3352 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3355 let mut it = s.rev_iter();
3357 for it.advance |c| {
3358 assert_eq!(c, v[pos]);
3361 assert_eq!(pos, v.len());
3365 fn test_bytes_iterator() {
3366 let s = ~"ศไทย中华Việt Nam";
3368 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3369 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3374 for s.bytes_iter().advance |b| {
3375 assert_eq!(b, v[pos]);
3381 fn test_bytes_rev_iterator() {
3382 let s = ~"ศไทย中华Việt Nam";
3384 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3385 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3388 let mut pos = v.len();
3390 for s.bytes_rev_iter().advance |b| {
3392 assert_eq!(b, v[pos]);
3397 fn test_split_char_iterator() {
3398 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3400 let split: ~[&str] = data.split_iter(' ').collect();
3401 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3403 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3404 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3407 let split: ~[&str] = data.split_iter('ä').collect();
3408 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3410 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3411 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3414 fn test_splitn_char_iterator() {
3415 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3417 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3418 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3420 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3421 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3424 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3425 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3427 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3428 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3432 fn test_split_char_iterator_no_trailing() {
3433 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3435 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3436 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3438 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3439 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
3443 fn test_word_iter() {
3444 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3445 let words: ~[&str] = data.word_iter().collect();
3446 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
3450 fn test_line_iter() {
3451 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3452 let lines: ~[&str] = data.line_iter().collect();
3453 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3455 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3456 let lines: ~[&str] = data.line_iter().collect();
3457 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3461 fn test_split_str_iterator() {
3462 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3463 let v: ~[&str] = s.split_str_iter(sep).collect();
3466 t("--1233345--", "12345", ~["--1233345--"]);
3467 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3468 t("::hello::there", "::", ~["", "hello", "there"]);
3469 t("hello::there::", "::", ~["hello", "there", ""]);
3470 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3471 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3472 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3473 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3474 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3476 t("zz", "zz", ~["",""]);
3477 t("ok", "z", ~["ok"]);
3478 t("zzz", "zz", ~["","z"]);
3479 t("zzzzz", "zz", ~["","","z"]);
3483 fn test_str_zero() {
3485 fn t<S: Zero + Str>() {
3486 let s: S = Zero::zero();
3487 assert_eq!(s.as_slice(), "");
3488 assert!(s.is_zero());
3497 fn test_str_container() {
3498 fn sum_len<S: Container>(v: &[S]) -> uint {
3499 v.iter().transform(|x| x.len()).sum()
3503 assert_eq!(5, sum_len(["012", "", "34"]));
3504 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3505 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3506 assert_eq!(5, sum_len([s.as_slice()]));
3512 use extra::test::BenchHarness;
3516 fn is_utf8_100_ascii(bh: &mut BenchHarness) {
3518 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
3519 Lorem ipsum dolor sit amet, consectetur. ");
3521 assert_eq!(100, s.len());
3528 fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
3529 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
3530 assert_eq!(100, s.len());
3537 fn map_chars_100_ascii(bh: &mut BenchHarness) {
3538 let s = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\
3539 HelloHelloHelloHelloHelloHelloHelloHelloHelloHello";
3541 s.map_chars(|c| ((c as uint) + 1) as char);
3546 fn map_chars_100_multibytes(bh: &mut BenchHarness) {
3547 let s = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3548 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3549 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3550 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑";
3552 s.map_chars(|c| ((c as uint) + 1) as char);