1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as
15 * null-terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8-unsafe operations should be avoided.
24 use container::{Container, Mutable};
26 use iterator::{Iterator, FromIterator, Extendable, IteratorUtil};
27 use iterator::{Filter, AdditiveIterator, Map};
28 use iterator::{Invert, DoubleEndedIterator, DoubleEndedIteratorUtil};
31 use option::{None, Option, Some};
36 use unstable::raw::Repr;
38 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
45 not_utf8: (~str) -> ~str;
49 Section: Creating a string
52 /// Convert a vector of bytes to a new UTF-8 string
56 /// Raises the `not_utf8` condition if invalid UTF-8
57 pub fn from_bytes(vv: &[u8]) -> ~str {
58 use str::not_utf8::cond;
61 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).unwrap();
62 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
63 first_bad_byte as uint))
65 return unsafe { raw::from_bytes(vv) }
69 /// Consumes a vector of bytes to create a new utf-8 string
73 /// Raises the `not_utf8` condition if invalid UTF-8
74 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
75 use str::not_utf8::cond;
78 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).unwrap();
79 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
80 first_bad_byte as uint))
82 return unsafe { raw::from_bytes_owned(vv) }
86 /// Convert a vector of bytes to a UTF-8 string.
87 /// The vector needs to be one byte longer than the string, and end with a 0 byte.
89 /// Compared to `from_bytes()`, this fn doesn't need to allocate a new owned str.
93 /// Fails if invalid UTF-8
94 /// Fails if not null terminated
95 pub fn from_bytes_with_null<'a>(vv: &'a [u8]) -> &'a str {
96 assert_eq!(vv[vv.len() - 1], 0);
98 return unsafe { raw::from_bytes_with_null(vv) };
101 /// Converts a vector to a string slice without performing any allocations.
103 /// Once the slice has been validated as utf-8, it is transmuted in-place and
104 /// returned as a '&str' instead of a '&[u8]'
108 /// Fails if invalid UTF-8
109 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
111 assert!(is_utf8(vector));
112 let mut s = vector.repr();
118 impl ToStr for ~str {
120 fn to_str(&self) -> ~str { self.to_owned() }
122 impl<'self> ToStr for &'self str {
124 fn to_str(&self) -> ~str { self.to_owned() }
126 impl ToStr for @str {
128 fn to_str(&self) -> ~str { self.to_owned() }
131 /// Convert a byte to a UTF-8 string
135 /// Fails if invalid UTF-8
136 pub fn from_byte(b: u8) -> ~str {
138 unsafe { cast::transmute(~[b, 0u8]) }
141 /// Convert a char to a string
142 pub fn from_char(ch: char) -> ~str {
148 /// Convert a vector of chars to a string
149 pub fn from_chars(chs: &[char]) -> ~str {
151 buf.reserve(chs.len());
152 for ch in chs.iter() {
159 pub fn push_str(lhs: &mut ~str, rhs: &str) {
163 #[allow(missing_doc)]
164 pub trait StrVector {
165 pub fn concat(&self) -> ~str;
166 pub fn connect(&self, sep: &str) -> ~str;
169 impl<'self, S: Str> StrVector for &'self [S] {
170 /// Concatenate a vector of strings.
171 pub fn concat(&self) -> ~str {
172 if self.is_empty() { return ~""; }
174 let len = self.iter().transform(|s| s.as_slice().len()).sum();
176 let mut s = with_capacity(len);
179 do s.as_mut_buf |buf, _| {
181 for ss in self.iter() {
182 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
183 let sslen = sslen - 1;
184 ptr::copy_memory(buf, ssbuf, sslen);
185 buf = buf.offset(sslen as int);
189 raw::set_len(&mut s, len);
194 /// Concatenate a vector of strings, placing a given separator between each.
195 pub fn connect(&self, sep: &str) -> ~str {
196 if self.is_empty() { return ~""; }
199 if sep.is_empty() { return self.concat(); }
201 // this is wrong without the guarantee that `self` is non-empty
202 let len = sep.len() * (self.len() - 1)
203 + self.iter().transform(|s| s.as_slice().len()).sum();
205 let mut first = true;
210 do s.as_mut_buf |buf, _| {
211 do sep.as_imm_buf |sepbuf, seplen| {
212 let seplen = seplen - 1;
213 let mut buf = cast::transmute_mut_unsafe(buf);
214 for ss in self.iter() {
215 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
216 let sslen = sslen - 1;
220 ptr::copy_memory(buf, sepbuf, seplen);
221 buf = buf.offset(seplen as int);
223 ptr::copy_memory(buf, ssbuf, sslen);
224 buf = buf.offset(sslen as int);
229 raw::set_len(&mut s, len);
235 /// Something that can be used to compare against a character
237 /// Determine if the splitter should split at the given character
238 fn matches(&self, char) -> bool;
239 /// Indicate if this is only concerned about ASCII characters,
240 /// which can allow for a faster implementation.
241 fn only_ascii(&self) -> bool;
244 impl CharEq for char {
246 fn matches(&self, c: char) -> bool { *self == c }
248 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
251 impl<'self> CharEq for &'self fn(char) -> bool {
253 fn matches(&self, c: char) -> bool { (*self)(c) }
255 fn only_ascii(&self) -> bool { false }
258 impl CharEq for extern "Rust" fn(char) -> bool {
260 fn matches(&self, c: char) -> bool { (*self)(c) }
262 fn only_ascii(&self) -> bool { false }
265 impl<'self, C: CharEq> CharEq for &'self [C] {
267 fn matches(&self, c: char) -> bool {
268 self.iter().any(|m| m.matches(c))
271 fn only_ascii(&self) -> bool {
272 self.iter().all(|m| m.only_ascii())
280 /// External iterator for a string's characters and their byte offsets.
281 /// Use with the `std::iterator` module.
283 pub struct CharOffsetIterator<'self> {
284 priv index_front: uint,
285 priv index_back: uint,
286 priv string: &'self str,
289 impl<'self> Iterator<(uint, char)> for CharOffsetIterator<'self> {
291 fn next(&mut self) -> Option<(uint, char)> {
292 if self.index_front < self.index_back {
293 let CharRange {ch, next} = self.string.char_range_at(self.index_front);
294 let index = self.index_front;
295 self.index_front = next;
303 impl<'self> DoubleEndedIterator<(uint, char)> for CharOffsetIterator<'self> {
305 fn next_back(&mut self) -> Option<(uint, char)> {
306 if self.index_front < self.index_back {
307 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index_back);
308 self.index_back = next;
316 /// External iterator for a string's characters and their byte offsets in reverse order.
317 /// Use with the `std::iterator` module. This is the type returned by `char_offset_rev_iter`.
318 pub type CharOffsetRevIterator<'self> =
319 Invert<CharOffsetIterator<'self>>; // the forward `CharOffsetIterator` wrapped in the `Invert` adaptor
321 /// External iterator for a string's characters.
322 /// Use with the `std::iterator` module. This is the type returned by `iter`.
323 pub type CharIterator<'self> =
324 Map<'self, (uint, char), char, CharOffsetIterator<'self>>; // maps each `(uint, char)` item of `CharOffsetIterator` to a `char`
326 /// External iterator for a string's characters in reverse order.
327 /// Use with the `std::iterator` module. This is the type returned by `rev_iter`.
328 pub type CharRevIterator<'self> =
329 Invert<Map<'self, (uint, char), char, CharOffsetIterator<'self>>>; // same `Map` shape as `CharIterator`, wrapped in `Invert`
331 /// External iterator for a string's bytes.
332 /// Use with the `std::iterator` module. This is the type returned by `byte_iter`.
333 pub type ByteIterator<'self> =
334 Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>; // maps each `&u8` item of a `vec::VecIterator` to a `u8` value
336 /// External iterator for a string's bytes in reverse order.
337 /// Use with the `std::iterator` module. This is the type returned by `byte_rev_iter`.
338 pub type ByteRevIterator<'self> =
339 Invert<Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>>; // same `Map` shape as `ByteIterator`, wrapped in `Invert`
341 /// An iterator over the substrings of a string, separated by `sep`.
343 pub struct CharSplitIterator<'self,Sep> {
344 priv string: &'self str,
347 /// The number of splits remaining
349 /// Whether an empty string at the end is allowed
350 priv allow_trailing_empty: bool,
352 priv only_ascii: bool
355 /// An iterator over the words of a string, separated by a sequence of whitespace. This is the type returned by `word_iter`.
356 pub type WordIterator<'self> =
357 Filter<'self, &'self str, CharSplitIterator<'self, extern "Rust" fn(char) -> bool>>; // a `CharSplitIterator` with a `fn(char) -> bool` separator, wrapped in `Filter`
359 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`). This is the type returned by `any_line_iter`.
360 pub type AnyLineIterator<'self> =
361 Map<'self, &'self str, &'self str, CharSplitIterator<'self, char>>; // a `char`-separated `CharSplitIterator` whose `&str` items are mapped to `&str`
363 impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep> {
365 fn next(&mut self) -> Option<&'self str> {
366 if self.finished { return None }
368 let l = self.string.len();
369 let start = self.position;
372 // this gives a *huge* speed up for splitting on ASCII
373 // characters (e.g. '\n' or ' ')
374 while self.position < l && self.count > 0 {
375 let byte = self.string[self.position];
377 if self.sep.matches(byte as char) {
378 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
386 while self.position < l && self.count > 0 {
387 let CharRange {ch, next} = self.string.char_range_at(self.position);
389 if self.sep.matches(ch) {
390 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
391 self.position = next;
395 self.position = next;
398 self.finished = true;
399 if self.allow_trailing_empty || start < l {
400 Some(unsafe { raw::slice_bytes(self.string, start, l) })
407 /// An iterator over the start and end indices of the matches of a
408 /// substring within a larger string
410 pub struct MatchesIndexIterator<'self> {
411 priv haystack: &'self str,
412 priv needle: &'self str,
416 /// An iterator over the substrings of a string separated by a given
419 pub struct StrSplitIterator<'self> {
420 priv it: MatchesIndexIterator<'self>,
425 impl<'self> Iterator<(uint, uint)> for MatchesIndexIterator<'self> {
427 fn next(&mut self) -> Option<(uint, uint)> {
428 // See Issue #1932 for why this is a naive search
429 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
430 let mut match_start = 0;
433 while self.position < h_len {
434 if self.haystack[self.position] == self.needle[match_i] {
435 if match_i == 0 { match_start = self.position; }
439 if match_i == n_len {
441 return Some((match_start, self.position));
444 // failed match, backtrack
447 self.position = match_start;
456 impl<'self> Iterator<&'self str> for StrSplitIterator<'self> {
458 fn next(&mut self) -> Option<&'self str> {
459 if self.finished { return None; }
461 match self.it.next() {
462 Some((from, to)) => {
463 let ret = Some(self.it.haystack.slice(self.last_end, from));
468 self.finished = true;
469 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
475 /// Replace all occurrences of one string with another
479 /// * s - The string containing substrings to replace
480 /// * from - The string to replace
481 /// * to - The replacement string
485 /// The original string with all occurrences of `from` replaced with `to`
486 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
487 let mut result = ~"";
488 let mut last_end = 0;
489 for (start, end) in s.matches_index_iter(from) {
490 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
494 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
499 Section: Comparing strings
502 /// Bytewise slice equality
506 pub fn eq_slice(a: &str, b: &str) -> bool {
507 do a.as_imm_buf |ap, alen| {
508 do b.as_imm_buf |bp, blen| {
509 if (alen != blen) { false }
512 libc::memcmp(ap as *libc::c_void,
514 (alen - 1) as libc::size_t) == 0
523 pub fn eq_slice(a: &str, b: &str) -> bool {
524 do a.as_imm_buf |ap, alen| {
525 do b.as_imm_buf |bp, blen| {
526 if (alen != blen) { false }
529 libc::memcmp(ap as *libc::c_void,
531 (alen - 1) as libc::size_t) == 0
538 /// Bytewise string equality
540 #[lang="uniq_str_eq"]
542 pub fn eq(a: &~str, b: &~str) -> bool {
548 pub fn eq(a: &~str, b: &~str) -> bool {
556 // Utility used by various searching functions
557 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
559 for c in needle.byte_iter() { if haystack[i] != c { return false; } i += 1u; }
567 /// Determines if a vector of bytes contains valid UTF-8
568 pub fn is_utf8(v: &[u8]) -> bool {
571 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
572 unsafe { *xs.unsafe_ref(i) }
575 let v_i = unsafe_get(v, i);
579 let w = utf8_char_width(v_i);
580 if w == 0u { return false; }
583 if nexti > total { return false; }
585 // 2-byte encoding is for codepoints \u0080 to \u07ff
586 // first C2 80 last DF BF
587 // 3-byte encoding is for codepoints \u0800 to \uffff
588 // first E0 A0 80 last EF BF BF
589 // 4-byte encoding is for codepoints \u10000 to \u10ffff
590 // first F0 90 80 80 last F4 8F BF BF
592 // Use the UTF-8 syntax from the RFC
594 // https://tools.ietf.org/html/rfc3629
596 // UTF8-2 = %xC2-DF UTF8-tail
597 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
598 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
599 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
600 // %xF4 %x80-8F 2( UTF8-tail )
601 // UTF8-tail = %x80-BF
603 // This code allows surrogate pairs: \uD800 to \uDFFF -> ED A0 80 to ED BF BF
605 2 => if unsafe_get(v, i + 1) & 192u8 != TAG_CONT_U8 {
609 unsafe_get(v, i + 1),
610 unsafe_get(v, i + 2) & 192u8) {
611 (0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) => (),
612 (0xE1 .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => (),
616 unsafe_get(v, i + 1),
617 unsafe_get(v, i + 2) & 192u8,
618 unsafe_get(v, i + 3) & 192u8) {
619 (0xF0 , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) => (),
620 (0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) => (),
621 (0xF4 , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => (),
632 /// Determines if a vector of `u16` contains valid UTF-16
633 pub fn is_utf16(v: &[u16]) -> bool {
639 if u <= 0xD7FF_u16 || u >= 0xE000_u16 { // true when `u` lies outside the surrogate range 0xD800-0xDFFF
643 if i+1u < len { return false; } // NOTE(review): rejects the input whenever another unit follows at `i+1`; if this guards a read of `v[i+1u]` the comparison looks inverted (`>=` expected) - confirm against the full function
645 if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; } // NOTE(review): if this only runs when the test on `u` above failed, `u < 0xD7FF_u16` can never hold; 0xD800 may have been intended - confirm
646 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; } // `u2` must lie in 0xDC00-0xDFFF
653 /// Iterates over the utf-16 characters in the specified slice, yielding each
654 /// decoded unicode character to the function provided.
658 /// * Fails on invalid utf-16 data
659 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
662 while (i < len && v[i] != 0u16) { // stops at the end of the slice or at the first 0 unit
665 if u <= 0xD7FF_u16 || u >= 0xE000_u16 { // true when `u` lies outside the surrogate range 0xD800-0xDFFF
671 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16); // `u` must lie in 0xD800-0xDBFF
672 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16); // `u2` must lie in 0xDC00-0xDFFF
673 let mut c = (u - 0xD800_u16) as char;
675 c |= (u2 - 0xDC00_u16) as char;
676 c |= 0x1_0000_u32 as char; // NOTE(review): the UTF-16 surrogate formula adds 0x10000 to the combined value; if bit 16 of `c` can already be set here, `|=` gives a different result than `+=` - confirm
683 /// Allocates a new string from the utf-16 slice provided
684 pub fn from_utf16(v: &[u16]) -> ~str {
686 buf.reserve(v.len());
687 utf16_chars(v, |ch| buf.push_char(ch));
691 /// Allocates a new string with the specified capacity. The string returned is
692 /// the empty string, but has capacity for much more.
694 pub fn with_capacity(capacity: uint) -> ~str {
696 buf.reserve(capacity);
700 /// As char_len but for a slice of a string
704 /// * s - A valid string
705 /// * start - The position inside `s` where to start counting in bytes
706 /// * end - The position where to stop counting
710 /// The number of Unicode characters in `s` between the given indices.
711 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
712 assert!(s.is_char_boundary(start));
713 assert!(s.is_char_boundary(end));
717 let next = s.char_range_at(i).next;
724 /// Counts the number of bytes taken by the first `n` chars in `s`
725 /// starting from `start`.
726 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
727 assert!(s.is_char_boundary(start));
733 let next = s.char_range_at(end).next;
740 // https://tools.ietf.org/html/rfc3629
741 static UTF8_CHAR_WIDTH: [u8, ..256] = [
742 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
743 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
744 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
745 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
746 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
747 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
748 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
749 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
750 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
751 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
752 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
753 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
754 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
755 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
756 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
757 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
760 /// Given a first byte, determine how many bytes are in this UTF-8 character
761 pub fn utf8_char_width(b: u8) -> uint {
762 return UTF8_CHAR_WIDTH[b] as uint;
765 #[allow(missing_doc)]
766 pub struct CharRange {
771 // Return the initial codepoint accumulator for the first byte.
772 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
773 // for width 3, and 3 bits for width 4
774 macro_rules! utf8_first_byte(
775 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as uint)
778 // return the value of $ch updated with continuation byte $byte
779 macro_rules! utf8_acc_cont_byte(
780 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
783 // UTF-8 tags and ranges
784 static TAG_CONT_U8: u8 = 128u8; // 0b1000_0000; `is_utf8` compares `byte & 192u8` with this to test for a continuation byte
785 static TAG_CONT: uint = 128u; // same value as TAG_CONT_U8, typed as uint
786 static MAX_ONE_B: uint = 128u; // 0x80 - presumably one past the largest code point encoded in 1 byte; confirm at use sites
787 static TAG_TWO_B: uint = 192u; // 0b1100_0000 - presumably the lead-byte tag of a 2-byte sequence; confirm at use sites
788 static MAX_TWO_B: uint = 2048u; // 0x800 - presumably one past the largest code point encoded in 2 bytes; confirm at use sites
789 static TAG_THREE_B: uint = 224u; // 0b1110_0000 - presumably the lead-byte tag of a 3-byte sequence; confirm at use sites
790 static MAX_THREE_B: uint = 65536u; // 0x10000 - presumably one past the largest code point encoded in 3 bytes; confirm at use sites
791 static TAG_FOUR_B: uint = 240u; // 0b1111_0000 - presumably the lead-byte tag of a 4-byte sequence; confirm at use sites
792 static MAX_UNICODE: uint = 1114112u; // 0x110000 - presumably one past the largest code point (0x10FFFF); confirm at use sites
794 /// Unsafe operations
802 use vec::MutableVector;
803 use unstable::raw::{Slice, String};
805 /// Create a Rust string from a null-terminated *u8 buffer
806 pub unsafe fn from_buf(buf: *u8) -> ~str {
811 curr = ptr::offset(buf, i as int);
813 return from_buf_len(buf, i);
816 /// Create a Rust string from a *u8 buffer of the given length
817 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
818 let mut v: ~[u8] = vec::with_capacity(len + 1);
819 v.as_mut_buf(|vbuf, _len| {
820 ptr::copy_memory(vbuf, buf as *u8, len)
822 vec::raw::set_len(&mut v, len);
826 return cast::transmute(v);
829 /// Create a Rust string from a null-terminated C string
830 pub unsafe fn from_c_str(c_str: *libc::c_char) -> ~str {
831 from_buf(c_str as *u8)
834 /// Create a Rust string from a `*c_char` buffer of the given length
835 pub unsafe fn from_c_str_len(c_str: *libc::c_char, len: uint) -> ~str {
836 from_buf_len(c_str as *u8, len)
839 /// Converts a vector of bytes to a new owned string.
840 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
841 do v.as_imm_buf |buf, len| {
842 from_buf_len(buf, len)
846 /// Converts an owned vector of bytes to a new owned string. This assumes
847 /// that the utf-8-ness of the vector has already been validated
848 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
853 /// Converts a vector of bytes to a string.
854 /// The byte slice needs to contain valid utf8 and needs to be one byte longer than
855 /// the string, if possible ending in a 0 byte.
856 pub unsafe fn from_bytes_with_null<'a>(v: &'a [u8]) -> &'a str {
860 /// Converts a byte to a string by passing the one-element vector `[u]` to `from_bytes`.
861 pub unsafe fn from_byte(u: u8) -> ~str { from_bytes([u]) } // NOTE(review): no UTF-8 check is visible on this line; presumably `from_bytes` here is the unchecked variant - confirm
863 /// Form a slice from a C string. Unsafe because the caller must ensure the
864 /// C string has the static lifetime, or else the return value may be
865 /// invalidated later.
866 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
872 curr = ptr::offset(s, len as int);
874 let v = Slice { data: s, len: len + 1 };
875 assert!(is_utf8(cast::transmute(v)));
879 /// Takes a bytewise (not UTF-8) slice from a string.
881 /// Returns the substring from [`begin`..`end`).
885 /// If begin is greater than end.
886 /// If end is greater than the length of the string.
888 pub unsafe fn slice_bytes<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
889 do s.as_imm_buf |sbuf, n| {
890 assert!((begin <= end));
893 cast::transmute(Slice {
894 data: ptr::offset(sbuf, begin as int),
895 len: end - begin + 1,
900 /// Appends a byte to a string. (Not UTF-8 safe).
901 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
902 let new_len = s.len() + 1;
903 s.reserve_at_least(new_len);
904 do s.as_mut_buf |buf, len| {
905 *ptr::mut_offset(buf, (len-1) as int) = b;
907 set_len(&mut *s, new_len);
910 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
911 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
912 let new_len = s.len() + bytes.len();
913 s.reserve_at_least(new_len);
914 for byte in bytes.iter() { push_byte(&mut *s, *byte); }
917 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
918 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
922 set_len(s, len - 1u);
926 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
927 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
931 *s = s.slice(1, len).to_owned();
935 /// Sets the length of the string and adds the null terminator
937 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
938 let v: **mut String = cast::transmute(v);
940 (*repr).fill = new_len + 1u;
941 let null = ptr::mut_offset(&mut ((*repr).data), new_len as int);
946 fn test_from_buf_len() {
948 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
949 let b = vec::raw::to_ptr(a);
950 let c = from_buf_len(b, 3u);
951 assert_eq!(c, ~"AAA");
958 Section: Trait implementations
964 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
965 use super::{Str, eq_slice};
966 use option::{Some, None};
968 impl<'self> Add<&'self str,~str> for &'self str {
970 fn add(&self, rhs: & &'self str) -> ~str {
971 let mut ret = self.to_owned();
977 impl<'self> TotalOrd for &'self str {
979 fn cmp(&self, other: & &'self str) -> Ordering {
980 for (s_b, o_b) in self.byte_iter().zip(other.byte_iter()) {
981 match s_b.cmp(&o_b) {
982 Greater => return Greater,
988 self.len().cmp(&other.len())
992 impl TotalOrd for ~str {
994 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
997 impl TotalOrd for @str {
999 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1002 impl<'self> Eq for &'self str {
1004 fn eq(&self, other: & &'self str) -> bool {
1005 eq_slice((*self), (*other))
1008 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
1013 fn eq(&self, other: &~str) -> bool {
1014 eq_slice((*self), (*other))
1017 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
1022 fn eq(&self, other: &@str) -> bool {
1023 eq_slice((*self), (*other))
1026 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
1029 impl<'self> TotalEq for &'self str {
1031 fn equals(&self, other: & &'self str) -> bool {
1032 eq_slice((*self), (*other))
1036 impl TotalEq for ~str {
1038 fn equals(&self, other: &~str) -> bool {
1039 eq_slice((*self), (*other))
1043 impl TotalEq for @str {
1045 fn equals(&self, other: &@str) -> bool {
1046 eq_slice((*self), (*other))
1050 impl<'self> Ord for &'self str {
1052 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
1054 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
1056 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
1058 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1063 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1065 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1067 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1069 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1074 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1076 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1078 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1080 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1083 impl<'self, S: Str> Equiv<S> for &'self str {
1085 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1088 impl<'self, S: Str> Equiv<S> for @str {
1090 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1093 impl<'self, S: Str> Equiv<S> for ~str {
1095 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1102 /// Any string that can be represented as a slice
1104 /// Work with `self` as a slice.
1105 fn as_slice<'a>(&'a self) -> &'a str;
1107 /// Convert `self` into a ~str.
1108 fn into_owned(self) -> ~str;
1111 impl<'self> Str for &'self str {
1113 fn as_slice<'a>(&'a self) -> &'a str { *self }
1116 fn into_owned(self) -> ~str { self.to_owned() }
1119 impl<'self> Str for ~str {
1121 fn as_slice<'a>(&'a self) -> &'a str {
1122 let s: &'a str = *self; s
1126 fn into_owned(self) -> ~str { self }
1129 impl<'self> Str for @str {
1131 fn as_slice<'a>(&'a self) -> &'a str {
1132 let s: &'a str = *self; s
1136 fn into_owned(self) -> ~str { self.to_owned() }
1139 impl<'self> Container for &'self str {
1141 fn len(&self) -> uint {
1142 do self.as_imm_buf |_p, n| { n - 1u }
1146 impl Container for ~str {
1148 fn len(&self) -> uint { self.as_slice().len() }
1151 impl Container for @str {
1153 fn len(&self) -> uint { self.as_slice().len() }
1156 impl Mutable for ~str {
1157 /// Remove all content, make the string empty
1159 fn clear(&mut self) {
1161 raw::set_len(self, 0)
1166 #[allow(missing_doc)]
1167 pub trait StrSlice<'self> {
1168 fn contains<'a>(&self, needle: &'a str) -> bool;
1169 fn contains_char(&self, needle: char) -> bool;
1170 fn iter(&self) -> CharIterator<'self>;
1171 fn rev_iter(&self) -> CharRevIterator<'self>;
1172 fn byte_iter(&self) -> ByteIterator<'self>;
1173 fn byte_rev_iter(&self) -> ByteRevIterator<'self>;
1174 fn char_offset_iter(&self) -> CharOffsetIterator<'self>;
1175 fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self>;
1176 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep>;
1177 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep>;
1178 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1179 -> CharSplitIterator<'self, Sep>;
1180 fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self>;
1181 fn split_str_iter(&self, &'self str) -> StrSplitIterator<'self>;
1182 fn line_iter(&self) -> CharSplitIterator<'self, char>;
1183 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1184 fn word_iter(&self) -> WordIterator<'self>;
1185 fn ends_with(&self, needle: &str) -> bool;
1186 fn is_whitespace(&self) -> bool;
1187 fn is_alphanumeric(&self) -> bool;
1188 fn char_len(&self) -> uint;
1190 fn slice(&self, begin: uint, end: uint) -> &'self str;
1191 fn slice_from(&self, begin: uint) -> &'self str;
1192 fn slice_to(&self, end: uint) -> &'self str;
1194 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1196 fn starts_with(&self, needle: &str) -> bool;
1197 fn escape_default(&self) -> ~str;
1198 fn escape_unicode(&self) -> ~str;
1199 fn trim(&self) -> &'self str;
1200 fn trim_left(&self) -> &'self str;
1201 fn trim_right(&self) -> &'self str;
1202 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1203 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1204 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1205 fn replace(&self, from: &str, to: &str) -> ~str;
1206 fn to_owned(&self) -> ~str;
1207 fn to_managed(&self) -> @str;
1208 fn to_utf16(&self) -> ~[u16];
1209 fn is_char_boundary(&self, index: uint) -> bool;
1210 fn char_range_at(&self, start: uint) -> CharRange;
1211 fn char_at(&self, i: uint) -> char;
1212 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1213 fn char_at_reverse(&self, i: uint) -> char;
1214 fn as_bytes(&self) -> &'self [u8];
1216 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1217 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1218 fn find_str(&self, &str) -> Option<uint>;
1220 fn repeat(&self, nn: uint) -> ~str;
1222 fn slice_shift_char(&self) -> (char, &'self str);
1224 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1226 fn lev_distance(&self, t: &str) -> uint;
1228 fn subslice_offset(&self, inner: &str) -> uint;
1230 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T;
1231 fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T;
1234 /// Extension methods for strings
1235 impl<'self> StrSlice<'self> for &'self str {
1236 /// Returns true if one string contains another
1240 /// * needle - The string to look for
1242 fn contains<'a>(&self, needle: &'a str) -> bool {
1243 self.find_str(needle).is_some()
1246 /// Returns true if a string contains a char.
1250 /// * needle - The char to look for
1252 fn contains_char(&self, needle: char) -> bool {
1253 self.find(needle).is_some()
1256 /// An iterator over the characters of `self`. Note, this iterates
1257 /// over unicode code-points, not unicode graphemes.
1262 /// let v: ~[char] = "abc åäö".iter().collect();
1263 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1266 fn iter(&self) -> CharIterator<'self> {
1267 self.char_offset_iter().transform(|(_, c)| c)
1270 /// An iterator over the characters of `self`, in reverse order.
1272 fn rev_iter(&self) -> CharRevIterator<'self> {
1273 self.iter().invert()
1276 /// An iterator over the bytes of `self`
1278 fn byte_iter(&self) -> ByteIterator<'self> {
1279 self.as_bytes().iter().transform(|&b| b)
1282 /// An iterator over the bytes of `self`, in reverse order
1284 fn byte_rev_iter(&self) -> ByteRevIterator<'self> {
1285 self.byte_iter().invert()
1288 /// An iterator over the characters of `self` and their byte offsets.
1290 fn char_offset_iter(&self) -> CharOffsetIterator<'self> {
1291 CharOffsetIterator {
1293 index_back: self.len(),
1298 /// An iterator over the characters of `self` and their byte offsets, in reverse order.
1300 fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self> {
1301 self.char_offset_iter().invert()
1304 /// An iterator over substrings of `self`, separated by characters
1305 /// matched by `sep`.
1310 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1311 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1313 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1314 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1317 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep> {
1318 self.split_options_iter(sep, self.len(), true)
1321 /// An iterator over substrings of `self`, separated by characters
1322 /// matched by `sep`, restricted to splitting at most `count`
1325 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep> {
1326 self.split_options_iter(sep, count, true)
1329 /// An iterator over substrings of `self`, separated by characters
1330 /// matched by `sep`, splitting at most `count` times, and
1331 /// possibly not including the trailing empty substring, if it
1334 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1335 -> CharSplitIterator<'self, Sep> {
1336 let only_ascii = sep.only_ascii();
1342 allow_trailing_empty: allow_trailing_empty,
1344 only_ascii: only_ascii
1348 /// An iterator over the start and end indices of each match of
1349 /// `sep` within `self`.
1351 fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self> {
1352 assert!(!sep.is_empty())
1353 MatchesIndexIterator {
1360 /// An iterator over the substrings of `self` separated by `sep`.
1365 /// let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect();
1366 /// assert_eq!(v, ~["", "XXX", "YYY", ""]);
1369 fn split_str_iter(&self, sep: &'self str) -> StrSplitIterator<'self> {
1371 it: self.matches_index_iter(sep),
1377 /// An iterator over the lines of a string (subsequences separated
1380 fn line_iter(&self) -> CharSplitIterator<'self, char> {
1381 self.split_options_iter('\n', self.len(), false)
1384 /// An iterator over the lines of a string, separated by either
1385 /// `\n` or (`\r\n`).
1386 fn any_line_iter(&self) -> AnyLineIterator<'self> {
1387 do self.line_iter().transform |line| {
1389 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1394 /// An iterator over the words of a string (subsequences separated
1395 /// by any sequence of whitespace).
1397 fn word_iter(&self) -> WordIterator<'self> {
1398 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1401 /// Returns true if the string contains only whitespace
1403 /// Whitespace characters are determined by `char::is_whitespace`
1405 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1407 /// Returns true if the string contains only alphanumerics
1409 /// Alphanumeric characters are determined by `char::is_alphanumeric`
1411 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1413 /// Returns the number of characters that a string holds
1415 fn char_len(&self) -> uint { self.iter().len_() }
1417 /// Returns a slice of the given string from the byte range
1418 /// [`begin`..`end`)
1420 /// Fails when `begin` or `end` is not a character boundary (see
1421 /// `is_char_boundary`), or lies beyond the last character of the string
1423 fn slice(&self, begin: uint, end: uint) -> &'self str {
// Validate both endpoints up front: the slicing itself is done by
// `raw::slice_bytes` inside an `unsafe` block, which is handed these
// indices as-is.
1424 assert!(self.is_char_boundary(begin));
1425 assert!(self.is_char_boundary(end));
1426 unsafe { raw::slice_bytes(*self, begin, end) }
1429 /// Returns a slice of the string from `begin` to its end.
1431 /// Fails when `begin` does not point to a valid character, or is
1434 fn slice_from(&self, begin: uint) -> &'self str {
1435 self.slice(begin, self.len())
1438 /// Returns a slice of the string from the beginning to byte
1441 /// Fails when `end` does not point to a valid character, or is
1444 fn slice_to(&self, end: uint) -> &'self str {
1448 /// Returns a slice of the string from the char range
1449 /// [`begin`..`end`).
1451 /// Fails if `begin` > `end`, or if either `begin` or `end` is
1452 /// beyond the last character of the string.
1453 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1454 assert!(begin <= end);
1455 // not sure how to use the iterators for this nicely.
1456 let mut position = 0;
1459 while count < begin && position < l {
1460 position = self.char_range_at(position).next;
1463 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1464 let start_byte = position;
1465 while count < end && position < l {
1466 position = self.char_range_at(position).next;
1469 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1471 self.slice(start_byte, position)
1474 /// Returns true if `needle` is a prefix of the string.
1475 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1476 let (self_len, needle_len) = (self.len(), needle.len());
1477 if needle_len == 0u { true }
1478 else if needle_len > self_len { false }
1479 else { match_at(*self, needle, 0u) }
1482 /// Returns true if `needle` is a suffix of the string.
1483 fn ends_with(&self, needle: &str) -> bool {
1484 let (self_len, needle_len) = (self.len(), needle.len());
1485 if needle_len == 0u { true }
1486 else if needle_len > self_len { false }
1487 else { match_at(*self, needle, self_len - needle_len) }
1490 /// Escape each char in `s` with char::escape_default.
1491 fn escape_default(&self) -> ~str {
1492 let mut out: ~str = ~"";
1493 out.reserve_at_least(self.len());
1494 for c in self.iter() {
1495 do c.escape_default |c| {
1502 /// Escape each char in `s` with char::escape_unicode.
1503 fn escape_unicode(&self) -> ~str {
1504 let mut out: ~str = ~"";
1505 out.reserve_at_least(self.len());
1506 for c in self.iter() {
1507 do c.escape_unicode |c| {
1514 /// Returns a string with leading and trailing whitespace removed
1516 fn trim(&self) -> &'self str {
1517 self.trim_left().trim_right()
1520 /// Returns a string with leading whitespace removed
1522 fn trim_left(&self) -> &'self str {
1523 self.trim_left_chars(&char::is_whitespace)
1526 /// Returns a string with trailing whitespace removed
1528 fn trim_right(&self) -> &'self str {
1529 self.trim_right_chars(&char::is_whitespace)
1532 /// Returns a string with characters that match `to_trim` removed.
1536 /// * to_trim - a character matcher
1541 /// assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1542 /// assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1543 /// assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1546 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1547 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1550 /// Returns a string with leading characters that match `to_trim` removed.
1554 /// * to_trim - a character matcher
1559 /// assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1560 /// assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1561 /// assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1564 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1565 match self.find(|c: char| !to_trim.matches(c)) {
1567 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1571 /// Returns a string with trailing characters that match `to_trim` removed.
1575 /// * to_trim - a character matcher
1580 /// assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1581 /// assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1582 /// assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1585 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1586 match self.rfind(|c: char| !to_trim.matches(c)) {
1589 let next = self.char_range_at(last).next;
1590 unsafe { raw::slice_bytes(*self, 0u, next) }
1595 /// Replace all occurrences of one string with another
1599 /// * from - The string to replace
1600 /// * to - The replacement string
1604 /// The original string with all occurrences of `from` replaced with `to`
1605 pub fn replace(&self, from: &str, to: &str) -> ~str {
1606 let mut result = ~"";
1607 let mut last_end = 0;
1608 for (start, end) in self.matches_index_iter(from) {
1609 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1610 result.push_str(to);
1613 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1617 /// Copy a slice into a new unique str
1619 fn to_owned(&self) -> ~str {
1620 do self.as_imm_buf |src, len| {
// `len` is used as the capacity, while only `len - 1` bytes are copied and
// counted in the final length.
// NOTE(review): presumably the extra byte is the trailing null that
// `as_imm_buf` includes in its length — confirm.
1623 let mut v = vec::with_capacity(len);
1625 do v.as_mut_buf |dst, _| {
1626 ptr::copy_memory(dst, src, len - 1);
1628 vec::raw::set_len(&mut v, len - 1);
// Reinterpret the filled byte vector as the value this function returns.
1630 ::cast::transmute(v)
1636 fn to_managed(&self) -> @str {
1637 let v = at_vec::from_fn(self.len() + 1, |i| {
1638 if i == self.len() { 0 } else { self[i] }
1640 unsafe { cast::transmute(v) }
1643 /// Converts to a vector of `u16` encoded as UTF-16.
1644 fn to_utf16(&self) -> ~[u16] {
1646 for ch in self.iter() {
1647 // Arithmetic with u32 literals is easier on the eyes than chars.
1648 let mut ch = ch as u32;
// Masking to 16 bits leaves `ch` unchanged exactly when it fits in 16 bits.
1650 if (ch & 0xFFFF_u32) == ch {
1651 // The BMP falls through (assuming non-surrogate, as it
// Guard: 0xD800..0xDFFF is the surrogate range and is rejected here.
1653 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1656 // Supplementary planes break into surrogates.
1657 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
// `w1` is tagged 0xD800 and takes the bits above the low ten; `w2` is
// tagged 0xDC00 and takes the low ten bits.
1659 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1660 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
// The pair is emitted in the order w1, w2.
1661 u.push_all([w1, w2])
1667 /// Returns false if the index points into the middle of a multi-byte
1668 /// character sequence.
1669 fn is_char_boundary(&self, index: uint) -> bool {
// The position one past the final byte counts as a boundary.
1670 if index == self.len() { return true; }
1671 let b = self[index];
// Bytes 128..191 have the bit pattern 10xxxxxx, which UTF-8 reserves for
// continuation bytes; any other byte value begins a character.
1672 return b < 128u8 || b >= 192u8;
1675 /// Pluck a character out of a string and return the index of the next
1678 /// This function can be used to iterate over the unicode characters of a
1684 /// let s = "中华Việt Nam";
1686 /// while i < s.len() {
1687 /// let CharRange {ch, next} = s.char_range_at(i);
1688 /// printfln!("%u: %c", i, ch);
1693 /// # Example output
1710 /// * s - The string
1711 /// * i - The byte offset of the char to extract
1715 /// A record {ch: char, next: uint} containing the char value and the byte
1716 /// index of the next unicode character.
1720 /// If `i` is greater than or equal to the length of the string.
1721 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1723 fn char_range_at(&self, i: uint) -> CharRange {
// Fast path: a first byte below 128 is a complete one-byte character, so the
// next character starts at `i + 1`.
1724 if (self[i] < 128u8) {
1725 return CharRange {ch: self[i] as char, next: i + 1 };
1728 // Multibyte case is a fn to allow char_range_at to inline cleanly
1729 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1730 let mut val = s[i] as uint;
// `w` is looked up from the first byte and later added to `i` to form
// `next`, i.e. it is the byte width of this character.
1731 let w = UTF8_CHAR_WIDTH[val] as uint;
// The second byte is always read; the third and fourth only when the width
// says they exist.
1734 val = utf8_first_byte!(val, w);
1735 val = utf8_acc_cont_byte!(val, s[i + 1]);
1736 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1737 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1739 return CharRange {ch: val as char, next: i + w};
1742 return multibyte_char_range_at(*self, i);
1745 /// Plucks the character starting at the `i`th byte of a string
1747 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1749 /// Given a byte position and a str, return the previous char and its position.
1751 /// This function can be used to iterate over a unicode string in reverse.
1753 /// Returns 0 for next index if called on start index 0.
1754 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1755 let mut prev = start;
1757 // while there is a previous byte == 10......
1758 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1762 // now refer to the initial byte of previous char
1770 let ch = self.char_at(prev);
1771 return CharRange {ch:ch, next:prev};
1774 /// Plucks the character ending at the `i`th byte of a string
1776 fn char_at_reverse(&self, i: uint) -> char {
1777 self.char_range_at_reverse(i).ch
1780 /// Work with the byte buffer of a string as a byte slice.
1782 /// The byte slice does not include the null terminator.
1783 fn as_bytes(&self) -> &'self [u8] {
1785 let mut slice = self.repr();
1787 cast::transmute(slice)
1791 /// Returns the byte index of the first character of `self` that matches `search`
1795 /// `Some` containing the byte index of the first matching character
1796 /// or `None` if there is no match
1797 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
// Byte-wise path: the enumerate index is directly the byte index.
// NOTE(review): `only_ascii` presumably means the matcher can only match
// ASCII chars, which is what would make testing single bytes valid —
// confirm against `CharEq`.
1798 if search.only_ascii() {
1799 for (i, b) in self.byte_iter().enumerate() {
1800 if search.matches(b as char) { return Some(i) }
// General path: `index` tracks the byte offset of `c`; it is advanced by the
// encoded length of each char that did not match.
1804 for c in self.iter() {
1805 if search.matches(c) { return Some(index); }
1806 index += c.len_utf8_bytes();
1813 /// Returns the byte index of the last character of `self` that matches `search`
1817 /// `Some` containing the byte index of the last matching character
1818 /// or `None` if there is no match
1819 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
// Start one past the last byte; the offset is walked backwards from there.
1820 let mut index = self.len();
1821 if search.only_ascii() {
1822 for b in self.byte_rev_iter() {
1824 if search.matches(b as char) { return Some(index); }
1827 for c in self.rev_iter() {
// Step back by the encoded length *before* testing, so that `index` is the
// byte offset where `c` starts when it is returned.
1828 index -= c.len_utf8_bytes();
1829 if search.matches(c) { return Some(index); }
1836 /// Returns the byte index of the first matching substring
1840 /// * `needle` - The string to search for
1844 /// `Some` containing the byte index of the first matching substring
1845 /// or `None` if there is no match
1846 fn find_str(&self, needle: &str) -> Option<uint> {
1847 if needle.is_empty() {
1850 self.matches_index_iter(needle)
1852 .map_move(|(start, _end)| start)
1856 /// Given a string, make a new string with repeated copies of it.
1857 fn repeat(&self, nn: uint) -> ~str {
1858 do self.as_imm_buf |buf, len| {
1859 // ignore the NULL terminator
1861 let mut ret = with_capacity(nn * len);
1864 do ret.as_mut_buf |rbuf, _len| {
1865 let mut rbuf = rbuf;
1868 ptr::copy_memory(rbuf, buf, len);
1869 rbuf = rbuf.offset(len as int);
1872 raw::set_len(&mut ret, nn * len);
1878 /// Retrieves the first character from a string slice and returns
1879 /// it. This does not allocate a new string; instead, it returns a
1880 /// slice that points one character beyond the character that was
1885 /// If the string does not contain any characters
1887 fn slice_shift_char(&self) -> (char, &'self str) {
1888 let CharRange {ch, next} = self.char_range_at(0u);
1889 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
1890 return (ch, next_s);
1893 /// Apply a function to each character.
1894 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
1895 let mut result = with_capacity(self.len());
1896 for cc in self.iter() {
1897 result.push_char(ff(cc));
1902 /// Levenshtein Distance between two strings.
1903 fn lev_distance(&self, t: &str) -> uint {
1904 let slen = self.len();
1907 if slen == 0 { return tlen; }
1908 if tlen == 0 { return slen; }
1910 let mut dcol = vec::from_fn(tlen + 1, |x| x);
1912 for (i, sc) in self.iter().enumerate() {
1914 let mut current = i;
1915 dcol[0] = current + 1;
1917 for (j, tc) in t.iter().enumerate() {
1919 let next = dcol[j + 1];
1922 dcol[j + 1] = current;
1924 dcol[j + 1] = ::cmp::min(current, next);
1925 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
1935 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1937 /// Fails if `inner` is not a direct slice contained within self.
1942 /// let string = "a\nb\nc";
1943 /// let mut lines = ~[];
1944 /// for line in string.line_iter() { lines.push(line) }
1946 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1947 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1948 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1951 fn subslice_offset(&self, inner: &str) -> uint {
1952 do self.as_imm_buf |a, a_len| {
1953 do inner.as_imm_buf |b, b_len| {
// Work on the buffers as address ranges: `*_start` is the pointer value of
// the first byte and `*_end` is that value plus the buffer length.
1959 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
1960 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
// `inner` must begin no earlier and end no later than `self`; otherwise it
// is not a slice of `self` and the function fails.
1962 assert!(a_start <= b_start);
1963 assert!(b_end <= a_end);
1969 /// Work with the byte buffer and length of a slice.
1971 /// The given length is one byte longer than the 'official' indexable
1972 /// length of the string. This is to permit probing the byte past the
1973 /// indexable area for a null byte, as is the case in slices pointing
1974 /// to full strings, or suffixes of them.
1976 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T {
1977 let v: &[u8] = unsafe { cast::transmute(*self) };
1981 /// Work with the byte buffer of a string as a null-terminated C string.
1983 /// Allows for unsafe manipulation of strings, which is useful for foreign
1984 /// interop. This is similar to `str::as_buf`, but guarantees null-termination.
1985 /// If the given slice is not already null-terminated, this function will
1986 /// allocate a temporary, copy the slice, null terminate it, and pass
1992 /// let s = "PATH".as_c_str(|path| libc::getenv(path));
1995 fn as_c_str<T>(&self, f: &fn(*libc::c_char) -> T) -> T {
1996 do self.as_imm_buf |buf, len| {
1997 // NB: len includes the trailing null.
// Probe the final byte of the buffer: if it is not already zero, copy the
// slice into an owned string and retry on that copy.
1999 if unsafe { *(ptr::offset(buf, (len - 1) as int)) != 0 } {
2000 self.to_owned().as_c_str(|s| f(s))
// Otherwise the existing buffer is handed to `f` directly.
2002 f(buf as *libc::c_char)
2008 #[allow(missing_doc)]
2009 pub trait NullTerminatedStr {
2010 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8];
2013 impl NullTerminatedStr for ~str {
2014 /// Work with the byte buffer of a string as a byte slice.
2016 /// The byte slice does include the null terminator.
2018 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2019 let ptr: &'a ~[u8] = unsafe { cast::transmute(self) };
2020 let slice: &'a [u8] = *ptr;
2025 impl NullTerminatedStr for @str {
2026 /// Work with the byte buffer of a string as a byte slice.
2028 /// The byte slice does include the null terminator.
2030 fn as_bytes_with_null<'a>(&'a self) -> &'a [u8] {
2031 let ptr: &'a @[u8] = unsafe { cast::transmute(self) };
2032 let slice: &'a [u8] = *ptr;
2037 #[allow(missing_doc)]
2038 pub trait OwnedStr {
2039 fn push_str_no_overallocate(&mut self, rhs: &str);
2040 fn push_str(&mut self, rhs: &str);
2041 fn push_char(&mut self, c: char);
2042 fn pop_char(&mut self) -> char;
2043 fn shift_char(&mut self) -> char;
2044 fn unshift_char(&mut self, ch: char);
2045 fn append(self, rhs: &str) -> ~str;
2046 fn reserve(&mut self, n: uint);
2047 fn reserve_at_least(&mut self, n: uint);
2048 fn capacity(&self) -> uint;
2049 fn to_bytes_with_null(self) -> ~[u8];
2051 /// Work with the mutable byte buffer and length of a slice.
2053 /// The given length is one byte longer than the 'official' indexable
2054 /// length of the string. This is to permit probing the byte past the
2055 /// indexable area for a null byte, as is the case in slices pointing
2056 /// to full strings, or suffixes of them.
2058 /// Make sure any mutations to this buffer keep this string valid UTF8.
2059 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T;
2062 impl OwnedStr for ~str {
2063 /// Appends a string slice to the back of a string, without overallocating
2065 fn push_str_no_overallocate(&mut self, rhs: &str) {
2067 let llen = self.len();
2068 let rlen = rhs.len();
2069 self.reserve(llen + rlen);
2070 do self.as_imm_buf |lbuf, _llen| {
2071 do rhs.as_imm_buf |rbuf, _rlen| {
2072 let dst = ptr::offset(lbuf, llen as int);
2073 let dst = cast::transmute_mut_unsafe(dst);
2074 ptr::copy_memory(dst, rbuf, rlen);
2077 raw::set_len(self, llen + rlen);
2081 /// Appends a string slice to the back of a string
2083 fn push_str(&mut self, rhs: &str) {
2085 let llen = self.len();
2086 let rlen = rhs.len();
2087 self.reserve_at_least(llen + rlen);
2088 do self.as_imm_buf |lbuf, _llen| {
2089 do rhs.as_imm_buf |rbuf, _rlen| {
2090 let dst = ptr::offset(lbuf, llen as int);
2091 let dst = cast::transmute_mut_unsafe(dst);
2092 ptr::copy_memory(dst, rbuf, rlen);
2095 raw::set_len(self, llen + rlen);
2099 /// Appends a character to the back of a string
2101 fn push_char(&mut self, c: char) {
2102 assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
2104 let code = c as uint;
// `nb` is the number of bytes `c` occupies once encoded, picked by comparing
// the code point against the per-width upper limits.
2105 let nb = if code < MAX_ONE_B { 1u }
2106 else if code < MAX_TWO_B { 2u }
2107 else if code < MAX_THREE_B { 3u }
2109 let len = self.len();
2110 let new_len = len + nb;
// Grow first; the bytes are then written past the current end, at `off`.
2111 self.reserve_at_least(new_len);
2112 let off = len as int;
2113 do self.as_mut_buf |buf, _len| {
// One-byte form: the code point itself.
2116 *ptr::mut_offset(buf, off) = code as u8;
// Two-byte form: lead byte carries bits 6..10, continuation byte bits 0..5.
2119 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2120 *ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
// Three-byte form: bits 12..15, then 6..11, then 0..5.
2123 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2124 *ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
2125 *ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
// Four-byte form: bits 18..20, then 12..17, then 6..11, then 0..5.
2128 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2129 *ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
2130 *ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
2131 *ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
// Record the new length so that it covers the bytes just written.
2136 raw::set_len(self, new_len);
2140 /// Remove the final character from a string and return it
2144 /// If the string does not contain any characters
2145 fn pop_char(&mut self) -> char {
2146 let end = self.len();
2148 let CharRange {ch, next} = self.char_range_at_reverse(end);
2149 unsafe { raw::set_len(self, next); }
2153 /// Remove the first character from a string and return it
2157 /// If the string does not contain any characters
2158 fn shift_char(&mut self) -> char {
2159 let CharRange {ch, next} = self.char_range_at(0u);
2160 *self = self.slice(next, self.len()).to_owned();
2164 /// Prepend a char to a string
2165 fn unshift_char(&mut self, ch: char) {
2166 // This could be more efficient.
2167 let mut new_str = ~"";
2168 new_str.push_char(ch);
2169 new_str.push_str(*self);
2173 /// Concatenate two strings together.
2175 fn append(self, rhs: &str) -> ~str {
2176 let mut new_str = self;
2177 new_str.push_str_no_overallocate(rhs);
2181 /// Reserves capacity for exactly `n` bytes in the given string, not including
2182 /// the null terminator.
2184 /// Assuming single-byte characters, the resulting string will be large
2185 /// enough to hold a string of length `n`. To account for the null terminator,
2186 /// the underlying buffer will have the size `n` + 1.
2188 /// If the capacity for `s` is already equal to or greater than the requested
2189 /// capacity, then no action is taken.
2194 /// * n - The number of bytes to reserve space for
2196 pub fn reserve(&mut self, n: uint) {
// View the string as its underlying byte vector so that the vector's own
// `reserve` can be used.
2198 let v: *mut ~[u8] = cast::transmute(self);
// One byte beyond `n` is requested for the null terminator.
2199 (*v).reserve(n + 1);
2203 /// Reserves capacity for at least `n` bytes in the given string, not including
2204 /// the null terminator.
2206 /// Assuming single-byte characters, the resulting string will be large
2207 /// enough to hold a string of length `n`. To account for the null terminator,
2208 /// the underlying buffer will have the size `n` + 1.
2210 /// This function will over-allocate in order to amortize the allocation costs
2211 /// in scenarios where the caller may need to repeatedly reserve additional
2214 /// If the capacity for `s` is already equal to or greater than the requested
2215 /// capacity, then no action is taken.
2220 /// * n - The number of bytes to reserve space for
2222 fn reserve_at_least(&mut self, n: uint) {
// `n + 1` is the buffer size including the null terminator. That size is
// passed through `uint::next_power_of_two`, and the terminator byte is then
// subtracted again because `reserve` takes a size that excludes it (it adds
// the byte back itself).
2223 self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2226 /// Returns the number of single-byte characters the string can hold without
2228 fn capacity(&self) -> uint {
2229 let buf: &~[u8] = unsafe { cast::transmute(self) };
2230 let vcap = buf.capacity();
2235 /// Convert to a vector of bytes. This does not allocate a new
2236 /// string, and includes the null terminator.
2238 fn to_bytes_with_null(self) -> ~[u8] {
2239 unsafe { cast::transmute(self) }
2243 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2244 let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2249 impl Clone for ~str {
2251 fn clone(&self) -> ~str {
2256 impl Clone for @str {
2258 fn clone(&self) -> @str {
2263 impl<T: Iterator<char>> FromIterator<char, T> for ~str {
2265 fn from_iterator(iterator: &mut T) -> ~str {
2266 let (lower, _) = iterator.size_hint();
2267 let mut buf = with_capacity(lower);
2268 buf.extend(iterator);
2273 impl<T: Iterator<char>> Extendable<char, T> for ~str {
2275 fn extend(&mut self, iterator: &mut T) {
2276 let (lower, _) = iterator.size_hint();
2277 let reserve = lower + self.len();
2278 self.reserve_at_least(reserve);
2279 for ch in *iterator {
2285 // This works because every lifetime is a sub-lifetime of 'static
2286 impl<'self> Zero for &'self str {
2287 fn zero() -> &'self str { "" }
2288 fn is_zero(&self) -> bool { self.is_empty() }
2291 impl Zero for ~str {
2292 fn zero() -> ~str { ~"" }
2293 fn is_zero(&self) -> bool { self.len() == 0 }
2296 impl Zero for @str {
2297 fn zero() -> @str { @"" }
2298 fn is_zero(&self) -> bool { self.len() == 0 }
2303 use iterator::IteratorUtil;
2304 use container::Container;
2311 use vec::{ImmutableVector, CopyableVector};
2312 use cmp::{TotalOrd, Less, Equal, Greater};
2316 assert!((eq(&~"", &~"")));
2317 assert!((eq(&~"foo", &~"foo")));
2318 assert!((!eq(&~"foo", &~"bar")));
2322 fn test_eq_slice() {
2323 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2324 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2325 assert!((!eq_slice("foo1", "foo2")));
2331 assert!("" <= "foo");
2332 assert!("foo" <= "foo");
2333 assert!("foo" != "bar");
2338 assert_eq!("".len(), 0u);
2339 assert_eq!("hello world".len(), 11u);
2340 assert_eq!("\x63".len(), 1u);
2341 assert_eq!("\xa2".len(), 2u);
2342 assert_eq!("\u03c0".len(), 2u);
2343 assert_eq!("\u2620".len(), 3u);
2344 assert_eq!("\U0001d11e".len(), 4u);
2346 assert_eq!("".char_len(), 0u);
2347 assert_eq!("hello world".char_len(), 11u);
2348 assert_eq!("\x63".char_len(), 1u);
2349 assert_eq!("\xa2".char_len(), 1u);
2350 assert_eq!("\u03c0".char_len(), 1u);
2351 assert_eq!("\u2620".char_len(), 1u);
2352 assert_eq!("\U0001d11e".char_len(), 1u);
2353 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2358 assert_eq!("hello".find('l'), Some(2u));
2359 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2360 assert!("hello".find('x').is_none());
2361 assert!("hello".find(|c:char| c == 'x').is_none());
2362 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2363 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2368 assert_eq!("hello".rfind('l'), Some(3u));
2369 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2370 assert!("hello".rfind('x').is_none());
2371 assert!("hello".rfind(|c:char| c == 'x').is_none());
2372 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2373 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2377 fn test_push_str() {
2380 assert_eq!(s.slice_from(0), "");
2382 assert_eq!(s.slice_from(0), "abc");
2383 s.push_str("ประเทศไทย中华Việt Nam");
2384 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2391 assert_eq!(s.slice_from(0), "");
2392 s = s.append("abc");
2393 assert_eq!(s.slice_from(0), "abc");
2394 s = s.append("ประเทศไทย中华Việt Nam");
2395 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2399 fn test_pop_char() {
2400 let mut data = ~"ประเทศไทย中华";
2401 let cc = data.pop_char();
2402 assert_eq!(~"ประเทศไทย中", data);
2403 assert_eq!('华', cc);
2407 fn test_pop_char_2() {
2408 let mut data2 = ~"华";
2409 let cc2 = data2.pop_char();
2410 assert_eq!(~"", data2);
2411 assert_eq!('华', cc2);
2416 #[ignore(cfg(windows))]
2417 fn test_pop_char_fail() {
2419 let _cc3 = data.pop_char();
2423 fn test_push_char() {
2424 let mut data = ~"ประเทศไทย中";
2425 data.push_char('华');
2426 data.push_char('b'); // 1 byte
2427 data.push_char('¢'); // 2 byte
2428 data.push_char('€'); // 3 byte
2429 data.push_char('𤭢'); // 4 byte
2430 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2434 fn test_shift_char() {
2435 let mut data = ~"ประเทศไทย中";
2436 let cc = data.shift_char();
2437 assert_eq!(~"ระเทศไทย中", data);
2438 assert_eq!('ป', cc);
2442 fn test_unshift_char() {
2443 let mut data = ~"ประเทศไทย中";
2444 data.unshift_char('华');
2445 assert_eq!(~"华ประเทศไทย中", data);
2451 let s: ~str = empty.iter().collect();
2452 assert_eq!(empty, s.as_slice());
2453 let data = "ประเทศไทย中";
2454 let s: ~str = data.iter().collect();
2455 assert_eq!(data, s.as_slice());
2460 let data = ~"ประเทศไทย中";
2461 let mut cpy = data.clone();
2463 let mut it = other.iter();
2464 cpy.extend(&mut it);
2465 assert_eq!(cpy, data + other);
2470 let mut empty = ~"";
2472 assert_eq!("", empty.as_slice());
2473 let mut data = ~"ประเทศไทย中";
2475 assert_eq!("", data.as_slice());
2476 data.push_char('华');
2477 assert_eq!("华", data.as_slice());
2481 fn test_find_str() {
2483 assert_eq!("".find_str(""), Some(0u));
2484 assert!("banana".find_str("apple pie").is_none());
2486 let data = "abcabc";
2487 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2488 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2489 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2491 let mut data = ~"ประเทศไทย中华Việt Nam";
2493 assert!(data.find_str("ไท华").is_none());
2494 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2495 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2497 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2498 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2499 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2500 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2501 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2503 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2504 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2505 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2506 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2507 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
2511 fn test_slice_chars() {
2512 fn t(a: &str, b: &str, start: uint) {
2513 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2515 t("hello", "llo", 2);
2516 t("hello", "el", 1);
2517 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2522 fn t(v: &[~str], s: &str) {
2523 assert_eq!(v.concat(), s.to_str());
2525 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2526 let v: &[~str] = [];
2533 fn t(v: &[~str], sep: &str, s: &str) {
2534 assert_eq!(v.connect(sep), s.to_str());
2536 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2537 " ", "you know I'm no good");
2538 let v: &[~str] = [];
2540 t([~"hi"], " ", "hi");
2544 fn test_concat_slices() {
2545 fn t(v: &[&str], s: &str) {
2546 assert_eq!(v.concat(), s.to_str());
2548 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2549 let v: &[&str] = [];
2555 fn test_connect_slices() {
2556 fn t(v: &[&str], sep: &str, s: &str) {
2557 assert_eq!(v.connect(sep), s.to_str());
2559 t(["you", "know", "I'm", "no", "good"],
2560 " ", "you know I'm no good");
2562 t(["hi"], " ", "hi");
2567 assert_eq!("x".repeat(4), ~"xxxx");
2568 assert_eq!("hi".repeat(4), ~"hihihihi");
2569 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2570 assert_eq!("".repeat(4), ~"");
2571 assert_eq!("hi".repeat(0), ~"");
2575 fn test_unsafe_slice() {
2576 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2577 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2578 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2579 fn a_million_letter_a() -> ~str {
2582 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2585 fn half_a_million_letter_a() -> ~str {
2588 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2591 let letters = a_million_letter_a();
2592 assert!(half_a_million_letter_a() ==
2593 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
2597 fn test_starts_with() {
2598 assert!(("".starts_with("")));
2599 assert!(("abc".starts_with("")));
2600 assert!(("abc".starts_with("a")));
2601 assert!((!"a".starts_with("abc")));
2602 assert!((!"".starts_with("abc")));
2606 fn test_ends_with() {
2607 assert!(("".ends_with("")));
2608 assert!(("abc".ends_with("")));
2609 assert!(("abc".ends_with("c")));
2610 assert!((!"a".ends_with("abc")));
2611 assert!((!"".ends_with("abc")));
2615 fn test_is_empty() {
2616 assert!("".is_empty());
2617 assert!(!"a".is_empty());
2623 assert_eq!("".replace(a, "b"), ~"");
2624 assert_eq!("a".replace(a, "b"), ~"b");
2625 assert_eq!("ab".replace(a, "b"), ~"bb");
2627 assert!(" test test ".replace(test, "toast") ==
2629 assert_eq!(" test test ".replace(test, ""), ~" ");
2633 fn test_replace_2a() {
2634 let data = ~"ประเทศไทย中华";
2635 let repl = ~"دولة الكويت";
2638 let A = ~"دولة الكويتทศไทย中华";
2639 assert_eq!(data.replace(a, repl), A);
2643 fn test_replace_2b() {
2644 let data = ~"ประเทศไทย中华";
2645 let repl = ~"دولة الكويت";
2648 let B = ~"ปรدولة الكويتทศไทย中华";
2649 assert_eq!(data.replace(b, repl), B);
2653 fn test_replace_2c() {
2654 let data = ~"ประเทศไทย中华";
2655 let repl = ~"دولة الكويت";
2658 let C = ~"ประเทศไทยدولة الكويت";
2659 assert_eq!(data.replace(c, repl), C);
2663 fn test_replace_2d() {
2664 let data = ~"ประเทศไทย中华";
2665 let repl = ~"دولة الكويت";
2668 assert_eq!(data.replace(d, repl), data);
2673 assert_eq!("ab", "abc".slice(0, 2));
2674 assert_eq!("bc", "abc".slice(1, 3));
2675 assert_eq!("", "abc".slice(1, 1));
2676 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2678 let data = "ประเทศไทย中华";
2679 assert_eq!("ป", data.slice(0, 3));
2680 assert_eq!("ร", data.slice(3, 6));
2681 assert_eq!("", data.slice(3, 3));
2682 assert_eq!("华", data.slice(30, 33));
2684 fn a_million_letter_X() -> ~str {
2688 push_str(&mut rs, "华华华华华华华华华华");
2693 fn half_a_million_letter_X() -> ~str {
2696 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2699 let letters = a_million_letter_X();
2700 assert!(half_a_million_letter_X() ==
2701 letters.slice(0u, 3u * 500000u).to_owned());
2706 let ss = "中华Việt Nam";
2708 assert_eq!("华", ss.slice(3u, 6u));
2709 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2711 assert_eq!("ab", "abc".slice(0u, 2u));
2712 assert_eq!("bc", "abc".slice(1u, 3u));
2713 assert_eq!("", "abc".slice(1u, 1u));
2715 assert_eq!("中", ss.slice(0u, 3u));
2716 assert_eq!("华V", ss.slice(3u, 7u));
2717 assert_eq!("", ss.slice(3u, 3u));
// Slices "中华Việt Nam" with an end offset of 2. The first character "中"
// occupies bytes 0..3 (the `slice(0u, 3u)` assertion on the same text yields
// exactly "中"), so offset 2 lands inside that character.
// NOTE(review): the test name says this call is expected to fail; presumably a
// `#[should_fail]` attribute sits on a line not visible here - confirm.
2732 #[ignore(cfg(windows))]
2733 fn test_slice_fail() {
2734 "中华Việt Nam".slice(0u, 2u);
// `slice_from(n)` returns the tail of the string that starts at offset `n`:
// offset 0 gives the whole string and an offset equal to the length gives "".
2738 fn test_slice_from() {
2739 assert_eq!("abcd".slice_from(0), "abcd");
2740 assert_eq!("abcd".slice_from(2), "cd");
2741 assert_eq!("abcd".slice_from(4), "");
// `slice_to(n)` returns the head of the string that ends at offset `n`:
// offset 0 gives "" and an offset equal to the length gives the whole string.
2744 fn test_slice_to() {
2745 assert_eq!("abcd".slice_to(0), "");
2746 assert_eq!("abcd".slice_to(2), "ab");
2747 assert_eq!("abcd".slice_to(4), "abcd");
// `trim_left_chars` strips only the leading run of matching characters;
// matching characters later in the string are left alone.
2751 fn test_trim_left_chars() {
// An empty char slice matches nothing, so the input comes back unchanged.
2752 let v: &[char] = &[];
2753 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
// A slice of chars matches any of its members.
2754 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
// Input made up entirely of matching chars trims down to "".
2755 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
2756 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
// Same behaviour with a single `char`, a char slice and a closure predicate
// as the matcher.
2758 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2759 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2760 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
// `trim_right_chars` strips only the trailing run of matching characters;
// matching characters earlier in the string are left alone.
2764 fn test_trim_right_chars() {
// An empty char slice matches nothing, so the input comes back unchanged.
2765 let v: &[char] = &[];
2766 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
// A slice of chars matches any of its members.
2767 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
// Input made up entirely of matching chars trims down to "".
2768 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
2769 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
// Same behaviour with a single `char`, a char slice and a closure predicate
// as the matcher.
2771 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
2772 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
2773 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
// `trim_chars` strips matching characters from both ends and keeps any
// matching characters that sit between non-matching ones.
2777 fn test_trim_chars() {
// An empty char slice matches nothing, so the input comes back unchanged.
2778 let v: &[char] = &[];
2779 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
2780 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
// Input made up entirely of matching chars trims down to "".
2781 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
// Nothing to trim: the input is returned as is.
2782 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
// The interior '1' in "foo1bar" survives; only the outer runs are removed.
2784 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
2785 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
2786 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
// `trim_left` removes leading whitespace only: empty and whitespace-only
// inputs give "", and trailing whitespace is preserved.
2790 fn test_trim_left() {
2791 assert_eq!("".trim_left(), "");
2792 assert_eq!("a".trim_left(), "a");
2793 assert_eq!(" ".trim_left(), "");
2794 assert_eq!(" blah".trim_left(), "blah");
// The non-ASCII `\u3000` is trimmed as whitespace too.
2795 assert_eq!(" \u3000 wut".trim_left(), "wut");
2796 assert_eq!("hey ".trim_left(), "hey ");
// `trim_right` removes trailing whitespace only: empty and whitespace-only
// inputs give "", and leading whitespace is preserved.
2800 fn test_trim_right() {
2801 assert_eq!("".trim_right(), "");
2802 assert_eq!("a".trim_right(), "a");
2803 assert_eq!(" ".trim_right(), "");
2804 assert_eq!("blah ".trim_right(), "blah");
// The non-ASCII `\u3000` is trimmed as whitespace too.
2805 assert_eq!("wut \u3000 ".trim_right(), "wut");
2806 assert_eq!(" hey".trim_right(), " hey");
2811 assert_eq!("".trim(), "");
2812 assert_eq!("a".trim(), "a");
2813 assert_eq!(" ".trim(), "");
2814 assert_eq!(" blah ".trim(), "blah");
2815 assert_eq!("\nwut \u3000 ".trim(), "wut");
2816 assert_eq!(" hey dude ".trim(), "hey dude");
// `is_whitespace` holds for the empty string and for strings made up only of
// whitespace characters; a single non-whitespace character makes it false.
2820 fn test_is_whitespace() {
2821 assert!("".is_whitespace());
2822 assert!(" ".is_whitespace());
2823 assert!("\u2009".is_whitespace()); // Thin space
2824 assert!(" \n\t ".is_whitespace());
2825 assert!(!" _ ".is_whitespace());
2829 fn test_push_byte() {
2831 unsafe{raw::push_byte(&mut s, 'D' as u8)};
2832 assert_eq!(s, ~"ABCD");
2836 fn test_shift_byte() {
2838 let b = unsafe{raw::shift_byte(&mut s)};
2839 assert_eq!(s, ~"BC");
2840 assert_eq!(b, 65u8);
2844 fn test_pop_byte() {
2846 let b = unsafe{raw::pop_byte(&mut s)};
2847 assert_eq!(s, ~"AB");
2848 assert_eq!(b, 67u8);
// Builds a string from seven bytes of value 65 ('A') through `raw::from_bytes`
// and checks that the result is "AAAAAAA".
// NOTE(review): the call needs an `unsafe` block; presumably because the raw
// variant skips the UTF-8 validation that the safe `from_bytes` performs before
// delegating to it - confirm against the `raw` module.
2852 fn test_unsafe_from_bytes() {
2853 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
2854 let b = unsafe { raw::from_bytes(a) };
2855 assert_eq!(b, ~"AAAAAAA");
2859 fn test_from_bytes() {
2860 let ss = ~"ศไทย中华Việt Nam";
2861 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
2862 0xe0_u8, 0xb9_u8, 0x84_u8,
2863 0xe0_u8, 0xb8_u8, 0x97_u8,
2864 0xe0_u8, 0xb8_u8, 0xa2_u8,
2865 0xe4_u8, 0xb8_u8, 0xad_u8,
2866 0xe5_u8, 0x8d_u8, 0x8e_u8,
2867 0x56_u8, 0x69_u8, 0xe1_u8,
2868 0xbb_u8, 0x87_u8, 0x74_u8,
2869 0x20_u8, 0x4e_u8, 0x61_u8,
2873 assert_eq!(ss, from_bytes(bb));
2874 assert_eq!(~"𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰",
2875 from_bytes(bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰")));
2880 assert!(!is_utf8([0xc0, 0x80]));
2881 assert!(!is_utf8([0xc0, 0xae]));
2882 assert!(!is_utf8([0xe0, 0x80, 0x80]));
2883 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
2884 assert!(!is_utf8([0xe0, 0x81, 0x81]));
2885 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
2886 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
2888 assert!(is_utf8([0xC2, 0x80]));
2889 assert!(is_utf8([0xDF, 0xBF]));
2890 assert!(is_utf8([0xE0, 0xA0, 0x80]));
2891 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
2892 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
2893 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
2898 #[ignore(cfg(windows))]
2899 fn test_from_bytes_fail() {
2900 use str::not_utf8::cond;
2902 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
2903 0xe0_u8, 0xb9_u8, 0x84_u8,
2904 0xe0_u8, 0xb8_u8, 0x97_u8,
2905 0xe0_u8, 0xb8_u8, 0xa2_u8,
2906 0xe4_u8, 0xb8_u8, 0xad_u8,
2907 0xe5_u8, 0x8d_u8, 0x8e_u8,
2908 0x56_u8, 0x69_u8, 0xe1_u8,
2909 0xbb_u8, 0x87_u8, 0x74_u8,
2910 0x20_u8, 0x4e_u8, 0x61_u8,
2913 let mut error_happened = false;
2914 let _x = do cond.trap(|err| {
2915 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
2916 error_happened = true;
2921 assert!(error_happened);
// Passes seven 'A' bytes followed by a terminating 0 byte to
// `raw::from_bytes_with_null`; the resulting string holds the seven letters
// and does not include the terminator.
2925 fn test_unsafe_from_bytes_with_null() {
2926 let a = [65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2927 let b = unsafe { raw::from_bytes_with_null(a) };
2928 assert_eq!(b, "AAAAAAA");
2932 fn test_from_bytes_with_null() {
2933 let ss = "ศไทย中华Việt Nam";
2934 let bb = [0xe0_u8, 0xb8_u8, 0xa8_u8,
2935 0xe0_u8, 0xb9_u8, 0x84_u8,
2936 0xe0_u8, 0xb8_u8, 0x97_u8,
2937 0xe0_u8, 0xb8_u8, 0xa2_u8,
2938 0xe4_u8, 0xb8_u8, 0xad_u8,
2939 0xe5_u8, 0x8d_u8, 0x8e_u8,
2940 0x56_u8, 0x69_u8, 0xe1_u8,
2941 0xbb_u8, 0x87_u8, 0x74_u8,
2942 0x20_u8, 0x4e_u8, 0x61_u8,
2945 assert_eq!(ss, from_bytes_with_null(bb));
2950 #[ignore(cfg(windows))]
2951 fn test_from_bytes_with_null_fail() {
2952 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2953 0xe0_u8, 0xb9_u8, 0x84_u8,
2954 0xe0_u8, 0xb8_u8, 0x97_u8,
2955 0xe0_u8, 0xb8_u8, 0xa2_u8,
2956 0xe4_u8, 0xb8_u8, 0xad_u8,
2957 0xe5_u8, 0x8d_u8, 0x8e_u8,
2958 0x56_u8, 0x69_u8, 0xe1_u8,
2959 0xbb_u8, 0x87_u8, 0x74_u8,
2960 0x20_u8, 0x4e_u8, 0x61_u8,
2963 let _x = from_bytes_with_null(bb);
2968 #[ignore(cfg(windows))]
2969 fn test_from_bytes_with_null_fail_2() {
2970 let bb = [0xff_u8, 0xb8_u8, 0xa8_u8,
2971 0xe0_u8, 0xb9_u8, 0x84_u8,
2972 0xe0_u8, 0xb8_u8, 0x97_u8,
2973 0xe0_u8, 0xb8_u8, 0xa2_u8,
2974 0xe4_u8, 0xb8_u8, 0xad_u8,
2975 0xe5_u8, 0x8d_u8, 0x8e_u8,
2976 0x56_u8, 0x69_u8, 0xe1_u8,
2977 0xbb_u8, 0x87_u8, 0x74_u8,
2978 0x20_u8, 0x4e_u8, 0x61_u8,
2981 let _x = from_bytes_with_null(bb);
2985 fn test_from_buf() {
2987 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
2988 let b = vec::raw::to_ptr(a);
2989 let c = raw::from_buf(b);
2990 assert_eq!(c, ~"AAAAAAA");
2995 fn test_as_bytes() {
2998 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2999 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3002 assert_eq!("".as_bytes(), &[]);
3003 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3004 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
3008 fn test_as_bytes_with_null() {
3011 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3012 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3018 let s3 = @"ศไทย中华Việt Nam";
3019 assert_eq!(s1.as_bytes_with_null(), &[0]);
3020 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3021 assert_eq!(s3.as_bytes_with_null(), v);
3025 let s3 = ~"ศไทย中华Việt Nam";
3026 assert_eq!(s1.as_bytes_with_null(), &[0]);
3027 assert_eq!(s2.as_bytes_with_null(), &['a' as u8, 'b' as u8, 'c' as u8, 0]);
3028 assert_eq!(s3.as_bytes_with_null(), v);
3032 fn test_to_bytes_with_null() {
3033 let s = ~"ศไทย中华Việt Nam";
3035 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3036 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3039 assert_eq!((~"").to_bytes_with_null(), ~[0]);
3040 assert_eq!((~"abc").to_bytes_with_null(),
3041 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3042 assert_eq!(s.to_bytes_with_null(), v);
3046 #[ignore(cfg(windows))]
3048 fn test_as_bytes_fail() {
3049 // Don't double free. (I'm not sure if this exercises the
3050 // original problem code path anymore.)
3052 let _bytes = s.as_bytes_with_null();
3057 fn test_as_imm_buf() {
3058 do "".as_imm_buf |buf, len| {
3061 assert_eq!(*ptr::offset(buf, 0), 0);
3065 do "hello".as_imm_buf |buf, len| {
3068 assert_eq!(*ptr::offset(buf, 0), 'h' as u8);
3069 assert_eq!(*ptr::offset(buf, 1), 'e' as u8);
3070 assert_eq!(*ptr::offset(buf, 2), 'l' as u8);
3071 assert_eq!(*ptr::offset(buf, 3), 'l' as u8);
3072 assert_eq!(*ptr::offset(buf, 4), 'o' as u8);
3073 assert_eq!(*ptr::offset(buf, 5), 0);
3079 fn test_as_c_str() {
3081 do a.as_c_str |buf| {
3083 assert_eq!(*ptr::offset(buf, 0), 0);
3088 do a.as_c_str |buf| {
3090 assert_eq!(*ptr::offset(buf, 0), 'h' as libc::c_char);
3091 assert_eq!(*ptr::offset(buf, 1), 'e' as libc::c_char);
3092 assert_eq!(*ptr::offset(buf, 2), 'l' as libc::c_char);
3093 assert_eq!(*ptr::offset(buf, 3), 'l' as libc::c_char);
3094 assert_eq!(*ptr::offset(buf, 4), 'o' as libc::c_char);
3095 assert_eq!(*ptr::offset(buf, 5), 0);
// `subslice_offset` reports the byte offset at which a slice taken from a
// string begins inside that string.
3101 fn test_subslice_offset() {
3102 let a = "kernelsprite";
// `b` is the tail starting at byte 7, `c` is a head starting at byte 0.
3103 let b = a.slice(7, a.len());
3104 let c = a.slice(0, a.len() - 6);
3105 assert_eq!(a.subslice_offset(b), 7);
3106 assert_eq!(a.subslice_offset(c), 0);
// Slices produced by `line_iter` point back into the source string: each
// one-character line starts two bytes after the previous one (char + '\n').
3108 let string = "a\nb\nc";
3109 let mut lines = ~[];
3110 for line in string.line_iter() { lines.push(line) }
3111 assert_eq!(string.subslice_offset(lines[0]), 0);
3112 assert_eq!(string.subslice_offset(lines[1]), 2);
3113 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with a string that is not a slice of the receiver
// and discards the result.
// NOTE(review): presumably this is a should-fail test whose attribute sits on
// a line not visible here - confirm.
3118 fn test_subslice_offset_2() {
3119 let a = "alchemiter";
3120 let b = "cruxtruder";
3121 a.subslice_offset(b);
3125 fn vec_str_conversions() {
3126 let s1: ~str = ~"All mimsy were the borogoves";
3128 let v: ~[u8] = s1.as_bytes().to_owned();
3129 let s2: ~str = from_bytes(v);
3130 let mut i: uint = 0u;
3131 let n1: uint = s1.len();
3132 let n2: uint = v.len();
// Substring search via `contains`.
3145 fn test_contains() {
// Matches in the middle, at the start and at the end of the haystack.
3146 assert!("abcde".contains("bcd"));
3147 assert!("abcde".contains("abcd"));
3148 assert!("abcde".contains("bcde"));
// The empty needle is found in every haystack, including the empty one.
3149 assert!("abcde".contains(""));
3150 assert!("".contains(""));
3151 assert!(!"abcde".contains("def"));
3152 assert!(!"".contains("a"));
// Multi-byte text: needles that occur contiguously are found; "ไท华" is not,
// because its characters are not adjacent in `data`.
3154 let data = ~"ประเทศไทย中华Việt Nam";
3155 assert!(data.contains("ประเ"));
3156 assert!(data.contains("ะเ"));
3157 assert!(data.contains("中华"));
3158 assert!(!data.contains("ไท华"));
// `contains_char` is true when the character occurs anywhere in the string
// and false for an absent character or an empty string.
3162 fn test_contains_char() {
3163 assert!("abc".contains_char('b'));
3164 assert!("a".contains_char('a'));
3165 assert!(!"abc".contains_char('d'));
3166 assert!(!"".contains_char('a'));
3171 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3172 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3179 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3180 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3181 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3182 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3185 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3186 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3187 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3188 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3189 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3192 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3193 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3194 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3195 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3196 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3197 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3198 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3199 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3201 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3202 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3203 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3204 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3205 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3206 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3207 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3208 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3209 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3210 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3211 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3214 for p in pairs.iter() {
3215 let (s, u) = (*p).clone();
3216 assert!(s.to_utf16() == u);
3217 assert!(from_utf16(u) == s);
3218 assert!(from_utf16(s.to_utf16()) == s);
3219 assert!(from_utf16(u).to_utf16() == u);
3225 let s = ~"ศไทย中华Việt Nam";
3226 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3228 for ch in v.iter() {
3229 assert!(s.char_at(pos) == *ch);
3230 pos += from_char(*ch).len();
// Walks a multi-byte string from its end and checks that `char_at_reverse`
// returns each expected character in turn.
3235 fn test_char_at_reverse() {
3236 let s = ~"ศไทย中华Việt Nam";
3237 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
// `pos` starts one past the last byte and always marks the end of the
// character about to be checked.
3238 let mut pos = s.len();
3239 for ch in v.rev_iter() {
3240 assert!(s.char_at_reverse(pos) == *ch);
// Step back by the encoded length of the character just checked.
3241 pos -= from_char(*ch).len();
// `escape_unicode` replaces every character, printable ASCII included, with a
// hexadecimal escape. The asserted forms are `\xNN` for code points up to 0xff,
// `\uNNNN` up to 0xffff and `\UNNNNNNNN` above that.
3246 fn test_escape_unicode() {
3247 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3248 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3249 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3250 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3251 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3252 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3253 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
// Mixed widths within one string: each character picks its own escape form.
3254 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3255 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
// `escape_default` leaves letters and spaces untouched, turns \r, \n, \t,
// quotes and the backslash into backslash escapes, and writes the non-ASCII
// characters tested here as `\uNNNN` or `\UNNNNNNNN`.
3259 fn test_escape_default() {
3260 assert_eq!("abc".escape_default(), ~"abc");
3261 assert_eq!("a c".escape_default(), ~"a c");
3262 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3263 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3264 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3265 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
// Mixed input: plain ASCII stays literal next to an escaped character.
3266 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3267 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
// `to_managed` copies a string slice into a managed (`@`) string; checked for
// a whole literal and for a sub-slice of one.
3271 fn test_to_managed() {
3272 assert_eq!("abc".to_managed(), @"abc");
3273 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
/// Checks the total ordering of string slices as reported by `cmp`.
///
/// Covers a string against its own proper prefix (in both directions), two
/// identical strings, and two pairs that differ at a byte before either
/// string ends.
fn test_total_ord() {
    // Every comparison is asserted. A bare `a.cmp(b) == Greater;` statement
    // only evaluates the boolean and throws it away, so it can never fail.
    assert_eq!("1234".cmp(& &"123"), Greater);
    assert_eq!("123".cmp(& &"1234"), Less);
    assert_eq!("1234".cmp(& &"1234"), Equal);
    // Decided at the sixth byte ('5' < '6'), not by length.
    assert_eq!("12345555".cmp(& &"123456"), Less);
    // Decided at the first byte ('2' > '1'), although the left side is shorter.
    assert_eq!("22".cmp(& &"1234"), Greater);
}
// `char_range_at(i).ch` is the character that starts at byte offset `i`.
// The sample string holds characters of 1, 2, 3, 4, 4, 3, 2 and 1 bytes, so
// the character boundaries fall at 0, 1, 3, 6, 10, 14, 17 and 19.
3286 fn test_char_range_at() {
3287 let data = ~"b¢€𤭢𤭢€¢b";
3288 assert_eq!('b', data.char_range_at(0).ch);
3289 assert_eq!('¢', data.char_range_at(1).ch);
3290 assert_eq!('€', data.char_range_at(3).ch);
3291 assert_eq!('𤭢', data.char_range_at(6).ch);
3292 assert_eq!('𤭢', data.char_range_at(10).ch);
3293 assert_eq!('€', data.char_range_at(14).ch);
3294 assert_eq!('¢', data.char_range_at(17).ch);
3295 assert_eq!('b', data.char_range_at(19).ch);
// Asking `char_range_at_reverse` for the character before offset 0 must
// report `next == 0`; the test name indicates this guards against the offset
// underflowing.
3299 fn test_char_range_at_reverse_underflow() {
3300 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3305 #[allow(unnecessary_allocation)];
3307 ($s1:expr, $s2:expr, $e:expr) => { {
3311 assert_eq!(s1 + s2, e.to_owned());
3312 assert_eq!(s1.to_owned() + s2, e.to_owned());
3313 assert_eq!(s1.to_managed() + s2, e.to_owned());
3317 t!("foo", "bar", "foobar");
3318 t!("foo", @"bar", "foobar");
3319 t!("foo", ~"bar", "foobar");
3320 t!("ศไทย中", "华Việt Nam", "ศไทย中华Việt Nam");
3321 t!("ศไทย中", @"华Việt Nam", "ศไทย中华Việt Nam");
3322 t!("ศไทย中", ~"华Việt Nam", "ศไทย中华Việt Nam");
3326 fn test_iterator() {
3328 let s = ~"ศไทย中华Việt Nam";
3329 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3332 let mut it = s.iter();
3335 assert_eq!(c, v[pos]);
3338 assert_eq!(pos, v.len());
3342 fn test_rev_iterator() {
3344 let s = ~"ศไทย中华Việt Nam";
3345 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3348 let mut it = s.rev_iter();
3351 assert_eq!(c, v[pos]);
3354 assert_eq!(pos, v.len());
3358 fn test_byte_iterator() {
3359 let s = ~"ศไทย中华Việt Nam";
3361 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3362 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3367 for b in s.byte_iter() {
3368 assert_eq!(b, v[pos]);
3374 fn test_byte_rev_iterator() {
3375 let s = ~"ศไทย中华Việt Nam";
3377 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3378 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3381 let mut pos = v.len();
3383 for b in s.byte_rev_iter() {
3385 assert_eq!(b, v[pos]);
3390 fn test_char_offset_iterator() {
3392 let s = "ศไทย中华Việt Nam";
3393 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
3394 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3397 let mut it = s.char_offset_iter();
3400 assert_eq!(c, (p[pos], v[pos]));
3403 assert_eq!(pos, v.len());
3404 assert_eq!(pos, p.len());
3408 fn test_char_offset_rev_iterator() {
3410 let s = "ศไทย中华Việt Nam";
3411 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
3412 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3415 let mut it = s.char_offset_rev_iter();
3418 assert_eq!(c, (p[pos], v[pos]));
3421 assert_eq!(pos, v.len());
3422 assert_eq!(pos, p.len());
// `split_iter` accepts either a `char` or a closure predicate as separator and
// must yield the same pieces for both. Newlines are not separators here, so
// they remain inside the pieces.
3426 fn test_split_char_iterator() {
3427 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
3429 let split: ~[&str] = data.split_iter(' ').collect();
3430 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3432 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3433 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Non-ASCII separator 'ä'.
3436 let split: ~[&str] = data.split_iter('ä').collect();
3437 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3439 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3440 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// `splitn_iter(sep, 3)` splits at the first three separators only: the result
// has four pieces and the last one keeps every later separator unsplit.
3444 fn test_splitn_char_iterator() {
3445 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator, given as a `char` and as a closure predicate.
3447 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3448 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3450 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3451 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Non-ASCII separator 'ä', given as a `char` and as a closure predicate.
3454 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3455 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3457 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3458 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// The boolean passed to `split_options_iter` decides whether the empty piece
// after a trailing separator is yielded. The count of 1000 is far above the
// number of separators in `data`, so it does not limit the split.
3462 fn test_split_char_iterator_no_trailing() {
3463 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// `true`: the trailing "" is kept.
3465 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3466 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
// `false`: the trailing "" is dropped; the leading "" is still present.
3468 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3469 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
// `word_iter` splits on whitespace (spaces, tabs and newlines here) and yields
// no empty words, even with leading and trailing whitespace in the input.
3473 fn test_word_iter() {
3474 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3475 let words: ~[&str] = data.word_iter().collect();
3476 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// `line_iter` yields one slice per line without the '\n'. A leading newline
// and a blank line each produce "", but a final '\n' does not add a trailing
// empty line, so both inputs below give the same four lines.
3480 fn test_line_iter() {
3481 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3482 let lines: ~[&str] = data.line_iter().collect();
3483 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3485 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3486 let lines: ~[&str] = data.line_iter().collect();
3487 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3491 fn test_split_str_iterator() {
3492 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3493 let v: ~[&str] = s.split_str_iter(sep).collect();
3496 t("--1233345--", "12345", ~["--1233345--"]);
3497 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3498 t("::hello::there", "::", ~["", "hello", "there"]);
3499 t("hello::there::", "::", ~["hello", "there", ""]);
3500 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3501 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3502 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3503 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3504 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3506 t("zz", "zz", ~["",""]);
3507 t("ok", "z", ~["ok"]);
3508 t("zzz", "zz", ~["","z"]);
3509 t("zzzzz", "zz", ~["","","z"]);
3513 fn test_str_zero() {
3515 fn t<S: Zero + Str>() {
3516 let s: S = Zero::zero();
3517 assert_eq!(s.as_slice(), "");
3518 assert!(s.is_zero());
3527 fn test_str_container() {
3528 fn sum_len<S: Container>(v: &[S]) -> uint {
3529 v.iter().transform(|x| x.len()).sum()
3533 assert_eq!(5, sum_len(["012", "", "34"]));
3534 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3535 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3536 assert_eq!(5, sum_len([s.as_slice()]));
3542 use extra::test::BenchHarness;
3546 fn is_utf8_100_ascii(bh: &mut BenchHarness) {
3548 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
3549 Lorem ipsum dolor sit amet, consectetur. ");
3551 assert_eq!(100, s.len());
3558 fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
3559 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
3560 assert_eq!(100, s.len());
3567 fn map_chars_100_ascii(bh: &mut BenchHarness) {
3568 let s = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\
3569 HelloHelloHelloHelloHelloHelloHelloHelloHelloHello";
3571 s.map_chars(|c| ((c as uint) + 1) as char);
3576 fn map_chars_100_multibytes(bh: &mut BenchHarness) {
3577 let s = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3578 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3579 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3580 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑";
3582 s.map_chars(|c| ((c as uint) + 1) as char);