1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided.
24 use container::{Container, Mutable};
26 use iterator::{Iterator, FromIterator, Extendable, IteratorUtil};
27 use iterator::{Filter, AdditiveIterator, Map};
28 use iterator::{Invert, DoubleEndedIterator, DoubleEndedIteratorUtil};
31 use option::{None, Option, Some};
37 use unstable::raw::Repr;
39 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
46 not_utf8: (~str) -> ~str;
50 Section: Creating a string
53 /// Convert a vector of bytes to a new UTF-8 string
///
/// `vv` is only borrowed; the conversion goes through `raw::from_bytes`.
57 /// Raises the `not_utf8` condition if invalid UTF-8
58 pub fn from_bytes(vv: &[u8]) -> ~str {
59 use str::not_utf8::cond;
// NOTE(review): every byte is checked as a one-element slice, so this looks
// like it reports the first byte that is not valid UTF-8 *on its own*,
// which is not necessarily the byte at which validation of `vv` as a whole
// failed -- confirm.
62 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
// The byte is reported as a decimal number in the condition's message.
63 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
64 first_bad_byte as uint))
// Unchecked conversion; presumably only reached once `vv` has been
// validated.
66 return unsafe { raw::from_bytes(vv) }
70 /// Consumes a vector of bytes to create a new utf-8 string
///
/// Takes `vv` by value and hands it on to `raw::from_bytes_owned`.
74 /// Raises the `not_utf8` condition if invalid UTF-8
75 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
76 use str::not_utf8::cond;
// NOTE(review): every byte is checked as a one-element slice, so this looks
// like it reports the first byte that is not valid UTF-8 *on its own*,
// which is not necessarily the byte at which validation of `vv` as a whole
// failed -- confirm.
79 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
// NOTE(review): the message is prefixed `from_bytes:` although it is raised
// from `from_bytes_owned` -- confirm whether that is intentional.
80 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
81 first_bad_byte as uint))
// Unchecked conversion; presumably only reached once `vv` has been
// validated.
83 return unsafe { raw::from_bytes_owned(vv) }
87 /// Converts a vector to a string slice without performing any allocations.
89 /// Once the slice has been validated as utf-8, it is transmuted in-place and
90 /// returned as a '&str' instead of a '&[u8]'
94 /// Fails if invalid UTF-8
96 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
98 assert!(is_utf8(vector));
99 let mut s = vector.repr();
105 /// Converts a vector to a string slice without performing any allocations.
107 /// Once the slice has been validated as utf-8, it is transmuted in-place and
108 /// returned as a '&str' instead of a '&[u8]'
112 /// Fails if invalid UTF-8
114 pub fn from_bytes_slice<'a>(v: &'a [u8]) -> &'a str {
116 unsafe { cast::transmute(v) }
119 impl ToStr for ~str {
121 fn to_str(&self) -> ~str { self.to_owned() }
123 impl<'self> ToStr for &'self str {
125 fn to_str(&self) -> ~str { self.to_owned() }
127 impl ToStr for @str {
129 fn to_str(&self) -> ~str { self.to_owned() }
132 /// Convert a byte to a UTF-8 string
136 /// Fails if invalid UTF-8
138 pub fn from_byte(b: u8) -> ~str {
140 unsafe { cast::transmute(~[b, 0u8]) }
143 /// Convert a byte to a UTF-8 string
147 /// Fails if invalid UTF-8
149 pub fn from_byte(b: u8) -> ~str {
151 unsafe { ::cast::transmute(~[b]) }
154 /// Convert a char to a string
155 pub fn from_char(ch: char) -> ~str {
161 /// Convert a vector of chars to a string
162 pub fn from_chars(chs: &[char]) -> ~str {
164 buf.reserve(chs.len());
165 for ch in chs.iter() {
172 pub fn push_str(lhs: &mut ~str, rhs: &str) {
176 #[allow(missing_doc)]
177 pub trait StrVector {
178 pub fn concat(&self) -> ~str;
179 pub fn connect(&self, sep: &str) -> ~str;
182 impl<'self, S: Str> StrVector for &'self [S] {
183 /// Concatenate a vector of strings.
185 pub fn concat(&self) -> ~str {
186 if self.is_empty() { return ~""; }
188 let len = self.iter().transform(|s| s.as_slice().len()).sum();
190 let mut s = with_capacity(len);
193 do s.as_mut_buf |buf, _| {
195 for ss in self.iter() {
196 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
197 let sslen = sslen - 1;
198 ptr::copy_memory(buf, ssbuf, sslen);
199 buf = buf.offset(sslen as int);
203 raw::set_len(&mut s, len);
208 /// Concatenate a vector of strings.
210 pub fn concat(&self) -> ~str {
211 if self.is_empty() { return ~""; }
213 let len = self.iter().transform(|s| s.as_slice().len()).sum();
215 let mut s = with_capacity(len);
218 do s.as_mut_buf |buf, _| {
220 for ss in self.iter() {
221 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
222 ptr::copy_memory(buf, ssbuf, sslen);
223 buf = buf.offset(sslen as int);
227 raw::set_len(&mut s, len);
232 /// Concatenate a vector of strings, placing a given separator between each.
234 pub fn connect(&self, sep: &str) -> ~str {
235 if self.is_empty() { return ~""; }
238 if sep.is_empty() { return self.concat(); }
240 // this is wrong without the guarantee that `self` is non-empty
241 let len = sep.len() * (self.len() - 1)
242 + self.iter().transform(|s| s.as_slice().len()).sum();
244 let mut first = true;
249 do s.as_mut_buf |buf, _| {
250 do sep.as_imm_buf |sepbuf, seplen| {
251 let seplen = seplen - 1;
252 let mut buf = cast::transmute_mut_unsafe(buf);
253 for ss in self.iter() {
254 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
255 let sslen = sslen - 1;
259 ptr::copy_memory(buf, sepbuf, seplen);
260 buf = buf.offset(seplen as int);
262 ptr::copy_memory(buf, ssbuf, sslen);
263 buf = buf.offset(sslen as int);
268 raw::set_len(&mut s, len);
273 /// Concatenate a vector of strings, placing a given separator between each.
275 pub fn connect(&self, sep: &str) -> ~str {
276 if self.is_empty() { return ~""; }
279 if sep.is_empty() { return self.concat(); }
281 // this is wrong without the guarantee that `self` is non-empty
282 let len = sep.len() * (self.len() - 1)
283 + self.iter().transform(|s| s.as_slice().len()).sum();
285 let mut first = true;
290 do s.as_mut_buf |buf, _| {
291 do sep.as_imm_buf |sepbuf, seplen| {
293 for ss in self.iter() {
294 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
298 ptr::copy_memory(buf, sepbuf, seplen);
299 buf = buf.offset(seplen as int);
301 ptr::copy_memory(buf, ssbuf, sslen);
302 buf = buf.offset(sslen as int);
307 raw::set_len(&mut s, len);
313 /// Something that can be used to compare against a character
315 /// Determine if the splitter should split at the given character
316 fn matches(&self, char) -> bool;
317 /// Indicate if this is only concerned about ASCII characters,
318 /// which can allow for a faster implementation.
319 fn only_ascii(&self) -> bool;
// A single `char` used as a matcher.
322 impl CharEq for char {
// Matches exactly the character itself.
324 fn matches(&self, c: char) -> bool { *self == c }
// True when the character's value is below 128.
326 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
// A borrowed closure used as a matcher.
329 impl<'self> CharEq for &'self fn(char) -> bool {
// The closure itself decides whether `c` matches.
331 fn matches(&self, c: char) -> bool { (*self)(c) }
// Always false: no ASCII-only claim is made for a closure.
333 fn only_ascii(&self) -> bool { false }
// A plain Rust function used as a matcher.
336 impl CharEq for extern "Rust" fn(char) -> bool {
// The function itself decides whether `c` matches.
338 fn matches(&self, c: char) -> bool { (*self)(c) }
// Always false: no ASCII-only claim is made for a function.
340 fn only_ascii(&self) -> bool { false }
// A slice of matchers used as one matcher.
343 impl<'self, C: CharEq> CharEq for &'self [C] {
// Matches when at least one element matches `c`; an empty slice matches
// nothing.
345 fn matches(&self, c: char) -> bool {
346 self.iter().any(|m| m.matches(c))
// ASCII-only when every element is; an empty slice therefore reports true.
349 fn only_ascii(&self) -> bool {
350 self.iter().all(|m| m.only_ascii())
358 /// External iterator for a string's characters and their byte offsets.
359 /// Use with the `std::iterator` module.
361 pub struct CharOffsetIterator<'self> {
362 priv index_front: uint,
363 priv index_back: uint,
364 priv string: &'self str,
367 impl<'self> Iterator<(uint, char)> for CharOffsetIterator<'self> {
369 fn next(&mut self) -> Option<(uint, char)> {
370 if self.index_front < self.index_back {
371 let CharRange {ch, next} = self.string.char_range_at(self.index_front);
372 let index = self.index_front;
373 self.index_front = next;
381 impl<'self> DoubleEndedIterator<(uint, char)> for CharOffsetIterator<'self> {
383 fn next_back(&mut self) -> Option<(uint, char)> {
384 if self.index_front < self.index_back {
385 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index_back);
386 self.index_back = next;
394 /// External iterator for a string's characters and their byte offsets in reverse order.
395 /// Use with the `std::iterator` module.
396 pub type CharOffsetRevIterator<'self> =
397 Invert<CharOffsetIterator<'self>>;
399 /// External iterator for a string's characters.
400 /// Use with the `std::iterator` module.
401 pub type CharIterator<'self> =
402 Map<'self, (uint, char), char, CharOffsetIterator<'self>>;
404 /// External iterator for a string's characters in reverse order.
405 /// Use with the `std::iterator` module.
406 pub type CharRevIterator<'self> =
407 Invert<Map<'self, (uint, char), char, CharOffsetIterator<'self>>>;
409 /// External iterator for a string's bytes.
410 /// Use with the `std::iterator` module.
411 pub type ByteIterator<'self> =
412 Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>;
414 /// External iterator for a string's bytes in reverse order.
415 /// Use with the `std::iterator` module.
416 pub type ByteRevIterator<'self> =
417 Invert<Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>>;
419 /// An iterator over the substrings of a string, separated by `sep`.
421 pub struct CharSplitIterator<'self,Sep> {
422 priv string: &'self str,
425 /// The number of splits remaining
427 /// Whether an empty string at the end is allowed
428 priv allow_trailing_empty: bool,
430 priv only_ascii: bool
433 /// An iterator over the words of a string, separated by a sequence of whitespace
434 pub type WordIterator<'self> =
435 Filter<'self, &'self str, CharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
437 /// An iterator over the lines of a string, separated by either `\n` or (`\r\n`).
438 pub type AnyLineIterator<'self> =
439 Map<'self, &'self str, &'self str, CharSplitIterator<'self, char>>;
441 impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep> {
443 fn next(&mut self) -> Option<&'self str> {
444 if self.finished { return None }
446 let l = self.string.len();
447 let start = self.position;
450 // this gives a *huge* speed up for splitting on ASCII
451 // characters (e.g. '\n' or ' ')
452 while self.position < l && self.count > 0 {
453 let byte = self.string[self.position];
455 if self.sep.matches(byte as char) {
456 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
464 while self.position < l && self.count > 0 {
465 let CharRange {ch, next} = self.string.char_range_at(self.position);
467 if self.sep.matches(ch) {
468 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
469 self.position = next;
473 self.position = next;
476 self.finished = true;
477 if self.allow_trailing_empty || start < l {
478 Some(unsafe { raw::slice_bytes(self.string, start, l) })
485 /// An iterator over the start and end indices of the matches of a
486 /// substring within a larger string
488 pub struct MatchesIndexIterator<'self> {
489 priv haystack: &'self str,
490 priv needle: &'self str,
494 /// An iterator over the substrings of a string separated by a given
497 pub struct StrSplitIterator<'self> {
498 priv it: MatchesIndexIterator<'self>,
503 impl<'self> Iterator<(uint, uint)> for MatchesIndexIterator<'self> {
505 fn next(&mut self) -> Option<(uint, uint)> {
506 // See Issue #1932 for why this is a naive search
507 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
508 let mut match_start = 0;
511 while self.position < h_len {
512 if self.haystack[self.position] == self.needle[match_i] {
513 if match_i == 0 { match_start = self.position; }
517 if match_i == n_len {
519 return Some((match_start, self.position));
522 // failed match, backtrack
525 self.position = match_start;
534 impl<'self> Iterator<&'self str> for StrSplitIterator<'self> {
536 fn next(&mut self) -> Option<&'self str> {
537 if self.finished { return None; }
539 match self.it.next() {
540 Some((from, to)) => {
541 let ret = Some(self.it.haystack.slice(self.last_end, from));
546 self.finished = true;
547 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
553 /// Replace all occurrences of one string with another
557 /// * s - The string containing substrings to replace
558 /// * from - The string to replace
559 /// * to - The replacement string
563 /// The original string with all occurrences of `from` replaced with `to`
564 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
565 let mut result = ~"";
566 let mut last_end = 0;
567 for (start, end) in s.matches_index_iter(from) {
568 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
572 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
577 Section: Comparing strings
580 /// Bytewise slice equality
581 #[cfg(not(test), stage0)]
584 pub fn eq_slice(a: &str, b: &str) -> bool {
585 do a.as_imm_buf |ap, alen| {
586 do b.as_imm_buf |bp, blen| {
587 if (alen != blen) { false }
590 libc::memcmp(ap as *libc::c_void,
592 (alen - 1) as libc::size_t) == 0
599 /// Bytewise slice equality
600 #[cfg(not(test), not(stage0))]
603 pub fn eq_slice(a: &str, b: &str) -> bool {
604 do a.as_imm_buf |ap, alen| {
605 do b.as_imm_buf |bp, blen| {
606 if (alen != blen) { false }
609 libc::memcmp(ap as *libc::c_void,
611 alen as libc::size_t) == 0
618 /// Bytewise slice equality
622 pub fn eq_slice(a: &str, b: &str) -> bool {
623 do a.as_imm_buf |ap, alen| {
624 do b.as_imm_buf |bp, blen| {
625 if (alen != blen) { false }
628 libc::memcmp(ap as *libc::c_void,
630 (alen - 1) as libc::size_t) == 0
637 /// Bytewise slice equality
638 #[cfg(test, not(stage0))]
640 pub fn eq_slice(a: &str, b: &str) -> bool {
641 do a.as_imm_buf |ap, alen| {
642 do b.as_imm_buf |bp, blen| {
643 if (alen != blen) { false }
646 libc::memcmp(ap as *libc::c_void,
648 alen as libc::size_t) == 0
655 /// Bytewise string equality
657 #[lang="uniq_str_eq"]
659 pub fn eq(a: &~str, b: &~str) -> bool {
665 pub fn eq(a: &~str, b: &~str) -> bool {
673 // Utility used by various searching functions
674 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
676 for c in needle.byte_iter() { if haystack[i] != c { return false; } i += 1u; }
684 /// Determines if a vector of bytes contains valid UTF-8
685 pub fn is_utf8(v: &[u8]) -> bool {
688 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
689 unsafe { *xs.unsafe_ref(i) }
692 let v_i = unsafe_get(v, i);
696 let w = utf8_char_width(v_i);
697 if w == 0u { return false; }
700 if nexti > total { return false; }
702 // 2-byte encoding is for codepoints \u0080 to \u07ff
703 // first C2 80 last DF BF
704 // 3-byte encoding is for codepoints \u0800 to \uffff
705 // first E0 A0 80 last EF BF BF
706 // 4-byte encoding is for codepoints \u10000 to \u10ffff
707 // first F0 90 80 80 last F4 8F BF BF
709 // Use the UTF-8 syntax from the RFC
711 // https://tools.ietf.org/html/rfc3629
713 // UTF8-2 = %xC2-DF UTF8-tail
714 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
715 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
716 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
717 // %xF4 %x80-8F 2( UTF8-tail )
718 // UTF8-tail = %x80-BF
720 // This code allows surrogate pairs: \uD800 to \uDFFF -> ED A0 80 to ED BF BF
722 2 => if unsafe_get(v, i + 1) & 192u8 != TAG_CONT_U8 {
726 unsafe_get(v, i + 1),
727 unsafe_get(v, i + 2) & 192u8) {
728 (0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) => (),
729 (0xE1 .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => (),
733 unsafe_get(v, i + 1),
734 unsafe_get(v, i + 2) & 192u8,
735 unsafe_get(v, i + 3) & 192u8) {
736 (0xF0 , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) => (),
737 (0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) => (),
738 (0xF4 , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => (),
749 /// Determines if a vector of `u16` contains valid UTF-16
///
/// Code units outside the surrogate range are accepted on their own. A unit
/// inside that range has to be a lead surrogate (0xD800-0xDBFF) that is
/// directly followed by a trail surrogate (0xDC00-0xDFFF).
750 pub fn is_utf16(v: &[u16]) -> bool {
// Anything outside 0xD800..0xDFFF is a complete character by itself.
756 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// A surrogate needs a partner, so input that ends on one is invalid. The
// comparison used to be `i+1u < len`, which rejected every surrogate that
// *did* have a following unit and did not stop a dangling one at the very
// end of `v`.
760 if i+1u >= len { return false; }
// The first unit of a pair must not be a trail surrogate (above 0xDBFF).
// NOTE(review): the lower bound 0xD7FF looks like it was meant to be 0xD800;
// it presumably cannot matter for values that reach this line -- confirm.
762 if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; }
// The second unit must be a trail surrogate.
763 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
770 /// Iterates over the utf-16 characters in the specified slice, yielding each
771 /// decoded unicode character to the function provided.
///
/// Decoding stops at the end of the slice or at the first code unit that is
/// zero, whichever comes first.
775 /// * Fails on invalid utf-16 data
776 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
// A zero unit ends the iteration just like the end of the slice does.
779 while (i < len && v[i] != 0u16) {
// Units outside 0xD800..0xDFFF are not surrogates.
782 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// Surrogate pair: the first unit must be a lead surrogate and the second a
// trail surrogate, otherwise the assertions fail.
788 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
789 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
// Start from the payload bits of the lead surrogate ...
790 let mut c = (u - 0xD800_u16) as char;
// ... merge in the payload bits of the trail surrogate ...
792 c |= (u2 - 0xDC00_u16) as char;
// ... and apply the 0x10000 offset of the supplementary planes.
// NOTE(review): the offset is OR-ed in, not added. If the bits contributed
// by the lead surrogate already include bit 16, OR leaves the value as it is
// where addition would not -- confirm this yields the intended code point.
793 c |= 0x1_0000_u32 as char;
800 /// Allocates a new string from the utf-16 slice provided
801 pub fn from_utf16(v: &[u16]) -> ~str {
803 buf.reserve(v.len());
804 utf16_chars(v, |ch| buf.push_char(ch));
808 /// Allocates a new string with the specified capacity. The string returned is
809 /// the empty string, but has capacity for much more.
811 pub fn with_capacity(capacity: uint) -> ~str {
813 buf.reserve(capacity);
817 /// As char_len but for a slice of a string
821 /// * s - A valid string
822 /// * start - The position inside `s` where to start counting in bytes
823 /// * end - The position where to stop counting
827 /// The number of Unicode characters in `s` between the given indices.
828 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
829 assert!(s.is_char_boundary(start));
830 assert!(s.is_char_boundary(end));
834 let next = s.char_range_at(i).next;
841 /// Counts the number of bytes taken by the first `n` chars in `s`
842 /// starting from `start`.
843 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
844 assert!(s.is_char_boundary(start));
850 let next = s.char_range_at(end).next;
857 // https://tools.ietf.org/html/rfc3629
// Lookup table indexed by a byte value; each row below covers 16 values.
// Entries are 1 for 0x00-0x7F, 2 for 0xC2-0xDF, 3 for 0xE0-0xEF and
// 4 for 0xF0-0xF4. The remaining bytes (0x80-0xBF, 0xC0-0xC1 and
// 0xF5-0xFF) are 0.
858 priv static UTF8_CHAR_WIDTH: [u8, ..256] = [
859 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
860 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
861 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
862 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
863 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
864 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
865 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
866 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
867 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
868 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
869 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
870 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
// 0xC0 and 0xC1 are 0; the two-byte range starts at 0xC2.
871 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
872 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
873 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
// Only 0xF0-0xF4 are 4; 0xF5 and above are 0.
874 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
877 /// Given a first byte, determine how many bytes are in this UTF-8 character
///
/// The answer is read from `UTF8_CHAR_WIDTH`, so the result is 0 for every
/// byte that table marks with 0.
878 pub fn utf8_char_width(b: u8) -> uint {
// Plain table lookup, widened to `uint`.
879 return UTF8_CHAR_WIDTH[b] as uint;
882 #[allow(missing_doc)]
883 pub struct CharRange {
888 // Return the initial codepoint accumulator for the first byte.
889 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
890 // for width 3, and 3 bits for width 4
891 macro_rules! utf8_first_byte(
892 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as uint)
895 // return the value of $ch updated with continuation byte $byte
896 macro_rules! utf8_acc_cont_byte(
897 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
900 // UTF-8 tags and ranges
// The TAG_* values are high-bit patterns: 128 = 0x80 (10xxxxxx),
// 192 = 0xC0 (110xxxxx), 224 = 0xE0 (1110xxxx) and 240 = 0xF0 (11110xxx).
// The MAX_* values are 0x80, 0x800, 0x10000 and 0x110000, i.e. one past the
// last code point of the one-, two-, three- and four-byte ranges.
// TAG_CONT_U8 is the same value as TAG_CONT, typed as a byte.
901 priv static TAG_CONT_U8: u8 = 128u8;
902 priv static TAG_CONT: uint = 128u;
903 priv static MAX_ONE_B: uint = 128u;
904 priv static TAG_TWO_B: uint = 192u;
905 priv static MAX_TWO_B: uint = 2048u;
906 priv static TAG_THREE_B: uint = 224u;
907 priv static MAX_THREE_B: uint = 65536u;
908 priv static TAG_FOUR_B: uint = 240u;
909 priv static MAX_UNICODE: uint = 1114112u;
911 /// Unsafe operations
919 use vec::MutableVector;
920 use unstable::raw::Slice;
922 use unstable::raw::String;
924 /// Create a Rust string from a *u8 buffer of the given length
926 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
927 let mut v: ~[u8] = vec::with_capacity(len + 1);
928 v.as_mut_buf(|vbuf, _len| {
929 ptr::copy_memory(vbuf, buf as *u8, len)
931 vec::raw::set_len(&mut v, len);
938 /// Create a Rust string from a *u8 buffer of the given length
940 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
941 let mut v: ~[u8] = vec::with_capacity(len);
942 do v.as_mut_buf |vbuf, _len| {
943 ptr::copy_memory(vbuf, buf as *u8, len)
945 vec::raw::set_len(&mut v, len);
951 /// Create a Rust string from a null-terminated C string
952 pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
957 curr = ptr::offset(buf, i);
959 from_buf_len(buf as *u8, i as uint)
962 /// Converts a vector of bytes to a new owned string.
963 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
964 do v.as_imm_buf |buf, len| {
965 from_buf_len(buf, len)
969 /// Converts an owned vector of bytes to a new owned string. This assumes
970 /// that the utf-8-ness of the vector has already been validated
972 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
977 /// Converts an owned vector of bytes to a new owned string. This assumes
978 /// that the utf-8-ness of the vector has already been validated
981 pub unsafe fn from_bytes_owned(v: ~[u8]) -> ~str {
985 /// Converts a byte to a string.
986 pub unsafe fn from_byte(u: u8) -> ~str { from_bytes([u]) }
988 /// Form a slice from a C string. Unsafe because the caller must ensure the
989 /// C string has the static lifetime, or else the return value may be
990 /// invalidated later.
992 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
998 curr = ptr::offset(s, len as int);
1000 let v = Slice { data: s, len: len + 1 };
1001 assert!(is_utf8(cast::transmute(v)));
1005 /// Form a slice from a C string. Unsafe because the caller must ensure the
1006 /// C string has the static lifetime, or else the return value may be
1007 /// invalidated later.
1009 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
1013 while *curr != 0u8 {
1015 curr = ptr::offset(s, len as int);
1017 let v = Slice { data: s, len: len };
1018 assert!(is_utf8(::cast::transmute(v)));
1019 ::cast::transmute(v)
1022 /// Takes a bytewise (not UTF-8) slice from a string.
1024 /// Returns the substring from [`begin`..`end`).
1028 /// If begin is greater than end.
1029 /// If end is greater than the length of the string.
1032 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
1033 do s.as_imm_buf |sbuf, n| {
1034 assert!((begin <= end));
1035 assert!((end <= n));
1037 cast::transmute(Slice {
1038 data: ptr::offset(sbuf, begin as int),
1039 len: end - begin + 1,
1044 /// Takes a bytewise (not UTF-8) slice from a string.
1046 /// Returns the substring from [`begin`..`end`).
1050 /// If begin is greater than end.
1051 /// If end is greater than the length of the string.
1054 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
1055 do s.as_imm_buf |sbuf, n| {
1056 assert!((begin <= end));
1057 assert!((end <= n));
1059 cast::transmute(Slice {
1060 data: ptr::offset(sbuf, begin as int),
1066 /// Appends a byte to a string. (Not UTF-8 safe).
///
/// The byte is stored as given; nothing checks that the string is still
/// valid UTF-8 afterwards.
1067 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
1068 let new_len = s.len() + 1;
// Ask for capacity before writing into the buffer.
1069 s.reserve_at_least(new_len);
// Store the byte at offset `len`, where `len` is the length that
// `as_mut_buf` reports.
1070 do s.as_mut_buf |buf, len| {
1071 *ptr::mut_offset(buf, len as int) = b;
// Record the new length so the stored byte becomes part of the string.
1073 set_len(&mut *s, new_len);
1076 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
///
/// The bytes are appended one at a time, in order, through `push_byte`.
1077 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1078 let new_len = s.len() + bytes.len();
// Reserve for the whole append up front; `push_byte` still issues its own
// reserve request and length update for every byte.
1079 s.reserve_at_least(new_len);
1080 for byte in bytes.iter() { push_byte(&mut *s, *byte); }
1083 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1084 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
1086 assert!((len > 0u));
1087 let b = s[len - 1u];
1088 set_len(s, len - 1u);
1092 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1093 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
1095 assert!((len > 0u));
1097 *s = s.slice(1, len).to_owned();
1101 /// Sets the length of the string and adds the null terminator
1104 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
1105 let v: **mut String = cast::transmute(v);
1107 (*repr).fill = new_len + 1u;
1108 let null = ptr::mut_offset(&mut ((*repr).data), new_len as int);
1112 /// Sets the length of a string
1114 /// This will explicitly set the size of the string, without actually
1115 /// modifying its buffers, so it is up to the caller to ensure that
1116 /// the string is actually the specified size.
1119 pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
1120 let v: &mut ~[u8] = cast::transmute(s);
1121 vec::raw::set_len(v, new_len)
1124 /// Sets the length of a string
1126 /// This will explicitly set the size of the string, without actually
1127 /// modifying its buffers, so it is up to the caller to ensure that
1128 /// the string is actually the specified size.
1130 fn test_from_buf_len() {
1132 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
1133 let b = vec::raw::to_ptr(a);
1134 let c = from_buf_len(b, 3u);
1135 assert_eq!(c, ~"AAA");
1142 Section: Trait implementations
1148 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
1149 use super::{Str, eq_slice};
1150 use option::{Some, None};
1152 impl<'self> Add<&'self str,~str> for &'self str {
1154 fn add(&self, rhs: & &'self str) -> ~str {
1155 let mut ret = self.to_owned();
// Total ordering of string slices, compared byte by byte.
1161 impl<'self> TotalOrd for &'self str {
1163 fn cmp(&self, other: & &'self str) -> Ordering {
// Walk both strings in lock step; `zip` stops at the end of the shorter one.
1164 for (s_b, o_b) in self.byte_iter().zip(other.byte_iter()) {
// The first pair of differing bytes decides the result.
1165 match s_b.cmp(&o_b) {
1166 Greater => return Greater,
1167 Less => return Less,
// No difference within the common prefix: compare the lengths instead.
1172 self.len().cmp(&other.len())
1176 impl TotalOrd for ~str {
1178 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1181 impl TotalOrd for @str {
1183 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1186 impl<'self> Eq for &'self str {
1188 fn eq(&self, other: & &'self str) -> bool {
1189 eq_slice((*self), (*other))
1192 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
1197 fn eq(&self, other: &~str) -> bool {
1198 eq_slice((*self), (*other))
1201 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
1206 fn eq(&self, other: &@str) -> bool {
1207 eq_slice((*self), (*other))
1210 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
1213 impl<'self> TotalEq for &'self str {
1215 fn equals(&self, other: & &'self str) -> bool {
1216 eq_slice((*self), (*other))
1220 impl TotalEq for ~str {
1222 fn equals(&self, other: &~str) -> bool {
1223 eq_slice((*self), (*other))
1227 impl TotalEq for @str {
1229 fn equals(&self, other: &@str) -> bool {
1230 eq_slice((*self), (*other))
// Comparison operators for string slices; each one is expressed through
// `cmp`.
1234 impl<'self> Ord for &'self str {
1236 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
// "Less or equal" is written as "not greater".
1238 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
// "Greater or equal" is written as "not less".
1240 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
1242 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
1247 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1249 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
1251 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
1253 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
1258 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
1260 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
1262 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
1264 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
1267 impl<'self, S: Str> Equiv<S> for &'self str {
1269 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1272 impl<'self, S: Str> Equiv<S> for @str {
1274 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1277 impl<'self, S: Str> Equiv<S> for ~str {
1279 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1286 /// Any string that can be represented as a slice
1288 /// Work with `self` as a slice.
1289 fn as_slice<'a>(&'a self) -> &'a str;
1291 /// Convert `self` into a ~str.
1292 fn into_owned(self) -> ~str;
// `Str` for borrowed string slices.
1295 impl<'self> Str for &'self str {
// The value already is a slice, so it is returned as is.
1297 fn as_slice<'a>(&'a self) -> &'a str { *self }
// Produces the owned string through `to_owned`.
1300 fn into_owned(self) -> ~str { self.to_owned() }
// `Str` for owned strings.
1303 impl<'self> Str for ~str {
1305 fn as_slice<'a>(&'a self) -> &'a str {
// The annotated binding makes the borrow last for `'a`.
1306 let s: &'a str = *self; s
// Already owned: the string is handed back unchanged.
1310 fn into_owned(self) -> ~str { self }
1313 impl<'self> Str for @str {
1315 fn as_slice<'a>(&'a self) -> &'a str {
1316 let s: &'a str = *self; s
1320 fn into_owned(self) -> ~str { self.to_owned() }
// Length of a string slice, derived from the length `as_imm_buf` reports.
// NOTE(review): two variants of `len` follow; presumably they are selected
// by `cfg` attributes in the same way as the stage0 / not(stage0) variants
// of `eq_slice` -- confirm.
1323 impl<'self> Container for &'self str {
// This variant discounts one byte from the reported length.
// NOTE(review): presumably the discounted byte is a trailing null
// terminator -- confirm.
1326 fn len(&self) -> uint {
1327 do self.as_imm_buf |_p, n| { n - 1u }
// This variant returns the reported length unchanged.
1332 fn len(&self) -> uint {
1333 do self.as_imm_buf |_p, n| { n }
1337 impl Container for ~str {
1339 fn len(&self) -> uint { self.as_slice().len() }
1342 impl Container for @str {
1344 fn len(&self) -> uint { self.as_slice().len() }
1347 impl Mutable for ~str {
1348 /// Remove all content, make the string empty
1350 fn clear(&mut self) {
1352 raw::set_len(self, 0)
1357 #[allow(missing_doc)]
1358 pub trait StrSlice<'self> {
1359 fn contains<'a>(&self, needle: &'a str) -> bool;
1360 fn contains_char(&self, needle: char) -> bool;
1361 fn iter(&self) -> CharIterator<'self>;
1362 fn rev_iter(&self) -> CharRevIterator<'self>;
1363 fn byte_iter(&self) -> ByteIterator<'self>;
1364 fn byte_rev_iter(&self) -> ByteRevIterator<'self>;
1365 fn char_offset_iter(&self) -> CharOffsetIterator<'self>;
1366 fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self>;
1367 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep>;
1368 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep>;
1369 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1370 -> CharSplitIterator<'self, Sep>;
1371 fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self>;
1372 fn split_str_iter(&self, &'self str) -> StrSplitIterator<'self>;
1373 fn line_iter(&self) -> CharSplitIterator<'self, char>;
1374 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1375 fn word_iter(&self) -> WordIterator<'self>;
1376 fn ends_with(&self, needle: &str) -> bool;
1377 fn is_whitespace(&self) -> bool;
1378 fn is_alphanumeric(&self) -> bool;
1379 fn char_len(&self) -> uint;
1381 fn slice(&self, begin: uint, end: uint) -> &'self str;
1382 fn slice_from(&self, begin: uint) -> &'self str;
1383 fn slice_to(&self, end: uint) -> &'self str;
1385 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1387 fn starts_with(&self, needle: &str) -> bool;
1388 fn escape_default(&self) -> ~str;
1389 fn escape_unicode(&self) -> ~str;
1390 fn trim(&self) -> &'self str;
1391 fn trim_left(&self) -> &'self str;
1392 fn trim_right(&self) -> &'self str;
1393 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1394 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1395 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1396 fn replace(&self, from: &str, to: &str) -> ~str;
1397 fn to_owned(&self) -> ~str;
1398 fn to_managed(&self) -> @str;
1399 fn to_utf16(&self) -> ~[u16];
1400 fn is_char_boundary(&self, index: uint) -> bool;
1401 fn char_range_at(&self, start: uint) -> CharRange;
1402 fn char_at(&self, i: uint) -> char;
1403 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1404 fn char_at_reverse(&self, i: uint) -> char;
1405 fn as_bytes(&self) -> &'self [u8];
1407 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1408 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1409 fn find_str(&self, &str) -> Option<uint>;
1411 fn repeat(&self, nn: uint) -> ~str;
1413 fn slice_shift_char(&self) -> (char, &'self str);
1415 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1417 fn lev_distance(&self, t: &str) -> uint;
1419 fn subslice_offset(&self, inner: &str) -> uint;
1421 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T;
1424 /// Extension methods for strings
1425 impl<'self> StrSlice<'self> for &'self str {
1426 /// Returns true if one string contains another
1430 /// * needle - The string to look for
1432 fn contains<'a>(&self, needle: &'a str) -> bool {
1433 self.find_str(needle).is_some()
1436 /// Returns true if a string contains a char.
1440 /// * needle - The char to look for
1442 fn contains_char(&self, needle: char) -> bool {
1443 self.find(needle).is_some()
1446 /// An iterator over the characters of `self`. Note, this iterates
1447 /// over unicode code-points, not unicode graphemes.
1452 /// let v: ~[char] = "abc åäö".iter().collect();
1453 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1456 fn iter(&self) -> CharIterator<'self> {
1457 self.char_offset_iter().transform(|(_, c)| c)
1460 /// An iterator over the characters of `self`, in reverse order.
1462 fn rev_iter(&self) -> CharRevIterator<'self> {
1463 self.iter().invert()
1466 /// An iterator over the bytes of `self`
1468 fn byte_iter(&self) -> ByteIterator<'self> {
1469 self.as_bytes().iter().transform(|&b| b)
1472 /// An iterator over the bytes of `self`, in reverse order
1474 fn byte_rev_iter(&self) -> ByteRevIterator<'self> {
1475 self.byte_iter().invert()
1478 /// An iterator over the characters of `self` and their byte offsets.
1480 fn char_offset_iter(&self) -> CharOffsetIterator<'self> {
1481 CharOffsetIterator {
1483 index_back: self.len(),
1488 /// An iterator over the characters of `self` and their byte offsets.
1490 fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self> {
1491 self.char_offset_iter().invert()
1494 /// An iterator over substrings of `self`, separated by characters
1495 /// matched by `sep`.
1500 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1501 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1503 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1504 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1507 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep> {
1508 self.split_options_iter(sep, self.len(), true)
1511 /// An iterator over substrings of `self`, separated by characters
1512 /// matched by `sep`, restricted to splitting at most `count`
1515 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep> {
1516 self.split_options_iter(sep, count, true)
1519 /// An iterator over substrings of `self`, separated by characters
1520 /// matched by `sep`, splitting at most `count` times, and
1521 /// possibly not including the trailing empty substring, if it
1524 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1525 -> CharSplitIterator<'self, Sep> {
1526 let only_ascii = sep.only_ascii();
1532 allow_trailing_empty: allow_trailing_empty,
1534 only_ascii: only_ascii
1538 /// An iterator over the start and end indices of each match of
1539 /// `sep` within `self`.
1541 fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self> {
1542 assert!(!sep.is_empty())
1543 MatchesIndexIterator {
1550 /// An iterator over the substrings of `self` separated by `sep`.
1555 /// let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect()
1556 /// assert_eq!(v, ["", "XXX", "YYY", ""]);
1559 fn split_str_iter(&self, sep: &'self str) -> StrSplitIterator<'self> {
1561 it: self.matches_index_iter(sep),
1567 /// An iterator over the lines of a string (subsequences separated
1570 fn line_iter(&self) -> CharSplitIterator<'self, char> {
1571 self.split_options_iter('\n', self.len(), false)
1574 /// An iterator over the lines of a string, separated by either
1575 /// `\n` or (`\r\n`).
1576 fn any_line_iter(&self) -> AnyLineIterator<'self> {
1577 do self.line_iter().transform |line| {
1579 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1584 /// An iterator over the words of a string (subsequences separated
1585 /// by any sequence of whitespace).
1587 fn word_iter(&self) -> WordIterator<'self> {
1588 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1591 /// Returns true if the string contains only whitespace
1593 /// Whitespace characters are determined by `char::is_whitespace`
1595 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1597 /// Returns true if the string contains only alphanumerics
1599 /// Alphanumeric characters are determined by `char::is_alphanumeric`
1601 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1603 /// Returns the number of characters that a string holds
1605 fn char_len(&self) -> uint { self.iter().len_() }
1607 /// Returns a slice of the given string from the byte range
1608 /// [`begin`..`end`)
1610 /// Fails when `begin` and `end` do not point to valid characters or
1611 /// beyond the last character of the string
1613 fn slice(&self, begin: uint, end: uint) -> &'self str {
1614 assert!(self.is_char_boundary(begin));
1615 assert!(self.is_char_boundary(end));
1616 unsafe { raw::slice_bytes(*self, begin, end) }
1619 /// Returns a slice of the string from `begin` to its end.
1621 /// Fails when `begin` does not point to a valid character, or is
1624 fn slice_from(&self, begin: uint) -> &'self str {
1625 self.slice(begin, self.len())
1628 /// Returns a slice of the string from the beginning to byte
1631 /// Fails when `end` does not point to a valid character, or is
1634 fn slice_to(&self, end: uint) -> &'self str {
1638 /// Returns a slice of the string from the char range
1639 /// [`begin`..`end`).
1641 /// Fails if `begin` > `end` or the either `begin` or `end` are
1642 /// beyond the last character of the string.
1643 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1644 assert!(begin <= end);
1645 // not sure how to use the iterators for this nicely.
1646 let mut position = 0;
1649 while count < begin && position < l {
1650 position = self.char_range_at(position).next;
1653 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1654 let start_byte = position;
1655 while count < end && position < l {
1656 position = self.char_range_at(position).next;
1659 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1661 self.slice(start_byte, position)
1664 /// Returns true if `needle` is a prefix of the string.
1665 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1666 let (self_len, needle_len) = (self.len(), needle.len());
1667 if needle_len == 0u { true }
1668 else if needle_len > self_len { false }
1669 else { match_at(*self, needle, 0u) }
1672 /// Returns true if `needle` is a suffix of the string.
1673 fn ends_with(&self, needle: &str) -> bool {
1674 let (self_len, needle_len) = (self.len(), needle.len());
1675 if needle_len == 0u { true }
1676 else if needle_len > self_len { false }
1677 else { match_at(*self, needle, self_len - needle_len) }
1680 /// Escape each char in `s` with char::escape_default.
1681 fn escape_default(&self) -> ~str {
1682 let mut out: ~str = ~"";
1683 out.reserve_at_least(self.len());
1684 for c in self.iter() {
1685 do c.escape_default |c| {
1692 /// Escape each char in `s` with char::escape_unicode.
1693 fn escape_unicode(&self) -> ~str {
1694 let mut out: ~str = ~"";
1695 out.reserve_at_least(self.len());
1696 for c in self.iter() {
1697 do c.escape_unicode |c| {
1704 /// Returns a string with leading and trailing whitespace removed
1706 fn trim(&self) -> &'self str {
1707 self.trim_left().trim_right()
1710 /// Returns a string with leading whitespace removed
1712 fn trim_left(&self) -> &'self str {
1713 self.trim_left_chars(&char::is_whitespace)
1716 /// Returns a string with trailing whitespace removed
1718 fn trim_right(&self) -> &'self str {
1719 self.trim_right_chars(&char::is_whitespace)
1722 /// Returns a string with characters that match `to_trim` removed.
1726 /// * to_trim - a character matcher
1731 /// assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1732 /// assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1733 /// assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1736 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1737 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1740 /// Returns a string with leading `chars_to_trim` removed.
1744 /// * to_trim - a character matcher
1749 /// assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1750 /// assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1751 /// assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1754 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1755 match self.find(|c: char| !to_trim.matches(c)) {
1757 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1761 /// Returns a string with trailing `chars_to_trim` removed.
1765 /// * to_trim - a character matcher
1770 /// assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1771 /// assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1772 /// assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1775 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1776 match self.rfind(|c: char| !to_trim.matches(c)) {
1779 let next = self.char_range_at(last).next;
1780 unsafe { raw::slice_bytes(*self, 0u, next) }
1785 /// Replace all occurrences of one string with another
1789 /// * from - The string to replace
1790 /// * to - The replacement string
1794 /// The original string with all occurances of `from` replaced with `to`
1795 pub fn replace(&self, from: &str, to: &str) -> ~str {
1796 let mut result = ~"";
1797 let mut last_end = 0;
1798 for (start, end) in self.matches_index_iter(from) {
1799 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1800 result.push_str(to);
1803 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1807 /// Copy a slice into a new unique str
1810 fn to_owned(&self) -> ~str {
1811 do self.as_imm_buf |src, len| {
1814 let mut v = vec::with_capacity(len);
1816 do v.as_mut_buf |dst, _| {
1817 ptr::copy_memory(dst, src, len - 1);
1819 vec::raw::set_len(&mut v, len - 1);
1821 ::cast::transmute(v)
1826 /// Copy a slice into a new unique str
1829 fn to_owned(&self) -> ~str {
1830 do self.as_imm_buf |src, len| {
1832 let mut v = vec::with_capacity(len);
1834 do v.as_mut_buf |dst, _| {
1835 ptr::copy_memory(dst, src, len);
1837 vec::raw::set_len(&mut v, len);
1838 ::cast::transmute(v)
1845 fn to_managed(&self) -> @str {
1846 let v = at_vec::from_fn(self.len() + 1, |i| {
1847 if i == self.len() { 0 } else { self[i] }
1849 unsafe { cast::transmute(v) }
1854 fn to_managed(&self) -> @str {
1856 let v: *&[u8] = cast::transmute(self);
1857 cast::transmute(at_vec::to_managed(*v))
1861 /// Converts to a vector of `u16` encoded as UTF-16.
1862 fn to_utf16(&self) -> ~[u16] {
1864 for ch in self.iter() {
1865 // Arithmetic with u32 literals is easier on the eyes than chars.
1866 let mut ch = ch as u32;
1868 if (ch & 0xFFFF_u32) == ch {
1869 // The BMP falls through (assuming non-surrogate, as it
1871 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1874 // Supplementary planes break into surrogates.
1875 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1877 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1878 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1879 u.push_all([w1, w2])
1885 /// Returns false if the index points into the middle of a multi-byte
1886 /// character sequence.
1887 fn is_char_boundary(&self, index: uint) -> bool {
1888 if index == self.len() { return true; }
1889 let b = self[index];
1890 return b < 128u8 || b >= 192u8;
1893 /// Pluck a character out of a string and return the index of the next
1896 /// This function can be used to iterate over the unicode characters of a
1902 /// let s = "中华Việt Nam";
1904 /// while i < s.len() {
1905 /// let CharRange {ch, next} = s.char_range_at(i);
1906 /// printfln!("%u: %c", i, ch);
1911 /// # Example output
1928 /// * s - The string
1929 /// * i - The byte offset of the char to extract
1933 /// A record {ch: char, next: uint} containing the char value and the byte
1934 /// index of the next unicode character.
1938 /// If `i` is greater than or equal to the length of the string.
1939 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1941 fn char_range_at(&self, i: uint) -> CharRange {
1942 if (self[i] < 128u8) {
1943 return CharRange {ch: self[i] as char, next: i + 1 };
1946 // Multibyte case is a fn to allow char_range_at to inline cleanly
1947 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1948 let mut val = s[i] as uint;
1949 let w = UTF8_CHAR_WIDTH[val] as uint;
1952 val = utf8_first_byte!(val, w);
1953 val = utf8_acc_cont_byte!(val, s[i + 1]);
1954 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1955 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1957 return CharRange {ch: val as char, next: i + w};
1960 return multibyte_char_range_at(*self, i);
1963 /// Plucks the character starting at the `i`th byte of a string
1965 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1967 /// Given a byte position and a str, return the previous char and its position.
1969 /// This function can be used to iterate over a unicode string in reverse.
1971 /// Returns 0 for next index if called on start index 0.
1972 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1973 let mut prev = start;
1975 // while there is a previous byte == 10......
1976 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1980 // now refer to the initial byte of previous char
1988 let ch = self.char_at(prev);
1989 return CharRange {ch:ch, next:prev};
1992 /// Plucks the character ending at the `i`th byte of a string
1994 fn char_at_reverse(&self, i: uint) -> char {
1995 self.char_range_at_reverse(i).ch
1998 /// Work with the byte buffer of a string as a byte slice.
2000 /// The byte slice does not include the null terminator.
2002 fn as_bytes(&self) -> &'self [u8] {
2004 let mut slice = self.repr();
2006 cast::transmute(slice)
2010 /// Work with the byte buffer of a string as a byte slice.
2012 /// The byte slice does not include the null terminator.
2014 fn as_bytes(&self) -> &'self [u8] {
2015 unsafe { cast::transmute(*self) }
2018 /// Returns the byte index of the first character of `self` that matches `search`
2022 /// `Some` containing the byte index of the last matching character
2023 /// or `None` if there is no match
2024 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
2025 if search.only_ascii() {
2026 for (i, b) in self.byte_iter().enumerate() {
2027 if search.matches(b as char) { return Some(i) }
2031 for c in self.iter() {
2032 if search.matches(c) { return Some(index); }
2033 index += c.len_utf8_bytes();
2040 /// Returns the byte index of the last character of `self` that matches `search`
2044 /// `Some` containing the byte index of the last matching character
2045 /// or `None` if there is no match
2046 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
2047 let mut index = self.len();
2048 if search.only_ascii() {
2049 for b in self.byte_rev_iter() {
2051 if search.matches(b as char) { return Some(index); }
2054 for c in self.rev_iter() {
2055 index -= c.len_utf8_bytes();
2056 if search.matches(c) { return Some(index); }
2063 /// Returns the byte index of the first matching substring
2067 /// * `needle` - The string to search for
2071 /// `Some` containing the byte index of the first matching substring
2072 /// or `None` if there is no match
2073 fn find_str(&self, needle: &str) -> Option<uint> {
2074 if needle.is_empty() {
2077 self.matches_index_iter(needle)
2079 .map_consume(|(start, _end)| start)
2083 /// Given a string, make a new string with repeated copies of it.
2085 fn repeat(&self, nn: uint) -> ~str {
2086 do self.as_imm_buf |buf, len| {
2087 // ignore the NULL terminator
2089 let mut ret = with_capacity(nn * len);
2092 do ret.as_mut_buf |rbuf, _len| {
2093 let mut rbuf = rbuf;
2096 ptr::copy_memory(rbuf, buf, len);
2097 rbuf = rbuf.offset(len as int);
2100 raw::set_len(&mut ret, nn * len);
2106 /// Given a string, make a new string with repeated copies of it.
2108 fn repeat(&self, nn: uint) -> ~str {
2109 do self.as_imm_buf |buf, len| {
2110 let mut ret = with_capacity(nn * len);
2113 do ret.as_mut_buf |rbuf, _len| {
2114 let mut rbuf = rbuf;
2117 ptr::copy_memory(rbuf, buf, len);
2118 rbuf = rbuf.offset(len as int);
2121 raw::set_len(&mut ret, nn * len);
2127 /// Retrieves the first character from a string slice and returns
2128 /// it. This does not allocate a new string; instead, it returns a
2129 /// slice that point one character beyond the character that was
2134 /// If the string does not contain any characters
2136 fn slice_shift_char(&self) -> (char, &'self str) {
2137 let CharRange {ch, next} = self.char_range_at(0u);
2138 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
2139 return (ch, next_s);
2142 /// Apply a function to each character.
2143 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
2144 let mut result = with_capacity(self.len());
2145 for cc in self.iter() {
2146 result.push_char(ff(cc));
2151 /// Levenshtein Distance between two strings.
2152 fn lev_distance(&self, t: &str) -> uint {
2153 let slen = self.len();
2156 if slen == 0 { return tlen; }
2157 if tlen == 0 { return slen; }
2159 let mut dcol = vec::from_fn(tlen + 1, |x| x);
2161 for (i, sc) in self.iter().enumerate() {
2163 let mut current = i;
2164 dcol[0] = current + 1;
2166 for (j, tc) in t.iter().enumerate() {
2168 let next = dcol[j + 1];
2171 dcol[j + 1] = current;
2173 dcol[j + 1] = ::cmp::min(current, next);
2174 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
2184 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
2186 /// Fails if `inner` is not a direct slice contained within self.
2191 /// let string = "a\nb\nc";
2192 /// let mut lines = ~[];
2193 /// for line in string.line_iter() { lines.push(line) }
2195 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
2196 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
2197 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
2200 fn subslice_offset(&self, inner: &str) -> uint {
2201 do self.as_imm_buf |a, a_len| {
2202 do inner.as_imm_buf |b, b_len| {
2208 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
2209 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
2211 assert!(a_start <= b_start);
2212 assert!(b_end <= a_end);
2218 /// Work with the byte buffer and length of a slice.
2220 /// The given length is one byte longer than the 'official' indexable
2221 /// length of the string. This is to permit probing the byte past the
2222 /// indexable area for a null byte, as is the case in slices pointing
2223 /// to full strings, or suffixes of them.
2225 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T {
2226 let v: &[u8] = unsafe { cast::transmute(*self) };
2231 #[allow(missing_doc)]
2232 pub trait OwnedStr {
2233 fn push_str_no_overallocate(&mut self, rhs: &str);
2234 fn push_str(&mut self, rhs: &str);
2235 fn push_char(&mut self, c: char);
2236 fn pop_char(&mut self) -> char;
2237 fn shift_char(&mut self) -> char;
2238 fn unshift_char(&mut self, ch: char);
2239 fn append(self, rhs: &str) -> ~str;
2240 fn reserve(&mut self, n: uint);
2241 fn reserve_at_least(&mut self, n: uint);
2242 fn capacity(&self) -> uint;
2244 fn to_bytes_with_null(self) -> ~[u8];
2246 /// Work with the mutable byte buffer and length of a slice.
2248 /// The given length is one byte longer than the 'official' indexable
2249 /// length of the string. This is to permit probing the byte past the
2250 /// indexable area for a null byte, as is the case in slices pointing
2251 /// to full strings, or suffixes of them.
2253 /// Make sure any mutations to this buffer keep this string valid UTF8.
2254 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T;
2257 impl OwnedStr for ~str {
2258 /// Appends a string slice to the back of a string, without overallocating
2260 fn push_str_no_overallocate(&mut self, rhs: &str) {
2262 let llen = self.len();
2263 let rlen = rhs.len();
2264 self.reserve(llen + rlen);
2265 do self.as_imm_buf |lbuf, _llen| {
2266 do rhs.as_imm_buf |rbuf, _rlen| {
2267 let dst = ptr::offset(lbuf, llen as int);
2268 let dst = cast::transmute_mut_unsafe(dst);
2269 ptr::copy_memory(dst, rbuf, rlen);
2272 raw::set_len(self, llen + rlen);
2276 /// Appends a string slice to the back of a string
2278 fn push_str(&mut self, rhs: &str) {
2280 let llen = self.len();
2281 let rlen = rhs.len();
2282 self.reserve_at_least(llen + rlen);
2283 do self.as_imm_buf |lbuf, _llen| {
2284 do rhs.as_imm_buf |rbuf, _rlen| {
2285 let dst = ptr::offset(lbuf, llen as int);
2286 let dst = cast::transmute_mut_unsafe(dst);
2287 ptr::copy_memory(dst, rbuf, rlen);
2290 raw::set_len(self, llen + rlen);
2294 /// Appends a character to the back of a string
2296 fn push_char(&mut self, c: char) {
2297 assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
2299 let code = c as uint;
2300 let nb = if code < MAX_ONE_B { 1u }
2301 else if code < MAX_TWO_B { 2u }
2302 else if code < MAX_THREE_B { 3u }
2304 let len = self.len();
2305 let new_len = len + nb;
2306 self.reserve_at_least(new_len);
2307 let off = len as int;
2308 do self.as_mut_buf |buf, _len| {
2311 *ptr::mut_offset(buf, off) = code as u8;
2314 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2315 *ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
2318 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2319 *ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
2320 *ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
2323 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2324 *ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
2325 *ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
2326 *ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
2331 raw::set_len(self, new_len);
2335 /// Remove the final character from a string and return it
2339 /// If the string does not contain any characters
2340 fn pop_char(&mut self) -> char {
2341 let end = self.len();
2343 let CharRange {ch, next} = self.char_range_at_reverse(end);
2344 unsafe { raw::set_len(self, next); }
2348 /// Remove the first character from a string and return it
2352 /// If the string does not contain any characters
2353 fn shift_char(&mut self) -> char {
2354 let CharRange {ch, next} = self.char_range_at(0u);
2355 *self = self.slice(next, self.len()).to_owned();
2359 /// Prepend a char to a string
2360 fn unshift_char(&mut self, ch: char) {
2361 // This could be more efficient.
2362 let mut new_str = ~"";
2363 new_str.push_char(ch);
2364 new_str.push_str(*self);
2368 /// Concatenate two strings together.
2370 fn append(self, rhs: &str) -> ~str {
2371 let mut new_str = self;
2372 new_str.push_str_no_overallocate(rhs);
2376 /// Reserves capacity for exactly `n` bytes in the given string, not including
2377 /// the null terminator.
2379 /// Assuming single-byte characters, the resulting string will be large
2380 /// enough to hold a string of length `n`. To account for the null terminator,
2381 /// the underlying buffer will have the size `n` + 1.
2383 /// If the capacity for `s` is already equal to or greater than the requested
2384 /// capacity, then no action is taken.
2389 /// * n - The number of bytes to reserve space for
2392 pub fn reserve(&mut self, n: uint) {
2394 let v: *mut ~[u8] = cast::transmute(self);
2395 (*v).reserve(n + 1);
2399 /// Reserves capacity for exactly `n` bytes in the given string, not including
2400 /// the null terminator.
2402 /// Assuming single-byte characters, the resulting string will be large
2403 /// enough to hold a string of length `n`. To account for the null terminator,
2404 /// the underlying buffer will have the size `n` + 1.
2406 /// If the capacity for `s` is already equal to or greater than the requested
2407 /// capacity, then no action is taken.
2412 /// * n - The number of bytes to reserve space for
2415 pub fn reserve(&mut self, n: uint) {
2417 let v: &mut ~[u8] = cast::transmute(self);
2422 /// Reserves capacity for at least `n` bytes in the given string, not including
2423 /// the null terminator.
2425 /// Assuming single-byte characters, the resulting string will be large
2426 /// enough to hold a string of length `n`. To account for the null terminator,
2427 /// the underlying buffer will have the size `n` + 1.
2429 /// This function will over-allocate in order to amortize the allocation costs
2430 /// in scenarios where the caller may need to repeatedly reserve additional
2433 /// If the capacity for `s` is already equal to or greater than the requested
2434 /// capacity, then no action is taken.
2439 /// * n - The number of bytes to reserve space for
2442 fn reserve_at_least(&mut self, n: uint) {
2443 self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2446 /// Reserves capacity for at least `n` bytes in the given string.
2448 /// Assuming single-byte characters, the resulting string will be large
2449 /// enough to hold a string of length `n`. To account for the null terminator,
2450 /// the underlying buffer will have the size `n` + 1.
2452 /// This function will over-allocate in order to amortize the allocation costs
2453 /// in scenarios where the caller may need to repeatedly reserve additional
2456 /// If the capacity for `s` is already equal to or greater than the requested
2457 /// capacity, then no action is taken.
2462 /// * n - The number of bytes to reserve space for
2465 fn reserve_at_least(&mut self, n: uint) {
2466 self.reserve(uint::next_power_of_two(n))
2469 /// Returns the number of single-byte characters the string can hold without
2472 fn capacity(&self) -> uint {
2473 let buf: &~[u8] = unsafe { cast::transmute(self) };
2474 let vcap = buf.capacity();
2479 /// Returns the number of single-byte characters the string can hold without
2482 fn capacity(&self) -> uint {
2484 let buf: &~[u8] = cast::transmute(self);
2489 /// Convert to a vector of bytes. This does not allocate a new
2490 /// string, and includes the null terminator.
2493 fn to_bytes_with_null(self) -> ~[u8] {
2494 unsafe { cast::transmute(self) }
2498 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2499 let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2504 impl Clone for ~str {
2506 fn clone(&self) -> ~str {
2511 impl Clone for @str {
2513 fn clone(&self) -> @str {
2518 impl<T: Iterator<char>> FromIterator<char, T> for ~str {
2520 fn from_iterator(iterator: &mut T) -> ~str {
2521 let (lower, _) = iterator.size_hint();
2522 let mut buf = with_capacity(lower);
2523 buf.extend(iterator);
2528 impl<T: Iterator<char>> Extendable<char, T> for ~str {
2530 fn extend(&mut self, iterator: &mut T) {
2531 let (lower, _) = iterator.size_hint();
2532 let reserve = lower + self.len();
2533 self.reserve_at_least(reserve);
2534 for ch in *iterator {
2540 // This works because every lifetime is a sub-lifetime of 'static
2541 impl<'self> Zero for &'self str {
2542 fn zero() -> &'self str { "" }
2543 fn is_zero(&self) -> bool { self.is_empty() }
2546 impl Zero for ~str {
2547 fn zero() -> ~str { ~"" }
2548 fn is_zero(&self) -> bool { self.len() == 0 }
2551 impl Zero for @str {
2552 fn zero() -> @str { @"" }
2553 fn is_zero(&self) -> bool { self.len() == 0 }
2558 use iterator::IteratorUtil;
2559 use container::Container;
2566 use vec::{ImmutableVector, CopyableVector};
2567 use cmp::{TotalOrd, Less, Equal, Greater};
2571 assert!((eq(&~"", &~"")));
2572 assert!((eq(&~"foo", &~"foo")));
2573 assert!((!eq(&~"foo", &~"bar")));
2577 fn test_eq_slice() {
2578 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2579 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2580 assert!((!eq_slice("foo1", "foo2")));
2586 assert!("" <= "foo");
2587 assert!("foo" <= "foo");
2588 assert!("foo" != "bar");
2593 assert_eq!("".len(), 0u);
2594 assert_eq!("hello world".len(), 11u);
2595 assert_eq!("\x63".len(), 1u);
2596 assert_eq!("\xa2".len(), 2u);
2597 assert_eq!("\u03c0".len(), 2u);
2598 assert_eq!("\u2620".len(), 3u);
2599 assert_eq!("\U0001d11e".len(), 4u);
2601 assert_eq!("".char_len(), 0u);
2602 assert_eq!("hello world".char_len(), 11u);
2603 assert_eq!("\x63".char_len(), 1u);
2604 assert_eq!("\xa2".char_len(), 1u);
2605 assert_eq!("\u03c0".char_len(), 1u);
2606 assert_eq!("\u2620".char_len(), 1u);
2607 assert_eq!("\U0001d11e".char_len(), 1u);
2608 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2613 assert_eq!("hello".find('l'), Some(2u));
2614 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2615 assert!("hello".find('x').is_none());
2616 assert!("hello".find(|c:char| c == 'x').is_none());
2617 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2618 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2623 assert_eq!("hello".rfind('l'), Some(3u));
2624 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2625 assert!("hello".rfind('x').is_none());
2626 assert!("hello".rfind(|c:char| c == 'x').is_none());
2627 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2628 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2632 fn test_push_str() {
2635 assert_eq!(s.slice_from(0), "");
2637 assert_eq!(s.slice_from(0), "abc");
2638 s.push_str("ประเทศไทย中华Việt Nam");
2639 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2646 assert_eq!(s.slice_from(0), "");
2647 s = s.append("abc");
2648 assert_eq!(s.slice_from(0), "abc");
2649 s = s.append("ประเทศไทย中华Việt Nam");
2650 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2654 fn test_pop_char() {
2655 let mut data = ~"ประเทศไทย中华";
2656 let cc = data.pop_char();
2657 assert_eq!(~"ประเทศไทย中", data);
2658 assert_eq!('华', cc);
2662 fn test_pop_char_2() {
2663 let mut data2 = ~"华";
2664 let cc2 = data2.pop_char();
2665 assert_eq!(~"", data2);
2666 assert_eq!('华', cc2);
2671 #[ignore(cfg(windows))]
2672 fn test_pop_char_fail() {
2674 let _cc3 = data.pop_char();
2678 fn test_push_char() {
2679 let mut data = ~"ประเทศไทย中";
2680 data.push_char('华');
2681 data.push_char('b'); // 1 byte
2682 data.push_char('¢'); // 2 byte
2683 data.push_char('€'); // 3 byte
2684 data.push_char('𤭢'); // 4 byte
2685 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2689 fn test_shift_char() {
2690 let mut data = ~"ประเทศไทย中";
2691 let cc = data.shift_char();
2692 assert_eq!(~"ระเทศไทย中", data);
2693 assert_eq!('ป', cc);
2697 fn test_unshift_char() {
2698 let mut data = ~"ประเทศไทย中";
2699 data.unshift_char('华');
2700 assert_eq!(~"华ประเทศไทย中", data);
2706 let s: ~str = empty.iter().collect();
2707 assert_eq!(empty, s.as_slice());
2708 let data = "ประเทศไทย中";
2709 let s: ~str = data.iter().collect();
2710 assert_eq!(data, s.as_slice());
2715 let data = ~"ประเทศไทย中";
2716 let mut cpy = data.clone();
2718 let mut it = other.iter();
2719 cpy.extend(&mut it);
2720 assert_eq!(cpy, data + other);
2725 let mut empty = ~"";
2727 assert_eq!("", empty.as_slice());
2728 let mut data = ~"ประเทศไทย中";
2730 assert_eq!("", data.as_slice());
2731 data.push_char('华');
2732 assert_eq!("华", data.as_slice());
2736 fn test_find_str() {
2738 assert_eq!("".find_str(""), Some(0u));
2739 assert!("banana".find_str("apple pie").is_none());
2741 let data = "abcabc";
2742 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2743 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2744 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2746 let mut data = ~"ประเทศไทย中华Việt Nam";
2748 assert!(data.find_str("ไท华").is_none());
2749 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2750 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2752 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2753 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2754 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2755 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2756 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2758 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2759 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2760 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2761 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2762 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
2766 fn test_slice_chars() {
2767 fn t(a: &str, b: &str, start: uint) {
2768 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2770 t("hello", "llo", 2);
2771 t("hello", "el", 1);
2772 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2777 fn t(v: &[~str], s: &str) {
2778 assert_eq!(v.concat(), s.to_str());
2780 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2781 let v: &[~str] = [];
2788 fn t(v: &[~str], sep: &str, s: &str) {
2789 assert_eq!(v.connect(sep), s.to_str());
2791 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2792 " ", "you know I'm no good");
2793 let v: &[~str] = [];
2795 t([~"hi"], " ", "hi");
2799 fn test_concat_slices() {
2800 fn t(v: &[&str], s: &str) {
2801 assert_eq!(v.concat(), s.to_str());
2803 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
2804 let v: &[&str] = [];
2810 fn test_connect_slices() {
2811 fn t(v: &[&str], sep: &str, s: &str) {
2812 assert_eq!(v.connect(sep), s.to_str());
2814 t(["you", "know", "I'm", "no", "good"],
2815 " ", "you know I'm no good");
2817 t(["hi"], " ", "hi");
2822 assert_eq!("x".repeat(4), ~"xxxx");
2823 assert_eq!("hi".repeat(4), ~"hihihihi");
2824 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2825 assert_eq!("".repeat(4), ~"");
2826 assert_eq!("hi".repeat(0), ~"");
2830 fn test_unsafe_slice() {
2831 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2832 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2833 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
2834 fn a_million_letter_a() -> ~str {
2837 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
2840 fn half_a_million_letter_a() -> ~str {
2843 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2846 let letters = a_million_letter_a();
2847 assert!(half_a_million_letter_a() ==
2848 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
fn test_starts_with() {
    // The empty prefix matches every string, the empty string included.
    assert!("".starts_with(""));
    assert!("abc".starts_with(""));
    assert!("abc".starts_with("a"));
    // A prefix longer than the haystack never matches.
    assert!(!"a".starts_with("abc"));
    assert!(!"".starts_with("abc"));
fn test_ends_with() {
    // The empty suffix matches every string, the empty string included.
    assert!("".ends_with(""));
    assert!("abc".ends_with(""));
    assert!("abc".ends_with("c"));
    // A suffix longer than the haystack never matches.
    assert!(!"a".ends_with("abc"));
    assert!(!"".ends_with("abc"));
2870 fn test_is_empty() {
2871 assert!("".is_empty());
2872 assert!(!"a".is_empty());
2878 assert_eq!("".replace(a, "b"), ~"");
2879 assert_eq!("a".replace(a, "b"), ~"b");
2880 assert_eq!("ab".replace(a, "b"), ~"bb");
2882 assert!(" test test ".replace(test, "toast") ==
2884 assert_eq!(" test test ".replace(test, ""), ~" ");
2888 fn test_replace_2a() {
2889 let data = ~"ประเทศไทย中华";
2890 let repl = ~"دولة الكويت";
2893 let A = ~"دولة الكويتทศไทย中华";
2894 assert_eq!(data.replace(a, repl), A);
2898 fn test_replace_2b() {
2899 let data = ~"ประเทศไทย中华";
2900 let repl = ~"دولة الكويت";
2903 let B = ~"ปรدولة الكويتทศไทย中华";
2904 assert_eq!(data.replace(b, repl), B);
2908 fn test_replace_2c() {
2909 let data = ~"ประเทศไทย中华";
2910 let repl = ~"دولة الكويت";
2913 let C = ~"ประเทศไทยدولة الكويت";
2914 assert_eq!(data.replace(c, repl), C);
2918 fn test_replace_2d() {
2919 let data = ~"ประเทศไทย中华";
2920 let repl = ~"دولة الكويت";
2923 assert_eq!(data.replace(d, repl), data);
2928 assert_eq!("ab", "abc".slice(0, 2));
2929 assert_eq!("bc", "abc".slice(1, 3));
2930 assert_eq!("", "abc".slice(1, 1));
2931 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2933 let data = "ประเทศไทย中华";
2934 assert_eq!("ป", data.slice(0, 3));
2935 assert_eq!("ร", data.slice(3, 6));
2936 assert_eq!("", data.slice(3, 3));
2937 assert_eq!("华", data.slice(30, 33));
2939 fn a_million_letter_X() -> ~str {
2943 push_str(&mut rs, "华华华华华华华华华华");
2948 fn half_a_million_letter_X() -> ~str {
2951 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2954 let letters = a_million_letter_X();
2955 assert!(half_a_million_letter_X() ==
2956 letters.slice(0u, 3u * 500000u).to_owned());
2961 let ss = "中华Việt Nam";
2963 assert_eq!("华", ss.slice(3u, 6u));
2964 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2966 assert_eq!("ab", "abc".slice(0u, 2u));
2967 assert_eq!("bc", "abc".slice(1u, 3u));
2968 assert_eq!("", "abc".slice(1u, 1u));
2970 assert_eq!("中", ss.slice(0u, 3u));
2971 assert_eq!("华V", ss.slice(3u, 7u));
2972 assert_eq!("", ss.slice(3u, 3u));
2987 #[ignore(cfg(windows))]
2988 fn test_slice_fail() {
2989 "中华Việt Nam".slice(0u, 2u);
2993 fn test_slice_from() {
2994 assert_eq!("abcd".slice_from(0), "abcd");
2995 assert_eq!("abcd".slice_from(2), "cd");
2996 assert_eq!("abcd".slice_from(4), "");
2999 fn test_slice_to() {
3000 assert_eq!("abcd".slice_to(0), "");
3001 assert_eq!("abcd".slice_to(2), "ab");
3002 assert_eq!("abcd".slice_to(4), "abcd");
3006 fn test_trim_left_chars() {
3007 let v: &[char] = &[];
3008 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
3009 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
3010 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
3011 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
3013 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
3014 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
3015 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
3019 fn test_trim_right_chars() {
3020 let v: &[char] = &[];
3021 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
3022 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
3023 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
3024 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
3026 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
3027 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
3028 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
3032 fn test_trim_chars() {
3033 let v: &[char] = &[];
3034 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
3035 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
3036 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
3037 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
3039 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
3040 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
3041 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
3045 fn test_trim_left() {
3046 assert_eq!("".trim_left(), "");
3047 assert_eq!("a".trim_left(), "a");
3048 assert_eq!(" ".trim_left(), "");
3049 assert_eq!(" blah".trim_left(), "blah");
3050 assert_eq!(" \u3000 wut".trim_left(), "wut");
3051 assert_eq!("hey ".trim_left(), "hey ");
3055 fn test_trim_right() {
3056 assert_eq!("".trim_right(), "");
3057 assert_eq!("a".trim_right(), "a");
3058 assert_eq!(" ".trim_right(), "");
3059 assert_eq!("blah ".trim_right(), "blah");
3060 assert_eq!("wut \u3000 ".trim_right(), "wut");
3061 assert_eq!(" hey".trim_right(), " hey");
3066 assert_eq!("".trim(), "");
3067 assert_eq!("a".trim(), "a");
3068 assert_eq!(" ".trim(), "");
3069 assert_eq!(" blah ".trim(), "blah");
3070 assert_eq!("\nwut \u3000 ".trim(), "wut");
3071 assert_eq!(" hey dude ".trim(), "hey dude");
3075 fn test_is_whitespace() {
3076 assert!("".is_whitespace());
3077 assert!(" ".is_whitespace());
3078 assert!("\u2009".is_whitespace()); // Thin space
3079 assert!(" \n\t ".is_whitespace());
3080 assert!(!" _ ".is_whitespace());
3084 fn test_shift_byte() {
3086 let b = unsafe{raw::shift_byte(&mut s)};
3087 assert_eq!(s, ~"BC");
3088 assert_eq!(b, 65u8);
3092 fn test_pop_byte() {
3094 let b = unsafe{raw::pop_byte(&mut s)};
3095 assert_eq!(s, ~"AB");
3096 assert_eq!(b, 67u8);
fn test_unsafe_from_bytes() {
    // Seven copies of 65, the byte value of ASCII 'A'.
    let bytes = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
    let text = unsafe { raw::from_bytes(bytes) };
    assert_eq!(text, ~"AAAAAAA");
3107 fn test_from_bytes() {
3108 let ss = ~"ศไทย中华Việt Nam";
3109 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
3110 0xe0_u8, 0xb9_u8, 0x84_u8,
3111 0xe0_u8, 0xb8_u8, 0x97_u8,
3112 0xe0_u8, 0xb8_u8, 0xa2_u8,
3113 0xe4_u8, 0xb8_u8, 0xad_u8,
3114 0xe5_u8, 0x8d_u8, 0x8e_u8,
3115 0x56_u8, 0x69_u8, 0xe1_u8,
3116 0xbb_u8, 0x87_u8, 0x74_u8,
3117 0x20_u8, 0x4e_u8, 0x61_u8,
3121 assert_eq!(ss, from_bytes(bb));
3122 assert_eq!(~"𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰",
3123 from_bytes(bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰")));
3128 assert!(!is_utf8([0xc0, 0x80]));
3129 assert!(!is_utf8([0xc0, 0xae]));
3130 assert!(!is_utf8([0xe0, 0x80, 0x80]));
3131 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
3132 assert!(!is_utf8([0xe0, 0x81, 0x81]));
3133 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
3134 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
3136 assert!(is_utf8([0xC2, 0x80]));
3137 assert!(is_utf8([0xDF, 0xBF]));
3138 assert!(is_utf8([0xE0, 0xA0, 0x80]));
3139 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
3140 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
3141 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
3146 #[ignore(cfg(windows))]
3147 fn test_from_bytes_fail() {
3148 use str::not_utf8::cond;
3150 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
3151 0xe0_u8, 0xb9_u8, 0x84_u8,
3152 0xe0_u8, 0xb8_u8, 0x97_u8,
3153 0xe0_u8, 0xb8_u8, 0xa2_u8,
3154 0xe4_u8, 0xb8_u8, 0xad_u8,
3155 0xe5_u8, 0x8d_u8, 0x8e_u8,
3156 0x56_u8, 0x69_u8, 0xe1_u8,
3157 0xbb_u8, 0x87_u8, 0x74_u8,
3158 0x20_u8, 0x4e_u8, 0x61_u8,
3161 let mut error_happened = false;
3162 let _x = do cond.trap(|err| {
3163 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
3164 error_happened = true;
3169 assert!(error_happened);
3173 fn test_raw_from_c_str() {
3175 let a = ~[65, 65, 65, 65, 65, 65, 65, 0];
3176 let b = vec::raw::to_ptr(a);
3177 let c = raw::from_c_str(b);
3178 assert_eq!(c, ~"AAAAAAA");
3183 fn test_as_bytes() {
3186 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3187 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3190 assert_eq!("".as_bytes(), &[]);
3191 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3192 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
3197 #[ignore(cfg(windows))]
3199 fn test_as_bytes_fail() {
3200 // Don't double free. (I'm not sure if this exercises the
3201 // original problem code path anymore.)
3203 let _bytes = s.as_bytes();
3209 #[ignore(cfg(windows))]
fn test_as_bytes_with_null_fail() {
    // Named after the method under test (`as_bytes_with_null`) so it no
    // longer shares a name with the `as_bytes` variant of this test.
    // Don't double free. (I'm not sure if this exercises the
    // original problem code path anymore.)
    let _bytes = s.as_bytes_with_null();
3221 fn test_to_bytes_with_null() {
3222 let s = ~"ศไทย中华Việt Nam";
3224 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3225 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3228 assert_eq!((~"").to_bytes_with_null(), ~[0]);
3229 assert_eq!((~"abc").to_bytes_with_null(),
3230 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3231 assert_eq!(s.to_bytes_with_null(), v);
3235 #[ignore(cfg(windows))]
3237 fn test_as_bytes_fail() {
3238 // Don't double free. (I'm not sure if this exercises the
3239 // original problem code path anymore.)
3241 let _bytes = s.as_bytes();
3246 fn test_as_imm_buf() {
3247 do "".as_imm_buf |_, len| {
3251 do "hello".as_imm_buf |buf, len| {
3254 assert_eq!(*ptr::offset(buf, 0), 'h' as u8);
3255 assert_eq!(*ptr::offset(buf, 1), 'e' as u8);
3256 assert_eq!(*ptr::offset(buf, 2), 'l' as u8);
3257 assert_eq!(*ptr::offset(buf, 3), 'l' as u8);
3258 assert_eq!(*ptr::offset(buf, 4), 'o' as u8);
fn test_subslice_offset() {
    let whole = "kernelsprite";
    let tail = whole.slice(7, whole.len());
    let head = whole.slice(0, whole.len() - 6);
    assert_eq!(whole.subslice_offset(tail), 7);
    assert_eq!(whole.subslice_offset(head), 0);

    let text = "a\nb\nc";
    // Every line yielded by `line_iter` is a subslice of `text`; gather them in order.
    let pieces: ~[&str] = text.line_iter().collect();
    assert_eq!(text.subslice_offset(pieces[0]), 0);
    assert_eq!(text.subslice_offset(pieces[1]), 2);
    assert_eq!(text.subslice_offset(pieces[2]), 4);
3281 fn test_subslice_offset_2() {
3282 let a = "alchemiter";
3283 let b = "cruxtruder";
3284 a.subslice_offset(b);
3288 fn vec_str_conversions() {
3289 let s1: ~str = ~"All mimsy were the borogoves";
3291 let v: ~[u8] = s1.as_bytes().to_owned();
3292 let s2: ~str = from_bytes(v);
3293 let mut i: uint = 0u;
3294 let n1: uint = s1.len();
3295 let n2: uint = v.len();
3308 fn test_contains() {
3309 assert!("abcde".contains("bcd"));
3310 assert!("abcde".contains("abcd"));
3311 assert!("abcde".contains("bcde"));
3312 assert!("abcde".contains(""));
3313 assert!("".contains(""));
3314 assert!(!"abcde".contains("def"));
3315 assert!(!"".contains("a"));
3317 let data = ~"ประเทศไทย中华Việt Nam";
3318 assert!(data.contains("ประเ"));
3319 assert!(data.contains("ะเ"));
3320 assert!(data.contains("中华"));
3321 assert!(!data.contains("ไท华"));
3325 fn test_contains_char() {
3326 assert!("abc".contains_char('b'));
3327 assert!("a".contains_char('a'));
3328 assert!(!"abc".contains_char('d'));
3329 assert!(!"".contains_char('a'));
3334 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3335 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3342 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3343 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3344 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3345 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3348 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3349 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3350 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3351 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3352 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3355 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3356 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3357 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3358 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3359 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3360 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3361 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3362 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3364 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3365 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3366 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3367 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3368 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3369 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3370 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3371 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3372 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3373 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3374 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3377 for p in pairs.iter() {
3378 let (s, u) = (*p).clone();
3379 assert!(s.to_utf16() == u);
3380 assert!(from_utf16(u) == s);
3381 assert!(from_utf16(s.to_utf16()) == s);
3382 assert!(from_utf16(u).to_utf16() == u);
3388 let s = ~"ศไทย中华Việt Nam";
3389 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3391 for ch in v.iter() {
3392 assert!(s.char_at(pos) == *ch);
3393 pos += from_char(*ch).len();
3398 fn test_char_at_reverse() {
3399 let s = ~"ศไทย中华Việt Nam";
3400 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3401 let mut pos = s.len();
3402 for ch in v.rev_iter() {
3403 assert!(s.char_at_reverse(pos) == *ch);
3404 pos -= from_char(*ch).len();
3409 fn test_escape_unicode() {
3410 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3411 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3412 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3413 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3414 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3415 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3416 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3417 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3418 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
3422 fn test_escape_default() {
3423 assert_eq!("abc".escape_default(), ~"abc");
3424 assert_eq!("a c".escape_default(), ~"a c");
3425 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3426 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3427 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3428 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3429 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3430 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
3434 fn test_to_managed() {
3435 assert_eq!("abc".to_managed(), @"abc");
3436 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
fn test_total_ord() {
    // Every comparison is asserted. A bare `a.cmp(b) == X;` statement only
    // computes a boolean and throws it away, so it could never fail the test.
    assert_eq!("1234".cmp(& &"123"), Greater);
    assert_eq!("123".cmp(& &"1234"), Less);
    assert_eq!("1234".cmp(& &"1234"), Equal);
    assert_eq!("12345555".cmp(& &"123456"), Less);
    assert_eq!("22".cmp(& &"1234"), Greater);
3449 fn test_char_range_at() {
3450 let data = ~"b¢€𤭢𤭢€¢b";
3451 assert_eq!('b', data.char_range_at(0).ch);
3452 assert_eq!('¢', data.char_range_at(1).ch);
3453 assert_eq!('€', data.char_range_at(3).ch);
3454 assert_eq!('𤭢', data.char_range_at(6).ch);
3455 assert_eq!('𤭢', data.char_range_at(10).ch);
3456 assert_eq!('€', data.char_range_at(14).ch);
3457 assert_eq!('¢', data.char_range_at(17).ch);
3458 assert_eq!('b', data.char_range_at(19).ch);
3462 fn test_char_range_at_reverse_underflow() {
3463 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3468 #[allow(unnecessary_allocation)];
3470 ($s1:expr, $s2:expr, $e:expr) => {
3471 assert_eq!($s1 + $s2, $e);
3472 assert_eq!($s1.to_owned() + $s2, $e);
3473 assert_eq!($s1.to_managed() + $s2, $e);
3477 t!("foo", "bar", ~"foobar");
3478 t!("foo", @"bar", ~"foobar");
3479 t!("foo", ~"bar", ~"foobar");
3480 t!("ศไทย中", "华Việt Nam", ~"ศไทย中华Việt Nam");
3481 t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3482 t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
3486 fn test_iterator() {
3488 let s = ~"ศไทย中华Việt Nam";
3489 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3492 let mut it = s.iter();
3495 assert_eq!(c, v[pos]);
3498 assert_eq!(pos, v.len());
3502 fn test_rev_iterator() {
3504 let s = ~"ศไทย中华Việt Nam";
3505 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3508 let mut it = s.rev_iter();
3511 assert_eq!(c, v[pos]);
3514 assert_eq!(pos, v.len());
3518 fn test_byte_iterator() {
3519 let s = ~"ศไทย中华Việt Nam";
3521 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3522 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3527 for b in s.byte_iter() {
3528 assert_eq!(b, v[pos]);
3534 fn test_byte_rev_iterator() {
3535 let s = ~"ศไทย中华Việt Nam";
3537 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3538 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3541 let mut pos = v.len();
3543 for b in s.byte_rev_iter() {
3545 assert_eq!(b, v[pos]);
3550 fn test_char_offset_iterator() {
3552 let s = "ศไทย中华Việt Nam";
3553 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
3554 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3557 let mut it = s.char_offset_iter();
3560 assert_eq!(c, (p[pos], v[pos]));
3563 assert_eq!(pos, v.len());
3564 assert_eq!(pos, p.len());
3568 fn test_char_offset_rev_iterator() {
3570 let s = "ศไทย中华Việt Nam";
3571 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
3572 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3575 let mut it = s.char_offset_rev_iter();
3578 assert_eq!(c, (p[pos], v[pos]));
3581 assert_eq!(pos, v.len());
3582 assert_eq!(pos, p.len());
3586 fn test_split_char_iterator() {
3587 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3589 let split: ~[&str] = data.split_iter(' ').collect();
3590 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3592 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3593 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3596 let split: ~[&str] = data.split_iter('ä').collect();
3597 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3599 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3600 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3604 fn test_splitn_char_iterator() {
3605 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3607 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3608 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3610 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3611 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3614 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3615 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3617 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3618 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3622 fn test_split_char_iterator_no_trailing() {
3623 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3625 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3626 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3628 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3629 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
fn test_word_iter() {
    // The input mixes spaces, tabs and newlines, with whitespace at both ends.
    let text = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
    let expected = ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"];
    let found: ~[&str] = text.word_iter().collect();
    assert_eq!(found, expected)
3640 fn test_line_iter() {
3641 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3642 let lines: ~[&str] = data.line_iter().collect();
3643 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3645 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3646 let lines: ~[&str] = data.line_iter().collect();
3647 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3651 fn test_split_str_iterator() {
3652 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3653 let v: ~[&str] = s.split_str_iter(sep).collect();
3656 t("--1233345--", "12345", ~["--1233345--"]);
3657 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3658 t("::hello::there", "::", ~["", "hello", "there"]);
3659 t("hello::there::", "::", ~["hello", "there", ""]);
3660 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3661 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3662 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3663 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3664 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3666 t("zz", "zz", ~["",""]);
3667 t("ok", "z", ~["ok"]);
3668 t("zzz", "zz", ~["","z"]);
3669 t("zzzzz", "zz", ~["","","z"]);
3673 fn test_str_zero() {
3675 fn t<S: Zero + Str>() {
3676 let s: S = Zero::zero();
3677 assert_eq!(s.as_slice(), "");
3678 assert!(s.is_zero());
3687 fn test_str_container() {
3688 fn sum_len<S: Container>(v: &[S]) -> uint {
3689 v.iter().transform(|x| x.len()).sum()
3693 assert_eq!(5, sum_len(["012", "", "34"]));
3694 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3695 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3696 assert_eq!(5, sum_len([s.as_slice()]));
3702 use extra::test::BenchHarness;
3706 fn is_utf8_100_ascii(bh: &mut BenchHarness) {
3708 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
3709 Lorem ipsum dolor sit amet, consectetur. ");
3711 assert_eq!(100, s.len());
3718 fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
3719 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
3720 assert_eq!(100, s.len());
3727 fn map_chars_100_ascii(bh: &mut BenchHarness) {
3728 let s = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\
3729 HelloHelloHelloHelloHelloHelloHelloHelloHelloHello";
3731 s.map_chars(|c| ((c as uint) + 1) as char);
3736 fn map_chars_100_multibytes(bh: &mut BenchHarness) {
3737 let s = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3738 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3739 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3740 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑";
3742 s.map_chars(|c| ((c as uint) + 1) as char);