1 // Copyright 2012-2013 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
14 * Strings are a packed UTF-8 representation of text, stored as null
15 * terminated buffers of u8 bytes. Strings should be indexed in bytes,
16 * for efficiency, but UTF-8 unsafe operations should be avoided.
24 use container::{Container, Mutable};
26 use iterator::{Iterator, FromIterator, Extendable, IteratorUtil};
27 use iterator::{Filter, AdditiveIterator, Map};
28 use iterator::{Invert, DoubleEndedIterator, DoubleEndedIteratorUtil};
31 use option::{None, Option, Some};
37 use unstable::raw::Repr;
39 use vec::{OwnedVector, OwnedCopyableVector, ImmutableVector, MutableVector};
46 not_utf8: (~str) -> ~str;
50 Section: Creating a string
53 /// Convert a vector of bytes to a new UTF-8 string
57 /// Raises the `not_utf8` condition if invalid UTF-8
58 pub fn from_bytes(vv: &[u8]) -> ~str {
59 use str::not_utf8::cond;
62 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
63 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
64 first_bad_byte as uint))
66 return unsafe { raw::from_bytes(vv) }
70 /// Consumes a vector of bytes to create a new utf-8 string
74 /// Raises the `not_utf8` condition if invalid UTF-8
75 pub fn from_bytes_owned(vv: ~[u8]) -> ~str {
76 use str::not_utf8::cond;
79 let first_bad_byte = *vv.iter().find_(|&b| !is_utf8([*b])).get();
80 cond.raise(fmt!("from_bytes: input is not UTF-8; first bad byte is %u",
81 first_bad_byte as uint))
83 return unsafe { raw::from_bytes_owned(vv) }
87 /// Converts a vector to a string slice without performing any allocations.
89 /// Once the slice has been validated as utf-8, it is transmuted in-place and
90 /// returned as a '&str' instead of a '&[u8]'
94 /// Fails if invalid UTF-8
96 pub fn from_bytes_slice<'a>(vector: &'a [u8]) -> &'a str {
98 assert!(is_utf8(vector));
99 let mut s = vector.repr();
105 /// Converts a vector to a string slice without performing any allocations.
107 /// Once the slice has been validated as utf-8, it is transmuted in-place and
108 /// returned as a '&str' instead of a '&[u8]'
112 /// Fails if invalid UTF-8
114 pub fn from_bytes_slice<'a>(v: &'a [u8]) -> &'a str {
116 unsafe { cast::transmute(v) }
/// `ToStr` for owned strings: `to_str` forwards to `to_owned()`.
119 impl ToStr for ~str {
/// Returns the result of calling `to_owned()` on this string.
121 fn to_str(&self) -> ~str { self.to_owned() }
/// `ToStr` for borrowed string slices: `to_str` forwards to `to_owned()`.
123 impl<'self> ToStr for &'self str {
/// Returns the result of calling `to_owned()` on this slice.
125 fn to_str(&self) -> ~str { self.to_owned() }
/// `ToStr` for managed strings: `to_str` forwards to `to_owned()`.
127 impl ToStr for @str {
/// Returns the result of calling `to_owned()` on this string.
129 fn to_str(&self) -> ~str { self.to_owned() }
132 /// Convert a byte to a UTF-8 string
136 /// Fails if invalid UTF-8
138 pub fn from_byte(b: u8) -> ~str {
140 unsafe { cast::transmute(~[b, 0u8]) }
143 /// Convert a byte to a UTF-8 string
147 /// Fails if invalid UTF-8
149 pub fn from_byte(b: u8) -> ~str {
151 unsafe { ::cast::transmute(~[b]) }
154 /// Convert a char to a string
155 pub fn from_char(ch: char) -> ~str {
161 /// Convert a vector of chars to a string
162 pub fn from_chars(chs: &[char]) -> ~str {
164 buf.reserve(chs.len());
165 foreach ch in chs.iter() {
172 pub fn push_str(lhs: &mut ~str, rhs: &str) {
176 #[allow(missing_doc)]
177 pub trait StrVector {
178 pub fn concat(&self) -> ~str;
179 pub fn connect(&self, sep: &str) -> ~str;
182 impl<'self, S: Str> StrVector for &'self [S] {
183 /// Concatenate a vector of strings.
185 pub fn concat(&self) -> ~str {
186 if self.is_empty() { return ~""; }
188 let len = self.iter().transform(|s| s.as_slice().len()).sum();
190 let mut s = with_capacity(len);
193 do s.as_mut_buf |buf, _| {
195 foreach ss in self.iter() {
196 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
197 let sslen = sslen - 1;
198 ptr::copy_memory(buf, ssbuf, sslen);
199 buf = buf.offset(sslen as int);
203 raw::set_len(&mut s, len);
208 /// Concatenate a vector of strings.
210 pub fn concat(&self) -> ~str {
211 if self.is_empty() { return ~""; }
213 let len = self.iter().transform(|s| s.as_slice().len()).sum();
215 let mut s = with_capacity(len);
218 do s.as_mut_buf |buf, _| {
220 foreach ss in self.iter() {
221 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
222 ptr::copy_memory(buf, ssbuf, sslen);
223 buf = buf.offset(sslen as int);
227 raw::set_len(&mut s, len);
232 /// Concatenate a vector of strings, placing a given separator between each.
234 pub fn connect(&self, sep: &str) -> ~str {
235 if self.is_empty() { return ~""; }
238 if sep.is_empty() { return self.concat(); }
240 // this is wrong without the guarantee that `self` is non-empty
241 let len = sep.len() * (self.len() - 1)
242 + self.iter().transform(|s| s.as_slice().len()).sum();
244 let mut first = true;
249 do s.as_mut_buf |buf, _| {
250 do sep.as_imm_buf |sepbuf, seplen| {
251 let seplen = seplen - 1;
252 let mut buf = cast::transmute_mut_unsafe(buf);
253 foreach ss in self.iter() {
254 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
255 let sslen = sslen - 1;
259 ptr::copy_memory(buf, sepbuf, seplen);
260 buf = buf.offset(seplen as int);
262 ptr::copy_memory(buf, ssbuf, sslen);
263 buf = buf.offset(sslen as int);
268 raw::set_len(&mut s, len);
273 /// Concatenate a vector of strings, placing a given separator between each.
275 pub fn connect(&self, sep: &str) -> ~str {
276 if self.is_empty() { return ~""; }
279 if sep.is_empty() { return self.concat(); }
281 // this is wrong without the guarantee that `self` is non-empty
282 let len = sep.len() * (self.len() - 1)
283 + self.iter().transform(|s| s.as_slice().len()).sum();
285 let mut first = true;
290 do s.as_mut_buf |buf, _| {
291 do sep.as_imm_buf |sepbuf, seplen| {
293 foreach ss in self.iter() {
294 do ss.as_slice().as_imm_buf |ssbuf, sslen| {
298 ptr::copy_memory(buf, sepbuf, seplen);
299 buf = buf.offset(seplen as int);
301 ptr::copy_memory(buf, ssbuf, sslen);
302 buf = buf.offset(sslen as int);
307 raw::set_len(&mut s, len);
313 /// Something that can be used to compare against a character
315 /// Determine if the splitter should split at the given character
316 fn matches(&self, char) -> bool;
317 /// Indicate if this is only concerned about ASCII characters,
318 /// which can allow for a faster implementation.
319 fn only_ascii(&self) -> bool;
/// A single `char` used as a matcher.
322 impl CharEq for char {
/// True when `c` is equal to this character.
324 fn matches(&self, c: char) -> bool { *self == c }
/// True when this character's numeric value is below 128.
326 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
/// A borrowed closure `fn(char) -> bool` used as a matcher.
329 impl<'self> CharEq for &'self fn(char) -> bool {
/// Calls the closure with `c` and returns its result.
331 fn matches(&self, c: char) -> bool { (*self)(c) }
/// Always reports `false` for closures.
333 fn only_ascii(&self) -> bool { false }
/// A plain Rust function `fn(char) -> bool` used as a matcher.
336 impl CharEq for extern "Rust" fn(char) -> bool {
/// Calls the function with `c` and returns its result.
338 fn matches(&self, c: char) -> bool { (*self)(c) }
/// Always reports `false` for functions.
340 fn only_ascii(&self) -> bool { false }
/// A slice of matchers used as a single matcher.
343 impl<'self, C: CharEq> CharEq for &'self [C] {
/// True when at least one element of the slice matches `c`.
345 fn matches(&self, c: char) -> bool {
346 self.iter().any(|m| m.matches(c))
/// True only when every element of the slice reports `only_ascii()`.
349 fn only_ascii(&self) -> bool {
350 self.iter().all(|m| m.only_ascii())
358 /// External iterator for a string's characters and their byte offsets.
359 /// Use with the `std::iterator` module.
361 pub struct CharOffsetIterator<'self> {
362 priv index_front: uint,
363 priv index_back: uint,
364 priv string: &'self str,
367 impl<'self> Iterator<(uint, char)> for CharOffsetIterator<'self> {
369 fn next(&mut self) -> Option<(uint, char)> {
370 if self.index_front < self.index_back {
371 let CharRange {ch, next} = self.string.char_range_at(self.index_front);
372 let index = self.index_front;
373 self.index_front = next;
381 impl<'self> DoubleEndedIterator<(uint, char)> for CharOffsetIterator<'self> {
383 fn next_back(&mut self) -> Option<(uint, char)> {
384 if self.index_front < self.index_back {
385 let CharRange {ch, next} = self.string.char_range_at_reverse(self.index_back);
386 self.index_back = next;
394 /// External iterator for a string's characters and their byte offsets in reverse order.
395 /// Use with the `std::iterator` module.
396 pub type CharOffsetRevIterator<'self> =
397 Invert<CharOffsetIterator<'self>>;
399 /// External iterator for a string's characters.
400 /// Use with the `std::iterator` module.
401 pub type CharIterator<'self> =
402 Map<'self, (uint, char), char, CharOffsetIterator<'self>>;
404 /// External iterator for a string's characters in reverse order.
405 /// Use with the `std::iterator` module.
406 pub type CharRevIterator<'self> =
407 Invert<Map<'self, (uint, char), char, CharOffsetIterator<'self>>>;
409 /// External iterator for a string's bytes.
410 /// Use with the `std::iterator` module.
411 pub type ByteIterator<'self> =
412 Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>;
414 /// External iterator for a string's bytes in reverse order.
415 /// Use with the `std::iterator` module.
416 pub type ByteRevIterator<'self> =
417 Invert<Map<'self, &'self u8, u8, vec::VecIterator<'self, u8>>>;
419 /// An iterator over the substrings of a string, separated by `sep`.
421 pub struct CharSplitIterator<'self,Sep> {
422 priv string: &'self str,
425 /// The number of splits remaining
427 /// Whether an empty string at the end is allowed
428 priv allow_trailing_empty: bool,
430 priv only_ascii: bool
433 /// An iterator over the words of a string, separated by a sequence of whitespace
434 pub type WordIterator<'self> =
435 Filter<'self, &'self str, CharSplitIterator<'self, extern "Rust" fn(char) -> bool>>;
437 /// An iterator over the lines of a string, separated by either `\n` or `\r\n`.
438 pub type AnyLineIterator<'self> =
439 Map<'self, &'self str, &'self str, CharSplitIterator<'self, char>>;
441 impl<'self, Sep: CharEq> Iterator<&'self str> for CharSplitIterator<'self, Sep> {
443 fn next(&mut self) -> Option<&'self str> {
444 if self.finished { return None }
446 let l = self.string.len();
447 let start = self.position;
450 // this gives a *huge* speed up for splitting on ASCII
451 // characters (e.g. '\n' or ' ')
452 while self.position < l && self.count > 0 {
453 let byte = self.string[self.position];
455 if self.sep.matches(byte as char) {
456 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
464 while self.position < l && self.count > 0 {
465 let CharRange {ch, next} = self.string.char_range_at(self.position);
467 if self.sep.matches(ch) {
468 let slice = unsafe { raw::slice_bytes(self.string, start, self.position) };
469 self.position = next;
473 self.position = next;
476 self.finished = true;
477 if self.allow_trailing_empty || start < l {
478 Some(unsafe { raw::slice_bytes(self.string, start, l) })
485 /// An iterator over the start and end indices of the matches of a
486 /// substring within a larger string
488 pub struct MatchesIndexIterator<'self> {
489 priv haystack: &'self str,
490 priv needle: &'self str,
494 /// An iterator over the substrings of a string separated by a given
497 pub struct StrSplitIterator<'self> {
498 priv it: MatchesIndexIterator<'self>,
503 impl<'self> Iterator<(uint, uint)> for MatchesIndexIterator<'self> {
505 fn next(&mut self) -> Option<(uint, uint)> {
506 // See Issue #1932 for why this is a naive search
507 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
508 let mut match_start = 0;
511 while self.position < h_len {
512 if self.haystack[self.position] == self.needle[match_i] {
513 if match_i == 0 { match_start = self.position; }
517 if match_i == n_len {
519 return Some((match_start, self.position));
522 // failed match, backtrack
525 self.position = match_start;
534 impl<'self> Iterator<&'self str> for StrSplitIterator<'self> {
536 fn next(&mut self) -> Option<&'self str> {
537 if self.finished { return None; }
539 match self.it.next() {
540 Some((from, to)) => {
541 let ret = Some(self.it.haystack.slice(self.last_end, from));
546 self.finished = true;
547 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
553 /// Replace all occurrences of one string with another
557 /// * s - The string containing substrings to replace
558 /// * from - The string to replace
559 /// * to - The replacement string
563 /// The original string with all occurrences of `from` replaced with `to`
564 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
565 let mut result = ~"";
566 let mut last_end = 0;
567 foreach (start, end) in s.matches_index_iter(from) {
568 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
572 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
577 Section: Comparing strings
580 /// Bytewise slice equality
581 #[cfg(not(test), stage0)]
584 pub fn eq_slice(a: &str, b: &str) -> bool {
585 do a.as_imm_buf |ap, alen| {
586 do b.as_imm_buf |bp, blen| {
587 if (alen != blen) { false }
590 libc::memcmp(ap as *libc::c_void,
592 (alen - 1) as libc::size_t) == 0
599 /// Bytewise slice equality
600 #[cfg(not(test), not(stage0))]
603 pub fn eq_slice(a: &str, b: &str) -> bool {
604 do a.as_imm_buf |ap, alen| {
605 do b.as_imm_buf |bp, blen| {
606 if (alen != blen) { false }
609 libc::memcmp(ap as *libc::c_void,
611 alen as libc::size_t) == 0
618 /// Bytewise slice equality
622 pub fn eq_slice(a: &str, b: &str) -> bool {
623 do a.as_imm_buf |ap, alen| {
624 do b.as_imm_buf |bp, blen| {
625 if (alen != blen) { false }
628 libc::memcmp(ap as *libc::c_void,
630 (alen - 1) as libc::size_t) == 0
637 /// Bytewise slice equality
638 #[cfg(test, not(stage0))]
640 pub fn eq_slice(a: &str, b: &str) -> bool {
641 do a.as_imm_buf |ap, alen| {
642 do b.as_imm_buf |bp, blen| {
643 if (alen != blen) { false }
646 libc::memcmp(ap as *libc::c_void,
648 alen as libc::size_t) == 0
655 /// Bytewise string equality
657 #[lang="uniq_str_eq"]
659 pub fn eq(a: &~str, b: &~str) -> bool {
665 pub fn eq(a: &~str, b: &~str) -> bool {
673 // Utility used by various searching functions
674 fn match_at<'a,'b>(haystack: &'a str, needle: &'b str, at: uint) -> bool {
676 foreach c in needle.byte_iter() { if haystack[i] != c { return false; } i += 1u; }
684 // Return the initial codepoint accumulator for the first byte.
685 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
686 // for width 3, and 3 bits for width 4
687 macro_rules! utf8_first_byte(
688 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as uint)
691 // return the value of $ch updated with continuation byte $byte
692 macro_rules! utf8_acc_cont_byte(
693 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
696 /// Determines if a vector of bytes contains valid UTF-8
697 pub fn is_utf8(v: &[u8]) -> bool {
704 let w = utf8_char_width(v[i]);
705 if w == 0u { return false; }
708 if nexti > total { return false; }
709 // 1. Make sure the correct number of continuation bytes are present
710 // 2. Check codepoint ranges (deny overlong encodings)
711 // 2-byte encoding is for codepoints \u0080 to \u07ff
712 // 3-byte encoding is for codepoints \u0800 to \uffff
713 // 4-byte encoding is for codepoints \u10000 to \u10ffff
715 // 2-byte encodings are correct if the width and continuation match up
716 if v[i + 1] & 192u8 != TAG_CONT_U8 { return false; }
719 ch = utf8_first_byte!(v[i], w);
720 ch = utf8_acc_cont_byte!(ch, v[i + 1]);
721 if v[i + 2] & 192u8 != TAG_CONT_U8 { return false; }
722 ch = utf8_acc_cont_byte!(ch, v[i + 2]);
723 if w == 3 && ch < MAX_TWO_B { return false; }
725 if v[i + 3] & 192u8 != TAG_CONT_U8 { return false; }
726 ch = utf8_acc_cont_byte!(ch, v[i + 3]);
727 if ch < MAX_THREE_B || ch >= MAX_UNICODE { return false; }
737 /// Determines if a vector of `u16` contains valid UTF-16
738 pub fn is_utf16(v: &[u16]) -> bool {
// Selects every unit at or below 0xD7FF or at or above 0xE000, i.e.
// everything outside the surrogate range 0xD800..0xDFFF.
744 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
// NOTE(review): this returns false when a following unit IS available
// (`i+1u < len`), yet `u2` is range-checked just below as if a second unit
// had been read. The comparison looks inverted (`>=` would be expected) —
// confirm against the lines of this function that are not shown here.
748 if i+1u < len { return false; }
// NOTE(review): the lower bound here is 0xD7FF, whereas `utf16_chars`
// asserts `u >= 0xD800_u16` for the first unit of a pair. Presumably
// harmless if this line is only reached when `u > 0xD7FF` — confirm.
750 if u < 0xD7FF_u16 || u > 0xDBFF_u16 { return false; }
// The second unit of a pair must lie in 0xDC00..0xDFFF.
751 if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
758 /// Iterates over the utf-16 characters in the specified slice, yielding each
759 /// decoded unicode character to the function provided.
763 /// * Fails on invalid utf-16 data
764 pub fn utf16_chars(v: &[u16], f: &fn(char)) {
767 while (i < len && v[i] != 0u16) {
770 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
776 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
777 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
778 let mut c = (u - 0xD800_u16) as char;
780 c |= (u2 - 0xDC00_u16) as char;
781 c |= 0x1_0000_u32 as char;
788 /// Allocates a new string from the utf-16 slice provided
789 pub fn from_utf16(v: &[u16]) -> ~str {
791 buf.reserve(v.len());
792 utf16_chars(v, |ch| buf.push_char(ch));
796 /// Allocates a new string with the specified capacity. The string returned is
797 /// the empty string, but has capacity for much more.
799 pub fn with_capacity(capacity: uint) -> ~str {
801 buf.reserve(capacity);
805 /// As char_len but for a slice of a string
809 /// * s - A valid string
810 /// * start - The position inside `s` where to start counting in bytes
811 /// * end - The position where to stop counting
815 /// The number of Unicode characters in `s` between the given indices.
816 pub fn count_chars(s: &str, start: uint, end: uint) -> uint {
817 assert!(s.is_char_boundary(start));
818 assert!(s.is_char_boundary(end));
822 let next = s.char_range_at(i).next;
829 /// Counts the number of bytes taken by the first `n` chars in `s`
830 /// starting from `start`.
831 pub fn count_bytes<'b>(s: &'b str, start: uint, n: uint) -> uint {
832 assert!(s.is_char_boundary(start));
838 let next = s.char_range_at(end).next;
845 // https://tools.ietf.org/html/rfc3629
846 priv static UTF8_CHAR_WIDTH: [u8, ..256] = [
847 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
848 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
849 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
850 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
851 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
852 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
853 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
854 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
855 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
856 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
857 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
858 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
859 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
860 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
861 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
862 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
865 /// Given a first byte, determine how many bytes are in this UTF-8 character
866 pub fn utf8_char_width(b: u8) -> uint {
867 return UTF8_CHAR_WIDTH[b] as uint;
870 #[allow(missing_doc)]
871 pub struct CharRange {
876 // UTF-8 tags and ranges
877 priv static TAG_CONT_U8: u8 = 128u8;
878 priv static TAG_CONT: uint = 128u;
879 priv static MAX_ONE_B: uint = 128u;
880 priv static TAG_TWO_B: uint = 192u;
881 priv static MAX_TWO_B: uint = 2048u;
882 priv static TAG_THREE_B: uint = 224u;
883 priv static MAX_THREE_B: uint = 65536u;
884 priv static TAG_FOUR_B: uint = 240u;
885 priv static MAX_UNICODE: uint = 1114112u;
887 /// Unsafe operations
895 use vec::MutableVector;
896 use unstable::raw::Slice;
898 use unstable::raw::String;
900 /// Create a Rust string from a *u8 buffer of the given length
902 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
903 let mut v: ~[u8] = vec::with_capacity(len + 1);
904 v.as_mut_buf(|vbuf, _len| {
905 ptr::copy_memory(vbuf, buf as *u8, len)
907 vec::raw::set_len(&mut v, len);
914 /// Create a Rust string from a *u8 buffer of the given length
916 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
917 let mut v: ~[u8] = vec::with_capacity(len);
918 do v.as_mut_buf |vbuf, _len| {
919 ptr::copy_memory(vbuf, buf as *u8, len)
921 vec::raw::set_len(&mut v, len);
927 /// Create a Rust string from a null-terminated C string
928 pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
933 curr = ptr::offset(buf, i);
935 from_buf_len(buf as *u8, i as uint)
938 /// Converts a vector of bytes to a new owned string.
939 pub unsafe fn from_bytes(v: &[u8]) -> ~str {
940 do v.as_imm_buf |buf, len| {
941 from_buf_len(buf, len)
945 /// Converts an owned vector of bytes to a new owned string. This assumes
946 /// that the utf-8-ness of the vector has already been validated
948 pub unsafe fn from_bytes_owned(mut v: ~[u8]) -> ~str {
953 /// Converts an owned vector of bytes to a new owned string. This assumes
954 /// that the utf-8-ness of the vector has already been validated
957 pub unsafe fn from_bytes_owned(v: ~[u8]) -> ~str {
961 /// Converts a byte to a string.
///
/// Wraps `u` in a one-element vector and passes it to `from_bytes`; no
/// UTF-8 check is performed at this call site.
962 pub unsafe fn from_byte(u: u8) -> ~str { from_bytes([u]) }
964 /// Form a slice from a C string. Unsafe because the caller must ensure the
965 /// C string has the static lifetime, or else the return value may be
966 /// invalidated later.
968 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
974 curr = ptr::offset(s, len as int);
976 let v = Slice { data: s, len: len + 1 };
977 assert!(is_utf8(cast::transmute(v)));
981 /// Form a slice from a C string. Unsafe because the caller must ensure the
982 /// C string has the static lifetime, or else the return value may be
983 /// invalidated later.
985 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
991 curr = ptr::offset(s, len as int);
993 let v = Slice { data: s, len: len };
994 assert!(is_utf8(::cast::transmute(v)));
998 /// Takes a bytewise (not UTF-8) slice from a string.
1000 /// Returns the substring from [`begin`..`end`).
1004 /// If begin is greater than end.
1005 /// If end is greater than the length of the string.
1008 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
1009 do s.as_imm_buf |sbuf, n| {
1010 assert!((begin <= end));
1011 assert!((end <= n));
1013 cast::transmute(Slice {
1014 data: ptr::offset(sbuf, begin as int),
1015 len: end - begin + 1,
1020 /// Takes a bytewise (not UTF-8) slice from a string.
1022 /// Returns the substring from [`begin`..`end`).
1026 /// If begin is greater than end.
1027 /// If end is greater than the length of the string.
1030 pub unsafe fn slice_bytes(s: &str, begin: uint, end: uint) -> &str {
1031 do s.as_imm_buf |sbuf, n| {
1032 assert!((begin <= end));
1033 assert!((end <= n));
1035 cast::transmute(Slice {
1036 data: ptr::offset(sbuf, begin as int),
1042 /// Appends a byte to a string. (Not UTF-8 safe).
1043 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
1044 let new_len = s.len() + 1;
1045 s.reserve_at_least(new_len);
1046 do s.as_mut_buf |buf, len| {
1047 *ptr::mut_offset(buf, len as int) = b;
1049 set_len(&mut *s, new_len);
1052 /// Appends a vector of bytes to a string. (Not UTF-8 safe).
1053 unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1054 let new_len = s.len() + bytes.len();
1055 s.reserve_at_least(new_len);
1056 foreach byte in bytes.iter() { push_byte(&mut *s, *byte); }
1059 /// Removes the last byte from a string and returns it. (Not UTF-8 safe).
1060 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
1062 assert!((len > 0u));
1063 let b = s[len - 1u];
1064 set_len(s, len - 1u);
1068 /// Removes the first byte from a string and returns it. (Not UTF-8 safe).
1069 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
1071 assert!((len > 0u));
1073 *s = s.slice(1, len).to_owned();
1077 /// Sets the length of the string and adds the null terminator
1080 pub unsafe fn set_len(v: &mut ~str, new_len: uint) {
1081 let v: **mut String = cast::transmute(v);
1083 (*repr).fill = new_len + 1u;
1084 let null = ptr::mut_offset(&mut ((*repr).data), new_len as int);
1088 /// Sets the length of a string
1090 /// This will explicitly set the size of the string, without actually
1091 /// modifying its buffers, so it is up to the caller to ensure that
1092 /// the string is actually the specified size.
1095 pub unsafe fn set_len(s: &mut ~str, new_len: uint) {
1096 let v: &mut ~[u8] = cast::transmute(s);
1097 vec::raw::set_len(v, new_len)
1100 /// Sets the length of a string
1102 /// This will explicitly set the size of the string, without actually
1103 /// modifying its buffers, so it is up to the caller to ensure that
1104 /// the string is actually the specified size.
1106 fn test_from_buf_len() {
1108 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
1109 let b = vec::raw::to_ptr(a);
1110 let c = from_buf_len(b, 3u);
1111 assert_eq!(c, ~"AAA");
1118 Section: Trait implementations
1124 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
1125 use super::{Str, eq_slice};
1126 use option::{Some, None};
1128 impl<'self> Add<&'self str,~str> for &'self str {
1130 fn add(&self, rhs: & &'self str) -> ~str {
1131 let mut ret = self.to_owned();
1137 impl<'self> TotalOrd for &'self str {
1139 fn cmp(&self, other: & &'self str) -> Ordering {
1140 foreach (s_b, o_b) in self.byte_iter().zip(other.byte_iter()) {
1141 match s_b.cmp(&o_b) {
1142 Greater => return Greater,
1143 Less => return Less,
1148 self.len().cmp(&other.len())
/// Total ordering for owned strings, defined through their slice views.
1152 impl TotalOrd for ~str {
/// Takes `as_slice()` of both operands and returns the result of `cmp` on those slices.
1154 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
/// Total ordering for managed strings, defined through their slice views.
1157 impl TotalOrd for @str {
/// Takes `as_slice()` of both operands and returns the result of `cmp` on those slices.
1159 fn cmp(&self, other: &@str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
/// Equality for string slices, decided by `eq_slice`.
1162 impl<'self> Eq for &'self str {
/// Returns what `eq_slice` reports for the two slices.
1164 fn eq(&self, other: & &'self str) -> bool {
1165 eq_slice((*self), (*other))
/// Logical negation of `eq`.
1168 fn ne(&self, other: & &'self str) -> bool { !(*self).eq(other) }
/// Equality of two `~str` values: returns what `eq_slice` reports for them.
1173 fn eq(&self, other: &~str) -> bool {
1174 eq_slice((*self), (*other))
/// Logical negation of `eq`.
1177 fn ne(&self, other: &~str) -> bool { !(*self).eq(other) }
/// Equality of two `@str` values: returns what `eq_slice` reports for them.
1182 fn eq(&self, other: &@str) -> bool {
1183 eq_slice((*self), (*other))
/// Logical negation of `eq`.
1186 fn ne(&self, other: &@str) -> bool { !(*self).eq(other) }
/// `TotalEq` for string slices.
1189 impl<'self> TotalEq for &'self str {
/// Returns what `eq_slice` reports for the two slices.
1191 fn equals(&self, other: & &'self str) -> bool {
1192 eq_slice((*self), (*other))
/// `TotalEq` for owned strings.
1196 impl TotalEq for ~str {
/// Returns what `eq_slice` reports for the two strings.
1198 fn equals(&self, other: &~str) -> bool {
1199 eq_slice((*self), (*other))
/// `TotalEq` for managed strings.
1203 impl TotalEq for @str {
/// Returns what `eq_slice` reports for the two strings.
1205 fn equals(&self, other: &@str) -> bool {
1206 eq_slice((*self), (*other))
/// Ordering operators for string slices, each expressed through `cmp`.
1210 impl<'self> Ord for &'self str {
/// `self < other`: true when `cmp` yields `Less`.
1212 fn lt(&self, other: & &'self str) -> bool { self.cmp(other) == Less }
/// `self <= other`: true when `cmp` yields anything but `Greater`.
1214 fn le(&self, other: & &'self str) -> bool { self.cmp(other) != Greater }
/// `self >= other`: true when `cmp` yields anything but `Less`.
1216 fn ge(&self, other: & &'self str) -> bool { self.cmp(other) != Less }
/// `self > other`: true when `cmp` yields `Greater`.
1218 fn gt(&self, other: & &'self str) -> bool { self.cmp(other) == Greater }
/// `self < other` for `~str`: true when `cmp` yields `Less`.
1223 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
/// `self <= other` for `~str`: true when `cmp` yields anything but `Greater`.
1225 fn le(&self, other: &~str) -> bool { self.cmp(other) != Greater }
/// `self >= other` for `~str`: true when `cmp` yields anything but `Less`.
1227 fn ge(&self, other: &~str) -> bool { self.cmp(other) != Less }
/// `self > other` for `~str`: true when `cmp` yields `Greater`.
1229 fn gt(&self, other: &~str) -> bool { self.cmp(other) == Greater }
/// `self < other` for `@str`: true when `cmp` yields `Less`.
1234 fn lt(&self, other: &@str) -> bool { self.cmp(other) == Less }
/// `self <= other` for `@str`: true when `cmp` yields anything but `Greater`.
1236 fn le(&self, other: &@str) -> bool { self.cmp(other) != Greater }
/// `self >= other` for `@str`: true when `cmp` yields anything but `Less`.
1238 fn ge(&self, other: &@str) -> bool { self.cmp(other) != Less }
/// `self > other` for `@str`: true when `cmp` yields `Greater`.
1240 fn gt(&self, other: &@str) -> bool { self.cmp(other) == Greater }
/// Lets a string slice be compared with any `Str` implementor.
1243 impl<'self, S: Str> Equiv<S> for &'self str {
/// Views `other` through `as_slice()` and returns what `eq_slice` reports.
1245 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
/// Lets a managed string be compared with any `Str` implementor.
1248 impl<'self, S: Str> Equiv<S> for @str {
/// Views `other` through `as_slice()` and returns what `eq_slice` reports.
1250 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
/// Lets an owned string be compared with any `Str` implementor.
1253 impl<'self, S: Str> Equiv<S> for ~str {
/// Views `other` through `as_slice()` and returns what `eq_slice` reports.
1255 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1262 /// Any string that can be represented as a slice
1264 /// Work with `self` as a slice.
1265 fn as_slice<'a>(&'a self) -> &'a str;
1267 /// Convert `self` into a ~str.
1268 fn into_owned(self) -> ~str;
/// `Str` for borrowed string slices.
1271 impl<'self> Str for &'self str {
/// The slice is its own slice view: dereferences and returns `*self`.
1273 fn as_slice<'a>(&'a self) -> &'a str { *self }
/// Produces a `~str` by calling `to_owned()`.
1276 fn into_owned(self) -> ~str { self.to_owned() }
/// `Str` for owned strings.
1279 impl<'self> Str for ~str {
/// Binds `*self` to a local typed `&'a str` and returns that binding.
1281 fn as_slice<'a>(&'a self) -> &'a str {
1282 let s: &'a str = *self; s
/// Already a `~str`: returns `self` as is.
1286 fn into_owned(self) -> ~str { self }
/// `Str` for managed strings.
1289 impl<'self> Str for @str {
/// Binds `*self` to a local typed `&'a str` and returns that binding.
1291 fn as_slice<'a>(&'a self) -> &'a str {
1292 let s: &'a str = *self; s
/// Produces a `~str` by calling `to_owned()`.
1296 fn into_owned(self) -> ~str { self.to_owned() }
1299 impl<'self> Container for &'self str {
1302 fn len(&self) -> uint {
1303 do self.as_imm_buf |_p, n| { n - 1u }
1308 fn len(&self) -> uint {
1309 do self.as_imm_buf |_p, n| { n }
/// `Container` for owned strings.
1313 impl Container for ~str {
/// Length as reported by `len()` on the `as_slice()` view.
1315 fn len(&self) -> uint { self.as_slice().len() }
/// `Container` for managed strings.
1318 impl Container for @str {
/// Length as reported by `len()` on the `as_slice()` view.
1320 fn len(&self) -> uint { self.as_slice().len() }
1323 impl Mutable for ~str {
1324 /// Remove all content, make the string empty
1326 fn clear(&mut self) {
1328 raw::set_len(self, 0)
1333 #[allow(missing_doc)]
1334 pub trait StrSlice<'self> {
1335 fn contains<'a>(&self, needle: &'a str) -> bool;
1336 fn contains_char(&self, needle: char) -> bool;
1337 fn iter(&self) -> CharIterator<'self>;
1338 fn rev_iter(&self) -> CharRevIterator<'self>;
1339 fn byte_iter(&self) -> ByteIterator<'self>;
1340 fn byte_rev_iter(&self) -> ByteRevIterator<'self>;
1341 fn char_offset_iter(&self) -> CharOffsetIterator<'self>;
1342 fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self>;
1343 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep>;
1344 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep>;
1345 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1346 -> CharSplitIterator<'self, Sep>;
1347 fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self>;
1348 fn split_str_iter(&self, &'self str) -> StrSplitIterator<'self>;
1349 fn line_iter(&self) -> CharSplitIterator<'self, char>;
1350 fn any_line_iter(&self) -> AnyLineIterator<'self>;
1351 fn word_iter(&self) -> WordIterator<'self>;
1352 fn ends_with(&self, needle: &str) -> bool;
1353 fn is_whitespace(&self) -> bool;
1354 fn is_alphanumeric(&self) -> bool;
1355 fn char_len(&self) -> uint;
1357 fn slice(&self, begin: uint, end: uint) -> &'self str;
1358 fn slice_from(&self, begin: uint) -> &'self str;
1359 fn slice_to(&self, end: uint) -> &'self str;
1361 fn slice_chars(&self, begin: uint, end: uint) -> &'self str;
1363 fn starts_with(&self, needle: &str) -> bool;
1364 fn escape_default(&self) -> ~str;
1365 fn escape_unicode(&self) -> ~str;
1366 fn trim(&self) -> &'self str;
1367 fn trim_left(&self) -> &'self str;
1368 fn trim_right(&self) -> &'self str;
1369 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1370 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1371 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str;
1372 fn replace(&self, from: &str, to: &str) -> ~str;
1373 fn to_owned(&self) -> ~str;
1374 fn to_managed(&self) -> @str;
1375 fn to_utf16(&self) -> ~[u16];
1376 fn is_char_boundary(&self, index: uint) -> bool;
1377 fn char_range_at(&self, start: uint) -> CharRange;
1378 fn char_at(&self, i: uint) -> char;
1379 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1380 fn char_at_reverse(&self, i: uint) -> char;
1381 fn as_bytes(&self) -> &'self [u8];
1383 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1384 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1385 fn find_str(&self, &str) -> Option<uint>;
1387 fn repeat(&self, nn: uint) -> ~str;
1389 fn slice_shift_char(&self) -> (char, &'self str);
1391 fn map_chars(&self, ff: &fn(char) -> char) -> ~str;
1393 fn lev_distance(&self, t: &str) -> uint;
1395 fn subslice_offset(&self, inner: &str) -> uint;
1397 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T;
1400 /// Extension methods for strings
1401 impl<'self> StrSlice<'self> for &'self str {
1402 /// Returns true if one string contains another
1406 /// * needle - The string to look for
1408 fn contains<'a>(&self, needle: &'a str) -> bool {
1409 self.find_str(needle).is_some()
1412 /// Returns true if a string contains a char.
1416 /// * needle - The char to look for
1418 fn contains_char(&self, needle: char) -> bool {
1419 self.find(needle).is_some()
1422 /// An iterator over the characters of `self`. Note, this iterates
1423 /// over unicode code-points, not unicode graphemes.
1428 /// let v: ~[char] = "abc åäö".iter().collect();
1429 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1432 fn iter(&self) -> CharIterator<'self> {
1433 self.char_offset_iter().transform(|(_, c)| c)
1436 /// An iterator over the characters of `self`, in reverse order.
1438 fn rev_iter(&self) -> CharRevIterator<'self> {
1439 self.iter().invert()
1442 /// An iterator over the bytes of `self`
1444 fn byte_iter(&self) -> ByteIterator<'self> {
1445 self.as_bytes().iter().transform(|&b| b)
1448 /// An iterator over the bytes of `self`, in reverse order
1450 fn byte_rev_iter(&self) -> ByteRevIterator<'self> {
1451 self.byte_iter().invert()
1454 /// An iterator over the characters of `self` and their byte offsets.
1456 fn char_offset_iter(&self) -> CharOffsetIterator<'self> {
1457 CharOffsetIterator {
1459 index_back: self.len(),
1464 /// An iterator over the characters of `self` and their byte offsets.
1466 fn char_offset_rev_iter(&self) -> CharOffsetRevIterator<'self> {
1467 self.char_offset_iter().invert()
1470 /// An iterator over substrings of `self`, separated by characters
1471 /// matched by `sep`.
1476 /// let v: ~[&str] = "Mary had a little lamb".split_iter(' ').collect();
1477 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1479 /// let v: ~[&str] = "abc1def2ghi".split_iter(|c: char| c.is_digit()).collect();
1480 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1483 fn split_iter<Sep: CharEq>(&self, sep: Sep) -> CharSplitIterator<'self, Sep> {
1484 self.split_options_iter(sep, self.len(), true)
1487 /// An iterator over substrings of `self`, separated by characters
1488 /// matched by `sep`, restricted to splitting at most `count`
1491 fn splitn_iter<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitIterator<'self, Sep> {
1492 self.split_options_iter(sep, count, true)
1495 /// An iterator over substrings of `self`, separated by characters
1496 /// matched by `sep`, splitting at most `count` times, and
1497 /// possibly not including the trailing empty substring, if it
1500 fn split_options_iter<Sep: CharEq>(&self, sep: Sep, count: uint, allow_trailing_empty: bool)
1501 -> CharSplitIterator<'self, Sep> {
1502 let only_ascii = sep.only_ascii();
1508 allow_trailing_empty: allow_trailing_empty,
1510 only_ascii: only_ascii
1514 /// An iterator over the start and end indices of each match of
1515 /// `sep` within `self`.
1517 fn matches_index_iter(&self, sep: &'self str) -> MatchesIndexIterator<'self> {
1518 assert!(!sep.is_empty())
1519 MatchesIndexIterator {
1526 /// An iterator over the substrings of `self` separated by `sep`.
1531 /// let v: ~[&str] = "abcXXXabcYYYabc".split_str_iter("abc").collect();
1532 /// assert_eq!(v, ["", "XXX", "YYY", ""]);
1535 fn split_str_iter(&self, sep: &'self str) -> StrSplitIterator<'self> {
1537 it: self.matches_index_iter(sep),
1543 /// An iterator over the lines of a string (subsequences separated
1546 fn line_iter(&self) -> CharSplitIterator<'self, char> {
1547 self.split_options_iter('\n', self.len(), false)
1550 /// An iterator over the lines of a string, separated by either
1551 /// `\n` or (`\r\n`).
1552 fn any_line_iter(&self) -> AnyLineIterator<'self> {
1553 do self.line_iter().transform |line| {
1555 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
1560 /// An iterator over the words of a string (subsequences separated
1561 /// by any sequence of whitespace).
1563 fn word_iter(&self) -> WordIterator<'self> {
1564 self.split_iter(char::is_whitespace).filter(|s| !s.is_empty())
1567 /// Returns true if the string contains only whitespace
1569 /// Whitespace characters are determined by `char::is_whitespace`
1571 fn is_whitespace(&self) -> bool { self.iter().all(char::is_whitespace) }
1573 /// Returns true if the string contains only alphanumerics
1575 /// Alphanumeric characters are determined by `char::is_alphanumeric`
1577 fn is_alphanumeric(&self) -> bool { self.iter().all(char::is_alphanumeric) }
1579 /// Returns the number of characters that a string holds
1581 fn char_len(&self) -> uint { self.iter().len_() }
1583 /// Returns a slice of the given string from the byte range
1584 /// [`begin`..`end`)
1586 /// Fails when `begin` and `end` do not point to valid characters or
1587 /// beyond the last character of the string
1589 fn slice(&self, begin: uint, end: uint) -> &'self str {
1590 assert!(self.is_char_boundary(begin));
1591 assert!(self.is_char_boundary(end));
1592 unsafe { raw::slice_bytes(*self, begin, end) }
1595 /// Returns a slice of the string from `begin` to its end.
1597 /// Fails when `begin` does not point to a valid character, or is
1600 fn slice_from(&self, begin: uint) -> &'self str {
1601 self.slice(begin, self.len())
1604 /// Returns a slice of the string from the beginning to byte
1607 /// Fails when `end` does not point to a valid character, or is
1610 fn slice_to(&self, end: uint) -> &'self str {
1614 /// Returns a slice of the string from the char range
1615 /// [`begin`..`end`).
1617 /// Fails if `begin` > `end` or if either `begin` or `end` is
1618 /// beyond the last character of the string.
1619 fn slice_chars(&self, begin: uint, end: uint) -> &'self str {
1620 assert!(begin <= end);
1621 // not sure how to use the iterators for this nicely.
1622 let mut position = 0;
1625 while count < begin && position < l {
1626 position = self.char_range_at(position).next;
1629 if count < begin { fail!("Attempted to begin slice_chars beyond end of string") }
1630 let start_byte = position;
1631 while count < end && position < l {
1632 position = self.char_range_at(position).next;
1635 if count < end { fail!("Attempted to end slice_chars beyond end of string") }
1637 self.slice(start_byte, position)
1640 /// Returns true if `needle` is a prefix of the string.
1641 fn starts_with<'a>(&self, needle: &'a str) -> bool {
1642 let (self_len, needle_len) = (self.len(), needle.len());
1643 if needle_len == 0u { true }
1644 else if needle_len > self_len { false }
1645 else { match_at(*self, needle, 0u) }
1648 /// Returns true if `needle` is a suffix of the string.
1649 fn ends_with(&self, needle: &str) -> bool {
1650 let (self_len, needle_len) = (self.len(), needle.len());
1651 if needle_len == 0u { true }
1652 else if needle_len > self_len { false }
1653 else { match_at(*self, needle, self_len - needle_len) }
1656 /// Escape each char in `self` with char::escape_default.
1657 fn escape_default(&self) -> ~str {
1658 let mut out: ~str = ~"";
1659 out.reserve_at_least(self.len());
1660 foreach c in self.iter() {
1661 do c.escape_default |c| {
1668 /// Escape each char in `self` with char::escape_unicode.
1669 fn escape_unicode(&self) -> ~str {
1670 let mut out: ~str = ~"";
1671 out.reserve_at_least(self.len());
1672 foreach c in self.iter() {
1673 do c.escape_unicode |c| {
1680 /// Returns a string with leading and trailing whitespace removed
1682 fn trim(&self) -> &'self str {
1683 self.trim_left().trim_right()
1686 /// Returns a string with leading whitespace removed
1688 fn trim_left(&self) -> &'self str {
1689 self.trim_left_chars(&char::is_whitespace)
1692 /// Returns a string with trailing whitespace removed
1694 fn trim_right(&self) -> &'self str {
1695 self.trim_right_chars(&char::is_whitespace)
1698 /// Returns a string with characters that match `to_trim` removed.
1702 /// * to_trim - a character matcher
1707 /// assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1708 /// assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1709 /// assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1712 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1713 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
1716 /// Returns a string with leading characters that match `to_trim` removed.
1720 /// * to_trim - a character matcher
1725 /// assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1726 /// assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1727 /// assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1730 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1731 match self.find(|c: char| !to_trim.matches(c)) {
1733 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
1737 /// Returns a string with trailing characters that match `to_trim` removed.
1741 /// * to_trim - a character matcher
1746 /// assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1747 /// assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1748 /// assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1751 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'self str {
1752 match self.rfind(|c: char| !to_trim.matches(c)) {
1755 let next = self.char_range_at(last).next;
1756 unsafe { raw::slice_bytes(*self, 0u, next) }
1761 /// Replace all occurrences of one string with another
1765 /// * from - The string to replace
1766 /// * to - The replacement string
1770 /// The original string with all occurrences of `from` replaced with `to`
1771 pub fn replace(&self, from: &str, to: &str) -> ~str {
1772 let mut result = ~"";
1773 let mut last_end = 0;
1774 foreach (start, end) in self.matches_index_iter(from) {
1775 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
1776 result.push_str(to);
1779 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
1783 /// Copy a slice into a new unique str
1786 fn to_owned(&self) -> ~str {
1787 do self.as_imm_buf |src, len| {
1790 let mut v = vec::with_capacity(len);
1792 do v.as_mut_buf |dst, _| {
1793 ptr::copy_memory(dst, src, len - 1);
1795 vec::raw::set_len(&mut v, len - 1);
1797 ::cast::transmute(v)
1802 /// Copy a slice into a new unique str
1805 fn to_owned(&self) -> ~str {
1806 do self.as_imm_buf |src, len| {
1808 let mut v = vec::with_capacity(len);
1810 do v.as_mut_buf |dst, _| {
1811 ptr::copy_memory(dst, src, len);
1813 vec::raw::set_len(&mut v, len);
1814 ::cast::transmute(v)
1821 fn to_managed(&self) -> @str {
1822 let v = at_vec::from_fn(self.len() + 1, |i| {
1823 if i == self.len() { 0 } else { self[i] }
1825 unsafe { cast::transmute(v) }
1830 fn to_managed(&self) -> @str {
1832 let v: *&[u8] = cast::transmute(self);
1833 cast::transmute(at_vec::to_managed(*v))
1837 /// Converts to a vector of `u16` encoded as UTF-16.
1838 fn to_utf16(&self) -> ~[u16] {
1840 foreach ch in self.iter() {
1841 // Arithmetic with u32 literals is easier on the eyes than chars.
1842 let mut ch = ch as u32;
1844 if (ch & 0xFFFF_u32) == ch {
1845 // The BMP falls through (assuming non-surrogate, as it
1847 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
1850 // Supplementary planes break into surrogates.
1851 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
1853 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
1854 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
1855 u.push_all([w1, w2])
1861 /// Returns false if the index points into the middle of a multi-byte
1862 /// character sequence.
1863 fn is_char_boundary(&self, index: uint) -> bool {
1864 if index == self.len() { return true; }
1865 let b = self[index];
1866 return b < 128u8 || b >= 192u8;
1869 /// Pluck a character out of a string and return the index of the next
1872 /// This function can be used to iterate over the unicode characters of a
1878 /// let s = "中华Việt Nam";
1880 /// while i < s.len() {
1881 /// let CharRange {ch, next} = s.char_range_at(i);
1882 /// printfln!("%u: %c", i, ch);
1887 /// # Example output
1904 /// * s - The string
1905 /// * i - The byte offset of the char to extract
1909 /// A record {ch: char, next: uint} containing the char value and the byte
1910 /// index of the next unicode character.
1914 /// If `i` is greater than or equal to the length of the string.
1915 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1917 fn char_range_at(&self, i: uint) -> CharRange {
1918 if (self[i] < 128u8) {
1919 return CharRange {ch: self[i] as char, next: i + 1 };
1922 // Multibyte case is a fn to allow char_range_at to inline cleanly
1923 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
1924 let mut val = s[i] as uint;
1925 let w = UTF8_CHAR_WIDTH[val] as uint;
1928 val = utf8_first_byte!(val, w);
1929 val = utf8_acc_cont_byte!(val, s[i + 1]);
1930 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
1931 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
1933 return CharRange {ch: val as char, next: i + w};
1936 return multibyte_char_range_at(*self, i);
1939 /// Plucks the character starting at the `i`th byte of a string
1941 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
1943 /// Given a byte position and a str, return the previous char and its position.
1945 /// This function can be used to iterate over a unicode string in reverse.
1947 /// Returns 0 for next index if called on start index 0.
1948 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1949 let mut prev = start;
1951 // while there is a previous byte == 10......
1952 while prev > 0u && self[prev - 1u] & 192u8 == TAG_CONT_U8 {
1956 // now refer to the initial byte of previous char
1964 let ch = self.char_at(prev);
1965 return CharRange {ch:ch, next:prev};
1968 /// Plucks the character ending at the `i`th byte of a string
1970 fn char_at_reverse(&self, i: uint) -> char {
1971 self.char_range_at_reverse(i).ch
1974 /// Work with the byte buffer of a string as a byte slice.
1976 /// The byte slice does not include the null terminator.
1978 fn as_bytes(&self) -> &'self [u8] {
1980 let mut slice = self.repr();
1982 cast::transmute(slice)
1986 /// Work with the byte buffer of a string as a byte slice.
1988 /// The byte slice does not include the null terminator.
1990 fn as_bytes(&self) -> &'self [u8] {
1991 unsafe { cast::transmute(*self) }
1994 /// Returns the byte index of the first character of `self` that matches `search`
1998 /// `Some` containing the byte index of the first matching character
1999 /// or `None` if there is no match
2000 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
2001 if search.only_ascii() {
2002 foreach (i, b) in self.byte_iter().enumerate() {
2003 if search.matches(b as char) { return Some(i) }
2007 foreach c in self.iter() {
2008 if search.matches(c) { return Some(index); }
2009 index += c.len_utf8_bytes();
2016 /// Returns the byte index of the last character of `self` that matches `search`
2020 /// `Some` containing the byte index of the last matching character
2021 /// or `None` if there is no match
2022 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
2023 let mut index = self.len();
2024 if search.only_ascii() {
2025 foreach b in self.byte_rev_iter() {
2027 if search.matches(b as char) { return Some(index); }
2030 foreach c in self.rev_iter() {
2031 index -= c.len_utf8_bytes();
2032 if search.matches(c) { return Some(index); }
2039 /// Returns the byte index of the first matching substring
2043 /// * `needle` - The string to search for
2047 /// `Some` containing the byte index of the first matching substring
2048 /// or `None` if there is no match
2049 fn find_str(&self, needle: &str) -> Option<uint> {
2050 if needle.is_empty() {
2053 self.matches_index_iter(needle)
2055 .map_consume(|(start, _end)| start)
2059 /// Given a string, make a new string with repeated copies of it.
2061 fn repeat(&self, nn: uint) -> ~str {
2062 do self.as_imm_buf |buf, len| {
2063 // ignore the NULL terminator
2065 let mut ret = with_capacity(nn * len);
2068 do ret.as_mut_buf |rbuf, _len| {
2069 let mut rbuf = rbuf;
2072 ptr::copy_memory(rbuf, buf, len);
2073 rbuf = rbuf.offset(len as int);
2076 raw::set_len(&mut ret, nn * len);
2082 /// Given a string, make a new string with repeated copies of it.
2084 fn repeat(&self, nn: uint) -> ~str {
2085 do self.as_imm_buf |buf, len| {
2086 let mut ret = with_capacity(nn * len);
2089 do ret.as_mut_buf |rbuf, _len| {
2090 let mut rbuf = rbuf;
2093 ptr::copy_memory(rbuf, buf, len);
2094 rbuf = rbuf.offset(len as int);
2097 raw::set_len(&mut ret, nn * len);
2103 /// Retrieves the first character from a string slice and returns
2104 /// it. This does not allocate a new string; instead, it returns a
2105 /// slice that point one character beyond the character that was
2110 /// If the string does not contain any characters
2112 fn slice_shift_char(&self) -> (char, &'self str) {
2113 let CharRange {ch, next} = self.char_range_at(0u);
2114 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
2115 return (ch, next_s);
2118 /// Apply a function to each character.
2119 fn map_chars(&self, ff: &fn(char) -> char) -> ~str {
2120 let mut result = with_capacity(self.len());
2121 foreach cc in self.iter() {
2122 result.push_char(ff(cc));
2127 /// Levenshtein Distance between two strings.
2128 fn lev_distance(&self, t: &str) -> uint {
2129 let slen = self.len();
2132 if slen == 0 { return tlen; }
2133 if tlen == 0 { return slen; }
2135 let mut dcol = vec::from_fn(tlen + 1, |x| x);
2137 foreach (i, sc) in self.iter().enumerate() {
2139 let mut current = i;
2140 dcol[0] = current + 1;
2142 foreach (j, tc) in t.iter().enumerate() {
2144 let next = dcol[j + 1];
2147 dcol[j + 1] = current;
2149 dcol[j + 1] = ::cmp::min(current, next);
2150 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
2160 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
2162 /// Fails if `inner` is not a direct slice contained within self.
2167 /// let string = "a\nb\nc";
2168 /// let mut lines = ~[];
2169 /// foreach line in string.line_iter() { lines.push(line) }
2171 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
2172 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
2173 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
2176 fn subslice_offset(&self, inner: &str) -> uint {
2177 do self.as_imm_buf |a, a_len| {
2178 do inner.as_imm_buf |b, b_len| {
2184 a_start = cast::transmute(a); a_end = a_len + cast::transmute(a);
2185 b_start = cast::transmute(b); b_end = b_len + cast::transmute(b);
2187 assert!(a_start <= b_start);
2188 assert!(b_end <= a_end);
2194 /// Work with the byte buffer and length of a slice.
2196 /// The given length is one byte longer than the 'official' indexable
2197 /// length of the string. This is to permit probing the byte past the
2198 /// indexable area for a null byte, as is the case in slices pointing
2199 /// to full strings, or suffixes of them.
2201 fn as_imm_buf<T>(&self, f: &fn(*u8, uint) -> T) -> T {
2202 let v: &[u8] = unsafe { cast::transmute(*self) };
2207 #[allow(missing_doc)]
2208 pub trait OwnedStr {
2209 fn push_str_no_overallocate(&mut self, rhs: &str);
2210 fn push_str(&mut self, rhs: &str);
2211 fn push_char(&mut self, c: char);
2212 fn pop_char(&mut self) -> char;
2213 fn shift_char(&mut self) -> char;
2214 fn unshift_char(&mut self, ch: char);
2215 fn append(self, rhs: &str) -> ~str;
2216 fn reserve(&mut self, n: uint);
2217 fn reserve_at_least(&mut self, n: uint);
2218 fn capacity(&self) -> uint;
2220 fn to_bytes_with_null(self) -> ~[u8];
2222 /// Work with the mutable byte buffer and length of a slice.
2224 /// The given length is one byte longer than the 'official' indexable
2225 /// length of the string. This is to permit probing the byte past the
2226 /// indexable area for a null byte, as is the case in slices pointing
2227 /// to full strings, or suffixes of them.
2229 /// Make sure any mutations to this buffer keep this string valid UTF8.
2230 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T;
2233 impl OwnedStr for ~str {
2234 /// Appends a string slice to the back of a string, without overallocating
2236 fn push_str_no_overallocate(&mut self, rhs: &str) {
2238 let llen = self.len();
2239 let rlen = rhs.len();
2240 self.reserve(llen + rlen);
2241 do self.as_imm_buf |lbuf, _llen| {
2242 do rhs.as_imm_buf |rbuf, _rlen| {
2243 let dst = ptr::offset(lbuf, llen as int);
2244 let dst = cast::transmute_mut_unsafe(dst);
2245 ptr::copy_memory(dst, rbuf, rlen);
2248 raw::set_len(self, llen + rlen);
2252 /// Appends a string slice to the back of a string
2254 fn push_str(&mut self, rhs: &str) {
2256 let llen = self.len();
2257 let rlen = rhs.len();
2258 self.reserve_at_least(llen + rlen);
2259 do self.as_imm_buf |lbuf, _llen| {
2260 do rhs.as_imm_buf |rbuf, _rlen| {
2261 let dst = ptr::offset(lbuf, llen as int);
2262 let dst = cast::transmute_mut_unsafe(dst);
2263 ptr::copy_memory(dst, rbuf, rlen);
2266 raw::set_len(self, llen + rlen);
2270 /// Appends a character to the back of a string
2272 fn push_char(&mut self, c: char) {
2273 assert!((c as uint) < MAX_UNICODE); // FIXME: #7609: should be enforced on all `char`
2275 let code = c as uint;
2276 let nb = if code < MAX_ONE_B { 1u }
2277 else if code < MAX_TWO_B { 2u }
2278 else if code < MAX_THREE_B { 3u }
2280 let len = self.len();
2281 let new_len = len + nb;
2282 self.reserve_at_least(new_len);
2283 let off = len as int;
2284 do self.as_mut_buf |buf, _len| {
2287 *ptr::mut_offset(buf, off) = code as u8;
2290 *ptr::mut_offset(buf, off) = (code >> 6u & 31u | TAG_TWO_B) as u8;
2291 *ptr::mut_offset(buf, off + 1) = (code & 63u | TAG_CONT) as u8;
2294 *ptr::mut_offset(buf, off) = (code >> 12u & 15u | TAG_THREE_B) as u8;
2295 *ptr::mut_offset(buf, off + 1) = (code >> 6u & 63u | TAG_CONT) as u8;
2296 *ptr::mut_offset(buf, off + 2) = (code & 63u | TAG_CONT) as u8;
2299 *ptr::mut_offset(buf, off) = (code >> 18u & 7u | TAG_FOUR_B) as u8;
2300 *ptr::mut_offset(buf, off + 1) = (code >> 12u & 63u | TAG_CONT) as u8;
2301 *ptr::mut_offset(buf, off + 2) = (code >> 6u & 63u | TAG_CONT) as u8;
2302 *ptr::mut_offset(buf, off + 3) = (code & 63u | TAG_CONT) as u8;
2307 raw::set_len(self, new_len);
2311 /// Remove the final character from a string and return it
2315 /// If the string does not contain any characters
2316 fn pop_char(&mut self) -> char {
2317 let end = self.len();
2319 let CharRange {ch, next} = self.char_range_at_reverse(end);
2320 unsafe { raw::set_len(self, next); }
2324 /// Remove the first character from a string and return it
2328 /// If the string does not contain any characters
2329 fn shift_char(&mut self) -> char {
2330 let CharRange {ch, next} = self.char_range_at(0u);
2331 *self = self.slice(next, self.len()).to_owned();
2335 /// Prepend a char to a string
2336 fn unshift_char(&mut self, ch: char) {
2337 // This could be more efficient.
2338 let mut new_str = ~"";
2339 new_str.push_char(ch);
2340 new_str.push_str(*self);
2344 /// Concatenate two strings together.
2346 fn append(self, rhs: &str) -> ~str {
2347 let mut new_str = self;
2348 new_str.push_str_no_overallocate(rhs);
2352 /// Reserves capacity for exactly `n` bytes in the given string, not including
2353 /// the null terminator.
2355 /// Assuming single-byte characters, the resulting string will be large
2356 /// enough to hold a string of length `n`. To account for the null terminator,
2357 /// the underlying buffer will have the size `n` + 1.
2359 /// If the capacity for `s` is already equal to or greater than the requested
2360 /// capacity, then no action is taken.
2365 /// * n - The number of bytes to reserve space for
2368 pub fn reserve(&mut self, n: uint) {
2370 let v: *mut ~[u8] = cast::transmute(self);
2371 (*v).reserve(n + 1);
2375 /// Reserves capacity for exactly `n` bytes in the given string, not including
2376 /// the null terminator.
2378 /// Assuming single-byte characters, the resulting string will be large
2379 /// enough to hold a string of length `n`. To account for the null terminator,
2380 /// the underlying buffer will have the size `n` + 1.
2382 /// If the capacity for `s` is already equal to or greater than the requested
2383 /// capacity, then no action is taken.
2388 /// * n - The number of bytes to reserve space for
2391 pub fn reserve(&mut self, n: uint) {
2393 let v: &mut ~[u8] = cast::transmute(self);
2398 /// Reserves capacity for at least `n` bytes in the given string, not including
2399 /// the null terminator.
2401 /// Assuming single-byte characters, the resulting string will be large
2402 /// enough to hold a string of length `n`. To account for the null terminator,
2403 /// the underlying buffer will have the size `n` + 1.
2405 /// This function will over-allocate in order to amortize the allocation costs
2406 /// in scenarios where the caller may need to repeatedly reserve additional
2409 /// If the capacity for `s` is already equal to or greater than the requested
2410 /// capacity, then no action is taken.
2415 /// * n - The number of bytes to reserve space for
2418 fn reserve_at_least(&mut self, n: uint) {
2419 self.reserve(uint::next_power_of_two(n + 1u) - 1u)
2422 /// Reserves capacity for at least `n` bytes in the given string.
2424 /// Assuming single-byte characters, the resulting string will be large
2425 /// enough to hold a string of length `n`. To account for the null terminator,
2426 /// the underlying buffer will have the size `n` + 1.
2428 /// This function will over-allocate in order to amortize the allocation costs
2429 /// in scenarios where the caller may need to repeatedly reserve additional
2432 /// If the capacity for `s` is already equal to or greater than the requested
2433 /// capacity, then no action is taken.
2438 /// * n - The number of bytes to reserve space for
2441 fn reserve_at_least(&mut self, n: uint) {
2442 self.reserve(uint::next_power_of_two(n))
2445 /// Returns the number of single-byte characters the string can hold without
2448 fn capacity(&self) -> uint {
2449 let buf: &~[u8] = unsafe { cast::transmute(self) };
2450 let vcap = buf.capacity();
2455 /// Returns the number of single-byte characters the string can hold without
2458 fn capacity(&self) -> uint {
2460 let buf: &~[u8] = cast::transmute(self);
2465 /// Convert to a vector of bytes. This does not allocate a new
2466 /// string, and includes the null terminator.
2469 fn to_bytes_with_null(self) -> ~[u8] {
2470 unsafe { cast::transmute(self) }
2474 fn as_mut_buf<T>(&mut self, f: &fn(*mut u8, uint) -> T) -> T {
2475 let v: &mut ~[u8] = unsafe { cast::transmute(self) };
2480 impl Clone for ~str {
2482 fn clone(&self) -> ~str {
2487 impl Clone for @str {
2489 fn clone(&self) -> @str {
2494 impl<T: Iterator<char>> FromIterator<char, T> for ~str {
2496 fn from_iterator(iterator: &mut T) -> ~str {
2497 let (lower, _) = iterator.size_hint();
2498 let mut buf = with_capacity(lower);
2499 buf.extend(iterator);
2504 impl<T: Iterator<char>> Extendable<char, T> for ~str {
2506 fn extend(&mut self, iterator: &mut T) {
2507 let (lower, _) = iterator.size_hint();
2508 let reserve = lower + self.len();
2509 self.reserve_at_least(reserve);
2510 foreach ch in *iterator {
2516 // This works because every lifetime is a sub-lifetime of 'static
2517 impl<'self> Zero for &'self str {
2518 fn zero() -> &'self str { "" }
2519 fn is_zero(&self) -> bool { self.is_empty() }
2522 impl Zero for ~str {
2523 fn zero() -> ~str { ~"" }
2524 fn is_zero(&self) -> bool { self.len() == 0 }
2527 impl Zero for @str {
2528 fn zero() -> @str { @"" }
2529 fn is_zero(&self) -> bool { self.len() == 0 }
2534 use iterator::IteratorUtil;
2535 use container::Container;
2542 use vec::{ImmutableVector, CopyableVector};
2543 use cmp::{TotalOrd, Less, Equal, Greater};
2547 assert!((eq(&~"", &~"")));
2548 assert!((eq(&~"foo", &~"foo")));
2549 assert!((!eq(&~"foo", &~"bar")));
2553 fn test_eq_slice() {
2554 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2555 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2556 assert!((!eq_slice("foo1", "foo2")));
2562 assert!("" <= "foo");
2563 assert!("foo" <= "foo");
2564 assert!("foo" != "bar");
2569 assert_eq!("".len(), 0u);
2570 assert_eq!("hello world".len(), 11u);
2571 assert_eq!("\x63".len(), 1u);
2572 assert_eq!("\xa2".len(), 2u);
2573 assert_eq!("\u03c0".len(), 2u);
2574 assert_eq!("\u2620".len(), 3u);
2575 assert_eq!("\U0001d11e".len(), 4u);
2577 assert_eq!("".char_len(), 0u);
2578 assert_eq!("hello world".char_len(), 11u);
2579 assert_eq!("\x63".char_len(), 1u);
2580 assert_eq!("\xa2".char_len(), 1u);
2581 assert_eq!("\u03c0".char_len(), 1u);
2582 assert_eq!("\u2620".char_len(), 1u);
2583 assert_eq!("\U0001d11e".char_len(), 1u);
2584 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2589 assert_eq!("hello".find('l'), Some(2u));
2590 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2591 assert!("hello".find('x').is_none());
2592 assert!("hello".find(|c:char| c == 'x').is_none());
2593 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2594 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2599 assert_eq!("hello".rfind('l'), Some(3u));
2600 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2601 assert!("hello".rfind('x').is_none());
2602 assert!("hello".rfind(|c:char| c == 'x').is_none());
2603 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2604 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2608 fn test_push_str() {
2611 assert_eq!(s.slice_from(0), "");
2613 assert_eq!(s.slice_from(0), "abc");
2614 s.push_str("ประเทศไทย中华Việt Nam");
2615 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2622 assert_eq!(s.slice_from(0), "");
2623 s = s.append("abc");
2624 assert_eq!(s.slice_from(0), "abc");
2625 s = s.append("ประเทศไทย中华Việt Nam");
2626 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2630 fn test_pop_char() {
2631 let mut data = ~"ประเทศไทย中华";
2632 let cc = data.pop_char();
2633 assert_eq!(~"ประเทศไทย中", data);
2634 assert_eq!('华', cc);
2638 fn test_pop_char_2() {
2639 let mut data2 = ~"华";
2640 let cc2 = data2.pop_char();
2641 assert_eq!(~"", data2);
2642 assert_eq!('华', cc2);
2647 #[ignore(cfg(windows))]
2648 fn test_pop_char_fail() {
2650 let _cc3 = data.pop_char();
2654 fn test_push_char() {
2655 let mut data = ~"ประเทศไทย中";
2656 data.push_char('华');
2657 data.push_char('b'); // 1 byte
2658 data.push_char('¢'); // 2 byte
2659 data.push_char('€'); // 3 byte
2660 data.push_char('𤭢'); // 4 byte
2661 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
2665 fn test_shift_char() {
2666 let mut data = ~"ประเทศไทย中";
2667 let cc = data.shift_char();
2668 assert_eq!(~"ระเทศไทย中", data);
2669 assert_eq!('ป', cc);
2673 fn test_unshift_char() {
2674 let mut data = ~"ประเทศไทย中";
2675 data.unshift_char('华');
2676 assert_eq!(~"华ประเทศไทย中", data);
2682 let s: ~str = empty.iter().collect();
2683 assert_eq!(empty, s.as_slice());
2684 let data = "ประเทศไทย中";
2685 let s: ~str = data.iter().collect();
2686 assert_eq!(data, s.as_slice());
2691 let data = ~"ประเทศไทย中";
2692 let mut cpy = data.clone();
2694 let mut it = other.iter();
2695 cpy.extend(&mut it);
2696 assert_eq!(cpy, data + other);
2701 let mut empty = ~"";
2703 assert_eq!("", empty.as_slice());
2704 let mut data = ~"ประเทศไทย中";
2706 assert_eq!("", data.as_slice());
2707 data.push_char('华');
2708 assert_eq!("华", data.as_slice());
2712 fn test_find_str() {
2714 assert_eq!("".find_str(""), Some(0u));
2715 assert!("banana".find_str("apple pie").is_none());
2717 let data = "abcabc";
2718 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2719 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2720 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2722 let mut data = ~"ประเทศไทย中华Việt Nam";
2724 assert!(data.find_str("ไท华").is_none());
2725 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2726 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2728 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2729 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2730 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2731 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2732 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
2734 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2735 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2736 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2737 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2738 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
2742 fn test_slice_chars() {
2743 fn t(a: &str, b: &str, start: uint) {
2744 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2746 t("hello", "llo", 2);
2747 t("hello", "el", 1);
2748 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2753 fn t(v: &[~str], s: &str) {
2754 assert_eq!(v.concat(), s.to_str());
2756 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2757 let v: &[~str] = [];
2764 fn t(v: &[~str], sep: &str, s: &str) {
2765 assert_eq!(v.connect(sep), s.to_str());
2767 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2768 " ", "you know I'm no good");
2769 let v: &[~str] = [];
2771 t([~"hi"], " ", "hi");
// Checks `concat` on a vector of borrowed string slices.
2775 fn test_concat_slices() {
// Asserts that concatenating the elements of `v` produces `s`.
2776 fn t(v: &[&str], s: &str) {
2777 assert_eq!(v.concat(), s.to_str());
2779 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
// An empty vector, bound to a name so its element type is spelled out.
2780 let v: &[&str] = [];
// Checks `connect` (join with a separator) on a vector of borrowed string slices.
2786 fn test_connect_slices() {
// Asserts that joining the elements of `v` with `sep` produces `s`.
2787 fn t(v: &[&str], sep: &str, s: &str) {
2788 assert_eq!(v.connect(sep), s.to_str());
2790 t(["you", "know", "I'm", "no", "good"],
2791 " ", "you know I'm no good");
// A single element is returned as-is: no separator is added.
2793 t(["hi"], " ", "hi");
2798 assert_eq!("x".repeat(4), ~"xxxx");
2799 assert_eq!("hi".repeat(4), ~"hihihihi");
2800 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2801 assert_eq!("".repeat(4), ~"");
2802 assert_eq!("hi".repeat(0), ~"");
// Checks `raw::slice_bytes` on short literals and on a long generated string.
2806 fn test_unsafe_slice() {
2807 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2808 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2809 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
// Helper: grows its result by ten 'a's per loop iteration.
2810 fn a_million_letter_a() -> ~str {
2813 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
// Helper: same loop bound, but five 'a's per iteration.
2816 fn half_a_million_letter_a() -> ~str {
2819 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2822 let letters = a_million_letter_a();
// The first 500000 bytes of the long string must equal the shorter string.
2823 assert!(half_a_million_letter_a() ==
2824 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
// Checks `starts_with`: the empty prefix always matches, and a prefix longer
// than the string never does.
2828 fn test_starts_with() {
2829 assert!(("".starts_with("")));
2830 assert!(("abc".starts_with("")));
2831 assert!(("abc".starts_with("a")));
2832 assert!((!"a".starts_with("abc")));
2833 assert!((!"".starts_with("abc")));
// Checks `ends_with`: the empty suffix always matches, and a suffix longer
// than the string never does.
2837 fn test_ends_with() {
2838 assert!(("".ends_with("")));
2839 assert!(("abc".ends_with("")));
2840 assert!(("abc".ends_with("c")));
2841 assert!((!"a".ends_with("abc")));
2842 assert!((!"".ends_with("abc")));
// Checks `is_empty` on the empty string and on a one-character string.
2846 fn test_is_empty() {
2847 assert!("".is_empty());
2848 assert!(!"a".is_empty());
2854 assert_eq!("".replace(a, "b"), ~"");
2855 assert_eq!("a".replace(a, "b"), ~"b");
2856 assert_eq!("ab".replace(a, "b"), ~"bb");
2858 assert!(" test test ".replace(test, "toast") ==
2860 assert_eq!(" test test ".replace(test, ""), ~" ");
// Checks `replace` on multi-byte text where the match is at the start: the
// expected string `A` is `repl` followed by the tail of `data`.
2864 fn test_replace_2a() {
2865 let data = ~"ประเทศไทย中华";
2866 let repl = ~"دولة الكويت";
2869 let A = ~"دولة الكويتทศไทย中华";
2870 assert_eq!(data.replace(a, repl), A);
// Checks `replace` on multi-byte text where the match is in the middle: the
// expected string `B` keeps the head and tail of `data` around `repl`.
2874 fn test_replace_2b() {
2875 let data = ~"ประเทศไทย中华";
2876 let repl = ~"دولة الكويت";
2879 let B = ~"ปรدولة الكويتทศไทย中华";
2880 assert_eq!(data.replace(b, repl), B);
// Checks `replace` on multi-byte text where the match is at the end: the
// expected string `C` is the head of `data` followed by `repl`.
2884 fn test_replace_2c() {
2885 let data = ~"ประเทศไทย中华";
2886 let repl = ~"دولة الكويت";
2889 let C = ~"ประเทศไทยدولة الكويت";
2890 assert_eq!(data.replace(c, repl), C);
// Checks that `replace` returns the input unchanged for the pattern `d`.
// NOTE(review): presumably `d` is a pattern that does not occur in `data` -- confirm.
2894 fn test_replace_2d() {
2895 let data = ~"ประเทศไทย中华";
2896 let repl = ~"دولة الكويت";
2899 assert_eq!(data.replace(d, repl), data);
2904 assert_eq!("ab", "abc".slice(0, 2));
2905 assert_eq!("bc", "abc".slice(1, 3));
2906 assert_eq!("", "abc".slice(1, 1));
2907 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
2909 let data = "ประเทศไทย中华";
2910 assert_eq!("ป", data.slice(0, 3));
2911 assert_eq!("ร", data.slice(3, 6));
2912 assert_eq!("", data.slice(3, 3));
2913 assert_eq!("华", data.slice(30, 33));
2915 fn a_million_letter_X() -> ~str {
2919 push_str(&mut rs, "华华华华华华华华华华");
2924 fn half_a_million_letter_X() -> ~str {
2927 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
2930 let letters = a_million_letter_X();
2931 assert!(half_a_million_letter_X() ==
2932 letters.slice(0u, 3u * 500000u).to_owned());
2937 let ss = "中华Việt Nam";
2939 assert_eq!("华", ss.slice(3u, 6u));
2940 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2942 assert_eq!("ab", "abc".slice(0u, 2u));
2943 assert_eq!("bc", "abc".slice(1u, 3u));
2944 assert_eq!("", "abc".slice(1u, 1u));
2946 assert_eq!("中", ss.slice(0u, 3u));
2947 assert_eq!("华V", ss.slice(3u, 7u));
2948 assert_eq!("", ss.slice(3u, 3u));
// Slices a multi-byte string at bytes 0..2.
// NOTE(review): presumably byte 2 is not a character boundary and the slice is
// expected to fail; no should-fail marker is visible on this function -- confirm.
2963 #[ignore(cfg(windows))]
2964 fn test_slice_fail() {
2965 "中华Việt Nam".slice(0u, 2u);
// Checks `slice_from(n)`: everything from byte `n` to the end, with `n` equal
// to the length giving the empty string.
2969 fn test_slice_from() {
2970 assert_eq!("abcd".slice_from(0), "abcd");
2971 assert_eq!("abcd".slice_from(2), "cd");
2972 assert_eq!("abcd".slice_from(4), "");
// Checks `slice_to(n)`: everything before byte `n`, with `n == 0` giving the
// empty string.
2975 fn test_slice_to() {
2976 assert_eq!("abcd".slice_to(0), "");
2977 assert_eq!("abcd".slice_to(2), "ab");
2978 assert_eq!("abcd".slice_to(4), "abcd");
// Checks `trim_left_chars` with each kind of matcher used here: a slice of
// chars, a single char, and a closure predicate.
2982 fn test_trim_left_chars() {
// An empty set of chars trims nothing.
2983 let v: &[char] = &[];
2984 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
2985 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
2986 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
2987 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
// Only the leading run is removed; matches further in are kept.
2989 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
2990 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
2991 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
// Checks `trim_right_chars` with each kind of matcher used here: a slice of
// chars, a single char, and a closure predicate.
2995 fn test_trim_right_chars() {
// An empty set of chars trims nothing.
2996 let v: &[char] = &[];
2997 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
2998 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
2999 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
3000 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
// Only the trailing run is removed; matches further in are kept.
3002 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
3003 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
3004 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
// Checks `trim_chars`, which trims both ends, with each kind of matcher used
// here: a slice of chars, a single char, and a closure predicate.
3008 fn test_trim_chars() {
// An empty set of chars trims nothing.
3009 let v: &[char] = &[];
3010 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
3011 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
3012 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
3013 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
// Matches in the interior of the string are kept.
3015 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
3016 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
3017 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
// Checks `trim_left`: leading whitespace is removed, trailing whitespace is kept.
3021 fn test_trim_left() {
3022 assert_eq!("".trim_left(), "");
3023 assert_eq!("a".trim_left(), "a");
3024 assert_eq!(" ".trim_left(), "");
3025 assert_eq!(" blah".trim_left(), "blah");
// `\u3000` is a non-ASCII whitespace character and must be trimmed as well.
3026 assert_eq!(" \u3000 wut".trim_left(), "wut");
3027 assert_eq!("hey ".trim_left(), "hey ");
// Checks `trim_right`: trailing whitespace is removed, leading whitespace is kept.
3031 fn test_trim_right() {
3032 assert_eq!("".trim_right(), "");
3033 assert_eq!("a".trim_right(), "a");
3034 assert_eq!(" ".trim_right(), "");
3035 assert_eq!("blah ".trim_right(), "blah");
// `\u3000` is a non-ASCII whitespace character and must be trimmed as well.
3036 assert_eq!("wut \u3000 ".trim_right(), "wut");
3037 assert_eq!(" hey".trim_right(), " hey");
3042 assert_eq!("".trim(), "");
3043 assert_eq!("a".trim(), "a");
3044 assert_eq!(" ".trim(), "");
3045 assert_eq!(" blah ".trim(), "blah");
3046 assert_eq!("\nwut \u3000 ".trim(), "wut");
3047 assert_eq!(" hey dude ".trim(), "hey dude");
// Checks `is_whitespace`: true when every character is whitespace, which
// includes the empty string; a single non-whitespace character makes it false.
3051 fn test_is_whitespace() {
3052 assert!("".is_whitespace());
3053 assert!(" ".is_whitespace());
3054 assert!("\u2009".is_whitespace()); // Thin space
3055 assert!(" \n\t ".is_whitespace());
3056 assert!(!" _ ".is_whitespace());
// Checks `raw::shift_byte`: it must return 65 and leave `s` equal to "BC".
// NOTE(review): presumably `s` starts out as "ABC" (65 is 'A') -- confirm.
3060 fn test_shift_byte() {
3062 let b = unsafe{raw::shift_byte(&mut s)};
3063 assert_eq!(s, ~"BC");
3064 assert_eq!(b, 65u8);
// Checks `raw::pop_byte`: it must return 67 and leave `s` equal to "AB".
// NOTE(review): presumably `s` starts out as "ABC" (67 is 'C') -- confirm.
3068 fn test_pop_byte() {
3070 let b = unsafe{raw::pop_byte(&mut s)};
3071 assert_eq!(s, ~"AB");
3072 assert_eq!(b, 67u8);
// Checks `raw::from_bytes`: seven bytes of value 65 ('A') become "AAAAAAA".
3076 fn test_unsafe_from_bytes() {
3077 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8];
3078 let b = unsafe { raw::from_bytes(a) };
3079 assert_eq!(b, ~"AAAAAAA");
// Checks the safe `from_bytes` on valid UTF-8 input.
3083 fn test_from_bytes() {
3084 let ss = ~"ศไทย中华Việt Nam";
// `bb` spells out the UTF-8 bytes that `ss` is expected to consist of.
3085 let bb = ~[0xe0_u8, 0xb8_u8, 0xa8_u8,
3086 0xe0_u8, 0xb9_u8, 0x84_u8,
3087 0xe0_u8, 0xb8_u8, 0x97_u8,
3088 0xe0_u8, 0xb8_u8, 0xa2_u8,
3089 0xe4_u8, 0xb8_u8, 0xad_u8,
3090 0xe5_u8, 0x8d_u8, 0x8e_u8,
3091 0x56_u8, 0x69_u8, 0xe1_u8,
3092 0xbb_u8, 0x87_u8, 0x74_u8,
3093 0x20_u8, 0x4e_u8, 0x61_u8,
3097 assert_eq!(ss, from_bytes(bb));
// Round trip through `bytes!` for text that includes 4-byte characters.
3098 assert_eq!(~"𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰",
3099 from_bytes(bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰")));
// Checks that `is_utf8` rejects overlong encodings, i.e. sequences that use
// more bytes than the encoded value needs (2-, 3- and 4-byte forms below).
3103 fn test_is_utf8_deny_overlong() {
3104 assert!(!is_utf8([0xc0, 0x80]));
3105 assert!(!is_utf8([0xc0, 0xae]));
3106 assert!(!is_utf8([0xe0, 0x80, 0x80]));
3107 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
3108 assert!(!is_utf8([0xe0, 0x81, 0x81]));
3109 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
// Traps the `not_utf8` condition and checks the message it is raised with.
// The byte vector starts with 0xff, matching the 255 in the expected message.
3114 #[ignore(cfg(windows))]
3115 fn test_from_bytes_fail() {
3116 use str::not_utf8::cond;
3118 let bb = ~[0xff_u8, 0xb8_u8, 0xa8_u8,
3119 0xe0_u8, 0xb9_u8, 0x84_u8,
3120 0xe0_u8, 0xb8_u8, 0x97_u8,
3121 0xe0_u8, 0xb8_u8, 0xa2_u8,
3122 0xe4_u8, 0xb8_u8, 0xad_u8,
3123 0xe5_u8, 0x8d_u8, 0x8e_u8,
3124 0x56_u8, 0x69_u8, 0xe1_u8,
3125 0xbb_u8, 0x87_u8, 0x74_u8,
3126 0x20_u8, 0x4e_u8, 0x61_u8,
3129 let mut error_happened = false;
// The trap handler verifies the message and records that it ran.
3130 let _x = do cond.trap(|err| {
3131 assert_eq!(err, ~"from_bytes: input is not UTF-8; first bad byte is 255");
3132 error_happened = true;
// Fails the test if the handler above was never invoked.
3137 assert!(error_happened);
// Checks `raw::from_c_str`: a pointer to seven 65s followed by a 0 byte must
// yield "AAAAAAA".
3141 fn test_raw_from_c_str() {
3143 let a = ~[65, 65, 65, 65, 65, 65, 65, 0];
3144 let b = vec::raw::to_ptr(a);
3145 let c = raw::from_c_str(b);
3146 assert_eq!(c, ~"AAAAAAA");
// Checks `as_bytes` on an empty string, an ASCII string, and multi-byte text
// (compared against the expected byte values listed in `v`).
3151 fn test_as_bytes() {
3154 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3155 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3158 assert_eq!("".as_bytes(), &[]);
3159 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3160 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
// Borrows the bytes of `s` via `as_bytes`; see the note in the body for intent.
// NOTE(review): two more functions in this module carry this same name --
// presumably they are kept apart by cfg attributes; confirm.
3165 #[ignore(cfg(windows))]
3167 fn test_as_bytes_fail() {
3168 // Don't double free. (I'm not sure if this exercises the
3169 // original problem code path anymore.)
3171 let _bytes = s.as_bytes();
// Borrows the bytes of `s` via `as_bytes_with_null`; see the note in the body.
// NOTE(review): the name says `as_bytes` but the body calls
// `as_bytes_with_null`, and the same name is used by two other functions in
// this module -- presumably cfg-gated variants; confirm and consider renaming.
3177 #[ignore(cfg(windows))]
3179 fn test_as_bytes_fail() {
3180 // Don't double free. (I'm not sure if this exercises the
3181 // original problem code path anymore.)
3183 let _bytes = s.as_bytes_with_null();
// Checks `to_bytes_with_null`: the owned byte vector ends with a 0 byte, so
// the empty string becomes `~[0]`.
3189 fn test_to_bytes_with_null() {
3190 let s = ~"ศไทย中华Việt Nam";
3192 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3193 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3196 assert_eq!((~"").to_bytes_with_null(), ~[0]);
3197 assert_eq!((~"abc").to_bytes_with_null(),
3198 ~['a' as u8, 'b' as u8, 'c' as u8, 0]);
3199 assert_eq!(s.to_bytes_with_null(), v);
// Borrows the bytes of `s` via `as_bytes`; see the note in the body for intent.
// NOTE(review): two earlier functions in this module carry this same name --
// presumably they are kept apart by cfg attributes; confirm.
3203 #[ignore(cfg(windows))]
3205 fn test_as_bytes_fail() {
3206 // Don't double free. (I'm not sure if this exercises the
3207 // original problem code path anymore.)
3209 let _bytes = s.as_bytes();
// Checks the pointer/length pair that `as_imm_buf` passes to its closure, for
// an empty string and for "hello".
3214 fn test_as_imm_buf() {
3215 do "".as_imm_buf |_, len| {
3219 do "hello".as_imm_buf |buf, len| {
// Read the buffer one byte at a time and compare against the literal.
3222 assert_eq!(*ptr::offset(buf, 0), 'h' as u8);
3223 assert_eq!(*ptr::offset(buf, 1), 'e' as u8);
3224 assert_eq!(*ptr::offset(buf, 2), 'l' as u8);
3225 assert_eq!(*ptr::offset(buf, 3), 'l' as u8);
3226 assert_eq!(*ptr::offset(buf, 4), 'o' as u8);
// Checks `subslice_offset`, which gives the byte offset of a sub-slice within
// the string it was sliced from.
3232 fn test_subslice_offset() {
3233 let a = "kernelsprite";
3234 let b = a.slice(7, a.len());
3235 let c = a.slice(0, a.len() - 6);
3236 assert_eq!(a.subslice_offset(b), 7);
3237 assert_eq!(a.subslice_offset(c), 0);
// Lines produced by `line_iter` are sub-slices of `string`, so the offset of
// each one can be recovered the same way.
3239 let string = "a\nb\nc";
3240 let mut lines = ~[];
3241 foreach line in string.line_iter() { lines.push(line) }
3242 assert_eq!(string.subslice_offset(lines[0]), 0);
3243 assert_eq!(string.subslice_offset(lines[1]), 2);
3244 assert_eq!(string.subslice_offset(lines[2]), 4);
// Asks for the offset of `b` within `a`, where the two are separate literals.
// NOTE(review): presumably this is expected to fail because `b` is not a
// sub-slice of `a`; no should-fail marker is visible on this function -- confirm.
3249 fn test_subslice_offset_2() {
3250 let a = "alchemiter";
3251 let b = "cruxtruder";
3252 a.subslice_offset(b);
// Converts a string to an owned byte vector and back again with `from_bytes`.
// NOTE(review): the index and the two lengths set up below are presumably used
// for a byte-by-byte comparison of `s1` and `s2` -- TODO confirm.
3256 fn vec_str_conversions() {
3257 let s1: ~str = ~"All mimsy were the borogoves";
3259 let v: ~[u8] = s1.as_bytes().to_owned();
3260 let s2: ~str = from_bytes(v);
3261 let mut i: uint = 0u;
3262 let n1: uint = s1.len();
3263 let n2: uint = v.len();
// Checks `contains` for substrings: the empty needle is always contained, even
// in the empty string, and a non-empty needle is never found in "".
3276 fn test_contains() {
3277 assert!("abcde".contains("bcd"));
3278 assert!("abcde".contains("abcd"));
3279 assert!("abcde".contains("bcde"));
3280 assert!("abcde".contains(""));
3281 assert!("".contains(""));
3282 assert!(!"abcde".contains("def"));
3283 assert!(!"".contains("a"));
// Multi-byte text: the last needle uses characters that all occur in `data`,
// but not next to each other.
3285 let data = ~"ประเทศไทย中华Việt Nam";
3286 assert!(data.contains("ประเ"));
3287 assert!(data.contains("ะเ"));
3288 assert!(data.contains("中华"));
3289 assert!(!data.contains("ไท华"));
// Checks `contains_char` for present and absent characters and for "".
3293 fn test_contains_char() {
3294 assert!("abc".contains_char('b'));
3295 assert!("a".contains_char('a'));
3296 assert!(!"abc".contains_char('d'));
3297 assert!(!"".contains_char('a'));
3302 assert_eq!(~"", "".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3303 assert_eq!(~"YMCA", "ymca".map_chars(|c| unsafe {libc::toupper(c as c_char)} as char));
3310 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3311 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3312 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3313 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3316 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3317 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3318 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3319 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3320 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3323 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3324 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3325 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3326 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3327 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3328 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3329 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3330 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3332 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3333 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3334 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3335 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3336 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3337 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3338 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3339 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3340 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3341 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3342 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3345 foreach p in pairs.iter() {
3346 let (s, u) = (*p).clone();
3347 assert!(s.to_utf16() == u);
3348 assert!(from_utf16(u) == s);
3349 assert!(from_utf16(s.to_utf16()) == s);
3350 assert!(from_utf16(u).to_utf16() == u);
3356 let s = ~"ศไทย中华Việt Nam";
3357 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3359 foreach ch in v.iter() {
3360 assert!(s.char_at(pos) == *ch);
3361 pos += from_char(*ch).len();
// Walks the string backwards with `char_at_reverse`, comparing each character
// against `v` read in reverse order.
3366 fn test_char_at_reverse() {
3367 let s = ~"ศไทย中华Việt Nam";
3368 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
// `pos` starts just past the last byte of the string.
3369 let mut pos = s.len();
3370 foreach ch in v.rev_iter() {
3371 assert!(s.char_at_reverse(pos) == *ch);
// Step back by the encoded byte length of the character just checked.
3372 pos -= from_char(*ch).len();
// Checks `escape_unicode`: every character is escaped, as `\x..` for code
// points up to 0xff, `\u....` up to 0xffff, and `\U........` above that.
3377 fn test_escape_unicode() {
3378 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3379 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3380 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3381 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3382 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3383 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3384 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3385 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3386 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
// Checks `escape_default`: letters and spaces pass through unchanged; CR, LF,
// tab, quotes and backslash get backslash escapes; the non-ASCII characters
// tested here come out as `\u`/`\U` escapes.
3390 fn test_escape_default() {
3391 assert_eq!("abc".escape_default(), ~"abc");
3392 assert_eq!("a c".escape_default(), ~"a c");
3393 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3394 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3395 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3396 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3397 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3398 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
// Checks `to_managed` (conversion to an `@str`) on a literal and on a sub-slice.
3402 fn test_to_managed() {
3403 assert_eq!("abc".to_managed(), @"abc");
3404 assert_eq!("abcdef".slice(1, 5).to_managed(), @"bcde");
// Checks the total ordering of string slices through `cmp`: a string sorts
// after its own proper prefix, equal to itself, and otherwise according to
// the first position at which the two strings differ.
fn test_total_ord() {
    // Every comparison is wrapped in `assert!`. Written as a bare
    // `a.cmp(b) == X;` statement the boolean is computed and thrown away, so
    // the test would pass no matter what `cmp` returned.
    assert!("1234".cmp(& &"123") == Greater);
    assert!("123".cmp(& &"1234") == Less);
    assert!("1234".cmp(& &"1234") == Equal);
    // These differ at index 5 ('5' < '6'), so the longer string sorts first.
    assert!("12345555".cmp(& &"123456") == Less);
    // These differ at index 0 ('2' > '1'), so length does not matter.
    assert!("22".cmp(& &"1234") == Greater);
}
// Checks the `ch` field returned by `char_range_at` at the starting byte
// offset of each character in a string mixing 1-, 2-, 3- and 4-byte characters.
3417 fn test_char_range_at() {
3418 let data = ~"b¢€𤭢𤭢€¢b";
3419 assert_eq!('b', data.char_range_at(0).ch);
3420 assert_eq!('¢', data.char_range_at(1).ch);
3421 assert_eq!('€', data.char_range_at(3).ch);
3422 assert_eq!('𤭢', data.char_range_at(6).ch);
3423 assert_eq!('𤭢', data.char_range_at(10).ch);
3424 assert_eq!('€', data.char_range_at(14).ch);
3425 assert_eq!('¢', data.char_range_at(17).ch);
3426 assert_eq!('b', data.char_range_at(19).ch);
// Checks that `char_range_at_reverse` called at offset 0 reports `next == 0`.
3430 fn test_char_range_at_reverse_underflow() {
3431 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3436 #[allow(unnecessary_allocation)];
3438 ($s1:expr, $s2:expr, $e:expr) => {
3439 assert_eq!($s1 + $s2, $e);
3440 assert_eq!($s1.to_owned() + $s2, $e);
3441 assert_eq!($s1.to_managed() + $s2, $e);
3445 t!("foo", "bar", ~"foobar");
3446 t!("foo", @"bar", ~"foobar");
3447 t!("foo", ~"bar", ~"foobar");
3448 t!("ศไทย中", "华Việt Nam", ~"ศไทย中华Việt Nam");
3449 t!("ศไทย中", @"华Việt Nam", ~"ศไทย中华Việt Nam");
3450 t!("ศไทย中", ~"华Việt Nam", ~"ศไทย中华Việt Nam");
// Checks the forward char iterator `iter()` against the expected characters in
// `v`, element by element, and that all of `v` was consumed.
3454 fn test_iterator() {
3456 let s = ~"ศไทย中华Việt Nam";
3457 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3460 let mut it = s.iter();
3463 assert_eq!(c, v[pos]);
// `pos` must have reached the end of `v`: nothing was skipped or missing.
3466 assert_eq!(pos, v.len());
// Checks the reverse char iterator `rev_iter()`; `v` lists the characters of
// `s` from last to first.
3470 fn test_rev_iterator() {
3472 let s = ~"ศไทย中华Việt Nam";
3473 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3476 let mut it = s.rev_iter();
3479 assert_eq!(c, v[pos]);
// `pos` must have reached the end of `v`: nothing was skipped or missing.
3482 assert_eq!(pos, v.len());
// Checks `byte_iter()`: each byte yielded is compared with the expected byte
// value at the same position in `v`.
3486 fn test_byte_iterator() {
3487 let s = ~"ศไทย中华Việt Nam";
3489 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3490 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3495 foreach b in s.byte_iter() {
3496 assert_eq!(b, v[pos]);
// Checks `byte_rev_iter()`: bytes are yielded last-to-first and compared with
// `v` while indexing it from the back.
3502 fn test_byte_rev_iterator() {
3503 let s = ~"ศไทย中华Việt Nam";
3505 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3506 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
// Start one past the last index of `v`.
3509 let mut pos = v.len();
3511 foreach b in s.byte_rev_iter() {
3513 assert_eq!(b, v[pos]);
// Checks `char_offset_iter()`, which yields (byte offset, char) pairs: `p`
// holds the expected offsets and `v` the expected characters.
3518 fn test_char_offset_iterator() {
3520 let s = "ศไทย中华Việt Nam";
3521 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
3522 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3525 let mut it = s.char_offset_iter();
3528 assert_eq!(c, (p[pos], v[pos]));
// Both expectation tables must have been consumed completely.
3531 assert_eq!(pos, v.len());
3532 assert_eq!(pos, p.len());
// Checks `char_offset_rev_iter()`, which yields (byte offset, char) pairs from
// the end of the string; `p` and `v` are listed last-to-first.
3536 fn test_char_offset_rev_iterator() {
3538 let s = "ศไทย中华Việt Nam";
3539 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
3540 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3543 let mut it = s.char_offset_rev_iter();
3546 assert_eq!(c, (p[pos], v[pos]));
// Both expectation tables must have been consumed completely.
3549 assert_eq!(pos, v.len());
3550 assert_eq!(pos, p.len());
// Checks `split_iter` with a char separator and with an equivalent closure
// predicate; both forms must produce the same pieces.
3554 fn test_split_char_iterator() {
3555 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
3557 let split: ~[&str] = data.split_iter(' ').collect();
3558 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3560 let split: ~[&str] = data.split_iter(|c: char| c == ' ').collect();
3561 assert_eq!(split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Non-ASCII separator.
3564 let split: ~[&str] = data.split_iter('ä').collect();
3565 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3567 let split: ~[&str] = data.split_iter(|c: char| c == 'ä').collect();
3568 assert_eq!(split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn_iter` with a count of 3: three splits are made and the fourth
// piece is the rest of the string, left unsplit.
3572 fn test_splitn_char_iterator() {
3573 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator, as a char and as a closure predicate.
3575 let split: ~[&str] = data.splitn_iter(' ', 3).collect();
3576 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3578 let split: ~[&str] = data.splitn_iter(|c: char| c == ' ', 3).collect();
3579 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Non-ASCII separator, as a char and as a closure predicate.
3582 let split: ~[&str] = data.splitn_iter('ä', 3).collect();
3583 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3585 let split: ~[&str] = data.splitn_iter(|c: char| c == 'ä', 3).collect();
3586 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Checks the last argument of `split_options_iter`: with `true` the empty piece
// after a trailing separator is yielded, with `false` it is dropped.
3590 fn test_split_char_iterator_no_trailing() {
3591 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3593 let split: ~[&str] = data.split_options_iter('\n', 1000, true).collect();
3594 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3596 let split: ~[&str] = data.split_options_iter('\n', 1000, false).collect();
3597 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
// Checks `word_iter`: words are separated by runs of spaces, tabs and
// newlines, and no empty words are produced for leading or trailing whitespace.
3601 fn test_word_iter() {
3602 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3603 let words: ~[&str] = data.word_iter().collect();
3604 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `line_iter`: empty lines are preserved, and the result is the same
// whether or not the input ends with a newline.
3608 fn test_line_iter() {
3609 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3610 let lines: ~[&str] = data.line_iter().collect();
3611 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3613 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3614 let lines: ~[&str] = data.line_iter().collect();
3615 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
// Checks `split_str_iter`, which splits on a whole separator string.
3619 fn test_split_str_iterator() {
// Helper: collects the pieces of `s` split by `sep`; `u` is the expected result.
3620 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3621 let v: ~[&str] = s.split_str_iter(sep).collect();
// Separator absent: the whole input comes back as a single piece.
3624 t("--1233345--", "12345", ~["--1233345--"]);
// Leading or trailing separators produce empty pieces at that end.
3625 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3626 t("::hello::there", "::", ~["", "hello", "there"]);
3627 t("hello::there::", "::", ~["hello", "there", ""]);
3628 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3629 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3630 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3631 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3632 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
// Matches are taken left to right without overlapping, so a leftover "z" remains.
3634 t("zz", "zz", ~["",""]);
3635 t("ok", "z", ~["ok"]);
3636 t("zzz", "zz", ~["","z"]);
3637 t("zzzzz", "zz", ~["","","z"]);
// Checks the `Zero` implementation for string types.
3641 fn test_str_zero() {
// For any string type `S`: the zero value is the empty string and `is_zero()` holds.
3643 fn t<S: Zero + Str>() {
3644 let s: S = Zero::zero();
3645 assert_eq!(s.as_slice(), "");
3646 assert!(s.is_zero());
// Checks that the string types can be used through the `Container` trait.
3655 fn test_str_container() {
// Helper: adds up `len()` over a slice of any `Container` type.
3656 fn sum_len<S: Container>(v: &[S]) -> uint {
3657 v.iter().transform(|x| x.len()).sum()
// The same total is expected for borrowed, managed and owned strings.
3661 assert_eq!(5, sum_len(["012", "", "34"]));
3662 assert_eq!(5, sum_len([@"01", @"2", @"34", @""]));
3663 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3664 assert_eq!(5, sum_len([s.as_slice()]));
3670 use extra::test::BenchHarness;
// Benchmark fixture: an ASCII-only byte string whose length is asserted to be
// exactly 100.
// NOTE(review): presumably the measured operation is `is_utf8` on `s`, as the
// name suggests -- confirm.
3674 fn is_utf8_100_ascii(bh: &mut BenchHarness) {
3676 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
3677 Lorem ipsum dolor sit amet, consectetur. ");
3679 assert_eq!(100, s.len());
// Benchmark fixture: a byte string made of multi-byte characters whose encoded
// length is asserted to be exactly 100.
// NOTE(review): presumably the measured operation is `is_utf8` on `s`, as the
// name suggests -- confirm.
3686 fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
3687 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
3688 assert_eq!(100, s.len());
// Benchmarks `map_chars` over an ASCII string made of repeated "Hello".
3695 fn map_chars_100_ascii(bh: &mut BenchHarness) {
3696 let s = "HelloHelloHelloHelloHelloHelloHelloHelloHelloHello\
3697 HelloHelloHelloHelloHelloHelloHelloHelloHelloHello";
// The mapping moves every character to the next code point.
3699 s.map_chars(|c| ((c as uint) + 1) as char);
// Benchmarks `map_chars` over a string of repeated multi-byte characters.
3704 fn map_chars_100_multibytes(bh: &mut BenchHarness) {
3705 let s = "𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3706 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3707 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑\
3708 𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑𐌀𐌖𐌋𐌄𐌑";
// The mapping moves every character to the next code point.
3710 s.map_chars(|c| ((c as uint) + 1) as char);