1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with its ownership. This means that
20 there are two common kinds of strings in Rust:
22 * `~str` - This is an owned string. This type obeys all of the normal semantics
23 of the `~T` types, meaning that it has one, and only one, owner. This
24 type cannot be implicitly copied, and is moved out of when passed to
27 * `&str` - This is the borrowed string type. This type of string can only be
28 created from the other kind of string. As the name "borrowed"
29 implies, this type of string is owned elsewhere, and this string
30 cannot be moved out of.
32 As an example, here are a few different kinds of strings.
36 let owned_string = ~"I am an owned string";
37 let borrowed_string1 = "This string is borrowed with the 'static lifetime";
38 let borrowed_string2: &str = owned_string; // owned strings can be borrowed
42 From the example above, you can see that Rust has two different kinds of string
43 literals. The owned literals correspond to the owned string types, but the
44 "borrowed literal" is actually more akin to C's concept of a static string.
46 When a string is declared without a `~` sigil, then the string is allocated
47 statically in the rodata of the executable/library. The string then has the
48 type `&'static str` meaning that the string is valid for the `'static`
49 lifetime, otherwise known as the lifetime of the entire program. As can be
50 inferred from the type, these static strings are not mutable.
54 Many languages have immutable strings by default, and Rust has a particular
55 flavor of this idea. As with the rest of Rust's types, strings are immutable by
56 default. If a string is declared as `mut`, however, it may be mutated. This
57 works the same way as the rest of Rust's type system in the sense that if
58 there's a mutable reference to a string, there may only be one mutable reference
59 to that string. With these guarantees, strings can easily transition between
60 being mutable/immutable with the same benefits of having mutable strings in
64 let mut buf = ~"testing";
67 assert_eq!(buf, ~"testing 123");
72 Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
73 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
74 encoded UTF-8 sequences. Additionally, strings are not null-terminated
75 and can contain null codepoints.
77 The actual representation of strings has a direct mapping to vectors:
79 * `~str` is the same as `~[u8]`
80 * `&str` is the same as `&[u8]`
88 use clone::{Clone, DeepClone};
89 use container::{Container, Mutable};
90 use iter::{Iterator, FromIterator, Extendable, range};
91 use iter::{Filter, AdditiveIterator, Map};
92 use iter::{Rev, DoubleEndedIterator, ExactSize};
94 use num::{Saturating, checked_next_power_of_two};
95 use option::{None, Option, Some};
99 use from_str::FromStr;
101 use vec::{OwnedVector, OwnedCloneableVector, ImmutableVector, MutableVector};
102 use default::Default;
103 use send_str::{SendStr, SendStrOwned};
104 use unstable::raw::Repr;
107 Section: Creating a string
110 /// Consumes a vector of bytes to create a new utf-8 string.
111 /// Returns None if the vector contains invalid UTF-8.
112 pub fn from_utf8_owned(vv: ~[u8]) -> Option<~str> {
114 Some(unsafe { raw::from_utf8_owned(vv) })
120 /// Converts a vector to a string slice without performing any allocations.
122 /// Once the slice has been validated as utf-8, it is transmuted in-place and
123 /// returned as a '&str' instead of a '&[u8]'
125 /// Returns None if the slice is not utf-8.
126 pub fn from_utf8<'a>(v: &'a [u8]) -> Option<&'a str> {
128 Some(unsafe { raw::from_utf8(v) })
132 impl ToStr for ~str {
134 fn to_str(&self) -> ~str { self.to_owned() }
137 impl FromStr for ~str {
139 fn from_str(s: &str) -> Option<~str> { Some(s.to_owned()) }
142 impl<'a> ToStr for &'a str {
144 fn to_str(&self) -> ~str { self.to_owned() }
147 /// Convert a byte to a UTF-8 string
151 /// Fails if invalid UTF-8
152 pub fn from_byte(b: u8) -> ~str {
154 unsafe { ::cast::transmute(~[b]) }
157 /// Convert a char to a string
158 pub fn from_char(ch: char) -> ~str {
164 /// Convert a vector of chars to a string
165 pub fn from_chars(chs: &[char]) -> ~str {
167 buf.reserve(chs.len());
168 for ch in chs.iter() {
175 pub fn push_str(lhs: &mut ~str, rhs: &str) {
179 /// Methods for vectors of strings
180 pub trait StrVector {
181 /// Concatenate a vector of strings.
182 fn concat(&self) -> ~str;
184 /// Concatenate a vector of strings, placing a given separator between each.
185 fn connect(&self, sep: &str) -> ~str;
188 impl<'a, S: Str> StrVector for &'a [S] {
189 fn concat(&self) -> ~str {
190 if self.is_empty() { return ~""; }
192 // `len` calculation may overflow, but push_str will check boundaries
193 let len = self.iter().map(|s| s.as_slice().len()).sum();
195 let mut result = with_capacity(len);
197 for s in self.iter() {
198 result.push_str(s.as_slice())
203 fn connect(&self, sep: &str) -> ~str {
204 if self.is_empty() { return ~""; }
207 if sep.is_empty() { return self.concat(); }
209 // this is wrong without the guarantee that `self` is non-empty
210 // `len` calculation may overflow, but push_str will check boundaries
211 let len = sep.len() * (self.len() - 1)
212 + self.iter().map(|s| s.as_slice().len()).sum();
213 let mut result = with_capacity(len);
214 let mut first = true;
216 for s in self.iter() {
220 result.push_str(sep);
222 result.push_str(s.as_slice());
228 /// Something that can be used to compare against a character
230 /// Determine if the splitter should split at the given character
231 fn matches(&self, char) -> bool;
232 /// Indicate if this is only concerned about ASCII characters,
233 /// which can allow for a faster implementation.
234 fn only_ascii(&self) -> bool;
237 impl CharEq for char {
239 fn matches(&self, c: char) -> bool { *self == c }
241 fn only_ascii(&self) -> bool { (*self as uint) < 128 }
244 impl<'a> CharEq for 'a |char| -> bool {
246 fn matches(&self, c: char) -> bool { (*self)(c) }
248 fn only_ascii(&self) -> bool { false }
251 impl CharEq for extern "Rust" fn(char) -> bool {
253 fn matches(&self, c: char) -> bool { (*self)(c) }
255 fn only_ascii(&self) -> bool { false }
258 impl<'a, C: CharEq> CharEq for &'a [C] {
260 fn matches(&self, c: char) -> bool {
261 self.iter().any(|m| m.matches(c))
264 fn only_ascii(&self) -> bool {
265 self.iter().all(|m| m.only_ascii())
273 /// External iterator for a string's characters.
274 /// Use with the `std::iter` module.
276 pub struct Chars<'a> {
277 /// The slice remaining to be iterated
278 priv string: &'a str,
281 impl<'a> Iterator<char> for Chars<'a> {
283 fn next(&mut self) -> Option<char> {
284 // Decode the next codepoint, then update
285 // the slice to be just the remaining part
286 if self.string.len() != 0 {
287 let CharRange {ch, next} = self.string.char_range_at(0);
289 self.string = raw::slice_unchecked(self.string, next, self.string.len());
298 fn size_hint(&self) -> (uint, Option<uint>) {
299 (self.string.len().saturating_add(3)/4, Some(self.string.len()))
303 impl<'a> DoubleEndedIterator<char> for Chars<'a> {
305 fn next_back(&mut self) -> Option<char> {
306 if self.string.len() != 0 {
307 let CharRange {ch, next} = self.string.char_range_at_reverse(self.string.len());
309 self.string = raw::slice_unchecked(self.string, 0, next);
318 /// External iterator for a string's characters and their byte offsets.
319 /// Use with the `std::iter` module.
321 pub struct CharOffsets<'a> {
322 /// The original string to be iterated
323 priv string: &'a str,
324 priv iter: Chars<'a>,
327 impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
329 fn next(&mut self) -> Option<(uint, char)> {
330 // Compute the byte offset by using the pointer offset between
331 // the original string slice and the iterator's remaining part
332 let offset = self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
333 self.iter.next().map(|ch| (offset, ch))
337 fn size_hint(&self) -> (uint, Option<uint>) {
338 self.iter.size_hint()
342 impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
344 fn next_back(&mut self) -> Option<(uint, char)> {
345 self.iter.next_back().map(|ch| {
346 let offset = self.iter.string.len() +
347 self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
353 /// External iterator for a string's characters in reverse order.
354 /// Use with the `std::iter` module.
355 pub type RevChars<'a> = Rev<Chars<'a>>;
357 /// External iterator for a string's characters and their byte offsets in reverse order.
358 /// Use with the `std::iter` module.
359 pub type RevCharOffsets<'a> = Rev<CharOffsets<'a>>;
361 /// External iterator for a string's bytes.
362 /// Use with the `std::iter` module.
364 Map<'a, &'a u8, u8, vec::Items<'a, u8>>;
366 /// External iterator for a string's bytes in reverse order.
367 /// Use with the `std::iter` module.
368 pub type RevBytes<'a> = Rev<Bytes<'a>>;
370 /// An iterator over the substrings of a string, separated by `sep`.
372 pub struct CharSplits<'a, Sep> {
373 /// The slice remaining to be iterated
374 priv string: &'a str,
376 /// Whether an empty string at the end is allowed
377 priv allow_trailing_empty: bool,
378 priv only_ascii: bool,
382 /// An iterator over the substrings of a string, separated by `sep`,
383 /// starting from the back of the string.
384 pub type RevCharSplits<'a, Sep> = Rev<CharSplits<'a, Sep>>;
386 /// An iterator over the substrings of a string, separated by `sep`,
387 /// splitting at most `count` times.
389 pub struct CharSplitsN<'a, Sep> {
390 priv iter: CharSplits<'a, Sep>,
391 /// The number of splits remaining
396 /// An iterator over the words of a string, separated by a sequence of whitespace
398 Filter<'a, &'a str, CharSplits<'a, extern "Rust" fn(char) -> bool>>;
400 /// An iterator over the lines of a string, separated by either `\n` or `\r\n`.
401 pub type AnyLines<'a> =
402 Map<'a, &'a str, &'a str, CharSplits<'a, char>>;
404 impl<'a, Sep> CharSplits<'a, Sep> {
406 fn get_end(&mut self) -> Option<&'a str> {
407 if !self.finished && (self.allow_trailing_empty || self.string.len() > 0) {
408 self.finished = true;
416 impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplits<'a, Sep> {
418 fn next(&mut self) -> Option<&'a str> {
419 if self.finished { return None }
421 let mut next_split = None;
423 for (idx, byte) in self.string.bytes().enumerate() {
424 if self.sep.matches(byte as char) && byte < 128u8 {
425 next_split = Some((idx, idx + 1));
430 for (idx, ch) in self.string.char_indices() {
431 if self.sep.matches(ch) {
432 next_split = Some((idx, self.string.char_range_at(idx).next));
438 Some((a, b)) => unsafe {
439 let elt = raw::slice_unchecked(self.string, 0, a);
440 self.string = raw::slice_unchecked(self.string, b, self.string.len());
443 None => self.get_end(),
448 impl<'a, Sep: CharEq> DoubleEndedIterator<&'a str>
449 for CharSplits<'a, Sep> {
451 fn next_back(&mut self) -> Option<&'a str> {
452 if self.finished { return None }
454 if !self.allow_trailing_empty {
455 self.allow_trailing_empty = true;
456 match self.next_back() {
457 Some(elt) if !elt.is_empty() => return Some(elt),
458 _ => if self.finished { return None }
461 let len = self.string.len();
462 let mut next_split = None;
465 for (idx, byte) in self.string.bytes().enumerate().rev() {
466 if self.sep.matches(byte as char) && byte < 128u8 {
467 next_split = Some((idx, idx + 1));
472 for (idx, ch) in self.string.char_indices_rev() {
473 if self.sep.matches(ch) {
474 next_split = Some((idx, self.string.char_range_at(idx).next));
480 Some((a, b)) => unsafe {
481 let elt = raw::slice_unchecked(self.string, b, len);
482 self.string = raw::slice_unchecked(self.string, 0, a);
485 None => { self.finished = true; Some(self.string) }
490 impl<'a, Sep: CharEq> Iterator<&'a str> for CharSplitsN<'a, Sep> {
492 fn next(&mut self) -> Option<&'a str> {
495 if self.invert { self.iter.next_back() } else { self.iter.next() }
502 /// An iterator over the start and end indices of the matches of a
503 /// substring within a larger string
505 pub struct MatchIndices<'a> {
506 priv haystack: &'a str,
507 priv needle: &'a str,
511 /// An iterator over the substrings of a string separated by a given
514 pub struct StrSplits<'a> {
515 priv it: MatchIndices<'a>,
520 impl<'a> Iterator<(uint, uint)> for MatchIndices<'a> {
522 fn next(&mut self) -> Option<(uint, uint)> {
523 // See Issue #1932 for why this is a naive search
524 let (h_len, n_len) = (self.haystack.len(), self.needle.len());
525 let mut match_start = 0;
528 while self.position < h_len {
529 if self.haystack[self.position] == self.needle[match_i] {
530 if match_i == 0 { match_start = self.position; }
534 if match_i == n_len {
536 return Some((match_start, self.position));
539 // failed match, backtrack
542 self.position = match_start;
551 impl<'a> Iterator<&'a str> for StrSplits<'a> {
553 fn next(&mut self) -> Option<&'a str> {
554 if self.finished { return None; }
556 match self.it.next() {
557 Some((from, to)) => {
558 let ret = Some(self.it.haystack.slice(self.last_end, from));
563 self.finished = true;
564 Some(self.it.haystack.slice(self.last_end, self.it.haystack.len()))
570 // Helper functions used for Unicode normalization
571 fn canonical_sort(comb: &mut [(char, u8)]) {
573 use tuple::CloneableTuple;
575 let len = comb.len();
576 for i in range(0, len) {
577 let mut swapped = false;
578 for j in range(1, len-i) {
579 let classA = comb[j-1].second();
580 let classB = comb[j].second();
581 if classA != 0 && classB != 0 && classA > classB {
586 if !swapped { break; }
591 enum NormalizationForm {
596 /// External iterator for a string's normalization's characters.
597 /// Use with the `std::iter` module.
599 pub struct Normalizations<'a> {
600 priv kind: NormalizationForm,
601 priv iter: Chars<'a>,
602 priv buffer: ~[(char, u8)],
606 impl<'a> Iterator<char> for Normalizations<'a> {
608 fn next(&mut self) -> Option<char> {
609 use unicode::decompose::canonical_combining_class;
611 match self.buffer.head() {
617 Some(&(c, _)) if self.sorted => {
621 _ => self.sorted = false
624 let decomposer = match self.kind {
625 NFD => char::decompose_canonical,
626 NFKD => char::decompose_compatible
630 for ch in self.iter {
632 let class = canonical_combining_class(d);
633 if class == 0 && !self.sorted {
634 canonical_sort(self.buffer);
637 self.buffer.push((d, class));
639 if self.sorted { break }
644 canonical_sort(self.buffer);
648 match self.buffer.shift() {
653 Some((c, _)) => Some(c),
658 fn size_hint(&self) -> (uint, Option<uint>) {
659 let (lower, _) = self.iter.size_hint();
664 /// Replace all occurrences of one string with another
668 /// * s - The string containing substrings to replace
669 /// * from - The string to replace
670 /// * to - The replacement string
674 /// The original string with all occurrences of `from` replaced with `to`
675 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
676 let mut result = ~"";
677 let mut last_end = 0;
678 for (start, end) in s.match_indices(from) {
679 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
683 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
688 Section: Comparing strings
691 // share the implementation of the lang-item vs. non-lang-item
694 fn eq_slice_(a: &str, b: &str) -> bool {
695 a.len() == b.len() && unsafe {
696 libc::memcmp(a.as_ptr() as *libc::c_void,
697 b.as_ptr() as *libc::c_void,
698 a.len() as libc::size_t) == 0
702 /// Bytewise slice equality
706 pub fn eq_slice(a: &str, b: &str) -> bool {
710 /// Bytewise slice equality
713 pub fn eq_slice(a: &str, b: &str) -> bool {
717 /// Bytewise string equality
719 #[lang="uniq_str_eq"]
721 pub fn eq(a: &~str, b: &~str) -> bool {
727 pub fn eq(a: &~str, b: &~str) -> bool {
735 /// Determines if a vector of bytes contains valid UTF-8
736 pub fn is_utf8(v: &[u8]) -> bool {
739 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
740 unsafe { *xs.unsafe_ref(i) }
743 let v_i = unsafe_get(v, i);
747 let w = utf8_char_width(v_i);
748 if w == 0u { return false; }
751 if nexti > total { return false; }
753 // 2-byte encoding is for codepoints \u0080 to \u07ff
754 // first C2 80 last DF BF
755 // 3-byte encoding is for codepoints \u0800 to \uffff
756 // first E0 A0 80 last EF BF BF
757 // excluding surrogate codepoints \ud800 to \udfff
758 // ED A0 80 to ED BF BF
759 // 4-byte encoding is for codepoints \u10000 to \u10ffff
760 // first F0 90 80 80 last F4 8F BF BF
762 // Use the UTF-8 syntax from the RFC
764 // https://tools.ietf.org/html/rfc3629
766 // UTF8-2 = %xC2-DF UTF8-tail
767 // UTF8-3 = %xE0 %xA0-BF UTF8-tail / %xE1-EC 2( UTF8-tail ) /
768 // %xED %x80-9F UTF8-tail / %xEE-EF 2( UTF8-tail )
769 // UTF8-4 = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
770 // %xF4 %x80-8F 2( UTF8-tail )
771 // UTF8-tail = %x80-BF
773 2 => if unsafe_get(v, i + 1) & 192u8 != TAG_CONT_U8 {
777 unsafe_get(v, i + 1),
778 unsafe_get(v, i + 2) & 192u8) {
779 (0xE0 , 0xA0 .. 0xBF, TAG_CONT_U8) => (),
780 (0xE1 .. 0xEC, 0x80 .. 0xBF, TAG_CONT_U8) => (),
781 (0xED , 0x80 .. 0x9F, TAG_CONT_U8) => (),
782 (0xEE .. 0xEF, 0x80 .. 0xBF, TAG_CONT_U8) => (),
786 unsafe_get(v, i + 1),
787 unsafe_get(v, i + 2) & 192u8,
788 unsafe_get(v, i + 3) & 192u8) {
789 (0xF0 , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) => (),
790 (0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) => (),
791 (0xF4 , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => (),
/// Determines if a vector of `u16` contains valid UTF-16
///
/// A slice is valid when every code unit is either a non-surrogate value
/// (`<= 0xD7FF` or `>= 0xE000`) or the first half of a well-formed surrogate
/// pair: a lead surrogate (`0xD800..0xDBFF`) immediately followed by a trail
/// surrogate (`0xDC00..0xDFFF`). The empty slice is valid.
///
/// Returns `false` (rather than failing) when a lead surrogate is the last
/// code unit of the slice.
pub fn is_utf16(v: &[u16]) -> bool {
    let len = v.len();
    let mut i = 0;
    while i < len {
        let u = v[i];

        if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
            // Code unit outside the surrogate range: stands on its own.
            i += 1;
        } else {
            // A surrogate needs a partner; make sure one exists before
            // indexing past `i`, otherwise the lookup below would go out of
            // bounds on a truncated pair.
            if i + 1 >= len { return false; }
            let u2 = v[i + 1];
            // The first unit must be a lead surrogate and the second a trail
            // surrogate; any other combination is malformed.
            if u < 0xD800_u16 || u > 0xDBFF_u16 { return false; }
            if u2 < 0xDC00_u16 || u2 > 0xDFFF_u16 { return false; }
            i += 2;
        }
    }
    return true;
}
823 /// Iterates over the utf-16 characters in the specified slice, yielding each
824 /// decoded unicode character to the function provided.
828 /// * Fails on invalid utf-16 data
829 pub fn utf16_chars(v: &[u16], f: |char|) {
832 while i < len && v[i] != 0u16 {
835 if u <= 0xD7FF_u16 || u >= 0xE000_u16 {
836 f(unsafe { cast::transmute(u as u32) });
841 assert!(u >= 0xD800_u16 && u <= 0xDBFF_u16);
842 assert!(u2 >= 0xDC00_u16 && u2 <= 0xDFFF_u16);
843 let mut c: u32 = (u - 0xD800_u16) as u32;
845 c |= (u2 - 0xDC00_u16) as u32;
847 f(unsafe { cast::transmute(c) });
853 /// Allocates a new string from the utf-16 slice provided
854 pub fn from_utf16(v: &[u16]) -> ~str {
856 buf.reserve(v.len());
857 utf16_chars(v, |ch| buf.push_char(ch));
861 /// Allocates a new string with the specified capacity. The string returned is
862 /// the empty string, but has capacity for much more.
864 pub fn with_capacity(capacity: uint) -> ~str {
866 cast::transmute(vec::with_capacity::<~[u8]>(capacity))
870 // https://tools.ietf.org/html/rfc3629
// Lookup table indexed by the first byte of a UTF-8 sequence. Each entry is
// the total length in bytes of a sequence that starts with that byte, or 0
// when the byte cannot start a sequence.
871 static UTF8_CHAR_WIDTH: [u8, ..256] = [
// 0x00-0x7F: single-byte sequences
872 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
873 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x1F
874 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
875 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x3F
876 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
877 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x5F
878 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
879 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // 0x7F
// 0x80-0xBF: continuation bytes, never a valid first byte
880 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
881 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0x9F
882 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
883 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 0xBF
// 0xC0 and 0xC1 are marked invalid; 0xC2-0xDF start 2-byte sequences
884 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
885 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // 0xDF
// 0xE0-0xEF start 3-byte sequences
886 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, // 0xEF
// 0xF0-0xF4 start 4-byte sequences; 0xF5-0xFF are marked invalid
887 4,4,4,4,4,0,0,0,0,0,0,0,0,0,0,0, // 0xFF
890 /// Given a first byte, determine how many bytes are in this UTF-8 character
891 pub fn utf8_char_width(b: u8) -> uint {
892 return UTF8_CHAR_WIDTH[b] as uint;
895 /// Struct that contains a `char` and the index of the first byte of
896 /// the next `char` in a string. This can be used as a data structure
897 /// for iterating over the UTF-8 bytes of a string.
898 pub struct CharRange {
901 /// Index of the first byte of the next `char`
905 // Return the initial codepoint accumulator for the first byte.
906 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
907 // for width 3, and 3 bits for width 4
908 macro_rules! utf8_first_byte(
909 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as uint)
912 // return the value of $ch updated with continuation byte $byte
913 macro_rules! utf8_acc_cont_byte(
914 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as uint)
917 static TAG_CONT_U8: u8 = 128u8;
919 /// Unsafe operations
922 use container::Container;
926 use str::{is_utf8, OwnedStr, StrSlice};
928 use vec::{MutableVector, ImmutableVector, OwnedVector};
929 use unstable::raw::Slice;
931 /// Create a Rust string from a *u8 buffer of the given length
932 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
933 let mut v: ~[u8] = vec::with_capacity(len);
934 ptr::copy_memory(v.as_mut_ptr(), buf, len);
941 #[lang="strdup_uniq"]
943 #[allow(missing_doc)]
945 pub unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
946 from_buf_len(ptr, len)
949 /// Create a Rust string from a null-terminated C string
950 pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
955 curr = ptr::offset(buf, i);
957 from_buf_len(buf as *u8, i as uint)
960 /// Converts a slice of bytes to a string slice without checking
961 /// that the string contains valid UTF-8.
962 pub unsafe fn from_utf8<'a>(v: &'a [u8]) -> &'a str {
966 /// Converts an owned vector of bytes to a new owned string. This assumes
967 /// that the utf-8-ness of the vector has already been validated
969 pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
973 /// Converts a byte to a string.
974 pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(~[u]) }
976 /// Form a slice from a C string. Unsafe because the caller must ensure the
977 /// C string has the static lifetime, or else the return value may be
978 /// invalidated later.
979 pub unsafe fn c_str_to_static_slice(s: *libc::c_char) -> &'static str {
985 curr = ptr::offset(s, len as int);
987 let v = Slice { data: s, len: len };
988 assert!(is_utf8(::cast::transmute(v)));
992 /// Takes a bytewise (not UTF-8) slice from a string.
994 /// Returns the substring from [`begin`..`end`).
998 /// If begin is greater than end.
999 /// If end is greater than the length of the string.
1001 pub unsafe fn slice_bytes<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
1002 assert!(begin <= end);
1003 assert!(end <= s.len());
1004 slice_unchecked(s, begin, end)
1007 /// Takes a bytewise (not UTF-8) slice from a string.
1009 /// Returns the substring from [`begin`..`end`).
1011 /// Caller must check slice boundaries!
1013 pub unsafe fn slice_unchecked<'a>(s: &'a str, begin: uint, end: uint) -> &'a str {
1014 cast::transmute(Slice {
1015 data: s.as_ptr().offset(begin as int),
1020 /// Appends a byte to a string.
1021 /// The caller must preserve the valid UTF-8 property.
1023 pub unsafe fn push_byte(s: &mut ~str, b: u8) {
1024 as_owned_vec(s).push(b)
1027 /// Appends a vector of bytes to a string.
1028 /// The caller must preserve the valid UTF-8 property.
1030 pub unsafe fn push_bytes(s: &mut ~str, bytes: &[u8]) {
1031 vec::bytes::push_bytes(as_owned_vec(s), bytes);
1034 /// Removes the last byte from a string and returns it.
1035 /// The caller must preserve the valid UTF-8 property.
1036 pub unsafe fn pop_byte(s: &mut ~str) -> u8 {
1038 assert!((len > 0u));
1039 let b = s[len - 1u];
1044 /// Removes the first byte from a string and returns it.
1045 /// The caller must preserve the valid UTF-8 property.
1046 pub unsafe fn shift_byte(s: &mut ~str) -> u8 {
1048 assert!((len > 0u));
1050 *s = s.slice(1, len).to_owned();
1054 /// Access the str in its vector representation.
1055 /// The caller must preserve the valid UTF-8 property when modifying.
1057 pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
1061 /// Sets the length of a string
1063 /// This will explicitly set the size of the string, without actually
1064 /// modifying its buffers, so it is up to the caller to ensure that
1065 /// the string is actually the specified size.
1067 fn test_from_buf_len() {
1069 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
1071 let c = from_buf_len(b, 3u);
1072 assert_eq!(c, ~"AAA");
1078 Section: Trait implementations
1082 #[allow(missing_doc)]
1084 use container::Container;
1085 use cmp::{TotalOrd, Ordering, Less, Equal, Greater, Eq, Ord, Equiv, TotalEq};
1088 use option::{Some, None};
1089 use str::{Str, StrSlice, OwnedStr, eq_slice};
1091 impl<'a> Add<&'a str,~str> for &'a str {
1093 fn add(&self, rhs: & &'a str) -> ~str {
1094 let mut ret = self.to_owned();
1100 impl<'a> TotalOrd for &'a str {
1102 fn cmp(&self, other: & &'a str) -> Ordering {
1103 for (s_b, o_b) in self.bytes().zip(other.bytes()) {
1104 match s_b.cmp(&o_b) {
1105 Greater => return Greater,
1106 Less => return Less,
1111 self.len().cmp(&other.len())
1115 impl TotalOrd for ~str {
1117 fn cmp(&self, other: &~str) -> Ordering { self.as_slice().cmp(&other.as_slice()) }
1120 impl<'a> Eq for &'a str {
1122 fn eq(&self, other: & &'a str) -> bool {
1123 eq_slice((*self), (*other))
1126 fn ne(&self, other: & &'a str) -> bool { !(*self).eq(other) }
1131 fn eq(&self, other: &~str) -> bool {
1132 eq_slice((*self), (*other))
1136 impl<'a> TotalEq for &'a str {
1138 fn equals(&self, other: & &'a str) -> bool {
1139 eq_slice((*self), (*other))
1143 impl TotalEq for ~str {
1145 fn equals(&self, other: &~str) -> bool {
1146 eq_slice((*self), (*other))
1150 impl<'a> Ord for &'a str {
1152 fn lt(&self, other: & &'a str) -> bool { self.cmp(other) == Less }
1157 fn lt(&self, other: &~str) -> bool { self.cmp(other) == Less }
1160 impl<'a, S: Str> Equiv<S> for &'a str {
1162 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1165 impl<'a, S: Str> Equiv<S> for ~str {
1167 fn equiv(&self, other: &S) -> bool { eq_slice(*self, other.as_slice()) }
1174 /// Any string that can be represented as a slice
1176 /// Work with `self` as a slice.
1177 fn as_slice<'a>(&'a self) -> &'a str;
1179 /// Convert `self` into a ~str, not making a copy if possible
1180 fn into_owned(self) -> ~str;
1183 impl<'a> Str for &'a str {
1185 fn as_slice<'a>(&'a self) -> &'a str { *self }
1188 fn into_owned(self) -> ~str { self.to_owned() }
1191 impl<'a> Str for ~str {
1193 fn as_slice<'a>(&'a self) -> &'a str {
1194 let s: &'a str = *self; s
1198 fn into_owned(self) -> ~str { self }
1201 impl<'a> Container for &'a str {
1203 fn len(&self) -> uint {
1208 impl Container for ~str {
1210 fn len(&self) -> uint { self.as_slice().len() }
1213 impl Mutable for ~str {
1214 /// Remove all content, make the string empty
1216 fn clear(&mut self) {
1223 /// Methods for string slices
1224 pub trait StrSlice<'a> {
1225 /// Returns true if one string contains another
1229 /// - needle - The string to look for
1230 fn contains<'a>(&self, needle: &'a str) -> bool;
1232 /// Returns true if a string contains a char.
1236 /// - needle - The char to look for
1237 fn contains_char(&self, needle: char) -> bool;
1239 /// An iterator over the characters of `self`. Note, this iterates
1240 /// over unicode code-points, not unicode graphemes.
1245 /// let v: ~[char] = "abc åäö".chars().collect();
1246 /// assert_eq!(v, ~['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
1248 fn chars(&self) -> Chars<'a>;
1250 /// An iterator over the characters of `self`, in reverse order.
1251 fn chars_rev(&self) -> RevChars<'a>;
1253 /// An iterator over the bytes of `self`
1254 fn bytes(&self) -> Bytes<'a>;
1256 /// An iterator over the bytes of `self`, in reverse order
1257 fn bytes_rev(&self) -> RevBytes<'a>;
1259 /// An iterator over the characters of `self` and their byte offsets.
1260 fn char_indices(&self) -> CharOffsets<'a>;
1262 /// An iterator over the characters of `self` and their byte offsets,
1263 /// in reverse order.
1264 fn char_indices_rev(&self) -> RevCharOffsets<'a>;
1266 /// An iterator over substrings of `self`, separated by characters
1267 /// matched by `sep`.
1272 /// let v: ~[&str] = "Mary had a little lamb".split(' ').collect();
1273 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1275 /// let v: ~[&str] = "abc1def2ghi".split(|c: char| c.is_digit()).collect();
1276 /// assert_eq!(v, ~["abc", "def", "ghi"]);
1278 /// let v: ~[&str] = "lionXXtigerXleopard".split('X').collect();
1279 /// assert_eq!(v, ~["lion", "", "tiger", "leopard"]);
1281 fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
1283 /// An iterator over substrings of `self`, separated by characters
1284 /// matched by `sep`, restricted to splitting at most `count`
1290 /// let v: ~[&str] = "Mary had a little lambda".splitn(' ', 2).collect();
1291 /// assert_eq!(v, ~["Mary", "had", "a little lambda"]);
1293 /// let v: ~[&str] = "abc1def2ghi".splitn(|c: char| c.is_digit(), 1).collect();
1294 /// assert_eq!(v, ~["abc", "def2ghi"]);
1296 /// let v: ~[&str] = "lionXXtigerXleopard".splitn('X', 2).collect();
1297 /// assert_eq!(v, ~["lion", "", "tigerXleopard"]);
1299 fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
1301 /// An iterator over substrings of `self`, separated by characters
1302 /// matched by `sep`.
1304 /// Equivalent to `split`, except that the trailing substring
1305 /// is skipped if empty (terminator semantics).
1310 /// let v: ~[&str] = "A.B.".split_terminator('.').collect();
1311 /// assert_eq!(v, ~["A", "B"]);
1313 /// let v: ~[&str] = "A..B..".split_terminator('.').collect();
1314 /// assert_eq!(v, ~["A", "", "B", ""]);
1316 fn split_terminator<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep>;
1318 /// An iterator over substrings of `self`, separated by characters
1319 /// matched by `sep`, in reverse order.
1324 /// let v: ~[&str] = "Mary had a little lamb".rsplit(' ').collect();
1325 /// assert_eq!(v, ~["lamb", "little", "a", "had", "Mary"]);
1327 /// let v: ~[&str] = "abc1def2ghi".rsplit(|c: char| c.is_digit()).collect();
1328 /// assert_eq!(v, ~["ghi", "def", "abc"]);
1330 /// let v: ~[&str] = "lionXXtigerXleopard".rsplit('X').collect();
1331 /// assert_eq!(v, ~["leopard", "tiger", "", "lion"]);
1333 fn rsplit<Sep: CharEq>(&self, sep: Sep) -> RevCharSplits<'a, Sep>;
1335 /// An iterator over substrings of `self`, separated by characters
1336 /// matched by `sep`, starting from the end of the string.
1337 /// Restricted to splitting at most `count` times.
1342 /// let v: ~[&str] = "Mary had a little lamb".rsplitn(' ', 2).collect();
1343 /// assert_eq!(v, ~["lamb", "little", "Mary had a"]);
1345 /// let v: ~[&str] = "abc1def2ghi".rsplitn(|c: char| c.is_digit(), 1).collect();
1346 /// assert_eq!(v, ~["ghi", "abc1def"]);
1348 /// let v: ~[&str] = "lionXXtigerXleopard".rsplitn('X', 2).collect();
1349 /// assert_eq!(v, ~["leopard", "tiger", "lionX"]);
1351 fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint) -> CharSplitsN<'a, Sep>;
1353 /// An iterator over the start and end indices of the disjoint
1354 /// matches of `sep` within `self`.
1356 /// That is, each returned value `(start, end)` satisfies
1357 /// `self.slice(start, end) == sep`. For matches of `sep` within
1358 /// `self` that overlap, only the indices corresponding to the
1359 /// first match are returned.
1364 /// let v: ~[(uint, uint)] = "abcXXXabcYYYabc".match_indices("abc").collect();
1365 /// assert_eq!(v, ~[(0,3), (6,9), (12,15)]);
1367 /// let v: ~[(uint, uint)] = "1abcabc2".match_indices("abc").collect();
1368 /// assert_eq!(v, ~[(1,4), (4,7)]);
1370 /// let v: ~[(uint, uint)] = "ababa".match_indices("aba").collect();
1371 /// assert_eq!(v, ~[(0, 3)]); // only the first `aba`
1373 fn match_indices(&self, sep: &'a str) -> MatchIndices<'a>;
1375 /// An iterator over the substrings of `self` separated by `sep`.
1380 /// let v: ~[&str] = "abcXXXabcYYYabc".split_str("abc").collect();
1381 /// assert_eq!(v, ~["", "XXX", "YYY", ""]);
1383 /// let v: ~[&str] = "1abcabc2".split_str("abc").collect();
1384 /// assert_eq!(v, ~["1", "", "2"]);
1386 fn split_str(&self, &'a str) -> StrSplits<'a>;
1388 /// An iterator over the lines of a string (subsequences separated
1389 /// by `\n`). This does not include the empty string after a
1395 /// let four_lines = "foo\nbar\n\nbaz\n";
1396 /// let v: ~[&str] = four_lines.lines().collect();
1397 /// assert_eq!(v, ~["foo", "bar", "", "baz"]);
1399 fn lines(&self) -> CharSplits<'a, char>;
1401 /// An iterator over the lines of a string, separated by either
1402 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
1403 /// empty trailing line.
1408 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
1409 /// let v: ~[&str] = four_lines.lines_any().collect();
1410 /// assert_eq!(v, ~["foo", "bar", "", "baz"]);
1412 fn lines_any(&self) -> AnyLines<'a>;
1414 /// An iterator over the words of a string (subsequences separated
1415 /// by any sequence of whitespace). Sequences of whitespace are
1416 /// collapsed, so empty "words" are not included.
1421 /// let some_words = " Mary had\ta little \n\t lamb";
1422 /// let v: ~[&str] = some_words.words().collect();
1423 /// assert_eq!(v, ~["Mary", "had", "a", "little", "lamb"]);
1425 fn words(&self) -> Words<'a>;
1427 /// An Iterator over the string in Unicode Normalization Form D
1428 /// (canonical decomposition).
1429 fn nfd_chars(&self) -> Normalizations<'a>;
1431 /// An Iterator over the string in Unicode Normalization Form KD
1432 /// (compatibility decomposition).
1433 fn nfkd_chars(&self) -> Normalizations<'a>;
1435 /// Returns true if the string contains only whitespace.
1437 /// Whitespace characters are determined by `char::is_whitespace`.
1442 /// assert!(" \t\n".is_whitespace());
1443 /// assert!("".is_whitespace());
1445 /// assert!( !"abc".is_whitespace());
1447 fn is_whitespace(&self) -> bool;
1449 /// Returns true if the string contains only alphanumeric code
1452 /// Alphanumeric characters are determined by `char::is_alphanumeric`.
1457 /// assert!("Löwe老虎Léopard123".is_alphanumeric());
1458 /// assert!("".is_alphanumeric());
1460 /// assert!( !" &*~".is_alphanumeric());
1462 fn is_alphanumeric(&self) -> bool;
1464 /// Returns the number of Unicode code points (`char`) that a
1467 /// This does not perform any normalization, and is `O(n)`, since
1468 /// UTF-8 is a variable width encoding of code points.
1470 /// *Warning*: The number of code points in a string does not directly
1471 /// correspond to the number of visible characters or width of the
1472 /// visible text due to composing characters, and double- and
1473 /// zero-width ones.
1475 /// See also `.len()` for the byte length.
1480 /// // composed forms of `ö` and `é`
1481 /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
1482 /// // decomposed forms of `ö` and `é`
1483 /// let d = "Lo\u0308we 老虎 Le\u0301opard";
1485 /// assert_eq!(c.char_len(), 15);
1486 /// assert_eq!(d.char_len(), 17);
1488 /// assert_eq!(c.len(), 21);
1489 /// assert_eq!(d.len(), 23);
1491 /// // the two strings *look* the same
1492 /// println!("{}", c);
1493 /// println!("{}", d);
1495 fn char_len(&self) -> uint;
1497 /// Returns a slice of the given string from the byte range
1498 /// [`begin`..`end`).
1500 /// This operation is `O(1)`.
1502 /// Fails when `begin` and `end` do not point to valid characters
1503 /// or point beyond the last character of the string.
1505 /// See also `slice_to` and `slice_from` for slicing prefixes and
1506 /// suffixes of strings, and `slice_chars` for slicing based on
1507 /// code point counts.
1512 /// let s = "Löwe 老虎 Léopard";
1513 /// assert_eq!(s.slice(0, 1), "L");
1515 /// assert_eq!(s.slice(1, 9), "öwe 老");
1517 /// // these will fail:
1518 /// // byte 2 lies within `ö`:
1519 /// // s.slice(2, 3);
1521 /// // byte 8 lies within `老`
1522 /// // s.slice(1, 8);
1524 /// // byte 100 is outside the string
1525 /// // s.slice(3, 100);
1527 fn slice(&self, begin: uint, end: uint) -> &'a str;
1529 /// Returns a slice of the string from `begin` to its end.
1531 /// Equivalent to `self.slice(begin, self.len())`.
1533 /// Fails when `begin` does not point to a valid character, or is
1536 /// See also `slice`, `slice_to` and `slice_chars`.
1537 fn slice_from(&self, begin: uint) -> &'a str;
1539 /// Returns a slice of the string from the beginning to byte
1542 /// Equivalent to `self.slice(0, end)`.
1544 /// Fails when `end` does not point to a valid character, or is
1547 /// See also `slice`, `slice_from` and `slice_chars`.
1548 fn slice_to(&self, end: uint) -> &'a str;
1550 /// Returns a slice of the string from the character range
1551 /// [`begin`..`end`).
1553 /// That is, start at the `begin`-th code point of the string and
1554 /// continue to the `end`-th code point. This does not detect or
1555 /// handle edge cases such as leaving a combining character as the
1556 /// first code point of the string.
1558 /// Due to the design of UTF-8, this operation is `O(end)`, since the
1559 /// string is scanned from its start. See `slice`, `slice_to` and `slice_from` for `O(1)`
1560 /// variants that use byte indices rather than code point
1563 /// Fails if `begin` > `end` or if either `begin` or `end` is
1564 /// beyond the last character of the string.
1569 /// let s = "Löwe 老虎 Léopard";
1570 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
1571 /// assert_eq!(s.slice_chars(5, 7), "老虎");
1573 fn slice_chars(&self, begin: uint, end: uint) -> &'a str;
1575 /// Returns true if `needle` is a prefix of the string.
1576 fn starts_with(&self, needle: &str) -> bool;
1578 /// Returns true if `needle` is a suffix of the string.
1579 fn ends_with(&self, needle: &str) -> bool;
1581 /// Escape each char in `self` with `char::escape_default`.
1582 fn escape_default(&self) -> ~str;
1584 /// Escape each char in `self` with `char::escape_unicode`.
1585 fn escape_unicode(&self) -> ~str;
1587 /// Returns a string with leading and trailing whitespace removed.
1588 fn trim(&self) -> &'a str;
1590 /// Returns a string with leading whitespace removed.
1591 fn trim_left(&self) -> &'a str;
1593 /// Returns a string with trailing whitespace removed.
1594 fn trim_right(&self) -> &'a str;
1596 /// Returns a string with characters that match `to_trim` removed.
1600 /// * to_trim - a character matcher
1605 /// assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar")
1606 /// assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar")
1607 /// assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar")
1609 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'a str;
1611 /// Returns a string with leading characters that match `to_trim` removed.
1615 /// * to_trim - a character matcher
1620 /// assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11")
1621 /// assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12")
1622 /// assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123")
1624 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'a str;
1626 /// Returns a string with trailing characters that match `to_trim` removed.
1630 /// * to_trim - a character matcher
1635 /// assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar")
1636 /// assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar")
1637 /// assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar")
1639 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'a str;
1641 /// Replace all occurrences of one string with another.
1645 /// * `from` - The string to replace
1646 /// * `to` - The replacement string
1650 /// The original string with all occurrences of `from` replaced with `to`.
1655 /// let s = ~"Do you know the muffin man,
1656 /// The muffin man, the muffin man, ...";
1658 /// assert_eq!(s.replace("muffin man", "little lamb"),
1659 /// ~"Do you know the little lamb,
1660 /// The little lamb, the little lamb, ...");
1662 /// // not found, so no change.
1663 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
1665 fn replace(&self, from: &str, to: &str) -> ~str;
1667 /// Copy a slice into a new owned str.
1668 fn to_owned(&self) -> ~str;
1670 /// Converts to a vector of `u16` encoded as UTF-16.
1671 fn to_utf16(&self) -> ~[u16];
1673 /// Copy a slice into a new `SendStr`.
1674 fn to_send_str(&self) -> SendStr;
1676 /// Check that `index`-th byte lies at the start and/or end of a
1677 /// UTF-8 code point sequence.
1679 /// The start and end of the string (when `index == self.len()`)
1680 /// are considered to be boundaries.
1682 /// Fails if `index` is greater than `self.len()`.
1687 /// let s = "Löwe 老虎 Léopard";
1688 /// assert!(s.is_char_boundary(0));
1690 /// assert!(s.is_char_boundary(6));
1691 /// assert!(s.is_char_boundary(s.len()));
1693 /// // second byte of `ö`
1694 /// assert!(!s.is_char_boundary(2));
1696 /// // third byte of `老`
1697 /// assert!(!s.is_char_boundary(8));
1699 fn is_char_boundary(&self, index: uint) -> bool;
1701 /// Pluck a character out of a string and return the index of the next
1704 /// This function can be used to iterate over the unicode characters of a
1709 /// This example manually iterates through the characters of a
1710 /// string; this should normally be done by `.chars()` or
1711 /// `.char_indices`.
1714 /// use std::str::CharRange;
1716 /// let s = "中华Việt Nam";
1718 /// while i < s.len() {
1719 /// let CharRange {ch, next} = s.char_range_at(i);
1720 /// println!("{}: {}", i, ch);
1742 /// * s - The string
1743 /// * i - The byte offset of the char to extract
1747 /// A record {ch: char, next: uint} containing the char value and the byte
1748 /// index of the next unicode character.
1752 /// If `i` is greater than or equal to the length of the string.
1753 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1754 fn char_range_at(&self, start: uint) -> CharRange;
1756 /// Given a byte position and a str, return the previous char and its position.
1758 /// This function can be used to iterate over a unicode string in reverse.
1760 /// Returns 0 for next index if called on start index 0.
1761 fn char_range_at_reverse(&self, start: uint) -> CharRange;
1763 /// Plucks the character starting at the `i`th byte of a string
1764 fn char_at(&self, i: uint) -> char;
1766 /// Plucks the character ending at the `i`th byte of a string
1767 fn char_at_reverse(&self, i: uint) -> char;
1769 /// Work with the byte buffer of a string as a byte slice.
1770 fn as_bytes(&self) -> &'a [u8];
1772 /// Returns the byte index of the first character of `self` that
1773 /// matches `search`.
1777 /// `Some` containing the byte index of the first matching character
1778 /// or `None` if there is no match
1783 /// let s = "Löwe 老虎 Léopard";
1785 /// assert_eq!(s.find('L'), Some(0));
1786 /// assert_eq!(s.find('é'), Some(14));
1788 /// // the first space
1789 /// assert_eq!(s.find(|c: char| c.is_whitespace()), Some(5));
1791 /// // neither are found
1792 /// assert_eq!(s.find(&['1', '2']), None);
1794 fn find<C: CharEq>(&self, search: C) -> Option<uint>;
1796 /// Returns the byte index of the last character of `self` that
1797 /// matches `search`.
1801 /// `Some` containing the byte index of the last matching character
1802 /// or `None` if there is no match.
1807 /// let s = "Löwe 老虎 Léopard";
1809 /// assert_eq!(s.rfind('L'), Some(13));
1810 /// assert_eq!(s.rfind('é'), Some(14));
1812 /// // the second space
1813 /// assert_eq!(s.rfind(|c: char| c.is_whitespace()), Some(12));
1815 /// // searches for an occurrence of either `1` or `2`, but neither are found
1816 /// assert_eq!(s.rfind(&['1', '2']), None);
1818 fn rfind<C: CharEq>(&self, search: C) -> Option<uint>;
1820 /// Returns the byte index of the first matching substring
1824 /// * `needle` - The string to search for
1828 /// `Some` containing the byte index of the first matching substring
1829 /// or `None` if there is no match.
1834 /// let s = "Löwe 老虎 Léopard";
1836 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1837 /// assert_eq!(s.find_str("muffin man"), None);
1839 fn find_str(&self, &str) -> Option<uint>;
1841 /// Given a string, make a new string with repeated copies of it.
1842 fn repeat(&self, nn: uint) -> ~str;
1844 /// Retrieves the first character from a string slice and returns
1845 /// it. This does not allocate a new string; instead, it returns a
1846 /// slice that points one character beyond the character that was
1851 /// If the string does not contain any characters.
1856 /// let s = "Löwe 老虎 Léopard";
1857 /// let (c, s1) = s.slice_shift_char();
1858 /// assert_eq!(c, 'L');
1859 /// assert_eq!(s1, "öwe 老虎 Léopard");
1861 /// let (c, s2) = s1.slice_shift_char();
1862 /// assert_eq!(c, 'ö');
1863 /// assert_eq!(s2, "we 老虎 Léopard");
1865 fn slice_shift_char(&self) -> (char, &'a str);
1867 /// Levenshtein Distance between two strings.
1868 fn lev_distance(&self, t: &str) -> uint;
1870 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1872 /// Fails if `inner` is not a direct slice contained within self.
1877 /// let string = "a\nb\nc";
1878 /// let lines: ~[&str] = string.lines().collect();
1880 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1881 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1882 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1884 fn subslice_offset(&self, inner: &str) -> uint;
1886 /// Return an unsafe pointer to the string's buffer.
1888 /// The caller must ensure that the string outlives this pointer,
1889 /// and that it is not reallocated (e.g. by pushing to the
1891 fn as_ptr(&self) -> *u8;
1894 impl<'a> StrSlice<'a> for &'a str {
1896 fn contains<'a>(&self, needle: &'a str) -> bool {
// Containment is "does a substring search find anything"; the match
// position itself is discarded.
// NOTE(review): the `'a` declared on this method is separate from the
// impl-level `'a` -- presumably it mirrors the trait declaration; confirm.
1897 self.find_str(needle).is_some()
1901 fn contains_char(&self, needle: char) -> bool {
// Delegates to `find`, using the char itself as the matcher; only the
// existence of a match matters, not its byte index.
1902 self.find(needle).is_some()
1906 fn chars(&self) -> Chars<'a> {
// `*self` copies the borrowed `&'a str` out of `&self`, so the iterator
// borrows the string data for `'a` rather than for this call.
1907 Chars{string: *self}
1911 fn chars_rev(&self) -> RevChars<'a> {
1916 fn bytes(&self) -> Bytes<'a> {
// Iterating the byte slice yields references; the `map` copies each byte
// out so the resulting iterator yields `u8` by value.
1917 self.as_bytes().iter().map(|&b| b)
1921 fn bytes_rev(&self) -> RevBytes<'a> {
1926 fn char_indices(&self) -> CharOffsets<'a> {
// Pairs a fresh `chars()` iterator with the slice it came from.
// NOTE(review): presumably `CharOffsets` uses `string` to work out each
// char's byte offset -- its definition is outside this view; confirm.
1927 CharOffsets{string: *self, iter: self.chars()}
1931 fn char_indices_rev(&self) -> RevCharOffsets<'a> {
// The reverse iterator is the forward `char_indices` iterator wrapped in
// `rev()`.
1932 self.char_indices().rev()
1936 fn split<Sep: CharEq>(&self, sep: Sep) -> CharSplits<'a, Sep> {
1939 only_ascii: sep.only_ascii(),
1941 allow_trailing_empty: true,
1947 fn splitn<Sep: CharEq>(&self, sep: Sep, count: uint)
1948 -> CharSplitsN<'a, Sep> {
1950 iter: self.split(sep),
1957 fn split_terminator<Sep: CharEq>(&self, sep: Sep)
1958 -> CharSplits<'a, Sep> {
1960 allow_trailing_empty: false,
1966 fn rsplit<Sep: CharEq>(&self, sep: Sep) -> RevCharSplits<'a, Sep> {
// Reverse splitting is the forward `split` iterator wrapped in `rev()`,
// so it yields the same pieces, last one first.
1967 self.split(sep).rev()
1971 fn rsplitn<Sep: CharEq>(&self, sep: Sep, count: uint)
1972 -> CharSplitsN<'a, Sep> {
1974 iter: self.split(sep),
1981 fn match_indices(&self, sep: &'a str) -> MatchIndices<'a> {
1982 assert!(!sep.is_empty())
1991 fn split_str(&self, sep: &'a str) -> StrSplits<'a> {
1993 it: self.match_indices(sep),
2000 fn lines(&self) -> CharSplits<'a, char> {
// `split_terminator` (not `split`) treats `\n` as a line terminator, so a
// trailing newline does not produce a final empty line.
2001 self.split_terminator('\n')
2004 fn lines_any(&self) -> AnyLines<'a> {
2005 self.lines().map(|line| {
2007 if l > 0 && line[l - 1] == '\r' as u8 { line.slice(0, l - 1) }
2013 fn words(&self) -> Words<'a> {
// Split on every whitespace char, then drop the empty pieces produced by
// runs of adjacent whitespace and by leading/trailing whitespace.
2014 self.split(char::is_whitespace).filter(|s| !s.is_empty())
2018 fn nfd_chars(&self) -> Normalizations<'a> {
2028 fn nfkd_chars(&self) -> Normalizations<'a> {
// `all` holds vacuously when there are no chars, which is why the empty
// string counts as whitespace-only.
2038 fn is_whitespace(&self) -> bool { self.chars().all(char::is_whitespace) }
// `all` holds vacuously when there are no chars, which is why the empty
// string counts as alphanumeric.
2041 fn is_alphanumeric(&self) -> bool { self.chars().all(char::is_alphanumeric) }
// Counts code points by running the `chars()` iterator to the end; this is
// the O(n) walk that the trait documentation warns about.
2044 fn char_len(&self) -> uint { self.chars().len() }
2047 fn slice(&self, begin: uint, end: uint) -> &'a str {
// Both ends must sit on char boundaries; this check is what justifies the
// unchecked byte slice below.
2048 assert!(self.is_char_boundary(begin) && self.is_char_boundary(end));
// NOTE(review): nothing visible here checks `begin <= end`; presumably
// `raw::slice_bytes` asserts that itself -- confirm.
2049 unsafe { raw::slice_bytes(*self, begin, end) }
2053 fn slice_from(&self, begin: uint) -> &'a str {
// Delegates to `slice`, so the char-boundary check on `begin` happens
// there; `self.len()` is always a boundary.
2054 self.slice(begin, self.len())
2058 fn slice_to(&self, end: uint) -> &'a str {
// Only `end` needs checking: the slice always starts at offset 0, the
// start of the string.
2059 assert!(self.is_char_boundary(end));
2060 unsafe { raw::slice_bytes(*self, 0, end) }
2063 fn slice_chars(&self, begin: uint, end: uint) -> &'a str {
2064 assert!(begin <= end);
2066 let mut begin_byte = None;
2067 let mut end_byte = None;
2069 // This could be even more efficient by not decoding,
2070 // only finding the char boundaries
2071 for (idx, _) in self.char_indices() {
2072 if count == begin { begin_byte = Some(idx); }
2073 if count == end { end_byte = Some(idx); break; }
2076 if begin_byte.is_none() && count == begin { begin_byte = Some(self.len()) }
2077 if end_byte.is_none() && count == end { end_byte = Some(self.len()) }
2079 match (begin_byte, end_byte) {
2080 (None, _) => fail!("slice_chars: `begin` is beyond end of string"),
2081 (_, None) => fail!("slice_chars: `end` is beyond end of string"),
2082 (Some(a), Some(b)) => unsafe { raw::slice_bytes(*self, a, b) }
2087 fn starts_with<'a>(&self, needle: &'a str) -> bool {
2088 let n = needle.len();
// Compare raw bytes. The length test comes first (and short-circuits) so
// `slice_to(n)` is never asked for more bytes than `self` has.
2089 self.len() >= n && needle.as_bytes() == self.as_bytes().slice_to(n)
2093 fn ends_with(&self, needle: &str) -> bool {
// m = length of self, n = length of needle, both in bytes.
2094 let (m, n) = (self.len(), needle.len());
// `m >= n` is tested first (and short-circuits) so `m - n` cannot
// underflow; the comparison is on the last `n` raw bytes.
2095 m >= n && needle.as_bytes() == self.as_bytes().slice_from(m - n)
2098 fn escape_default(&self) -> ~str {
2099 let mut out: ~str = ~"";
2100 out.reserve_at_least(self.len());
2101 for c in self.chars() {
2102 c.escape_default(|c| out.push_char(c));
2107 fn escape_unicode(&self) -> ~str {
2108 let mut out: ~str = ~"";
2109 out.reserve_at_least(self.len());
2110 for c in self.chars() {
2111 c.escape_unicode(|c| out.push_char(c));
2117 fn trim(&self) -> &'a str {
// Trim the front first, then the back of what remains.
2118 self.trim_left().trim_right()
2122 fn trim_left(&self) -> &'a str {
// Whitespace trimming is the generic char-trimming routine with
// `char::is_whitespace` as the matcher.
2123 self.trim_left_chars(&char::is_whitespace)
2127 fn trim_right(&self) -> &'a str {
// Whitespace trimming is the generic char-trimming routine with
// `char::is_whitespace` as the matcher.
2128 self.trim_right_chars(&char::is_whitespace)
2132 fn trim_chars<C: CharEq>(&self, to_trim: &C) -> &'a str {
// Strip matching chars from the front, then from the back of the result,
// using the same matcher for both ends.
2133 self.trim_left_chars(to_trim).trim_right_chars(to_trim)
2137 fn trim_left_chars<C: CharEq>(&self, to_trim: &C) -> &'a str {
2138 match self.find(|c: char| !to_trim.matches(c)) {
2140 Some(first) => unsafe { raw::slice_bytes(*self, first, self.len()) }
2145 fn trim_right_chars<C: CharEq>(&self, to_trim: &C) -> &'a str {
2146 match self.rfind(|c: char| !to_trim.matches(c)) {
2149 let next = self.char_range_at(last).next;
2150 unsafe { raw::slice_bytes(*self, 0u, next) }
2155 fn replace(&self, from: &str, to: &str) -> ~str {
2156 let mut result = ~"";
2157 let mut last_end = 0;
2158 for (start, end) in self.match_indices(from) {
2159 result.push_str(unsafe{raw::slice_bytes(*self, last_end, start)});
2160 result.push_str(to);
2163 result.push_str(unsafe{raw::slice_bytes(*self, last_end, self.len())});
2168 fn to_owned(&self) -> ~str {
2169 let len = self.len();
2171 let mut v = vec::with_capacity(len);
2173 ptr::copy_memory(v.as_mut_ptr(), self.as_ptr(), len);
2175 ::cast::transmute(v)
2179 fn to_utf16(&self) -> ~[u16] {
2181 for ch in self.chars() {
2182 // Arithmetic with u32 literals is easier on the eyes than chars.
2183 let mut ch = ch as u32;
2185 if (ch & 0xFFFF_u32) == ch {
2186 // The BMP falls through (assuming non-surrogate, as it
2188 assert!(ch <= 0xD7FF_u32 || ch >= 0xE000_u32);
2191 // Supplementary planes break into surrogates.
2192 assert!(ch >= 0x1_0000_u32 && ch <= 0x10_FFFF_u32);
2194 let w1 = 0xD800_u16 | ((ch >> 10) as u16);
2195 let w2 = 0xDC00_u16 | ((ch as u16) & 0x3FF_u16);
2196 u.push_all([w1, w2])
2203 fn to_send_str(&self) -> SendStr {
// Always produces the owned variant, wrapping a fresh owned copy of the
// slice made by `to_owned`.
2204 SendStrOwned(self.to_owned())
2208 fn is_char_boundary(&self, index: uint) -> bool {
// One-past-the-end counts as a boundary; handle it first because there is
// no byte at that offset to inspect.
2209 if index == self.len() { return true; }
// For `index > self.len()` this indexing is out of range -- presumably the
// failure the trait documentation describes.
2210 let b = self[index];
// Bytes 128..191 have the bit pattern 10xxxxxx, i.e. they are UTF-8
// continuation bytes; any other byte starts a code point.
2211 return b < 128u8 || b >= 192u8;
2215 fn char_range_at(&self, i: uint) -> CharRange {
2216 if self[i] < 128u8 {
2217 return CharRange {ch: self[i] as char, next: i + 1 };
2220 // Multibyte case is a fn to allow char_range_at to inline cleanly
2221 fn multibyte_char_range_at(s: &str, i: uint) -> CharRange {
2222 let mut val = s[i] as uint;
2223 let w = UTF8_CHAR_WIDTH[val] as uint;
2226 val = utf8_first_byte!(val, w);
2227 val = utf8_acc_cont_byte!(val, s[i + 1]);
2228 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
2229 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
2231 return CharRange {ch: unsafe { transmute(val as u32) }, next: i + w};
2234 return multibyte_char_range_at(*self, i);
// Decodes the char starting at byte `i` and discards the `next` offset.
// NOTE(review): `char_at` appears a second time further down in this impl
// (just before `char_at_reverse`) with the same body; a method can only be
// defined once per impl, so confirm which copy is real and drop the other.
2238 fn char_at(&self, i: uint) -> char { self.char_range_at(i).ch }
2241 fn char_range_at_reverse(&self, start: uint) -> CharRange {
2242 let mut prev = start;
2244 prev = prev.saturating_sub(1);
2245 if self[prev] < 128 { return CharRange{ch: self[prev] as char, next: prev} }
2247 // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
2248 fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
2249 // while there is a previous byte == 10......
2250 while i > 0 && s[i] & 192u8 == TAG_CONT_U8 {
2254 let mut val = s[i] as uint;
2255 let w = UTF8_CHAR_WIDTH[val] as uint;
2258 val = utf8_first_byte!(val, w);
2259 val = utf8_acc_cont_byte!(val, s[i + 1]);
2260 if w > 2 { val = utf8_acc_cont_byte!(val, s[i + 2]); }
2261 if w > 3 { val = utf8_acc_cont_byte!(val, s[i + 3]); }
2263 return CharRange {ch: unsafe { transmute(val as u32) }, next: i};
2266 return multibyte_char_range_at_reverse(*self, prev);
2270 fn char_at(&self, i: uint) -> char {
// Decodes the char starting at byte `i` and discards the `next` offset.
// NOTE(review): a one-line `char_at` with the same body already appears
// earlier in this impl (directly after `char_range_at`); a method can only
// be defined once per impl, so confirm which copy is real and drop the other.
2271 self.char_range_at(i).ch
2275 fn char_at_reverse(&self, i: uint) -> char {
// Decodes the char that ends at byte `i` and discards the `next` offset.
2276 self.char_range_at_reverse(i).ch
2280 fn as_bytes(&self) -> &'a [u8] {
// Reinterprets the string slice as a byte slice without copying.
// NOTE(review): this relies on `&str` and `&[u8]` having the same
// representation -- confirm for this compiler version.
2281 unsafe { cast::transmute(*self) }
2284 fn find<C: CharEq>(&self, search: C) -> Option<uint> {
2285 if search.only_ascii() {
2286 self.bytes().position(|b| search.matches(b as char))
2288 for (index, c) in self.char_indices() {
2289 if search.matches(c) { return Some(index); }
2295 fn rfind<C: CharEq>(&self, search: C) -> Option<uint> {
2296 if search.only_ascii() {
2297 self.bytes().rposition(|b| search.matches(b as char))
2299 for (index, c) in self.char_indices_rev() {
2300 if search.matches(c) { return Some(index); }
2306 fn find_str(&self, needle: &str) -> Option<uint> {
2307 if needle.is_empty() {
2310 self.match_indices(needle)
2312 .map(|(start, _end)| start)
2316 fn repeat(&self, nn: uint) -> ~str {
2317 let mut ret = with_capacity(nn * self.len());
2318 for _ in range(0, nn) {
2319 ret.push_str(*self);
2325 fn slice_shift_char(&self) -> (char, &'a str) {
// Decode the first char; `next` is the byte offset just past it. On an
// empty string this reads byte 0 -- presumably the documented failure.
2326 let CharRange {ch, next} = self.char_range_at(0u);
// `next` comes from the decoder, so it is taken to be a char boundary and
// the unchecked byte slice is used instead of `slice`.
2327 let next_s = unsafe { raw::slice_bytes(*self, next, self.len()) };
2328 return (ch, next_s);
2331 fn lev_distance(&self, t: &str) -> uint {
2332 let slen = self.len();
2335 if slen == 0 { return tlen; }
2336 if tlen == 0 { return slen; }
2338 let mut dcol = vec::from_fn(tlen + 1, |x| x);
2340 for (i, sc) in self.chars().enumerate() {
2342 let mut current = i;
2343 dcol[0] = current + 1;
2345 for (j, tc) in t.chars().enumerate() {
2347 let next = dcol[j + 1];
2350 dcol[j + 1] = current;
2352 dcol[j + 1] = ::cmp::min(current, next);
2353 dcol[j + 1] = ::cmp::min(dcol[j + 1], dcol[j]) + 1;
2363 fn subslice_offset(&self, inner: &str) -> uint {
2364 let a_start = self.as_ptr() as uint;
2365 let a_end = a_start + self.len();
2366 let b_start = inner.as_ptr() as uint;
2367 let b_end = b_start + inner.len();
2369 assert!(a_start <= b_start);
2370 assert!(b_end <= a_end);
2375 fn as_ptr(&self) -> *u8 {
2380 /// Methods for owned strings
2381 pub trait OwnedStr {
2382 /// Appends a string slice to the back of a string, without overallocating.
2383 fn push_str_no_overallocate(&mut self, rhs: &str);
2385 /// Appends a string slice to the back of a string
2386 fn push_str(&mut self, rhs: &str);
2388 /// Appends a character to the back of a string
2389 fn push_char(&mut self, c: char);
2391 /// Remove the final character from a string and return it
2395 /// If the string does not contain any characters
2396 fn pop_char(&mut self) -> char;
2398 /// Remove the first character from a string and return it
2402 /// If the string does not contain any characters
2403 fn shift_char(&mut self) -> char;
2405 /// Prepend a char to a string
2406 fn unshift_char(&mut self, ch: char);
2408 /// Insert a new sub-string at the given position in a string, in O(n + m) time
2409 /// (with n and m the lengths of the string and the substring.)
2410 /// This fails if `position` is not at a character boundary.
2411 fn insert(&mut self, position: uint, substring: &str);
2413 /// Insert a char at the given position in a string, in O(n + m) time
2414 /// (with n and m the lengths of the string and the UTF-8 encoding of the char.)
2415 /// This fails if `position` is not at a character boundary.
2416 fn insert_char(&mut self, position: uint, ch: char);
2418 /// Concatenate two strings together.
2419 fn append(self, rhs: &str) -> ~str;
2421 /// Reserves capacity for exactly `n` bytes in the given string.
2423 /// Assuming single-byte characters, the resulting string will be large
2424 /// enough to hold a string of length `n`.
2426 /// If the capacity for `s` is already equal to or greater than the requested
2427 /// capacity, then no action is taken.
2432 /// * n - The number of bytes to reserve space for
2433 fn reserve(&mut self, n: uint);
2435 /// Reserves capacity for at least `n` bytes in the given string.
2437 /// Assuming single-byte characters, the resulting string will be large
2438 /// enough to hold a string of length `n`.
2440 /// This function will over-allocate in order to amortize the allocation costs
2441 /// in scenarios where the caller may need to repeatedly reserve additional
2444 /// If the capacity for `s` is already equal to or greater than the requested
2445 /// capacity, then no action is taken.
2450 /// * n - The number of bytes to reserve space for
2451 fn reserve_at_least(&mut self, n: uint);
2453 /// Returns the number of single-byte characters the string can hold without
2455 fn capacity(&self) -> uint;
2457 /// Shorten a string to the specified length (which must be <= the current length)
2458 fn truncate(&mut self, len: uint);
2460 /// Consumes the string, returning the underlying byte buffer.
2462 /// The buffer does not have a null terminator.
2463 fn into_bytes(self) -> ~[u8];
2465 /// Sets the length of a string
2467 /// This will explicitly set the size of the string, without actually
2468 /// modifying its buffers, so it is up to the caller to ensure that
2469 /// the string is actually the specified size.
2470 unsafe fn set_len(&mut self, new_len: uint);
2473 impl OwnedStr for ~str {
2475 fn push_str_no_overallocate(&mut self, rhs: &str) {
2476 let new_cap = self.len() + rhs.len();
2477 self.reserve(new_cap);
2482 fn push_str(&mut self, rhs: &str) {
2484 raw::push_bytes(self, rhs.as_bytes());
2489 fn push_char(&mut self, c: char) {
2490 let cur_len = self.len();
2491 // may use up to 4 bytes.
2493 let v = raw::as_owned_vec(self);
2494 v.reserve_additional(4);
2496 // Attempt to not use an intermediate buffer by just pushing bytes
2497 // directly onto this string.
2498 let write_ptr = v.as_mut_ptr().offset(cur_len as int);
2499 let used = vec::raw::mut_buf_as_slice(write_ptr, 4, |slc| c.encode_utf8(slc));
2501 v.set_len(cur_len + used);
2506 fn pop_char(&mut self) -> char {
2507 let end = self.len();
2509 let CharRange {ch, next} = self.char_range_at_reverse(end);
2510 unsafe { self.set_len(next); }
2515 fn shift_char(&mut self) -> char {
2516 let CharRange {ch, next} = self.char_range_at(0u);
2517 *self = self.slice(next, self.len()).to_owned();
2522 fn unshift_char(&mut self, ch: char) {
2523 // This could be more efficient.
2524 let mut new_str = ~"";
2525 new_str.push_char(ch);
2526 new_str.push_str(*self);
2531 fn insert(&mut self, position: uint, substring: &str) {
2532 // This could be more efficient.
2533 let mut new_str = self.slice_to(position).to_owned();
2534 new_str.push_str(substring);
2535 new_str.push_str(self.slice_from(position));
2540 fn insert_char(&mut self, position: uint, ch: char) {
2541 // This could be more efficient.
2542 let mut new_str = self.slice_to(position).to_owned();
2543 new_str.push_char(ch);
2544 new_str.push_str(self.slice_from(position));
2549 fn append(self, rhs: &str) -> ~str {
2550 let mut new_str = self;
2551 new_str.push_str_no_overallocate(rhs);
2556 fn reserve(&mut self, n: uint) {
2558 raw::as_owned_vec(self).reserve(n)
2563 fn reserve_at_least(&mut self, n: uint) {
// Round the request up to a power of two so repeated growth is amortized;
// if the rounding has no result (`None`), fall back to exactly `n`.
2564 self.reserve(checked_next_power_of_two(n).unwrap_or(n))
2568 fn capacity(&self) -> uint {
2570 let buf: &~[u8] = cast::transmute(self);
2576 fn truncate(&mut self, len: uint) {
// Truncation can only shrink the string...
2577 assert!(len <= self.len());
// ...and must not cut a multi-byte code point in half.
2578 assert!(self.is_char_boundary(len));
// With both preconditions checked, only the recorded length is lowered.
2579 unsafe { self.set_len(len); }
2583 fn into_bytes(self) -> ~[u8] {
// Consumes the string and reinterprets its buffer as a byte vector
// without copying.
// NOTE(review): relies on `~str` and `~[u8]` sharing a layout -- confirm
// for this compiler version.
2584 unsafe { cast::transmute(self) }
2588 unsafe fn set_len(&mut self, new_len: uint) {
// Forwards to the underlying byte vector's `set_len`. Nothing is validated
// here, which is why the method is `unsafe`.
2589 raw::as_owned_vec(self).set_len(new_len)
2593 impl Clone for ~str {
2595 fn clone(&self) -> ~str {
2600 impl DeepClone for ~str {
2602 fn deep_clone(&self) -> ~str {
2607 impl FromIterator<char> for ~str {
2609 fn from_iterator<T: Iterator<char>>(iterator: &mut T) -> ~str {
2610 let (lower, _) = iterator.size_hint();
2611 let mut buf = with_capacity(lower);
2612 buf.extend(iterator);
2617 impl Extendable<char> for ~str {
2619 fn extend<T: Iterator<char>>(&mut self, iterator: &mut T) {
2620 let (lower, _) = iterator.size_hint();
2621 let reserve = lower + self.len();
2622 self.reserve_at_least(reserve);
2623 for ch in *iterator {
2629 // This works because every lifetime is a sub-lifetime of 'static
// (the literal `""` is a `&'static str`, so it can be returned as
// `&'a str` for whatever `'a` the caller needs).
2630 impl<'a> Default for &'a str {
2631 fn default() -> &'a str { "" }
// The default owned string is the empty string.
2634 impl Default for ~str {
2635 fn default() -> ~str { ~"" }
2640 use iter::AdditiveIterator;
2644 use send_str::{SendStrOwned, SendStrStatic};
2648 assert!((eq(&~"", &~"")));
2649 assert!((eq(&~"foo", &~"foo")));
2650 assert!((!eq(&~"foo", &~"bar")));
2654 fn test_eq_slice() {
2655 assert!((eq_slice("foobar".slice(0, 3), "foo")));
2656 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
2657 assert!((!eq_slice("foo1", "foo2")));
2663 assert!("" <= "foo");
2664 assert!("foo" <= "foo");
2665 assert!("foo" != "bar");
2670 assert_eq!("".len(), 0u);
2671 assert_eq!("hello world".len(), 11u);
2672 assert_eq!("\x63".len(), 1u);
2673 assert_eq!("\xa2".len(), 2u);
2674 assert_eq!("\u03c0".len(), 2u);
2675 assert_eq!("\u2620".len(), 3u);
2676 assert_eq!("\U0001d11e".len(), 4u);
2678 assert_eq!("".char_len(), 0u);
2679 assert_eq!("hello world".char_len(), 11u);
2680 assert_eq!("\x63".char_len(), 1u);
2681 assert_eq!("\xa2".char_len(), 1u);
2682 assert_eq!("\u03c0".char_len(), 1u);
2683 assert_eq!("\u2620".char_len(), 1u);
2684 assert_eq!("\U0001d11e".char_len(), 1u);
2685 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
2690 assert_eq!("hello".find('l'), Some(2u));
2691 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
2692 assert!("hello".find('x').is_none());
2693 assert!("hello".find(|c:char| c == 'x').is_none());
2694 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
2695 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
2700 assert_eq!("hello".rfind('l'), Some(3u));
2701 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
2702 assert!("hello".rfind('x').is_none());
2703 assert!("hello".rfind(|c:char| c == 'x').is_none());
2704 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
2705 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
2709 fn test_push_str() {
2712 assert_eq!(s.slice_from(0), "");
2714 assert_eq!(s.slice_from(0), "abc");
2715 s.push_str("ประเทศไทย中华Việt Nam");
2716 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
2723 assert_eq!(s.slice_from(0), "");
2724 s = s.append("abc");
2725 assert_eq!(s.slice_from(0), "abc");
2726 s = s.append("ประเทศไทย中华Việt Nam");
2727 assert_eq!(s.slice_from(0), "abcประเทศไทย中华Việt Nam");
// `pop_char` must remove and return the final character as a whole unit,
// here the multi-byte '华', leaving the rest of the string intact.
2731 fn test_pop_char() {
2732 let mut data = ~"ประเทศไทย中华";
2733 let cc = data.pop_char();
2734 assert_eq!(~"ประเทศไทย中", data);
2735 assert_eq!('华', cc);
// `pop_char` on a single-character string returns that character and leaves
// the string empty.
2739 fn test_pop_char_2() {
2740 let mut data2 = ~"华";
2741 let cc2 = data2.pop_char();
2742 assert_eq!(~"", data2);
2743 assert_eq!('华', cc2);
// Calls `pop_char` and discards the result.
// NOTE(review): judging by the name, `data` is presumably empty and the call
// is expected to fail -- confirm against the setup and the test attributes.
2748 fn test_pop_char_fail() {
2750 let _cc3 = data.pop_char();
// Appends characters of every UTF-8 encoded width with `push_char` and checks
// that the resulting string contains them in push order.
2754 fn test_push_char() {
2755 let mut data = ~"ประเทศไทย中";
2756 data.push_char('华');
2757 data.push_char('b'); // 1 byte
2758 data.push_char('¢'); // 2 byte
2759 data.push_char('€'); // 3 byte
2760 data.push_char('𤭢'); // 4 byte
2761 assert_eq!(~"ประเทศไทย中华b¢€𤭢", data);
// `shift_char` must remove and return the first character, leaving the
// remainder of the string unchanged.
2765 fn test_shift_char() {
2766 let mut data = ~"ประเทศไทย中";
2767 let cc = data.shift_char();
2768 assert_eq!(~"ระเทศไทย中", data);
2769 assert_eq!('ป', cc);
// `unshift_char` must place the given character in front of the existing
// contents.
2773 fn test_unshift_char() {
2774 let mut data = ~"ประเทศไทย中";
2775 data.unshift_char('华');
2776 assert_eq!(~"华ประเทศไทย中", data);
// `insert_char` takes a byte offset: 15 is the boundary after the first five
// 3-byte characters, so '华' must land between "ประเท" and "ศไทย中".
2780 fn test_insert_char() {
2781 let mut data = ~"ประเทศไทย中";
2782 data.insert_char(15, '华');
2783 assert_eq!(~"ประเท华ศไทย中", data);
2788 let mut data = ~"ประเทศไทย中";
2789 data.insert(15, "华中");
2790 assert_eq!(~"ประเท华中ศไทย中", data);
2796 let s: ~str = empty.chars().collect();
2797 assert_eq!(empty, s);
2798 let data = ~"ประเทศไทย中";
2799 let s: ~str = data.chars().collect();
2800 assert_eq!(data, s);
2805 let data = ~"ประเทศไทย中";
2806 let mut cpy = data.clone();
2808 let mut it = other.chars();
2809 cpy.extend(&mut it);
2810 assert_eq!(cpy, data + other);
2815 let mut empty = ~"";
2817 assert_eq!("", empty.as_slice());
2818 let mut data = ~"ประเทศไทย中";
2820 assert_eq!("", data.as_slice());
2821 data.push_char('华');
2822 assert_eq!("华", data.as_slice());
// `into_bytes` consumes `data`; the resulting buffer must equal the bytes of
// "asdf".
// NOTE(review): presumably `data` is the owned string "asdf" -- confirm.
2826 fn test_into_bytes() {
2828 let buf = data.into_bytes();
2829 assert_eq!(bytes!("asdf"), buf.as_slice());
// Checks `find_str` on empty needles, absent needles, ASCII sub-slices and
// multi-byte text. Returned offsets are byte offsets relative to the start of
// the slice that was searched, which is why expectations are written as
// `absolute - slice_start`.
2833 fn test_find_str() {
2835 assert_eq!("".find_str(""), Some(0u));
2836 assert!("banana".find_str("apple pie").is_none());
2838 let data = "abcabc";
2839 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
2840 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
2841 assert!(data.slice(2u, 4u).find_str("ab").is_none());
2843 let mut data = ~"ประเทศไทย中华Việt Nam";
// "ไท华" joins characters that are not adjacent in the phrase, so it must
// not be found.
2845 assert!(data.find_str("ไท华").is_none());
2846 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
2847 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
2849 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
2850 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
2851 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
2852 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
2853 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
// NOTE(review): the 43..86 slices need `data` to be at least 86 bytes long;
// presumably it holds the 43-byte phrase twice at this point -- confirm.
2855 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
2856 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
2857 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
2858 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
2859 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
// Checks `slice_chars`, whose bounds are counted in characters rather than
// bytes.
2863 fn test_slice_chars() {
// Helper: the `b.char_len()` characters of `a` starting at character index
// `start` must equal `b`.
2864 fn t(a: &str, b: &str, start: uint) {
2865 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
2868 t("hello", "llo", 2);
2869 t("hello", "el", 1);
// Characters 2..8 of the phrase are six 3-byte Thai characters.
2872 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
2877 fn t(v: &[~str], s: &str) {
2878 assert_eq!(v.concat(), s.to_str());
2880 t([~"you", ~"know", ~"I'm", ~"no", ~"good"], "youknowI'mnogood");
2881 let v: &[~str] = [];
2888 fn t(v: &[~str], sep: &str, s: &str) {
2889 assert_eq!(v.connect(sep), s.to_str());
2891 t([~"you", ~"know", ~"I'm", ~"no", ~"good"],
2892 " ", "you know I'm no good");
2893 let v: &[~str] = [];
2895 t([~"hi"], " ", "hi");
// Checks `concat` on a vector of borrowed string slices.
2899 fn test_concat_slices() {
// Helper: concatenating `v` must produce an owned copy of `s`.
2900 fn t(v: &[&str], s: &str) {
2901 assert_eq!(v.concat(), s.to_str());
2903 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
// An empty vector needs an explicit element type annotation.
2904 let v: &[&str] = [];
// Checks `connect` on a vector of borrowed string slices.
2910 fn test_connect_slices() {
// Helper: joining `v` with separator `sep` must produce an owned copy of `s`.
2911 fn t(v: &[&str], sep: &str, s: &str) {
2912 assert_eq!(v.connect(sep), s.to_str());
2914 t(["you", "know", "I'm", "no", "good"],
2915 " ", "you know I'm no good");
// A single element is returned without any separator.
2917 t(["hi"], " ", "hi");
2922 assert_eq!("x".repeat(4), ~"xxxx");
2923 assert_eq!("hi".repeat(4), ~"hihihihi");
2924 assert_eq!("ไท华".repeat(3), ~"ไท华ไท华ไท华");
2925 assert_eq!("".repeat(4), ~"");
2926 assert_eq!("hi".repeat(0), ~"");
// Checks `raw::slice_bytes` (an unsafe call) on small literals and on a
// large string.
2930 fn test_unsafe_slice() {
2931 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
2932 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
2933 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
// Helper: 100000 pushes of ten 'a's each.
2934 fn a_million_letter_a() -> ~str {
2937 while i < 100000 { rs.push_str("aaaaaaaaaa"); i += 1; }
// Helper: 100000 pushes of five 'a's each.
2940 fn half_a_million_letter_a() -> ~str {
2943 while i < 100000 { rs.push_str("aaaaa"); i += 1; }
2946 let letters = a_million_letter_a();
// The first 500000 bytes of the long string must equal the short one.
2947 assert!(half_a_million_letter_a() ==
2948 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
// Checks `starts_with`: the empty prefix always matches, a prefix longer than
// the string never matches, and multi-byte prefixes are handled.
2952 fn test_starts_with() {
2953 assert!(("".starts_with("")));
2954 assert!(("abc".starts_with("")));
2955 assert!(("abc".starts_with("a")));
2956 assert!((!"a".starts_with("abc")));
2957 assert!((!"".starts_with("abc")));
2958 assert!((!"ödd".starts_with("-")));
2959 assert!(("ödd".starts_with("öd")));
// Checks `ends_with`: the empty suffix always matches, a suffix longer than
// the string never matches, and multi-byte suffixes are handled.
2963 fn test_ends_with() {
2964 assert!(("".ends_with("")));
2965 assert!(("abc".ends_with("")));
2966 assert!(("abc".ends_with("c")));
2967 assert!((!"a".ends_with("abc")));
2968 assert!((!"".ends_with("abc")));
2969 assert!((!"ddö".ends_with("-")));
2970 assert!(("ddö".ends_with("dö")));
// `is_empty` is true for "" and false for a one-character string.
2974 fn test_is_empty() {
2975 assert!("".is_empty());
2976 assert!(!"a".is_empty());
2982 assert_eq!("".replace(a, "b"), ~"");
2983 assert_eq!("a".replace(a, "b"), ~"b");
2984 assert_eq!("ab".replace(a, "b"), ~"bb");
2986 assert!(" test test ".replace(test, "toast") ==
2988 assert_eq!(" test test ".replace(test, ""), ~" ");
// Replaces the pattern `a` in Thai/Chinese text with the Arabic `repl`; the
// expected result `A` has the replacement at the very start of the string.
2992 fn test_replace_2a() {
2993 let data = ~"ประเทศไทย中华";
2994 let repl = ~"دولة الكويت";
2997 let A = ~"دولة الكويتทศไทย中华";
2998 assert_eq!(data.replace(a, repl), A);
// Replaces the pattern `b` in Thai/Chinese text with the Arabic `repl`; the
// expected result `B` has the replacement in the interior, after "ปร".
3002 fn test_replace_2b() {
3003 let data = ~"ประเทศไทย中华";
3004 let repl = ~"دولة الكويت";
3007 let B = ~"ปรدولة الكويتทศไทย中华";
3008 assert_eq!(data.replace(b, repl), B);
// Replaces the pattern `c` in Thai/Chinese text with the Arabic `repl`; the
// expected result `C` has the replacement at the very end of the string.
3012 fn test_replace_2c() {
3013 let data = ~"ประเทศไทย中华";
3014 let repl = ~"دولة الكويت";
3017 let C = ~"ประเทศไทยدولة الكويت";
3018 assert_eq!(data.replace(c, repl), C);
// Replacing the pattern `d` must return text equal to the input.
// NOTE(review): presumably `d` does not occur in `data` -- confirm.
3022 fn test_replace_2d() {
3023 let data = ~"ประเทศไทย中华";
3024 let repl = ~"دولة الكويت";
3027 assert_eq!(data.replace(d, repl), data);
3032 assert_eq!("ab", "abc".slice(0, 2));
3033 assert_eq!("bc", "abc".slice(1, 3));
3034 assert_eq!("", "abc".slice(1, 1));
3035 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
3037 let data = "ประเทศไทย中华";
3038 assert_eq!("ป", data.slice(0, 3));
3039 assert_eq!("ร", data.slice(3, 6));
3040 assert_eq!("", data.slice(3, 3));
3041 assert_eq!("华", data.slice(30, 33));
3043 fn a_million_letter_X() -> ~str {
3047 push_str(&mut rs, "华华华华华华华华华华");
3052 fn half_a_million_letter_X() -> ~str {
3055 while i < 100000 { push_str(&mut rs, "华华华华华"); i += 1; }
3058 let letters = a_million_letter_X();
3059 assert!(half_a_million_letter_X() ==
3060 letters.slice(0u, 3u * 500000u).to_owned());
3065 let ss = "中华Việt Nam";
3067 assert_eq!("华", ss.slice(3u, 6u));
3068 assert_eq!("Việt Nam", ss.slice(6u, 16u));
3070 assert_eq!("ab", "abc".slice(0u, 2u));
3071 assert_eq!("bc", "abc".slice(1u, 3u));
3072 assert_eq!("", "abc".slice(1u, 1u));
3074 assert_eq!("中", ss.slice(0u, 3u));
3075 assert_eq!("华V", ss.slice(3u, 7u));
3076 assert_eq!("", ss.slice(3u, 3u));
// Slices up to byte 2, which lies inside the 3-byte encoding of '中'.
// NOTE(review): presumably this is expected to fail because the end is not a
// character boundary -- confirm against the test attributes.
3091 fn test_slice_fail() {
3092 "中华Việt Nam".slice(0u, 2u);
// `slice_from(n)` returns everything from byte `n` to the end; `n == len`
// gives the empty string.
3096 fn test_slice_from() {
3097 assert_eq!("abcd".slice_from(0), "abcd");
3098 assert_eq!("abcd".slice_from(2), "cd");
3099 assert_eq!("abcd".slice_from(4), "");
// `slice_to(n)` returns everything before byte `n`; `n == 0` gives the empty
// string and `n == len` the whole string.
3102 fn test_slice_to() {
3103 assert_eq!("abcd".slice_to(0), "");
3104 assert_eq!("abcd".slice_to(2), "ab");
3105 assert_eq!("abcd".slice_to(4), "abcd");
// Checks `trim_left_chars` with each kind of matcher used here: an empty
// char slice (trims nothing), a char slice, a single char and a closure.
// Only the leading run of matching characters is removed.
3109 fn test_trim_left_chars() {
3110 let v: &[char] = &[];
3111 assert_eq!(" *** foo *** ".trim_left_chars(&v), " *** foo *** ");
3112 assert_eq!(" *** foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
3113 assert_eq!(" *** *** ".trim_left_chars(& &['*', ' ']), "");
3114 assert_eq!("foo *** ".trim_left_chars(& &['*', ' ']), "foo *** ");
3116 assert_eq!("11foo1bar11".trim_left_chars(&'1'), "foo1bar11");
3117 assert_eq!("12foo1bar12".trim_left_chars(& &['1', '2']), "foo1bar12");
3118 assert_eq!("123foo1bar123".trim_left_chars(&|c: char| c.is_digit()), "foo1bar123");
// Checks `trim_right_chars` with each kind of matcher used here: an empty
// char slice (trims nothing), a char slice, a single char and a closure.
// Only the trailing run of matching characters is removed.
3122 fn test_trim_right_chars() {
3123 let v: &[char] = &[];
3124 assert_eq!(" *** foo *** ".trim_right_chars(&v), " *** foo *** ");
3125 assert_eq!(" *** foo *** ".trim_right_chars(& &['*', ' ']), " *** foo");
3126 assert_eq!(" *** *** ".trim_right_chars(& &['*', ' ']), "");
3127 assert_eq!(" *** foo".trim_right_chars(& &['*', ' ']), " *** foo");
3129 assert_eq!("11foo1bar11".trim_right_chars(&'1'), "11foo1bar");
3130 assert_eq!("12foo1bar12".trim_right_chars(& &['1', '2']), "12foo1bar");
3131 assert_eq!("123foo1bar123".trim_right_chars(&|c: char| c.is_digit()), "123foo1bar");
// Checks `trim_chars`, which strips matching characters from both ends while
// leaving interior matches (the '1' in "foo1bar") untouched.
3135 fn test_trim_chars() {
3136 let v: &[char] = &[];
3137 assert_eq!(" *** foo *** ".trim_chars(&v), " *** foo *** ");
3138 assert_eq!(" *** foo *** ".trim_chars(& &['*', ' ']), "foo");
3139 assert_eq!(" *** *** ".trim_chars(& &['*', ' ']), "");
3140 assert_eq!("foo".trim_chars(& &['*', ' ']), "foo");
3142 assert_eq!("11foo1bar11".trim_chars(&'1'), "foo1bar");
3143 assert_eq!("12foo1bar12".trim_chars(& &['1', '2']), "foo1bar");
3144 assert_eq!("123foo1bar123".trim_chars(&|c: char| c.is_digit()), "foo1bar");
// `trim_left` strips leading whitespace only, including the non-ASCII
// U+3000, and leaves trailing whitespace in place.
3148 fn test_trim_left() {
3149 assert_eq!("".trim_left(), "");
3150 assert_eq!("a".trim_left(), "a");
3151 assert_eq!(" ".trim_left(), "");
3152 assert_eq!(" blah".trim_left(), "blah");
3153 assert_eq!(" \u3000 wut".trim_left(), "wut");
3154 assert_eq!("hey ".trim_left(), "hey ");
// `trim_right` strips trailing whitespace only, including the non-ASCII
// U+3000, and leaves leading whitespace in place.
3158 fn test_trim_right() {
3159 assert_eq!("".trim_right(), "");
3160 assert_eq!("a".trim_right(), "a");
3161 assert_eq!(" ".trim_right(), "");
3162 assert_eq!("blah ".trim_right(), "blah");
3163 assert_eq!("wut \u3000 ".trim_right(), "wut");
3164 assert_eq!(" hey".trim_right(), " hey");
3169 assert_eq!("".trim(), "");
3170 assert_eq!("a".trim(), "a");
3171 assert_eq!(" ".trim(), "");
3172 assert_eq!(" blah ".trim(), "blah");
3173 assert_eq!("\nwut \u3000 ".trim(), "wut");
3174 assert_eq!(" hey dude ".trim(), "hey dude");
// `is_whitespace` is true for the empty string and for strings made only of
// whitespace (ASCII or Unicode), and false once any other character appears.
3178 fn test_is_whitespace() {
3179 assert!("".is_whitespace());
3180 assert!(" ".is_whitespace());
3181 assert!("\u2009".is_whitespace()); // Thin space
3182 assert!(" \n\t ".is_whitespace());
3183 assert!(!" _ ".is_whitespace());
// `raw::push_byte` (an unsafe call) appends the byte for 'D'; afterwards `s`
// must read "ABCD".
3187 fn test_push_byte() {
3189 unsafe{raw::push_byte(&mut s, 'D' as u8)};
3190 assert_eq!(s, ~"ABCD");
// `raw::shift_byte` (an unsafe call) removes and returns the first byte:
// 65 is ASCII 'A', and "BC" must remain.
3194 fn test_shift_byte() {
3196 let b = unsafe{raw::shift_byte(&mut s)};
3197 assert_eq!(s, ~"BC");
3198 assert_eq!(b, 65u8);
// `raw::pop_byte` (an unsafe call) removes and returns the last byte:
// 67 is ASCII 'C', and "AB" must remain.
3202 fn test_pop_byte() {
3204 let b = unsafe{raw::pop_byte(&mut s)};
3205 assert_eq!(s, ~"AB");
3206 assert_eq!(b, 67u8);
3211 // deny overlong encodings
3212 assert!(!is_utf8([0xc0, 0x80]));
3213 assert!(!is_utf8([0xc0, 0xae]));
3214 assert!(!is_utf8([0xe0, 0x80, 0x80]));
3215 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
3216 assert!(!is_utf8([0xe0, 0x81, 0x81]));
3217 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
3218 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
3221 assert!(!is_utf8([0xED, 0xA0, 0x80]));
3222 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
3224 assert!(is_utf8([0xC2, 0x80]));
3225 assert!(is_utf8([0xDF, 0xBF]));
3226 assert!(is_utf8([0xE0, 0xA0, 0x80]));
3227 assert!(is_utf8([0xED, 0x9F, 0xBF]));
3228 assert!(is_utf8([0xEE, 0x80, 0x80]));
3229 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
3230 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
3231 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
// Builds a string with `raw::from_c_str` and expects seven 'A's.
// NOTE(review): presumably `b` points at the buffer `a` (seven bytes of 65,
// i.e. 'A', followed by a 0 terminator) -- confirm.
3235 fn test_raw_from_c_str() {
3237 let a = ~[65, 65, 65, 65, 65, 65, 65, 0];
3239 let c = raw::from_c_str(b);
3240 assert_eq!(c, ~"AAAAAAA");
// Checks `as_bytes` for the empty string, an ASCII string, and a multi-byte
// string compared against the expected byte table `v`.
3245 fn test_as_bytes() {
3248 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3249 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3252 assert_eq!("".as_bytes(), &[]);
3253 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
3254 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
// Regression check around `as_bytes`; the byte view is taken and left
// unused (`_bytes`).
3259 fn test_as_bytes_fail() {
3260 // Don't double free. (I'm not sure if this exercises the
3261 // original problem code path anymore.)
3263 let _bytes = s.as_bytes();
3269 let buf = "hello".as_ptr();
3271 assert_eq!(*ptr::offset(buf, 0), 'h' as u8);
3272 assert_eq!(*ptr::offset(buf, 1), 'e' as u8);
3273 assert_eq!(*ptr::offset(buf, 2), 'l' as u8);
3274 assert_eq!(*ptr::offset(buf, 3), 'l' as u8);
3275 assert_eq!(*ptr::offset(buf, 4), 'o' as u8);
// `subslice_offset` reports the byte offset of a slice within the string it
// was taken from.
3280 fn test_subslice_offset() {
3281 let a = "kernelsprite";
3282 let b = a.slice(7, a.len());
3283 let c = a.slice(0, a.len() - 6);
3284 assert_eq!(a.subslice_offset(b), 7);
3285 assert_eq!(a.subslice_offset(c), 0);
// Slices yielded by `lines()` point into the original string: each
// one-character line starts two bytes after the previous one.
3287 let string = "a\nb\nc";
3288 let mut lines = ~[];
3289 for line in string.lines() { lines.push(line) }
3290 assert_eq!(string.subslice_offset(lines[0]), 0);
3291 assert_eq!(string.subslice_offset(lines[1]), 2);
3292 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with a slice that was not taken from `a`.
// NOTE(review): presumably this is expected to fail -- confirm against the
// test attributes.
3297 fn test_subslice_offset_2() {
3298 let a = "alchemiter";
3299 let b = "cruxtruder";
3300 a.subslice_offset(b);
// Round-trips a string through an owned byte vector and back via
// `from_utf8`, recording the length of the string and of the vector.
3304 fn vec_str_conversions() {
3305 let s1: ~str = ~"All mimsy were the borogoves";
3307 let v: ~[u8] = s1.as_bytes().to_owned();
3308 let s2: ~str = from_utf8(v).unwrap().to_owned();
3309 let mut i: uint = 0u;
3310 let n1: uint = s1.len();
3311 let n2: uint = v.len();
// Checks substring search with `contains`: the empty needle is always found,
// needles may touch either end, and needles that only partly overlap the
// text are rejected.
3324 fn test_contains() {
3325 assert!("abcde".contains("bcd"));
3326 assert!("abcde".contains("abcd"));
3327 assert!("abcde".contains("bcde"));
3328 assert!("abcde".contains(""));
3329 assert!("".contains(""));
3330 assert!(!"abcde".contains("def"));
3331 assert!(!"".contains("a"));
3333 let data = ~"ประเทศไทย中华Việt Nam";
3334 assert!(data.contains("ประเ"));
3335 assert!(data.contains("ะเ"));
3336 assert!(data.contains("中华"));
// Each character of "ไท华" occurs in `data`, but not as a contiguous run.
3337 assert!(!data.contains("ไท华"));
// Checks single-character search with `contains_char`, including the empty
// string, which contains no character.
3341 fn test_contains_char() {
3342 assert!("abc".contains_char('b'));
3343 assert!("a".contains_char('a'));
3344 assert!(!"abc".contains_char('d'));
3345 assert!(!"".contains_char('a'));
3352 ~[0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
3353 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
3354 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
3355 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
3358 ~[0xd801_u16, 0xdc12_u16, 0xd801_u16,
3359 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
3360 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
3361 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
3362 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
3365 (~"𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n",
3366 ~[0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
3367 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
3368 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
3369 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
3370 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
3371 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
3372 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
3374 (~"𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n",
3375 ~[0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
3376 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
3377 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
3378 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
3379 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
3380 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
3381 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
3382 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
3383 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
3384 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
3387 for p in pairs.iter() {
3388 let (s, u) = (*p).clone();
3389 assert!(s.to_utf16() == u);
3390 assert!(from_utf16(u) == s);
3391 assert!(from_utf16(s.to_utf16()) == s);
3392 assert!(from_utf16(u).to_utf16() == u);
3398 let s = ~"ศไทย中华Việt Nam";
3399 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3401 for ch in v.iter() {
3402 assert!(s.char_at(pos) == *ch);
3403 pos += from_char(*ch).len();
// Walks the string backwards from its end: `char_at_reverse(pos)` must give
// the character ending at byte `pos`, after which `pos` moves back by that
// character's encoded length.
3408 fn test_char_at_reverse() {
3409 let s = ~"ศไทย中华Việt Nam";
3410 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3411 let mut pos = s.len();
3412 for ch in v.rev_iter() {
3413 assert!(s.char_at_reverse(pos) == *ch);
3414 pos -= from_char(*ch).len();
// `escape_unicode` escapes every character. The expectations use `\x` with
// two hex digits up to 0xff, `\u` with four up to 0xffff, and `\U` with
// eight beyond that.
3419 fn test_escape_unicode() {
3420 assert_eq!("abc".escape_unicode(), ~"\\x61\\x62\\x63");
3421 assert_eq!("a c".escape_unicode(), ~"\\x61\\x20\\x63");
3422 assert_eq!("\r\n\t".escape_unicode(), ~"\\x0d\\x0a\\x09");
3423 assert_eq!("'\"\\".escape_unicode(), ~"\\x27\\x22\\x5c");
3424 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), ~"\\x00\\x01\\xfe\\xff");
3425 assert_eq!("\u0100\uffff".escape_unicode(), ~"\\u0100\\uffff");
3426 assert_eq!("\U00010000\U0010ffff".escape_unicode(), ~"\\U00010000\\U0010ffff");
3427 assert_eq!("ab\ufb00".escape_unicode(), ~"\\x61\\x62\\ufb00");
3428 assert_eq!("\U0001d4ea\r".escape_unicode(), ~"\\U0001d4ea\\x0d");
// `escape_default` leaves letters and spaces as they are, uses backslash
// escapes for `\r`, `\n`, `\t`, quotes and the backslash itself, and falls
// back to `\u`/`\U` escapes for the non-ASCII characters tested here.
3432 fn test_escape_default() {
3433 assert_eq!("abc".escape_default(), ~"abc");
3434 assert_eq!("a c".escape_default(), ~"a c");
3435 assert_eq!("\r\n\t".escape_default(), ~"\\r\\n\\t");
3436 assert_eq!("'\"\\".escape_default(), ~"\\'\\\"\\\\");
3437 assert_eq!("\u0100\uffff".escape_default(), ~"\\u0100\\uffff");
3438 assert_eq!("\U00010000\U0010ffff".escape_default(), ~"\\U00010000\\U0010ffff");
3439 assert_eq!("ab\ufb00".escape_default(), ~"ab\\ufb00");
3440 assert_eq!("\U0001d4ea\r".escape_default(), ~"\\U0001d4ea\\r");
// Checks the total ordering of string slices through `cmp`: a string sorts
// after its own proper prefix, equal strings compare `Equal`, and otherwise
// the first differing byte decides regardless of length.
//
// Every comparison is asserted. A bare `a.cmp(b) == X;` statement is
// evaluated and thrown away, so it can never make the test fail.
fn test_total_ord() {
    assert_eq!("1234".cmp(& &"123"), Greater);
    assert_eq!("123".cmp(& &"1234"), Less);
    assert_eq!("1234".cmp(& &"1234"), Equal);
    // '5' < '6' at the sixth byte decides, although the left side is longer.
    assert_eq!("12345555".cmp(& &"123456"), Less);
    // '2' > '1' at the first byte decides, although the left side is shorter.
    assert_eq!("22".cmp(& &"1234"), Greater);
}
// `char_range_at(i).ch` must decode the character starting at byte `i`. The
// text mixes 1-, 2-, 3- and 4-byte characters, so consecutive start offsets
// advance by the width of the preceding character.
3453 fn test_char_range_at() {
3454 let data = ~"b¢€𤭢𤭢€¢b";
3455 assert_eq!('b', data.char_range_at(0).ch);
3456 assert_eq!('¢', data.char_range_at(1).ch);
3457 assert_eq!('€', data.char_range_at(3).ch);
3458 assert_eq!('𤭢', data.char_range_at(6).ch);
3459 assert_eq!('𤭢', data.char_range_at(10).ch);
3460 assert_eq!('€', data.char_range_at(14).ch);
3461 assert_eq!('¢', data.char_range_at(17).ch);
3462 assert_eq!('b', data.char_range_at(19).ch);
// Asking for the character before byte 0 must report `next == 0` rather than
// wrapping below zero.
3466 fn test_char_range_at_reverse_underflow() {
3467 assert_eq!("abc".char_range_at_reverse(0).next, 0);
3472 #[allow(unnecessary_allocation)];
3474 ($s1:expr, $s2:expr, $e:expr) => { {
3478 assert_eq!(s1 + s2, e.to_owned());
3479 assert_eq!(s1.to_owned() + s2, e.to_owned());
3483 t!("foo", "bar", "foobar");
3484 t!("foo", ~"bar", "foobar");
3485 t!("ศไทย中", "华Việt Nam", "ศไทย中华Việt Nam");
3486 t!("ศไทย中", ~"华Việt Nam", "ศไทย中华Việt Nam");
// Iterates `s.chars()` and compares each yielded character `c` with the
// expected table `v` at index `pos`; at the end `pos` must equal `v.len()`,
// i.e. the number of expected characters.
3490 fn test_iterator() {
3492 let s = ~"ศไทย中华Việt Nam";
3493 let v = ~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3496 let mut it = s.chars();
3499 assert_eq!(c, v[pos]);
3502 assert_eq!(pos, v.len());
// Same check as the forward iterator test, but through `chars_rev()`: the
// expected table `v` lists the characters of `s` from last to first.
3506 fn test_rev_iterator() {
3508 let s = ~"ศไทย中华Việt Nam";
3509 let v = ~['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3512 let mut it = s.chars_rev();
3515 assert_eq!(c, v[pos]);
3518 assert_eq!(pos, v.len());
// A cloned `chars()` iterator must yield the same characters as the iterator
// it was cloned from; the two are zipped and compared pairwise.
3522 fn test_iterator_clone() {
3523 let s = "ศไทย中华Việt Nam";
3524 let mut it = s.chars();
3526 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Iterates `s.bytes()` and compares each byte with the expected byte table
// `v` at index `pos`.
3530 fn test_bytesator() {
3531 let s = ~"ศไทย中华Việt Nam";
3533 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3534 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3539 for b in s.bytes() {
3540 assert_eq!(b, v[pos]);
// Iterates `s.bytes_rev()` and compares each byte with the expected byte
// table `v`, indexing with a `pos` that starts at `v.len()`.
3546 fn test_bytes_revator() {
3547 let s = ~"ศไทย中华Việt Nam";
3549 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
3550 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
3553 let mut pos = v.len();
3555 for b in s.bytes_rev() {
3557 assert_eq!(b, v[pos]);
// `char_indices()` must yield `(byte offset, char)` pairs; `p` holds the
// expected start offsets and `v` the expected characters, in order.
3562 fn test_char_indicesator() {
3564 let s = "ศไทย中华Việt Nam";
3565 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
3566 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
3569 let mut it = s.char_indices();
3572 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
3575 assert_eq!(pos, v.len());
3576 assert_eq!(pos, p.len());
// `char_indices_rev()` must yield `(byte offset, char)` pairs from the last
// character to the first; `p` and `v` list the expected values in that order.
3580 fn test_char_indices_revator() {
3582 let s = "ศไทย中华Việt Nam";
3583 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
3584 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
3587 let mut it = s.char_indices_rev();
3590 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
3593 assert_eq!(pos, v.len());
3594 assert_eq!(pos, p.len());
// Checks `split` and `rsplit` with a char and with a closure as separator,
// for an ASCII separator (' ') and a multi-byte one ('ä').
// NOTE(review): the `rsplit` vectors are declared `mut` and compared against
// the forward order -- presumably they are reversed before each comparison;
// confirm.
3598 fn test_split_char_iterator() {
3599 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3601 let split: ~[&str] = data.split(' ').collect();
3602 assert_eq!( split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3604 let mut rsplit: ~[&str] = data.rsplit(' ').collect();
3606 assert_eq!(rsplit, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3608 let split: ~[&str] = data.split(|c: char| c == ' ').collect();
3609 assert_eq!( split, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3611 let mut rsplit: ~[&str] = data.rsplit(|c: char| c == ' ').collect();
3613 assert_eq!(rsplit, ~["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
3616 let split: ~[&str] = data.split('ä').collect();
3617 assert_eq!( split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3619 let mut rsplit: ~[&str] = data.rsplit('ä').collect();
3621 assert_eq!(rsplit, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3623 let split: ~[&str] = data.split(|c: char| c == 'ä').collect();
3624 assert_eq!( split, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
3626 let mut rsplit: ~[&str] = data.rsplit(|c: char| c == 'ä').collect();
3628 assert_eq!(rsplit, ~["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// `splitn(sep, 3)` performs at most three splits from the left, so four
// pieces come back and the last one keeps every remaining separator.
3632 fn test_splitn_char_iterator() {
3633 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3635 let split: ~[&str] = data.splitn(' ', 3).collect();
3636 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3638 let split: ~[&str] = data.splitn(|c: char| c == ' ', 3).collect();
3639 assert_eq!(split, ~["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
3642 let split: ~[&str] = data.splitn('ä', 3).collect();
3643 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
3645 let split: ~[&str] = data.splitn(|c: char| c == 'ä', 3).collect();
3646 assert_eq!(split, ~["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// `rsplitn(sep, 3)` performs at most three splits counted from the right, so
// the unsplit remainder is the piece at the start of the text.
// NOTE(review): the vectors are declared `mut` and compared in forward
// order -- presumably they are reversed before each comparison; confirm.
3650 fn test_rsplitn_char_iterator() {
3651 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3653 let mut split: ~[&str] = data.rsplitn(' ', 3).collect();
3655 assert_eq!(split, ~["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
3657 let mut split: ~[&str] = data.rsplitn(|c: char| c == ' ', 3).collect();
3659 assert_eq!(split, ~["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
3662 let mut split: ~[&str] = data.rsplitn('ä', 3).collect();
3664 assert_eq!(split, ~["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
3666 let mut split: ~[&str] = data.rsplitn(|c: char| c == 'ä', 3).collect();
3668 assert_eq!(split, ~["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on text that ends with the
// separator: `split` yields a final empty piece, `split_terminator` does not.
3672 fn test_split_char_iterator_no_trailing() {
3673 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3675 let split: ~[&str] = data.split('\n').collect();
3676 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3678 let split: ~[&str] = data.split_terminator('\n').collect();
3679 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
// Same contrast between `split` and `split_terminator`, but consuming the
// iterators through `.rev()`.
// NOTE(review): the vectors are declared `mut` and compared in forward
// order -- presumably they are reversed before each comparison; confirm.
3683 fn test_rev_split_char_iterator_no_trailing() {
3684 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
3686 let mut split: ~[&str] = data.split('\n').rev().collect();
3688 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb", ""]);
3690 let mut split: ~[&str] = data.split_terminator('\n').rev().collect();
3692 assert_eq!(split, ~["", "Märy häd ä little lämb", "Little lämb"]);
3697 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
3698 let words: ~[&str] = data.words().collect();
3699 assert_eq!(words, ~["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks the character sequence produced by `nfd_chars` for plain ASCII,
// precomposed letters, combining-mark sequences (including their reordering)
// and precomposed Hangul syllables. U+2026 and U+01C4 must come back
// unchanged here.
3703 fn test_nfd_chars() {
3704 assert_eq!("abc".nfd_chars().collect::<~str>(), ~"abc");
3705 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<~str>(), ~"d\u0307\u01c4");
3706 assert_eq!("\u2026".nfd_chars().collect::<~str>(), ~"\u2026");
3707 assert_eq!("\u2126".nfd_chars().collect::<~str>(), ~"\u03a9");
3708 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<~str>(), ~"d\u0323\u0307");
3709 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<~str>(), ~"d\u0323\u0307");
3710 assert_eq!("a\u0301".nfd_chars().collect::<~str>(), ~"a\u0301");
3711 assert_eq!("\u0301a".nfd_chars().collect::<~str>(), ~"\u0301a");
3712 assert_eq!("\ud4db".nfd_chars().collect::<~str>(), ~"\u1111\u1171\u11b6");
3713 assert_eq!("\uac1c".nfd_chars().collect::<~str>(), ~"\u1100\u1162");
// Checks the character sequence produced by `nfkd_chars` on the same inputs
// as the `nfd_chars` test. The expectations differ for U+2026, which becomes
// "...", and for U+01C4, which becomes "DZ" followed by U+030C.
3717 fn test_nfkd_chars() {
3718 assert_eq!("abc".nfkd_chars().collect::<~str>(), ~"abc");
3719 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), ~"d\u0307DZ\u030c");
3720 assert_eq!("\u2026".nfkd_chars().collect::<~str>(), ~"...");
3721 assert_eq!("\u2126".nfkd_chars().collect::<~str>(), ~"\u03a9");
3722 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), ~"d\u0323\u0307");
3723 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), ~"d\u0323\u0307");
3724 assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), ~"a\u0301");
3725 assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), ~"\u0301a");
3726 assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), ~"\u1111\u1171\u11b6");
3727 assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), ~"\u1100\u1162");
3732 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
3733 let lines: ~[&str] = data.lines().collect();
3734 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
3736 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
3737 let lines: ~[&str] = data.lines().collect();
3738 assert_eq!(lines, ~["", "Märy häd ä little lämb", "", "Little lämb"]);
// Checks `split_str`, which splits on a string separator: separators at
// either end produce empty pieces, and matches are consumed left to right
// without overlapping ("zzz" split on "zz" gives "" and "z").
3742 fn test_split_strator() {
// Helper: collects the pieces of `s` split on `sep` into `v`; `u` is the
// expected list of pieces.
3743 fn t<'a>(s: &str, sep: &'a str, u: ~[&str]) {
3744 let v: ~[&str] = s.split_str(sep).collect();
3747 t("--1233345--", "12345", ~["--1233345--"]);
3748 t("abc::hello::there", "::", ~["abc", "hello", "there"]);
3749 t("::hello::there", "::", ~["", "hello", "there"]);
3750 t("hello::there::", "::", ~["hello", "there", ""]);
3751 t("::hello::there::", "::", ~["", "hello", "there", ""]);
3752 t("ประเทศไทย中华Việt Nam", "中华", ~["ประเทศไทย", "Việt Nam"]);
3753 t("zzXXXzzYYYzz", "zz", ~["", "XXX", "YYY", ""]);
3754 t("zzXXXzYYYz", "XXX", ~["zz", "zYYYz"]);
3755 t(".XXX.YYY.", ".", ~["", "XXX", "YYY", ""]);
3757 t("zz", "zz", ~["",""]);
3758 t("ok", "z", ~["ok"]);
3759 t("zzz", "zz", ~["","z"]);
3760 t("zzzzz", "zz", ~["","","z"]);
// The `Default` value of a string type must be the empty string.
3764 fn test_str_default() {
3765 use default::Default;
// Helper, generic over any string type `S`: its default value, viewed as a
// slice, must equal "".
3766 fn t<S: Default + Str>() {
3767 let s: S = Default::default();
3768 assert_eq!(s.as_slice(), "");
// Checks that borrowed and owned strings can both be used through the
// `Container` trait.
3776 fn test_str_container() {
// Helper: sums `len()` over a slice of any `Container` type.
3777 fn sum_len<S: Container>(v: &[S]) -> uint {
3778 v.iter().map(|x| x.len()).sum()
3782 assert_eq!(5, sum_len(["012", "", "34"]));
3783 assert_eq!(5, sum_len([~"01", ~"2", ~"34", ~""]));
3784 assert_eq!(5, sum_len([s.as_slice()]));
// Checks the contents of `s` at successive stages: "12345", then "123", then
// the empty string.
// NOTE(review): presumably each stage follows a `truncate` call, and the
// pointer `p_` taken at the end is compared with an earlier one to show the
// buffer is kept -- confirm.
3788 fn test_str_truncate() {
3789 let mut s = ~"12345";
3791 assert_eq!(s.as_slice(), "12345");
3793 assert_eq!(s.as_slice(), "123");
3795 assert_eq!(s.as_slice(), "");
3797 let mut s = ~"12345";
3801 let p_ = s.as_ptr();
// Sets up a five-byte string.
// NOTE(review): judging by the name, it is presumably truncated to a length
// greater than its own, which should fail -- confirm.
3807 fn test_str_truncate_invalid_len() {
3808 let mut s = ~"12345";
// Sets up a string holding the single character U+00FC.
// NOTE(review): judging by the name, it is presumably truncated in the
// middle of that character's encoding, which should fail -- confirm.
3814 fn test_str_truncate_split_codepoint() {
3815 let mut s = ~"\u00FC"; // ü
// `from_utf8` returns `Some` borrowed string for valid ASCII and multi-byte
// input, and `None` when a stray 0xff byte is appended.
3820 fn test_str_from_utf8() {
3821 let xs = bytes!("hello");
3822 assert_eq!(from_utf8(xs), Some("hello"));
3824 let xs = bytes!("ศไทย中华Việt Nam");
3825 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
3827 let xs = bytes!("hello", 0xff);
3828 assert_eq!(from_utf8(xs), None);
// `from_utf8_owned` takes an owned byte vector and returns `Some` owned
// string for valid ASCII and multi-byte input, and `None` when a stray 0xff
// byte is appended.
3832 fn test_str_from_utf8_owned() {
3833 let xs = bytes!("hello").to_owned();
3834 assert_eq!(from_utf8_owned(xs), Some(~"hello"));
3836 let xs = bytes!("ศไทย中华Việt Nam").to_owned();
3837 assert_eq!(from_utf8_owned(xs), Some(~"ศไทย中华Việt Nam"));
3839 let xs = bytes!("hello", 0xff).to_owned();
3840 assert_eq!(from_utf8_owned(xs), None);
// The result of `to_send_str()` on a literal must compare equal to both the
// static and the owned `SendStr` variant holding the same text.
3844 fn test_to_send_str() {
3845 assert_eq!("abcde".to_send_str(), SendStrStatic("abcde"));
3846 assert_eq!("abcde".to_send_str(), SendStrOwned(~"abcde"));
// `from_str` with an owned-string target type must return `Some` owned copy
// of its input.
3850 fn test_from_str() {
3851 let owned: Option<~str> = from_str(&"string");
3852 assert_eq!(owned, Some(~"string"));
3858 use extra::test::BenchHarness;
// Benchmark: counts the items of `chars()` over mixed multi-byte and ASCII
// text on each iteration, asserting the count equals `char_len()`.
3863 fn char_iterator(bh: &mut BenchHarness) {
3864 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3865 let len = s.char_len();
3867 bh.iter(|| assert_eq!(s.chars().len(), len));
// Benchmark: counts the items of `chars()` over a multi-line ASCII-only
// string literal on each iteration, asserting the count equals `char_len()`.
3871 fn char_iterator_ascii(bh: &mut BenchHarness) {
3872 let s = "Mary had a little lamb, Little lamb
3873 Mary had a little lamb, Little lamb
3874 Mary had a little lamb, Little lamb
3875 Mary had a little lamb, Little lamb
3876 Mary had a little lamb, Little lamb
3877 Mary had a little lamb, Little lamb";
3878 let len = s.char_len();
3880 bh.iter(|| assert_eq!(s.chars().len(), len));
// Benchmark: counts the items of `chars_rev()` on each iteration, asserting
// the count equals `char_len()`.
3884 fn char_iterator_rev(bh: &mut BenchHarness) {
3885 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3886 let len = s.char_len();
3888 bh.iter(|| assert_eq!(s.chars_rev().len(), len));
// Benchmark: counts the items of `char_indices()` on each iteration,
// asserting the count equals `char_len()`.
3892 fn char_indicesator(bh: &mut BenchHarness) {
3893 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3894 let len = s.char_len();
3896 bh.iter(|| assert_eq!(s.char_indices().len(), len));
// Benchmark: counts the items of `char_indices_rev()` on each iteration,
// asserting the count equals `char_len()`.
3900 fn char_indicesator_rev(bh: &mut BenchHarness) {
3901 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3902 let len = s.char_len();
3904 bh.iter(|| assert_eq!(s.char_indices_rev().len(), len));
// Benchmark: splits mostly multi-byte text on the ASCII char 'V'. The text
// contains 'V' twice, so three pieces are expected.
3908 fn split_unicode_ascii(bh: &mut BenchHarness) {
3909 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3911 bh.iter(|| assert_eq!(s.split('V').len(), 3));
// Benchmark: same split on 'V' as `split_unicode_ascii`, but through a
// `CharEq` wrapper whose `only_ascii` reports false.
// NOTE(review): presumably this keeps `split` off an ASCII-only fast path
// for comparison -- confirm against the `split` implementation.
3915 fn split_unicode_not_ascii(bh: &mut BenchHarness) {
3916 struct NotAscii(char);
3917 impl CharEq for NotAscii {
3918 fn matches(&self, c: char) -> bool {
3919 let NotAscii(cc) = *self;
3922 fn only_ascii(&self) -> bool { false }
3924 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3926 bh.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
// Benchmark: splits ASCII text on a space char; the piece count computed
// once up front is the expected value on every iteration.
3931 fn split_ascii(bh: &mut BenchHarness) {
3932 let s = "Mary had a little lamb, Little lamb, little-lamb.";
3933 let len = s.split(' ').len();
3935 bh.iter(|| assert_eq!(s.split(' ').len(), len));
// Benchmark: splits ASCII text on a space through a `CharEq` wrapper whose
// `only_ascii` reports false; the piece count must match the plain char
// split.
// NOTE(review): presumably this keeps `split` off an ASCII-only fast path
// for comparison -- confirm against the `split` implementation.
3939 fn split_not_ascii(bh: &mut BenchHarness) {
3940 struct NotAscii(char);
3941 impl CharEq for NotAscii {
3943 fn matches(&self, c: char) -> bool {
3944 let NotAscii(cc) = *self;
3947 fn only_ascii(&self) -> bool { false }
3949 let s = "Mary had a little lamb, Little lamb, little-lamb.";
3950 let len = s.split(' ').len();
3952 bh.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
// Benchmark: splits ASCII text using a named function as the separator
// predicate; the piece count must match the plain char split.
3956 fn split_extern_fn(bh: &mut BenchHarness) {
3957 let s = "Mary had a little lamb, Little lamb, little-lamb.";
3958 let len = s.split(' ').len();
3959 fn pred(c: char) -> bool { c == ' ' }
3961 bh.iter(|| assert_eq!(s.split(pred).len(), len));
// Benchmark: splits ASCII text using a closure as the separator predicate;
// the piece count must match the plain char split.
3965 fn split_closure(bh: &mut BenchHarness) {
3966 let s = "Mary had a little lamb, Little lamb, little-lamb.";
3967 let len = s.split(' ').len();
3969 bh.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
// Benchmark: splits ASCII text using a one-element char slice as the
// separator set; the piece count must match the plain char split.
3973 fn split_slice(bh: &mut BenchHarness) {
3974 let s = "Mary had a little lamb, Little lamb, little-lamb.";
3975 let len = s.split(' ').len();
3977 bh.iter(|| assert_eq!(s.split(&[' ']).len(), len));
// Benchmark input for UTF-8 validation: an ASCII-only byte string whose
// length is asserted to be exactly 100 bytes.
3981 fn is_utf8_100_ascii(bh: &mut BenchHarness) {
3983 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
3984 Lorem ipsum dolor sit amet, consectetur. ");
3986 assert_eq!(100, s.len());
// Benchmark input for UTF-8 validation: multi-byte text in several scripts
// whose encoded length is asserted to be exactly 100 bytes.
3993 fn is_utf8_100_multibyte(bh: &mut BenchHarness) {
3994 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
3995 assert_eq!(100, s.len());
// Benchmark: calls `with_capacity(100)` and discards the result.
4002 fn bench_with_capacity(bh: &mut BenchHarness) {
4004 let _ = with_capacity(100);
// Benchmark fixture: mixed multi-byte and ASCII sample text.
// NOTE(review): judging by the name, `s` is presumably appended with
// `push_str` inside the benchmark loop -- confirm.
4009 fn bench_push_str(bh: &mut BenchHarness) {
4010 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
4018 fn bench_connect(bh: &mut BenchHarness) {
4019 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
4021 let v = [s, s, s, s, s, s, s, s, s, s];
4023 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);