1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with its ownership. This means that
20 there are two common kinds of strings in Rust:
22 * `~str` - This is an owned string. This type obeys all of the normal semantics
23 of the `Box<T>` types, meaning that it has one, and only one,
24 owner. This type cannot be implicitly copied, and is moved out of
25 when passed to other functions.
27 * `&str` - This is the borrowed string type. This type of string can only be
28 created from the other kind of string. As the name "borrowed"
29 implies, this type of string is owned elsewhere, and this string
30 cannot be moved out of.
32 As an example, here are a few different kinds of strings.
36 let owned_string = "I am an owned string".to_owned();
37 let borrowed_string1 = "This string is borrowed with the 'static lifetime";
38 let borrowed_string2: &str = owned_string; // owned strings can be borrowed
42 From the example above, you can see that Rust has 2 different kinds of string
43 literals. The owned literals correspond to the owned string types, but the
44 "borrowed literal" is actually more akin to C's concept of a static string.
46 When a string is declared without a `~` sigil, then the string is allocated
47 statically in the rodata of the executable/library. The string then has the
48 type `&'static str` meaning that the string is valid for the `'static`
49 lifetime, otherwise known as the lifetime of the entire program. As can be
50 inferred from the type, these static strings are not mutable.
54 Many languages have immutable strings by default, and Rust has a particular
55 flavor of this idea. As with the rest of Rust's types, strings are immutable by
56 default. If a string is declared as `mut`, however, it may be mutated. This
57 works the same way as the rest of Rust's type system in the sense that if
58 there's a mutable reference to a string, there may only be one mutable reference
59 to that string. With these guarantees, strings can easily transition between
60 being mutable/immutable with the same benefits of having mutable strings in
65 Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
66 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
67 encoded UTF-8 sequences. Additionally, strings are not null-terminated
68 and can contain null codepoints.
70 The actual representation of strings has direct mappings to vectors:
72 * `~str` is the same as `~[u8]`
73 * `&str` is the same as `&[u8]`
80 use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
81 use container::Container;
84 use from_str::FromStr;
86 use iter::{Iterator, range, AdditiveIterator};
89 use option::{None, Option, Some};
90 use result::{Result, Ok, Err};
92 use slice::{ImmutableVector, MutableVector, CloneableVector};
96 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets, RevChars};
97 pub use core::str::{RevCharOffsets, Bytes, RevBytes, CharSplits, RevCharSplits};
98 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
99 pub use core::str::{eq_slice, eq, is_utf8, is_utf16, UTF16Items};
100 pub use core::str::{UTF16Item, ScalarValue, LoneSurrogate, utf16_items};
101 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
102 pub use core::str::{Str, StrSlice};
105 Section: Creating a string
108 /// Consumes a vector of bytes to create a new utf-8 string.
110 /// Returns `Err` with the original vector if the vector contains invalid
112 pub fn from_utf8_owned(vv: ~[u8]) -> Result<~str, ~[u8]> {
114 Ok(unsafe { raw::from_utf8_owned(vv) })
120 impl FromStr for ~str {
122 fn from_str(s: &str) -> Option<~str> { Some(s.to_owned()) }
125 /// Convert a byte to a UTF-8 string
129 /// Fails if invalid UTF-8
130 pub fn from_byte(b: u8) -> ~str {
132 unsafe { ::mem::transmute(box [b]) }
135 /// Convert a char to a string
136 pub fn from_char(ch: char) -> ~str {
137 let mut buf = StrBuf::new();
142 /// Convert a vector of chars to a string
143 pub fn from_chars(chs: &[char]) -> ~str {
144 chs.iter().map(|c| *c).collect()
147 /// Methods for vectors of strings
148 pub trait StrVector {
149 /// Concatenate a vector of strings.
150 fn concat(&self) -> ~str;
152 /// Concatenate a vector of strings, placing a given separator between each.
153 fn connect(&self, sep: &str) -> ~str;
156 impl<'a, S: Str> StrVector for &'a [S] {
157 fn concat(&self) -> ~str {
158 if self.is_empty() { return "".to_owned(); }
160 // `len` calculation may overflow, but push_str will check boundaries
161 let len = self.iter().map(|s| s.as_slice().len()).sum();
163 let mut result = StrBuf::with_capacity(len);
165 for s in self.iter() {
166 result.push_str(s.as_slice())
172 fn connect(&self, sep: &str) -> ~str {
173 if self.is_empty() { return "".to_owned(); }
176 if sep.is_empty() { return self.concat(); }
178 // this is wrong without the guarantee that `self` is non-empty
179 // `len` calculation may overflow, but push_str will check boundaries
180 let len = sep.len() * (self.len() - 1)
181 + self.iter().map(|s| s.as_slice().len()).sum();
182 let mut result = StrBuf::with_capacity(len);
183 let mut first = true;
185 for s in self.iter() {
189 result.push_str(sep);
191 result.push_str(s.as_slice());
197 impl<'a, S: Str> StrVector for Vec<S> {
199 fn concat(&self) -> ~str {
200 self.as_slice().concat()
204 fn connect(&self, sep: &str) -> ~str {
205 self.as_slice().connect(sep)
213 // Helper functions used for Unicode normalization
214 fn canonical_sort(comb: &mut [(char, u8)]) {
218 let len = comb.len();
219 for i in range(0, len) {
220 let mut swapped = false;
221 for j in range(1, len-i) {
222 let class_a = *comb[j-1].ref1();
223 let class_b = *comb[j].ref1();
224 if class_a != 0 && class_b != 0 && class_a > class_b {
229 if !swapped { break; }
234 enum DecompositionType {
239 /// External iterator for a string's decomposition's characters.
240 /// Use with the `std::iter` module.
242 pub struct Decompositions<'a> {
243 kind: DecompositionType,
245 buffer: Vec<(char, u8)>,
249 impl<'a> Iterator<char> for Decompositions<'a> {
251 fn next(&mut self) -> Option<char> {
252 use unicode::normalization::canonical_combining_class;
254 match self.buffer.as_slice().head() {
260 Some(&(c, _)) if self.sorted => {
264 _ => self.sorted = false
267 let decomposer = match self.kind {
268 Canonical => char::decompose_canonical,
269 Compatible => char::decompose_compatible
273 for ch in self.iter {
274 let buffer = &mut self.buffer;
275 let sorted = &mut self.sorted;
277 let class = canonical_combining_class(d);
278 if class == 0 && !*sorted {
279 canonical_sort(buffer.as_mut_slice());
282 buffer.push((d, class));
289 canonical_sort(self.buffer.as_mut_slice());
293 match self.buffer.shift() {
298 Some((c, _)) => Some(c),
303 fn size_hint(&self) -> (uint, Option<uint>) {
304 let (lower, _) = self.iter.size_hint();
309 /// Replace all occurrences of one string with another
313 /// * s - The string containing substrings to replace
314 /// * from - The string to replace
315 /// * to - The replacement string
319 /// The original string with all occurrences of `from` replaced with `to`
320 pub fn replace(s: &str, from: &str, to: &str) -> ~str {
321 let mut result = StrBuf::new();
322 let mut last_end = 0;
323 for (start, end) in s.match_indices(from) {
324 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
328 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
336 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
337 /// if `v` contains any invalid data.
345 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
346 /// 0x0073, 0x0069, 0x0063];
347 /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_owned()));
349 /// // 𝄞mu<invalid>ic
351 /// assert_eq!(str::from_utf16(v), None);
353 pub fn from_utf16(v: &[u16]) -> Option<~str> {
354 let mut s = StrBuf::with_capacity(v.len() / 2);
355 for c in utf16_items(v) {
357 ScalarValue(c) => s.push_char(c),
358 LoneSurrogate(_) => return None
364 /// Decode a UTF-16 encoded vector `v` into a string, replacing
365 /// invalid data with the replacement character (U+FFFD).
371 /// // 𝄞mus<invalid>ic<invalid>
372 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
373 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
376 /// assert_eq!(str::from_utf16_lossy(v),
377 /// "𝄞mus\uFFFDic\uFFFD".to_owned());
379 pub fn from_utf16_lossy(v: &[u16]) -> ~str {
380 utf16_items(v).map(|c| c.to_char_lossy()).collect()
383 // Return the initial codepoint accumulator for the first byte.
384 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
385 // for width 3, and 3 bits for width 4
386 macro_rules! utf8_first_byte(
387 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
390 // return the value of $ch updated with continuation byte $byte
391 macro_rules! utf8_acc_cont_byte(
392 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
395 static TAG_CONT_U8: u8 = 128u8; // 0b1000_0000: expected value of `byte & 192u8` (top two bits `10`) for a UTF-8 continuation byte
397 /// Converts a vector of bytes to a new utf-8 string.
398 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
403 /// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
404 /// let output = std::str::from_utf8_lossy(input);
405 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
407 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
409 return Slice(unsafe { mem::transmute(v) })
412 static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
415 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
416 unsafe { *xs.unsafe_ref(i) }
418 fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
426 let mut res = StrBuf::with_capacity(total);
430 res.push_bytes(v.slice_to(i))
434 // subseqidx is the index of the first byte of the subsequence we're looking at.
435 // It's used to copy a bunch of contiguous good codepoints at once instead of copying
437 let mut subseqidx = 0;
441 let byte = unsafe_get(v, i);
444 macro_rules! error(() => ({
447 res.push_bytes(v.slice(subseqidx, i_));
450 res.push_bytes(REPLACEMENT);
455 // subseqidx handles this
457 let w = utf8_char_width(byte);
461 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
468 match (byte, safe_get(v, i, total)) {
469 (0xE0 , 0xA0 .. 0xBF) => (),
470 (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
471 (0xED , 0x80 .. 0x9F) => (),
472 (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
479 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
486 match (byte, safe_get(v, i, total)) {
487 (0xF0 , 0x90 .. 0xBF) => (),
488 (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
489 (0xF4 , 0x80 .. 0x8F) => (),
496 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
501 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
514 if subseqidx < total {
516 res.push_bytes(v.slice(subseqidx, total))
519 Owned(res.into_owned())
526 /// A MaybeOwned is a string that can hold either a ~str or a &str.
527 /// This can be useful as an optimization when an allocation is sometimes
528 /// needed but not always.
529 pub enum MaybeOwned<'a> {
530 /// A borrowed string
536 /// SendStr is a specialization of `MaybeOwned` to be sendable
537 pub type SendStr = MaybeOwned<'static>;
539 impl<'a> MaybeOwned<'a> {
540 /// Returns `true` if this `MaybeOwned` wraps an owned string
542 pub fn is_owned(&self) -> bool {
549 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
551 pub fn is_slice(&self) -> bool {
559 /// Trait for moving into a `MaybeOwned`
560 pub trait IntoMaybeOwned<'a> {
561 /// Moves self into a `MaybeOwned`
562 fn into_maybe_owned(self) -> MaybeOwned<'a>;
565 impl<'a> IntoMaybeOwned<'a> for ~str {
567 fn into_maybe_owned(self) -> MaybeOwned<'a> { Owned(self) }
570 impl<'a> IntoMaybeOwned<'a> for StrBuf {
572 fn into_maybe_owned(self) -> MaybeOwned<'a> { Owned(self.into_owned()) }
575 impl<'a> IntoMaybeOwned<'a> for &'a str {
577 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
580 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
582 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
585 impl<'a> Eq for MaybeOwned<'a> {
587 fn eq(&self, other: &MaybeOwned) -> bool {
588 self.as_slice() == other.as_slice()
592 impl<'a> TotalEq for MaybeOwned<'a> {}
594 impl<'a> Ord for MaybeOwned<'a> {
596 fn lt(&self, other: &MaybeOwned) -> bool {
597 self.as_slice().lt(&other.as_slice())
601 impl<'a> TotalOrd for MaybeOwned<'a> {
603 fn cmp(&self, other: &MaybeOwned) -> Ordering {
604 self.as_slice().cmp(&other.as_slice())
608 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
610 fn equiv(&self, other: &S) -> bool {
611 self.as_slice() == other.as_slice()
615 impl<'a> Str for MaybeOwned<'a> {
617 fn as_slice<'b>(&'b self) -> &'b str {
620 Owned(ref s) => s.as_slice()
625 impl<'a> StrAllocating for MaybeOwned<'a> {
627 fn into_owned(self) -> ~str {
629 Slice(s) => s.to_owned(),
635 impl<'a> Container for MaybeOwned<'a> {
637 fn len(&self) -> uint { self.as_slice().len() }
640 impl<'a> Clone for MaybeOwned<'a> {
642 fn clone(&self) -> MaybeOwned<'a> {
644 Slice(s) => Slice(s),
645 Owned(ref s) => Owned(s.to_owned())
650 impl<'a> Default for MaybeOwned<'a> {
652 fn default() -> MaybeOwned<'a> { Slice("") }
655 impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
657 fn hash(&self, hasher: &mut H) {
659 Slice(s) => s.hash(hasher),
660 Owned(ref s) => s.hash(hasher),
665 impl<'a> fmt::Show for MaybeOwned<'a> {
667 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
669 Slice(ref s) => s.fmt(f),
670 Owned(ref s) => s.fmt(f)
675 /// Unsafe operations
681 use slice::CloneableVector;
682 use str::{is_utf8, StrAllocating};
684 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
685 pub use core::str::raw::{slice_unchecked};
687 /// Create a Rust string from a *u8 buffer of the given length
688 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> ~str {
689 let v = Slice { data: buf, len: len };
690 let bytes: &[u8] = ::mem::transmute(v);
691 assert!(is_utf8(bytes));
692 let s: &str = ::mem::transmute(bytes);
696 #[lang="strdup_uniq"]
699 unsafe fn strdup_uniq(ptr: *u8, len: uint) -> ~str {
700 from_buf_len(ptr, len)
703 /// Create a Rust string from a null-terminated C string
704 pub unsafe fn from_c_str(buf: *libc::c_char) -> ~str {
709 curr = buf.offset(i);
711 from_buf_len(buf as *u8, i as uint)
714 /// Converts an owned vector of bytes to a new owned string. This assumes
715 /// that the utf-8-ness of the vector has already been validated
717 pub unsafe fn from_utf8_owned(v: ~[u8]) -> ~str {
721 /// Converts a byte to a string.
722 pub unsafe fn from_byte(u: u8) -> ~str { from_utf8_owned(box [u]) }
724 /// Access the str in its vector representation.
725 /// The caller must preserve the valid UTF-8 property when modifying.
727 pub unsafe fn as_owned_vec<'a>(s: &'a mut ~str) -> &'a mut ~[u8] {
731 /// Sets the length of a string
733 /// This will explicitly set the size of the string, without actually
734 /// modifying its buffers, so it is up to the caller to ensure that
735 /// the string is actually the specified size.
737 fn test_from_buf_len() {
738 use slice::ImmutableVector;
739 use str::StrAllocating;
742 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
744 let c = from_buf_len(b, 3u);
745 assert_eq!(c, "AAA".to_owned());
751 Section: Trait implementations
754 /// Any string that can be represented as a slice
755 pub trait StrAllocating: Str {
756 /// Convert `self` into a ~str, not making a copy if possible.
757 fn into_owned(self) -> ~str;
759 /// Convert `self` into a `StrBuf`.
761 fn to_strbuf(&self) -> StrBuf {
762 StrBuf::from_str(self.as_slice())
765 /// Convert `self` into a `StrBuf`, not making a copy if possible.
767 fn into_strbuf(self) -> StrBuf {
768 StrBuf::from_owned_str(self.into_owned())
771 /// Escape each char in `self` with `char::escape_default`.
772 fn escape_default(&self) -> ~str {
773 let me = self.as_slice();
774 let mut out = StrBuf::with_capacity(me.len());
775 for c in me.chars() {
776 c.escape_default(|c| out.push_char(c));
781 /// Escape each char in `self` with `char::escape_unicode`.
782 fn escape_unicode(&self) -> ~str {
783 let me = self.as_slice();
784 let mut out = StrBuf::with_capacity(me.len());
785 for c in me.chars() {
786 c.escape_unicode(|c| out.push_char(c));
791 /// Replace all occurrences of one string with another.
795 /// * `from` - The string to replace
796 /// * `to` - The replacement string
800 /// The original string with all occurrences of `from` replaced with `to`.
805 /// let s = "Do you know the muffin man,
806 /// The muffin man, the muffin man, ...".to_owned();
808 /// assert_eq!(s.replace("muffin man", "little lamb"),
809 /// "Do you know the little lamb,
810 /// The little lamb, the little lamb, ...".to_owned());
812 /// // not found, so no change.
813 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
815 fn replace(&self, from: &str, to: &str) -> ~str {
816 let me = self.as_slice();
817 let mut result = StrBuf::new();
818 let mut last_end = 0;
819 for (start, end) in me.match_indices(from) {
820 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
824 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
828 /// Copy a slice into a new owned str.
830 fn to_owned(&self) -> ~str {
834 ::mem::transmute(self.as_slice().as_bytes().to_owned())
838 /// Converts to a vector of `u16` encoded as UTF-16.
839 fn to_utf16(&self) -> Vec<u16> {
840 let me = self.as_slice();
841 let mut u = Vec::new();
842 for ch in me.chars() {
843 let mut buf = [0u16, ..2];
844 let n = ch.encode_utf16(buf /* as mut slice! */);
845 u.push_all(buf.slice_to(n));
850 /// Given a string, make a new string with repeated copies of it.
851 fn repeat(&self, nn: uint) -> ~str {
852 let me = self.as_slice();
853 let mut ret = StrBuf::with_capacity(nn * me.len());
854 for _ in range(0, nn) {
860 /// Levenshtein Distance between two strings.
861 fn lev_distance(&self, t: &str) -> uint {
862 let me = self.as_slice();
866 if slen == 0 { return tlen; }
867 if tlen == 0 { return slen; }
869 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
871 for (i, sc) in me.chars().enumerate() {
874 *dcol.get_mut(0) = current + 1;
876 for (j, tc) in t.chars().enumerate() {
878 let next = *dcol.get(j + 1);
881 *dcol.get_mut(j + 1) = current;
883 *dcol.get_mut(j + 1) = ::cmp::min(current, next);
884 *dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
892 return *dcol.get(tlen);
895 /// An Iterator over the string in Unicode Normalization Form D
896 /// (canonical decomposition).
898 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
900 iter: self.as_slice().chars(),
907 /// An Iterator over the string in Unicode Normalization Form KD
908 /// (compatibility decomposition).
910 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
912 iter: self.as_slice().chars(),
920 impl<'a> StrAllocating for &'a str {
922 fn into_owned(self) -> ~str { self.to_owned() }
925 impl<'a> StrAllocating for ~str {
927 fn into_owned(self) -> ~str { self }
930 /// Methods for owned strings
932 /// Consumes the string, returning the underlying byte buffer.
934 /// The buffer does not have a null terminator.
935 fn into_bytes(self) -> ~[u8];
937 /// Pushes the given string onto this string, returning the concatenation of the two strings.
938 fn append(self, rhs: &str) -> ~str;
941 impl OwnedStr for ~str {
943 fn into_bytes(self) -> ~[u8] {
944 unsafe { mem::transmute(self) }
948 fn append(self, rhs: &str) -> ~str {
949 let mut new_str = StrBuf::from_owned_str(self);
950 new_str.push_str(rhs);
957 use iter::AdditiveIterator;
958 use default::Default;
965 assert!((eq(&"".to_owned(), &"".to_owned())));
966 assert!((eq(&"foo".to_owned(), &"foo".to_owned())));
967 assert!((!eq(&"foo".to_owned(), &"bar".to_owned())));
972 assert!((eq_slice("foobar".slice(0, 3), "foo")));
973 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
974 assert!((!eq_slice("foo1", "foo2")));
980 assert!("" <= "foo");
981 assert!("foo" <= "foo");
982 assert!("foo" != "bar");
987 assert_eq!("".len(), 0u);
988 assert_eq!("hello world".len(), 11u);
989 assert_eq!("\x63".len(), 1u);
990 assert_eq!("\xa2".len(), 2u);
991 assert_eq!("\u03c0".len(), 2u);
992 assert_eq!("\u2620".len(), 3u);
993 assert_eq!("\U0001d11e".len(), 4u);
995 assert_eq!("".char_len(), 0u);
996 assert_eq!("hello world".char_len(), 11u);
997 assert_eq!("\x63".char_len(), 1u);
998 assert_eq!("\xa2".char_len(), 1u);
999 assert_eq!("\u03c0".char_len(), 1u);
1000 assert_eq!("\u2620".char_len(), 1u);
1001 assert_eq!("\U0001d11e".char_len(), 1u);
1002 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
1007 assert_eq!("hello".find('l'), Some(2u));
1008 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
1009 assert!("hello".find('x').is_none());
1010 assert!("hello".find(|c:char| c == 'x').is_none());
1011 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1012 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
1017 assert_eq!("hello".rfind('l'), Some(3u));
1018 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
1019 assert!("hello".rfind('x').is_none());
1020 assert!("hello".rfind(|c:char| c == 'x').is_none());
1021 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1022 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
1027 let empty = "".to_owned();
1028 let s: ~str = empty.chars().collect();
1029 assert_eq!(empty, s);
1030 let data = "ประเทศไทย中".to_owned();
1031 let s: ~str = data.chars().collect();
1032 assert_eq!(data, s);
1036 fn test_into_bytes() {
1037 let data = "asdf".to_owned();
1038 let buf = data.into_bytes();
1039 assert_eq!(bytes!("asdf"), buf.as_slice());
1043 fn test_find_str() {
1045 assert_eq!("".find_str(""), Some(0u));
1046 assert!("banana".find_str("apple pie").is_none());
1048 let data = "abcabc";
1049 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1050 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1051 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1053 let mut data = "ประเทศไทย中华Việt Nam".to_owned();
1055 assert!(data.find_str("ไท华").is_none());
1056 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1057 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1059 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1060 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1061 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1062 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1063 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1065 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1066 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1067 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1068 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1069 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1073 fn test_slice_chars() {
1074 fn t(a: &str, b: &str, start: uint) {
1075 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1078 t("hello", "llo", 2);
1079 t("hello", "el", 1);
1082 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1087 fn t(v: &[~str], s: &str) {
1088 assert_eq!(v.concat(), s.to_str());
1090 t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1091 "no".to_owned(), "good".to_owned()], "youknowI'mnogood");
1092 let v: &[~str] = [];
1094 t(["hi".to_owned()], "hi");
1099 fn t(v: &[~str], sep: &str, s: &str) {
1100 assert_eq!(v.connect(sep), s.to_str());
1102 t(["you".to_owned(), "know".to_owned(), "I'm".to_owned(),
1103 "no".to_owned(), "good".to_owned()],
1104 " ", "you know I'm no good");
1105 let v: &[~str] = [];
1107 t(["hi".to_owned()], " ", "hi");
1111 fn test_concat_slices() {
1112 fn t(v: &[&str], s: &str) {
1113 assert_eq!(v.concat(), s.to_str());
1115 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1116 let v: &[&str] = [];
1122 fn test_connect_slices() {
1123 fn t(v: &[&str], sep: &str, s: &str) {
1124 assert_eq!(v.connect(sep), s.to_str());
1126 t(["you", "know", "I'm", "no", "good"],
1127 " ", "you know I'm no good");
1129 t(["hi"], " ", "hi");
1134 assert_eq!("x".repeat(4), "xxxx".to_owned());
1135 assert_eq!("hi".repeat(4), "hihihihi".to_owned());
1136 assert_eq!("ไท华".repeat(3), "ไท华ไท华ไท华".to_owned());
1137 assert_eq!("".repeat(4), "".to_owned());
1138 assert_eq!("hi".repeat(0), "".to_owned());
1142 fn test_unsafe_slice() {
1143 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1144 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1145 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1146 fn a_million_letter_a() -> ~str {
1148 let mut rs = StrBuf::new();
1150 rs.push_str("aaaaaaaaaa");
1155 fn half_a_million_letter_a() -> ~str {
1157 let mut rs = StrBuf::new();
1159 rs.push_str("aaaaa");
1164 let letters = a_million_letter_a();
1165 assert!(half_a_million_letter_a() ==
1166 unsafe {raw::slice_bytes(letters, 0u, 500000)}.to_owned());
1170 fn test_starts_with() {
1171 assert!(("".starts_with("")));
1172 assert!(("abc".starts_with("")));
1173 assert!(("abc".starts_with("a")));
1174 assert!((!"a".starts_with("abc")));
1175 assert!((!"".starts_with("abc")));
1176 assert!((!"ödd".starts_with("-")));
1177 assert!(("ödd".starts_with("öd")));
1181 fn test_ends_with() {
1182 assert!(("".ends_with("")));
1183 assert!(("abc".ends_with("")));
1184 assert!(("abc".ends_with("c")));
1185 assert!((!"a".ends_with("abc")));
1186 assert!((!"".ends_with("abc")));
1187 assert!((!"ddö".ends_with("-")));
1188 assert!(("ddö".ends_with("dö")));
1192 fn test_is_empty() {
1193 assert!("".is_empty());
1194 assert!(!"a".is_empty());
1200 assert_eq!("".replace(a, "b"), "".to_owned());
1201 assert_eq!("a".replace(a, "b"), "b".to_owned());
1202 assert_eq!("ab".replace(a, "b"), "bb".to_owned());
1204 assert!(" test test ".replace(test, "toast") ==
1205 " toast toast ".to_owned());
1206 assert_eq!(" test test ".replace(test, ""), " ".to_owned());
1210 fn test_replace_2a() {
1211 let data = "ประเทศไทย中华".to_owned();
1212 let repl = "دولة الكويت".to_owned();
1214 let a = "ประเ".to_owned();
1215 let a2 = "دولة الكويتทศไทย中华".to_owned();
1216 assert_eq!(data.replace(a, repl), a2);
1220 fn test_replace_2b() {
1221 let data = "ประเทศไทย中华".to_owned();
1222 let repl = "دولة الكويت".to_owned();
1224 let b = "ะเ".to_owned();
1225 let b2 = "ปรدولة الكويتทศไทย中华".to_owned();
1226 assert_eq!(data.replace(b, repl), b2);
1230 fn test_replace_2c() {
1231 let data = "ประเทศไทย中华".to_owned();
1232 let repl = "دولة الكويت".to_owned();
1234 let c = "中华".to_owned();
1235 let c2 = "ประเทศไทยدولة الكويت".to_owned();
1236 assert_eq!(data.replace(c, repl), c2);
1240 fn test_replace_2d() {
1241 let data = "ประเทศไทย中华".to_owned();
1242 let repl = "دولة الكويت".to_owned();
1244 let d = "ไท华".to_owned();
1245 assert_eq!(data.replace(d, repl), data);
1250 assert_eq!("ab", "abc".slice(0, 2));
1251 assert_eq!("bc", "abc".slice(1, 3));
1252 assert_eq!("", "abc".slice(1, 1));
1253 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1255 let data = "ประเทศไทย中华";
1256 assert_eq!("ป", data.slice(0, 3));
1257 assert_eq!("ร", data.slice(3, 6));
1258 assert_eq!("", data.slice(3, 3));
1259 assert_eq!("华", data.slice(30, 33));
1261 fn a_million_letter_X() -> ~str {
1263 let mut rs = StrBuf::new();
1265 rs.push_str("华华华华华华华华华华");
1270 fn half_a_million_letter_X() -> ~str {
1272 let mut rs = StrBuf::new();
1274 rs.push_str("华华华华华");
1279 let letters = a_million_letter_X();
1280 assert!(half_a_million_letter_X() ==
1281 letters.slice(0u, 3u * 500000u).to_owned());
1286 let ss = "中华Việt Nam";
1288 assert_eq!("华", ss.slice(3u, 6u));
1289 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1291 assert_eq!("ab", "abc".slice(0u, 2u));
1292 assert_eq!("bc", "abc".slice(1u, 3u));
1293 assert_eq!("", "abc".slice(1u, 1u));
1295 assert_eq!("中", ss.slice(0u, 3u));
1296 assert_eq!("华V", ss.slice(3u, 7u));
1297 assert_eq!("", ss.slice(3u, 3u));
1312 fn test_slice_fail() {
1313 "中华Việt Nam".slice(0u, 2u);
1317 fn test_slice_from() {
1318 assert_eq!("abcd".slice_from(0), "abcd");
1319 assert_eq!("abcd".slice_from(2), "cd");
1320 assert_eq!("abcd".slice_from(4), "");
1323 fn test_slice_to() {
1324 assert_eq!("abcd".slice_to(0), "");
1325 assert_eq!("abcd".slice_to(2), "ab");
1326 assert_eq!("abcd".slice_to(4), "abcd");
1330 fn test_trim_left_chars() {
1331 let v: &[char] = &[];
1332 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1333 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1334 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1335 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1337 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1338 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1339 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1343 fn test_trim_right_chars() {
1344 let v: &[char] = &[];
1345 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1346 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1347 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1348 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1350 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1351 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1352 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1356 fn test_trim_chars() {
1357 let v: &[char] = &[];
1358 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1359 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1360 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1361 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1363 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1364 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1365 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1369 fn test_trim_left() {
1370 assert_eq!("".trim_left(), "");
1371 assert_eq!("a".trim_left(), "a");
1372 assert_eq!(" ".trim_left(), "");
1373 assert_eq!(" blah".trim_left(), "blah");
1374 assert_eq!(" \u3000 wut".trim_left(), "wut");
1375 assert_eq!("hey ".trim_left(), "hey ");
1379 fn test_trim_right() {
1380 assert_eq!("".trim_right(), "");
1381 assert_eq!("a".trim_right(), "a");
1382 assert_eq!(" ".trim_right(), "");
1383 assert_eq!("blah ".trim_right(), "blah");
1384 assert_eq!("wut \u3000 ".trim_right(), "wut");
1385 assert_eq!(" hey".trim_right(), " hey");
1390 assert_eq!("".trim(), "");
1391 assert_eq!("a".trim(), "a");
1392 assert_eq!(" ".trim(), "");
1393 assert_eq!(" blah ".trim(), "blah");
1394 assert_eq!("\nwut \u3000 ".trim(), "wut");
1395 assert_eq!(" hey dude ".trim(), "hey dude");
1399 fn test_is_whitespace() {
1400 assert!("".is_whitespace());
1401 assert!(" ".is_whitespace());
1402 assert!("\u2009".is_whitespace()); // Thin space
1403 assert!(" \n\t ".is_whitespace());
1404 assert!(!" _ ".is_whitespace());
1408 fn test_slice_shift_char() {
1409 let data = "ประเทศไทย中";
1410 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1414 fn test_slice_shift_char_2() {
1416 assert_eq!(empty.slice_shift_char(), (None, ""));
1421 // deny overlong encodings
1422 assert!(!is_utf8([0xc0, 0x80]));
1423 assert!(!is_utf8([0xc0, 0xae]));
1424 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1425 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1426 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1427 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1428 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1431 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1432 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1434 assert!(is_utf8([0xC2, 0x80]));
1435 assert!(is_utf8([0xDF, 0xBF]));
1436 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1437 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1438 assert!(is_utf8([0xEE, 0x80, 0x80]));
1439 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1440 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1441 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1445 fn test_is_utf16() {
1446 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1454 // surrogate pairs (randomly generated with Python 3's
1455 // .encode('utf-16be'))
1456 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1457 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1458 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1460 // mixtures (also random)
1461 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1462 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1463 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1466 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1469 // surrogate + regular unit
1471 // surrogate + lead surrogate
1473 // unterminated surrogate
1475 // trail surrogate without a lead
1478 // random byte sequences that Python 3's .decode('utf-16be')
1480 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1481 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1482 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1483 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1484 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1485 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1486 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1487 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1488 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1489 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1490 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1491 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1492 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1493 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1494 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1495 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1496 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1497 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1498 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1499 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1500 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1504 fn test_raw_from_c_str() {
1506 let a = box [65, 65, 65, 65, 65, 65, 65, 0];
1508 let c = raw::from_c_str(b);
1509 assert_eq!(c, "AAAAAAA".to_owned());
1514 fn test_as_bytes() {
1517 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1518 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1521 assert_eq!("".as_bytes(), &[]);
1522 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1523 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1528 fn test_as_bytes_fail() {
1529 // Don't double free. (I'm not sure if this exercises the
1530 // original problem code path anymore.)
1531 let s = "".to_owned();
1532 let _bytes = s.as_bytes();
1538 let buf = "hello".as_ptr();
1540 assert_eq!(*buf.offset(0), 'h' as u8);
1541 assert_eq!(*buf.offset(1), 'e' as u8);
1542 assert_eq!(*buf.offset(2), 'l' as u8);
1543 assert_eq!(*buf.offset(3), 'l' as u8);
1544 assert_eq!(*buf.offset(4), 'o' as u8);
// `subslice_offset` is expected to report where a slice taken from a string
// starts inside that string.
1549 fn test_subslice_offset() {
1550 let a = "kernelsprite";
// `b` is the tail starting at index 7, `c` is the head starting at index 0.
1551 let b = a.slice(7, a.len());
1552 let c = a.slice(0, a.len() - 6);
1553 assert_eq!(a.subslice_offset(b), 7);
1554 assert_eq!(a.subslice_offset(c), 0);
// The pieces produced by `lines()` sit at offsets 0, 2 and 4: each line is one
// character long and is followed by a one-character '\n'.
1556 let string = "a\nb\nc";
1557 let lines: Vec<&str> = string.lines().collect();
1558 let lines = lines.as_slice();
1559 assert_eq!(string.subslice_offset(lines[0]), 0);
1560 assert_eq!(string.subslice_offset(lines[1]), 2);
1561 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with two unrelated string literals and discards the
// result.
// NOTE(review): presumably this is expected to fail because `b` is not a slice
// of `a`; the attribute that would declare that is not visible here - confirm.
1566 fn test_subslice_offset_2() {
1567 let a = "alchemiter";
1568 let b = "cruxtruder";
1569 a.subslice_offset(b);
1573 fn vec_str_conversions() {
1574 let s1: ~str = "All mimsy were the borogoves".to_owned();
1576 let v: ~[u8] = s1.as_bytes().to_owned();
1577 let s2: ~str = from_utf8(v).unwrap().to_owned();
1578 let mut i: uint = 0u;
1579 let n1: uint = s1.len();
1580 let n2: uint = v.len();
// Substring search with `contains`, first over plain ASCII inputs and then
// over a string made of multi-byte characters.
fn test_contains() {
    // (haystack, needle, whether the needle is expected to be found)
    let ascii_cases = [
        ("abcde", "bcd", true),
        ("abcde", "abcd", true),
        ("abcde", "bcde", true),
        // The empty needle is found everywhere, even in an empty haystack.
        ("abcde", "", true),
        ("", "", true),
        ("abcde", "def", false),
        ("", "a", false),
    ];
    for &(haystack, needle, expected) in ascii_cases.iter() {
        assert!(haystack.contains(needle) == expected);
    }

    let data = "ประเทศไทย中华Việt Nam".to_owned();
    let present = ["ประเ", "ะเ", "中华"];
    for needle in present.iter() {
        assert!(data.contains(*needle));
    }
    // Every character occurs in `data`, but never in this order.
    assert!(!data.contains("ไท华"));
}
// `contains_char` must find a character that occurs in the string and reject
// one that does not, including on an empty string.
1610 fn test_contains_char() {
1611 assert!("abc".contains_char('b'));
1612 assert!("a".contains_char('a'));
1613 assert!(!"abc".contains_char('d'));
1614 assert!(!"".contains_char('a'));
1620 [("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_owned(),
1621 vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
1622 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
1623 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
1624 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
1626 ("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_owned(),
1627 vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
1628 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
1629 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
1630 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
1631 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
1634 ("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_owned(),
1635 vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
1636 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
1637 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
1638 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
1639 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
1640 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
1641 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
1643 ("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_owned(),
1644 vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
1645 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
1646 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
1647 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
1648 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
1649 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
1650 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
1651 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
1652 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
1653 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
1655 // Issue #12318, even-numbered non-BMP planes
1656 ("\U00020000".to_owned(),
1657 vec![0xD840, 0xDC00])];
1659 for p in pairs.iter() {
1660 let (s, u) = (*p).clone();
1661 assert!(is_utf16(u.as_slice()));
1662 assert_eq!(s.to_utf16(), u);
1664 assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
1665 assert_eq!(from_utf16_lossy(u.as_slice()), s);
1667 assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
1668 assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
// `from_utf16` is expected to return `None` for each ill-formed input below.
1673 fn test_utf16_invalid() {
1674 // completely positive cases tested above.
// A single unit from the surrogate range with no partner.
1676 assert_eq!(from_utf16([0xD800]), None);
// Two 0xD800 units in a row.
1678 assert_eq!(from_utf16([0xD800, 0xD800]), None);
// 0xDC00 directly after an ordinary unit (0x0061).
1681 assert_eq!(from_utf16([0x0061, 0xDC00]), None);
// The pair 0xd801, 0xdc8b with a stray 0xD800 on either side.
1684 assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
// `from_utf16_lossy` is expected to put U+FFFD in place of every unit that
// cannot be decoded while keeping the decodable parts, as the expectations
// below spell out.
1688 fn test_utf16_lossy() {
1689 // completely positive cases tested above.
// One stray unit -> one replacement character.
1691 assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_owned());
// Two stray units -> two replacement characters.
1693 assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_owned());
// The ordinary unit 0x0061 ('a') survives; the 0xDC00 after it is replaced.
1696 assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_owned());
// The pair in the middle decodes; the 0xD800 on either side is replaced.
1699 assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]), "\uFFFD𐒋\uFFFD".to_owned());
1703 fn test_truncate_utf16_at_nul() {
1705 assert_eq!(truncate_utf16_at_nul(v), &[]);
1708 assert_eq!(truncate_utf16_at_nul(v), &[]);
1711 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1714 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1717 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1722 let s = "ศไทย中华Việt Nam".to_owned();
1723 let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1725 for ch in v.iter() {
1726 assert!(s.char_at(pos) == *ch);
1727 pos += from_char(*ch).len();
// Walks a string from its end towards its start with `char_at_reverse` and
// compares every character against the expected list `v`.
1732 fn test_char_at_reverse() {
1733 let s = "ศไทย中华Việt Nam".to_owned();
1734 let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
// Start just past the last character.
1735 let mut pos = s.len();
1736 for ch in v.iter().rev() {
1737 assert!(s.char_at_reverse(pos) == *ch);
// Step back by the length of the string built from this one character.
1738 pos -= from_char(*ch).len();
// Expected output of `escape_unicode`. In the cases below every character is
// escaped, printable ASCII included: values up to 0xff appear as `\xNN`,
// values up to 0xffff as `\uNNNN`, and larger values as `\UNNNNNNNN`.
1743 fn test_escape_unicode() {
1744 assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_owned());
1745 assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_owned());
1746 assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_owned());
1747 assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_owned());
1748 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_owned());
1749 assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_owned());
1750 assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_owned());
// Mixed widths inside one string.
1751 assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_owned());
1752 assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_owned());
// Expected output of `escape_default`. In the cases below printable ASCII
// (space included) passes through unchanged, `\r` `\n` `\t` use their named
// escapes, quotes and the backslash get a leading backslash, and the
// non-ASCII characters appear as `\uNNNN` or `\UNNNNNNNN`.
1756 fn test_escape_default() {
1757 assert_eq!("abc".escape_default(), "abc".to_owned());
1758 assert_eq!("a c".escape_default(), "a c".to_owned());
1759 assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_owned());
1760 assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_owned());
1761 assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_owned());
1762 assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_owned());
// Mixed content inside one string.
1763 assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_owned());
1764 assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_owned());
// Checks the total ordering of string slices through `cmp`: a string sorts
// after its own proper prefix, identical strings compare `Equal`, and
// otherwise the first differing character decides, whatever the lengths.
fn test_total_ord() {
    // Every comparison is wrapped in `assert!`. A bare `a.cmp(&b) == Greater;`
    // statement only computes a bool and discards it, so it could never fail.
    assert!("1234".cmp(&"123") == Greater);
    assert!("123".cmp(&"1234") == Less);
    assert!("1234".cmp(&"1234") == Equal);
    // '5' < '6' at the sixth character, so the longer string sorts first.
    assert!("12345555".cmp(&"123456") == Less);
    // '2' > '1' at the first character, so the shorter string sorts last.
    assert!("22".cmp(&"1234") == Greater);
}
// `char_range_at(i).ch` must be the character that starts at offset `i`.
// The offsets used below (0, 1, 3, 6, 10, 14, 17, 19) advance by 1, 2, 3, 4,
// 4, 3 and 2, i.e. by the encoded width of each preceding character.
1777 fn test_char_range_at() {
1778 let data = "b¢€𤭢𤭢€¢b".to_owned();
1779 assert_eq!('b', data.char_range_at(0).ch);
1780 assert_eq!('¢', data.char_range_at(1).ch);
1781 assert_eq!('€', data.char_range_at(3).ch);
1782 assert_eq!('𤭢', data.char_range_at(6).ch);
1783 assert_eq!('𤭢', data.char_range_at(10).ch);
1784 assert_eq!('€', data.char_range_at(14).ch);
1785 assert_eq!('¢', data.char_range_at(17).ch);
1786 assert_eq!('b', data.char_range_at(19).ch);
// Asking `char_range_at_reverse` for position 0 must report `next == 0`.
// NOTE(review): going by the test name this guards against the index
// wrapping below zero - confirm against the implementation.
1790 fn test_char_range_at_reverse_underflow() {
1791 assert_eq!("abc".char_range_at_reverse(0).next, 0);
1796 #![allow(unnecessary_allocation)]
1798 ($s1:expr, $s2:expr, $e:expr) => { {
1802 assert_eq!(s1 + s2, e.to_owned());
1803 assert_eq!(s1.to_owned() + s2, e.to_owned());
1807 t!("foo", "bar", "foobar");
1808 t!("foo", "bar".to_owned(), "foobar");
1809 t!("ศไทย中", "华Việt Nam", "ศไทย中华Việt Nam");
1810 t!("ศไทย中", "华Việt Nam".to_owned(), "ศไทย中华Việt Nam");
1814 fn test_iterator() {
1816 let s = "ศไทย中华Việt Nam".to_owned();
1817 let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1820 let mut it = s.chars();
1823 assert_eq!(c, v[pos]);
1826 assert_eq!(pos, v.len());
1830 fn test_rev_iterator() {
1832 let s = "ศไทย中华Việt Nam".to_owned();
1833 let v = box ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1836 let mut it = s.chars().rev();
1839 assert_eq!(c, v[pos]);
1842 assert_eq!(pos, v.len());
1846 fn test_iterator_clone() {
1847 let s = "ศไทย中华Việt Nam";
1848 let mut it = s.chars();
1850 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1854 fn test_bytesator() {
1855 let s = "ศไทย中华Việt Nam".to_owned();
1857 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1858 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1863 for b in s.bytes() {
1864 assert_eq!(b, v[pos]);
1870 fn test_bytes_revator() {
1871 let s = "ศไทย中华Việt Nam".to_owned();
1873 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1874 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1877 let mut pos = v.len();
1879 for b in s.bytes().rev() {
1881 assert_eq!(b, v[pos]);
1886 fn test_char_indicesator() {
1888 let s = "ศไทย中华Việt Nam";
1889 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1890 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1893 let mut it = s.char_indices();
1896 assert_eq!(c, (p[pos], v[pos]));
1899 assert_eq!(pos, v.len());
1900 assert_eq!(pos, p.len());
1904 fn test_char_indices_revator() {
1906 let s = "ศไทย中华Việt Nam";
1907 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1908 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1911 let mut it = s.char_indices().rev();
1914 assert_eq!(c, (p[pos], v[pos]));
1917 assert_eq!(pos, v.len());
1918 assert_eq!(pos, p.len());
1922 fn test_split_char_iterator() {
1923 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1925 let split: Vec<&str> = data.split(' ').collect();
1926 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1928 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1930 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1932 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1933 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1935 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1937 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1940 let split: Vec<&str> = data.split('ä').collect();
1941 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1943 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1945 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1947 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1948 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1950 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1952 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// `splitn(sep, 3)` is expected to yield four pieces: three produced by
// splitting and a last one holding the rest of the input unsplit. Each
// separator is tried both as a `char` and as a closure.
1956 fn test_splitn_char_iterator() {
1957 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
1959 let split: Vec<&str> = data.splitn(' ', 3).collect();
1960 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1962 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1963 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Non-ASCII separator ('ä').
1966 let split: Vec<&str> = data.splitn('ä', 3).collect();
1967 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1969 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1970 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1974 fn test_rsplitn_char_iterator() {
1975 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1977 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1979 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1981 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1983 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1986 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1988 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1990 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1992 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on input that both starts and
// ends with the separator.
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // `split` reports the empty piece after the final '\n'.
    let with_trailing = vec!["", "Märy häd ä little lämb", "Little lämb", ""];
    let pieces: Vec<&str> = data.split('\n').collect();
    assert_eq!(pieces, with_trailing);

    // `split_terminator` leaves that last empty piece out.
    let without_trailing = vec!["", "Märy häd ä little lämb", "Little lämb"];
    let pieces: Vec<&str> = data.split_terminator('\n').collect();
    assert_eq!(pieces, without_trailing);
}
2007 fn test_rev_split_char_iterator_no_trailing() {
2008 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2010 let mut split: Vec<&str> = data.split('\n').rev().collect();
2012 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2014 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
2016 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2021 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2022 let words: Vec<&str> = data.words().collect();
2023 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Each case collects `nfd_chars()` into an owned string and compares it with
// the expected character sequence.
2027 fn test_nfd_chars() {
2028 assert_eq!("abc".nfd_chars().collect::<~str>(), "abc".to_owned());
// Only the first character is taken apart; \u01c4 is expected back as is.
2029 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<~str>(), "d\u0307\u01c4".to_owned());
// \u2026 is expected back unchanged.
2030 assert_eq!("\u2026".nfd_chars().collect::<~str>(), "\u2026".to_owned());
2031 assert_eq!("\u2126".nfd_chars().collect::<~str>(), "\u03a9".to_owned());
// These two different inputs must produce the same output, with \u0323
// placed before \u0307.
2032 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2033 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2034 assert_eq!("a\u0301".nfd_chars().collect::<~str>(), "a\u0301".to_owned());
2035 assert_eq!("\u0301a".nfd_chars().collect::<~str>(), "\u0301a".to_owned());
// Single characters that are expected to expand into three and two characters.
2036 assert_eq!("\ud4db".nfd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2037 assert_eq!("\uac1c".nfd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
// Each case collects `nfkd_chars()` into an owned string and compares it with
// the expected character sequence.
2041 fn test_nfkd_chars() {
2042 assert_eq!("abc".nfkd_chars().collect::<~str>(), "abc".to_owned());
// Both characters are taken apart here: \u01c4 becomes "DZ" plus \u030c.
2043 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<~str>(), "d\u0307DZ\u030c".to_owned());
// \u2026 is expected to turn into three ASCII dots.
2044 assert_eq!("\u2026".nfkd_chars().collect::<~str>(), "...".to_owned());
2045 assert_eq!("\u2126".nfkd_chars().collect::<~str>(), "\u03a9".to_owned());
// These two different inputs must produce the same output, with \u0323
// placed before \u0307.
2046 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2047 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<~str>(), "d\u0323\u0307".to_owned());
2048 assert_eq!("a\u0301".nfkd_chars().collect::<~str>(), "a\u0301".to_owned());
2049 assert_eq!("\u0301a".nfkd_chars().collect::<~str>(), "\u0301a".to_owned());
// Single characters that are expected to expand into three and two characters.
2050 assert_eq!("\ud4db".nfkd_chars().collect::<~str>(), "\u1111\u1171\u11b6".to_owned());
2051 assert_eq!("\uac1c".nfkd_chars().collect::<~str>(), "\u1100\u1162".to_owned());
2056 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2057 let lines: Vec<&str> = data.lines().collect();
2058 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2060 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2061 let lines: Vec<&str> = data.lines().collect();
2062 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
// Checks `split_str` (splitting on a string separator) against expected piece
// lists.
2066 fn test_split_strator() {
// Helper: splits `s` on `sep`, collects the pieces and compares them with `u`.
2067 fn t(s: &str, sep: &str, u: &[&str]) {
2068 let v: Vec<&str> = s.split_str(sep).collect();
2069 assert_eq!(v.as_slice(), u.as_slice());
// Separator that never occurs as a whole: the input comes back as one piece.
2071 t("--1233345--", "12345", ["--1233345--"]);
2072 t("abc::hello::there", "::", ["abc", "hello", "there"]);
// A separator at the very start or end produces an empty piece there.
2073 t("::hello::there", "::", ["", "hello", "there"]);
2074 t("hello::there::", "::", ["hello", "there", ""]);
2075 t("::hello::there::", "::", ["", "hello", "there", ""]);
2076 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2077 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2078 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2079 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Inputs built from the separator's own character: per the expectations,
// matches are taken left to right and do not overlap.
2081 t("zz", "zz", ["",""]);
2082 t("ok", "z", ["ok"]);
2083 t("zzz", "zz", ["","z"]);
2084 t("zzzzz", "zz", ["","","z"]);
2088 fn test_str_default() {
2089 use default::Default;
2090 fn t<S: Default + Str>() {
2091 let s: S = Default::default();
2092 assert_eq!(s.as_slice(), "");
// Checks that string types can be used through the `Container` bound: the
// helper is called with slices of `&str`, of owned strings, and with a slice
// borrowed from an owned string.
2100 fn test_str_container() {
// Helper generic over `Container`: adds up `len()` of every element of `v`.
2101 fn sum_len<S: Container>(v: &[S]) -> uint {
2102 v.iter().map(|x| x.len()).sum()
2105 let s = "01234".to_owned();
2106 assert_eq!(5, sum_len(["012", "", "34"]));
2107 assert_eq!(5, sum_len(["01".to_owned(), "2".to_owned(), "34".to_owned(), "".to_owned()]));
2108 assert_eq!(5, sum_len([s.as_slice()]));
// `from_utf8` is expected to return `Some` with the string for well-formed
// bytes and `None` for ill-formed ones.
2112 fn test_str_from_utf8() {
2113 let xs = bytes!("hello");
2114 assert_eq!(from_utf8(xs), Some("hello"));
2116 let xs = bytes!("ศไทย中华Việt Nam");
2117 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// A trailing 0xff byte makes the whole input invalid.
2119 let xs = bytes!("hello", 0xff);
2120 assert_eq!(from_utf8(xs), None);
// `from_utf8_owned` is expected to return `Ok` with the owned string for
// well-formed bytes and `Err` carrying the same bytes back otherwise.
2124 fn test_str_from_utf8_owned() {
2125 let xs = bytes!("hello").to_owned();
2126 assert_eq!(from_utf8_owned(xs), Ok("hello".to_owned()));
2128 let xs = bytes!("ศไทย中华Việt Nam").to_owned();
2129 assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_owned()));
// On failure the error value is expected to equal the input bytes.
2131 let xs = bytes!("hello", 0xff).to_owned();
2132 assert_eq!(from_utf8_owned(xs), Err(bytes!("hello", 0xff).to_owned()));
// `from_utf8_lossy` is expected to return the `Slice` variant for well-formed
// input and the `Owned` variant, with U+FFFD inserted, for ill-formed input.
// The number of U+FFFD per bad sequence is pinned by each expectation below.
2136 fn test_str_from_utf8_lossy() {
// Well-formed input: result compares equal to a `Slice` of the same text.
2137 let xs = bytes!("hello");
2138 assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2140 let xs = bytes!("ศไทย中华Việt Nam");
2141 assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
// Isolated bad bytes (0xC2 followed by a space, 0xFF): one U+FFFD each.
2143 let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
2144 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_owned()));
// 0xC0 0x80 yields two U+FFFD; the cut-off 0xE6 0x83 yields one.
2146 let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2147 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_owned()));
// 0xF5 alone yields one U+FFFD; 0xF5 0x80 yields two.
2149 let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
2150 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_owned()));
// Sequences starting with 0xF1 that are cut short: one U+FFFD per sequence.
2152 let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
2153 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_owned()));
// 0xF4 0x80 yields one U+FFFD, whereas 0xF4 0xBF yields two.
2155 let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
2156 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar\uFFFD\uFFFDbaz".to_owned()));
// 0xF0 0x80 0x80 0x80 yields four U+FFFD; 0xF0 0x90 0x80 0x80 decodes to
// U+10000.
2158 let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
2159 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2160 foo\U00010000bar".to_owned()));
// 0xED 0xA0 0x80 and 0xED 0xBF 0xBF: three U+FFFD each, one per byte.
2163 let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
2164 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2165 \uFFFD\uFFFD\uFFFDbar".to_owned()));
// `from_str` with an owned-string target is expected to give back `Some`
// holding a copy of the input text.
2169 fn test_from_str() {
2170 let owned: Option<~str> = from_str("string");
2171 assert_eq!(owned, Some("string".to_owned()));
// Runs the same set of checks (length, slice view, string conversion,
// formatting, ordering, default value) on a `Slice` and on an `Owned` value
// holding the same text, then compares the two with each other.
2175 fn test_maybe_owned_traits() {
2176 let s = Slice("abcde");
2177 assert_eq!(s.len(), 5);
2178 assert_eq!(s.as_slice(), "abcde");
2179 assert_eq!(s.to_str(), "abcde".to_owned());
2180 assert_eq!(format!("{}", s), "abcde".to_owned());
// Ordering is checked against the other variant.
2181 assert!(s.lt(&Owned("bcdef".to_owned())));
// The default value is expected to equal an empty `Slice`.
2182 assert_eq!(Slice(""), Default::default());
2184 let o = Owned("abcde".to_owned());
2185 assert_eq!(o.len(), 5);
2186 assert_eq!(o.as_slice(), "abcde");
2187 assert_eq!(o.to_str(), "abcde".to_owned());
2188 assert_eq!(format!("{}", o), "abcde".to_owned());
2189 assert!(o.lt(&Slice("bcdef")));
// ... and equally an empty `Owned`.
2190 assert_eq!(Owned("".to_owned()), Default::default());
// Same text in different variants: `cmp` and `equiv` must agree both ways.
2192 assert!(s.cmp(&o) == Equal);
2193 assert!(s.equiv(&o));
2195 assert!(o.cmp(&s) == Equal);
2196 assert!(o.equiv(&s));
// `is_slice` and `is_owned` must each be true for exactly the matching
// variant.
2200 fn test_maybe_owned_methods() {
2201 let s = Slice("abcde");
2202 assert!(s.is_slice());
2203 assert!(!s.is_owned());
2205 let o = Owned("abcde".to_owned());
2206 assert!(!o.is_slice());
2207 assert!(o.is_owned());
// A clone must compare equal to a value holding the same text. All four
// combinations of `Owned`/`Slice` on either side are covered, so these
// assertions rely on equality not depending on the variant.
2211 fn test_maybe_owned_clone() {
2212 assert_eq!(Owned("abcde".to_owned()), Slice("abcde").clone());
2213 assert_eq!(Owned("abcde".to_owned()), Owned("abcde".to_owned()).clone());
2214 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2215 assert_eq!(Slice("abcde"), Owned("abcde".to_owned()).clone());
// `into_owned` is expected to produce the same owned string from either
// variant.
2219 fn test_maybe_owned_into_owned() {
2220 assert_eq!(Slice("abcde").into_owned(), "abcde".to_owned());
2221 assert_eq!(Owned("abcde".to_owned()).into_owned(), "abcde".to_owned());
// `into_maybe_owned` is called on a string literal and on an owned string.
// Each result is compared with both a `Slice` and an `Owned` of the same
// text, so the assertions check the text only, not which variant came back.
2225 fn test_into_maybe_owned() {
2226 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2227 assert_eq!(("abcde".to_owned()).into_maybe_owned(), Slice("abcde"));
2228 assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_owned()));
2229 assert_eq!(("abcde".to_owned()).into_maybe_owned(), Owned("abcde".to_owned()));
2236 use self::test::Bencher;
// Benchmark body: counts the items of `chars()` over a string mixing
// multi-byte and ASCII characters.
2241 fn char_iterator(b: &mut Bencher) {
2242 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the closure given to `b.iter`.
2243 let len = s.char_len();
2245 b.iter(|| assert_eq!(s.chars().len(), len));
// Benchmark body: counts the items of `chars()` over an ASCII-only string
// literal that spans several source lines.
2249 fn char_iterator_ascii(b: &mut Bencher) {
2250 let s = "Mary had a little lamb, Little lamb
2251 Mary had a little lamb, Little lamb
2252 Mary had a little lamb, Little lamb
2253 Mary had a little lamb, Little lamb
2254 Mary had a little lamb, Little lamb
2255 Mary had a little lamb, Little lamb";
// Reference count, computed once outside the closure given to `b.iter`.
2256 let len = s.char_len();
2258 b.iter(|| assert_eq!(s.chars().len(), len));
// Benchmark body: counts the items of the reversed `chars()` iterator over a
// string mixing multi-byte and ASCII characters.
2262 fn char_iterator_rev(b: &mut Bencher) {
2263 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the closure given to `b.iter`.
2264 let len = s.char_len();
2266 b.iter(|| assert_eq!(s.chars().rev().len(), len));
// Benchmark body: counts the items of `char_indices()` over a string mixing
// multi-byte and ASCII characters.
2270 fn char_indicesator(b: &mut Bencher) {
2271 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the closure given to `b.iter`.
2272 let len = s.char_len();
2274 b.iter(|| assert_eq!(s.char_indices().len(), len));
// Benchmark body: counts the items of the reversed `char_indices()` iterator
// over a string mixing multi-byte and ASCII characters.
2278 fn char_indicesator_rev(b: &mut Bencher) {
2279 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Reference count, computed once outside the closure given to `b.iter`.
2280 let len = s.char_len();
2282 b.iter(|| assert_eq!(s.char_indices().rev().len(), len));
// Benchmark body: splits a mostly non-ASCII string on the ASCII character
// 'V', which occurs twice in it, hence the expected three pieces.
2286 fn split_unicode_ascii(b: &mut Bencher) {
2287 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2289 b.iter(|| assert_eq!(s.split('V').len(), 3));
2293 fn split_unicode_not_ascii(b: &mut Bencher) {
2294 struct NotAscii(char);
2295 impl CharEq for NotAscii {
2296 fn matches(&mut self, c: char) -> bool {
2297 let NotAscii(cc) = *self;
2300 fn only_ascii(&self) -> bool { false }
2302 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2304 b.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
// Benchmark body: splits an ASCII sentence on ' ' with a `char` separator.
2309 fn split_ascii(b: &mut Bencher) {
2310 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed once outside the closure given to `b.iter`.
2311 let len = s.split(' ').len();
2313 b.iter(|| assert_eq!(s.split(' ').len(), len));
2317 fn split_not_ascii(b: &mut Bencher) {
2318 struct NotAscii(char);
2319 impl CharEq for NotAscii {
2321 fn matches(&mut self, c: char) -> bool {
2322 let NotAscii(cc) = *self;
2325 fn only_ascii(&self) -> bool { false }
2327 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2328 let len = s.split(' ').len();
2330 b.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
// Benchmark body: splits an ASCII sentence on ' ' using a named function as
// the separator predicate.
2334 fn split_extern_fn(b: &mut Bencher) {
2335 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, taken with the plain `char` separator.
2336 let len = s.split(' ').len();
2337 fn pred(c: char) -> bool { c == ' ' }
2339 b.iter(|| assert_eq!(s.split(pred).len(), len));
// Benchmark body: splits an ASCII sentence on ' ' using a closure as the
// separator predicate.
2343 fn split_closure(b: &mut Bencher) {
2344 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, taken with the plain `char` separator.
2345 let len = s.split(' ').len();
2347 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
// Benchmark body: splits an ASCII sentence on ' ' using a one-element
// character slice as the separator.
2351 fn split_slice(b: &mut Bencher) {
2352 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, taken with the plain `char` separator.
2353 let len = s.split(' ').len();
2355 b.iter(|| assert_eq!(s.split(&[' ']).len(), len));
2359 fn is_utf8_100_ascii(b: &mut Bencher) {
2361 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2362 Lorem ipsum dolor sit amet, consectetur. ");
2364 assert_eq!(100, s.len());
2371 fn is_utf8_100_multibyte(b: &mut Bencher) {
2372 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2373 assert_eq!(100, s.len());
2380 fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2381 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2382 Lorem ipsum dolor sit amet, consectetur. ");
2384 assert_eq!(100, s.len());
2386 let _ = from_utf8_lossy(s);
2391 fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2392 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2393 assert_eq!(100, s.len());
2395 let _ = from_utf8_lossy(s);
2400 fn from_utf8_lossy_invalid(b: &mut Bencher) {
2401 let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2403 let _ = from_utf8_lossy(s);
2408 fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2409 let s = Vec::from_elem(100, 0xF5u8);
2411 let _ = from_utf8_lossy(s.as_slice());
2416 fn bench_connect(b: &mut Bencher) {
2417 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2419 let v = [s, s, s, s, s, s, s, s, s, s];
2421 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2426 fn bench_contains_short_short(b: &mut Bencher) {
2427 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2431 assert!(haystack.contains(needle));
2436 fn bench_contains_short_long(b: &mut Bencher) {
2438 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2439 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2440 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2441 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2442 tempus vel, gravida nec quam.
2444 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2445 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2446 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2447 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2448 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2449 interdum. Curabitur ut nisi justo.
2451 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2452 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2453 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2454 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2455 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2456 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2457 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2458 Aliquam sit amet placerat lorem.
2460 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2461 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2462 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2463 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2464 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2467 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2468 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2469 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2470 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2471 malesuada sollicitudin quam eu fermentum.";
2472 let needle = "english";
2475 assert!(!haystack.contains(needle));
2480 fn bench_contains_bad_naive(b: &mut Bencher) {
2481 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2482 let needle = "aaaaaaaab";
2485 assert!(!haystack.contains(needle));
2490 fn bench_contains_equal(b: &mut Bencher) {
2491 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2492 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2495 assert!(haystack.contains(needle));