1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
15 Unicode string manipulation (`str` type)
19 Rust's string type is one of the core primitive types of the language. While
20 represented by the name `str`, the name `str` is not actually a valid type in
21 Rust. Each string must also be decorated with a pointer. `String` is used
22 for an owned string, so there is only one commonly-used `str` type in Rust:
25 `&str` is the borrowed string type. This type of string can only be created
26 from other strings, unless it is a static string (see below). As the word
27 "borrowed" implies, this type of string is owned elsewhere, and this string
28 cannot be moved out of.
30 As an example, here's some code that uses a string.
34 let borrowed_string = "This string is borrowed with the 'static lifetime";
38 From the example above, you can see that Rust's string literals have the
39 `'static` lifetime. This is akin to C's concept of a static string.
41 String literals are allocated statically in the rodata of the
42 executable/library. The string then has the type `&'static str` meaning that
43 the string is valid for the `'static` lifetime, otherwise known as the
44 lifetime of the entire program. As can be inferred from the type, these static
45 strings are not mutable.
49 Many languages have immutable strings by default, and Rust has a particular
50 flavor on this idea. As with the rest of Rust types, strings are immutable by
51 default. If a string is declared as `mut`, however, it may be mutated. This
52 works the same way as the rest of Rust's type system in the sense that if
53 there's a mutable reference to a string, there may only be one mutable reference
54 to that string. With these guarantees, strings can easily transition between
55 being mutable/immutable with the same benefits of having mutable strings in
60 Rust's string type, `str`, is a sequence of unicode scalar values encoded as a
61 stream of UTF-8 bytes. All strings are guaranteed to be validly encoded UTF-8
62 sequences. Additionally, strings are not null-terminated and can contain null
The actual representation of strings has a direct mapping to vectors: `&str`
is the same as `&[u8]`.
70 #![doc(primitive = "str")]
74 use core::default::Default;
77 use core::iter::AdditiveIterator;
80 use {Collection, MutableSeq};
86 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
87 pub use core::str::{Bytes, CharSplits};
88 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
89 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
90 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
91 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
92 pub use core::str::{Str, StrSlice};
93 pub use unicode::str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
96 Section: Creating a string
99 /// Deprecated. Replaced by `String::from_utf8`
100 #[deprecated = "Replaced by `String::from_utf8`"]
101 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
102 String::from_utf8(vv)
105 /// Deprecated. Replaced by `String::from_byte`
106 #[deprecated = "Replaced by String::from_byte"]
107 pub fn from_byte(b: u8) -> String {
109 String::from_char(1, b as char)
112 /// Deprecated. Use `String::from_char` or `char::to_string()` instead
113 #[deprecated = "use String::from_char or char.to_string()"]
114 pub fn from_char(ch: char) -> String {
115 String::from_char(1, ch)
/// Deprecated. Collects the characters of `chs`, in order, into a new
/// `String`. Superseded by `String::from_chars`.
#[deprecated = "use String::from_chars instead"]
pub fn from_chars(chs: &[char]) -> String {
    chs.iter()
        .map(|&ch| ch) // copy each char out of the borrowed slice
        .collect()
}
/// Joining operations for a sequence of strings.
pub trait StrVector {
    /// Joins every string in the sequence end to end.
    ///
    /// # Example
    ///
    /// ```rust
    /// let first = "Restaurant at the End of the".to_string();
    /// let second = " Universe".to_string();
    /// let string_vec = vec![first, second];
    /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
    /// ```
    fn concat(&self) -> String;

    /// Joins every string in the sequence, inserting `sep` between
    /// neighbouring elements.
    ///
    /// # Example
    ///
    /// ```rust
    /// let first = "Roast".to_string();
    /// let second = "Sirloin Steak".to_string();
    /// let string_vec = vec![first, second];
    /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
    /// ```
    fn connect(&self, sep: &str) -> String;
}
151 impl<'a, S: Str> StrVector for &'a [S] {
152 fn concat(&self) -> String {
154 return String::new();
157 // `len` calculation may overflow but push_str will check boundaries
158 let len = self.iter().map(|s| s.as_slice().len()).sum();
160 let mut result = String::with_capacity(len);
162 for s in self.iter() {
163 result.push_str(s.as_slice())
169 fn connect(&self, sep: &str) -> String {
171 return String::new();
176 return self.concat();
179 // this is wrong without the guarantee that `self` is non-empty
        // `len` calculation may overflow but push_str will check boundaries
181 let len = sep.len() * (self.len() - 1)
182 + self.iter().map(|s| s.as_slice().len()).sum();
183 let mut result = String::with_capacity(len);
184 let mut first = true;
186 for s in self.iter() {
190 result.push_str(sep);
192 result.push_str(s.as_slice());
198 impl<'a, S: Str> StrVector for Vec<S> {
200 fn concat(&self) -> String {
201 self.as_slice().concat()
205 fn connect(&self, sep: &str) -> String {
206 self.as_slice().connect(sep)
214 // Helper functions used for Unicode normalization
215 fn canonical_sort(comb: &mut [(char, u8)]) {
216 let len = comb.len();
217 for i in range(0, len) {
218 let mut swapped = false;
219 for j in range(1, len-i) {
220 let class_a = *comb[j-1].ref1();
221 let class_b = *comb[j].ref1();
222 if class_a != 0 && class_b != 0 && class_a > class_b {
227 if !swapped { break; }
232 enum DecompositionType {
237 /// External iterator for a string's decomposition's characters.
238 /// Use with the `std::iter` module.
240 pub struct Decompositions<'a> {
241 kind: DecompositionType,
243 buffer: Vec<(char, u8)>,
247 impl<'a> Iterator<char> for Decompositions<'a> {
249 fn next(&mut self) -> Option<char> {
250 match self.buffer.as_slice().head() {
256 Some(&(c, _)) if self.sorted => {
260 _ => self.sorted = false
263 let decomposer = match self.kind {
264 Canonical => unicode::char::decompose_canonical,
265 Compatible => unicode::char::decompose_compatible
269 for ch in self.iter {
270 let buffer = &mut self.buffer;
271 let sorted = &mut self.sorted;
273 let class = unicode::char::canonical_combining_class(d);
274 if class == 0 && !*sorted {
275 canonical_sort(buffer.as_mut_slice());
278 buffer.push((d, class));
285 canonical_sort(self.buffer.as_mut_slice());
289 match self.buffer.shift() {
294 Some((c, _)) => Some(c),
299 fn size_hint(&self) -> (uint, Option<uint>) {
300 let (lower, _) = self.iter.size_hint();
305 /// Replace all occurrences of one string with another
309 /// * s - The string containing substrings to replace
310 /// * from - The string to replace
311 /// * to - The replacement string
315 /// The original string with all occurrences of `from` replaced with `to`
321 /// let string = "orange";
322 /// let new_string = str::replace(string, "or", "str");
323 /// assert_eq!(new_string.as_slice(), "strange");
325 pub fn replace(s: &str, from: &str, to: &str) -> String {
326 let mut result = String::new();
327 let mut last_end = 0;
328 for (start, end) in s.match_indices(from) {
329 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
333 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
341 /// Deprecated. Use `String::from_utf16`.
342 #[deprecated = "Replaced by String::from_utf16"]
343 pub fn from_utf16(v: &[u16]) -> Option<String> {
344 String::from_utf16(v)
/// Deprecated forwarding shim for `String::from_utf16_lossy`.
///
/// Passes `v` through and returns the resulting `String` unchanged.
#[deprecated = "Replaced by String::from_utf16_lossy"]
pub fn from_utf16_lossy(v: &[u16]) -> String {
    let decoded = String::from_utf16_lossy(v);
    decoded
}
353 // Return the initial codepoint accumulator for the first byte.
354 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
355 // for width 3, and 3 bits for width 4
356 macro_rules! utf8_first_byte(
357 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
360 // return the value of $ch updated with continuation byte $byte
361 macro_rules! utf8_acc_cont_byte(
362 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
365 /// Deprecated. Use `String::from_utf8_lossy`.
366 #[deprecated = "Replaced by String::from_utf8_lossy"]
367 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
368 String::from_utf8_lossy(v)
375 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
376 /// This can be useful as an optimization when an allocation is sometimes
377 /// needed but not always.
378 pub enum MaybeOwned<'a> {
379 /// A borrowed string
385 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
386 pub type SendStr = MaybeOwned<'static>;
388 impl<'a> MaybeOwned<'a> {
389 /// Returns `true` if this `MaybeOwned` wraps an owned string
394 /// let string = String::from_str("orange");
395 /// let maybe_owned_string = string.into_maybe_owned();
396 /// assert_eq!(true, maybe_owned_string.is_owned());
399 pub fn is_owned(&self) -> bool {
406 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
411 /// let string = "orange";
412 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
413 /// assert_eq!(true, maybe_owned_string.is_slice());
416 pub fn is_slice(&self) -> bool {
424 /// Trait for moving into a `MaybeOwned`
425 pub trait IntoMaybeOwned<'a> {
426 /// Moves self into a `MaybeOwned`
427 fn into_maybe_owned(self) -> MaybeOwned<'a>;
433 /// let owned_string = String::from_str("orange");
434 /// let maybe_owned_string = owned_string.into_maybe_owned();
435 /// assert_eq!(true, maybe_owned_string.is_owned());
437 impl<'a> IntoMaybeOwned<'a> for String {
439 fn into_maybe_owned(self) -> MaybeOwned<'a> {
447 /// let string = "orange";
448 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
449 /// assert_eq!(false, maybe_owned_str.is_owned());
451 impl<'a> IntoMaybeOwned<'a> for &'a str {
453 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
459 /// let str = "orange";
460 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
461 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
462 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
464 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
466 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
469 impl<'a> PartialEq for MaybeOwned<'a> {
471 fn eq(&self, other: &MaybeOwned) -> bool {
472 self.as_slice() == other.as_slice()
476 impl<'a> Eq for MaybeOwned<'a> {}
478 impl<'a> PartialOrd for MaybeOwned<'a> {
480 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
481 Some(self.cmp(other))
485 impl<'a> Ord for MaybeOwned<'a> {
487 fn cmp(&self, other: &MaybeOwned) -> Ordering {
488 self.as_slice().cmp(&other.as_slice())
492 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
494 fn equiv(&self, other: &S) -> bool {
495 self.as_slice() == other.as_slice()
499 impl<'a> Str for MaybeOwned<'a> {
501 fn as_slice<'b>(&'b self) -> &'b str {
504 Owned(ref s) => s.as_slice()
509 impl<'a> StrAllocating for MaybeOwned<'a> {
511 fn into_string(self) -> String {
513 Slice(s) => String::from_str(s),
519 impl<'a> Collection for MaybeOwned<'a> {
521 fn len(&self) -> uint { self.as_slice().len() }
524 impl<'a> Clone for MaybeOwned<'a> {
526 fn clone(&self) -> MaybeOwned<'a> {
528 Slice(s) => Slice(s),
529 Owned(ref s) => Owned(String::from_str(s.as_slice()))
534 impl<'a> Default for MaybeOwned<'a> {
536 fn default() -> MaybeOwned<'a> { Slice("") }
539 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
541 fn hash(&self, hasher: &mut H) {
542 self.as_slice().hash(hasher)
546 impl<'a> fmt::Show for MaybeOwned<'a> {
548 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
550 Slice(ref s) => s.fmt(f),
551 Owned(ref s) => s.fmt(f)
556 /// Unsafe operations
558 use core::prelude::*;
560 use core::raw::Slice;
567 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
568 pub use core::str::raw::{slice_unchecked};
570 /// Create a Rust string from a *u8 buffer of the given length
571 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
572 let mut result = String::new();
573 result.push_bytes(mem::transmute(Slice {
580 /// Create a Rust string from a null-terminated C string
581 pub unsafe fn from_c_str(c_string: *const i8) -> String {
582 let mut buf = String::new();
584 while *c_string.offset(len) != 0 {
587 buf.push_bytes(mem::transmute(Slice {
594 /// Converts an owned vector of bytes to a new owned string. This assumes
595 /// that the utf-8-ness of the vector has already been validated
597 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
601 /// Converts a byte to a string.
602 pub unsafe fn from_byte(u: u8) -> String {
603 from_utf8_owned(vec![u])
606 /// Sets the length of a string
608 /// This will explicitly set the size of the string, without actually
609 /// modifying its buffers, so it is up to the caller to ensure that
610 /// the string is actually the specified size.
612 fn test_from_buf_len() {
613 use slice::ImmutableVector;
616 let a = vec![65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
618 let c = from_buf_len(b, 3u);
619 assert_eq!(c, String::from_str("AAA"));
625 Section: Trait implementations
628 /// Any string that can be represented as a slice
629 pub trait StrAllocating: Str {
630 /// Convert `self` into a `String`, not making a copy if possible.
631 fn into_string(self) -> String;
633 #[allow(missing_doc)]
634 #[deprecated = "replaced by .into_string()"]
635 fn into_owned(self) -> String {
639 /// Escape each char in `s` with `char::escape_default`.
640 fn escape_default(&self) -> String {
641 let me = self.as_slice();
642 let mut out = String::with_capacity(me.len());
643 for c in me.chars() {
644 c.escape_default(|c| out.push_char(c));
649 /// Escape each char in `s` with `char::escape_unicode`.
650 fn escape_unicode(&self) -> String {
651 let me = self.as_slice();
652 let mut out = String::with_capacity(me.len());
653 for c in me.chars() {
654 c.escape_unicode(|c| out.push_char(c));
659 /// Replace all occurrences of one string with another.
663 /// * `from` - The string to replace
664 /// * `to` - The replacement string
668 /// The original string with all occurrences of `from` replaced with `to`.
673 /// let s = "Do you know the muffin man,
674 /// The muffin man, the muffin man, ...".to_string();
676 /// assert_eq!(s.replace("muffin man", "little lamb"),
677 /// "Do you know the little lamb,
678 /// The little lamb, the little lamb, ...".to_string());
680 /// // not found, so no change.
681 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
683 fn replace(&self, from: &str, to: &str) -> String {
684 let me = self.as_slice();
685 let mut result = String::new();
686 let mut last_end = 0;
687 for (start, end) in me.match_indices(from) {
688 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
692 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
696 #[allow(missing_doc)]
697 #[deprecated = "obsolete, use `to_string`"]
699 fn to_owned(&self) -> String {
701 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
705 /// Converts to a vector of `u16` encoded as UTF-16.
706 #[deprecated = "use `utf16_units` instead"]
707 fn to_utf16(&self) -> Vec<u16> {
708 self.as_slice().utf16_units().collect::<Vec<u16>>()
711 /// Given a string, make a new string with repeated copies of it.
712 fn repeat(&self, nn: uint) -> String {
713 let me = self.as_slice();
714 let mut ret = String::with_capacity(nn * me.len());
715 for _ in range(0, nn) {
721 /// Levenshtein Distance between two strings.
722 fn lev_distance(&self, t: &str) -> uint {
723 let me = self.as_slice();
727 if slen == 0 { return tlen; }
728 if tlen == 0 { return slen; }
730 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
732 for (i, sc) in me.chars().enumerate() {
735 *dcol.get_mut(0) = current + 1;
737 for (j, tc) in t.chars().enumerate() {
739 let next = *dcol.get(j + 1);
742 *dcol.get_mut(j + 1) = current;
744 *dcol.get_mut(j + 1) = cmp::min(current, next);
745 *dcol.get_mut(j + 1) = cmp::min(*dcol.get(j + 1),
753 return *dcol.get(tlen);
756 /// An Iterator over the string in Unicode Normalization Form D
757 /// (canonical decomposition).
759 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
761 iter: self.as_slice().chars(),
768 /// An Iterator over the string in Unicode Normalization Form KD
769 /// (compatibility decomposition).
771 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
773 iter: self.as_slice().chars(),
781 impl<'a> StrAllocating for &'a str {
783 fn into_string(self) -> String {
784 String::from_str(self)
788 /// Methods for owned strings
790 /// Consumes the string, returning the underlying byte buffer.
792 /// The buffer does not have a null terminator.
793 fn into_bytes(self) -> Vec<u8>;
795 /// Pushes the given string onto this string, returning the concatenation of the two strings.
796 fn append(self, rhs: &str) -> String;
799 impl OwnedStr for String {
801 fn into_bytes(self) -> Vec<u8> {
802 unsafe { mem::transmute(self) }
806 fn append(mut self, rhs: &str) -> String {
814 use std::iter::AdditiveIterator;
815 use std::iter::range;
816 use std::default::Default;
818 use std::clone::Clone;
819 use std::cmp::{Equal, Greater, Less, Ord, PartialOrd, Equiv};
820 use std::option::{Some, None};
821 use std::ptr::RawPtr;
822 use std::iter::{Iterator, DoubleEndedIterator};
823 use {Collection, MutableSeq};
826 use std::slice::{Vector, ImmutableVector};
830 use unicode::char::UnicodeChar;
834 assert!((eq_slice("foobar".slice(0, 3), "foo")));
835 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
836 assert!((!eq_slice("foo1", "foo2")));
842 assert!("" <= "foo");
843 assert!("foo" <= "foo");
844 assert!("foo" != "bar");
849 assert_eq!("".len(), 0u);
850 assert_eq!("hello world".len(), 11u);
851 assert_eq!("\x63".len(), 1u);
852 assert_eq!("\xa2".len(), 2u);
853 assert_eq!("\u03c0".len(), 2u);
854 assert_eq!("\u2620".len(), 3u);
855 assert_eq!("\U0001d11e".len(), 4u);
857 assert_eq!("".char_len(), 0u);
858 assert_eq!("hello world".char_len(), 11u);
859 assert_eq!("\x63".char_len(), 1u);
860 assert_eq!("\xa2".char_len(), 1u);
861 assert_eq!("\u03c0".char_len(), 1u);
862 assert_eq!("\u2620".char_len(), 1u);
863 assert_eq!("\U0001d11e".char_len(), 1u);
864 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
866 assert_eq!("hello".width(false), 10u);
867 assert_eq!("hello".width(true), 10u);
868 assert_eq!("\0\0\0\0\0".width(false), 0u);
869 assert_eq!("\0\0\0\0\0".width(true), 0u);
870 assert_eq!("".width(false), 0u);
871 assert_eq!("".width(true), 0u);
872 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
873 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
878 assert_eq!("hello".find('l'), Some(2u));
879 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
880 assert!("hello".find('x').is_none());
881 assert!("hello".find(|c:char| c == 'x').is_none());
882 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
883 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
888 assert_eq!("hello".rfind('l'), Some(3u));
889 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
890 assert!("hello".rfind('x').is_none());
891 assert!("hello".rfind(|c:char| c == 'x').is_none());
892 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
893 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
898 let empty = String::from_str("");
899 let s: String = empty.as_slice().chars().collect();
900 assert_eq!(empty, s);
901 let data = String::from_str("ประเทศไทย中");
902 let s: String = data.as_slice().chars().collect();
907 fn test_into_bytes() {
908 let data = String::from_str("asdf");
909 let buf = data.into_bytes();
910 assert_eq!(b"asdf", buf.as_slice());
916 assert_eq!("".find_str(""), Some(0u));
917 assert!("banana".find_str("apple pie").is_none());
920 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
921 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
922 assert!(data.slice(2u, 4u).find_str("ab").is_none());
924 let string = "ประเทศไทย中华Việt Nam";
925 let mut data = String::from_str(string);
926 data.push_str(string);
927 assert!(data.as_slice().find_str("ไท华").is_none());
928 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
929 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
931 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
932 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
933 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
934 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
935 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
937 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
938 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
939 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
940 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
941 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
945 fn test_slice_chars() {
946 fn t(a: &str, b: &str, start: uint) {
947 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
950 t("hello", "llo", 2);
954 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
959 fn t(v: &[String], s: &str) {
960 assert_eq!(v.concat().as_slice(), s);
962 t([String::from_str("you"), String::from_str("know"),
963 String::from_str("I'm"),
964 String::from_str("no"), String::from_str("good")],
966 let v: &[String] = [];
968 t([String::from_str("hi")], "hi");
973 fn t(v: &[String], sep: &str, s: &str) {
974 assert_eq!(v.connect(sep).as_slice(), s);
976 t([String::from_str("you"), String::from_str("know"),
977 String::from_str("I'm"),
978 String::from_str("no"), String::from_str("good")],
979 " ", "you know I'm no good");
980 let v: &[String] = [];
982 t([String::from_str("hi")], " ", "hi");
986 fn test_concat_slices() {
987 fn t(v: &[&str], s: &str) {
988 assert_eq!(v.concat().as_slice(), s);
990 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
997 fn test_connect_slices() {
998 fn t(v: &[&str], sep: &str, s: &str) {
999 assert_eq!(v.connect(sep).as_slice(), s);
1001 t(["you", "know", "I'm", "no", "good"],
1002 " ", "you know I'm no good");
1004 t(["hi"], " ", "hi");
1009 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1010 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1011 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1012 assert_eq!("".repeat(4), String::from_str(""));
1013 assert_eq!("hi".repeat(0), String::from_str(""));
1017 fn test_unsafe_slice() {
1018 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1019 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1020 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1021 fn a_million_letter_a() -> String {
1023 let mut rs = String::new();
1025 rs.push_str("aaaaaaaaaa");
1030 fn half_a_million_letter_a() -> String {
1032 let mut rs = String::new();
1034 rs.push_str("aaaaa");
1039 let letters = a_million_letter_a();
1040 assert!(half_a_million_letter_a() ==
1041 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
// Table-driven check of `starts_with`: each row is
// (haystack, prefix, expected answer).
fn test_starts_with() {
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(haystack, prefix, expected) in cases.iter() {
        assert!(haystack.starts_with(prefix) == expected);
    }
}
// Table-driven check of `ends_with`: each row is
// (haystack, suffix, expected answer).
fn test_ends_with() {
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(haystack, suffix, expected) in cases.iter() {
        assert!(haystack.ends_with(suffix) == expected);
    }
}
// The empty string reports empty; a one-character string does not.
fn test_is_empty() {
    let (empty, non_empty) = ("", "a");
    assert!(empty.is_empty());
    assert!(!non_empty.is_empty());
}
1077 assert_eq!("".replace(a, "b"), String::from_str(""));
1078 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1079 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1081 assert!(" test test ".replace(test, "toast") ==
1082 String::from_str(" toast toast "));
1083 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1087 fn test_replace_2a() {
1088 let data = "ประเทศไทย中华";
1089 let repl = "دولة الكويت";
1092 let a2 = "دولة الكويتทศไทย中华";
1093 assert_eq!(data.replace(a, repl).as_slice(), a2);
1097 fn test_replace_2b() {
1098 let data = "ประเทศไทย中华";
1099 let repl = "دولة الكويت";
1102 let b2 = "ปรدولة الكويتทศไทย中华";
1103 assert_eq!(data.replace(b, repl).as_slice(), b2);
1107 fn test_replace_2c() {
1108 let data = "ประเทศไทย中华";
1109 let repl = "دولة الكويت";
1112 let c2 = "ประเทศไทยدولة الكويت";
1113 assert_eq!(data.replace(c, repl).as_slice(), c2);
1117 fn test_replace_2d() {
1118 let data = "ประเทศไทย中华";
1119 let repl = "دولة الكويت";
1122 assert_eq!(data.replace(d, repl).as_slice(), data);
1127 assert_eq!("ab", "abc".slice(0, 2));
1128 assert_eq!("bc", "abc".slice(1, 3));
1129 assert_eq!("", "abc".slice(1, 1));
1130 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1132 let data = "ประเทศไทย中华";
1133 assert_eq!("ป", data.slice(0, 3));
1134 assert_eq!("ร", data.slice(3, 6));
1135 assert_eq!("", data.slice(3, 3));
1136 assert_eq!("华", data.slice(30, 33));
1138 fn a_million_letter_x() -> String {
1140 let mut rs = String::new();
1142 rs.push_str("华华华华华华华华华华");
1147 fn half_a_million_letter_x() -> String {
1149 let mut rs = String::new();
1151 rs.push_str("华华华华华");
1156 let letters = a_million_letter_x();
1157 assert!(half_a_million_letter_x() ==
1158 String::from_str(letters.as_slice().slice(0u, 3u * 500000u)));
1163 let ss = "中华Việt Nam";
1165 assert_eq!("华", ss.slice(3u, 6u));
1166 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1168 assert_eq!("ab", "abc".slice(0u, 2u));
1169 assert_eq!("bc", "abc".slice(1u, 3u));
1170 assert_eq!("", "abc".slice(1u, 1u));
1172 assert_eq!("中", ss.slice(0u, 3u));
1173 assert_eq!("华V", ss.slice(3u, 7u));
1174 assert_eq!("", ss.slice(3u, 3u));
1189 fn test_slice_fail() {
1190 "中华Việt Nam".slice(0u, 2u);
1194 fn test_slice_from() {
1195 assert_eq!("abcd".slice_from(0), "abcd");
1196 assert_eq!("abcd".slice_from(2), "cd");
1197 assert_eq!("abcd".slice_from(4), "");
1200 fn test_slice_to() {
1201 assert_eq!("abcd".slice_to(0), "");
1202 assert_eq!("abcd".slice_to(2), "ab");
1203 assert_eq!("abcd".slice_to(4), "abcd");
1207 fn test_trim_left_chars() {
1208 let v: &[char] = &[];
1209 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1210 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1211 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1212 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1214 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1215 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1216 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1220 fn test_trim_right_chars() {
1221 let v: &[char] = &[];
1222 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1223 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1224 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1225 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1227 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1228 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1229 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1233 fn test_trim_chars() {
1234 let v: &[char] = &[];
1235 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1236 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1237 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1238 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1240 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1241 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1242 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1246 fn test_trim_left() {
1247 assert_eq!("".trim_left(), "");
1248 assert_eq!("a".trim_left(), "a");
1249 assert_eq!(" ".trim_left(), "");
1250 assert_eq!(" blah".trim_left(), "blah");
1251 assert_eq!(" \u3000 wut".trim_left(), "wut");
1252 assert_eq!("hey ".trim_left(), "hey ");
1256 fn test_trim_right() {
1257 assert_eq!("".trim_right(), "");
1258 assert_eq!("a".trim_right(), "a");
1259 assert_eq!(" ".trim_right(), "");
1260 assert_eq!("blah ".trim_right(), "blah");
1261 assert_eq!("wut \u3000 ".trim_right(), "wut");
1262 assert_eq!(" hey".trim_right(), " hey");
1267 assert_eq!("".trim(), "");
1268 assert_eq!("a".trim(), "a");
1269 assert_eq!(" ".trim(), "");
1270 assert_eq!(" blah ".trim(), "blah");
1271 assert_eq!("\nwut \u3000 ".trim(), "wut");
1272 assert_eq!(" hey dude ".trim(), "hey dude");
1276 fn test_is_whitespace() {
1277 assert!("".is_whitespace());
1278 assert!(" ".is_whitespace());
1279 assert!("\u2009".is_whitespace()); // Thin space
1280 assert!(" \n\t ".is_whitespace());
1281 assert!(!" _ ".is_whitespace());
1285 fn test_slice_shift_char() {
1286 let data = "ประเทศไทย中";
1287 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1291 fn test_slice_shift_char_2() {
1293 assert_eq!(empty.slice_shift_char(), (None, ""));
1298 // deny overlong encodings
1299 assert!(!is_utf8([0xc0, 0x80]));
1300 assert!(!is_utf8([0xc0, 0xae]));
1301 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1302 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1303 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1304 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1305 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1308 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1309 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1311 assert!(is_utf8([0xC2, 0x80]));
1312 assert!(is_utf8([0xDF, 0xBF]));
1313 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1314 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1315 assert!(is_utf8([0xEE, 0x80, 0x80]));
1316 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1317 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1318 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
// Exercises `is_utf16` through two local macros: `pos!` asserts that every
// argument is accepted, `neg!` asserts that every argument is rejected.
// NOTE(review): several lines of this test are not visible in this excerpt,
// including the inputs that belong to the category comments below `neg!`.
1322 fn test_is_utf16() {
// `pos!(a, b, ...)` expands to one `assert!(is_utf16(x))` per argument.
1323 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1331 // surrogate pairs (randomly generated with Python 3's
1332 // .encode('utf-16be'))
1333 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1334 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1335 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1337 // mixtures (also random)
1338 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1339 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1340 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
// `neg!(a, b, ...)` expands to one `assert!(!is_utf16(x))` per argument.
1343 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1346 // surrogate + regular unit
1348 // surrogate + lead surrogate
1350 // unterminated surrogate
1352 // trail surrogate without a lead
1355 // random byte sequences that Python 3's .decode('utf-16be')
// (the continuation of the comment above is not visible in this excerpt)
1357 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1358 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1359 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1360 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1361 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1362 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1363 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1364 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1365 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1366 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1367 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1368 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1369 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1370 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1371 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1372 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1373 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1374 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1375 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1376 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1377 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// Checks that `raw::from_c_str` produces the string "AAAAAAA".
// NOTE(review): `b` is bound on a line not visible in this excerpt —
// presumably a pointer into `a`; confirm.
1381 fn test_raw_from_c_str() {
// Seven bytes of 65 (ASCII 'A') followed by a terminating 0.
1383 let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
1385 let c = raw::from_c_str(b);
1386 assert_eq!(c, String::from_str("AAAAAAA"));
// Checks `as_bytes` on an empty string, an ASCII string and a multi-byte
// (Thai / Chinese / Vietnamese) string.
1391 fn test_as_bytes() {
// The two integer lines below are part of a byte-list literal whose opening
// and closing lines are not visible in this excerpt; presumably it is the
// `v` compared against in the last assertion — confirm.
1394 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1395 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1398 assert_eq!("".as_bytes(), &[]);
1399 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1400 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
// Regression test: takes `as_bytes` of an empty owned `String` and keeps the
// borrow in `_bytes` so that both values are dropped at the end of the scope.
// NOTE(review): the rest of the body (and any attribute on the test) is not
// visible in this excerpt.
1405 fn test_as_bytes_fail() {
1406 // Don't double free. (I'm not sure if this exercises the
1407 // original problem code path anymore.)
1408 let s = String::from_str("");
1409 let _bytes = s.as_bytes();
// Reads the five bytes behind the pointer returned by `"hello".as_ptr()` and
// compares each with the expected ASCII letter.
// NOTE(review): the enclosing `fn` header and the `unsafe` block that the raw
// dereferences require are not visible in this excerpt.
1415 let buf = "hello".as_ptr();
1417 assert_eq!(*buf.offset(0), 'h' as u8);
1418 assert_eq!(*buf.offset(1), 'e' as u8);
1419 assert_eq!(*buf.offset(2), 'l' as u8);
1420 assert_eq!(*buf.offset(3), 'l' as u8);
1421 assert_eq!(*buf.offset(4), 'o' as u8);
// Checks that `subslice_offset` reports the byte offset of a slice inside the
// string it was taken from.
1426 fn test_subslice_offset() {
1427 let a = "kernelsprite";
// `b` is the tail starting at byte 7 ("prite"); `c` is the first six bytes
// ("kernel"), so it starts at offset 0.
1428 let b = a.slice(7, a.len());
1429 let c = a.slice(0, a.len() - 6);
1430 assert_eq!(a.subslice_offset(b), 7);
1431 assert_eq!(a.subslice_offset(c), 0);
// Slices produced by `lines()` point into `string`; each line is one byte
// long and followed by a one-byte '\n', hence offsets 0, 2 and 4.
1433 let string = "a\nb\nc";
1434 let lines: Vec<&str> = string.lines().collect();
1435 let lines = lines.as_slice();
1436 assert_eq!(string.subslice_offset(lines[0]), 0);
1437 assert_eq!(string.subslice_offset(lines[1]), 2);
1438 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with a string that is not a slice of the receiver
// and discards the result.
// NOTE(review): no assertion is made, so the test presumably relies on an
// expected-failure attribute that is not visible in this excerpt — confirm.
1443 fn test_subslice_offset_2() {
1444 let a = "alchemiter";
1445 let b = "cruxtruder";
1446 a.subslice_offset(b);
// Round-trips a string through a byte vector (`as_bytes` -> `Vec<u8>` ->
// `from_utf8` -> `String`) and reads bytes of the original (`s1`) and the
// round-tripped copy (`s2`) at the same index `i`.
// NOTE(review): the loop header and the comparisons of `n1`/`n2` and `a`/`b`
// are not visible in this excerpt.
1450 fn vec_str_conversions() {
1451 let s1: String = String::from_str("All mimsy were the borogoves");
1453 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1454 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1455 let mut i: uint = 0u;
1456 let n1: uint = s1.len();
1457 let n2: uint = v.len();
1460 let a: u8 = s1.as_bytes()[i];
1461 let b: u8 = s2.as_bytes()[i];
// Checks substring search with `contains`: matches in the middle, at the
// start and at the end, the empty needle, misses, and multi-byte text.
1470 fn test_contains() {
1471 assert!("abcde".contains("bcd"));
1472 assert!("abcde".contains("abcd"));
1473 assert!("abcde".contains("bcde"));
// The empty needle is expected to be found in any haystack, including "".
1474 assert!("abcde".contains(""));
1475 assert!("".contains(""));
1476 assert!(!"abcde".contains("def"));
1477 assert!(!"".contains("a"));
// Multi-byte haystack: needles made of adjacent characters must match; the
// last needle joins characters that are not adjacent in `data` and must not.
1479 let data = "ประเทศไทย中华Việt Nam";
1480 assert!(data.contains("ประเ"));
1481 assert!(data.contains("ะเ"));
1482 assert!(data.contains("中华"));
1483 assert!(!data.contains("ไท华"));
// Checks `contains_char` for a present character, a single-character string,
// an absent character and the empty string.
1487 fn test_contains_char() {
1488 assert!("abc".contains_char('b'));
1489 assert!("a".contains_char('a'));
1490 assert!(!"abc".contains_char('d'));
1491 assert!(!"".contains_char('a'));
// Checks `truncate_utf16_at_nul` against expected prefixes of increasing
// length (empty, empty, [1], [1, 2], [1, 2, 3]).
// NOTE(review): each assertion uses a `v` bound on a line that is not visible
// in this excerpt, so the inputs cannot be stated here.
1495 fn test_truncate_utf16_at_nul() {
1497 assert_eq!(truncate_utf16_at_nul(v), &[]);
1500 assert_eq!(truncate_utf16_at_nul(v), &[]);
1503 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1506 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1509 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
// Walks `s` front to back: at each byte position `pos` the character from
// `char_at` must equal the next expected character, then `pos` advances by
// that character's encoded length (taken from a one-character `String`).
// NOTE(review): the enclosing `fn` header and the declaration of `pos` are
// not visible in this excerpt.
1514 let s = "ศไทย中华Việt Nam";
1515 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1517 for ch in v.iter() {
1518 assert!(s.char_at(pos) == *ch);
1519 pos += String::from_char(1, *ch).len();
// Walks `s` back to front: starting at `s.len()`, `char_at_reverse(pos)` must
// return the expected characters in reverse order, and `pos` steps back by
// each character's encoded length.
1524 fn test_char_at_reverse() {
1525 let s = "ศไทย中华Việt Nam";
1526 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1527 let mut pos = s.len();
1528 for ch in v.iter().rev() {
1529 assert!(s.char_at_reverse(pos) == *ch);
1530 pos -= String::from_char(1, *ch).len();
// Checks `escape_unicode`, which escapes every character. The expectations
// below use `\xNN` for code points up to 0xff, `\uNNNN` up to 0xffff and
// `\UNNNNNNNN` above that.
1535 fn test_escape_unicode() {
1536 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1537 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1538 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1539 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1540 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), String::from_str("\\x00\\x01\\xfe\\xff"));
1541 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1542 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1543 String::from_str("\\U00010000\\U0010ffff"));
// Mixed widths in a single string.
1544 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1545 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
// Checks `escape_default`. In the expectations below, letters and spaces pass
// through unchanged, control characters, quotes and the backslash get
// backslash escapes, and the non-ASCII characters tested get `\u` / `\U`
// escapes.
1549 fn test_escape_default() {
1550 assert_eq!("abc".escape_default(), String::from_str("abc"));
1551 assert_eq!("a c".escape_default(), String::from_str("a c"));
1552 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1553 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1554 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1555 assert_eq!("\U00010000\U0010ffff".escape_default(),
1556 String::from_str("\\U00010000\\U0010ffff"));
// Mixed ASCII and non-ASCII in a single string.
1557 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1558 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Checks the total ordering (`cmp`) of string slices: a longer string sorts
// after its own prefix, equal strings compare `Equal`, and otherwise the
// first differing byte decides regardless of length.
//
// Every comparison is wrapped in `assert_eq!`. A bare `a.cmp(b) == X;`
// statement only computes a boolean and throws it away, so it can never make
// the test fail.
fn test_total_ord() {
    // "123" is a proper prefix of "1234".
    assert_eq!("1234".cmp(&("123")), Greater);
    assert_eq!("123".cmp(&("1234")), Less);
    assert_eq!("1234".cmp(&("1234")), Equal);
    // First difference is at index 5: '5' < '6', even though the left side is longer.
    assert_eq!("12345555".cmp(&("123456")), Less);
    // First difference is at index 0: '2' > '1', even though the left side is shorter.
    assert_eq!("22".cmp(&("1234")), Greater);
}
// Checks that `char_range_at(i).ch` decodes the character starting at byte
// offset `i`. The characters in `data` are 1, 2, 3, 4, 4, 3, 2 and 1 bytes
// long, which gives the start offsets 0, 1, 3, 6, 10, 14, 17 and 19.
1571 fn test_char_range_at() {
1572 let data = "b¢€𤭢𤭢€¢b";
1573 assert_eq!('b', data.char_range_at(0).ch);
1574 assert_eq!('¢', data.char_range_at(1).ch);
1575 assert_eq!('€', data.char_range_at(3).ch);
1576 assert_eq!('𤭢', data.char_range_at(6).ch);
1577 assert_eq!('𤭢', data.char_range_at(10).ch);
1578 assert_eq!('€', data.char_range_at(14).ch);
1579 assert_eq!('¢', data.char_range_at(17).ch);
1580 assert_eq!('b', data.char_range_at(19).ch);
// Checks that `char_range_at_reverse(0)` reports `next == 0`, i.e. stepping
// backwards from the start of the string does not move below offset 0.
1584 fn test_char_range_at_reverse_underflow() {
1585 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Checks that `chars()` yields the characters of `s` in order: each yielded
// `c` is compared with `v[pos]`, and at the end `pos` must equal `v.len()`.
// NOTE(review): the declaration of `pos`, the loop header and the increment
// are not visible in this excerpt.
1589 fn test_iterator() {
1590 let s = "ศไทย中华Việt Nam";
1591 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1594 let mut it = s.chars();
1597 assert_eq!(c, v[pos]);
1600 assert_eq!(pos, v.len());
// Checks that `chars().rev()` yields the characters of `s` last to first;
// `v` lists the expected characters already in reverse order.
// NOTE(review): the declaration of `pos`, the loop header and the increment
// are not visible in this excerpt.
1604 fn test_rev_iterator() {
1605 let s = "ศไทย中华Việt Nam";
1606 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1609 let mut it = s.chars().rev();
1612 assert_eq!(c, v[pos]);
1615 assert_eq!(pos, v.len());
// Exhaustive round-trip: every u32 below 0x110000 that `from_u32` accepts as
// a `char` is encoded to UTF-8 in a 4-byte scratch buffer, viewed as a `str`,
// and decoded again; the first char from `chars()` must be the original.
1619 fn test_chars_decoding() {
1620 let mut bytes = [0u8, ..4];
1621 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
// `len` is the number of bytes written; only that prefix of the buffer is
// decoded.
1622 let len = c.encode_utf8(bytes);
1623 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1624 if Some(c) != s.chars().next() {
1625 fail!("character {:x}={} does not decode correctly", c as u32, c);
// Same exhaustive round-trip as the forward decoding test, but decodes with
// the reversed iterator: the first char from `chars().rev()` must be the
// original character.
1631 fn test_chars_rev_decoding() {
1632 let mut bytes = [0u8, ..4];
1633 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
// `len` is the number of bytes written; only that prefix of the buffer is
// decoded.
1634 let len = c.encode_utf8(bytes);
1635 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1636 if Some(c) != s.chars().rev().next() {
1637 fail!("character {:x}={} does not decode correctly", c as u32, c);
// Checks that a cloned `chars()` iterator yields the same items as the
// iterator it was cloned from, by zipping the two and comparing each pair.
// NOTE(review): one line between the creation of `it` and the assertion is
// not visible in this excerpt.
1643 fn test_iterator_clone() {
1644 let s = "ศไทย中华Việt Nam";
1645 let mut it = s.chars();
1647 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Checks that `bytes()` yields the bytes of `s` in order by comparing each
// one with `v[pos]`.
// NOTE(review): the two integer lines are part of a literal whose opening and
// closing lines are not visible here (presumably the binding of `v`); the
// declaration and increment of `pos` are not visible either.
1651 fn test_bytesator() {
1652 let s = "ศไทย中华Việt Nam";
1654 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1655 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1660 for b in s.bytes() {
1661 assert_eq!(b, v[pos]);
// Checks that `bytes().rev()` yields the bytes of `s` last to first: `pos`
// starts at `v.len()` and each byte is compared with `v[pos]`.
// NOTE(review): the two integer lines are part of a literal whose opening and
// closing lines are not visible here (presumably the binding of `v`); the
// decrement of `pos` is not visible either.
1667 fn test_bytes_revator() {
1668 let s = "ศไทย中华Việt Nam";
1670 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1671 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1674 let mut pos = v.len();
1676 for b in s.bytes().rev() {
1678 assert_eq!(b, v[pos]);
// Checks that `char_indices()` yields `(byte offset, char)` pairs in order:
// `p` holds the expected offsets and `v` the expected characters.
// NOTE(review): the declaration of `pos`, the loop header and the increment
// are not visible in this excerpt.
1683 fn test_char_indicesator() {
1684 let s = "ศไทย中华Việt Nam";
1685 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1686 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1689 let mut it = s.char_indices();
1692 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
1695 assert_eq!(pos, v.len());
1696 assert_eq!(pos, p.len());
// Checks that `char_indices().rev()` yields `(byte offset, char)` pairs last
// to first; `p` and `v` list the expected values already in reverse order.
// NOTE(review): the declaration of `pos`, the loop header and the increment
// are not visible in this excerpt.
1700 fn test_char_indices_revator() {
1701 let s = "ศไทย中华Việt Nam";
1702 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1703 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1706 let mut it = s.char_indices().rev();
1709 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
1712 assert_eq!(pos, v.len());
1713 assert_eq!(pos, p.len());
// Checks `split` with a `char` separator and with a closure predicate, for an
// ASCII separator (' ') and a multi-byte one ('ä'), forwards and via `.rev()`.
// NOTE(review): the `rsplit` vectors are collected from reversed iterators
// but compared with forward-order expectations; presumably each is reversed
// again on a line not visible in this excerpt — confirm.
1717 fn test_split_char_iterator() {
1718 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1720 let split: Vec<&str> = data.split(' ').collect();
1721 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1723 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1725 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Same separator expressed as a predicate closure.
1727 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1728 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1730 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1732 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Multi-byte separator.
1735 let split: Vec<&str> = data.split('ä').collect();
1736 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1738 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1740 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1742 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1743 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1745 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1747 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn(sep, 3)`: the expectations show three splits performed from
// the left, giving four pieces with the remainder left unsplit in the last
// one. Tested with a `char` and a closure, for ' ' and the multi-byte 'ä'.
1751 fn test_splitn_char_iterator() {
1752 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1754 let split: Vec<&str> = data.splitn(' ', 3).collect();
1755 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1757 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1758 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Multi-byte separator.
1761 let split: Vec<&str> = data.splitn('ä', 3).collect();
1762 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1764 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1765 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Checks `rsplitn(sep, 3)`: the expectations show three splits performed from
// the right, with the unsplit remainder as the first piece.
// NOTE(review): the expectations are in left-to-right order while each
// `split` is declared `mut`; presumably it is reversed on a line not visible
// in this excerpt — confirm.
1769 fn test_rsplitn_char_iterator() {
1770 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1772 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1774 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1776 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1778 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
// Multi-byte separator.
1781 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1783 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1785 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1787 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on input that ends with the
// separator: `split` yields a trailing empty piece, `split_terminator` does
// not. Both keep the leading empty piece.
1791 fn test_split_char_iterator_no_trailing() {
1792 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1794 let split: Vec<&str> = data.split('\n').collect();
1795 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1797 let split: Vec<&str> = data.split_terminator('\n').collect();
1798 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Reversed-iterator variant of the `split` / `split_terminator` comparison on
// input that ends with the separator.
// NOTE(review): the vectors are collected from `.rev()` but compared with
// forward-order expectations; presumably each is reversed again on a line not
// visible in this excerpt — confirm.
1802 fn test_rev_split_char_iterator_no_trailing() {
1803 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1805 let mut split: Vec<&str> = data.split('\n').rev().collect();
1807 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1809 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1811 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Checks `words()`: the input mixes spaces, tabs and newlines (including
// leading and trailing ones), and the expected result contains only the
// non-whitespace runs, with no empty pieces.
// NOTE(review): the enclosing `fn` header is not visible in this excerpt.
1816 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1817 let words: Vec<&str> = data.words().collect();
1818 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `nfd_chars()` by collecting its output into a `String` and comparing
// it with the expected decomposed form.
1822 fn test_nfd_chars() {
// ASCII is unchanged.
1823 assert_eq!("abc".nfd_chars().collect::<String>(), String::from_str("abc"));
// U+1E0B becomes 'd' + U+0307; U+01C4 and U+2026 are expected unchanged here
// (the nfkd test expects them to be decomposed).
1824 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(),
1825 String::from_str("d\u0307\u01c4"));
1826 assert_eq!("\u2026".nfd_chars().collect::<String>(), String::from_str("\u2026"));
1827 assert_eq!("\u2126".nfd_chars().collect::<String>(), String::from_str("\u03a9"));
// Two different inputs are expected to produce the same mark order,
// 'd' + U+0323 + U+0307.
1828 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(),
1829 String::from_str("d\u0323\u0307"));
1830 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(),
1831 String::from_str("d\u0323\u0307"));
// Inputs expected to come back unchanged.
1832 assert_eq!("a\u0301".nfd_chars().collect::<String>(), String::from_str("a\u0301"));
1833 assert_eq!("\u0301a".nfd_chars().collect::<String>(), String::from_str("\u0301a"));
// Hangul syllables are expected to decompose into U+11xx code points.
1834 assert_eq!("\ud4db".nfd_chars().collect::<String>(),
1835 String::from_str("\u1111\u1171\u11b6"));
1836 assert_eq!("\uac1c".nfd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
// Checks `nfkd_chars()` by collecting its output into a `String` and
// comparing it with the expected decomposed form. The inputs match the nfd
// test; the expectations differ for U+01C4 and U+2026.
1840 fn test_nfkd_chars() {
// ASCII is unchanged.
1841 assert_eq!("abc".nfkd_chars().collect::<String>(), String::from_str("abc"));
// U+01C4 is expected to become "DZ" + U+030C, and U+2026 three ASCII dots.
1842 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(),
1843 String::from_str("d\u0307DZ\u030c"));
1844 assert_eq!("\u2026".nfkd_chars().collect::<String>(), String::from_str("..."));
1845 assert_eq!("\u2126".nfkd_chars().collect::<String>(), String::from_str("\u03a9"));
// Two different inputs are expected to produce the same mark order,
// 'd' + U+0323 + U+0307.
1846 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(),
1847 String::from_str("d\u0323\u0307"));
1848 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(),
1849 String::from_str("d\u0323\u0307"));
// Inputs expected to come back unchanged.
1850 assert_eq!("a\u0301".nfkd_chars().collect::<String>(), String::from_str("a\u0301"));
1851 assert_eq!("\u0301a".nfkd_chars().collect::<String>(),
1852 String::from_str("\u0301a"));
// Hangul syllables are expected to decompose into U+11xx code points.
1853 assert_eq!("\ud4db".nfkd_chars().collect::<String>(),
1854 String::from_str("\u1111\u1171\u11b6"));
1855 assert_eq!("\uac1c".nfkd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
// Checks `lines()`: empty lines (leading and in the middle) are kept as empty
// strings, and the result is the same whether or not the input ends with a
// final '\n'.
// NOTE(review): the enclosing `fn` header is not visible in this excerpt.
1860 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1861 let lines: Vec<&str> = data.lines().collect();
1862 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1864 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1865 let lines: Vec<&str> = data.lines().collect();
1866 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1870 fn test_graphemes() {
1871 use std::iter::order;
1872 // official Unicode test data
1873 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1875 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1876 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1877 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1878 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1879 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1880 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1881 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1882 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1883 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1884 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1885 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1886 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1887 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1888 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1889 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1890 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1891 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1892 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1893 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1894 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1895 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1896 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1897 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
1898 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
1899 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
1900 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
1901 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
1902 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
1903 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
1904 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
1905 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
1906 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
1907 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
1908 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
1909 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
1910 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
1911 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
1912 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
1913 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
1914 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
1915 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
1916 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
1917 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
1918 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
1919 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
1920 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
1921 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
1922 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
1923 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
1924 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
1925 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
1926 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
1927 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
1928 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
1929 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
1930 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
1931 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
1932 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
1933 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
1934 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
1935 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
1936 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
1937 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
1938 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
1939 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
1940 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
1941 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
1942 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
1943 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
1944 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
1945 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
1946 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
1947 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
1948 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
1949 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
1950 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
1951 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
1952 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
1953 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
1954 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
1955 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
1956 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
1957 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
1958 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
1959 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
1960 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
1961 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
1962 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
1963 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
1964 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
1965 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
1966 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
1967 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
1968 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
1969 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
1970 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
1971 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
1972 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
1973 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
1974 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
1975 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
1976 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
1977 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
1978 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
1979 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
1980 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
1981 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
1982 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
1983 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
1984 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
1985 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
1986 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
1987 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
1988 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
1989 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
1990 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
1991 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
1992 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
1993 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
1994 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
1995 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
1996 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
1997 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
1998 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
1999 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
2000 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
2001 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
2002 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
2003 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
2004 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
2005 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
2006 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
2007 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
2008 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
2009 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
2010 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
2011 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
2012 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
2013 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
2014 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
2015 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
2016 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
2017 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
2018 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
2019 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
2020 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
2021 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
2022 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
2023 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
2024 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
2025 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
2026 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
2027 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
2028 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2029 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2030 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2031 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2032 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2033 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2034 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2035 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2036 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2037 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2038 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2039 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2040 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2041 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2042 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2043 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2044 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2045 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2046 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2047 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2048 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2049 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2050 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2051 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2052 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2053 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2054 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2055 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2056 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2057 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2058 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2059 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2060 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2061 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2062 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2063 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2064 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2065 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2066 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2067 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2068 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2069 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2070 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2071 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2072 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2073 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2074 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2075 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2076 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2077 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2078 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2079 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2080 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2084 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2085 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2086 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2087 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2088 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2089 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2090 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2091 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2092 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2093 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2094 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2095 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2096 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2097 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2098 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2099 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2100 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2101 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2102 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2103 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2104 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2105 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2106 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2109 for &(s, g) in test_same.iter() {
2110 // test forward iterator
2111 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2112 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2114 // test reverse iterator
2115 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2116 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2119 for &(s, gt, gf) in test_diff.iter() {
2120 // test forward iterator
2121 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2122 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2124 // test reverse iterator
2125 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2126 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2129 // test the indices iterators
2130 let s = "a̐éö̲\r\n";
2131 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2132 assert_eq!(gr_inds.as_slice(), &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]);
2133 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2134 assert_eq!(gr_inds.as_slice(), &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")]);
2135 let mut gr_inds = s.grapheme_indices(true);
2136 let e1 = gr_inds.size_hint();
2137 assert_eq!(e1, (1, Some(13)));
2138 let c = gr_inds.count();
2140 let e2 = gr_inds.size_hint();
2141 assert_eq!(e2, (0, Some(0)));
2143 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2145 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2146 assert_eq!(gr.as_slice(), &["\r", "\r\n", "\n"]);
// Exercises `split_str`: the helper `t` splits `s` on the string separator `sep`,
// collects the pieces into a `Vec`, and compares them with the expected slice `u`.
2150 fn test_split_strator() {
2151 fn t(s: &str, sep: &str, u: &[&str]) {
2152 let v: Vec<&str> = s.split_str(sep).collect();
2153 assert_eq!(v.as_slice(), u.as_slice());
// Separator absent from the input: the input is expected back as the only piece.
2155 t("--1233345--", "12345", ["--1233345--"]);
2156 t("abc::hello::there", "::", ["abc", "hello", "there"]);
// A separator at the very start or end is expected to yield an empty piece on that side.
2157 t("::hello::there", "::", ["", "hello", "there"]);
2158 t("hello::there::", "::", ["hello", "there", ""]);
2159 t("::hello::there::", "::", ["", "hello", "there", ""]);
// Non-ASCII separator inside non-ASCII text.
2160 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2161 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2162 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2163 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Input equal to the separator, lacking it, or made of repeated separator characters.
// The expected pieces for "zzz" and "zzzzz" correspond to non-overlapping matches
// taken from left to right.
2165 t("zz", "zz", ["",""]);
2166 t("ok", "z", ["ok"]);
2167 t("zzz", "zz", ["","z"]);
2168 t("zzzzz", "zz", ["","","z"]);
// Checks the `Default` value of string-like types: the generic helper `t` builds the
// default value of `S` and asserts that its `as_slice()` is the empty string.
2172 fn test_str_default() {
2173 use std::default::Default;
2174 fn t<S: Default + Str>() {
2175 let s: S = Default::default();
2176 assert_eq!(s.as_slice(), "");
// Checks `len()` as seen through the `Collection` bound. `sum_len` adds up the
// lengths of all elements of a slice; every case below must total 5.
2184 fn test_str_container() {
2185 fn sum_len<S: Collection>(v: &[S]) -> uint {
2186 v.iter().map(|x| x.len()).sum()
2189 let s = String::from_str("01234");
// String literals, including an empty one.
2190 assert_eq!(5, sum_len(["012", "", "34"]));
// `String` values, including an empty one.
2191 assert_eq!(5, sum_len([String::from_str("01"), String::from_str("2"),
2192 String::from_str("34"), String::from_str("")]));
// A single slice borrowed from a `String`.
2193 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8` on byte slices: `Some` of the decoded string is expected for
// the inputs compared against "hello" and the non-ASCII text, and `None` is
// expected for `b"hello\xFF"`.
2197 fn test_str_from_utf8() {
2199 assert_eq!(from_utf8(xs), Some("hello"));
// Round trip: the bytes of a non-ASCII string must decode back to the same text.
2201 let xs = "ศไทย中华Việt Nam".as_bytes();
2202 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// Input ending in the byte 0xFF is expected to be rejected.
2204 let xs = b"hello\xFF";
2205 assert_eq!(from_utf8(xs), None);
// Checks the trait behaviour of `Slice(..)` and `Owned(..)` values holding the same
// text: length, `as_slice`, formatting, ordering, the default value, and
// comparison between the two forms.
2209 fn test_maybe_owned_traits() {
// Borrowed form.
2210 let s = Slice("abcde");
2211 assert_eq!(s.len(), 5);
2212 assert_eq!(s.as_slice(), "abcde");
2213 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2214 assert_eq!(format!("{}", s).as_slice(), "abcde");
2215 assert!(s.lt(&Owned(String::from_str("bcdef"))));
2216 assert_eq!(Slice(""), Default::default());
// Owned form: the same checks, with the opposite form on the other side of `lt`.
2218 let o = Owned(String::from_str("abcde"));
2219 assert_eq!(o.len(), 5);
2220 assert_eq!(o.as_slice(), "abcde");
2221 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2222 assert_eq!(format!("{}", o).as_slice(), "abcde");
2223 assert!(o.lt(&Slice("bcdef")));
2224 assert_eq!(Owned(String::from_str("")), Default::default());
// The two forms must compare as equal in both directions, via `cmp` and `equiv`.
2226 assert!(s.cmp(&o) == Equal);
2227 assert!(s.equiv(&o));
2229 assert!(o.cmp(&s) == Equal);
2230 assert!(o.equiv(&s));
// Checks the `is_slice` / `is_owned` predicates: a `Slice(..)` value must report
// slice and not owned, and an `Owned(..)` value the reverse.
2234 fn test_maybe_owned_methods() {
2235 let s = Slice("abcde");
2236 assert!(s.is_slice());
2237 assert!(!s.is_owned());
2239 let o = Owned(String::from_str("abcde"));
2240 assert!(!o.is_slice());
2241 assert!(o.is_owned());
// Checks `clone()` on both `Slice(..)` and `Owned(..)` values. Each clone is compared
// against both an `Owned` and a `Slice` holding the same text, so the assertions
// pin the cloned contents in every combination of the two forms.
2245 fn test_maybe_owned_clone() {
2246 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2247 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2248 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2249 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks `into_string()`: both a `Slice(..)` and an `Owned(..)` holding "abcde" must
// convert to a `String` equal to "abcde".
2253 fn test_maybe_owned_into_string() {
2254 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2255 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2256 String::from_str("abcde"));
// Checks `into_maybe_owned()` on a string literal and on a `String`. Each result is
// compared against both `Slice("abcde")` and `Owned(..)` with the same text, so
// the assertions pin the contents of the result for all four combinations.
2260 fn test_into_maybe_owned() {
2261 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2262 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2263 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2264 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2265 Owned(String::from_str("abcde")));
2272 use test::black_box;
2274 use std::option::{None, Some};
2275 use std::iter::{Iterator, DoubleEndedIterator};
2276 use std::collections::Collection;
// Benchmark: counts the chars of a string that mixes non-ASCII and ASCII text by
// running `s.chars().count()` inside `b.iter`.
2279 fn char_iterator(b: &mut Bencher) {
2280 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2282 b.iter(|| s.chars().count());
// Benchmark variant that walks the chars of the same mixed non-ASCII/ASCII string
// with an explicit `for` loop, handing every char to `black_box`.
2286 fn char_iterator_for(b: &mut Bencher) {
2287 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2290 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of an ASCII-only, multi-line string literal by
// running `s.chars().count()` inside `b.iter`.
2295 fn char_iterator_ascii(b: &mut Bencher) {
2296 let s = "Mary had a little lamb, Little lamb
2297 Mary had a little lamb, Little lamb
2298 Mary had a little lamb, Little lamb
2299 Mary had a little lamb, Little lamb
2300 Mary had a little lamb, Little lamb
2301 Mary had a little lamb, Little lamb";
2303 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed non-ASCII/ASCII string while iterating
// from the back (`chars().rev().count()`).
2307 fn char_iterator_rev(b: &mut Bencher) {
2308 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2310 b.iter(|| s.chars().rev().count());
// Benchmark variant that walks the chars of the mixed non-ASCII/ASCII string in
// reverse with an explicit `for` loop, handing every char to `black_box`.
2314 fn char_iterator_rev_for(b: &mut Bencher) {
2315 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2318 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: iterates `char_indices()` over a mixed non-ASCII/ASCII string and
// asserts that the number of items equals `char_len()`.
2323 fn char_indicesator(b: &mut Bencher) {
2324 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Computed once, outside the measured closure.
2325 let len = s.char_len();
2327 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: iterates `char_indices()` in reverse over a mixed non-ASCII/ASCII
// string and asserts that the number of items equals `char_len()`.
2331 fn char_indicesator_rev(b: &mut Bencher) {
2332 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Computed once, outside the measured closure.
2333 let len = s.char_len();
2335 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly non-ASCII string on the ASCII char 'V' and asserts
// that the split yields 3 pieces.
2339 fn split_unicode_ascii(b: &mut Bencher) {
2340 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2342 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits a mostly non-ASCII string on 'V', but passes the char wrapped in
// a local `CharEq` implementation whose `only_ascii()` returns `false`. The
// split must yield 3 pieces.
// NOTE(review): presumably the wrapper exists to keep `split` off any ASCII-only
// path so the result can be compared with `split_unicode_ascii` — confirm.
2346 fn split_unicode_not_ascii(b: &mut Bencher) {
2347 struct NotAscii(char);
2348 impl CharEq for NotAscii {
2349 fn matches(&mut self, c: char) -> bool {
// Unwrap the stored char for the comparison against `c`.
2350 let NotAscii(cc) = *self;
2353 fn only_ascii(&self) -> bool { false }
2355 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2357 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space char and asserts that the piece
// count matches a reference count taken before measurement.
2362 fn split_ascii(b: &mut Bencher) {
2363 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed once outside the measured closure.
2364 let len = s.split(' ').count();
2366 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on a space wrapped in a local `CharEq`
// implementation whose `only_ascii()` returns `false`, and asserts that the piece
// count equals the count obtained from a plain `split(' ')`.
// NOTE(review): presumably the wrapper exists to keep `split` off any ASCII-only
// path so the result can be compared with `split_ascii` — confirm.
2370 fn split_not_ascii(b: &mut Bencher) {
2371 struct NotAscii(char);
2372 impl CharEq for NotAscii {
2374 fn matches(&mut self, c: char) -> bool {
// Unwrap the stored char for the comparison against `c`.
2375 let NotAscii(cc) = *self;
2378 fn only_ascii(&self) -> bool { false }
2380 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain char separator.
2381 let len = s.split(' ').count();
2383 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using the named function `pred` (true for a
// space) as the separator, and asserts that the piece count equals the count from
// a plain `split(' ')`.
2387 fn split_extern_fn(b: &mut Bencher) {
2388 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain char separator.
2389 let len = s.split(' ').count();
2390 fn pred(c: char) -> bool { c == ' ' }
2392 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure (true for a space) as the
// separator, and asserts that the piece count equals the count from a plain
// `split(' ')`.
2396 fn split_closure(b: &mut Bencher) {
2397 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain char separator.
2398 let len = s.split(' ').count();
2400 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a one-element char slice (`&[' ']`) as
// the separator, and asserts that the piece count equals the count from a plain
// `split(' ')`.
2404 fn split_slice(b: &mut Bencher) {
2405 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain char separator.
2406 let len = s.split(' ').count();
2408 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
// Benchmark over an ASCII byte-string literal; the literal's length is asserted to
// be exactly 100 bytes.
2412 fn is_utf8_100_ascii(b: &mut Bencher) {
2414 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2415 Lorem ipsum dolor sit amet, consectetur. ";
// Guards the fixture size that the function name advertises.
2417 assert_eq!(100, s.len());
// Benchmark over the bytes of a non-ASCII string literal; the byte length is
// asserted to be exactly 100.
2424 fn is_utf8_100_multibyte(b: &mut Bencher) {
2425 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guards the fixture size that the function name advertises.
2426 assert_eq!(100, s.len());
// Benchmark for `connect`: joins ten copies of `s` with the separator `sep` and
// checks the length of the result.
2433 fn bench_connect(b: &mut Bencher) {
2434 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2436 let v = [s, s, s, s, s, s, s, s, s, s];
// Ten strings are joined by nine separators, hence the expected total length.
2438 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark for `contains` on a short, single-sentence haystack; asserts that
// `needle` is found in it.
2443 fn bench_contains_short_short(b: &mut Bencher) {
2444 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2448 assert!(haystack.contains(needle));
// Benchmark for `contains` with a short needle ("english") and a long haystack made
// of several paragraphs of lorem-ipsum filler text; asserts that the needle is
// NOT found.
2453 fn bench_contains_short_long(b: &mut Bencher) {
2455 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2456 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2457 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2458 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2459 tempus vel, gravida nec quam.
2461 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2462 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2463 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2464 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2465 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2466 interdum. Curabitur ut nisi justo.
2468 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2469 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2470 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2471 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2472 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2473 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2474 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2475 Aliquam sit amet placerat lorem.
2477 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2478 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2479 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2480 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2481 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2484 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2485 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2486 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2487 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2488 malesuada sollicitudin quam eu fermentum.";
2489 let needle = "english";
2492 assert!(!haystack.contains(needle));
// Benchmark for `contains` where the haystack is a long run of 'a' and the needle is
// a run of 'a' ending in 'b'; asserts that the needle is NOT found.
// NOTE(review): judging by the name, this input is meant to be a bad case for a
// naive substring search, since every position matches all but the needle's
// last char — confirm.
2497 fn bench_contains_bad_naive(b: &mut Bencher) {
2498 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2499 let needle = "aaaaaaaab";
2502 assert!(!haystack.contains(needle));
// Benchmark for `contains` where the needle is the same text as the whole haystack;
// asserts that the needle is found.
2507 fn bench_contains_equal(b: &mut Bencher) {
2508 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2509 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2512 assert!(haystack.contains(needle));