1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
15 Unicode string manipulation (`str` type)
19 Rust's string type is one of the core primitive types of the language. While
20 represented by the name `str`, the name `str` is not actually a valid type in
21 Rust. Each string must also be decorated with a pointer. `String` is used
22 for an owned string, so there is only one commonly-used `str` type in Rust:
25 `&str` is the borrowed string type. This type of string can only be created
26 from other strings, unless it is a static string (see below). As the word
27 "borrowed" implies, this type of string is owned elsewhere, and this string
28 cannot be moved out of.
30 As an example, here's some code that uses a string.
34 let borrowed_string = "This string is borrowed with the 'static lifetime";
38 From the example above, you can see that Rust's string literals have the
39 `'static` lifetime. This is akin to C's concept of a static string.
41 String literals are allocated statically in the rodata of the
42 executable/library. The string then has the type `&'static str` meaning that
43 the string is valid for the `'static` lifetime, otherwise known as the
44 lifetime of the entire program. As can be inferred from the type, these static
45 strings are not mutable.
49 Many languages have immutable strings by default, and Rust has its own
50 flavor of this idea. As with the rest of Rust's types, strings are immutable by
51 default. If a string is declared as `mut`, however, it may be mutated. This
52 works the same way as the rest of Rust's type system in the sense that if
53 there's a mutable reference to a string, there may only be one mutable reference
54 to that string. With these guarantees, strings can easily transition between
55 being mutable/immutable with the same benefits of having mutable strings in
60 Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
61 stream of UTF-8 bytes. All strings are guaranteed to be validly encoded UTF-8
62 sequences. Additionally, strings are not null-terminated and can contain null
65 The actual representation of strings has a direct mapping to vectors: `&str`
66 is the same as `&[u8]`.
70 #![doc(primitive = "str")]
74 use core::default::Default;
77 use core::iter::AdditiveIterator;
80 use {Collection, MutableSeq};
86 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
87 pub use core::str::{Bytes, CharSplits};
88 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
89 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
90 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
91 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
92 pub use core::str::{Str, StrSlice};
93 pub use unicode::str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
96 Section: Creating a string
99 /// Deprecated. Replaced by `String::from_utf8`
///
/// Takes ownership of the byte vector `vv`, hands it to `String::from_utf8`
/// and returns that call's `Result` unchanged.
/// NOTE(review): the `Err(Vec<u8>)` arm presumably carries the rejected bytes
/// back to the caller when they are not valid UTF-8 -- confirm against
/// `String::from_utf8`.
100 #[deprecated = "Replaced by `String::from_utf8`"]
101 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
102 String::from_utf8(vv)
105 /// Deprecated. Replaced by `String::from_byte`
///
/// The result is built with `String::from_char(1, b as char)`, i.e. the byte
/// is cast to a `char` before the string is created.
/// NOTE(review): a statement between the signature and that call is not
/// visible in this excerpt; check it for any precondition placed on `b`.
106 #[deprecated = "Replaced by String::from_byte"]
107 pub fn from_byte(b: u8) -> String {
109 String::from_char(1, b as char)
112 /// Deprecated. Use `String::from_char` or `char::to_string()` instead
///
/// Forwards to `String::from_char` with the count argument fixed at 1.
/// NOTE(review): presumably this yields a string holding exactly one copy of
/// `ch` -- confirm against `String::from_char`.
113 #[deprecated = "use String::from_char or char.to_string()"]
114 pub fn from_char(ch: char) -> String {
115 String::from_char(1, ch)
118 /// Deprecated. Replaced by `String::from_chars`
///
/// Copies each `char` out of `chs`, in order, and collects the copies into a
/// new `String`. The input slice is only borrowed.
119 #[deprecated = "use String::from_chars instead"]
120 pub fn from_chars(chs: &[char]) -> String {
121 chs.iter().map(|c| *c).collect()
124 /// Methods for vectors of strings
125 pub trait StrVector {
126 /// Concatenate a vector of strings.
131 /// let first = "Restaurant at the End of the".to_string();
132 /// let second = " Universe".to_string();
133 /// let string_vec = vec![first, second];
134 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
136 fn concat(&self) -> String;
138 /// Concatenate a vector of strings, placing a given separator between each.
143 /// let first = "Roast".to_string();
144 /// let second = "Sirloin Steak".to_string();
145 /// let string_vec = vec![first, second];
146 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
148 fn connect(&self, sep: &str) -> String;
// `StrVector` for a borrowed slice whose elements implement `Str`; every
// element is read through `as_slice()`.
151 impl<'a, S: Str> StrVector for &'a [S] {
// Appends every element, in iteration order, to one freshly built `String`.
152 fn concat(&self) -> String {
// Early exit with an empty `String`. NOTE(review): the guarding condition is
// not visible in this excerpt -- presumably an emptiness check on `self`.
154 return String::new();
157 // `len` calculation may overflow but push_str will check boundaries
// Sum of the lengths of all elements; used only to pre-size `result`.
158 let len = self.iter().map(|s| s.as_slice().len()).sum();
160 let mut result = String::with_capacity(len);
162 for s in self.iter() {
163 result.push_str(s.as_slice())
// Appends every element to one `String`, with `sep` pushed in between.
169 fn connect(&self, sep: &str) -> String {
// Two early exits follow; their guarding conditions are not visible in this
// excerpt. The second one reuses `concat`.
171 return String::new();
176 return self.concat();
179 // this is wrong without the guarantee that `self` is non-empty
180 // `len` calculation may overflow but push_str will check boundaries
// Capacity estimate: `self.len() - 1` separators plus the length of every
// element. The subtraction is why `self` has to be non-empty at this point.
181 let len = sep.len() * (self.len() - 1)
182 + self.iter().map(|s| s.as_slice().len()).sum();
183 let mut result = String::with_capacity(len);
// NOTE(review): `first` is presumably what keeps `sep` from being pushed
// ahead of the first element; the lines that test it are not visible here.
184 let mut first = true;
186 for s in self.iter() {
190 result.push_str(sep);
192 result.push_str(s.as_slice());
// `StrVector` for an owned `Vec<S>`: both methods view the vector as a slice
// with `as_slice()` and call the same-named method on that slice.
198 impl<'a, S: Str> StrVector for Vec<S> {
200 fn concat(&self) -> String {
201 self.as_slice().concat()
// `sep` is passed through unchanged.
205 fn connect(&self, sep: &str) -> String {
206 self.as_slice().connect(sep)
214 // Helper functions used for Unicode normalization
// Reorders `comb` in place using the `u8` half of each pair (read through
// `ref1()`), which callers in this file fill with a canonical combining class.
// Only neighbouring entries are compared, and a pair counts as out of order
// only when both classes are non-zero and the left one is larger, so an entry
// with class 0 never triggers a reorder.
215 fn canonical_sort(comb: &mut [(char, u8)]) {
216 let len = comb.len();
// Each outer pass examines one fewer trailing pair than the previous one.
217 for i in range(0, len) {
218 let mut swapped = false;
219 for j in range(1, len-i) {
220 let class_a = *comb[j-1].ref1();
221 let class_b = *comb[j].ref1();
222 if class_a != 0 && class_b != 0 && class_a > class_b {
// NOTE(review): the body of this `if` is not visible in this excerpt;
// presumably it swaps the two entries and sets `swapped` -- confirm.
// Stop as soon as a pass leaves `swapped` false.
227 if !swapped { break; }
232 enum DecompositionType {
237 /// External iterator for a string's decomposition's characters.
238 /// Use with the `std::iter` module.
240 pub struct Decompositions<'a> {
241 kind: DecompositionType,
243 buffer: Vec<(char, u8)>,
247 impl<'a> Iterator<char> for Decompositions<'a> {
249 fn next(&mut self) -> Option<char> {
250 match self.buffer.as_slice().head() {
256 Some(&(c, _)) if self.sorted => {
260 _ => self.sorted = false
263 let decomposer = match self.kind {
264 Canonical => unicode::char::decompose_canonical,
265 Compatible => unicode::char::decompose_compatible
269 for ch in self.iter {
270 let buffer = &mut self.buffer;
271 let sorted = &mut self.sorted;
273 let class = unicode::char::canonical_combining_class(d);
274 if class == 0 && !*sorted {
275 canonical_sort(buffer.as_mut_slice());
278 buffer.push((d, class));
285 canonical_sort(self.buffer.as_mut_slice());
289 match self.buffer.shift() {
294 Some((c, _)) => Some(c),
299 fn size_hint(&self) -> (uint, Option<uint>) {
300 let (lower, _) = self.iter.size_hint();
305 /// Replace all occurrences of one string with another
309 /// * s - The string containing substrings to replace
310 /// * from - The string to replace
311 /// * to - The replacement string
315 /// The original string with all occurrences of `from` replaced with `to`
321 /// let string = "orange";
322 /// let new_string = str::replace(string, "or", "str");
323 /// assert_eq!(new_string.as_slice(), "strange");
325 pub fn replace(s: &str, from: &str, to: &str) -> String {
326 let mut result = String::new();
// Offset into `s` up to which the input has already been accounted for.
327 let mut last_end = 0;
328 for (start, end) in s.match_indices(from) {
// Copy the text lying between the previous position and this match.
329 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
// NOTE(review): the rest of the loop body is not visible in this excerpt;
// presumably it appends `to` and moves `last_end` to `end` -- confirm.
// Copy whatever remains of `s` from `last_end` to its end.
333 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
341 /// Deprecated. Use `String::from_utf16`.
///
/// Forwards `v` to `String::from_utf16` and returns its `Option<String>`
/// unchanged.
/// NOTE(review): `None` presumably signals input that is not valid UTF-16 --
/// confirm against `String::from_utf16`.
342 #[deprecated = "Replaced by String::from_utf16"]
343 pub fn from_utf16(v: &[u16]) -> Option<String> {
344 String::from_utf16(v)
347 /// Deprecated. Use `String::from_utf16_lossy`.
///
/// Forwards `v` to `String::from_utf16_lossy` and returns the resulting
/// `String`. Unlike `from_utf16`, the return type has no failure case.
/// NOTE(review): how invalid input is substituted is decided by
/// `String::from_utf16_lossy` and is not visible here.
348 #[deprecated = "Replaced by String::from_utf16_lossy"]
349 pub fn from_utf16_lossy(v: &[u16]) -> String {
350 String::from_utf16_lossy(v)
353 // Return the initial codepoint accumulator for the first byte.
354 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
355 // for width 3, and 3 bits for width 4
// The mask `0x7F >> $width` works out to 0x1F, 0x0F and 0x07 for widths 2, 3
// and 4 respectively; the masked byte is then widened to `u32`.
356 macro_rules! utf8_first_byte(
357 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
360 // return the value of $ch updated with continuation byte $byte
// The accumulator is shifted left by 6 bits and the low 6 bits of `$byte`
// (mask 63, i.e. 0x3F) are OR-ed in after being widened to `u32`.
361 macro_rules! utf8_acc_cont_byte(
362 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
365 /// Deprecated. Use `String::from_utf8_lossy`.
///
/// Forwards `v` to `String::from_utf8_lossy`. The returned `MaybeOwned<'a>`
/// shares the lifetime of `v`.
/// NOTE(review): presumably the result borrows from `v` when no replacement
/// is needed and owns a new buffer otherwise -- confirm against
/// `String::from_utf8_lossy`.
366 #[deprecated = "Replaced by String::from_utf8_lossy"]
367 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
368 String::from_utf8_lossy(v)
375 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
376 /// This can be useful as an optimization when an allocation is sometimes
377 /// needed but not always.
378 pub enum MaybeOwned<'a> {
379 /// A borrowed string
385 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
///
/// Concretely it is `MaybeOwned` with its lifetime parameter fixed to
/// `'static`.
386 pub type SendStr = MaybeOwned<'static>;
388 impl<'a> MaybeOwned<'a> {
389 /// Returns `true` if this `MaybeOwned` wraps an owned string
394 /// let string = String::from_str("orange");
395 /// let maybe_owned_string = string.into_maybe_owned();
396 /// assert_eq!(true, maybe_owned_string.is_owned());
399 pub fn is_owned(&self) -> bool {
406 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
411 /// let string = "orange";
412 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
413 /// assert_eq!(true, maybe_owned_string.is_slice());
416 pub fn is_slice(&self) -> bool {
424 /// Trait for moving into a `MaybeOwned`
///
/// This file implements it for `String`, `&'a str` and `MaybeOwned<'a>`.
425 pub trait IntoMaybeOwned<'a> {
426 /// Moves self into a `MaybeOwned`
///
/// The receiver is taken by value, so it is consumed by the conversion.
427 fn into_maybe_owned(self) -> MaybeOwned<'a>;
433 /// let owned_string = String::from_str("orange");
434 /// let maybe_owned_string = owned_string.into_maybe_owned();
435 /// assert_eq!(true, maybe_owned_string.is_owned());
437 impl<'a> IntoMaybeOwned<'a> for String {
439 fn into_maybe_owned(self) -> MaybeOwned<'a> {
447 /// let string = "orange";
448 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
449 /// assert_eq!(false, maybe_owned_str.is_owned());
451 impl<'a> IntoMaybeOwned<'a> for &'a str {
// Wraps the borrowed slice in the `Slice` variant, keeping lifetime `'a`.
453 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
459 /// let str = "orange";
460 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
461 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
462 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
464 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
// Identity conversion: the value is returned as it was received.
466 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
// Equality looks only at the string contents obtained through `as_slice()`;
// which variant holds those contents is not compared.
469 impl<'a> PartialEq for MaybeOwned<'a> {
471 fn eq(&self, other: &MaybeOwned) -> bool {
472 self.as_slice() == other.as_slice()
476 impl<'a> Eq for MaybeOwned<'a> {}
// Partial ordering that defers to `Ord::cmp` and wraps its answer, so the
// result is always `Some`.
478 impl<'a> PartialOrd for MaybeOwned<'a> {
480 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
481 Some(self.cmp(other))
// Total ordering taken from comparing the two values' `as_slice()` views.
485 impl<'a> Ord for MaybeOwned<'a> {
487 fn cmp(&self, other: &MaybeOwned) -> Ordering {
488 self.as_slice().cmp(&other.as_slice())
// Lets a `MaybeOwned` be compared with any `S: Str`; both sides are viewed
// through `as_slice()` and compared by contents.
492 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
494 fn equiv(&self, other: &S) -> bool {
495 self.as_slice() == other.as_slice()
499 impl<'a> Str for MaybeOwned<'a> {
501 fn as_slice<'b>(&'b self) -> &'b str {
504 Owned(ref s) => s.as_slice()
509 impl<'a> StrAllocating for MaybeOwned<'a> {
511 fn into_string(self) -> String {
513 Slice(s) => String::from_str(s),
519 impl<'a> Collection for MaybeOwned<'a> {
// The length reported is that of the `as_slice()` view of the string.
521 fn len(&self) -> uint { self.as_slice().len() }
// Cloning keeps the variant: a `Slice` is cloned by reusing the same borrowed
// `&str`, an `Owned` by building a new `String` from its contents.
524 impl<'a> Clone for MaybeOwned<'a> {
526 fn clone(&self) -> MaybeOwned<'a> {
528 Slice(s) => Slice(s),
529 Owned(ref s) => Owned(String::from_str(s.as_slice()))
534 impl<'a> Default for MaybeOwned<'a> {
// The default value is a `Slice` over the empty string literal.
536 fn default() -> MaybeOwned<'a> { Slice("") }
// Hashes the `as_slice()` view of the string, the same view that `eq`
// compares; the variant itself is not fed to the hasher here.
539 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
541 fn hash(&self, hasher: &mut H) {
542 self.as_slice().hash(hasher)
546 impl<'a> fmt::Show for MaybeOwned<'a> {
548 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
550 Slice(ref s) => s.fmt(f),
551 Owned(ref s) => s.fmt(f)
556 /// Unsafe operations
558 use core::prelude::*;
560 use core::raw::Slice;
567 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
568 pub use core::str::raw::{slice_unchecked};
570 /// Create a Rust string from a *u8 buffer of the given length
///
/// NOTE(review): nothing visible in this excerpt validates `buf` or checks the
/// bytes for UTF-8; callers presumably must guarantee that `buf` points to
/// `len` readable bytes of valid UTF-8 -- confirm.
571 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
572 let mut result = String::new();
// A `Slice { .. }` value is transmuted into the argument for `push_bytes`;
// its fields and the rest of this call are not visible in this excerpt.
573 result.push_bytes(mem::transmute(Slice {
580 /// Create a Rust string from a null-terminated C string
///
/// The extent of the string is found by reading through `c_string` until a
/// zero value is met, so the pointer must be valid and the data behind it must
/// contain a terminator.
/// NOTE(review): no UTF-8 check is visible in this excerpt -- confirm what the
/// caller has to guarantee about the bytes.
581 pub unsafe fn from_c_str(c_string: *const i8) -> String {
582 let mut buf = String::new();
// Scan for the terminating zero. NOTE(review): the declaration of `len` and
// the loop body are not visible in this excerpt.
584 while *c_string.offset(len) != 0 {
// As in `from_buf_len`, a transmuted `Slice { .. }` is passed to `push_bytes`.
587 buf.push_bytes(mem::transmute(Slice {
594 /// Converts an owned vector of bytes to a new owned string. This assumes
595 /// that the utf-8-ness of the vector has already been validated
597 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
601 /// Converts a byte to a string.
///
/// Puts `u` in a one-element vector and passes it to the unchecked
/// `from_utf8_owned` declared just above, whose documentation says it assumes
/// the bytes were already validated as UTF-8. No check on `u` is made here.
602 pub unsafe fn from_byte(u: u8) -> String {
603 from_utf8_owned(vec![u])
607 fn test_from_buf_len() {
608 use slice::ImmutableVector;
611 let a = vec![65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
613 let c = from_buf_len(b, 3u);
614 assert_eq!(c, String::from_str("AAA"));
620 Section: Trait implementations
623 /// Any string that can be represented as a slice
624 pub trait StrAllocating: Str {
625 /// Convert `self` into a `String`, not making a copy if possible.
626 fn into_string(self) -> String;
628 #[allow(missing_doc)]
629 #[deprecated = "replaced by .into_string()"]
630 fn into_owned(self) -> String {
634 /// Escape each char in `s` with `char::escape_default`.
///
/// Returns a new `String`; `self` is only read.
635 fn escape_default(&self) -> String {
636 let me = self.as_slice();
// `me.len()` serves as the initial capacity only.
// NOTE(review): presumably the output can outgrow it when characters expand
// into escape sequences -- confirm.
637 let mut out = String::with_capacity(me.len());
638 for c in me.chars() {
// Every char that `escape_default` emits for `c` is appended to `out`.
639 c.escape_default(|c| out.push_char(c));
644 /// Escape each char in `s` with `char::escape_unicode`.
///
/// Returns a new `String`; `self` is only read.
645 fn escape_unicode(&self) -> String {
646 let me = self.as_slice();
// `me.len()` serves as the initial capacity only.
// NOTE(review): presumably the output can outgrow it when characters expand
// into escape sequences -- confirm.
647 let mut out = String::with_capacity(me.len());
648 for c in me.chars() {
// Every char that `escape_unicode` emits for `c` is appended to `out`.
649 c.escape_unicode(|c| out.push_char(c));
654 /// Replace all occurrences of one string with another.
658 /// * `from` - The string to replace
659 /// * `to` - The replacement string
663 /// The original string with all occurrences of `from` replaced with `to`.
668 /// let s = "Do you know the muffin man,
669 /// The muffin man, the muffin man, ...".to_string();
671 /// assert_eq!(s.replace("muffin man", "little lamb"),
672 /// "Do you know the little lamb,
673 /// The little lamb, the little lamb, ...".to_string());
675 /// // not found, so no change.
676 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
678 fn replace(&self, from: &str, to: &str) -> String {
679 let me = self.as_slice();
680 let mut result = String::new();
// Offset into `me` up to which the input has already been accounted for.
681 let mut last_end = 0;
682 for (start, end) in me.match_indices(from) {
// Copy the text lying between the previous position and this match.
683 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
// NOTE(review): the rest of the loop body is not visible in this excerpt;
// presumably it appends `to` and moves `last_end` to `end` -- confirm.
// Copy whatever remains of `me` from `last_end` to its end.
687 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
691 #[allow(missing_doc)]
692 #[deprecated = "obsolete, use `to_string`"]
// Copies the string's bytes into a new `Vec<u8>` and transmutes that vector
// into the return type.
// NOTE(review): this presumably relies on `String` having the same layout as
// `Vec<u8>`; the enclosing `unsafe` is not visible in this excerpt -- confirm.
694 fn to_owned(&self) -> String {
696 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
700 /// Converts to a vector of `u16` encoded as UTF-16.
///
/// Collects the items yielded by `utf16_units()` on the string slice into a
/// new `Vec<u16>`.
701 #[deprecated = "use `utf16_units` instead"]
702 fn to_utf16(&self) -> Vec<u16> {
703 self.as_slice().utf16_units().collect::<Vec<u16>>()
706 /// Given a string, make a new string with repeated copies of it.
///
/// `nn` is the number of loop iterations used to build the result.
707 fn repeat(&self, nn: uint) -> String {
708 let me = self.as_slice();
// Capacity for all copies is requested up front.
// NOTE(review): `nn * me.len()` is a plain multiplication; consider what
// happens if it overflows for very large `nn` -- confirm.
709 let mut ret = String::with_capacity(nn * me.len());
// NOTE(review): the loop body is not visible in this excerpt; presumably it
// appends `me` to `ret` once per iteration -- confirm.
710 for _ in range(0, nn) {
716 /// Levenshtein Distance between two strings.
///
/// Compares `self` with `t`, walking both by `char`, and returns the distance
/// as a `uint`.
717 fn lev_distance(&self, t: &str) -> uint {
718 let me = self.as_slice();
// NOTE(review): `slen` and `tlen` are bound on lines not visible in this
// excerpt; presumably the lengths of `me` and `t` -- confirm in which unit.
// If either side is empty the answer is simply the other side's length.
722 if slen == 0 { return tlen; }
723 if tlen == 0 { return slen; }
// Working column with `tlen + 1` entries, entry `x` initialised to `x`.
725 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
727 for (i, sc) in me.chars().enumerate() {
// NOTE(review): `current` is bound on a line not visible in this excerpt.
730 *dcol.get_mut(0) = current + 1;
732 for (j, tc) in t.chars().enumerate() {
// Value held at position `j + 1` before this step overwrites it.
734 let next = *dcol.get(j + 1);
// NOTE(review): the condition choosing between the assignments below is not
// visible in this excerpt; presumably it tests whether `sc` equals `tc`.
737 *dcol.get_mut(j + 1) = current;
739 *dcol.get_mut(j + 1) = cmp::min(current, next);
740 *dcol.get_mut(j + 1) = cmp::min(*dcol.get(j + 1),
// The entry at index `tlen` of the column is the result.
748 return *dcol.get(tlen);
751 /// An Iterator over the string in Unicode Normalization Form D
752 /// (canonical decomposition).
754 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
756 iter: self.as_slice().chars(),
763 /// An Iterator over the string in Unicode Normalization Form KD
764 /// (compatibility decomposition).
766 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
768 iter: self.as_slice().chars(),
// `StrAllocating` for a borrowed string slice.
776 impl<'a> StrAllocating for &'a str {
// Builds the `String` from the slice via `String::from_str`.
778 fn into_string(self) -> String {
779 String::from_str(self)
785 use std::iter::AdditiveIterator;
786 use std::iter::range;
787 use std::default::Default;
789 use std::clone::Clone;
790 use std::cmp::{Equal, Greater, Less, Ord, PartialOrd, Equiv};
791 use std::option::{Some, None};
792 use std::ptr::RawPtr;
793 use std::iter::{Iterator, DoubleEndedIterator};
794 use {Collection, MutableSeq};
797 use std::slice::{Vector, ImmutableVector};
801 use unicode::char::UnicodeChar;
805 assert!((eq_slice("foobar".slice(0, 3), "foo")));
806 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
807 assert!((!eq_slice("foo1", "foo2")));
813 assert!("" <= "foo");
814 assert!("foo" <= "foo");
815 assert!("foo" != "bar");
820 assert_eq!("".len(), 0u);
821 assert_eq!("hello world".len(), 11u);
822 assert_eq!("\x63".len(), 1u);
823 assert_eq!("\xa2".len(), 2u);
824 assert_eq!("\u03c0".len(), 2u);
825 assert_eq!("\u2620".len(), 3u);
826 assert_eq!("\U0001d11e".len(), 4u);
828 assert_eq!("".char_len(), 0u);
829 assert_eq!("hello world".char_len(), 11u);
830 assert_eq!("\x63".char_len(), 1u);
831 assert_eq!("\xa2".char_len(), 1u);
832 assert_eq!("\u03c0".char_len(), 1u);
833 assert_eq!("\u2620".char_len(), 1u);
834 assert_eq!("\U0001d11e".char_len(), 1u);
835 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
837 assert_eq!("hello".width(false), 10u);
838 assert_eq!("hello".width(true), 10u);
839 assert_eq!("\0\0\0\0\0".width(false), 0u);
840 assert_eq!("\0\0\0\0\0".width(true), 0u);
841 assert_eq!("".width(false), 0u);
842 assert_eq!("".width(true), 0u);
843 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
844 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
849 assert_eq!("hello".find('l'), Some(2u));
850 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
851 assert!("hello".find('x').is_none());
852 assert!("hello".find(|c:char| c == 'x').is_none());
853 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
854 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
859 assert_eq!("hello".rfind('l'), Some(3u));
860 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
861 assert!("hello".rfind('x').is_none());
862 assert!("hello".rfind(|c:char| c == 'x').is_none());
863 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
864 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
869 let empty = String::from_str("");
870 let s: String = empty.as_slice().chars().collect();
871 assert_eq!(empty, s);
872 let data = String::from_str("ประเทศไทย中");
873 let s: String = data.as_slice().chars().collect();
878 fn test_into_bytes() {
879 let data = String::from_str("asdf");
880 let buf = data.into_bytes();
881 assert_eq!(b"asdf", buf.as_slice());
887 assert_eq!("".find_str(""), Some(0u));
888 assert!("banana".find_str("apple pie").is_none());
891 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
892 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
893 assert!(data.slice(2u, 4u).find_str("ab").is_none());
895 let string = "ประเทศไทย中华Việt Nam";
896 let mut data = String::from_str(string);
897 data.push_str(string);
898 assert!(data.as_slice().find_str("ไท华").is_none());
899 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
900 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
902 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
903 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
904 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
905 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
906 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
908 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
909 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
910 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
911 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
912 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
916 fn test_slice_chars() {
917 fn t(a: &str, b: &str, start: uint) {
918 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
921 t("hello", "llo", 2);
925 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
930 fn t(v: &[String], s: &str) {
931 assert_eq!(v.concat().as_slice(), s);
933 t([String::from_str("you"), String::from_str("know"),
934 String::from_str("I'm"),
935 String::from_str("no"), String::from_str("good")],
937 let v: &[String] = [];
939 t([String::from_str("hi")], "hi");
944 fn t(v: &[String], sep: &str, s: &str) {
945 assert_eq!(v.connect(sep).as_slice(), s);
947 t([String::from_str("you"), String::from_str("know"),
948 String::from_str("I'm"),
949 String::from_str("no"), String::from_str("good")],
950 " ", "you know I'm no good");
951 let v: &[String] = [];
953 t([String::from_str("hi")], " ", "hi");
957 fn test_concat_slices() {
958 fn t(v: &[&str], s: &str) {
959 assert_eq!(v.concat().as_slice(), s);
961 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
968 fn test_connect_slices() {
969 fn t(v: &[&str], sep: &str, s: &str) {
970 assert_eq!(v.connect(sep).as_slice(), s);
972 t(["you", "know", "I'm", "no", "good"],
973 " ", "you know I'm no good");
975 t(["hi"], " ", "hi");
980 assert_eq!("x".repeat(4), String::from_str("xxxx"));
981 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
982 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
983 assert_eq!("".repeat(4), String::from_str(""));
984 assert_eq!("hi".repeat(0), String::from_str(""));
988 fn test_unsafe_slice() {
989 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
990 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
991 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
992 fn a_million_letter_a() -> String {
994 let mut rs = String::new();
996 rs.push_str("aaaaaaaaaa");
1001 fn half_a_million_letter_a() -> String {
1003 let mut rs = String::new();
1005 rs.push_str("aaaaa");
1010 let letters = a_million_letter_a();
1011 assert!(half_a_million_letter_a() ==
1012 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1018 fn test_starts_with() {
1019 assert!(("".starts_with("")));
1020 assert!(("abc".starts_with("")));
1021 assert!(("abc".starts_with("a")));
1022 assert!((!"a".starts_with("abc")));
1023 assert!((!"".starts_with("abc")));
1024 assert!((!"ödd".starts_with("-")));
1025 assert!(("ödd".starts_with("öd")));
1029 fn test_ends_with() {
1030 assert!(("".ends_with("")));
1031 assert!(("abc".ends_with("")));
1032 assert!(("abc".ends_with("c")));
1033 assert!((!"a".ends_with("abc")));
1034 assert!((!"".ends_with("abc")));
1035 assert!((!"ddö".ends_with("-")));
1036 assert!(("ddö".ends_with("dö")));
1040 fn test_is_empty() {
1041 assert!("".is_empty());
1042 assert!(!"a".is_empty());
1048 assert_eq!("".replace(a, "b"), String::from_str(""));
1049 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1050 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1052 assert!(" test test ".replace(test, "toast") ==
1053 String::from_str(" toast toast "));
1054 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1058 fn test_replace_2a() {
1059 let data = "ประเทศไทย中华";
1060 let repl = "دولة الكويت";
1063 let a2 = "دولة الكويتทศไทย中华";
1064 assert_eq!(data.replace(a, repl).as_slice(), a2);
1068 fn test_replace_2b() {
1069 let data = "ประเทศไทย中华";
1070 let repl = "دولة الكويت";
1073 let b2 = "ปรدولة الكويتทศไทย中华";
1074 assert_eq!(data.replace(b, repl).as_slice(), b2);
1078 fn test_replace_2c() {
1079 let data = "ประเทศไทย中华";
1080 let repl = "دولة الكويت";
1083 let c2 = "ประเทศไทยدولة الكويت";
1084 assert_eq!(data.replace(c, repl).as_slice(), c2);
1088 fn test_replace_2d() {
1089 let data = "ประเทศไทย中华";
1090 let repl = "دولة الكويت";
1093 assert_eq!(data.replace(d, repl).as_slice(), data);
1098 assert_eq!("ab", "abc".slice(0, 2));
1099 assert_eq!("bc", "abc".slice(1, 3));
1100 assert_eq!("", "abc".slice(1, 1));
1101 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1103 let data = "ประเทศไทย中华";
1104 assert_eq!("ป", data.slice(0, 3));
1105 assert_eq!("ร", data.slice(3, 6));
1106 assert_eq!("", data.slice(3, 3));
1107 assert_eq!("华", data.slice(30, 33));
1109 fn a_million_letter_x() -> String {
1111 let mut rs = String::new();
1113 rs.push_str("华华华华华华华华华华");
1118 fn half_a_million_letter_x() -> String {
1120 let mut rs = String::new();
1122 rs.push_str("华华华华华");
1127 let letters = a_million_letter_x();
1128 assert!(half_a_million_letter_x() ==
1129 String::from_str(letters.as_slice().slice(0u, 3u * 500000u)));
1134 let ss = "中华Việt Nam";
1136 assert_eq!("华", ss.slice(3u, 6u));
1137 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1139 assert_eq!("ab", "abc".slice(0u, 2u));
1140 assert_eq!("bc", "abc".slice(1u, 3u));
1141 assert_eq!("", "abc".slice(1u, 1u));
1143 assert_eq!("中", ss.slice(0u, 3u));
1144 assert_eq!("华V", ss.slice(3u, 7u));
1145 assert_eq!("", ss.slice(3u, 3u));
1160 fn test_slice_fail() {
1161 "中华Việt Nam".slice(0u, 2u);
1165 fn test_slice_from() {
1166 assert_eq!("abcd".slice_from(0), "abcd");
1167 assert_eq!("abcd".slice_from(2), "cd");
1168 assert_eq!("abcd".slice_from(4), "");
1171 fn test_slice_to() {
1172 assert_eq!("abcd".slice_to(0), "");
1173 assert_eq!("abcd".slice_to(2), "ab");
1174 assert_eq!("abcd".slice_to(4), "abcd");
1178 fn test_trim_left_chars() {
1179 let v: &[char] = &[];
1180 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1181 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1182 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1183 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1185 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1186 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1187 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1191 fn test_trim_right_chars() {
1192 let v: &[char] = &[];
1193 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1194 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1195 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1196 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1198 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1199 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1200 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1204 fn test_trim_chars() {
1205 let v: &[char] = &[];
1206 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1207 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1208 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1209 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1211 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1212 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1213 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1217 fn test_trim_left() {
1218 assert_eq!("".trim_left(), "");
1219 assert_eq!("a".trim_left(), "a");
1220 assert_eq!(" ".trim_left(), "");
1221 assert_eq!(" blah".trim_left(), "blah");
1222 assert_eq!(" \u3000 wut".trim_left(), "wut");
1223 assert_eq!("hey ".trim_left(), "hey ");
1227 fn test_trim_right() {
1228 assert_eq!("".trim_right(), "");
1229 assert_eq!("a".trim_right(), "a");
1230 assert_eq!(" ".trim_right(), "");
1231 assert_eq!("blah ".trim_right(), "blah");
1232 assert_eq!("wut \u3000 ".trim_right(), "wut");
1233 assert_eq!(" hey".trim_right(), " hey");
1238 assert_eq!("".trim(), "");
1239 assert_eq!("a".trim(), "a");
1240 assert_eq!(" ".trim(), "");
1241 assert_eq!(" blah ".trim(), "blah");
1242 assert_eq!("\nwut \u3000 ".trim(), "wut");
1243 assert_eq!(" hey dude ".trim(), "hey dude");
1247 fn test_is_whitespace() {
1248 assert!("".is_whitespace());
1249 assert!(" ".is_whitespace());
1250 assert!("\u2009".is_whitespace()); // Thin space
1251 assert!(" \n\t ".is_whitespace());
1252 assert!(!" _ ".is_whitespace());
1256 fn test_slice_shift_char() {
1257 let data = "ประเทศไทย中";
1258 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1262 fn test_slice_shift_char_2() {
1264 assert_eq!(empty.slice_shift_char(), (None, ""));
1269 // deny overlong encodings
1270 assert!(!is_utf8([0xc0, 0x80]));
1271 assert!(!is_utf8([0xc0, 0xae]));
1272 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1273 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1274 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1275 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1276 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1279 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1280 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1282 assert!(is_utf8([0xC2, 0x80]));
1283 assert!(is_utf8([0xDF, 0xBF]));
1284 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1285 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1286 assert!(is_utf8([0xEE, 0x80, 0x80]));
1287 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1288 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1289 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1293 fn test_is_utf16() {
1294 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1302 // surrogate pairs (randomly generated with Python 3's
1303 // .encode('utf-16be'))
1304 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1305 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1306 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1308 // mixtures (also random)
1309 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1310 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1311 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1314 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1317 // surrogate + regular unit
1319 // surrogate + lead surrogate
1321 // unterminated surrogate
1323 // trail surrogate without a lead
1326 // random byte sequences that Python 3's .decode('utf-16be')
1328 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1329 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1330 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1331 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1332 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1333 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1334 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1335 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1336 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1337 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1338 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1339 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1340 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1341 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1342 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1343 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1344 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1345 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1346 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1347 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1348 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// Checks that `raw::from_c_str(b)` produces the String "AAAAAAA".
1352 fn test_raw_from_c_str() {
// Seven bytes of 65 ('A') followed by a single 0 byte.
1354 let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
// NOTE(review): `b` is presumably a pointer into `a`'s buffer -- confirm.
1356 let c = raw::from_c_str(b);
1357 assert_eq!(c, String::from_str("AAAAAAA"));
// Checks `as_bytes` on an empty literal, an ASCII literal, and a literal made
// of multi-byte characters; the last is compared against the byte table `v`.
1362 fn test_as_bytes() {
1365 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1366 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
// The empty string must yield an empty byte slice.
1369 assert_eq!("".as_bytes(), &[]);
1370 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1371 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
// Borrows the bytes of an empty owned String; per the note below, this is a
// regression guard against a double free.
1376 fn test_as_bytes_fail() {
1377 // Don't double free. (I'm not sure if this exercises the
1378 // original problem code path anymore.)
1379 let s = String::from_str("");
// The borrow is never read; it only has to exist.
1380 let _bytes = s.as_bytes();
1386 let buf = "hello".as_ptr();
1388 assert_eq!(*buf.offset(0), 'h' as u8);
1389 assert_eq!(*buf.offset(1), 'e' as u8);
1390 assert_eq!(*buf.offset(2), 'l' as u8);
1391 assert_eq!(*buf.offset(3), 'l' as u8);
1392 assert_eq!(*buf.offset(4), 'o' as u8);
1397 fn test_subslice_offset() {
1398 let a = "kernelsprite";
1399 let b = a.slice(7, a.len());
1400 let c = a.slice(0, a.len() - 6);
1401 assert_eq!(a.subslice_offset(b), 7);
1402 assert_eq!(a.subslice_offset(c), 0);
1404 let string = "a\nb\nc";
1405 let lines: Vec<&str> = string.lines().collect();
1406 let lines = lines.as_slice();
1407 assert_eq!(string.subslice_offset(lines[0]), 0);
1408 assert_eq!(string.subslice_offset(lines[1]), 2);
1409 assert_eq!(string.subslice_offset(lines[2]), 4);
1414 fn test_subslice_offset_2() {
1415 let a = "alchemiter";
1416 let b = "cruxtruder";
1417 a.subslice_offset(b);
// Round-trips a String through a byte vector (`Vec::from_slice` on its bytes,
// then `from_utf8` back into a String) and reads both strings byte by byte.
1421 fn vec_str_conversions() {
1422 let s1: String = String::from_str("All mimsy were the borogoves");
// Copy the bytes out, then rebuild a second String from that copy.
1424 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1425 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1426 let mut i: uint = 0u;
// Lengths of the original string and of the byte copy.
1427 let n1: uint = s1.len();
1428 let n2: uint = v.len();
// Bytes at the same index `i` in the original and in the rebuilt string.
1431 let a: u8 = s1.as_bytes()[i];
1432 let b: u8 = s2.as_bytes()[i];
// Substring search with `contains`: ASCII needles (including the empty
// needle), absent needles, and needles made of multi-byte characters.
fn test_contains() {
    let ascii = "abcde";
    // Needles that must be found, the empty needle included.
    for &needle in ["bcd", "abcd", "bcde", ""].iter() {
        assert!(ascii.contains(needle));
    }
    assert!("".contains(""));
    assert!(!ascii.contains("def"));
    assert!(!"".contains("a"));

    let data = "ประเทศไทย中华Việt Nam";
    for &needle in ["ประเ", "ะเ", "中华"].iter() {
        assert!(data.contains(needle));
    }
    // Both characters occur in `data`, but never next to each other.
    assert!(!data.contains("ไท华"));
}
1458 fn test_contains_char() {
1459 assert!("abc".contains_char('b'));
1460 assert!("a".contains_char('a'));
1461 assert!(!"abc".contains_char('d'));
1462 assert!(!"".contains_char('a'));
// Checks the slice returned by `truncate_utf16_at_nul(v)` for five inputs:
// empty for the first two, then [1], [1, 2] and [1, 2, 3].
1466 fn test_truncate_utf16_at_nul() {
1468 assert_eq!(truncate_utf16_at_nul(v), &[]);
1471 assert_eq!(truncate_utf16_at_nul(v), &[]);
1474 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1477 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1480 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1485 let s = "ศไทย中华Việt Nam";
1486 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1488 for ch in v.iter() {
1489 assert!(s.char_at(pos) == *ch);
1490 pos += String::from_char(1, *ch).len();
1495 fn test_char_at_reverse() {
1496 let s = "ศไทย中华Việt Nam";
1497 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1498 let mut pos = s.len();
1499 for ch in v.iter().rev() {
1500 assert!(s.char_at_reverse(pos) == *ch);
1501 pos -= String::from_char(1, *ch).len();
1506 fn test_escape_unicode() {
1507 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1508 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1509 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1510 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1511 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), String::from_str("\\x00\\x01\\xfe\\xff"));
1512 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1513 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1514 String::from_str("\\U00010000\\U0010ffff"));
1515 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1516 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
1520 fn test_escape_default() {
1521 assert_eq!("abc".escape_default(), String::from_str("abc"));
1522 assert_eq!("a c".escape_default(), String::from_str("a c"));
1523 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1524 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1525 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1526 assert_eq!("\U00010000\U0010ffff".escape_default(),
1527 String::from_str("\\U00010000\\U0010ffff"));
1528 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1529 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Checks the total ordering of string slices via `cmp`: a longer string sorts
// after its own prefix, equal strings compare `Equal`, and otherwise the first
// differing character decides regardless of length.
//
// Each comparison is wrapped in `assert!`. A bare `a.cmp(b) == Greater;`
// statement only computes a boolean and throws it away, so it checks nothing.
fn test_total_ord() {
    assert!("1234".cmp(&("123")) == Greater);
    assert!("123".cmp(&("1234")) == Less);
    assert!("1234".cmp(&("1234")) == Equal);
    // '5' < '6' at the first differing position decides, not the length.
    assert!("12345555".cmp(&("123456")) == Less);
    // '2' > '1' at the first position decides, not the length.
    assert!("22".cmp(&("1234")) == Greater);
}
1542 fn test_char_range_at() {
1543 let data = "b¢€𤭢𤭢€¢b";
1544 assert_eq!('b', data.char_range_at(0).ch);
1545 assert_eq!('¢', data.char_range_at(1).ch);
1546 assert_eq!('€', data.char_range_at(3).ch);
1547 assert_eq!('𤭢', data.char_range_at(6).ch);
1548 assert_eq!('𤭢', data.char_range_at(10).ch);
1549 assert_eq!('€', data.char_range_at(14).ch);
1550 assert_eq!('¢', data.char_range_at(17).ch);
1551 assert_eq!('b', data.char_range_at(19).ch);
1555 fn test_char_range_at_reverse_underflow() {
1556 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Compares the chars of `s` with the expected table `v`, then checks that
// the index `pos` ended up equal to `v.len()`.
1560 fn test_iterator() {
1561 let s = "ศไทย中华Việt Nam";
1562 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
// Forward iterator over the chars of `s`.
1565 let mut it = s.chars();
1568 assert_eq!(c, v[pos]);
// Every expected entry must have been visited.
1571 assert_eq!(pos, v.len());
// Compares the chars of `s`, taken in reverse, with the table `v` (which lists
// them last-to-first), then checks that `pos` ended up equal to `v.len()`.
1575 fn test_rev_iterator() {
1576 let s = "ศไทย中华Việt Nam";
1577 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
// Reversed iterator over the chars of `s`.
1580 let mut it = s.chars().rev();
1583 assert_eq!(c, v[pos]);
// Every expected entry must have been visited.
1586 assert_eq!(pos, v.len());
1590 fn test_chars_decoding() {
1591 let mut bytes = [0u8, ..4];
1592 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1593 let len = c.encode_utf8(bytes);
1594 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1595 if Some(c) != s.chars().next() {
1596 fail!("character {:x}={} does not decode correctly", c as u32, c);
1602 fn test_chars_rev_decoding() {
1603 let mut bytes = [0u8, ..4];
1604 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1605 let len = c.encode_utf8(bytes);
1606 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1607 if Some(c) != s.chars().rev().next() {
1608 fail!("character {:x}={} does not decode correctly", c as u32, c);
// Checks that a char iterator and a clone of it yield the same items.
1614 fn test_iterator_clone() {
1615 let s = "ศไทย中华Việt Nam";
1616 let mut it = s.chars();
// Zip the iterator with its clone; every pair must hold two equal chars.
1618 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Iterates over `s.bytes()` and compares each byte with the expected table `v`
// at index `pos`.
1622 fn test_bytesator() {
1623 let s = "ศไทย中华Việt Nam";
1625 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1626 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1631 for b in s.bytes() {
1632 assert_eq!(b, v[pos]);
// Iterates over `s.bytes()` in reverse and compares each byte with the
// expected table `v` at index `pos`.
1638 fn test_bytes_revator() {
1639 let s = "ศไทย中华Việt Nam";
1641 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1642 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
// Indexing starts from the end of the table because the bytes arrive last-first.
1645 let mut pos = v.len();
1647 for b in s.bytes().rev() {
1649 assert_eq!(b, v[pos]);
// Compares the items of `s.char_indices()` with the pairs `(p[pos], v[pos])`:
// `p` holds the expected index of each char and `v` the expected char.
1654 fn test_char_indicesator() {
1655 let s = "ศไทย中华Việt Nam";
1656 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1657 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1660 let mut it = s.char_indices();
1663 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
1666 assert_eq!(pos, v.len());
1667 assert_eq!(pos, p.len());
// Compares the items of `s.char_indices().rev()` with the pairs
// `(p[pos], v[pos])`; both tables are written last-char-first.
1671 fn test_char_indices_revator() {
1672 let s = "ศไทย中华Việt Nam";
1673 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1674 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1677 let mut it = s.char_indices().rev();
1680 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
1683 assert_eq!(pos, v.len());
1684 assert_eq!(pos, p.len());
// Splits `data` on ' ' and on 'ä', giving the separator both as a char and as
// a closure, collecting forwards and through `.rev()`, and compares each
// result with the expected pieces.
1688 fn test_split_char_iterator() {
1689 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator, given as a char.
1691 let split: Vec<&str> = data.split(' ').collect();
1692 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1694 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1696 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Same separator, given as a closure.
1698 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1699 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1701 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1703 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Non-ASCII separator, given as a char.
1706 let split: Vec<&str> = data.split('ä').collect();
1707 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1709 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1711 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Non-ASCII separator, given as a closure.
1713 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1714 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1716 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1718 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1722 fn test_splitn_char_iterator() {
1723 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1725 let split: Vec<&str> = data.splitn(' ', 3).collect();
1726 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1728 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1729 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1732 let split: Vec<&str> = data.splitn('ä', 3).collect();
1733 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1735 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1736 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Collects `rsplitn` with a count of 3 for the separators ' ' and 'ä', each
// given as a char and as a closure, and compares with the expected pieces; the
// first expected piece is the unsplit front of `data`.
1740 fn test_rsplitn_char_iterator() {
1741 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator: char form, then closure form.
1743 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1745 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1747 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1749 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
// Non-ASCII separator: char form, then closure form.
1752 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1754 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1756 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1758 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on input that ends with the
// separator: `split` reports a trailing empty piece, `split_terminator` does
// not. Both keep the leading empty piece.
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    assert_eq!(data.split('\n').collect::<Vec<&str>>(),
               vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    assert_eq!(data.split_terminator('\n').collect::<Vec<&str>>(),
               vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
// Collects `split('\n')` and `split_terminator('\n')` through `.rev()` and
// compares with the expected pieces; only the `split` expectation has a
// trailing empty piece.
1773 fn test_rev_split_char_iterator_no_trailing() {
1774 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1776 let mut split: Vec<&str> = data.split('\n').rev().collect();
1778 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
// The terminator variant expects no trailing empty piece.
1780 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1782 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
1787 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1788 let words: Vec<&str> = data.words().collect();
1789 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
1793 fn test_nfd_chars() {
1794 assert_eq!("abc".nfd_chars().collect::<String>(), String::from_str("abc"));
1795 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(),
1796 String::from_str("d\u0307\u01c4"));
1797 assert_eq!("\u2026".nfd_chars().collect::<String>(), String::from_str("\u2026"));
1798 assert_eq!("\u2126".nfd_chars().collect::<String>(), String::from_str("\u03a9"));
1799 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(),
1800 String::from_str("d\u0323\u0307"));
1801 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(),
1802 String::from_str("d\u0323\u0307"));
1803 assert_eq!("a\u0301".nfd_chars().collect::<String>(), String::from_str("a\u0301"));
1804 assert_eq!("\u0301a".nfd_chars().collect::<String>(), String::from_str("\u0301a"));
1805 assert_eq!("\ud4db".nfd_chars().collect::<String>(),
1806 String::from_str("\u1111\u1171\u11b6"));
1807 assert_eq!("\uac1c".nfd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
1811 fn test_nfkd_chars() {
1812 assert_eq!("abc".nfkd_chars().collect::<String>(), String::from_str("abc"));
1813 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(),
1814 String::from_str("d\u0307DZ\u030c"));
1815 assert_eq!("\u2026".nfkd_chars().collect::<String>(), String::from_str("..."));
1816 assert_eq!("\u2126".nfkd_chars().collect::<String>(), String::from_str("\u03a9"));
1817 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(),
1818 String::from_str("d\u0323\u0307"));
1819 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(),
1820 String::from_str("d\u0323\u0307"));
1821 assert_eq!("a\u0301".nfkd_chars().collect::<String>(), String::from_str("a\u0301"));
1822 assert_eq!("\u0301a".nfkd_chars().collect::<String>(),
1823 String::from_str("\u0301a"));
1824 assert_eq!("\ud4db".nfkd_chars().collect::<String>(),
1825 String::from_str("\u1111\u1171\u11b6"));
1826 assert_eq!("\uac1c".nfkd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
1831 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1832 let lines: Vec<&str> = data.lines().collect();
1833 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1835 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1836 let lines: Vec<&str> = data.lines().collect();
1837 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1841 fn test_graphemes() {
1842 use std::iter::order;
1843 // official Unicode test data
1844 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1846 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1847 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1848 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1849 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1850 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1851 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1852 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1853 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1854 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1855 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1856 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1857 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1858 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1859 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1860 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1861 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1862 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1863 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1864 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1865 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1866 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1867 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1868 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
1869 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
1870 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
1871 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
1872 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
1873 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
1874 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
1875 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
1876 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
1877 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
1878 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
1879 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
1880 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
1881 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
1882 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
1883 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
1884 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
1885 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
1886 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
1887 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
1888 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
1889 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
1890 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
1891 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
1892 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
1893 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
1894 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
1895 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
1896 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
1897 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
1898 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
1899 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
1900 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
1901 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
1902 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
1903 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
1904 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
1905 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
1906 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
1907 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
1908 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
1909 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
1910 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
1911 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
1912 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
1913 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
1914 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
1915 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
1916 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
1917 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
1918 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
1919 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
1920 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
1921 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
1922 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
1923 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
1924 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
1925 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
1926 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
1927 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
1928 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
1929 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
1930 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
1931 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
1932 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
1933 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
1934 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
1935 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
1936 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
1937 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
1938 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
1939 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
1940 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
1941 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
1942 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
1943 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
1944 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
1945 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
1946 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
1947 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
1948 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
1949 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
1950 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
1951 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
1952 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
1953 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
1954 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
1955 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
1956 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
1957 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
1958 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
1959 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
1960 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
1961 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
1962 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
1963 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
1964 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
1965 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
1966 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
1967 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
1968 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
1969 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
1970 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
1971 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
1972 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
1973 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
1974 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
1975 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
1976 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
1977 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
1978 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
1979 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
1980 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
1981 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
1982 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
1983 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
1984 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
1985 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
1986 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
1987 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
1988 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
1989 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
1990 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
1991 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
1992 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
1993 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
1994 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
1995 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
1996 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
1997 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
1998 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
1999 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2000 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2001 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2002 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2003 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2004 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2005 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2006 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2007 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2008 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2009 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2010 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2011 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2012 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2013 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2014 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2015 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2016 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2017 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2018 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2019 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2020 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2021 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2022 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2023 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2024 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2025 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2026 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2027 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2028 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2029 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2030 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2031 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2032 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2033 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2034 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2035 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2036 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2037 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2038 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2039 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2040 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2041 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2042 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2043 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2044 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2045 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2046 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2047 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2048 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2049 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2050 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2051 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2055 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2056 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2057 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2058 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2059 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2060 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2061 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2062 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2063 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2064 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2065 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2066 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2067 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2068 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2069 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2070 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2071 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2072 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2073 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2074 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2075 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2076 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2077 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2080 for &(s, g) in test_same.iter() {
2081 // test forward iterator
2082 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2083 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2085 // test reverse iterator
2086 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2087 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2090 for &(s, gt, gf) in test_diff.iter() {
2091 // test forward iterator
2092 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2093 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2095 // test reverse iterator
2096 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2097 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2100 // test the indices iterators
2101 let s = "a̐éö̲\r\n";
2102 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2103 assert_eq!(gr_inds.as_slice(), &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]);
2104 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2105 assert_eq!(gr_inds.as_slice(), &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")]);
2106 let mut gr_inds = s.grapheme_indices(true);
2107 let e1 = gr_inds.size_hint();
2108 assert_eq!(e1, (1, Some(13)));
2109 let c = gr_inds.count();
2111 let e2 = gr_inds.size_hint();
2112 assert_eq!(e2, (0, Some(0)));
2114 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2116 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2117 assert_eq!(gr.as_slice(), &["\r", "\r\n", "\n"]);
// Exercises `split_str` through the local helper `t`, which collects the pieces produced
// for a haystack/separator pair and compares them with the expected list.
2121 fn test_split_strator() {
// Helper: split `s` on the string `sep`, collect the pieces, and compare them with `u`.
2122 fn t(s: &str, sep: &str, u: &[&str]) {
2123 let v: Vec<&str> = s.split_str(sep).collect();
2124 assert_eq!(v.as_slice(), u.as_slice());
// Separator that does not occur in the haystack: the only expected piece is the whole input.
2126 t("--1233345--", "12345", ["--1233345--"]);
// Separator in the middle, at the start, at the end, and at both ends. A leading or
// trailing separator is expected to contribute an empty piece.
2127 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2128 t("::hello::there", "::", ["", "hello", "there"]);
2129 t("hello::there::", "::", ["hello", "there", ""]);
2130 t("::hello::there::", "::", ["", "hello", "there", ""]);
// Multi-byte haystack with a multi-byte separator.
2131 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2132 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2133 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2134 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Haystacks made only of separator characters: the expectations below correspond to
// non-overlapping matches taken from left to right (zzz gives an empty piece, then z).
2136 t("zz", "zz", ["",""]);
2137 t("ok", "z", ["ok"]);
2138 t("zzz", "zz", ["","z"]);
2139 t("zzzzz", "zz", ["","","z"]);
// Checks the `Default` value of string-like types through the generic helper `t`.
2143 fn test_str_default() {
2144 use std::default::Default;
// Helper: build the default value of `S` and assert that its slice form is the empty string.
2145 fn t<S: Default + Str>() {
2146 let s: S = Default::default();
2147 assert_eq!(s.as_slice(), "");
// NOTE(review): no call site of `t` is visible in this excerpt, so the concrete types
// this test instantiates cannot be confirmed from here.
// Checks that `len()` is reachable through the `Collection` bound for string slices
// and for `String` values alike.
2155 fn test_str_container() {
// Helper: total of `len()` over every element of `v`.
2156 fn sum_len<S: Collection>(v: &[S]) -> uint {
2157 v.iter().map(|x| x.len()).sum()
2160 let s = String::from_str("01234");
// Every case totals 5: three literals (3 + 0 + 2), four `String` values (2 + 1 + 2 + 0),
// and one slice borrowed from `s`.
2161 assert_eq!(5, sum_len(["012", "", "34"]));
2162 assert_eq!(5, sum_len([String::from_str("01"), String::from_str("2"),
2163 String::from_str("34"), String::from_str("")]));
2164 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8`: well-formed input is expected to give `Some` with the decoded text,
// and input ending in the byte 0xFF is expected to give `None`.
2168 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by this first assertion is not visible in this excerpt.
2170 assert_eq!(from_utf8(xs), Some("hello"));
// Multi-byte text taken to bytes and decoded again.
2172 let xs = "ศไทย中华Việt Nam".as_bytes();
2173 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// 0xFF never occurs in well-formed UTF-8, so decoding must fail.
2175 let xs = b"hello\xFF";
2176 assert_eq!(from_utf8(xs), None);
// Exercises trait-provided behaviour of values built with `Slice` and `Owned`:
// length, slicing, formatting, ordering, defaults, and comparison across the two forms.
2180 fn test_maybe_owned_traits() {
// Value built with `Slice` from a literal.
2181 let s = Slice("abcde");
2182 assert_eq!(s.len(), 5);
2183 assert_eq!(s.as_slice(), "abcde");
2184 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2185 assert_eq!(format!("{}", s).as_slice(), "abcde");
// Ordering is checked against a value of the other form.
2186 assert!(s.lt(&Owned(String::from_str("bcdef"))));
// The default value is expected to equal an empty `Slice`.
2187 assert_eq!(Slice(""), Default::default());
// Value built with `Owned` holding the same text; the checks mirror the ones above.
2189 let o = Owned(String::from_str("abcde"));
2190 assert_eq!(o.len(), 5);
2191 assert_eq!(o.as_slice(), "abcde");
2192 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2193 assert_eq!(format!("{}", o).as_slice(), "abcde");
2194 assert!(o.lt(&Slice("bcdef")));
// The default value is also expected to equal an `Owned` wrapping an empty `String`.
2195 assert_eq!(Owned(String::from_str("")), Default::default());
// Equal contents must compare as `Equal` and be `equiv`, in both directions.
2197 assert!(s.cmp(&o) == Equal);
2198 assert!(s.equiv(&o));
2200 assert!(o.cmp(&s) == Equal);
2201 assert!(o.equiv(&s));
// Checks that `is_slice` and `is_owned` report which constructor produced the value.
2205 fn test_maybe_owned_methods() {
// Built with `Slice`: `is_slice` holds and `is_owned` does not.
2206 let s = Slice("abcde");
2207 assert!(s.is_slice());
2208 assert!(!s.is_owned());
// Built with `Owned`: the two predicates are reversed.
2210 let o = Owned(String::from_str("abcde"));
2211 assert!(!o.is_slice());
2212 assert!(o.is_owned());
// Checks that a clone compares equal to a value with the same text, for all four
// pairings of `Owned` and `Slice` on the two sides of the comparison.
2216 fn test_maybe_owned_clone() {
2217 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2218 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2219 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2220 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `into_string` yields a `String` with the same text, whether the value
// was built with `Slice` or with `Owned`.
2224 fn test_maybe_owned_into_string() {
2225 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2226 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2227 String::from_str("abcde"));
// Checks `into_maybe_owned` on a string literal and on a `String`: each result is
// compared against both a `Slice` and an `Owned` carrying the same text, so the
// assertions depend only on the contents, not on which form is returned.
2231 fn test_into_maybe_owned() {
2232 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2233 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2234 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2235 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2236 Owned(String::from_str("abcde")));
2243 use test::black_box;
2245 use std::option::{None, Some};
2246 use std::iter::{Iterator, DoubleEndedIterator};
2247 use std::collections::Collection;
// Benchmark: counts the chars of a string that mixes multi-byte and ASCII text.
2250 fn char_iterator(b: &mut Bencher) {
2251 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Measured work: one full forward pass over `s.chars()`.
2253 b.iter(|| s.chars().count());
// Benchmark: walks the chars of a mixed multi-byte/ASCII string with an explicit `for` loop.
2257 fn char_iterator_for(b: &mut Bencher) {
2258 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is handed to `black_box`; presumably this keeps the loop from being
// optimised away - confirm against the `test` crate documentation.
// NOTE(review): the `b.iter` call that should enclose this loop is not visible in this excerpt.
2261 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a six-line string that contains only ASCII text.
2266 fn char_iterator_ascii(b: &mut Bencher) {
2267 let s = "Mary had a little lamb, Little lamb
2268 Mary had a little lamb, Little lamb
2269 Mary had a little lamb, Little lamb
2270 Mary had a little lamb, Little lamb
2271 Mary had a little lamb, Little lamb
2272 Mary had a little lamb, Little lamb";
// Measured work: one full forward pass over `s.chars()`.
2274 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed multi-byte/ASCII string, iterating from the end.
2278 fn char_iterator_rev(b: &mut Bencher) {
2279 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Measured work: one full pass over the reversed char iterator.
2281 b.iter(|| s.chars().rev().count());
// Benchmark: walks the chars of a mixed multi-byte/ASCII string from the end with an
// explicit `for` loop.
2285 fn char_iterator_rev_for(b: &mut Bencher) {
2286 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is handed to `black_box`; presumably this keeps the loop from being
// optimised away - confirm against the `test` crate documentation.
// NOTE(review): the `b.iter` call that should enclose this loop is not visible in this excerpt.
2289 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `char_indices` over a mixed multi-byte/ASCII string.
2294 fn char_indicesator(b: &mut Bencher) {
2295 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected item count, computed once outside the measured closure.
2296 let len = s.char_len();
// Measured work: a full pass over `char_indices`, checked against `len` on every run.
2298 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of `char_indices` over a mixed multi-byte/ASCII string,
// iterating from the end.
2302 fn char_indicesator_rev(b: &mut Bencher) {
2303 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected item count, computed once outside the measured closure.
2304 let len = s.char_len();
// Measured work: a full reversed pass over `char_indices`, checked against `len` on every run.
2306 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly multi-byte string on the ASCII character V.
2310 fn split_unicode_ascii(b: &mut Bencher) {
2311 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
// V occurs twice in `s`, so three pieces are expected on every run.
2313 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits a mostly multi-byte string on the character V, routed through a
// hand-written `CharEq` implementation whose `only_ascii` returns false.
2317 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around the character to match.
2318 struct NotAscii(char);
2319 impl CharEq for NotAscii {
2320 fn matches(&mut self, c: char) -> bool {
2321 let NotAscii(cc) = *self;
// NOTE(review): the expression producing the result of `matches` is not visible in this
// excerpt; presumably it compares `cc` with `c` - confirm.
// Always reports false; presumably this keeps `split` off any ASCII-only path -
// confirm against the `CharEq` contract.
2324 fn only_ascii(&self) -> bool { false }
2326 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
// Three pieces are expected on every run.
2328 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space character.
2333 fn split_ascii(b: &mut Bencher) {
2334 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, computed once outside the measured closure.
2335 let len = s.split(' ').count();
// Measured work: the same split, checked against `len` on every run.
2337 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on the space character, routed through a
// hand-written `CharEq` implementation whose `only_ascii` returns false.
2341 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around the character to match.
2342 struct NotAscii(char);
2343 impl CharEq for NotAscii {
2345 fn matches(&mut self, c: char) -> bool {
2346 let NotAscii(cc) = *self;
// NOTE(review): the expression producing the result of `matches` is not visible in this
// excerpt; presumably it compares `cc` with `c` - confirm.
// Always reports false; presumably this keeps `split` off any ASCII-only path -
// confirm against the `CharEq` contract.
2349 fn only_ascii(&self) -> bool { false }
2351 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// The expected piece count comes from the plain `char` separator, so the custom
// matcher must agree with it.
2352 let len = s.split(' ').count();
2354 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the separator predicate.
2358 fn split_extern_fn(b: &mut Bencher) {
2359 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, taken from the plain `char` separator.
2360 let len = s.split(' ').count();
// Predicate: true only for the space character.
2361 fn pred(c: char) -> bool { c == ' ' }
// Measured work: split driven by `pred`, checked against `len` on every run.
2363 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the separator predicate.
2367 fn split_closure(b: &mut Bencher) {
2368 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, taken from the plain `char` separator.
2369 let len = s.split(' ').count();
// Measured work: split driven by a closure that matches the space character.
2371 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a one-element slice of chars as the separator.
2375 fn split_slice(b: &mut Bencher) {
2376 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, taken from the plain `char` separator.
2377 let len = s.split(' ').count();
// Measured work: split driven by the slice holding only the space character.
2379 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
// Benchmark set-up for UTF-8 validation of a 100-byte, ASCII-only byte string.
// NOTE(review): the measured closure is not visible in this excerpt; only the input
// and its length check are.
2383 fn is_utf8_100_ascii(b: &mut Bencher) {
2385 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2386 Lorem ipsum dolor sit amet, consectetur. ";
// Guards the input size promised by the benchmark name.
2388 assert_eq!(100, s.len());
// Benchmark set-up for UTF-8 validation of a 100-byte input made of multi-byte characters.
// NOTE(review): the measured closure is not visible in this excerpt; only the input
// and its length check are.
2395 fn is_utf8_100_multibyte(b: &mut Bencher) {
2396 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guards the input size promised by the benchmark name.
2397 assert_eq!(100, s.len());
// Benchmark: joins ten copies of a mixed multi-byte/ASCII string with `connect`.
// NOTE(review): the binding of `sep` and the `b.iter` call that should enclose the
// assertion are not visible in this excerpt.
2404 fn bench_connect(b: &mut Bencher) {
2405 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2407 let v = [s, s, s, s, s, s, s, s, s, s];
// Ten pieces and nine separators: the joined length must be the sum of their lengths.
2409 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a one-sentence haystack with a needle that is expected to be found.
// NOTE(review): the binding of `needle` and the `b.iter` call that should enclose the
// assertion are not visible in this excerpt.
2414 fn bench_contains_short_short(b: &mut Bencher) {
2415 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2419 assert!(haystack.contains(needle));
// Benchmark: `contains` on a multi-paragraph haystack with the short needle english,
// which the final assertion expects NOT to be found.
// NOTE(review): the line that opens the `haystack` literal and the `b.iter` call that
// should enclose the assertion are not visible in this excerpt. Every line from the one
// after the `fn` header down to the closing quote is string content, so no comment may
// be placed between them.
2424 fn bench_contains_short_long(b: &mut Bencher) {
2426 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2427 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2428 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2429 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2430 tempus vel, gravida nec quam.
2432 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2433 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2434 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2435 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2436 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2437 interdum. Curabitur ut nisi justo.
2439 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2440 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2441 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2442 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2443 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2444 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2445 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2446 Aliquam sit amet placerat lorem.
2448 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2449 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2450 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2451 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2452 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2455 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2456 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2457 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2458 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2459 malesuada sollicitudin quam eu fermentum.";
2460 let needle = "english";
2463 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is a run of the letter a and the needle is a
// shorter run of a followed by b, so the needle can never match.
// Presumably (per the benchmark name) this is meant as a bad case for a naive search,
// since every alignment matches a long prefix before failing - confirm.
// NOTE(review): the `b.iter` call that should enclose the assertion is not visible in this excerpt.
2468 fn bench_contains_bad_naive(b: &mut Bencher) {
2469 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2470 let needle = "aaaaaaaab";
2473 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the whole haystack, so
// the search is expected to succeed.
// NOTE(review): the `b.iter` call that should enclose the assertion is not visible in this excerpt.
2478 fn bench_contains_equal(b: &mut Bencher) {
2479 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2480 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2483 assert!(haystack.contains(needle));