1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 pub use self::MaybeOwned::*;
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
57 use core::borrow::{BorrowFrom, Cow, ToOwned};
58 use core::default::Default;
61 use core::iter::AdditiveIterator;
62 use core::kinds::Sized;
63 use core::prelude::{Char, Clone, Eq, Equiv};
64 use core::prelude::{Iterator, IteratorExt, SlicePrelude, None, Option, Ord, Ordering};
65 use core::prelude::{PartialEq, PartialOrd, Result, AsSlice, Some, Tuple2};
66 use core::prelude::{range};
69 use ring_buf::RingBuf;
74 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
75 pub use core::str::{Bytes, CharSplits};
76 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
77 pub use core::str::{Utf16Encoder, Utf16CodeUnits};
78 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
79 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
80 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
81 pub use core::str::{FromStr, from_str};
82 pub use core::str::{Str, StrPrelude};
83 pub use core::str::{from_utf8_unchecked, from_c_str};
84 pub use unicode::str::{UnicodeStrPrelude, Words, Graphemes, GraphemeIndices};
86 // FIXME(conventions): ensure bit/char conventions are followed by str's API
89 Section: Creating a string
92 /// Methods for vectors of strings.
///
/// Both methods only borrow `self` and return a freshly built `String`.
93 pub trait StrVector for Sized? {
94 /// Concatenates a vector of strings.
///
/// The pieces are appended in iteration order with nothing between them.
99 /// let first = "Restaurant at the End of the".to_string();
100 /// let second = " Universe".to_string();
101 /// let string_vec = vec![first, second];
102 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
104 fn concat(&self) -> String;
106 /// Concatenates a vector of strings, placing a given separator between each.
///
/// `sep` is placed only between adjacent elements, never before the first
/// or after the last one.
111 /// let first = "Roast".to_string();
112 /// let second = "Sirloin Steak".to_string();
113 /// let string_vec = vec![first, second];
114 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
116 fn connect(&self, sep: &str) -> String;
// Implementation for any slice whose elements can be viewed as `&str`.
119 impl<S: Str> StrVector for [S] {
// Appends every element's slice to a `String` that was pre-sized to the sum
// of the elements' byte lengths.
120 fn concat(&self) -> String {
// Early exit; the guarding condition is not visible in this extract
// (presumably an emptiness check on `self` -- TODO confirm).
122 return String::new();
125 // `len` calculation may overflow but push_str will check boundaries
126 let len = self.iter().map(|s| s.as_slice().len()).sum();
128 let mut result = String::with_capacity(len);
130 for s in self.iter() {
131 result.push_str(s.as_slice())
// Like `concat`, but writes `sep` between consecutive elements.
137 fn connect(&self, sep: &str) -> String {
// Early exit; guard not visible here (presumably `self` is empty -- TODO confirm).
139 return String::new();
// Second early exit; guard not visible here (presumably `sep` is empty, in
// which case joining is plain concatenation -- TODO confirm).
144 return self.concat();
147 // `self.len() - 1` below is only valid when `self` is non-empty; an empty
// `self` is presumably handled by the early return above.
148 // `len` calculation may overflow but push_str will check boundaries
// Capacity: one separator per gap between elements plus every element's bytes.
149 let len = sep.len() * (self.len() - 1)
150 + self.iter().map(|s| s.as_slice().len()).sum();
151 let mut result = String::with_capacity(len);
// `first` tracks whether the element being written is the first one; the
// statements that test and clear it are not visible in this extract.
152 let mut first = true;
154 for s in self.iter() {
158 result.push_str(sep);
160 result.push_str(s.as_slice());
// Implementation for any container that can hand out a slice of string-like
// elements; both methods simply forward to the slice implementation.
166 impl<S: Str, T: AsSlice<S>> StrVector for T {
168 fn concat(&self) -> String {
169 self.as_slice().concat()
173 fn connect(&self, sep: &str) -> String {
174 self.as_slice().connect(sep)
182 // Helper functions used for Unicode normalization
// Reorders `comb`, a slice of (char, combining class) pairs, using repeated
// passes over adjacent pairs. A pair counts as out of order only when both
// classes are non-zero and the left class is greater than the right one, so an
// entry whose class is 0 is never treated as out of order with a neighbour.
// NOTE(review): the statements inside the `if` are missing from this extract
// (presumably a swap of the two entries plus `swapped = true`) -- confirm.
183 fn canonical_sort(comb: &mut [(char, u8)]) {
184 let len = comb.len();
185 for i in range(0, len) {
186 let mut swapped = false;
// Pass `i` inspects adjacent pairs among the first `len - i` entries.
187 for j in range(1, len-i) {
188 let class_a = *comb[j-1].ref1();
189 let class_b = *comb[j].ref1();
190 if class_a != 0 && class_b != 0 && class_a > class_b {
// Stop as soon as a whole pass leaves `swapped` unset.
195 if !swapped { break; }
200 enum DecompositionType {
205 /// External iterator for a string's decomposition's characters.
206 /// Use with the `std::iter` module.
208 pub struct Decompositions<'a> {
209 kind: DecompositionType,
211 buffer: Vec<(char, u8)>,
215 impl<'a> Iterator<char> for Decompositions<'a> {
217 fn next(&mut self) -> Option<char> {
218 match self.buffer.head() {
221 self.buffer.remove(0);
224 Some(&(c, _)) if self.sorted => {
225 self.buffer.remove(0);
228 _ => self.sorted = false
232 for ch in self.iter {
233 let buffer = &mut self.buffer;
234 let sorted = &mut self.sorted;
238 unicode::char::canonical_combining_class(d);
239 if class == 0 && !*sorted {
240 canonical_sort(buffer.as_mut_slice());
243 buffer.push((d, class));
247 unicode::char::decompose_canonical(ch, callback)
250 unicode::char::decompose_compatible(ch, callback)
261 canonical_sort(self.buffer.as_mut_slice());
265 match self.buffer.remove(0) {
270 Some((c, _)) => Some(c),
275 fn size_hint(&self) -> (uint, Option<uint>) {
276 let (lower, _) = self.iter.size_hint();
282 enum RecompositionState {
288 /// External iterator for a string's recomposition's characters.
289 /// Use with the `std::iter` module.
291 pub struct Recompositions<'a> {
292 iter: Decompositions<'a>,
293 state: RecompositionState,
294 buffer: RingBuf<char>,
295 composee: Option<char>,
299 impl<'a> Iterator<char> for Recompositions<'a> {
301 fn next(&mut self) -> Option<char> {
305 for ch in self.iter {
306 let ch_class = unicode::char::canonical_combining_class(ch);
307 if self.composee.is_none() {
311 self.composee = Some(ch);
314 let k = self.composee.clone().unwrap();
316 match self.last_ccc {
318 match unicode::char::compose(k, ch) {
320 self.composee = Some(r);
325 self.composee = Some(ch);
328 self.buffer.push_back(ch);
329 self.last_ccc = Some(ch_class);
334 if l_class >= ch_class {
335 // `ch` is blocked from `composee`
337 self.composee = Some(ch);
338 self.last_ccc = None;
339 self.state = Purging;
342 self.buffer.push_back(ch);
343 self.last_ccc = Some(ch_class);
346 match unicode::char::compose(k, ch) {
348 self.composee = Some(r);
352 self.buffer.push_back(ch);
353 self.last_ccc = Some(ch_class);
359 self.state = Finished;
360 if self.composee.is_some() {
361 return self.composee.take();
365 match self.buffer.pop_front() {
366 None => self.state = Composing,
371 match self.buffer.pop_front() {
372 None => return self.composee.take(),
381 /// Replaces all occurrences of one string with another.
///
/// The result is built in a new `String`; `s` is only read.
385 /// * s - The string containing substrings to replace
386 /// * from - The string to replace
387 /// * to - The replacement string
391 /// The original string with all occurrences of `from` replaced with `to`.
397 /// let string = "orange";
398 /// let new_string = str::replace(string, "or", "str");
399 /// assert_eq!(new_string.as_slice(), "strange");
401 pub fn replace(s: &str, from: &str, to: &str) -> String {
402 let mut result = String::new();
// Byte offset in `s` up to which input has already been consumed.
403 let mut last_end = 0;
404 for (start, end) in s.match_indices(from) {
// Copy the text lying between the previous match and this one.
// NOTE(review): the unchecked slice presumably relies on `match_indices`
// yielding offsets that are valid boundaries within `s` -- confirm.
405 result.push_str(unsafe { s.slice_unchecked(last_end, start) });
// NOTE(review): the rest of the loop body is missing from this extract
// (presumably it appends `to` and sets `last_end = end`) -- confirm.
// Copy whatever follows the final match; when the loop never ran,
// `last_end` is still 0 and this copies all of `s`.
409 result.push_str(unsafe { s.slice_unchecked(last_end, s.len()) });
417 // Return the initial codepoint accumulator for the first byte.
418 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
419 // for width 3, and 3 bits for width 4
420 macro_rules! utf8_first_byte(
421 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
424 // return the value of $ch updated with continuation byte $byte
425 macro_rules! utf8_acc_cont_byte(
426 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
433 /// A string type that can hold either a `String` or a `&str`.
434 /// This can be useful as an optimization when an allocation is sometimes
435 /// needed but not always.
436 #[deprecated = "use std::str::CowString"]
437 pub enum MaybeOwned<'a> {
438 /// A borrowed string.
444 /// A specialization of `CowString` to be sendable.
445 pub type SendStr = CowString<'static>;
447 #[deprecated = "use std::str::CowString"]
448 impl<'a> MaybeOwned<'a> {
449 /// Returns `true` if this `MaybeOwned` wraps an owned string.
454 /// let string = String::from_str("orange");
455 /// let maybe_owned_string = string.into_maybe_owned();
456 /// assert_eq!(true, maybe_owned_string.is_owned());
459 pub fn is_owned(&self) -> bool {
466 /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
471 /// let string = "orange";
472 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
473 /// assert_eq!(true, maybe_owned_string.is_slice());
476 pub fn is_slice(&self) -> bool {
483 /// Return the number of bytes in this string.
485 pub fn len(&self) -> uint { self.as_slice().len() }
487 /// Returns true if the string contains no bytes
490 pub fn is_empty(&self) -> bool { self.len() == 0 }
493 #[deprecated = "use std::borrow::IntoCow"]
494 /// Trait for moving into a `MaybeOwned`.
495 pub trait IntoMaybeOwned<'a> {
496 /// Moves `self` into a `MaybeOwned`.
497 fn into_maybe_owned(self) -> MaybeOwned<'a>;
500 #[deprecated = "use std::borrow::IntoCow"]
502 impl<'a> IntoMaybeOwned<'a> for String {
506 /// let owned_string = String::from_str("orange");
507 /// let maybe_owned_string = owned_string.into_maybe_owned();
508 /// assert_eq!(true, maybe_owned_string.is_owned());
512 fn into_maybe_owned(self) -> MaybeOwned<'a> {
517 #[deprecated = "use std::borrow::IntoCow"]
519 impl<'a> IntoMaybeOwned<'a> for &'a str {
523 /// let string = "orange";
524 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
525 /// assert_eq!(false, maybe_owned_str.is_owned());
529 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
533 #[deprecated = "use std::borrow::IntoCow"]
534 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
538 /// let str = "orange";
539 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
540 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
541 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
544 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
547 #[deprecated = "use std::str::CowString"]
548 impl<'a> PartialEq for MaybeOwned<'a> {
550 fn eq(&self, other: &MaybeOwned) -> bool {
551 self.as_slice() == other.as_slice()
555 #[deprecated = "use std::str::CowString"]
556 impl<'a> Eq for MaybeOwned<'a> {}
558 #[deprecated = "use std::str::CowString"]
559 impl<'a> PartialOrd for MaybeOwned<'a> {
561 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
562 Some(self.cmp(other))
566 #[deprecated = "use std::str::CowString"]
567 impl<'a> Ord for MaybeOwned<'a> {
569 fn cmp(&self, other: &MaybeOwned) -> Ordering {
570 self.as_slice().cmp(other.as_slice())
575 #[deprecated = "use std::str::CowString"]
576 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
578 fn equiv(&self, other: &S) -> bool {
579 self.as_slice() == other.as_slice()
583 #[deprecated = "use std::str::CowString"]
584 impl<'a> Str for MaybeOwned<'a> {
587 fn as_slice<'b>(&'b self) -> &'b str {
590 Owned(ref s) => s.as_slice()
595 #[deprecated = "use std::str::CowString"]
596 impl<'a> StrAllocating for MaybeOwned<'a> {
599 fn into_string(self) -> String {
601 Slice(s) => String::from_str(s),
607 #[deprecated = "use std::str::CowString"]
608 impl<'a> Clone for MaybeOwned<'a> {
611 fn clone(&self) -> MaybeOwned<'a> {
613 Slice(s) => Slice(s),
614 Owned(ref s) => Owned(String::from_str(s.as_slice()))
619 #[deprecated = "use std::str::CowString"]
620 impl<'a> Default for MaybeOwned<'a> {
623 fn default() -> MaybeOwned<'a> { Slice("") }
626 #[deprecated = "use std::str::CowString"]
627 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
629 fn hash(&self, hasher: &mut H) {
630 self.as_slice().hash(hasher)
634 #[deprecated = "use std::str::CowString"]
635 impl<'a> fmt::Show for MaybeOwned<'a> {
637 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
639 Slice(ref s) => s.fmt(f),
640 Owned(ref s) => s.fmt(f)
645 #[unstable = "trait is unstable"]
646 impl BorrowFrom<String> for str {
647 fn borrow_from(owned: &String) -> &str { owned[] }
650 #[unstable = "trait is unstable"]
651 impl ToOwned<String> for str {
652 fn to_owned(&self) -> String { self.into_string() }
655 /// Unsafe string operations.
657 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
658 pub use core::str::raw::{slice_unchecked};
665 /// A clone-on-write string
666 pub type CowString<'a> = Cow<'a, String, str>;
668 impl<'a> Str for CowString<'a> {
670 fn as_slice<'b>(&'b self) -> &'b str {
676 Section: Trait implementations
679 /// Any string that can be represented as a slice.
680 pub trait StrAllocating: Str {
681 /// Converts `self` into a `String`, not making a copy if possible.
682 fn into_string(self) -> String;
684 /// Escapes each char in `self` with `char::escape_default`.
685 fn escape_default(&self) -> String {
686 let me = self.as_slice();
// Start with room for the unescaped byte length.
687 let mut out = String::with_capacity(me.len());
688 for c in me.chars() {
689 for c in c.escape_default() {
696 /// Escapes each char in `self` with `char::escape_unicode`.
697 fn escape_unicode(&self) -> String {
698 let me = self.as_slice();
// Start with room for the unescaped byte length.
699 let mut out = String::with_capacity(me.len());
700 for c in me.chars() {
701 for c in c.escape_unicode() {
708 /// Replaces all occurrences of one string with another.
///
/// Forwards to the free function `replace` with `self.as_slice()`.
712 /// * `from` - The string to replace
713 /// * `to` - The replacement string
717 /// The original string with all occurrences of `from` replaced with `to`.
722 /// let s = "Do you know the muffin man,
723 /// The muffin man, the muffin man, ...".to_string();
725 /// assert_eq!(s.replace("muffin man", "little lamb"),
726 /// "Do you know the little lamb,
727 /// The little lamb, the little lamb, ...".to_string());
729 /// // not found, so no change.
730 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
732 fn replace(&self, from: &str, to: &str) -> String {
733 replace(self.as_slice(), from, to)
736 /// Given a string, makes a new string with repeated copies of it.
///
/// The loop runs `nn` times, so an `nn` of 0 leaves the result empty.
737 fn repeat(&self, nn: uint) -> String {
738 let me = self.as_slice();
// Reserve the exact final byte length up front.
739 let mut ret = String::with_capacity(nn * me.len());
740 for _ in range(0, nn) {
746 /// Returns the Levenshtein Distance between two strings.
///
/// When either string is empty the result is the other string's
/// `char_len()`.
747 fn lev_distance(&self, t: &str) -> uint {
748 let me = self.as_slice();
749 if me.is_empty() { return t.char_len(); }
750 if t.is_empty() { return me.char_len(); }
// Working column, initialised to 0, 1, 2, ... It is sized from `t.len()`,
// a byte count, which is never smaller than the number of chars the inner
// loop enumerates, so `dcol[j + 1]` stays in bounds.
752 let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
755 for (i, sc) in me.chars().enumerate() {
758 dcol[0] = current + 1;
760 for (j, tc) in t.chars().enumerate() {
762 let next = dcol[j + 1];
765 dcol[j + 1] = current;
767 dcol[j + 1] = cmp::min(current, next);
768 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
779 /// Returns an iterator over the string in Unicode Normalization Form D
780 /// (canonical decomposition).
782 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
784 iter: self.as_slice().chars(),
791 /// Returns an iterator over the string in Unicode Normalization Form KD
792 /// (compatibility decomposition).
794 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
796 iter: self.as_slice().chars(),
803 /// Returns an iterator over the string in Unicode Normalization Form C
804 /// (canonical decomposition followed by canonical composition).
806 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
// Recomposition consumes the canonical decomposition of `self`.
808 iter: self.nfd_chars(),
810 buffer: RingBuf::new(),
816 /// Returns an iterator over the string in Unicode Normalization Form KC
817 /// (compatibility decomposition followed by canonical composition).
819 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
// Recomposition consumes the compatibility decomposition of `self`.
821 iter: self.nfkd_chars(),
823 buffer: RingBuf::new(),
830 impl<'a> StrAllocating for &'a str {
832 fn into_string(self) -> String {
833 String::from_str(self)
839 use std::iter::AdditiveIterator;
840 use std::iter::range;
841 use std::default::Default;
843 use std::clone::Clone;
844 use std::cmp::{Ord, PartialOrd, Equiv};
845 use std::cmp::Ordering::{Equal, Greater, Less};
846 use std::option::Option;
847 use std::option::Option::{Some, None};
848 use std::ptr::RawPtr;
849 use std::iter::{Iterator, IteratorExt, DoubleEndedIteratorExt};
852 use std::slice::{AsSlice, SlicePrelude};
855 use slice::CloneSliceAllocPrelude;
857 use unicode::char::UnicodeChar;
861 assert!((eq_slice("foobar".slice(0, 3), "foo")));
862 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
863 assert!((!eq_slice("foo1", "foo2")));
869 assert!("" <= "foo");
870 assert!("foo" <= "foo");
871 assert!("foo" != "bar");
876 assert_eq!("".len(), 0u);
877 assert_eq!("hello world".len(), 11u);
878 assert_eq!("\x63".len(), 1u);
879 assert_eq!("\u00a2".len(), 2u);
880 assert_eq!("\u03c0".len(), 2u);
881 assert_eq!("\u2620".len(), 3u);
882 assert_eq!("\U0001d11e".len(), 4u);
884 assert_eq!("".char_len(), 0u);
885 assert_eq!("hello world".char_len(), 11u);
886 assert_eq!("\x63".char_len(), 1u);
887 assert_eq!("\u00a2".char_len(), 1u);
888 assert_eq!("\u03c0".char_len(), 1u);
889 assert_eq!("\u2620".char_len(), 1u);
890 assert_eq!("\U0001d11e".char_len(), 1u);
891 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
// Display width. The first string is five FULLWIDTH Latin letters
// (U+FF48 U+FF45 U+FF4C U+FF4C U+FF4F), each occupying two columns, so the
// expected width is 10 for either value of the boolean argument. Plain ASCII
// "hello" would have width 5 and make these assertions fail.
893 assert_eq!("ｈｅｌｌｏ".width(false), 10u);
894 assert_eq!("ｈｅｌｌｏ".width(true), 10u);
// NUL characters and the empty string contribute no columns.
895 assert_eq!("\0\0\0\0\0".width(false), 0u);
896 assert_eq!("\0\0\0\0\0".width(true), 0u);
897 assert_eq!("".width(false), 0u);
898 assert_eq!("".width(true), 0u);
// Subscript digits: the expected width doubles when the flag is `true`.
899 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
900 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
905 assert_eq!("hello".find('l'), Some(2u));
906 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
907 assert!("hello".find('x').is_none());
908 assert!("hello".find(|c:char| c == 'x').is_none());
909 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
910 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
915 assert_eq!("hello".rfind('l'), Some(3u));
916 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
917 assert!("hello".rfind('x').is_none());
918 assert!("hello".rfind(|c:char| c == 'x').is_none());
919 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
920 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
925 let empty = String::from_str("");
926 let s: String = empty.chars().collect();
927 assert_eq!(empty, s);
928 let data = String::from_str("ประเทศไทย中");
929 let s: String = data.chars().collect();
934 fn test_into_bytes() {
935 let data = String::from_str("asdf");
936 let buf = data.into_bytes();
937 assert_eq!(b"asdf", buf);
943 assert_eq!("".find_str(""), Some(0u));
944 assert!("banana".find_str("apple pie").is_none());
947 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
948 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
949 assert!(data.slice(2u, 4u).find_str("ab").is_none());
951 let string = "ประเทศไทย中华Việt Nam";
952 let mut data = String::from_str(string);
953 data.push_str(string);
954 assert!(data.find_str("ไท华").is_none());
955 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
956 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
958 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
959 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
960 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
961 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
962 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
964 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
965 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
966 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
967 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
968 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
972 fn test_slice_chars() {
973 fn t(a: &str, b: &str, start: uint) {
974 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
977 t("hello", "llo", 2);
981 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
988 impl AsSlice<String> for S {
989 fn as_slice<'a> (&'a self) -> &'a [String] {
994 fn s(x: &str) -> String { x.into_string() }
996 macro_rules! test_concat {
997 ($expected: expr, $string: expr) => {
999 let s = $string.concat();
1000 assert_eq!($expected, s);
1006 fn test_concat_for_different_types() {
1007 test_concat!("ab", ["a", "b"]);
1008 test_concat!("ab", [s("a"), s("b")]);
1009 test_concat!("ab", vec!["a", "b"]);
1010 test_concat!("ab", vec!["a", "b"].as_slice());
1011 test_concat!("ab", vec![s("a"), s("b")]);
1013 let mut v0 = ["a", "b"];
1014 let mut v1 = [s("a"), s("b")];
1016 use std::c_vec::CVec;
1018 test_concat!("ab", CVec::new(v0.as_mut_ptr(), v0.len()));
1019 test_concat!("ab", CVec::new(v1.as_mut_ptr(), v1.len()));
1022 test_concat!("ab", S { x: [s("a"), s("b")] });
1026 fn test_concat_for_different_lengths() {
1027 let empty: &[&str] = &[];
1028 test_concat!("", empty);
1029 test_concat!("a", ["a"]);
1030 test_concat!("ab", ["a", "b"]);
1031 test_concat!("abc", ["", "a", "bc"]);
1034 macro_rules! test_connect {
1035 ($expected: expr, $string: expr, $delim: expr) => {
1037 let s = $string.connect($delim);
1038 assert_eq!($expected, s);
1044 fn test_connect_for_different_types() {
1045 test_connect!("a-b", ["a", "b"], "-");
1046 let hyphen = "-".into_string();
1047 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1048 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1049 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1050 test_connect!("a-b", vec![s("a"), s("b")], "-");
1052 let mut v0 = ["a", "b"];
1053 let mut v1 = [s("a"), s("b")];
1055 use std::c_vec::CVec;
1057 test_connect!("a-b", CVec::new(v0.as_mut_ptr(), v0.len()), "-");
1058 test_connect!("a-b", CVec::new(v1.as_mut_ptr(), v1.len()), hyphen.as_slice());
1061 test_connect!("a-b", S { x: [s("a"), s("b")] }, "-");
1065 fn test_connect_for_different_lengths() {
1066 let empty: &[&str] = &[];
1067 test_connect!("", empty, "-");
1068 test_connect!("a", ["a"], "-");
1069 test_connect!("a-b", ["a", "b"], "-");
1070 test_connect!("-a-bc", ["", "a", "bc"], "-");
1075 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1076 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1077 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1078 assert_eq!("".repeat(4), String::from_str(""));
1079 assert_eq!("hi".repeat(0), String::from_str(""));
1083 fn test_unsafe_slice() {
1084 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1085 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1086 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1087 fn a_million_letter_a() -> String {
1089 let mut rs = String::new();
1091 rs.push_str("aaaaaaaaaa");
1096 fn half_a_million_letter_a() -> String {
1098 let mut rs = String::new();
1100 rs.push_str("aaaaa");
1105 let letters = a_million_letter_a();
1106 assert!(half_a_million_letter_a() ==
1107 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1113 fn test_starts_with() {
1114 assert!(("".starts_with("")));
1115 assert!(("abc".starts_with("")));
1116 assert!(("abc".starts_with("a")));
1117 assert!((!"a".starts_with("abc")));
1118 assert!((!"".starts_with("abc")));
1119 assert!((!"ödd".starts_with("-")));
1120 assert!(("ödd".starts_with("öd")));
1124 fn test_ends_with() {
1125 assert!(("".ends_with("")));
1126 assert!(("abc".ends_with("")));
1127 assert!(("abc".ends_with("c")));
1128 assert!((!"a".ends_with("abc")));
1129 assert!((!"".ends_with("abc")));
1130 assert!((!"ddö".ends_with("-")));
1131 assert!(("ddö".ends_with("dö")));
1135 fn test_is_empty() {
1136 assert!("".is_empty());
1137 assert!(!"a".is_empty());
// Replacing inside empty, exact-match and prefix-match inputs.
1143 assert_eq!("".replace(a, "b"), String::from_str(""));
1144 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1145 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
// Every occurrence is replaced, not just the first.
1147 assert!(" test test ".replace(test, "toast") ==
1148 String::from_str(" toast toast "));
// Deleting both occurrences leaves the three single spaces that surrounded
// them, so the expected value is three spaces rather than one.
1149 assert_eq!(" test test ".replace(test, ""), String::from_str("   "));
1153 fn test_replace_2a() {
1154 let data = "ประเทศไทย中华";
1155 let repl = "دولة الكويت";
1158 let a2 = "دولة الكويتทศไทย中华";
1159 assert_eq!(data.replace(a, repl), a2);
1163 fn test_replace_2b() {
1164 let data = "ประเทศไทย中华";
1165 let repl = "دولة الكويت";
1168 let b2 = "ปรدولة الكويتทศไทย中华";
1169 assert_eq!(data.replace(b, repl), b2);
1173 fn test_replace_2c() {
1174 let data = "ประเทศไทย中华";
1175 let repl = "دولة الكويت";
1178 let c2 = "ประเทศไทยدولة الكويت";
1179 assert_eq!(data.replace(c, repl), c2);
1183 fn test_replace_2d() {
1184 let data = "ประเทศไทย中华";
1185 let repl = "دولة الكويت";
1188 assert_eq!(data.replace(d, repl), data);
1193 assert_eq!("ab", "abc".slice(0, 2));
1194 assert_eq!("bc", "abc".slice(1, 3));
1195 assert_eq!("", "abc".slice(1, 1));
1196 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1198 let data = "ประเทศไทย中华";
1199 assert_eq!("ป", data.slice(0, 3));
1200 assert_eq!("ร", data.slice(3, 6));
1201 assert_eq!("", data.slice(3, 3));
1202 assert_eq!("华", data.slice(30, 33));
1204 fn a_million_letter_x() -> String {
1206 let mut rs = String::new();
1208 rs.push_str("华华华华华华华华华华");
1213 fn half_a_million_letter_x() -> String {
1215 let mut rs = String::new();
1217 rs.push_str("华华华华华");
1222 let letters = a_million_letter_x();
1223 assert!(half_a_million_letter_x() ==
1224 String::from_str(letters.slice(0u, 3u * 500000u)));
1229 let ss = "中华Việt Nam";
1231 assert_eq!("华", ss.slice(3u, 6u));
1232 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1234 assert_eq!("ab", "abc".slice(0u, 2u));
1235 assert_eq!("bc", "abc".slice(1u, 3u));
1236 assert_eq!("", "abc".slice(1u, 1u));
1238 assert_eq!("中", ss.slice(0u, 3u));
1239 assert_eq!("华V", ss.slice(3u, 7u));
1240 assert_eq!("", ss.slice(3u, 3u));
1255 fn test_slice_fail() {
1256 "中华Việt Nam".slice(0u, 2u);
1260 fn test_slice_from() {
1261 assert_eq!("abcd".slice_from(0), "abcd");
1262 assert_eq!("abcd".slice_from(2), "cd");
1263 assert_eq!("abcd".slice_from(4), "");
1266 fn test_slice_to() {
1267 assert_eq!("abcd".slice_to(0), "");
1268 assert_eq!("abcd".slice_to(2), "ab");
1269 assert_eq!("abcd".slice_to(4), "abcd");
1273 fn test_trim_left_chars() {
1274 let v: &[char] = &[];
1275 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1276 let chars: &[char] = &['*', ' '];
1277 assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
1278 assert_eq!(" *** *** ".trim_left_chars(chars), "");
1279 assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
1281 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1282 let chars: &[char] = &['1', '2'];
1283 assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
1284 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123");
1288 fn test_trim_right_chars() {
1289 let v: &[char] = &[];
1290 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1291 let chars: &[char] = &['*', ' '];
1292 assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
1293 assert_eq!(" *** *** ".trim_right_chars(chars), "");
1294 assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
1296 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1297 let chars: &[char] = &['1', '2'];
1298 assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
1299 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar");
1303 fn test_trim_chars() {
1304 let v: &[char] = &[];
1305 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1306 let chars: &[char] = &['*', ' '];
1307 assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
1308 assert_eq!(" *** *** ".trim_chars(chars), "");
1309 assert_eq!("foo".trim_chars(chars), "foo");
1311 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1312 let chars: &[char] = &['1', '2'];
1313 assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
1314 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar");
1318 fn test_trim_left() {
1319 assert_eq!("".trim_left(), "");
1320 assert_eq!("a".trim_left(), "a");
1321 assert_eq!(" ".trim_left(), "");
1322 assert_eq!(" blah".trim_left(), "blah");
1323 assert_eq!(" \u3000 wut".trim_left(), "wut");
1324 assert_eq!("hey ".trim_left(), "hey ");
1328 fn test_trim_right() {
1329 assert_eq!("".trim_right(), "");
1330 assert_eq!("a".trim_right(), "a");
1331 assert_eq!(" ".trim_right(), "");
1332 assert_eq!("blah ".trim_right(), "blah");
1333 assert_eq!("wut \u3000 ".trim_right(), "wut");
1334 assert_eq!(" hey".trim_right(), " hey");
1339 assert_eq!("".trim(), "");
1340 assert_eq!("a".trim(), "a");
1341 assert_eq!(" ".trim(), "");
1342 assert_eq!(" blah ".trim(), "blah");
1343 assert_eq!("\nwut \u3000 ".trim(), "wut");
1344 assert_eq!(" hey dude ".trim(), "hey dude");
1348 fn test_is_whitespace() {
1349 assert!("".is_whitespace());
1350 assert!(" ".is_whitespace());
1351 assert!("\u2009".is_whitespace()); // Thin space
1352 assert!(" \n\t ".is_whitespace());
1353 assert!(!" _ ".is_whitespace());
1357 fn test_slice_shift_char() {
1358 let data = "ประเทศไทย中";
1359 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1363 fn test_slice_shift_char_2() {
1365 assert_eq!(empty.slice_shift_char(), None);
1370 // deny overlong encodings
1371 assert!(!is_utf8(&[0xc0, 0x80]));
1372 assert!(!is_utf8(&[0xc0, 0xae]));
1373 assert!(!is_utf8(&[0xe0, 0x80, 0x80]));
1374 assert!(!is_utf8(&[0xe0, 0x80, 0xaf]));
1375 assert!(!is_utf8(&[0xe0, 0x81, 0x81]));
1376 assert!(!is_utf8(&[0xf0, 0x82, 0x82, 0xac]));
1377 assert!(!is_utf8(&[0xf4, 0x90, 0x80, 0x80]));
1380 assert!(!is_utf8(&[0xED, 0xA0, 0x80]));
1381 assert!(!is_utf8(&[0xED, 0xBF, 0xBF]));
1383 assert!(is_utf8(&[0xC2, 0x80]));
1384 assert!(is_utf8(&[0xDF, 0xBF]));
1385 assert!(is_utf8(&[0xE0, 0xA0, 0x80]));
1386 assert!(is_utf8(&[0xED, 0x9F, 0xBF]));
1387 assert!(is_utf8(&[0xEE, 0x80, 0x80]));
1388 assert!(is_utf8(&[0xEF, 0xBF, 0xBF]));
1389 assert!(is_utf8(&[0xF0, 0x90, 0x80, 0x80]));
1390 assert!(is_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]));
1394 fn test_is_utf16() {
1395 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1403 // surrogate pairs (randomly generated with Python 3's
1404 // .encode('utf-16be'))
1405 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1406 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1407 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1409 // mixtures (also random)
1410 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1411 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1412 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1415 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1418 // surrogate + regular unit
1420 // surrogate + lead surrogate
1422 // unterminated surrogate
1424 // trail surrogate without a lead
1427 // random byte sequences that Python 3's .decode('utf-16be')
1429 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1430 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1431 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1432 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1433 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1434 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1435 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1436 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1437 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1438 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1439 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1440 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1441 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1442 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1443 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1444 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1445 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1446 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1447 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1448 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1449 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1453 fn test_as_bytes() {
1456 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1457 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1461 assert_eq!("".as_bytes(), b);
1462 assert_eq!("abc".as_bytes(), b"abc");
1463 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1468 fn test_as_bytes_fail() {
1469 // Don't double free. (I'm not sure if this exercises the
1470 // original problem code path anymore.)
1471 let s = String::from_str("");
1472 let _bytes = s.as_bytes();
1478 let buf = "hello".as_ptr();
1480 assert_eq!(*buf.offset(0), b'h');
1481 assert_eq!(*buf.offset(1), b'e');
1482 assert_eq!(*buf.offset(2), b'l');
1483 assert_eq!(*buf.offset(3), b'l');
1484 assert_eq!(*buf.offset(4), b'o');
1489 fn test_subslice_offset() {
1490 let a = "kernelsprite";
1491 let b = a.slice(7, a.len());
1492 let c = a.slice(0, a.len() - 6);
1493 assert_eq!(a.subslice_offset(b), 7);
1494 assert_eq!(a.subslice_offset(c), 0);
1496 let string = "a\nb\nc";
1497 let lines: Vec<&str> = string.lines().collect();
1498 assert_eq!(string.subslice_offset(lines[0]), 0);
1499 assert_eq!(string.subslice_offset(lines[1]), 2);
1500 assert_eq!(string.subslice_offset(lines[2]), 4);
1505 fn test_subslice_offset_2() {
1506 let a = "alchemiter";
1507 let b = "cruxtruder";
1508 a.subslice_offset(b);
1512 fn vec_str_conversions() {
1513 let s1: String = String::from_str("All mimsy were the borogoves");
1515 let v: Vec<u8> = s1.as_bytes().to_vec();
1516 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1517 let mut i: uint = 0u;
1518 let n1: uint = s1.len();
1519 let n2: uint = v.len();
1522 let a: u8 = s1.as_bytes()[i];
1523 let b: u8 = s2.as_bytes()[i];
// Substring search: needles at the start, middle and end, the empty
// needle, absent needles, and multi-byte text.
fn test_contains() {
    // ASCII haystack. The empty needle is contained in every string.
    let ascii = "abcde";
    let present = ["bcd", "abcd", "bcde", ""];
    for &needle in present.iter() {
        assert!(ascii.contains(needle));
    }
    assert!(!ascii.contains("def"));

    // Empty haystack: only the empty needle matches.
    assert!("".contains(""));
    assert!(!"".contains("a"));

    // Multi-byte haystack; the last needle mixes characters that both occur
    // in the text but never next to each other.
    let data = "ประเทศไทย中华Việt Nam";
    let present = ["ประเ", "ะเ", "中华"];
    for &needle in present.iter() {
        assert!(data.contains(needle));
    }
    assert!(!data.contains("ไท华"));
}
1549 fn test_contains_char() {
1550 assert!("abc".contains_char('b'));
1551 assert!("a".contains_char('a'));
1552 assert!(!"abc".contains_char('d'));
1553 assert!(!"".contains_char('a'));
1557 fn test_truncate_utf16_at_nul() {
1559 let b: &[u16] = &[];
1560 assert_eq!(truncate_utf16_at_nul(&v), b);
1563 assert_eq!(truncate_utf16_at_nul(&v), b);
1566 let b: &[u16] = &[1];
1567 assert_eq!(truncate_utf16_at_nul(&v), b);
1570 let b: &[u16] = &[1, 2];
1571 assert_eq!(truncate_utf16_at_nul(&v), b);
1574 let b: &[u16] = &[1, 2, 3];
1575 assert_eq!(truncate_utf16_at_nul(&v), b);
1580 let s = "ศไทย中华Việt Nam";
1581 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1583 for ch in v.iter() {
1584 assert!(s.char_at(pos) == *ch);
1585 pos += String::from_char(1, *ch).len();
1590 fn test_char_at_reverse() {
1591 let s = "ศไทย中华Việt Nam";
1592 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1593 let mut pos = s.len();
1594 for ch in v.iter().rev() {
1595 assert!(s.char_at_reverse(pos) == *ch);
1596 pos -= String::from_char(1, *ch).len();
1601 fn test_escape_unicode() {
1602 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1603 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1604 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1605 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1606 assert_eq!("\x00\x01\u00fe\u00ff".escape_unicode(),
1607 String::from_str("\\x00\\x01\\u00fe\\u00ff"));
1608 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1609 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1610 String::from_str("\\U00010000\\U0010ffff"));
1611 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1612 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
1616 fn test_escape_default() {
1617 assert_eq!("abc".escape_default(), String::from_str("abc"));
1618 assert_eq!("a c".escape_default(), String::from_str("a c"));
1619 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1620 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1621 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1622 assert_eq!("\U00010000\U0010ffff".escape_default(),
1623 String::from_str("\\U00010000\\U0010ffff"));
1624 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1625 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Checks the total ordering of string slices.
//
// The comparisons used to be bare `a.cmp(b) == X;` statements whose result
// was thrown away, so the test could never fail. Each one is now asserted.
fn test_total_ord() {
    // A string sorts after any of its proper prefixes, and vice versa.
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    // The first differing position decides ('5' < '6'), not the length.
    assert_eq!("12345555".cmp("123456"), Less);
    // Likewise '2' > '1' wins even though the left string is shorter.
    assert_eq!("22".cmp("1234"), Greater);
}
1638 fn test_char_range_at() {
1639 let data = "b¢€𤭢𤭢€¢b";
1640 assert_eq!('b', data.char_range_at(0).ch);
1641 assert_eq!('¢', data.char_range_at(1).ch);
1642 assert_eq!('€', data.char_range_at(3).ch);
1643 assert_eq!('𤭢', data.char_range_at(6).ch);
1644 assert_eq!('𤭢', data.char_range_at(10).ch);
1645 assert_eq!('€', data.char_range_at(14).ch);
1646 assert_eq!('¢', data.char_range_at(17).ch);
1647 assert_eq!('b', data.char_range_at(19).ch);
1651 fn test_char_range_at_reverse_underflow() {
1652 assert_eq!("abc".char_range_at_reverse(0).next, 0);
1656 fn test_iterator() {
1657 let s = "ศไทย中华Việt Nam";
1658 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1661 let mut it = s.chars();
1664 assert_eq!(c, v[pos]);
1667 assert_eq!(pos, v.len());
1671 fn test_rev_iterator() {
1672 let s = "ศไทย中华Việt Nam";
1673 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1676 let mut it = s.chars().rev();
1679 assert_eq!(c, v[pos]);
1682 assert_eq!(pos, v.len());
1686 fn test_chars_decoding() {
1687 let mut bytes = [0u8, ..4];
1688 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1689 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
1690 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1691 if Some(c) != s.chars().next() {
1692 panic!("character {:x}={} does not decode correctly", c as u32, c);
1698 fn test_chars_rev_decoding() {
1699 let mut bytes = [0u8, ..4];
1700 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1701 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
1702 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1703 if Some(c) != s.chars().rev().next() {
1704 panic!("character {:x}={} does not decode correctly", c as u32, c);
1710 fn test_iterator_clone() {
1711 let s = "ศไทย中华Việt Nam";
1712 let mut it = s.chars();
1714 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1718 fn test_bytesator() {
1719 let s = "ศไทย中华Việt Nam";
1721 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1722 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1727 for b in s.bytes() {
1728 assert_eq!(b, v[pos]);
1734 fn test_bytes_revator() {
1735 let s = "ศไทย中华Việt Nam";
1737 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1738 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1741 let mut pos = v.len();
1743 for b in s.bytes().rev() {
1745 assert_eq!(b, v[pos]);
1750 fn test_char_indicesator() {
1751 let s = "ศไทย中华Việt Nam";
1752 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1753 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1756 let mut it = s.char_indices();
1759 assert_eq!(c, (p[pos], v[pos]));
1762 assert_eq!(pos, v.len());
1763 assert_eq!(pos, p.len());
1767 fn test_char_indices_revator() {
1768 let s = "ศไทย中华Việt Nam";
1769 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1770 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1773 let mut it = s.char_indices().rev();
1776 assert_eq!(c, (p[pos], v[pos]));
1779 assert_eq!(pos, v.len());
1780 assert_eq!(pos, p.len());
// `splitn(3, sep)` stops after three splits and yields the untouched
// remainder as the final piece. A `char` separator and an equivalent
// closure must produce the same pieces.
fn test_splitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // ASCII separator.
    let by_space = vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"];
    assert_eq!(data.splitn(3, ' ').collect::<Vec<&str>>(), by_space);
    assert_eq!(data.splitn(3, |c: char| c == ' ').collect::<Vec<&str>>(), by_space);

    // Multi-byte separator.
    let by_umlaut = vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"];
    assert_eq!(data.splitn(3, 'ä').collect::<Vec<&str>>(), by_umlaut);
    assert_eq!(data.splitn(3, |c: char| c == 'ä').collect::<Vec<&str>>(), by_umlaut);
}
// Contrasts `split` with `split_terminator` on input that ends with the
// separator: only `split` yields the empty piece after the final '\n'.
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing = data.split('\n').collect::<Vec<&str>>();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let without_trailing = data.split_terminator('\n').collect::<Vec<&str>>();
    assert_eq!(without_trailing, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
1814 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1815 let words: Vec<&str> = data.words().collect();
1816 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
1820 fn test_lev_distance() {
1821 use std::char::{ from_u32, MAX };
1822 // Test bytelength agnosticity
1823 for c in range(0u32, MAX as u32)
1824 .filter_map(|i| from_u32(i))
1825 .map(|i| String::from_char(1, i)) {
1826 assert_eq!(c[].lev_distance(c[]), 0);
1829 let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1830 let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
1831 let c = "Mary häd ä little lämb\n\nLittle lämb\n";
1832 assert_eq!(a.lev_distance(b), 1);
1833 assert_eq!(b.lev_distance(a), 1);
1834 assert_eq!(a.lev_distance(c), 2);
1835 assert_eq!(c.lev_distance(a), 2);
1836 assert_eq!(b.lev_distance(c), 1);
1837 assert_eq!(c.lev_distance(b), 1);
1841 fn test_nfd_chars() {
1843 ($input: expr, $expected: expr) => {
1844 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
1848 t!("\u1e0b\u01c4", "d\u0307\u01c4");
1849 t!("\u2026", "\u2026");
1850 t!("\u2126", "\u03a9");
1851 t!("\u1e0b\u0323", "d\u0323\u0307");
1852 t!("\u1e0d\u0307", "d\u0323\u0307");
1853 t!("a\u0301", "a\u0301");
1854 t!("\u0301a", "\u0301a");
1855 t!("\ud4db", "\u1111\u1171\u11b6");
1856 t!("\uac1c", "\u1100\u1162");
1860 fn test_nfkd_chars() {
1862 ($input: expr, $expected: expr) => {
1863 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
1867 t!("\u1e0b\u01c4", "d\u0307DZ\u030c");
1868 t!("\u2026", "...");
1869 t!("\u2126", "\u03a9");
1870 t!("\u1e0b\u0323", "d\u0323\u0307");
1871 t!("\u1e0d\u0307", "d\u0323\u0307");
1872 t!("a\u0301", "a\u0301");
1873 t!("\u0301a", "\u0301a");
1874 t!("\ud4db", "\u1111\u1171\u11b6");
1875 t!("\uac1c", "\u1100\u1162");
1879 fn test_nfc_chars() {
1881 ($input: expr, $expected: expr) => {
1882 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
1886 t!("\u1e0b\u01c4", "\u1e0b\u01c4");
1887 t!("\u2026", "\u2026");
1888 t!("\u2126", "\u03a9");
1889 t!("\u1e0b\u0323", "\u1e0d\u0307");
1890 t!("\u1e0d\u0307", "\u1e0d\u0307");
1891 t!("a\u0301", "\u00e1");
1892 t!("\u0301a", "\u0301a");
1893 t!("\ud4db", "\ud4db");
1894 t!("\uac1c", "\uac1c");
1895 t!("a\u0300\u0305\u0315\u05aeb", "\u00e0\u05ae\u0305\u0315b");
1899 fn test_nfkc_chars() {
1901 ($input: expr, $expected: expr) => {
1902 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
1906 t!("\u1e0b\u01c4", "\u1e0bD\u017d");
1907 t!("\u2026", "...");
1908 t!("\u2126", "\u03a9");
1909 t!("\u1e0b\u0323", "\u1e0d\u0307");
1910 t!("\u1e0d\u0307", "\u1e0d\u0307");
1911 t!("a\u0301", "\u00e1");
1912 t!("\u0301a", "\u0301a");
1913 t!("\ud4db", "\ud4db");
1914 t!("\uac1c", "\uac1c");
1915 t!("a\u0300\u0305\u0315\u05aeb", "\u00e0\u05ae\u0305\u0315b");
1920 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1921 let lines: Vec<&str> = data.lines().collect();
1922 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1924 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1925 let lines: Vec<&str> = data.lines().collect();
1926 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1930 fn test_graphemes() {
1931 use std::iter::order;
1932 // official Unicode test data
1933 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1934 let test_same: [(_, &[_]), .. 325] = [
1935 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1936 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1937 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1938 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1939 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1940 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1941 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1942 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1943 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1944 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1945 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1946 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1947 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1948 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1949 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1950 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1951 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1952 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1953 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1954 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1955 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1956 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1957 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
1958 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
1959 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
1960 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
1961 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
1962 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
1963 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
1964 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
1965 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
1966 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
1967 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
1968 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
1969 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
1970 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
1971 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
1972 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
1973 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
1974 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
1975 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
1976 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
1977 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
1978 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
1979 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
1980 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
1981 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
1982 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
1983 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
1984 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
1985 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
1986 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
1987 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
1988 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
1989 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
1990 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
1991 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
1992 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
1993 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
1994 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
1995 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
1996 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
1997 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
1998 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
1999 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
2000 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
2001 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
2002 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
2003 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
2004 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
2005 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
2006 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
2007 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
2008 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
2009 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
2010 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
2011 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
2012 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
2013 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
2014 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
2015 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
2016 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
2017 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
2018 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
2019 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
2020 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
2021 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
2022 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
2023 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
2024 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
2025 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
2026 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
2027 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
2028 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
2029 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
2030 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
2031 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
2032 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
2033 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
2034 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
2035 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
2036 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
2037 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
2038 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
2039 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
2040 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
2041 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
2042 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
2043 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
2044 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
2045 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
2046 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
2047 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
2048 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
2049 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
2050 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
2051 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
2052 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
2053 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
2054 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
2055 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
2056 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
2057 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
2058 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
2059 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
2060 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
2061 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
2062 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
2063 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
2064 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
2065 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
2066 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
2067 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
2068 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
2069 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
2070 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
2071 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
2072 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
2073 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
2074 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
2075 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
2076 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
2077 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
2078 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
2079 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
2080 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
2081 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
2082 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
2083 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
2084 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
2085 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
2086 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
2087 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
2088 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2089 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2090 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2091 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2092 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2093 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2094 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2095 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2096 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2097 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2098 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2099 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2100 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2101 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2102 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2103 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2104 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2105 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2106 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2107 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2108 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2109 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2110 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2111 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2112 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2113 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2114 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2115 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2116 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2117 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2118 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2119 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2120 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2121 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2122 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2123 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2124 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2125 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2126 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2127 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2128 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2129 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2130 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2131 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2132 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2133 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2134 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2135 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2136 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2137 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2138 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2139 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2140 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2143 let test_diff: [(_, &[_], &[_]), .. 23] = [
2144 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2145 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2146 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2147 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2148 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2149 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2150 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2151 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2152 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2153 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2154 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2155 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2156 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2157 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2158 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2159 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2160 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2161 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2162 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2163 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2164 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2165 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2166 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2169 for &(s, g) in test_same.iter() {
2170 // test forward iterator
2171 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2172 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2174 // test reverse iterator
2175 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2176 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2179 for &(s, gt, gf) in test_diff.iter() {
2180 // test forward iterator
2181 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2182 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2184 // test reverse iterator
2185 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2186 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2189 // test the indices iterators
2190 let s = "a̐éö̲\r\n";
2191 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2192 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2193 assert_eq!(gr_inds, b);
2194 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2195 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2196 assert_eq!(gr_inds, b);
2197 let mut gr_inds_iter = s.grapheme_indices(true);
2199 let gr_inds = gr_inds_iter.by_ref();
2200 let e1 = gr_inds.size_hint();
2201 assert_eq!(e1, (1, Some(13)));
2202 let c = gr_inds.count();
2205 let e2 = gr_inds_iter.size_hint();
2206 assert_eq!(e2, (0, Some(0)));
2208 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2210 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2211 let b: &[_] = &["\r", "\r\n", "\n"];
// Runs `split_str` over a table of inputs: absent, leading, trailing, multi-byte and
// repeated separators.
2216 fn test_split_strator() {
// Helper: collects the pieces of `s.split_str(sep)` into a Vec.
// NOTE(review): `u` looks like the expected piece list; the comparison itself is not
// visible in this view - confirm.
2217 fn t(s: &str, sep: &str, u: &[&str]) {
2218 let v: Vec<&str> = s.split_str(sep).collect();
// Separator absent from the input: a single piece holding the whole input is listed.
2221 t("--1233345--", "12345", &["--1233345--"]);
2222 t("abc::hello::there", "::", &["abc", "hello", "there"]);
// A separator at the start or end of the input is listed with an empty piece on that side.
2223 t("::hello::there", "::", &["", "hello", "there"]);
2224 t("hello::there::", "::", &["hello", "there", ""]);
2225 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
2226 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2227 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2228 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2229 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Short inputs, including ones made only of separator characters; the listed pieces
// treat matches as non-overlapping, taken left to right.
2231 t("zz", "zz", &["",""]);
2232 t("ok", "z", &["ok"]);
2233 t("zzz", "zz", &["","z"]);
2234 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a `Str` implementor is the empty string.
2238 fn test_str_default() {
2239 use std::default::Default;
// Generic helper: builds the default value of `S` and compares its slice view with
// the empty string. The types it is instantiated with are not visible in this view.
2240 fn t<S: Default + Str>() {
2241 let s: S = Default::default();
2242 assert_eq!(s.as_slice(), "");
// Sums string lengths over slices built from literals, from temporary `String`s and
// from a named `String`; every group is expected to total 5.
2250 fn test_str_container() {
// Adds up `len()` of every string in `v`.
2251 fn sum_len(v: &[&str]) -> uint {
2252 v.iter().map(|x| x.len()).sum()
2255 let s = String::from_str("01234");
// Empty strings are part of the inputs and add nothing to the total.
2256 assert_eq!(5, sum_len(&["012", "", "34"]));
2257 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2258 String::from_str("2").as_slice(),
2259 String::from_str("34").as_slice(),
2260 String::from_str("").as_slice()]));
2261 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8`: `Some` with the decoded text is expected for valid input and
// `None` for invalid input.
2265 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by the next assertion is not visible in this view.
2267 assert_eq!(from_utf8(xs), Some("hello"));
// Multi-byte text: the bytes of a string literal are expected to decode back to the same text.
2269 let xs = "ศไทย中华Việt Nam".as_bytes();
2270 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// Input ending in the byte 0xFF: `None` is expected.
2272 let xs = b"hello\xFF";
2273 assert_eq!(from_utf8(xs), None);
// Exercises `len`, `as_slice`, formatting, `lt`, `Default`, `cmp` and `equiv` on both
// the `Slice` and the `Owned` variant.
2277 fn test_maybe_owned_traits() {
// Borrowed variant.
2278 let s = Slice("abcde");
2279 assert_eq!(s.len(), 5);
2280 assert_eq!(s.as_slice(), "abcde");
2281 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2282 assert_eq!(format!("{}", s).as_slice(), "abcde");
// Ordering is checked against a value of the other variant.
2283 assert!(s.lt(&Owned(String::from_str("bcdef"))));
2284 assert_eq!(Slice(""), Default::default());
// Owned variant: the same checks as above.
2286 let o = Owned(String::from_str("abcde"));
2287 assert_eq!(o.len(), 5);
2288 assert_eq!(o.as_slice(), "abcde");
2289 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2290 assert_eq!(format!("{}", o).as_slice(), "abcde");
2291 assert!(o.lt(&Slice("bcdef")));
2292 assert_eq!(Owned(String::from_str("")), Default::default());
// Values of different variants with the same text are expected to compare as equal,
// in both directions.
2294 assert!(s.cmp(&o) == Equal);
2295 assert!(s.equiv(&o));
2297 assert!(o.cmp(&s) == Equal);
2298 assert!(o.equiv(&s));
// Checks that `is_slice` and `is_owned` report which variant a value is.
2302 fn test_maybe_owned_methods() {
// A `Slice` value: `is_slice` is expected to be true and `is_owned` false.
2303 let s = Slice("abcde");
2304 assert!(s.is_slice());
2305 assert!(!s.is_owned());
// An `Owned` value: the opposite answers are expected.
2307 let o = Owned(String::from_str("abcde"));
2308 assert!(!o.is_slice());
2309 assert!(o.is_owned());
// Checks that a clone of either variant compares equal to a value of either variant
// holding the same text (all four combinations).
2313 fn test_maybe_owned_clone() {
2314 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2315 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2316 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2317 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `into_string` on either variant yields a `String` with the same text.
2321 fn test_maybe_owned_into_string() {
2322 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2323 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2324 String::from_str("abcde"));
// Checks `into_maybe_owned` on a string slice and on a `String`. Each result is
// compared against both a `Slice` and an `Owned` value with the same text, so the
// assertions do not depend on which variant is returned.
2328 fn test_into_maybe_owned() {
2329 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2330 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2331 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2332 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2333 Owned(String::from_str("abcde")));
2340 use test::black_box;
2342 use std::iter::{IteratorExt, DoubleEndedIteratorExt};
2343 use std::str::StrPrelude;
2344 use std::slice::SlicePrelude;
// Benchmark: counts the chars of a string mixing multi-byte and ASCII text.
2347 fn char_iterator(b: &mut Bencher) {
2348 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// The closure handed to `b.iter` is the measured work.
2350 b.iter(|| s.chars().count());
// Benchmark: walks the chars of a mixed multi-byte/ASCII string with a `for` loop.
2354 fn char_iterator_for(b: &mut Bencher) {
2355 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is passed to `black_box`, presumably so the optimizer cannot discard the loop.
2358 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a multi-line, ASCII-only string literal.
2363 fn char_iterator_ascii(b: &mut Bencher) {
2364 let s = "Mary had a little lamb, Little lamb
2365 Mary had a little lamb, Little lamb
2366 Mary had a little lamb, Little lamb
2367 Mary had a little lamb, Little lamb
2368 Mary had a little lamb, Little lamb
2369 Mary had a little lamb, Little lamb";
// The closure handed to `b.iter` is the measured work.
2371 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed multi-byte/ASCII string, iterating in reverse.
2375 fn char_iterator_rev(b: &mut Bencher) {
2376 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2378 b.iter(|| s.chars().rev().count());
// Benchmark: walks the chars of a mixed multi-byte/ASCII string in reverse with a `for` loop.
2382 fn char_iterator_rev_for(b: &mut Bencher) {
2383 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is passed to `black_box`, presumably so the optimizer cannot discard the loop.
2386 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `char_indices` and checks the count against `char_len`.
2391 fn char_indicesator(b: &mut Bencher) {
2392 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count, computed once outside the measured closure.
2393 let len = s.char_len();
2395 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of `char_indices` in reverse and checks the count
// against `char_len`.
2399 fn char_indicesator_rev(b: &mut Bencher) {
2400 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count, computed once outside the measured closure.
2401 let len = s.char_len();
2403 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits multi-byte text on an ASCII char separator; 3 pieces are expected.
2407 fn split_unicode_ascii(b: &mut Bencher) {
2408 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2410 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits multi-byte text using a custom `CharEq` separator that reports
// itself as not ASCII-only; 3 pieces are expected.
2414 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single char that implements `CharEq`.
2415 struct NotAscii(char);
2416 impl CharEq for NotAscii {
// NOTE(review): the comparison that produces the result of `matches` is not visible
// in this view; only the destructuring of the wrapped char is.
2417 fn matches(&mut self, c: char) -> bool {
2418 let NotAscii(cc) = *self;
// Always false. NOTE(review): presumably this keeps `split` off an ASCII-specific
// path - confirm against the `CharEq` contract.
2421 fn only_ascii(&self) -> bool { false }
2423 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2425 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space char.
2430 fn split_ascii(b: &mut Bencher) {
2431 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, computed once outside the measured closure.
2432 let len = s.split(' ').count();
2434 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence using a custom `CharEq` separator that reports
// itself as not ASCII-only; the piece count must match a plain char split.
2438 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single char that implements `CharEq`.
2439 struct NotAscii(char);
2440 impl CharEq for NotAscii {
// NOTE(review): the comparison that produces the result of `matches` is not visible
// in this view; only the destructuring of the wrapped char is.
2442 fn matches(&mut self, c: char) -> bool {
2443 let NotAscii(cc) = *self;
// Always false. NOTE(review): presumably this keeps `split` off an ASCII-specific
// path - confirm against the `CharEq` contract.
2446 fn only_ascii(&self) -> bool { false }
2448 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with a plain char separator, outside the measured closure.
2449 let len = s.split(' ').count();
2451 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the separator predicate.
2455 fn split_extern_fn(b: &mut Bencher) {
2456 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with a plain char separator, outside the measured closure.
2457 let len = s.split(' ').count();
// Predicate: true for the space char only.
2458 fn pred(c: char) -> bool { c == ' ' }
2460 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the separator predicate.
2464 fn split_closure(b: &mut Bencher) {
2465 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with a plain char separator, outside the measured closure.
2466 let len = s.split(' ').count();
2468 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a slice of chars as the separator.
2472 fn split_slice(b: &mut Bencher) {
2473 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count taken with a plain char separator, outside the measured closure.
2474 let len = s.split(' ').count();
// One-element char slice holding only the space char.
2476 let c: &[char] = &[' '];
2477 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark setup over an ASCII byte string written across two source lines.
// NOTE(review): the measured call is not visible in this view; going by the function
// name it is presumably a UTF-8 validity check - confirm.
2481 fn is_utf8_100_ascii(b: &mut Bencher) {
2483 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2484 Lorem ipsum dolor sit amet, consectetur. ";
// Guards the input size: the byte string must be exactly 100 bytes long.
2486 assert_eq!(100, s.len());
// Benchmark setup over the bytes of a string made of multi-byte characters.
// NOTE(review): the measured call is not visible in this view; going by the function
// name it is presumably a UTF-8 validity check - confirm.
2493 fn is_utf8_100_multibyte(b: &mut Bencher) {
2494 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guards the input size: the encoded text must be exactly 100 bytes long.
2495 assert_eq!(100, s.len());
// Benchmark: joins ten copies of a string with `connect` and checks the joined length.
// NOTE(review): `sep` is bound on a line not visible in this view.
2502 fn bench_connect(b: &mut Bencher) {
2503 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2505 let v = [s, s, s, s, s, s, s, s, s, s];
// Expected length: ten copies of `s` plus the nine separators between them.
2507 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a one-sentence haystack; the needle is expected to be found.
// NOTE(review): `needle` is bound on a line not visible in this view.
2512 fn bench_contains_short_short(b: &mut Bencher) {
2513 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2517 assert!(haystack.contains(needle));
// Benchmark: `contains` with a short needle over a long multi-paragraph haystack;
// the needle is expected to be absent, so the assertion checks for a negative result.
// NOTE(review): the line that opens the haystack literal is not visible in this view.
2522 fn bench_contains_short_long(b: &mut Bencher) {
2524 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2525 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2526 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2527 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2528 tempus vel, gravida nec quam.
2530 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2531 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2532 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2533 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2534 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2535 interdum. Curabitur ut nisi justo.
2537 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2538 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2539 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2540 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2541 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2542 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2543 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2544 Aliquam sit amet placerat lorem.
2546 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2547 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2548 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2549 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2550 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2553 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2554 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2555 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2556 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2557 malesuada sollicitudin quam eu fermentum.";
2558 let needle = "english";
2561 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is a long run of the letter a and the
// needle is a shorter run of a followed by b, so the needle is expected to be absent.
// NOTE(review): going by the function name this is meant as a bad case for a naive
// substring search - confirm.
2566 fn bench_contains_bad_naive(b: &mut Bencher) {
2567 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2568 let needle = "aaaaaaaab";
2571 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the whole haystack;
// the needle is expected to be found.
2576 fn bench_contains_equal(b: &mut Bencher) {
2577 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2578 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2581 assert!(haystack.contains(needle));