1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must be accessed through a pointer type such as `&str`. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a `'static` lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings have direct mappings to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
56 pub use self::MaybeOwned::*;
57 use self::RecompositionState::*;
58 use self::DecompositionType::*;
60 use core::borrow::{BorrowFrom, Cow, ToOwned};
61 use core::default::Default;
64 use core::iter::AdditiveIterator;
67 use ring_buf::RingBuf;
72 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
73 pub use core::str::{Bytes, CharSplits};
74 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
75 pub use core::str::{Utf16Encoder, Utf16CodeUnits};
76 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
77 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
78 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
79 pub use core::str::{FromStr, from_str};
80 pub use core::str::{Str, StrPrelude};
81 pub use core::str::{from_utf8_unchecked, from_c_str};
82 pub use unicode::str::{UnicodeStrPrelude, Words, Graphemes, GraphemeIndices};
84 // FIXME(conventions): ensure bit/char conventions are followed by str's API
87 Section: Creating a string
90 /// Methods for vectors of strings.
91 pub trait StrVector for Sized? {
92 /// Concatenates a vector of strings.
97 /// let first = "Restaurant at the End of the".to_string();
98 /// let second = " Universe".to_string();
99 /// let string_vec = vec![first, second];
100 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
102 fn concat(&self) -> String;
104 /// Concatenates a vector of strings, placing a given separator between each.
109 /// let first = "Roast".to_string();
110 /// let second = "Sirloin Steak".to_string();
111 /// let string_vec = vec![first, second];
112 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
114 fn connect(&self, sep: &str) -> String;
117 impl<S: Str> StrVector for [S] {
118 fn concat(&self) -> String {
120 return String::new();
123 // `len` calculation may overflow but push_str will check boundaries
124 let len = self.iter().map(|s| s.as_slice().len()).sum();
126 let mut result = String::with_capacity(len);
128 for s in self.iter() {
129 result.push_str(s.as_slice())
135 fn connect(&self, sep: &str) -> String {
137 return String::new();
142 return self.concat();
145 // this is wrong without the guarantee that `self` is non-empty
146 // `len` calculation may overflow but push_str will check boundaries
147 let len = sep.len() * (self.len() - 1)
148 + self.iter().map(|s| s.as_slice().len()).sum();
149 let mut result = String::with_capacity(len);
150 let mut first = true;
152 for s in self.iter() {
156 result.push_str(sep);
158 result.push_str(s.as_slice());
164 impl<S: Str, T: AsSlice<S>> StrVector for T {
166 fn concat(&self) -> String {
167 self.as_slice().concat()
171 fn connect(&self, sep: &str) -> String {
172 self.as_slice().connect(sep)
180 // Helper functions used for Unicode normalization
181 fn canonical_sort(comb: &mut [(char, u8)]) {
182 let len = comb.len();
183 for i in range(0, len) {
184 let mut swapped = false;
185 for j in range(1, len-i) {
186 let class_a = comb[j-1].1;
187 let class_b = comb[j].1;
188 if class_a != 0 && class_b != 0 && class_a > class_b {
193 if !swapped { break; }
198 enum DecompositionType {
203 /// External iterator for a string's decomposition's characters.
204 /// Use with the `std::iter` module.
206 pub struct Decompositions<'a> {
207 kind: DecompositionType,
209 buffer: Vec<(char, u8)>,
213 impl<'a> Iterator<char> for Decompositions<'a> {
215 fn next(&mut self) -> Option<char> {
216 match self.buffer.head() {
219 self.buffer.remove(0);
222 Some(&(c, _)) if self.sorted => {
223 self.buffer.remove(0);
226 _ => self.sorted = false
230 for ch in self.iter {
231 let buffer = &mut self.buffer;
232 let sorted = &mut self.sorted;
236 unicode::char::canonical_combining_class(d);
237 if class == 0 && !*sorted {
238 canonical_sort(buffer.as_mut_slice());
241 buffer.push((d, class));
245 unicode::char::decompose_canonical(ch, callback)
248 unicode::char::decompose_compatible(ch, callback)
259 canonical_sort(self.buffer.as_mut_slice());
263 match self.buffer.remove(0) {
268 Some((c, _)) => Some(c),
273 fn size_hint(&self) -> (uint, Option<uint>) {
274 let (lower, _) = self.iter.size_hint();
280 enum RecompositionState {
286 /// External iterator for a string's recomposition's characters.
287 /// Use with the `std::iter` module.
289 pub struct Recompositions<'a> {
290 iter: Decompositions<'a>,
291 state: RecompositionState,
292 buffer: RingBuf<char>,
293 composee: Option<char>,
297 impl<'a> Iterator<char> for Recompositions<'a> {
299 fn next(&mut self) -> Option<char> {
303 for ch in self.iter {
304 let ch_class = unicode::char::canonical_combining_class(ch);
305 if self.composee.is_none() {
309 self.composee = Some(ch);
312 let k = self.composee.clone().unwrap();
314 match self.last_ccc {
316 match unicode::char::compose(k, ch) {
318 self.composee = Some(r);
323 self.composee = Some(ch);
326 self.buffer.push_back(ch);
327 self.last_ccc = Some(ch_class);
332 if l_class >= ch_class {
333 // `ch` is blocked from `composee`
335 self.composee = Some(ch);
336 self.last_ccc = None;
337 self.state = Purging;
340 self.buffer.push_back(ch);
341 self.last_ccc = Some(ch_class);
344 match unicode::char::compose(k, ch) {
346 self.composee = Some(r);
350 self.buffer.push_back(ch);
351 self.last_ccc = Some(ch_class);
357 self.state = Finished;
358 if self.composee.is_some() {
359 return self.composee.take();
363 match self.buffer.pop_front() {
364 None => self.state = Composing,
369 match self.buffer.pop_front() {
370 None => return self.composee.take(),
379 /// Replaces all occurrences of one string with another.
383 /// * s - The string containing substrings to replace
384 /// * from - The string to replace
385 /// * to - The replacement string
389 /// The original string with all occurrences of `from` replaced with `to`.
395 /// let string = "orange";
396 /// let new_string = str::replace(string, "or", "str");
397 /// assert_eq!(new_string.as_slice(), "strange");
399 pub fn replace(s: &str, from: &str, to: &str) -> String {
400 let mut result = String::new();
401 let mut last_end = 0;
402 for (start, end) in s.match_indices(from) {
403 result.push_str(unsafe { s.slice_unchecked(last_end, start) });
407 result.push_str(unsafe { s.slice_unchecked(last_end, s.len()) });
415 // Return the initial codepoint accumulator for the first byte.
416 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
417 // for width 3, and 3 bits for width 4
418 macro_rules! utf8_first_byte(
419 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
422 // return the value of $ch updated with continuation byte $byte
423 macro_rules! utf8_acc_cont_byte(
424 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
431 /// A string type that can hold either a `String` or a `&str`.
432 /// This can be useful as an optimization when an allocation is sometimes
433 /// needed but not always.
434 #[deprecated = "use std::str::CowString"]
435 pub enum MaybeOwned<'a> {
436 /// A borrowed string.
442 /// A specialization of `CowString` to be sendable: fixing the lifetime to
442 /// `'static` means any borrowed variant can only reference program-lifetime
442 /// data, so no short-lived borrow can be smuggled across threads.
443 pub type SendStr = CowString<'static>;
445 #[deprecated = "use std::str::CowString"]
446 impl<'a> MaybeOwned<'a> {
447 /// Returns `true` if this `MaybeOwned` wraps an owned string.
452 /// let string = String::from_str("orange");
453 /// let maybe_owned_string = string.into_maybe_owned();
454 /// assert_eq!(true, maybe_owned_string.is_owned());
457 pub fn is_owned(&self) -> bool {
464 /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
469 /// let string = "orange";
470 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
471 /// assert_eq!(true, maybe_owned_string.is_slice());
474 pub fn is_slice(&self) -> bool {
481 /// Return the number of bytes in this string.
483 pub fn len(&self) -> uint { self.as_slice().len() }
485 /// Returns true if the string contains no bytes
488 pub fn is_empty(&self) -> bool { self.len() == 0 }
491 #[deprecated = "use std::borrow::IntoCow"]
492 /// Trait for moving into a `MaybeOwned`.
493 pub trait IntoMaybeOwned<'a> {
494 /// Moves `self` into a `MaybeOwned`.
495 fn into_maybe_owned(self) -> MaybeOwned<'a>;
498 #[deprecated = "use std::borrow::IntoCow"]
500 impl<'a> IntoMaybeOwned<'a> for String {
504 /// let owned_string = String::from_str("orange");
505 /// let maybe_owned_string = owned_string.into_maybe_owned();
506 /// assert_eq!(true, maybe_owned_string.is_owned());
510 fn into_maybe_owned(self) -> MaybeOwned<'a> {
515 #[deprecated = "use std::borrow::IntoCow"]
517 impl<'a> IntoMaybeOwned<'a> for &'a str {
521 /// let string = "orange";
522 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
523 /// assert_eq!(false, maybe_owned_str.is_owned());
527 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
531 #[deprecated = "use std::borrow::IntoCow"]
532 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
536 /// let str = "orange";
537 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
538 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
539 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
542 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
545 #[deprecated = "use std::str::CowString"]
546 impl<'a> PartialEq for MaybeOwned<'a> {
548 fn eq(&self, other: &MaybeOwned) -> bool {
549 self.as_slice() == other.as_slice()
553 #[deprecated = "use std::str::CowString"]
// Marker impl only: `PartialEq` for `MaybeOwned` compares the underlying
// string slices, which is a total equivalence relation, so `Eq` is sound.
554 impl<'a> Eq for MaybeOwned<'a> {}
556 #[deprecated = "use std::str::CowString"]
557 impl<'a> PartialOrd for MaybeOwned<'a> {
559 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
560 Some(self.cmp(other))
564 #[deprecated = "use std::str::CowString"]
565 impl<'a> Ord for MaybeOwned<'a> {
567 fn cmp(&self, other: &MaybeOwned) -> Ordering {
568 self.as_slice().cmp(other.as_slice())
573 #[deprecated = "use std::str::CowString"]
574 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
576 fn equiv(&self, other: &S) -> bool {
577 self.as_slice() == other.as_slice()
581 #[deprecated = "use std::str::CowString"]
582 impl<'a> Str for MaybeOwned<'a> {
585 fn as_slice<'b>(&'b self) -> &'b str {
588 Owned(ref s) => s.as_slice()
593 #[deprecated = "use std::str::CowString"]
594 impl<'a> StrAllocating for MaybeOwned<'a> {
597 fn into_string(self) -> String {
599 Slice(s) => String::from_str(s),
605 #[deprecated = "use std::str::CowString"]
606 impl<'a> Clone for MaybeOwned<'a> {
609 fn clone(&self) -> MaybeOwned<'a> {
611 Slice(s) => Slice(s),
612 Owned(ref s) => Owned(String::from_str(s.as_slice()))
617 #[deprecated = "use std::str::CowString"]
618 impl<'a> Default for MaybeOwned<'a> {
621 fn default() -> MaybeOwned<'a> { Slice("") }
624 #[deprecated = "use std::str::CowString"]
625 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
627 fn hash(&self, hasher: &mut H) {
628 self.as_slice().hash(hasher)
632 #[deprecated = "use std::str::CowString"]
633 impl<'a> fmt::Show for MaybeOwned<'a> {
635 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
637 Slice(ref s) => s.fmt(f),
638 Owned(ref s) => s.fmt(f)
643 #[unstable = "trait is unstable"]
644 impl BorrowFrom<String> for str {
645 fn borrow_from(owned: &String) -> &str { owned[] }
648 #[unstable = "trait is unstable"]
649 impl ToOwned<String> for str {
650 fn to_owned(&self) -> String { self.into_string() }
653 /// Unsafe string operations.
655 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
656 pub use core::str::raw::{slice_unchecked};
663 /// A clone-on-write string: holds either a borrowed `&'a str` or an owned
663 /// `String`, converting (allocating) to the owned form only on mutation.
664 pub type CowString<'a> = Cow<'a, String, str>;
666 impl<'a> Str for CowString<'a> {
668 fn as_slice<'b>(&'b self) -> &'b str {
674 Section: Trait implementations
677 /// Any string that can be represented as a slice.
678 pub trait StrAllocating: Str {
679 /// Converts `self` into a `String`, not making a copy if possible.
680 fn into_string(self) -> String;
682 /// Escapes each char in `s` with `char::escape_default`.
683 fn escape_default(&self) -> String {
684 let me = self.as_slice();
685 let mut out = String::with_capacity(me.len());
686 for c in me.chars() {
687 for c in c.escape_default() {
694 /// Escapes each char in `s` with `char::escape_unicode`.
695 fn escape_unicode(&self) -> String {
696 let me = self.as_slice();
697 let mut out = String::with_capacity(me.len());
698 for c in me.chars() {
699 for c in c.escape_unicode() {
706 /// Replaces all occurrences of one string with another.
710 /// * `from` - The string to replace
711 /// * `to` - The replacement string
715 /// The original string with all occurrences of `from` replaced with `to`.
720 /// let s = "Do you know the muffin man,
721 /// The muffin man, the muffin man, ...".to_string();
723 /// assert_eq!(s.replace("muffin man", "little lamb"),
724 /// "Do you know the little lamb,
725 /// The little lamb, the little lamb, ...".to_string());
727 /// // not found, so no change.
728 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
730 fn replace(&self, from: &str, to: &str) -> String {
731 replace(self.as_slice(), from, to)
734 /// Given a string, makes a new string with repeated copies of it.
735 fn repeat(&self, nn: uint) -> String {
736 let me = self.as_slice();
737 let mut ret = String::with_capacity(nn * me.len());
738 for _ in range(0, nn) {
744 /// Returns the Levenshtein Distance between two strings.
745 fn lev_distance(&self, t: &str) -> uint {
746 let me = self.as_slice();
747 if me.is_empty() { return t.char_len(); }
748 if t.is_empty() { return me.char_len(); }
750 let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
753 for (i, sc) in me.chars().enumerate() {
756 dcol[0] = current + 1;
758 for (j, tc) in t.chars().enumerate() {
760 let next = dcol[j + 1];
763 dcol[j + 1] = current;
765 dcol[j + 1] = cmp::min(current, next);
766 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
777 /// Returns an iterator over the string in Unicode Normalization Form D
778 /// (canonical decomposition).
780 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
782 iter: self.as_slice().chars(),
789 /// Returns an iterator over the string in Unicode Normalization Form KD
790 /// (compatibility decomposition).
792 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
794 iter: self.as_slice().chars(),
801 /// An Iterator over the string in Unicode Normalization Form C
802 /// (canonical decomposition followed by canonical composition).
804 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
806 iter: self.nfd_chars(),
808 buffer: RingBuf::new(),
814 /// An Iterator over the string in Unicode Normalization Form KC
815 /// (compatibility decomposition followed by canonical composition).
817 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
819 iter: self.nfkd_chars(),
821 buffer: RingBuf::new(),
828 impl<'a> StrAllocating for &'a str {
830 fn into_string(self) -> String {
831 String::from_str(self)
837 use std::iter::AdditiveIterator;
838 use std::iter::range;
839 use std::default::Default;
841 use std::clone::Clone;
842 use std::cmp::{Ord, PartialOrd, Equiv};
843 use std::cmp::Ordering::{Equal, Greater, Less};
844 use std::option::Option;
845 use std::option::Option::{Some, None};
846 use std::ptr::RawPtr;
847 use std::iter::{Iterator, IteratorExt, DoubleEndedIteratorExt};
850 use std::slice::{AsSlice, SliceExt};
853 use slice::CloneSliceExt;
855 use unicode::char::UnicodeChar;
859 assert!((eq_slice("foobar".slice(0, 3), "foo")));
860 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
861 assert!((!eq_slice("foo1", "foo2")));
867 assert!("" <= "foo");
868 assert!("foo" <= "foo");
869 assert!("foo" != "bar");
874 assert_eq!("".len(), 0u);
875 assert_eq!("hello world".len(), 11u);
876 assert_eq!("\x63".len(), 1u);
877 assert_eq!("\u{a2}".len(), 2u);
878 assert_eq!("\u{3c0}".len(), 2u);
879 assert_eq!("\u{2620}".len(), 3u);
880 assert_eq!("\u{1d11e}".len(), 4u);
882 assert_eq!("".char_len(), 0u);
883 assert_eq!("hello world".char_len(), 11u);
884 assert_eq!("\x63".char_len(), 1u);
885 assert_eq!("\u{a2}".char_len(), 1u);
886 assert_eq!("\u{3c0}".char_len(), 1u);
887 assert_eq!("\u{2620}".char_len(), 1u);
888 assert_eq!("\u{1d11e}".char_len(), 1u);
889 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
891 assert_eq!("hello".width(false), 10u);
892 assert_eq!("hello".width(true), 10u);
893 assert_eq!("\0\0\0\0\0".width(false), 0u);
894 assert_eq!("\0\0\0\0\0".width(true), 0u);
895 assert_eq!("".width(false), 0u);
896 assert_eq!("".width(true), 0u);
897 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
898 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
903 assert_eq!("hello".find('l'), Some(2u));
904 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
905 assert!("hello".find('x').is_none());
906 assert!("hello".find(|&: c:char| c == 'x').is_none());
907 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
908 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
913 assert_eq!("hello".rfind('l'), Some(3u));
914 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
915 assert!("hello".rfind('x').is_none());
916 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
917 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
918 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
923 let empty = String::from_str("");
924 let s: String = empty.chars().collect();
925 assert_eq!(empty, s);
926 let data = String::from_str("ประเทศไทย中");
927 let s: String = data.chars().collect();
932 fn test_into_bytes() {
933 let data = String::from_str("asdf");
934 let buf = data.into_bytes();
935 assert_eq!(b"asdf", buf);
941 assert_eq!("".find_str(""), Some(0u));
942 assert!("banana".find_str("apple pie").is_none());
945 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
946 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
947 assert!(data.slice(2u, 4u).find_str("ab").is_none());
949 let string = "ประเทศไทย中华Việt Nam";
950 let mut data = String::from_str(string);
951 data.push_str(string);
952 assert!(data.find_str("ไท华").is_none());
953 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
954 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
956 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
957 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
958 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
959 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
960 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
962 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
963 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
964 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
965 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
966 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
970 fn test_slice_chars() {
971 fn t(a: &str, b: &str, start: uint) {
972 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
975 t("hello", "llo", 2);
979 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
986 impl AsSlice<String> for S {
987 fn as_slice<'a> (&'a self) -> &'a [String] {
992 fn s(x: &str) -> String { x.into_string() }
994 macro_rules! test_concat {
995 ($expected: expr, $string: expr) => {
997 let s = $string.concat();
998 assert_eq!($expected, s);
1004 fn test_concat_for_different_types() {
1005 test_concat!("ab", ["a", "b"]);
1006 test_concat!("ab", [s("a"), s("b")]);
1007 test_concat!("ab", vec!["a", "b"]);
1008 test_concat!("ab", vec!["a", "b"].as_slice());
1009 test_concat!("ab", vec![s("a"), s("b")]);
1011 let mut v0 = ["a", "b"];
1012 let mut v1 = [s("a"), s("b")];
1014 use std::c_vec::CVec;
1016 test_concat!("ab", CVec::new(v0.as_mut_ptr(), v0.len()));
1017 test_concat!("ab", CVec::new(v1.as_mut_ptr(), v1.len()));
1020 test_concat!("ab", S { x: [s("a"), s("b")] });
1024 fn test_concat_for_different_lengths() {
1025 let empty: &[&str] = &[];
1026 test_concat!("", empty);
1027 test_concat!("a", ["a"]);
1028 test_concat!("ab", ["a", "b"]);
1029 test_concat!("abc", ["", "a", "bc"]);
1032 macro_rules! test_connect {
1033 ($expected: expr, $string: expr, $delim: expr) => {
1035 let s = $string.connect($delim);
1036 assert_eq!($expected, s);
1042 fn test_connect_for_different_types() {
1043 test_connect!("a-b", ["a", "b"], "-");
1044 let hyphen = "-".into_string();
1045 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1046 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1047 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1048 test_connect!("a-b", vec![s("a"), s("b")], "-");
1050 let mut v0 = ["a", "b"];
1051 let mut v1 = [s("a"), s("b")];
1053 use std::c_vec::CVec;
1055 test_connect!("a-b", CVec::new(v0.as_mut_ptr(), v0.len()), "-");
1056 test_connect!("a-b", CVec::new(v1.as_mut_ptr(), v1.len()), hyphen.as_slice());
1059 test_connect!("a-b", S { x: [s("a"), s("b")] }, "-");
1063 fn test_connect_for_different_lengths() {
1064 let empty: &[&str] = &[];
1065 test_connect!("", empty, "-");
1066 test_connect!("a", ["a"], "-");
1067 test_connect!("a-b", ["a", "b"], "-");
1068 test_connect!("-a-bc", ["", "a", "bc"], "-");
1073 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1074 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1075 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1076 assert_eq!("".repeat(4), String::from_str(""));
1077 assert_eq!("hi".repeat(0), String::from_str(""));
1081 fn test_unsafe_slice() {
1082 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1083 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1084 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1085 fn a_million_letter_a() -> String {
1087 let mut rs = String::new();
1089 rs.push_str("aaaaaaaaaa");
1094 fn half_a_million_letter_a() -> String {
1096 let mut rs = String::new();
1098 rs.push_str("aaaaa");
1103 let letters = a_million_letter_a();
1104 assert!(half_a_million_letter_a() ==
1105 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1111 fn test_starts_with() {
1112 assert!(("".starts_with("")));
1113 assert!(("abc".starts_with("")));
1114 assert!(("abc".starts_with("a")));
1115 assert!((!"a".starts_with("abc")));
1116 assert!((!"".starts_with("abc")));
1117 assert!((!"ödd".starts_with("-")));
1118 assert!(("ödd".starts_with("öd")));
1122 fn test_ends_with() {
1123 assert!(("".ends_with("")));
1124 assert!(("abc".ends_with("")));
1125 assert!(("abc".ends_with("c")));
1126 assert!((!"a".ends_with("abc")));
1127 assert!((!"".ends_with("abc")));
1128 assert!((!"ddö".ends_with("-")));
1129 assert!(("ddö".ends_with("dö")));
1133 fn test_is_empty() {
1134 assert!("".is_empty());
1135 assert!(!"a".is_empty());
1141 assert_eq!("".replace(a, "b"), String::from_str(""));
1142 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1143 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1145 assert!(" test test ".replace(test, "toast") ==
1146 String::from_str(" toast toast "));
1147 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1151 fn test_replace_2a() {
1152 let data = "ประเทศไทย中华";
1153 let repl = "دولة الكويت";
1156 let a2 = "دولة الكويتทศไทย中华";
1157 assert_eq!(data.replace(a, repl), a2);
1161 fn test_replace_2b() {
1162 let data = "ประเทศไทย中华";
1163 let repl = "دولة الكويت";
1166 let b2 = "ปรدولة الكويتทศไทย中华";
1167 assert_eq!(data.replace(b, repl), b2);
1171 fn test_replace_2c() {
1172 let data = "ประเทศไทย中华";
1173 let repl = "دولة الكويت";
1176 let c2 = "ประเทศไทยدولة الكويت";
1177 assert_eq!(data.replace(c, repl), c2);
1181 fn test_replace_2d() {
1182 let data = "ประเทศไทย中华";
1183 let repl = "دولة الكويت";
1186 assert_eq!(data.replace(d, repl), data);
1191 assert_eq!("ab", "abc".slice(0, 2));
1192 assert_eq!("bc", "abc".slice(1, 3));
1193 assert_eq!("", "abc".slice(1, 1));
1194 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1196 let data = "ประเทศไทย中华";
1197 assert_eq!("ป", data.slice(0, 3));
1198 assert_eq!("ร", data.slice(3, 6));
1199 assert_eq!("", data.slice(3, 3));
1200 assert_eq!("华", data.slice(30, 33));
1202 fn a_million_letter_x() -> String {
1204 let mut rs = String::new();
1206 rs.push_str("华华华华华华华华华华");
1211 fn half_a_million_letter_x() -> String {
1213 let mut rs = String::new();
1215 rs.push_str("华华华华华");
1220 let letters = a_million_letter_x();
1221 assert!(half_a_million_letter_x() ==
1222 String::from_str(letters.slice(0u, 3u * 500000u)));
1227 let ss = "中华Việt Nam";
1229 assert_eq!("华", ss.slice(3u, 6u));
1230 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1232 assert_eq!("ab", "abc".slice(0u, 2u));
1233 assert_eq!("bc", "abc".slice(1u, 3u));
1234 assert_eq!("", "abc".slice(1u, 1u));
1236 assert_eq!("中", ss.slice(0u, 3u));
1237 assert_eq!("华V", ss.slice(3u, 7u));
1238 assert_eq!("", ss.slice(3u, 3u));
1253 fn test_slice_fail() {
1254 "中华Việt Nam".slice(0u, 2u);
1258 fn test_slice_from() {
1259 assert_eq!("abcd".slice_from(0), "abcd");
1260 assert_eq!("abcd".slice_from(2), "cd");
1261 assert_eq!("abcd".slice_from(4), "");
1264 fn test_slice_to() {
1265 assert_eq!("abcd".slice_to(0), "");
1266 assert_eq!("abcd".slice_to(2), "ab");
1267 assert_eq!("abcd".slice_to(4), "abcd");
1271 fn test_trim_left_chars() {
1272 let v: &[char] = &[];
1273 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1274 let chars: &[char] = &['*', ' '];
1275 assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
1276 assert_eq!(" *** *** ".trim_left_chars(chars), "");
1277 assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
1279 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1280 let chars: &[char] = &['1', '2'];
1281 assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
1282 assert_eq!("123foo1bar123".trim_left_chars(|&: c: char| c.is_numeric()), "foo1bar123");
1286 fn test_trim_right_chars() {
1287 let v: &[char] = &[];
1288 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1289 let chars: &[char] = &['*', ' '];
1290 assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
1291 assert_eq!(" *** *** ".trim_right_chars(chars), "");
1292 assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
1294 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1295 let chars: &[char] = &['1', '2'];
1296 assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
1297 assert_eq!("123foo1bar123".trim_right_chars(|&: c: char| c.is_numeric()), "123foo1bar");
1301 fn test_trim_chars() {
1302 let v: &[char] = &[];
1303 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1304 let chars: &[char] = &['*', ' '];
1305 assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
1306 assert_eq!(" *** *** ".trim_chars(chars), "");
1307 assert_eq!("foo".trim_chars(chars), "foo");
1309 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1310 let chars: &[char] = &['1', '2'];
1311 assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
1312 assert_eq!("123foo1bar123".trim_chars(|&: c: char| c.is_numeric()), "foo1bar");
1316 fn test_trim_left() {
1317 assert_eq!("".trim_left(), "");
1318 assert_eq!("a".trim_left(), "a");
1319 assert_eq!(" ".trim_left(), "");
1320 assert_eq!(" blah".trim_left(), "blah");
1321 assert_eq!(" \u{3000} wut".trim_left(), "wut");
1322 assert_eq!("hey ".trim_left(), "hey ");
1326 fn test_trim_right() {
1327 assert_eq!("".trim_right(), "");
1328 assert_eq!("a".trim_right(), "a");
1329 assert_eq!(" ".trim_right(), "");
1330 assert_eq!("blah ".trim_right(), "blah");
1331 assert_eq!("wut \u{3000} ".trim_right(), "wut");
1332 assert_eq!(" hey".trim_right(), " hey");
1337 assert_eq!("".trim(), "");
1338 assert_eq!("a".trim(), "a");
1339 assert_eq!(" ".trim(), "");
1340 assert_eq!(" blah ".trim(), "blah");
1341 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1342 assert_eq!(" hey dude ".trim(), "hey dude");
1346 fn test_is_whitespace() {
1347 assert!("".is_whitespace());
1348 assert!(" ".is_whitespace());
1349 assert!("\u{2009}".is_whitespace()); // Thin space
1350 assert!(" \n\t ".is_whitespace());
1351 assert!(!" _ ".is_whitespace());
1355 fn test_slice_shift_char() {
1356 let data = "ประเทศไทย中";
1357 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1361 fn test_slice_shift_char_2() {
1363 assert_eq!(empty.slice_shift_char(), None);
1368 // deny overlong encodings
1369 assert!(!is_utf8(&[0xc0, 0x80]));
1370 assert!(!is_utf8(&[0xc0, 0xae]));
1371 assert!(!is_utf8(&[0xe0, 0x80, 0x80]));
1372 assert!(!is_utf8(&[0xe0, 0x80, 0xaf]));
1373 assert!(!is_utf8(&[0xe0, 0x81, 0x81]));
1374 assert!(!is_utf8(&[0xf0, 0x82, 0x82, 0xac]));
1375 assert!(!is_utf8(&[0xf4, 0x90, 0x80, 0x80]));
1378 assert!(!is_utf8(&[0xED, 0xA0, 0x80]));
1379 assert!(!is_utf8(&[0xED, 0xBF, 0xBF]));
1381 assert!(is_utf8(&[0xC2, 0x80]));
1382 assert!(is_utf8(&[0xDF, 0xBF]));
1383 assert!(is_utf8(&[0xE0, 0xA0, 0x80]));
1384 assert!(is_utf8(&[0xED, 0x9F, 0xBF]));
1385 assert!(is_utf8(&[0xEE, 0x80, 0x80]));
1386 assert!(is_utf8(&[0xEF, 0xBF, 0xBF]));
1387 assert!(is_utf8(&[0xF0, 0x90, 0x80, 0x80]));
1388 assert!(is_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]));
// Checks is_utf16(): well-formed sequences (valid surrogate pairs, and
// mixtures of pairs with BMP units) are accepted; ill-formed ones
// (unpaired/misordered surrogates) are rejected.
// NOTE(review): this listing omits several lines between the visible ones
// (e.g. original 1394-1400 and 1414-1424) — verify against the full file.
1392 fn test_is_utf16() {
// pos! asserts is_utf16() holds for every argument sequence.
1393 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1401 // surrogate pairs (randomly generated with Python 3's
1402 // .encode('utf-16be'))
1403 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1404 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1405 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1407 // mixtures (also random)
1408 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1409 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1410 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
// neg! asserts is_utf16() fails for every argument sequence.
1413 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1416 // surrogate + regular unit
1418 // surrogate + lead surrogate
1420 // unterminated surrogate
1422 // trail surrogate without a lead
1425 // random byte sequences that Python 3's .decode('utf-16be')
// Each sequence below contains at least one unpaired surrogate unit.
1427 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1428 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1429 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1430 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1431 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1432 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1433 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1434 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1435 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1436 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1437 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1438 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1439 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1440 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1441 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1442 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1443 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1444 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1445 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1446 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1447 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// Checks as_bytes(): empty, ASCII, and multibyte strings map to their
// expected UTF-8 byte sequences.
// NOTE(review): the declarations of `b` and `v` (original lines 1452-1453,
// 1456-1458) are missing from this listing — verify against the full file.
1451 fn test_as_bytes() {
1454 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1455 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1459 assert_eq!("".as_bytes(), b);
1460 assert_eq!("abc".as_bytes(), b"abc");
1461 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
// Regression test: taking as_bytes() of an owned String must not cause a
// double free (per the original comment, the exercised code path may have
// changed since the bug was fixed).
// NOTE(review): the `#[test]`/`#[should_fail]`-style attributes and any
// trailing lines are missing from this listing — verify against the full file.
1466 fn test_as_bytes_fail() {
1467 // Don't double free. (I'm not sure if this exercises the
1468 // original problem code path anymore.)
1469 let s = String::from_str("");
1470 let _bytes = s.as_bytes();
// Body fragment (fn header not visible in this listing): checks that
// as_ptr() points at the string's UTF-8 bytes by reading each byte of
// "hello" through raw-pointer offsets.
// NOTE(review): the raw-pointer derefs imply an enclosing `unsafe` block
// on a missing line — verify against the full file.
1476 let buf = "hello".as_ptr();
1478 assert_eq!(*buf.offset(0), b'h');
1479 assert_eq!(*buf.offset(1), b'e');
1480 assert_eq!(*buf.offset(2), b'l');
1481 assert_eq!(*buf.offset(3), b'l');
1482 assert_eq!(*buf.offset(4), b'o');
// Checks subslice_offset(): the byte offset of a subslice within its
// parent string, both for explicit slices and for lines() results.
1487 fn test_subslice_offset() {
1488 let a = "kernelsprite";
1489 let b = a.slice(7, a.len());
1490 let c = a.slice(0, a.len() - 6);
1491 assert_eq!(a.subslice_offset(b), 7);
1492 assert_eq!(a.subslice_offset(c), 0);
// Each line of "a\nb\nc" starts 2 bytes after the previous (char + '\n').
1494 let string = "a\nb\nc";
1495 let lines: Vec<&str> = string.lines().collect();
1496 assert_eq!(string.subslice_offset(lines[0]), 0);
1497 assert_eq!(string.subslice_offset(lines[1]), 2);
1498 assert_eq!(string.subslice_offset(lines[2]), 4);
// Checks that subslice_offset() with a slice from an unrelated string is
// rejected (the surrounding attributes, not visible in this listing,
// presumably mark this test as expected-to-panic — verify).
1503 fn test_subslice_offset_2() {
1504 let a = "alchemiter";
1505 let b = "cruxtruder";
1506 a.subslice_offset(b);
// Round-trips a String through Vec<u8> and back via from_utf8, then
// compares the two strings byte-by-byte.
// NOTE(review): the comparison loop and length assertions between the
// visible lines are missing from this listing — verify against the full file.
1510 fn vec_str_conversions() {
1511 let s1: String = String::from_str("All mimsy were the borogoves");
1513 let v: Vec<u8> = s1.as_bytes().to_vec();
1514 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1515 let mut i: uint = 0u;
1516 let n1: uint = s1.len();
1517 let n2: uint = v.len();
// a/b: corresponding bytes of the original and round-tripped strings.
1520 let a: u8 = s1.as_bytes()[i];
1521 let b: u8 = s2.as_bytes()[i];
// Checks substring containment: prefixes, suffixes, interior substrings,
// the empty pattern, negative cases, and multibyte (Thai/CJK) text where a
// candidate must match at char boundaries, not merely share bytes.
1530 fn test_contains() {
1531 assert!("abcde".contains("bcd"));
1532 assert!("abcde".contains("abcd"));
1533 assert!("abcde".contains("bcde"));
1534 assert!("abcde".contains(""));
1535 assert!("".contains(""));
1536 assert!(!"abcde".contains("def"));
1537 assert!(!"".contains("a"));
1539 let data = "ประเทศไทย中华Việt Nam";
1540 assert!(data.contains("ประเ"));
1541 assert!(data.contains("ะเ"));
1542 assert!(data.contains("中华"));
// "ไท华" mixes non-adjacent chars from the haystack — must not match.
1543 assert!(!data.contains("ไท华"));
// Checks contains_char() for present chars, absent chars, and the empty
// string.
1547 fn test_contains_char() {
1548 assert!("abc".contains_char('b'));
1549 assert!("a".contains_char('a'));
1550 assert!(!"abc".contains_char('d'));
1551 assert!(!"".contains_char('a'));
// Checks truncate_utf16_at_nul(): the result is everything before the
// first 0u16, or the whole slice when no NUL is present.
// NOTE(review): the `v` input declarations between the visible lines are
// missing from this listing — verify against the full file.
1555 fn test_truncate_utf16_at_nul() {
1557 let b: &[u16] = &[];
1558 assert_eq!(truncate_utf16_at_nul(&v), b);
1561 assert_eq!(truncate_utf16_at_nul(&v), b);
1564 let b: &[u16] = &[1];
1565 assert_eq!(truncate_utf16_at_nul(&v), b);
1568 let b: &[u16] = &[1, 2];
1569 assert_eq!(truncate_utf16_at_nul(&v), b);
1572 let b: &[u16] = &[1, 2, 3];
1573 assert_eq!(truncate_utf16_at_nul(&v), b);
// Body fragment (fn header not visible in this listing): walks the string
// forward with char_at(pos), advancing pos by each char's UTF-8 length.
1578 let s = "ศไทย中华Việt Nam";
1579 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1581 for ch in v.iter() {
1582 assert!(s.char_at(pos) == *ch);
// from_char(1, ch).len() is the char's encoded byte length.
1583 pos += String::from_char(1, *ch).len();
// Checks char_at_reverse(): walks the string backward, decrementing pos by
// each char's UTF-8 byte length; char_at_reverse(pos) yields the char
// ending at byte index pos.
1588 fn test_char_at_reverse() {
1589 let s = "ศไทย中华Việt Nam";
1590 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1591 let mut pos = s.len();
1592 for ch in v.iter().rev() {
1593 assert!(s.char_at_reverse(pos) == *ch);
1594 pos -= String::from_char(1, *ch).len();
// Checks escape_unicode(): every char is escaped, with width chosen by
// code point — \xNN (<= U+FF), \uNNNN (<= U+FFFF), \UNNNNNNNN (above).
1599 fn test_escape_unicode() {
1600 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1601 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1602 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1603 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1604 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
1605 String::from_str("\\x00\\x01\\u00fe\\u00ff"));
1606 assert_eq!("\u{100}\u{ffff}".escape_unicode(), String::from_str("\\u0100\\uffff"));
1607 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
1608 String::from_str("\\U00010000\\U0010ffff"));
1609 assert_eq!("ab\u{fb00}".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1610 assert_eq!("\u{1d4ea}\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
// Checks escape_default(): printable ASCII passes through unchanged, while
// control chars, quotes, backslash, and non-ASCII chars are escaped.
1614 fn test_escape_default() {
1615 assert_eq!("abc".escape_default(), String::from_str("abc"));
1616 assert_eq!("a c".escape_default(), String::from_str("a c"));
1617 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1618 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1619 assert_eq!("\u{100}\u{ffff}".escape_default(), String::from_str("\\u0100\\uffff"));
1620 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
1621 String::from_str("\\U00010000\\U0010ffff"));
1622 assert_eq!("ab\u{fb00}".escape_default(), String::from_str("ab\\ufb00"));
1623 assert_eq!("\u{1d4ea}\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Exercises lexicographic str ordering via cmp().
// NOTE(review): these comparisons are bare expression statements, not
// wrapped in assert! — as written they discard their results, so the test
// cannot fail on a wrong ordering. Likely an upstream oversight; flagged,
// not changed (listing is incomplete, so no code edit is safe here).
1627 fn test_total_ord() {
1628 "1234".cmp("123") == Greater;
1629 "123".cmp("1234") == Less;
1630 "1234".cmp("1234") == Equal;
1631 "12345555".cmp("123456") == Less;
1632 "22".cmp("1234") == Greater;
// Checks char_range_at(): decoding at each char's starting byte offset in
// a string of 1-, 2-, 3-, and 4-byte chars (b=1, ¢=2, €=3, 𤭢=4 bytes),
// mirrored so offsets cover both halves.
1636 fn test_char_range_at() {
1637 let data = "b¢€𤭢𤭢€¢b";
1638 assert_eq!('b', data.char_range_at(0).ch);
1639 assert_eq!('¢', data.char_range_at(1).ch);
1640 assert_eq!('€', data.char_range_at(3).ch);
1641 assert_eq!('𤭢', data.char_range_at(6).ch);
1642 assert_eq!('𤭢', data.char_range_at(10).ch);
1643 assert_eq!('€', data.char_range_at(14).ch);
1644 assert_eq!('¢', data.char_range_at(17).ch);
1645 assert_eq!('b', data.char_range_at(19).ch);
// Regression test: char_range_at_reverse(0) must not underflow; its
// `next` index stays 0.
1649 fn test_char_range_at_reverse_underflow() {
1650 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Checks chars(): forward iteration yields the expected char sequence and
// the expected total count.
// NOTE(review): the loop driving `it`/`pos` between the visible lines is
// missing from this listing — verify against the full file.
1654 fn test_iterator() {
1655 let s = "ศไทย中华Việt Nam";
1656 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1659 let mut it = s.chars();
1662 assert_eq!(c, v[pos]);
1665 assert_eq!(pos, v.len());
// Checks chars().rev(): reverse iteration yields the chars in reverse
// order and the expected total count.
// NOTE(review): the loop driving `it`/`pos` between the visible lines is
// missing from this listing — verify against the full file.
1669 fn test_rev_iterator() {
1670 let s = "ศไทย中华Việt Nam";
1671 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1674 let mut it = s.chars().rev();
1677 assert_eq!(c, v[pos]);
1680 assert_eq!(pos, v.len());
// Exhaustive round-trip over all of Unicode: encode each scalar value to
// UTF-8 and check chars() decodes it back as the first char.
1684 fn test_chars_decoding() {
1685 let mut bytes = [0u8, ..4];
// from_u32 filters out surrogates/invalid values in 0..0x110000.
1686 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1687 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
1688 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1689 if Some(c) != s.chars().next() {
1690 panic!("character {:x}={} does not decode correctly", c as u32, c);
// Same exhaustive round-trip as test_chars_decoding, but decoding via the
// reverse iterator (chars().rev().next()).
1696 fn test_chars_rev_decoding() {
1697 let mut bytes = [0u8, ..4];
1698 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1699 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
1700 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1701 if Some(c) != s.chars().rev().next() {
1702 panic!("character {:x}={} does not decode correctly", c as u32, c);
// Checks that a cloned Chars iterator is independent yet yields the same
// sequence: zipping an iterator with its clone produces equal pairs.
// NOTE(review): a line between 1710 and 1712 (presumably advancing `it`
// before cloning) is missing from this listing — verify.
1708 fn test_iterator_clone() {
1709 let s = "ศไทย中华Việt Nam";
1710 let mut it = s.chars();
1712 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Checks bytes(): forward byte iteration matches the string's UTF-8
// encoding.
// NOTE(review): the declaration of `v` and parts of the loop are missing
// from this listing — verify against the full file.
1716 fn test_bytesator() {
1717 let s = "ศไทย中华Việt Nam";
1719 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1720 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1725 for b in s.bytes() {
1726 assert_eq!(b, v[pos]);
// Checks bytes().rev(): reverse byte iteration matches the UTF-8 encoding
// walked from the end (pos counts down from v.len()).
// NOTE(review): the declaration of `v` and the pos decrement are missing
// from this listing — verify against the full file.
1732 fn test_bytes_revator() {
1733 let s = "ศไทย中华Việt Nam";
1735 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1736 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1739 let mut pos = v.len();
1741 for b in s.bytes().rev() {
1743 assert_eq!(b, v[pos]);
// Checks char_indices(): each item pairs a char with its starting byte
// offset (p holds the expected offsets, v the expected chars).
// NOTE(review): the loop driving `it`/`pos` is missing from this listing —
// verify against the full file.
1748 fn test_char_indicesator() {
1749 let s = "ศไทย中华Việt Nam";
1750 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1751 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1754 let mut it = s.char_indices();
1757 assert_eq!(c, (p[pos], v[pos]));
1760 assert_eq!(pos, v.len());
1761 assert_eq!(pos, p.len());
// Checks char_indices().rev(): same pairing as test_char_indicesator but
// iterated back-to-front (p and v are listed in reverse order).
// NOTE(review): the loop driving `it`/`pos` is missing from this listing —
// verify against the full file.
1765 fn test_char_indices_revator() {
1766 let s = "ศไทย中华Việt Nam";
1767 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1768 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1771 let mut it = s.char_indices().rev();
1774 assert_eq!(c, (p[pos], v[pos]));
1777 assert_eq!(pos, v.len());
1778 assert_eq!(pos, p.len());
// Checks splitn(3, pat): at most 3 splits, so the 4th piece keeps the
// remaining separators; char patterns and equivalent closures must agree.
1782 fn test_splitn_char_iterator() {
1783 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1785 let split: Vec<&str> = data.splitn(3, ' ').collect();
1786 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1788 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
1789 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Multibyte separator ('ä' is 2 bytes) exercises non-ASCII split points.
1792 let split: Vec<&str> = data.splitn(3, 'ä').collect();
1793 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1795 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
1796 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Contrasts split() with split_terminator(): split keeps the trailing
// empty piece after a final separator; split_terminator drops it.
1800 fn test_split_char_iterator_no_trailing() {
1801 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1803 let split: Vec<&str> = data.split('\n').collect();
1804 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1806 let split: Vec<&str> = data.split_terminator('\n').collect();
1807 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Body fragment (fn header not visible in this listing): checks words()
// splits on runs of any whitespace (spaces, tabs, newlines) and yields no
// empty items.
1812 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1813 let words: Vec<&str> = data.words().collect();
1814 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks Levenshtein distance: zero for any single-char string against
// itself (regardless of encoded byte length), then small edits between
// near-identical multi-line strings, asserting symmetry.
1818 fn test_lev_distance() {
1819 use std::char::{ from_u32, MAX };
1820 // Test bytelength agnosticity
1821 for c in range(0u32, MAX as u32)
1822 .filter_map(|i| from_u32(i))
1823 .map(|i| String::from_char(1, i)) {
1824 assert_eq!(c[].lev_distance(c[]), 0);
// a vs b: one substitution (ä→a); a vs c: substitution + deleted '\n'.
1827 let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1828 let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
1829 let c = "Mary häd ä little lämb\n\nLittle lämb\n";
1830 assert_eq!(a.lev_distance(b), 1);
1831 assert_eq!(b.lev_distance(a), 1);
1832 assert_eq!(a.lev_distance(c), 2);
1833 assert_eq!(c.lev_distance(a), 2);
1834 assert_eq!(b.lev_distance(c), 1);
1835 assert_eq!(c.lev_distance(b), 1);
// Checks NFD (canonical decomposition) normalization: precomposed chars
// decompose with combining marks in canonical order; already-decomposed
// or undecomposable input is unchanged.
1839 fn test_nfd_chars() {
1841 ($input: expr, $expected: expr) => {
1842 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
1846 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
1847 t!("\u{2026}", "\u{2026}");
1848 t!("\u{2126}", "\u{3a9}");
// Combining marks reorder canonically: \u{323} (below) before \u{307} (above).
1849 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
1850 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
1851 t!("a\u{301}", "a\u{301}");
1852 t!("\u{301}a", "\u{301}a");
// Hangul syllables decompose into their constituent jamo.
1853 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
1854 t!("\u{ac1c}", "\u{1100}\u{1162}");
// Checks NFKD (compatibility decomposition): like NFD, plus compatibility
// mappings — e.g. the ellipsis becomes "...", and \u{1c4} (DŽ) decomposes
// to its letter components.
1858 fn test_nfkd_chars() {
1860 ($input: expr, $expected: expr) => {
1861 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
1865 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
1866 t!("\u{2026}", "...");
1867 t!("\u{2126}", "\u{3a9}");
1868 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
1869 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
1870 t!("a\u{301}", "a\u{301}");
1871 t!("\u{301}a", "\u{301}a");
1872 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
1873 t!("\u{ac1c}", "\u{1100}\u{1162}");
// Checks NFC (canonical composition): decomposed sequences recompose to
// precomposed forms; leading combining marks stay put; Hangul recomposes.
1877 fn test_nfc_chars() {
1879 ($input: expr, $expected: expr) => {
1880 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
1884 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
1885 t!("\u{2026}", "\u{2026}");
// U+2126 OHM SIGN is canonically mapped to U+03A9 GREEK CAPITAL OMEGA.
1886 t!("\u{2126}", "\u{3a9}");
1887 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
1888 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
1889 t!("a\u{301}", "\u{e1}");
1890 t!("\u{301}a", "\u{301}a");
1891 t!("\u{d4db}", "\u{d4db}");
1892 t!("\u{ac1c}", "\u{ac1c}");
// Only the first mark composes with the base; the rest reorder canonically.
1893 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// Checks NFKC (compatibility decomposition + canonical composition):
// compatibility mappings apply (ellipsis → "...", DŽ → "DŽ"-components),
// then sequences recompose as in NFC.
1897 fn test_nfkc_chars() {
1899 ($input: expr, $expected: expr) => {
1900 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
1904 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
1905 t!("\u{2026}", "...");
1906 t!("\u{2126}", "\u{3a9}");
1907 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
1908 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
1909 t!("a\u{301}", "\u{e1}");
1910 t!("\u{301}a", "\u{301}a");
1911 t!("\u{d4db}", "\u{d4db}");
1912 t!("\u{ac1c}", "\u{ac1c}");
1913 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// Body fragment (fn header not visible in this listing): checks lines()
// yields the same pieces whether or not the input ends with '\n' (no
// trailing empty line either way).
1918 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1919 let lines: Vec<&str> = data.lines().collect();
1920 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1922 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1923 let lines: Vec<&str> = data.lines().collect();
1924 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1928 fn test_graphemes() {
1929 use std::iter::order;
1930 // official Unicode test data
1931 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1932 let test_same: [(_, &[_]), .. 325] = [
1933 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
1934 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
1935 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
1936 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
1937 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
1938 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
1939 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
1940 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
1941 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
1942 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
1943 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
1944 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
1945 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
1946 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
1947 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
1948 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
1949 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
1950 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
1951 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
1952 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
1953 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
1954 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
1955 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
1956 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
1957 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
1958 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
1959 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
1960 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
1961 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
1962 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
1963 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
1964 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
1965 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
1966 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
1967 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
1968 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
1969 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
1970 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
1971 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
1972 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
1973 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
1974 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
1975 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
1976 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
1977 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
1978 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
1979 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
1980 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
1981 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
1982 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
1983 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
1984 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
1985 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
1986 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
1987 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
1988 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
1989 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
1990 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
1991 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
1992 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
1993 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
1994 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
1995 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
1996 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
1997 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
1998 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
1999 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2000 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2001 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2002 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2003 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2004 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2005 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2006 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2007 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2008 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2009 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2010 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2011 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2012 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2013 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2014 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2015 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2016 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2017 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2018 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2019 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2020 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2021 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2022 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2023 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2024 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2025 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2026 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2027 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2028 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2029 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2030 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2031 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2032 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2033 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2034 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2035 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2036 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2037 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2038 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2039 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2040 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2041 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2042 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2043 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2044 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2045 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2046 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2047 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2048 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2049 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2050 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2051 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2052 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2053 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2054 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2055 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2056 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2057 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2058 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2059 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2060 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2061 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2062 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2063 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2064 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2065 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2066 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2067 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2068 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2069 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2070 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2071 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2072 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2073 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2074 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2075 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2076 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2077 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2078 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2079 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2080 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2081 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2082 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2083 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2084 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2085 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2086 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2087 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2088 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2089 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2090 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2091 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2092 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2093 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2094 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2095 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2096 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2097 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2098 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2099 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2100 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2101 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2102 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2103 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2104 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2105 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2106 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2107 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2108 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2109 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2110 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2111 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2112 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2113 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2114 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2115 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2116 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2117 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2118 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2119 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2120 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2121 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2122 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2123 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2124 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2125 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2126 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2127 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2128 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2129 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2130 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2131 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2132 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2133 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2134 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2135 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2136 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2137 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2138 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2139 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2140 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2141 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2142 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2143 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2144 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2145 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2146 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2147 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2148 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2149 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2150 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2151 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2152 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2153 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2154 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2155 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2156 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2157 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2158 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2159 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2160 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2161 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2162 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2163 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2164 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2165 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2166 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2167 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2168 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2169 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2170 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2171 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2172 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2173 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2174 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2175 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2176 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2177 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2178 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2179 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2180 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2181 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2182 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2183 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2184 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2185 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2186 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2187 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2188 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2189 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2190 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2191 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2192 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2193 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2194 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2195 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2196 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2197 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2198 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2199 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2200 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2201 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2202 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2203 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2204 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2205 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2206 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2207 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2208 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2209 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2210 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2211 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2212 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2213 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2214 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2215 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2216 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2217 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2218 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2219 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2220 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2221 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2222 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2223 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2224 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2225 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2226 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2227 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2228 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2229 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2230 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2231 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2232 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2233 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2234 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2235 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2236 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2237 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2238 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2239 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2240 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2241 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2242 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2243 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2244 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2245 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2246 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2247 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2248 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2249 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2250 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2251 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2252 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2253 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2254 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2255 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2256 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2257 "\u{1F1E7}\u{1F1E8}"]),
2258 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2259 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2260 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2261 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2264 let test_diff: [(_, &[_], &[_]), .. 23] = [
2265 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2266 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2267 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2268 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2269 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2270 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2271 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2272 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2273 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2274 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2275 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2276 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2277 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2278 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2279 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2280 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2281 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2282 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2283 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2284 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2285 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2286 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2287 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2290 for &(s, g) in test_same.iter() {
2291 // test forward iterator
2292 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2293 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2295 // test reverse iterator
2296 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2297 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2300 for &(s, gt, gf) in test_diff.iter() {
2301 // test forward iterator
2302 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2303 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2305 // test reverse iterator
2306 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2307 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2310 // test the indices iterators
2311 let s = "a̐éö̲\r\n";
2312 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2313 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2314 assert_eq!(gr_inds, b);
2315 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2316 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2317 assert_eq!(gr_inds, b);
2318 let mut gr_inds_iter = s.grapheme_indices(true);
2320 let gr_inds = gr_inds_iter.by_ref();
2321 let e1 = gr_inds.size_hint();
2322 assert_eq!(e1, (1, Some(13)));
2323 let c = gr_inds.count();
2326 let e2 = gr_inds_iter.size_hint();
2327 assert_eq!(e2, (0, Some(0)));
2329 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2331 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2332 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` (splitting on a multi-character substring separator):
// a separator that never matches, multi-byte UTF-8 separators, empty fields
// produced by leading/trailing/adjacent separators, and overlapping
// separator candidates (e.g. "zzz" split on "zz" yields ["", "z"]).
2337 fn test_split_strator() {
// Helper: collect the `split_str` pieces of `s`; `u` is the expected result.
2338 fn t(s: &str, sep: &str, u: &[&str]) {
2339 let v: Vec<&str> = s.split_str(sep).collect();
// NOTE(review): original lines 2340-2341 are missing from this listing —
// presumably the `assert_eq!` comparing `v` against `u` and the helper's
// closing brace. Confirm against the upstream file before relying on this.
2342 t("--1233345--", "12345", &["--1233345--"]);
2343 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2344 t("::hello::there", "::", &["", "hello", "there"]);
2345 t("hello::there::", "::", &["hello", "there", ""]);
2346 t("::hello::there::", "::", &["", "hello", "there", ""]);
2347 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2348 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2349 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2350 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Cases where the separator equals or overlaps the haystack itself.
2352 t("zz", "zz", &["",""]);
2353 t("ok", "z", &["ok"]);
2354 t("zzz", "zz", &["","z"]);
2355 t("zzzzz", "zz", &["","","z"]);
// Verifies that `Default::default()` for string-like types (`Default + Str`)
// produces the empty string.
2359 fn test_str_default() {
2360 use std::default::Default;
// Generic helper so the same check runs for any `Default + Str` type.
2361 fn t<S: Default + Str>() {
2362 let s: S = Default::default();
2363 assert_eq!(s.as_slice(), "");
// NOTE(review): the lines after 2363 (closing the helper and instantiating
// it — presumably `t::<&str>()` and `t::<String>()`) are missing from this
// listing.
// Checks that summing `len()` over various `&str` views — literals, empty
// strings, and slices borrowed from freshly built `String`s — gives the
// expected total byte length (5 for the pieces of "01234").
2371 fn test_str_container() {
// Helper: total byte length of all slices in `v`.
2372 fn sum_len(v: &[&str]) -> uint {
2373 v.iter().map(|x| x.len()).sum()
// NOTE(review): original lines 2374-2375 (presumably the helper's closing
// brace and a blank line) are missing from this listing.
2376 let s = String::from_str("01234");
2377 assert_eq!(5, sum_len(&["012", "", "34"]));
2378 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2379 String::from_str("2").as_slice(),
2380 String::from_str("34").as_slice(),
2381 String::from_str("").as_slice()]));
2382 assert_eq!(5, sum_len(&[s.as_slice()]));
// `from_utf8`: Some(..) for valid UTF-8 byte slices (ASCII and multi-byte),
// None for invalid sequences (a trailing lone 0xFF byte).
2386 fn test_str_from_utf8() {
// NOTE(review): original line 2387 is missing — presumably
// `let xs = b"hello";`, the binding the next assertion reads.
2388 assert_eq!(from_utf8(xs), Some("hello"));
2390 let xs = "ศไทย中华Việt Nam".as_bytes();
2391 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// 0xFF can never appear in well-formed UTF-8, so decoding must fail.
2393 let xs = b"hello\xFF";
2394 assert_eq!(from_utf8(xs), None);
// Exercises the trait impls of `MaybeOwned`'s two variants — `Slice` (borrowed)
// and `Owned` (owning a `String`): `Str` (len/as_slice), `Show` formatting,
// ordering (`lt`, `cmp`), `Default`, and `Equiv`. Equal content must compare
// `Equal` and be `equiv` across the two variants, in both directions.
2398 fn test_maybe_owned_traits() {
// Borrowed variant.
2399 let s = Slice("abcde");
2400 assert_eq!(s.len(), 5);
2401 assert_eq!(s.as_slice(), "abcde");
2402 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2403 assert_eq!(format!("{}", s).as_slice(), "abcde");
2404 assert!(s.lt(&Owned(String::from_str("bcdef"))));
2405 assert_eq!(Slice(""), Default::default());
// Owned variant — mirrors every check made on `Slice` above.
2407 let o = Owned(String::from_str("abcde"));
2408 assert_eq!(o.len(), 5);
2409 assert_eq!(o.as_slice(), "abcde");
2410 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2411 assert_eq!(format!("{}", o).as_slice(), "abcde");
2412 assert!(o.lt(&Slice("bcdef")));
2413 assert_eq!(Owned(String::from_str("")), Default::default());
// Cross-variant comparisons: same content => Equal / equiv, symmetrically.
2415 assert!(s.cmp(&o) == Equal);
2416 assert!(s.equiv(&o));
2418 assert!(o.cmp(&s) == Equal);
2419 assert!(o.equiv(&s));
// `is_slice` / `is_owned` must each be true exactly for the matching
// `MaybeOwned` variant.
2423 fn test_maybe_owned_methods() {
2424 let s = Slice("abcde");
2425 assert!(s.is_slice());
2426 assert!(!s.is_owned());
2428 let o = Owned(String::from_str("abcde"));
2429 assert!(!o.is_slice());
2430 assert!(o.is_owned());
// Cloning a `MaybeOwned` preserves content, and equality holds across
// variants: a cloned `Slice` equals an `Owned` of the same text and
// vice versa (the derived/implemented PartialEq compares by content).
2434 fn test_maybe_owned_clone() {
2435 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2436 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2437 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2438 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// `into_string` converts either `MaybeOwned` variant into an owned `String`
// with identical content.
2442 fn test_maybe_owned_into_string() {
2443 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2444 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2445 String::from_str("abcde"));
// `into_maybe_owned` on a `&str` yields a value equal to both `Slice` and
// `Owned` of the same text (equality is by content, not variant); likewise
// for a `String` input.
2449 fn test_into_maybe_owned() {
2450 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2451 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2452 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2453 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2454 Owned(String::from_str("abcde")));
2461 use test::black_box;
2463 use std::iter::{IteratorExt, DoubleEndedIteratorExt};
2464 use std::str::StrPrelude;
2465 use std::slice::SliceExt;
// Bench: counting chars of a mixed ASCII/multi-byte string via the forward
// `chars()` iterator.
2468 fn char_iterator(b: &mut Bencher) {
2469 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2471 b.iter(|| s.chars().count());
// Bench: same iteration as `char_iterator` but via a `for` loop, feeding each
// char through `black_box` so the loop body is not optimized away.
2475 fn char_iterator_for(b: &mut Bencher) {
2476 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// NOTE(review): original lines 2477-2478 are missing — presumably a blank
// line and the opening `b.iter(|| {` that wraps the loop below.
2479 for ch in s.chars() { black_box(ch) }
// Bench: `chars().count()` on pure-ASCII input (one byte per char), as a
// contrast to the multi-byte case in `char_iterator`.
2484 fn char_iterator_ascii(b: &mut Bencher) {
2485 let s = "Mary had a little lamb, Little lamb
2486 Mary had a little lamb, Little lamb
2487 Mary had a little lamb, Little lamb
2488 Mary had a little lamb, Little lamb
2489 Mary had a little lamb, Little lamb
2490 Mary had a little lamb, Little lamb";
2492 b.iter(|| s.chars().count());
// Bench: counting chars while iterating in reverse (`chars().rev()`), which
// must decode UTF-8 backwards.
2496 fn char_iterator_rev(b: &mut Bencher) {
2497 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2499 b.iter(|| s.chars().rev().count());
// Bench: reverse char iteration via a `for` loop with `black_box`, the
// reverse-direction counterpart of `char_iterator_for`.
2503 fn char_iterator_rev_for(b: &mut Bencher) {
2504 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// NOTE(review): original lines 2505-2506 are missing — presumably a blank
// line and the opening `b.iter(|| {` wrapping the loop below.
2507 for ch in s.chars().rev() { black_box(ch) }
// Bench: forward `char_indices()` — the count of (byte-index, char) pairs
// must equal `char_len()`, so the assert also sanity-checks the iterator.
2512 fn char_indicesator(b: &mut Bencher) {
2513 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2514 let len = s.char_len();
2516 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Bench: reverse `char_indices().rev()` — same count invariant as the
// forward version in `char_indicesator`.
2520 fn char_indicesator_rev(b: &mut Bencher) {
2521 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2522 let len = s.char_len();
2524 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Bench: splitting a string full of multi-byte chars on an ASCII char
// separator ('V' occurs twice, giving 3 pieces).
2528 fn split_unicode_ascii(b: &mut Bencher) {
2529 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2531 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Bench: same split as `split_unicode_ascii`, but through a custom `CharEq`
// whose `only_ascii()` returns false — forcing the splitter down the
// general (non-ASCII-fast-path) code path.
2535 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper matching a single char, but deliberately reporting non-ASCII.
2536 struct NotAscii(char);
2537 impl CharEq for NotAscii {
2538 fn matches(&mut self, c: char) -> bool {
2539 let NotAscii(cc) = *self;
// NOTE(review): original lines 2540-2541 are missing — presumably the
// comparison `cc == c` and the closing brace of `matches`.
2542 fn only_ascii(&self) -> bool { false }
// NOTE(review): original line 2543 (presumably the `impl` closing brace)
// is also missing from this listing.
2544 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2546 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Bench: splitting an ASCII sentence on the space character; the expected
// piece count is computed once outside the timed loop.
2551 fn split_ascii(b: &mut Bencher) {
2552 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2553 let len = s.split(' ').count();
2555 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Bench: ASCII-input counterpart of `split_unicode_not_ascii` — splitting on
// ' ' through a `CharEq` that disables the ASCII fast path.
2559 fn split_not_ascii(b: &mut Bencher) {
2560 struct NotAscii(char);
2561 impl CharEq for NotAscii {
2563 fn matches(&mut self, c: char) -> bool {
2564 let NotAscii(cc) = *self;
// NOTE(review): original lines 2565-2566 are missing — presumably the
// comparison `cc == c` and the closing brace of `matches`.
2567 fn only_ascii(&self) -> bool { false }
// NOTE(review): original line 2568 (presumably the `impl` closing brace)
// is also missing from this listing.
2569 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2570 let len = s.split(' ').count();
2572 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Bench: splitting via a named fn-pointer predicate instead of a char or
// closure, to compare dispatch cost across predicate kinds.
2576 fn split_extern_fn(b: &mut Bencher) {
2577 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2578 let len = s.split(' ').count();
2579 fn pred(c: char) -> bool { c == ' ' }
2581 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Bench: splitting via an unboxed closure predicate (`|&: c: char| ...` is
// the old explicit by-ref closure-kind syntax).
2585 fn split_closure(b: &mut Bencher) {
2586 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2587 let len = s.split(' ').count();
2589 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Bench: splitting via a `&[char]` separator slice (matches any char in the
// slice) — the last of the predicate-kind comparison set.
2593 fn split_slice(b: &mut Bencher) {
2594 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2595 let len = s.split(' ').count();
2597 let c: &[char] = &[' '];
2598 b.iter(|| assert_eq!(s.split(c).count(), len));
// Bench: UTF-8 validation of a 100-byte all-ASCII byte string (the assert
// pins the fixture length at exactly 100 bytes).
2602 fn is_utf8_100_ascii(b: &mut Bencher) {
2604 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2605 Lorem ipsum dolor sit amet, consectetur. ";
2607 assert_eq!(100, s.len());
// NOTE(review): the timed body is missing from this listing — the lines
// after 2607 presumably call the UTF-8 validator on `s` inside `b.iter`.
// Bench: UTF-8 validation of a 100-byte string made of multi-byte sequences
// (including 4-byte supplementary-plane chars), the worst case vs. ASCII.
2614 fn is_utf8_100_multibyte(b: &mut Bencher) {
2615 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2616 assert_eq!(100, s.len());
// NOTE(review): the timed body is missing from this listing — the lines
// after 2616 presumably call the UTF-8 validator on `s` inside `b.iter`.
// Bench: `connect` (join) of ten copies of a mixed-script string; the length
// identity `10*|s| + 9*|sep|` doubles as a correctness check.
2623 fn bench_connect(b: &mut Bencher) {
2624 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// NOTE(review): original line 2625 is missing — it must define `sep`,
// which is used below but never bound in the visible lines.
2626 let v = [s, s, s, s, s, s, s, s, s, s];
// NOTE(review): the `b.iter(|| {` opening around this assertion (original
// line 2627) is also absent from the listing.
2628 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Bench: `contains` with a short needle in a short haystack (positive match).
2633 fn bench_contains_short_short(b: &mut Bencher) {
2634 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
// NOTE(review): the lines defining `needle` and opening `b.iter(|| {`
// (original 2635-2637) are missing from this listing; `needle` is used
// below but never bound in the visible lines.
2638 assert!(haystack.contains(needle));
// Bench: `contains` with a short needle ("english") that is absent from a
// long multi-paragraph Lorem-ipsum haystack — the worst case for a naive
// scan, which must examine the whole haystack before failing.
2643 fn bench_contains_short_long(b: &mut Bencher) {
// NOTE(review): original line 2644 is missing — presumably
// `let haystack = "\` opening the long continued string literal below.
// Line 2672-2673 (inside the literal) and the `b.iter(|| {` opener
// (2680-2681) are also absent from this listing.
2645 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2646 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2647 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2648 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2649 tempus vel, gravida nec quam.
2651 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2652 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2653 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2654 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2655 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2656 interdum. Curabitur ut nisi justo.
2658 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2659 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2660 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2661 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2662 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2663 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2664 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2665 Aliquam sit amet placerat lorem.
2667 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2668 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2669 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2670 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2671 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2674 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2675 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2676 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2677 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2678 malesuada sollicitudin quam eu fermentum.";
2679 let needle = "english";
2682 assert!(!haystack.contains(needle));
// Bench: adversarial case for naive substring search — a needle of eight
// 'a's plus 'b' against an all-'a' haystack forces maximal re-scanning; a
// KMP/Two-Way style matcher avoids the quadratic blowup.
2687 fn bench_contains_bad_naive(b: &mut Bencher) {
2688 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2689 let needle = "aaaaaaaab";
// NOTE(review): the `b.iter(|| {` opening around this assertion (original
// lines 2690-2691) is absent from this listing.
2692 assert!(!haystack.contains(needle));
2697 fn bench_contains_equal(b: &mut Bencher) {
2698 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2699 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2702 assert!(haystack.contains(needle));