1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has direct mappings to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 use self::MaybeOwned::*;
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, Cow, ToOwned};
60 use core::clone::Clone;
61 use core::cmp::{Equiv, PartialEq, Eq, PartialOrd, Ord, Ordering};
63 use core::default::Default;
66 use core::iter::AdditiveIterator;
67 use core::iter::{mod, range, Iterator, IteratorExt};
68 use core::kinds::Sized;
70 use core::option::Option::{mod, Some, None};
71 use core::slice::AsSlice;
72 use core::str as core_str;
73 use unicode::str::{UnicodeStr, Utf16Encoder};
75 use ring_buf::RingBuf;
80 use slice::SliceConcatExt;
82 pub use core::str::{from_utf8, CharEq, Chars, CharIndices};
83 pub use core::str::{Bytes, CharSplits, is_utf8};
84 pub use core::str::{CharSplitsN, Lines, LinesAny, MatchIndices, StrSplits, SplitStr};
85 pub use core::str::{CharRange};
86 pub use core::str::{FromStr, from_str, Utf8Error};
87 pub use core::str::Str;
88 pub use core::str::{from_utf8_unchecked, from_c_str};
89 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
90 pub use core::str::{Split, SplitTerminator};
91 pub use core::str::{SplitN, RSplitN};
94 Section: Creating a string
97 impl<S: Str> SliceConcatExt<str, String> for [S] {
98 fn concat(&self) -> String {
99 let s = self.as_slice();
102 return String::new();
105 // `len` calculation may overflow but push_str will check boundaries
106 let len = s.iter().map(|s| s.as_slice().len()).sum();
107 let mut result = String::with_capacity(len);
110 result.push_str(s.as_slice())
116 fn connect(&self, sep: &str) -> String {
117 let s = self.as_slice();
120 return String::new();
128 // this is wrong without the guarantee that `self` is non-empty
129 // `len` calculation may overflow but push_str will check boundaries
130 let len = sep.len() * (s.len() - 1)
131 + s.iter().map(|s| s.as_slice().len()).sum();
132 let mut result = String::with_capacity(len);
133 let mut first = true;
139 result.push_str(sep);
141 result.push_str(s.as_slice());
151 // Helper functions used for Unicode normalization
152 fn canonical_sort(comb: &mut [(char, u8)]) {
153 let len = comb.len();
154 for i in range(0, len) {
155 let mut swapped = false;
156 for j in range(1, len-i) {
157 let class_a = comb[j-1].1;
158 let class_b = comb[j].1;
159 if class_a != 0 && class_b != 0 && class_a > class_b {
164 if !swapped { break; }
169 enum DecompositionType {
174 /// External iterator for a string's decomposition's characters.
175 /// Use with the `std::iter` module.
177 pub struct Decompositions<'a> {
178 kind: DecompositionType,
180 buffer: Vec<(char, u8)>,
184 impl<'a> Iterator<char> for Decompositions<'a> {
186 fn next(&mut self) -> Option<char> {
187 match self.buffer.first() {
190 self.buffer.remove(0);
193 Some(&(c, _)) if self.sorted => {
194 self.buffer.remove(0);
197 _ => self.sorted = false
201 for ch in self.iter {
202 let buffer = &mut self.buffer;
203 let sorted = &mut self.sorted;
207 unicode::char::canonical_combining_class(d);
208 if class == 0 && !*sorted {
209 canonical_sort(buffer.as_mut_slice());
212 buffer.push((d, class));
216 unicode::char::decompose_canonical(ch, callback)
219 unicode::char::decompose_compatible(ch, callback)
230 canonical_sort(self.buffer.as_mut_slice());
234 if self.buffer.is_empty() {
237 match self.buffer.remove(0) {
247 fn size_hint(&self) -> (uint, Option<uint>) {
248 let (lower, _) = self.iter.size_hint();
254 enum RecompositionState {
260 /// External iterator for a string's recomposition's characters.
261 /// Use with the `std::iter` module.
263 pub struct Recompositions<'a> {
264 iter: Decompositions<'a>,
265 state: RecompositionState,
266 buffer: RingBuf<char>,
267 composee: Option<char>,
271 impl<'a> Iterator<char> for Recompositions<'a> {
273 fn next(&mut self) -> Option<char> {
277 for ch in self.iter {
278 let ch_class = unicode::char::canonical_combining_class(ch);
279 if self.composee.is_none() {
283 self.composee = Some(ch);
286 let k = self.composee.clone().unwrap();
288 match self.last_ccc {
290 match unicode::char::compose(k, ch) {
292 self.composee = Some(r);
297 self.composee = Some(ch);
300 self.buffer.push_back(ch);
301 self.last_ccc = Some(ch_class);
306 if l_class >= ch_class {
307 // `ch` is blocked from `composee`
309 self.composee = Some(ch);
310 self.last_ccc = None;
311 self.state = Purging;
314 self.buffer.push_back(ch);
315 self.last_ccc = Some(ch_class);
318 match unicode::char::compose(k, ch) {
320 self.composee = Some(r);
324 self.buffer.push_back(ch);
325 self.last_ccc = Some(ch_class);
331 self.state = Finished;
332 if self.composee.is_some() {
333 return self.composee.take();
337 match self.buffer.pop_front() {
338 None => self.state = Composing,
343 match self.buffer.pop_front() {
344 None => return self.composee.take(),
353 /// External iterator for a string's UTF16 codeunits.
354 /// Use with the `std::iter` module.
356 pub struct Utf16Units<'a> {
357 encoder: Utf16Encoder<Chars<'a>>
360 impl<'a> Iterator<u16> for Utf16Units<'a> {
362 fn next(&mut self) -> Option<u16> { self.encoder.next() }
365 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
368 /// Replaces all occurrences of one string with another.
372 /// * s - The string containing substrings to replace
373 /// * from - The string to replace
374 /// * to - The replacement string
378 /// The original string with all occurrences of `from` replaced with `to`.
383 /// # #![allow(deprecated)]
385 /// let string = "orange";
386 /// let new_string = str::replace(string, "or", "str");
387 /// assert_eq!(new_string.as_slice(), "strange");
389 #[deprecated = "call the inherent method instead"]
390 pub fn replace(s: &str, from: &str, to: &str) -> String {
398 // Return the initial codepoint accumulator for the first byte.
399 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
400 // for width 3, and 3 bits for width 4
401 macro_rules! utf8_first_byte {
402 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
405 // return the value of $ch updated with continuation byte $byte
406 macro_rules! utf8_acc_cont_byte {
407 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
414 /// A string type that can hold either a `String` or a `&str`.
415 /// This can be useful as an optimization when an allocation is sometimes
416 /// needed but not always.
417 #[deprecated = "use std::string::CowString"]
418 pub enum MaybeOwned<'a> {
419 /// A borrowed string.
425 /// A specialization of `CowString` to be sendable.
426 #[deprecated = "use std::string::CowString<'static>"]
427 pub type SendStr = CowString<'static>;
429 #[deprecated = "use std::string::CowString"]
430 impl<'a> MaybeOwned<'a> {
431 /// Returns `true` if this `MaybeOwned` wraps an owned string.
436 /// let string = String::from_str("orange");
437 /// let maybe_owned_string = string.into_maybe_owned();
438 /// assert_eq!(true, maybe_owned_string.is_owned());
441 pub fn is_owned(&self) -> bool {
448 /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
453 /// let string = "orange";
454 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
455 /// assert_eq!(true, maybe_owned_string.is_slice());
458 pub fn is_slice(&self) -> bool {
465 /// Return the number of bytes in this string.
468 pub fn len(&self) -> uint { self.as_slice().len() }
470 /// Returns true if the string contains no bytes
473 pub fn is_empty(&self) -> bool { self.len() == 0 }
476 #[deprecated = "use std::borrow::IntoCow"]
477 /// Trait for moving into a `MaybeOwned`.
478 pub trait IntoMaybeOwned<'a> {
479 /// Moves `self` into a `MaybeOwned`.
480 fn into_maybe_owned(self) -> MaybeOwned<'a>;
483 #[deprecated = "use std::borrow::IntoCow"]
485 impl<'a> IntoMaybeOwned<'a> for String {
489 /// let owned_string = String::from_str("orange");
490 /// let maybe_owned_string = owned_string.into_maybe_owned();
491 /// assert_eq!(true, maybe_owned_string.is_owned());
495 fn into_maybe_owned(self) -> MaybeOwned<'a> {
500 #[deprecated = "use std::borrow::IntoCow"]
502 impl<'a> IntoMaybeOwned<'a> for &'a str {
506 /// let string = "orange";
507 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
508 /// assert_eq!(false, maybe_owned_str.is_owned());
512 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
516 #[deprecated = "use std::borrow::IntoCow"]
517 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
521 /// let str = "orange";
522 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
523 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
524 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
527 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
530 #[deprecated = "use std::string::CowString"]
532 impl<'a> PartialEq for MaybeOwned<'a> {
534 fn eq(&self, other: &MaybeOwned) -> bool {
535 self.as_slice() == other.as_slice()
539 #[deprecated = "use std::string::CowString"]
540 impl<'a> Eq for MaybeOwned<'a> {}
542 #[deprecated = "use std::string::CowString"]
543 impl<'a> PartialOrd for MaybeOwned<'a> {
545 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
546 Some(self.cmp(other))
550 #[deprecated = "use std::string::CowString"]
551 impl<'a> Ord for MaybeOwned<'a> {
554 fn cmp(&self, other: &MaybeOwned) -> Ordering {
555 self.as_slice().cmp(other.as_slice())
560 #[deprecated = "use std::string::CowString"]
561 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
563 fn equiv(&self, other: &S) -> bool {
564 self.as_slice() == other.as_slice()
568 #[deprecated = "use std::string::CowString"]
570 impl<'a> Str for MaybeOwned<'a> {
572 fn as_slice<'b>(&'b self) -> &'b str {
575 Owned(ref s) => s.as_slice()
580 #[deprecated = "use std::string::CowString"]
581 impl<'a> Clone for MaybeOwned<'a> {
584 fn clone(&self) -> MaybeOwned<'a> {
586 Slice(s) => Slice(s),
587 Owned(ref s) => Owned(String::from_str(s.as_slice()))
592 #[deprecated = "use std::string::CowString"]
593 impl<'a> Default for MaybeOwned<'a> {
596 fn default() -> MaybeOwned<'a> { Slice("") }
599 #[deprecated = "use std::string::CowString"]
601 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
603 fn hash(&self, hasher: &mut H) {
604 self.as_slice().hash(hasher)
608 #[deprecated = "use std::string::CowString"]
609 impl<'a> fmt::Show for MaybeOwned<'a> {
611 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
613 Slice(ref s) => s.fmt(f),
614 Owned(ref s) => s.fmt(f)
619 #[unstable = "trait is unstable"]
620 impl BorrowFrom<String> for str {
621 fn borrow_from(owned: &String) -> &str { owned[] }
624 #[unstable = "trait is unstable"]
625 impl ToOwned<String> for str {
626 fn to_owned(&self) -> String {
628 String::from_utf8_unchecked(self.as_bytes().to_owned())
633 /// Unsafe string operations.
636 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
637 pub use core::str::raw::{slice_unchecked};
644 /// A clone-on-write string
645 #[deprecated = "use std::string::CowString instead"]
646 pub type CowString<'a> = Cow<'a, String, str>;
649 Section: Trait implementations
652 /// Any string that can be represented as a slice.
653 pub trait StrExt for Sized?: ops::Slice<uint, str> {
654 /// Escapes each char in `s` with `char::escape_default`.
655 #[unstable = "return type may change to be an iterator"]
656 fn escape_default(&self) -> String {
657 self.chars().flat_map(|c| c.escape_default()).collect()
660 /// Escapes each char in `s` with `char::escape_unicode`.
661 #[unstable = "return type may change to be an iterator"]
662 fn escape_unicode(&self) -> String {
663 self.chars().flat_map(|c| c.escape_unicode()).collect()
666 /// Replaces all occurrences of one string with another.
670 /// * `from` - The string to replace
671 /// * `to` - The replacement string
675 /// The original string with all occurrences of `from` replaced with `to`.
680 /// let s = "Do you know the muffin man,
681 /// The muffin man, the muffin man, ...".to_string();
683 /// assert_eq!(s.replace("muffin man", "little lamb"),
684 /// "Do you know the little lamb,
685 /// The little lamb, the little lamb, ...".to_string());
687 /// // not found, so no change.
688 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
691 fn replace(&self, from: &str, to: &str) -> String {
692 let mut result = String::new();
693 let mut last_end = 0;
694 for (start, end) in self.match_indices(from) {
695 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
699 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
703 /// Given a string, makes a new string with repeated copies of it.
704 #[deprecated = "use repeat(self).take(n).collect() instead"]
705 fn repeat(&self, nn: uint) -> String {
706 iter::repeat(self[]).take(nn).collect()
709 /// Returns the Levenshtein Distance between two strings.
710 #[deprecated = "this function will be removed"]
711 fn lev_distance(&self, t: &str) -> uint {
713 if me.is_empty() { return t.chars().count(); }
714 if t.is_empty() { return me.chars().count(); }
716 let mut dcol: Vec<_> = range(0, t.len() + 1).collect();
719 for (i, sc) in me.chars().enumerate() {
722 dcol[0] = current + 1;
724 for (j, tc) in t.chars().enumerate() {
726 let next = dcol[j + 1];
729 dcol[j + 1] = current;
731 dcol[j + 1] = cmp::min(current, next);
732 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
743 /// Returns an iterator over the string in Unicode Normalization Form D
744 /// (canonical decomposition).
746 #[unstable = "this functionality may be moved to libunicode"]
747 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
749 iter: self[].chars(),
756 /// Returns an iterator over the string in Unicode Normalization Form KD
757 /// (compatibility decomposition).
759 #[unstable = "this functionality may be moved to libunicode"]
760 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
762 iter: self[].chars(),
769 /// An Iterator over the string in Unicode Normalization Form C
770 /// (canonical decomposition followed by canonical composition).
772 #[unstable = "this functionality may be moved to libunicode"]
773 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
775 iter: self.nfd_chars(),
777 buffer: RingBuf::new(),
783 /// An Iterator over the string in Unicode Normalization Form KC
784 /// (compatibility decomposition followed by canonical composition).
786 #[unstable = "this functionality may be moved to libunicode"]
787 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
789 iter: self.nfkd_chars(),
791 buffer: RingBuf::new(),
797 /// Returns true if a string contains a string pattern.
801 /// - pat - The string pattern to look for
806 /// assert!("bananas".contains("nana"));
809 fn contains(&self, pat: &str) -> bool {
810 core_str::StrExt::contains(self[], pat)
813 /// Returns true if a string contains a char pattern.
817 /// - pat - The char pattern to look for
822 /// assert!("hello".contains_char('e'));
824 #[unstable = "might get removed in favour of a more generic contains()"]
825 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
826 core_str::StrExt::contains_char(self[], pat)
829 /// An iterator over the characters of `self`. Note, this iterates
830 /// over Unicode code-points, not Unicode graphemes.
835 /// let v: Vec<char> = "abc åäö".chars().collect();
836 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
839 fn chars(&self) -> Chars {
840 core_str::StrExt::chars(self[])
843 /// An iterator over the bytes of `self`
848 /// let v: Vec<u8> = "bors".bytes().collect();
849 /// assert_eq!(v, b"bors".to_vec());
852 fn bytes(&self) -> Bytes {
853 core_str::StrExt::bytes(self[])
856 /// An iterator over the characters of `self` and their byte offsets.
858 fn char_indices(&self) -> CharIndices {
859 core_str::StrExt::char_indices(self[])
862 /// An iterator over substrings of `self`, separated by characters
863 /// matched by the pattern `pat`.
868 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
869 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
871 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
872 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
874 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
875 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
877 /// let v: Vec<&str> = "".split('X').collect();
878 /// assert_eq!(v, vec![""]);
881 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
882 core_str::StrExt::split(self[], pat)
885 /// An iterator over substrings of `self`, separated by characters
886 /// matched by the pattern `pat`, restricted to splitting at most `count`
892 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
893 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
895 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
896 /// assert_eq!(v, vec!["abc", "def2ghi"]);
898 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
899 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
901 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
902 /// assert_eq!(v, vec!["abcXdef"]);
904 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
905 /// assert_eq!(v, vec![""]);
908 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
909 core_str::StrExt::splitn(self[], count, pat)
912 /// An iterator over substrings of `self`, separated by characters
913 /// matched by the pattern `pat`.
915 /// Equivalent to `split`, except that the trailing substring
916 /// is skipped if empty (terminator semantics).
921 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
922 /// assert_eq!(v, vec!["A", "B"]);
924 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
925 /// assert_eq!(v, vec!["A", "", "B", ""]);
927 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
928 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
930 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
931 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
933 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
934 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
936 #[unstable = "might get removed"]
937 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
938 core_str::StrExt::split_terminator(self[], pat)
941 /// An iterator over substrings of `self`, separated by characters
942 /// matched by the pattern `pat`, starting from the end of the string.
943 /// Restricted to splitting at most `count` times.
948 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
949 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
951 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
952 /// assert_eq!(v, vec!["ghi", "abc1def"]);
954 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
955 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
958 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
959 core_str::StrExt::rsplitn(self[], count, pat)
962 /// An iterator over the start and end indices of the disjoint
963 /// matches of the pattern `pat` within `self`.
965 /// That is, each returned value `(start, end)` satisfies
966 /// `self.slice(start, end) == pat`. For matches of `pat` within
967 /// `self` that overlap, only the indices corresponding to the
968 /// first match are returned.
973 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
974 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
976 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
977 /// assert_eq!(v, vec![(1,4), (4,7)]);
979 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
980 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
982 #[unstable = "might have its iterator type changed"]
983 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
984 core_str::StrExt::match_indices(self[], pat)
987 /// An iterator over the substrings of `self` separated by the pattern `pat`.
992 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
993 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
995 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
996 /// assert_eq!(v, vec!["1", "", "2"]);
998 #[unstable = "might get removed in the future in favor of a more generic split()"]
999 fn split_str<'a>(&'a self, pat: &'a str) -> StrSplits<'a> {
1000 core_str::StrExt::split_str(self[], pat)
1003 /// An iterator over the lines of a string (subsequences separated
1004 /// by `\n`). This does not include the empty string after a
1010 /// let four_lines = "foo\nbar\n\nbaz\n";
1011 /// let v: Vec<&str> = four_lines.lines().collect();
1012 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1015 fn lines(&self) -> Lines {
1016 core_str::StrExt::lines(self[])
1019 /// An iterator over the lines of a string, separated by either
1020 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
1021 /// empty trailing line.
1026 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
1027 /// let v: Vec<&str> = four_lines.lines_any().collect();
1028 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1031 fn lines_any(&self) -> LinesAny {
1032 core_str::StrExt::lines_any(self[])
1035 /// Returns the number of Unicode code points (`char`) that a
1038 /// This does not perform any normalization, and is `O(n)`, since
1039 /// UTF-8 is a variable width encoding of code points.
1041 /// *Warning*: The number of code points in a string does not directly
1042 /// correspond to the number of visible characters or width of the
1043 /// visible text due to composing characters, and double- and
1044 /// zero-width ones.
1046 /// See also `.len()` for the byte length.
1051 /// # #![allow(deprecated)]
1052 /// // composed forms of `ö` and `é`
1053 /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
1054 /// // decomposed forms of `ö` and `é`
1055 /// let d = "Lo\u{0308}we 老虎 Le\u{0301}opard";
1057 /// assert_eq!(c.char_len(), 15);
1058 /// assert_eq!(d.char_len(), 17);
1060 /// assert_eq!(c.len(), 21);
1061 /// assert_eq!(d.len(), 23);
1063 /// // the two strings *look* the same
1064 /// println!("{}", c);
1065 /// println!("{}", d);
1067 #[deprecated = "call .chars().count() instead"]
1068 fn char_len(&self) -> uint {
1069 core_str::StrExt::char_len(self[])
1072 /// Returns a slice of the given string from the byte range
1073 /// [`begin`..`end`).
1075 /// This operation is `O(1)`.
1077 /// Panics when `begin` and `end` do not point to valid characters
1078 /// or point beyond the last character of the string.
1080 /// See also `slice_to` and `slice_from` for slicing prefixes and
1081 /// suffixes of strings, and `slice_chars` for slicing based on
1082 /// code point counts.
1087 /// let s = "Löwe 老虎 Léopard";
1088 /// assert_eq!(s.slice(0, 1), "L");
1090 /// assert_eq!(s.slice(1, 9), "öwe 老");
1092 /// // these will panic:
1093 /// // byte 2 lies within `ö`:
1094 /// // s.slice(2, 3);
1096 /// // byte 8 lies within `老`
1097 /// // s.slice(1, 8);
1099 /// // byte 100 is outside the string
1100 /// // s.slice(3, 100);
1102 #[unstable = "use slice notation [a..b] instead"]
1103 fn slice(&self, begin: uint, end: uint) -> &str {
1104 core_str::StrExt::slice(self[], begin, end)
1107 /// Returns a slice of the string from `begin` to its end.
1109 /// Equivalent to `self.slice(begin, self.len())`.
1111 /// Panics when `begin` does not point to a valid character, or is
1114 /// See also `slice`, `slice_to` and `slice_chars`.
1115 #[unstable = "use slice notation [a..] instead"]
1116 fn slice_from(&self, begin: uint) -> &str {
1117 core_str::StrExt::slice_from(self[], begin)
1120 /// Returns a slice of the string from the beginning to byte
1123 /// Equivalent to `self.slice(0, end)`.
1125 /// Panics when `end` does not point to a valid character, or is
1128 /// See also `slice`, `slice_from` and `slice_chars`.
1129 #[unstable = "use slice notation [0..a] instead"]
1130 fn slice_to(&self, end: uint) -> &str {
1131 core_str::StrExt::slice_to(self[], end)
1134 /// Returns a slice of the string from the character range
1135 /// [`begin`..`end`).
1137 /// That is, start at the `begin`-th code point of the string and
1138 /// continue to the `end`-th code point. This does not detect or
1139 /// handle edge cases such as leaving a combining character as the
1140 /// first code point of the string.
1142 /// Due to the design of UTF-8, this operation is `O(end)`.
1143 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
1144 /// variants that use byte indices rather than code point
1147 /// Panics if `begin` > `end` or if either `begin` or `end` is
1148 /// beyond the last character of the string.
1153 /// let s = "Löwe 老虎 Léopard";
1154 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
1155 /// assert_eq!(s.slice_chars(5, 7), "老虎");
1157 #[unstable = "may have yet to prove its worth"]
1158 fn slice_chars(&self, begin: uint, end: uint) -> &str {
1159 core_str::StrExt::slice_chars(self[], begin, end)
1162 /// Takes a bytewise (not UTF-8) slice from a string.
1164 /// Returns the substring from [`begin`..`end`).
1166 /// Caller must check both UTF-8 character boundaries and the boundaries of
1167 /// the entire slice as well.
1169 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
1170 core_str::StrExt::slice_unchecked(self[], begin, end)
1173 /// Returns true if the pattern `pat` is a prefix of the string.
1178 /// assert!("banana".starts_with("ba"));
1181 fn starts_with(&self, pat: &str) -> bool {
1182 core_str::StrExt::starts_with(self[], pat)
1185 /// Returns true if the pattern `pat` is a suffix of the string.
1190 /// assert!("banana".ends_with("nana"));
1193 fn ends_with(&self, pat: &str) -> bool {
1194 core_str::StrExt::ends_with(self[], pat)
1197 /// Returns a string with all pre- and suffixes that match
1198 /// the pattern `pat` repeatedly removed.
1202 /// * pat - a string pattern
1207 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1208 /// let x: &[_] = &['1', '2'];
1209 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
1210 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
1213 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
1214 core_str::StrExt::trim_matches(self[], pat)
1218 #[deprecated = "Replaced by `trim_matches`"]
1219 fn trim_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1220 self.trim_matches(to_trim)
1223 /// Returns a string with all prefixes that match
1224 /// the pattern `pat` repeatedly removed.
1228 /// * pat - a string pattern
1233 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1234 /// let x: &[_] = &['1', '2'];
1235 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
1236 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1239 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
1240 core_str::StrExt::trim_left_matches(self[], pat)
1244 #[deprecated = "Replaced by `trim_left_matches`"]
1245 fn trim_left_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1246 self.trim_left_matches(to_trim)
1249 /// Returns a string with all suffixes that match
1250 /// the pattern `pat` repeatedly removed.
1254 /// * pat - a string pattern
1259 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1260 /// let x: &[_] = &['1', '2'];
1261 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
1262 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1265 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
1266 core_str::StrExt::trim_right_matches(self[], pat)
1270 #[deprecated = "Replaced by `trim_right_matches`"]
1271 fn trim_right_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1272 self.trim_right_matches(to_trim)
1275 /// Check that `index`-th byte lies at the start and/or end of a
1276 /// UTF-8 code point sequence.
1278 /// The start and end of the string (when `index == self.len()`)
1279 /// are considered to be boundaries.
1281 /// Panics if `index` is greater than `self.len()`.
1286 /// let s = "Löwe 老虎 Léopard";
1287 /// assert!(s.is_char_boundary(0));
1289 /// assert!(s.is_char_boundary(6));
1290 /// assert!(s.is_char_boundary(s.len()));
1292 /// // second byte of `ö`
1293 /// assert!(!s.is_char_boundary(2));
1295 /// // third byte of `老`
1296 /// assert!(!s.is_char_boundary(8));
1298 #[unstable = "naming is uncertain with container conventions"]
1299 fn is_char_boundary(&self, index: uint) -> bool {
1300 core_str::StrExt::is_char_boundary(self[], index)
1303 /// Pluck a character out of a string and return the index of the next
1306 /// This function can be used to iterate over the Unicode characters of a
1311 /// This example manually iterates through the characters of a
1312 /// string; this should normally be done by `.chars()` or
1313 /// `.char_indices`.
1316 /// use std::str::CharRange;
1318 /// let s = "中华Việt Nam";
1320 /// while i < s.len() {
1321 /// let CharRange {ch, next} = s.char_range_at(i);
1322 /// println!("{}: {}", i, ch);
1344 /// * s - The string
1345 /// * i - The byte offset of the char to extract
1349 /// A record {ch: char, next: uint} containing the char value and the byte
1350 /// index of the next Unicode character.
1354 /// If `i` is greater than or equal to the length of the string.
1355 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1356 #[unstable = "naming is uncertain with container conventions"]
1357 fn char_range_at(&self, start: uint) -> CharRange {
1358 core_str::StrExt::char_range_at(self[], start)
1361 /// Given a byte position and a str, return the previous char and its position.
1363 /// This function can be used to iterate over a Unicode string in reverse.
1365 /// Returns 0 for next index if called on start index 0.
1369 /// If `start` is greater than the length of the string.
1370 /// If `start` is not an index following a valid UTF-8 character.
1371 #[unstable = "naming is uncertain with container conventions"]
1372 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1373 core_str::StrExt::char_range_at_reverse(self[], start)
1376 /// Plucks the character starting at the `i`th byte of a string.
1382 /// assert_eq!(s.char_at(1), 'b');
1383 /// assert_eq!(s.char_at(2), 'π');
1384 /// assert_eq!(s.char_at(4), 'c');
1389 /// If `i` is greater than or equal to the length of the string.
1390 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1391 #[unstable = "naming is uncertain with container conventions"]
1392 fn char_at(&self, i: uint) -> char {
1393 core_str::StrExt::char_at(self[], i)
1396 /// Plucks the character ending at the `i`th byte of a string.
1400 /// If `i` is greater than the length of the string.
1401 /// If `i` is not an index following a valid UTF-8 character.
1402 #[unstable = "naming is uncertain with container conventions"]
1403 fn char_at_reverse(&self, i: uint) -> char {
1404 core_str::StrExt::char_at_reverse(self[], i)
1407 /// Work with the byte buffer of a string as a byte slice.
1412 /// assert_eq!("bors".as_bytes(), b"bors");
1415 fn as_bytes(&self) -> &[u8] {
1416 core_str::StrExt::as_bytes(self[])
1419 /// Returns the byte index of the first character of `self` that
1420 /// matches the pattern `pat`.
1424 /// `Some` containing the byte index of the first matching character
1425 /// or `None` if there is no match.
1430 /// let s = "Löwe 老虎 Léopard";
1432 /// assert_eq!(s.find('L'), Some(0));
1433 /// assert_eq!(s.find('é'), Some(14));
1435 /// // the first space
1436 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1438 /// // neither are found
1439 /// let x: &[_] = &['1', '2'];
1440 /// assert_eq!(s.find(x), None);
1443 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1444 core_str::StrExt::find(self[], pat)
1447 /// Returns the byte index of the last character of `self` that
1448 /// matches the pattern `pat`.
1452 /// `Some` containing the byte index of the last matching character
1453 /// or `None` if there is no match.
1458 /// let s = "Löwe 老虎 Léopard";
1460 /// assert_eq!(s.rfind('L'), Some(13));
1461 /// assert_eq!(s.rfind('é'), Some(14));
1463 /// // the second space
1464 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1466 /// // searches for an occurrence of either `1` or `2`, but neither are found
1467 /// let x: &[_] = &['1', '2'];
1468 /// assert_eq!(s.rfind(x), None);
1471 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1472 core_str::StrExt::rfind(self[], pat)
1475 /// Returns the byte index of the first matching substring
1479 /// * `needle` - The string to search for
1483 /// `Some` containing the byte index of the first matching substring
1484 /// or `None` if there is no match.
1489 /// let s = "Löwe 老虎 Léopard";
1491 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1492 /// assert_eq!(s.find_str("muffin man"), None);
1494 #[unstable = "might get removed in favor of a more generic find in the future"]
1495 fn find_str(&self, needle: &str) -> Option<uint> {
1496 core_str::StrExt::find_str(self[], needle)
1499 /// Retrieves the first character from a string slice and returns
1500 /// it. This does not allocate a new string; instead, it returns a
1501 /// slice that points one character beyond the character that was
1502 /// shifted. If the string does not contain any characters,
1503 /// `None` is returned instead.
1508 /// let s = "Löwe 老虎 Léopard";
1509 /// let (c, s1) = s.slice_shift_char().unwrap();
1510 /// assert_eq!(c, 'L');
1511 /// assert_eq!(s1, "öwe 老虎 Léopard");
1513 /// let (c, s2) = s1.slice_shift_char().unwrap();
1514 /// assert_eq!(c, 'ö');
1515 /// assert_eq!(s2, "we 老虎 Léopard");
1517 #[unstable = "awaiting conventions about shifting and slices"]
1518 fn slice_shift_char(&self) -> Option<(char, &str)> {
1519 core_str::StrExt::slice_shift_char(self[])
1522 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1524 /// Panics if `inner` is not a direct slice contained within self.
1529 /// let string = "a\nb\nc";
1530 /// let lines: Vec<&str> = string.lines().collect();
1532 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1533 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1534 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1536 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1537 fn subslice_offset(&self, inner: &str) -> uint {
1538 core_str::StrExt::subslice_offset(self[], inner)
1541 /// Return an unsafe pointer to the string's buffer.
1543 /// The caller must ensure that the string outlives this pointer,
1544 /// and that it is not reallocated (e.g. by pushing to the
1548 fn as_ptr(&self) -> *const u8 {
1549 core_str::StrExt::as_ptr(self[])
1552 /// Return an iterator of `u16` over the string encoded as UTF-16.
1553 #[unstable = "this functionality may only be provided by libunicode"]
1554 fn utf16_units(&self) -> Utf16Units {
1555 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1558 /// Return the number of bytes in this string
1563 /// assert_eq!("foo".len(), 3);
1564 /// assert_eq!("ƒoo".len(), 4);
1568 fn len(&self) -> uint {
1569 core_str::StrExt::len(self[])
1572 /// Returns true if this slice contains no bytes
1577 /// assert!("".is_empty());
1581 fn is_empty(&self) -> bool {
1582 core_str::StrExt::is_empty(self[])
1585 /// Parse this string into the specified type.
1590 /// assert_eq!("4".parse::<u32>(), Some(4));
1591 /// assert_eq!("j".parse::<u32>(), None);
1594 #[unstable = "this method was just created"]
1595 fn parse<F: FromStr>(&self) -> Option<F> {
1596 FromStr::from_str(self[])
1599 /// Returns an iterator over the
1600 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1603 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1604 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1605 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1606 /// recommends extended grapheme cluster boundaries for general processing.
1611 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1612 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1613 /// assert_eq!(gr1.as_slice(), b);
1614 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1615 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1616 /// assert_eq!(gr2.as_slice(), b);
1618 #[unstable = "this functionality may only be provided by libunicode"]
1619 fn graphemes(&self, is_extended: bool) -> Graphemes {
1620 UnicodeStr::graphemes(self[], is_extended)
1623 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1624 /// See `graphemes()` method for more information.
1629 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1630 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1631 /// assert_eq!(gr_inds.as_slice(), b);
1633 #[unstable = "this functionality may only be provided by libunicode"]
1634 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1635 UnicodeStr::grapheme_indices(self[], is_extended)
1638 /// An iterator over the words of a string (subsequences separated
1639 /// by any sequence of whitespace). Sequences of whitespace are
1640 /// collapsed, so empty "words" are not included.
1645 /// let some_words = " Mary had\ta little \n\t lamb";
1646 /// let v: Vec<&str> = some_words.words().collect();
1647 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1650 fn words(&self) -> Words {
1651 UnicodeStr::words(self[])
1654 /// Returns true if the string contains only whitespace.
1656 /// Whitespace characters are determined by `char::is_whitespace`.
1661 /// # #![allow(deprecated)]
1662 /// assert!(" \t\n".is_whitespace());
1663 /// assert!("".is_whitespace());
1665 /// assert!( !"abc".is_whitespace());
1667 #[deprecated = "use .chars().all(|c| c.is_whitespace())"]
1668 fn is_whitespace(&self) -> bool {
1669 UnicodeStr::is_whitespace(self[])
1672 /// Returns true if the string contains only alphanumeric code
1675 /// Alphanumeric characters are determined by `char::is_alphanumeric`.
1680 /// # #![allow(deprecated)]
1681 /// assert!("Löwe老虎Léopard123".is_alphanumeric());
1682 /// assert!("".is_alphanumeric());
1684 /// assert!( !" &*~".is_alphanumeric());
1686 #[deprecated = "use .chars().all(|c| c.is_alphanumeric())"]
1687 fn is_alphanumeric(&self) -> bool {
1688 UnicodeStr::is_alphanumeric(self[])
1691 /// Returns a string's displayed width in columns, treating control
1692 /// characters as zero-width.
1694 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1695 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1696 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1697 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1698 /// recommends that these characters be treated as 1 column (i.e.,
1699 /// `is_cjk` = `false`) if the locale is unknown.
1700 #[unstable = "this functionality may only be provided by libunicode"]
1701 fn width(&self, is_cjk: bool) -> uint {
1702 UnicodeStr::width(self[], is_cjk)
1705 /// Returns a string with leading and trailing whitespace removed.
1707 fn trim(&self) -> &str {
1708 UnicodeStr::trim(self[])
1711 /// Returns a string with leading whitespace removed.
1713 fn trim_left(&self) -> &str {
1714 UnicodeStr::trim_left(self[])
1717 /// Returns a string with trailing whitespace removed.
1719 fn trim_right(&self) -> &str {
1720 UnicodeStr::trim_right(self[])
1723 /// Deprecated, call `.to_owned()` instead from the `std::borrow::ToOwned`
1725 #[deprecated = "call `.to_owned()` on `std::borrow::ToOwned` instead"]
1726 fn into_string(&self) -> String {
1731 impl StrExt for str {}
1737 use core::default::Default;
1738 use core::iter::AdditiveIterator;
1739 use super::{from_utf8, is_utf8, raw};
1740 use super::MaybeOwned::{Owned, Slice};
1741 use super::Utf8Error;
1746 assert!("" <= "foo");
1747 assert!("foo" <= "foo");
1748 assert!("foo" != "bar");
1753 assert_eq!("".len(), 0u);
1754 assert_eq!("hello world".len(), 11u);
1755 assert_eq!("\x63".len(), 1u);
1756 assert_eq!("\u{a2}".len(), 2u);
1757 assert_eq!("\u{3c0}".len(), 2u);
1758 assert_eq!("\u{2620}".len(), 3u);
1759 assert_eq!("\u{1d11e}".len(), 4u);
1761 assert_eq!("".char_len(), 0u);
1762 assert_eq!("hello world".char_len(), 11u);
1763 assert_eq!("\x63".char_len(), 1u);
1764 assert_eq!("\u{a2}".char_len(), 1u);
1765 assert_eq!("\u{3c0}".char_len(), 1u);
1766 assert_eq!("\u{2620}".char_len(), 1u);
1767 assert_eq!("\u{1d11e}".char_len(), 1u);
1768 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
1770 assert_eq!("ｈｅｌｌｏ".width(false), 10u); // fullwidth forms: 2 columns each, so 5 chars => 10
1771 assert_eq!("ｈｅｌｌｏ".width(true), 10u); // fullwidth is not "Ambiguous", so `is_cjk` does not change it
1772 assert_eq!("\0\0\0\0\0".width(false), 0u);
1773 assert_eq!("\0\0\0\0\0".width(true), 0u);
1774 assert_eq!("".width(false), 0u);
1775 assert_eq!("".width(true), 0u);
1776 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1777 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1782 assert_eq!("hello".find('l'), Some(2u));
1783 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1784 assert!("hello".find('x').is_none());
1785 assert!("hello".find(|&: c:char| c == 'x').is_none());
1786 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1787 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1792 assert_eq!("hello".rfind('l'), Some(3u));
1793 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1794 assert!("hello".rfind('x').is_none());
1795 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1796 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1797 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1802 let empty = String::from_str("");
1803 let s: String = empty.chars().collect();
1804 assert_eq!(empty, s);
1805 let data = String::from_str("ประเทศไทย中");
1806 let s: String = data.chars().collect();
1807 assert_eq!(data, s);
1811 fn test_into_bytes() {
1812 let data = String::from_str("asdf");
1813 let buf = data.into_bytes();
1814 assert_eq!(b"asdf", buf);
1818 fn test_find_str() {
1820 assert_eq!("".find_str(""), Some(0u));
1821 assert!("banana".find_str("apple pie").is_none());
1823 let data = "abcabc";
1824 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1825 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1826 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1828 let string = "ประเทศไทย中华Việt Nam";
1829 let mut data = String::from_str(string);
1830 data.push_str(string);
1831 assert!(data.find_str("ไท华").is_none());
1832 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1833 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1835 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1836 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1837 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1838 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1839 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1841 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1842 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1843 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1844 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1845 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1849 fn test_slice_chars() {
1850 fn t(a: &str, b: &str, start: uint) {
1851 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1854 t("hello", "llo", 2);
1855 t("hello", "el", 1);
1858 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1861 fn s(x: &str) -> String { x.into_string() }
1863 macro_rules! test_concat {
1864 ($expected: expr, $string: expr) => {
1866 let s: String = $string.concat();
1867 assert_eq!($expected, s);
1873 fn test_concat_for_different_types() {
1874 test_concat!("ab", vec![s("a"), s("b")]);
1875 test_concat!("ab", vec!["a", "b"]);
1876 test_concat!("ab", vec!["a", "b"].as_slice());
1877 test_concat!("ab", vec![s("a"), s("b")]);
1881 fn test_concat_for_different_lengths() {
1882 let empty: &[&str] = &[];
1883 test_concat!("", empty);
1884 test_concat!("a", ["a"]);
1885 test_concat!("ab", ["a", "b"]);
1886 test_concat!("abc", ["", "a", "bc"]);
1889 macro_rules! test_connect {
1890 ($expected: expr, $string: expr, $delim: expr) => {
1892 let s = $string.connect($delim);
1893 assert_eq!($expected, s);
1899 fn test_connect_for_different_types() {
1900 test_connect!("a-b", ["a", "b"], "-");
1901 let hyphen = "-".into_string();
1902 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1903 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1904 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1905 test_connect!("a-b", vec![s("a"), s("b")], "-");
1909 fn test_connect_for_different_lengths() {
1910 let empty: &[&str] = &[];
1911 test_connect!("", empty, "-");
1912 test_connect!("a", ["a"], "-");
1913 test_connect!("a-b", ["a", "b"], "-");
1914 test_connect!("-a-bc", ["", "a", "bc"], "-");
1919 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1920 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1921 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1922 assert_eq!("".repeat(4), String::from_str(""));
1923 assert_eq!("hi".repeat(0), String::from_str(""));
1927 fn test_unsafe_slice() {
1928 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1929 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1930 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1931 fn a_million_letter_a() -> String {
1933 let mut rs = String::new();
1935 rs.push_str("aaaaaaaaaa");
1940 fn half_a_million_letter_a() -> String {
1942 let mut rs = String::new();
1944 rs.push_str("aaaaa");
1949 let letters = a_million_letter_a();
1950 assert!(half_a_million_letter_a() ==
1951 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1957 fn test_starts_with() {
1958 assert!(("".starts_with("")));
1959 assert!(("abc".starts_with("")));
1960 assert!(("abc".starts_with("a")));
1961 assert!((!"a".starts_with("abc")));
1962 assert!((!"".starts_with("abc")));
1963 assert!((!"ödd".starts_with("-")));
1964 assert!(("ödd".starts_with("öd")));
1968 fn test_ends_with() {
1969 assert!(("".ends_with("")));
1970 assert!(("abc".ends_with("")));
1971 assert!(("abc".ends_with("c")));
1972 assert!((!"a".ends_with("abc")));
1973 assert!((!"".ends_with("abc")));
1974 assert!((!"ddö".ends_with("-")));
1975 assert!(("ddö".ends_with("dö")));
1979 fn test_is_empty() {
1980 assert!("".is_empty());
1981 assert!(!"a".is_empty());
1987 assert_eq!("".replace(a, "b"), String::from_str(""));
1988 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1989 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1991 assert!(" test test ".replace(test, "toast") ==
1992 String::from_str(" toast toast "));
1993 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1997 fn test_replace_2a() {
1998 let data = "ประเทศไทย中华";
1999 let repl = "دولة الكويت";
2002 let a2 = "دولة الكويتทศไทย中华";
2003 assert_eq!(data.replace(a, repl), a2);
2007 fn test_replace_2b() {
2008 let data = "ประเทศไทย中华";
2009 let repl = "دولة الكويت";
2012 let b2 = "ปรدولة الكويتทศไทย中华";
2013 assert_eq!(data.replace(b, repl), b2);
2017 fn test_replace_2c() {
2018 let data = "ประเทศไทย中华";
2019 let repl = "دولة الكويت";
2022 let c2 = "ประเทศไทยدولة الكويت";
2023 assert_eq!(data.replace(c, repl), c2);
2027 fn test_replace_2d() {
2028 let data = "ประเทศไทย中华";
2029 let repl = "دولة الكويت";
2032 assert_eq!(data.replace(d, repl), data);
2037 assert_eq!("ab", "abc".slice(0, 2));
2038 assert_eq!("bc", "abc".slice(1, 3));
2039 assert_eq!("", "abc".slice(1, 1));
2040 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
2042 let data = "ประเทศไทย中华";
2043 assert_eq!("ป", data.slice(0, 3));
2044 assert_eq!("ร", data.slice(3, 6));
2045 assert_eq!("", data.slice(3, 3));
2046 assert_eq!("华", data.slice(30, 33));
2048 fn a_million_letter_x() -> String {
2050 let mut rs = String::new();
2052 rs.push_str("华华华华华华华华华华");
2057 fn half_a_million_letter_x() -> String {
2059 let mut rs = String::new();
2061 rs.push_str("华华华华华");
2066 let letters = a_million_letter_x();
2067 assert!(half_a_million_letter_x() ==
2068 String::from_str(letters.slice(0u, 3u * 500000u)));
2073 let ss = "中华Việt Nam";
2075 assert_eq!("华", ss.slice(3u, 6u));
2076 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2078 assert_eq!("ab", "abc".slice(0u, 2u));
2079 assert_eq!("bc", "abc".slice(1u, 3u));
2080 assert_eq!("", "abc".slice(1u, 1u));
2082 assert_eq!("中", ss.slice(0u, 3u));
2083 assert_eq!("华V", ss.slice(3u, 7u));
2084 assert_eq!("", ss.slice(3u, 3u));
2099 fn test_slice_fail() {
2100 "中华Việt Nam".slice(0u, 2u);
2104 fn test_slice_from() {
2105 assert_eq!("abcd".slice_from(0), "abcd");
2106 assert_eq!("abcd".slice_from(2), "cd");
2107 assert_eq!("abcd".slice_from(4), "");
2110 fn test_slice_to() {
2111 assert_eq!("abcd".slice_to(0), "");
2112 assert_eq!("abcd".slice_to(2), "ab");
2113 assert_eq!("abcd".slice_to(4), "abcd");
2117 fn test_trim_left_chars() {
2118 let v: &[char] = &[];
2119 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
2120 let chars: &[char] = &['*', ' '];
2121 assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
2122 assert_eq!(" *** *** ".trim_left_chars(chars), "");
2123 assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
2125 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
2126 let chars: &[char] = &['1', '2'];
2127 assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
2128 assert_eq!("123foo1bar123".trim_left_chars(|&: c: char| c.is_numeric()), "foo1bar123");
2132 fn test_trim_right_chars() {
2133 let v: &[char] = &[];
2134 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
2135 let chars: &[char] = &['*', ' '];
2136 assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
2137 assert_eq!(" *** *** ".trim_right_chars(chars), "");
2138 assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
2140 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
2141 let chars: &[char] = &['1', '2'];
2142 assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
2143 assert_eq!("123foo1bar123".trim_right_chars(|&: c: char| c.is_numeric()), "123foo1bar");
2147 fn test_trim_chars() {
2148 let v: &[char] = &[];
2149 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
2150 let chars: &[char] = &['*', ' '];
2151 assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
2152 assert_eq!(" *** *** ".trim_chars(chars), "");
2153 assert_eq!("foo".trim_chars(chars), "foo");
2155 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
2156 let chars: &[char] = &['1', '2'];
2157 assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
2158 assert_eq!("123foo1bar123".trim_chars(|&: c: char| c.is_numeric()), "foo1bar");
2162 fn test_trim_left() {
2163 assert_eq!("".trim_left(), "");
2164 assert_eq!("a".trim_left(), "a");
2165 assert_eq!(" ".trim_left(), "");
2166 assert_eq!(" blah".trim_left(), "blah");
2167 assert_eq!(" \u{3000} wut".trim_left(), "wut");
2168 assert_eq!("hey ".trim_left(), "hey ");
2172 fn test_trim_right() {
2173 assert_eq!("".trim_right(), "");
2174 assert_eq!("a".trim_right(), "a");
2175 assert_eq!(" ".trim_right(), "");
2176 assert_eq!("blah ".trim_right(), "blah");
2177 assert_eq!("wut \u{3000} ".trim_right(), "wut");
2178 assert_eq!(" hey".trim_right(), " hey");
2183 assert_eq!("".trim(), "");
2184 assert_eq!("a".trim(), "a");
2185 assert_eq!(" ".trim(), "");
2186 assert_eq!(" blah ".trim(), "blah");
2187 assert_eq!("\nwut \u{3000} ".trim(), "wut");
2188 assert_eq!(" hey dude ".trim(), "hey dude");
2192 fn test_is_whitespace() {
2193 assert!("".is_whitespace());
2194 assert!(" ".is_whitespace());
2195 assert!("\u{2009}".is_whitespace()); // Thin space
2196 assert!(" \n\t ".is_whitespace());
2197 assert!(!" _ ".is_whitespace());
2201 fn test_slice_shift_char() {
2202 let data = "ประเทศไทย中";
2203 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
2207 fn test_slice_shift_char_2() {
2209 assert_eq!(empty.slice_shift_char(), None);
2214 // deny overlong encodings
2215 assert!(!is_utf8(&[0xc0, 0x80]));
2216 assert!(!is_utf8(&[0xc0, 0xae]));
2217 assert!(!is_utf8(&[0xe0, 0x80, 0x80]));
2218 assert!(!is_utf8(&[0xe0, 0x80, 0xaf]));
2219 assert!(!is_utf8(&[0xe0, 0x81, 0x81]));
2220 assert!(!is_utf8(&[0xf0, 0x82, 0x82, 0xac]));
2221 assert!(!is_utf8(&[0xf4, 0x90, 0x80, 0x80]));
2224 assert!(!is_utf8(&[0xED, 0xA0, 0x80]));
2225 assert!(!is_utf8(&[0xED, 0xBF, 0xBF]));
2227 assert!(is_utf8(&[0xC2, 0x80]));
2228 assert!(is_utf8(&[0xDF, 0xBF]));
2229 assert!(is_utf8(&[0xE0, 0xA0, 0x80]));
2230 assert!(is_utf8(&[0xED, 0x9F, 0xBF]));
2231 assert!(is_utf8(&[0xEE, 0x80, 0x80]));
2232 assert!(is_utf8(&[0xEF, 0xBF, 0xBF]));
2233 assert!(is_utf8(&[0xF0, 0x90, 0x80, 0x80]));
2234 assert!(is_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]));
2238 fn test_is_utf16() {
2239 use unicode::str::is_utf16;
2240 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
2248 // surrogate pairs (randomly generated with Python 3's
2249 // .encode('utf-16be'))
2250 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
2251 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
2252 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
2254 // mixtures (also random)
2255 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
2256 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
2257 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
2260 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
2263 // surrogate + regular unit
2265 // surrogate + lead surrogate
2267 // unterminated surrogate
2269 // trail surrogate without a lead
2272 // random byte sequences that Python 3's .decode('utf-16be')
2274 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
2275 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
2276 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
2277 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
2278 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
2279 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
2280 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
2281 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
2282 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
2283 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
2284 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
2285 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
2286 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
2287 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
2288 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
2289 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
2290 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
2291 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
2292 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
2293 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
2294 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
2298 fn test_as_bytes() {
2301 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2302 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2306 assert_eq!("".as_bytes(), b);
2307 assert_eq!("abc".as_bytes(), b"abc");
2308 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
2313 fn test_as_bytes_fail() {
2314 // Don't double free. (I'm not sure if this exercises the
2315 // original problem code path anymore.)
2316 let s = String::from_str("");
2317 let _bytes = s.as_bytes();
2323 let buf = "hello".as_ptr();
2325 assert_eq!(*buf.offset(0), b'h');
2326 assert_eq!(*buf.offset(1), b'e');
2327 assert_eq!(*buf.offset(2), b'l');
2328 assert_eq!(*buf.offset(3), b'l');
2329 assert_eq!(*buf.offset(4), b'o');
2334 fn test_subslice_offset() {
2335 let a = "kernelsprite";
2336 let b = a.slice(7, a.len());
2337 let c = a.slice(0, a.len() - 6);
2338 assert_eq!(a.subslice_offset(b), 7);
2339 assert_eq!(a.subslice_offset(c), 0);
2341 let string = "a\nb\nc";
2342 let lines: Vec<&str> = string.lines().collect();
2343 assert_eq!(string.subslice_offset(lines[0]), 0);
2344 assert_eq!(string.subslice_offset(lines[1]), 2);
2345 assert_eq!(string.subslice_offset(lines[2]), 4);
2350 fn test_subslice_offset_2() {
2351 let a = "alchemiter";
2352 let b = "cruxtruder";
2353 a.subslice_offset(b);
2357 fn vec_str_conversions() {
2358 let s1: String = String::from_str("All mimsy were the borogoves");
2360 let v: Vec<u8> = s1.as_bytes().to_vec();
2361 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
2362 let mut i: uint = 0u;
2363 let n1: uint = s1.len();
2364 let n2: uint = v.len();
2367 let a: u8 = s1.as_bytes()[i];
2368 let b: u8 = s2.as_bytes()[i];
2377 fn test_contains() {
2378 assert!("abcde".contains("bcd"));
2379 assert!("abcde".contains("abcd"));
2380 assert!("abcde".contains("bcde"));
2381 assert!("abcde".contains(""));
2382 assert!("".contains(""));
2383 assert!(!"abcde".contains("def"));
2384 assert!(!"".contains("a"));
2386 let data = "ประเทศไทย中华Việt Nam";
2387 assert!(data.contains("ประเ"));
2388 assert!(data.contains("ะเ"));
2389 assert!(data.contains("中华"));
2390 assert!(!data.contains("ไท华"));
2394 fn test_contains_char() {
2395 assert!("abc".contains_char('b'));
2396 assert!("a".contains_char('a'));
2397 assert!(!"abc".contains_char('d'));
2398 assert!(!"".contains_char('a'));
2403 let s = "ศไทย中华Việt Nam";
2404 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2406 for ch in v.iter() {
2407 assert!(s.char_at(pos) == *ch);
2408 pos += String::from_char(1, *ch).len();
2413 fn test_char_at_reverse() {
2414 let s = "ศไทย中华Việt Nam";
2415 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2416 let mut pos = s.len();
2417 for ch in v.iter().rev() {
2418 assert!(s.char_at_reverse(pos) == *ch);
2419 pos -= String::from_char(1, *ch).len();
2424 fn test_escape_unicode() {
2425 assert_eq!("abc".escape_unicode(),
2426 String::from_str("\\u{61}\\u{62}\\u{63}"));
2427 assert_eq!("a c".escape_unicode(),
2428 String::from_str("\\u{61}\\u{20}\\u{63}"));
2429 assert_eq!("\r\n\t".escape_unicode(),
2430 String::from_str("\\u{d}\\u{a}\\u{9}"));
2431 assert_eq!("'\"\\".escape_unicode(),
2432 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2433 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2434 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2435 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2436 String::from_str("\\u{100}\\u{ffff}"));
2437 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2438 String::from_str("\\u{10000}\\u{10ffff}"));
2439 assert_eq!("ab\u{fb00}".escape_unicode(),
2440 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2441 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2442 String::from_str("\\u{1d4ea}\\u{d}"));
2446 fn test_escape_default() {
2447 assert_eq!("abc".escape_default(), String::from_str("abc"));
2448 assert_eq!("a c".escape_default(), String::from_str("a c"));
2449 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2450 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2451 assert_eq!("\u{100}\u{ffff}".escape_default(),
2452 String::from_str("\\u{100}\\u{ffff}"));
2453 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2454 String::from_str("\\u{10000}\\u{10ffff}"));
2455 assert_eq!("ab\u{fb00}".escape_default(),
2456 String::from_str("ab\\u{fb00}"));
2457 assert_eq!("\u{1d4ea}\r".escape_default(),
2458 String::from_str("\\u{1d4ea}\\r"));
2462 fn test_total_ord() {
2463 assert_eq!("1234".cmp("123"), Greater); // a longer string with an equal prefix orders after the shorter one
2464 assert_eq!("123".cmp("1234"), Less); // these were bare `==` statements whose results were discarded
2465 assert_eq!("1234".cmp("1234"), Equal);
2466 assert_eq!("12345555".cmp("123456"), Less); // decided at the first differing byte: '5' < '6'
2467 assert_eq!("22".cmp("1234"), Greater); // decided at the first byte: '2' > '1'
2471 fn test_char_range_at() {
2472 let data = "b¢€𤭢𤭢€¢b";
2473 assert_eq!('b', data.char_range_at(0).ch);
2474 assert_eq!('¢', data.char_range_at(1).ch);
2475 assert_eq!('€', data.char_range_at(3).ch);
2476 assert_eq!('𤭢', data.char_range_at(6).ch);
2477 assert_eq!('𤭢', data.char_range_at(10).ch);
2478 assert_eq!('€', data.char_range_at(14).ch);
2479 assert_eq!('¢', data.char_range_at(17).ch);
2480 assert_eq!('b', data.char_range_at(19).ch);
2484 fn test_char_range_at_reverse_underflow() {
2485 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2489 fn test_iterator() {
2490 let s = "ศไทย中华Việt Nam";
2491 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2494 let mut it = s.chars();
2497 assert_eq!(c, v[pos]);
2500 assert_eq!(pos, v.len());
2504 fn test_rev_iterator() {
2505 let s = "ศไทย中华Việt Nam";
2506 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2509 let mut it = s.chars().rev();
2512 assert_eq!(c, v[pos]);
2515 assert_eq!(pos, v.len());
2519 fn test_chars_decoding() {
2520 let mut bytes = [0u8, ..4];
2521 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2522 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2523 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2524 if Some(c) != s.chars().next() {
2525 panic!("character {:x}={} does not decode correctly", c as u32, c);
2531 fn test_chars_rev_decoding() {
2532 let mut bytes = [0u8, ..4];
2533 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2534 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2535 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2536 if Some(c) != s.chars().rev().next() {
2537 panic!("character {:x}={} does not decode correctly", c as u32, c);
2543 fn test_iterator_clone() {
2544 let s = "ศไทย中华Việt Nam";
2545 let mut it = s.chars();
2547 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2551 fn test_bytesator() {
2552 let s = "ศไทย中华Việt Nam";
2554 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2555 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2560 for b in s.bytes() {
2561 assert_eq!(b, v[pos]);
// Walks `s.bytes().rev()` and compares every byte with `v[pos]`, with `pos` starting
// at `v.len()`.
// NOTE(review): the line opening the table `v`, its tail, and the statement that moves
// `pos` (presumably a decrement before the comparison) are not visible in this excerpt.
2567 fn test_bytes_revator() {
2568 let s = "ศไทย中华Việt Nam";
2570 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2571 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2574 let mut pos = v.len();
2576 for b in s.bytes().rev() {
2578 assert_eq!(b, v[pos]);
// Checks `s.char_indices()` item by item: each item must equal `(p[pos], v[pos])`,
// where `p` holds the expected index and `v` the expected char. The indices look like
// byte offsets (the Thai and CJK chars advance them by 3). At the end `pos` must match
// the length of both tables.
// NOTE(review): the `pos` declaration, loop header and increment are missing here.
2583 fn test_char_indicesator() {
2584 let s = "ศไทย中华Việt Nam";
2585 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2586 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2589 let mut it = s.char_indices();
2592 assert_eq!(c, (p[pos], v[pos]));
2595 assert_eq!(pos, v.len());
2596 assert_eq!(pos, p.len());
// Reverse counterpart of the `char_indices` check: iterates `s.char_indices().rev()`
// and expects `(p[pos], v[pos])` at each step, with `p` and `v` listed back to front.
// `pos` must finish equal to the length of both tables.
// NOTE(review): the `pos` declaration, loop header and increment are missing here.
2600 fn test_char_indices_revator() {
2601 let s = "ศไทย中华Việt Nam";
2602 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2603 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2606 let mut it = s.char_indices().rev();
2609 assert_eq!(c, (p[pos], v[pos]));
2612 assert_eq!(pos, v.len());
2613 assert_eq!(pos, p.len());
// Exercises `splitn(3, sep)` with both a `char` separator and an equivalent closure.
// As the expected vectors show, a count of 3 performs at most three splits and returns
// the unsplit remainder as a fourth element. The second pair of cases repeats this with
// the multi-byte separator 'ä'.
2617 fn test_splitn_char_iterator() {
2618 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2620 let split: Vec<&str> = data.splitn(3, ' ').collect();
2621 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2623 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2624 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2627 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2628 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2630 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2631 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Contrasts `split` with `split_terminator` on input that both starts and ends with
// the separator: `split('\n')` yields an empty piece at each end, while
// `split_terminator('\n')` omits the trailing empty piece (the leading one remains).
2635 fn test_split_char_iterator_no_trailing() {
2636 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2638 let split: Vec<&str> = data.split('\n').collect();
2639 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2641 let split: Vec<&str> = data.split_terminator('\n').collect();
2642 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// NOTE(review): the embedded line numbers jump from 2642 to 2647, so the header of the
// function these statements belong to is not visible; they appear to form a separate
// test of `words()` rather than a continuation of the split test above.
// Checks that `words()` breaks the input on runs of whitespace (newline, space and tab
// all occur here) and yields no empty pieces for leading or trailing whitespace.
2647 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2648 let words: Vec<&str> = data.words().collect();
2649 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Table-driven check of `nfd_chars()`: each `t!(input, expected)` collects
// `input.nfd_chars()` into a `String` and compares it with `expected`.
// NOTE(review): the line that opens the local `t!` macro definition (embedded 2654) is
// missing from this excerpt; only its single arm is visible.
// The cases cover precomposed letters being split into base + combining marks, marks
// being reordered (U+1E0B U+0323 -> d U+0323 U+0307), U+2126 mapping to U+03A9, Hangul
// syllables being split into jamo, and inputs that are left unchanged.
2653 fn test_nfd_chars() {
2655 ($input: expr, $expected: expr) => {
2656 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2660 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2661 t!("\u{2026}", "\u{2026}");
2662 t!("\u{2126}", "\u{3a9}");
2663 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2664 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2665 t!("a\u{301}", "a\u{301}");
2666 t!("\u{301}a", "\u{301}a");
2667 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2668 t!("\u{ac1c}", "\u{1100}\u{1162}");
// Table-driven check of `nfkd_chars()`: each `t!(input, expected)` collects
// `input.nfkd_chars()` into a `String` and compares it with `expected`.
// NOTE(review): the line that opens the local `t!` macro definition (embedded 2673) is
// missing from this excerpt.
// Compared with the NFD table, the differing expectations are U+01C4 expanding to
// "DZ" + U+030C and U+2026 expanding to "..."; the remaining rows expect the same output.
2672 fn test_nfkd_chars() {
2674 ($input: expr, $expected: expr) => {
2675 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2679 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2680 t!("\u{2026}", "...");
2681 t!("\u{2126}", "\u{3a9}");
2682 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2683 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2684 t!("a\u{301}", "a\u{301}");
2685 t!("\u{301}a", "\u{301}a");
2686 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2687 t!("\u{ac1c}", "\u{1100}\u{1162}");
// Table-driven check of `nfc_chars()`: each `t!(input, expected)` collects
// `input.nfc_chars()` into a `String` and compares it with `expected`.
// NOTE(review): the line that opens the local `t!` macro definition (embedded 2692) is
// missing from this excerpt.
// The cases cover composition of base + mark into a precomposed char ("a" U+0301 ->
// U+00E1), a leading mark that has nothing to compose with, Hangul syllables staying
// composed, and a longer sequence whose marks are reordered around the composed U+00E0.
2691 fn test_nfc_chars() {
2693 ($input: expr, $expected: expr) => {
2694 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2698 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2699 t!("\u{2026}", "\u{2026}");
2700 t!("\u{2126}", "\u{3a9}");
2701 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2702 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2703 t!("a\u{301}", "\u{e1}");
2704 t!("\u{301}a", "\u{301}a");
2705 t!("\u{d4db}", "\u{d4db}");
2706 t!("\u{ac1c}", "\u{ac1c}");
2707 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// Table-driven check of `nfkc_chars()`: each `t!(input, expected)` collects
// `input.nfkc_chars()` into a `String` and compares it with `expected`.
// NOTE(review): the line that opens the local `t!` macro definition (embedded 2712) is
// missing from this excerpt.
// Compared with the NFC table, the differing expectations are U+01C4 becoming
// "D" + U+017D and U+2026 becoming "..."; the remaining rows expect the same output.
2711 fn test_nfkc_chars() {
2713 ($input: expr, $expected: expr) => {
2714 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2718 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2719 t!("\u{2026}", "...");
2720 t!("\u{2126}", "\u{3a9}");
2721 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2722 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2723 t!("a\u{301}", "\u{e1}");
2724 t!("\u{301}a", "\u{301}a");
2725 t!("\u{d4db}", "\u{d4db}");
2726 t!("\u{ac1c}", "\u{ac1c}");
2727 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// NOTE(review): the embedded line numbers jump from 2727 to 2732, so the header of the
// function these statements belong to is not visible; they appear to form a separate
// test of `lines()` rather than part of the normalization test above.
// Checks that `lines()` keeps interior empty lines and returns the same four lines
// whether or not the input ends with a final "\n".
2732 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2733 let lines: Vec<&str> = data.lines().collect();
2734 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2736 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2737 let lines: Vec<&str> = data.lines().collect();
2738 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
// Data-driven test of `graphemes()` and `grapheme_indices()`.
//
// Two tables feed it:
//   * `test_same` (325 entries): `(input, expected)` pairs where the same expected
//     segmentation is asserted for both `graphemes(true)` and `graphemes(false)`.
//   * `test_diff` (23 entries): `(input, expected_true, expected_false)` triples where
//     the two modes are expected to segment differently.
// Every entry is checked in the forward and reverse direction.
// NOTE(review): the boolean presumably selects extended vs. legacy grapheme clusters —
// confirm against the `graphemes` documentation.
2742 fn test_graphemes() {
2743 use core::iter::order;
2744 // official Unicode test data
2745 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2746 let test_same: [(_, &[_]), .. 325] = [
2747 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2748 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2749 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2750 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2751 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2752 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2753 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2754 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2755 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2756 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2757 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2758 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2759 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2760 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2761 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2762 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2763 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2764 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2765 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2766 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2767 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2768 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2769 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2770 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2771 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2772 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2773 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2774 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2775 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2776 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2777 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2778 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2779 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2780 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2781 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2782 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2783 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2784 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2785 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2786 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2787 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2788 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2789 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2790 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2791 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2792 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2793 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2794 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2795 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2796 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2797 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2798 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2799 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2800 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2801 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2802 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2803 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2804 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2805 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2806 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2807 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2808 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2809 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2810 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2811 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2812 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2813 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2814 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2815 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2816 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2817 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2818 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2819 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2820 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2821 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2822 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2823 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2824 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2825 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2826 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2827 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2828 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2829 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2830 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2831 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2832 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2833 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2834 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2835 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2836 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2837 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2838 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2839 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2840 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2841 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2842 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2843 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2844 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2845 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2846 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2847 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2848 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2849 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2850 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2851 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2852 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2853 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2854 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2855 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2856 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2857 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2858 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2859 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2860 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2861 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2862 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2863 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2864 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2865 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2866 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2867 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2868 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2869 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2870 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2871 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2872 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2873 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2874 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2875 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2876 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2877 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2878 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2879 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2880 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2881 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2882 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2883 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2884 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2885 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2886 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2887 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2888 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2889 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2890 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2891 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2892 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2893 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2894 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2895 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2896 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2897 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2898 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2899 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2900 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2901 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2902 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2903 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2904 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2905 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2906 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2907 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2908 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2909 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2910 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2911 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2912 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2913 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2914 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2915 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2916 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2917 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2918 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2919 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2920 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2921 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2922 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2923 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2924 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2925 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2926 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2927 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2928 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2929 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2930 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2931 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2932 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2933 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2934 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2935 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2936 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2937 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2938 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2939 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2940 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2941 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2942 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2943 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2944 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2945 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2946 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2947 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2948 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2949 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2950 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2951 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2952 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2953 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2954 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2955 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2956 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2957 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2958 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2959 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2960 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2961 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2962 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2963 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2964 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2965 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2966 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2967 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2968 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2969 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2970 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2971 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2972 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2973 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2974 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2975 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2976 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2977 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2978 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2979 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2980 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2981 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2982 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2983 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2984 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2985 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2986 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2987 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2988 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2989 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2990 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2991 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2992 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2993 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2994 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2995 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2996 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2997 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2998 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2999 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
3000 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
3001 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
3002 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
3003 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
3004 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
3005 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
3006 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
3007 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
3008 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
3009 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
3010 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
3011 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
3012 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
3013 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
3014 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
3015 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
3016 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
3017 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
3018 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
3019 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
3020 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
3021 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
3022 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
3023 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
3024 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
3025 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
3026 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
3027 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
3028 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
3029 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
3030 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
3031 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
3032 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
3033 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
3034 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
3035 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
3036 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
3037 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
3038 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
3039 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
3040 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
3041 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
3042 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
3043 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
3044 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
3045 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
3046 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
3047 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
3048 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
3049 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
3050 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
3051 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
3052 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
3053 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
3054 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
3055 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
3056 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
3057 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
3058 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
3059 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
3060 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
3061 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
3062 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
3063 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
3064 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
3065 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
3066 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
3067 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
3068 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
3069 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
3070 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
3071 "\u{1F1E7}\u{1F1E8}"]),
3072 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
3073 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
3074 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
3075 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
3078 let test_diff: [(_, &[_], &[_]), .. 23] = [
3079 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
3080 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
3081 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
3082 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
3083 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
3084 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
3085 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
3086 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
3087 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
3088 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
3089 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
3090 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
3091 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
3092 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
3093 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
3094 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
3095 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
3096 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
3097 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
3098 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
3099 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
3100 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
3101 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
// Entries in `test_same` must segment to `g` for both argument values of `graphemes`,
// in forward order and (compared against `g` reversed) in reverse order.
3104 for &(s, g) in test_same.iter() {
3105 // test forward iterator
3106 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
3107 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
3109 // test reverse iterator
3110 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
3111 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
// Entries in `test_diff` carry separate expectations: `gt` for `graphemes(true)` and
// `gf` for `graphemes(false)`.
3114 for &(s, gt, gf) in test_diff.iter() {
3115 // test forward iterator
3116 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
3117 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
3119 // test reverse iterator
3120 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
3121 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
3124 // test the indices iterators
3125 let s = "a̐éö̲\r\n";
3126 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
3127 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
3128 assert_eq!(gr_inds, b);
3129 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
3130 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
3131 assert_eq!(gr_inds, b);
// `size_hint` is checked before and after the iterator is drained through `by_ref()`:
// (1, Some(13)) up front, (0, Some(0)) once exhausted.
// NOTE(review): the assertion on the count `c` (embedded line 3138) is not visible here.
3132 let mut gr_inds_iter = s.grapheme_indices(true);
3134 let gr_inds = gr_inds_iter.by_ref();
3135 let e1 = gr_inds.size_hint();
3136 assert_eq!(e1, (1, Some(13)));
3137 let c = gr_inds.count();
3140 let e2 = gr_inds_iter.size_hint();
3141 assert_eq!(e2, (0, Some(0)));
3143 // make sure the reverse iterator does the right thing with "\n" at beginning of string
// NOTE(review): embedded line 3144, which presumably rebinds `s` to the input for this
// case, and the final assertion comparing `gr` with `b` are missing from this excerpt.
3145 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
3146 let b: &[_] = &["\r", "\r\n", "\n"];
// Table-driven check of `split_str`. The local helper `t(s, sep, u)` collects
// `s.split_str(sep)` into a `Vec<&str>`.
// NOTE(review): the helper's assertion against `u` (embedded lines 3154-3155) is
// missing from this excerpt.
// The cases cover: no match (whole input returned), separators at either end producing
// empty pieces, a multi-byte separator, and repeated separators where matches are taken
// left to right without overlapping ("zzz" split on "zz" gives ["", "z"]).
3151 fn test_split_strator() {
3152 fn t(s: &str, sep: &str, u: &[&str]) {
3153 let v: Vec<&str> = s.split_str(sep).collect();
3156 t("--1233345--", "12345", &["--1233345--"]);
3157 t("abc::hello::there", "::", &["abc", "hello", "there"]);
3158 t("::hello::there", "::", &["", "hello", "there"]);
3159 t("hello::there::", "::", &["hello", "there", ""]);
3160 t("::hello::there::", "::", &["", "hello", "there", ""]);
3161 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
3162 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
3163 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
3164 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
3166 t("zz", "zz", &["",""]);
3167 t("ok", "z", &["ok"]);
3168 t("zzz", "zz", &["","z"]);
3169 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a string-like type is the empty string. The
// generic helper `t<S>` builds `S::default()` and asserts `as_slice()` is "".
// NOTE(review): the calls that instantiate `t` for concrete types (embedded lines
// 3178-3183) are missing from this excerpt.
3173 fn test_str_default() {
3174 use core::default::Default;
3175 fn t<S: Default + Str>() {
3176 let s: S = Default::default();
3177 assert_eq!(s.as_slice(), "");
// Checks `len()` through a slice of string slices. The helper `sum_len` adds up the
// `len()` of every element; the three assertions feed it literals, slices borrowed
// from temporary `String`s, and a slice of a single owned `String`, each totalling 5.
3185 fn test_str_container() {
3186 fn sum_len(v: &[&str]) -> uint {
3187 v.iter().map(|x| x.len()).sum()
3190 let s = String::from_str("01234");
3191 assert_eq!(5, sum_len(&["012", "", "34"]));
3192 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
3193 String::from_str("2").as_slice(),
3194 String::from_str("34").as_slice(),
3195 String::from_str("").as_slice()]));
3196 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on valid ASCII, valid multi-byte text, and an invalid input.
// NOTE(review): the first binding of `xs` (embedded line 3201) is missing from this
// excerpt; from the assertion it presumably holds the bytes of "hello".
3200 fn test_str_from_utf8() {
3202 assert_eq!(from_utf8(xs), Ok("hello"));
3204 let xs = "ศไทย中华Việt Nam".as_bytes();
3205 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// A trailing 0xFF byte is rejected; this test pins the reported error to `TooShort`.
3207 let xs = b"hello\xFF";
3208 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
// Exercises the trait surface of the `Slice` and `Owned` values (going by the test
// name, the two variants of the maybe-owned string type): length, `as_slice`,
// formatting with `{}`, ordering via `lt`, and equality with `Default::default()`.
// The final assertions check that a `Slice` and an `Owned` holding the same text
// compare `Equal` and are `equiv` in both directions.
3212 fn test_maybe_owned_traits() {
3213 let s = Slice("abcde");
3214 assert_eq!(s.len(), 5);
3215 assert_eq!(s.as_slice(), "abcde");
3216 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
3217 assert_eq!(format!("{}", s).as_slice(), "abcde");
3218 assert!(s.lt(&Owned(String::from_str("bcdef"))));
3219 assert_eq!(Slice(""), Default::default());
3221 let o = Owned(String::from_str("abcde"));
3222 assert_eq!(o.len(), 5);
3223 assert_eq!(o.as_slice(), "abcde");
3224 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
3225 assert_eq!(format!("{}", o).as_slice(), "abcde");
3226 assert!(o.lt(&Slice("bcdef")));
3227 assert_eq!(Owned(String::from_str("")), Default::default());
3229 assert!(s.cmp(&o) == Equal);
3230 assert!(s.equiv(&o));
3232 assert!(o.cmp(&s) == Equal);
3233 assert!(o.equiv(&s));
// Checks the variant predicates: a `Slice` value reports `is_slice()` and not
// `is_owned()`, and an `Owned` value reports the opposite.
3237 fn test_maybe_owned_methods() {
3238 let s = Slice("abcde");
3239 assert!(s.is_slice());
3240 assert!(!s.is_owned());
3242 let o = Owned(String::from_str("abcde"));
3243 assert!(!o.is_slice());
3244 assert!(o.is_owned());
// Checks that cloning preserves the text for both variants. All four pairings of
// `Owned`/`Slice` on either side are asserted equal, so equality here depends on the
// string contents and not on which variant holds them.
3248 fn test_maybe_owned_clone() {
3249 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
3250 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
3251 assert_eq!(Slice("abcde"), Slice("abcde").clone());
3252 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `to_string()` on either a `Slice` or an `Owned` value produces a
// `String` with the same text.
3256 fn test_maybe_owned_into_string() {
3257 assert_eq!(Slice("abcde").to_string(), String::from_str("abcde"));
3258 assert_eq!(Owned(String::from_str("abcde")).to_string(),
3259 String::from_str("abcde"));
// Checks `into_maybe_owned()` on both a `&str` and a `String`. Each result is compared
// against both a `Slice` and an `Owned` holding the same text, so the assertions pin
// the contents only, not which variant the conversion produces.
3263 fn test_into_maybe_owned() {
3264 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
3265 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
3266 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
3267 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
3268 Owned(String::from_str("abcde")));
3275 use prelude::{SliceExt, IteratorExt, DoubleEndedIteratorExt, SliceConcatExt};
3277 use test::black_box;
// Benchmark: counts the chars of a mixed Thai/CJK/Latin string via `chars().count()`.
3280 fn char_iterator(b: &mut Bencher) {
3281 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3283 b.iter(|| s.chars().count());
// Benchmark: walks the chars of a mixed-script string with an explicit `for` loop,
// passing each char to `black_box` so the loop body is not optimized away.
// NOTE(review): the enclosing `b.iter(...)` call (embedded line 3290) is missing from
// this excerpt.
3287 fn char_iterator_for(b: &mut Bencher) {
3288 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3291 for ch in s.chars() { black_box(ch) }
// Benchmark: `chars().count()` over an ASCII-only, multi-line string literal.
3296 fn char_iterator_ascii(b: &mut Bencher) {
3297 let s = "Mary had a little lamb, Little lamb
3298 Mary had a little lamb, Little lamb
3299 Mary had a little lamb, Little lamb
3300 Mary had a little lamb, Little lamb
3301 Mary had a little lamb, Little lamb
3302 Mary had a little lamb, Little lamb";
3304 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed-script string while iterating in reverse
// (`chars().rev().count()`).
3308 fn char_iterator_rev(b: &mut Bencher) {
3309 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3311 b.iter(|| s.chars().rev().count());
// Benchmark: walks the chars of a mixed-script string in reverse with an explicit
// `for` loop, passing each char to `black_box`.
// NOTE(review): the enclosing `b.iter(...)` call (embedded line 3318) is missing from
// this excerpt.
3315 fn char_iterator_rev_for(b: &mut Bencher) {
3316 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3319 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: iterates `char_indices()` over a mixed multi-byte/ASCII string
// and asserts that it yields as many items as `char_len()` reports.
3324 fn char_indicesator(b: &mut Bencher) {
3325 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count is computed once, outside the closure given to `b.iter`.
3326 let len = s.char_len();
3328 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: iterates `char_indices().rev()` over a mixed multi-byte/ASCII
// string and asserts that it yields as many items as `char_len()` reports.
3332 fn char_indicesator_rev(b: &mut Bencher) {
3333 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count is computed once, outside the closure given to `b.iter`.
3334 let len = s.char_len();
3336 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits multi-byte text on the ASCII char 'V'. The text contains
// two 'V's, so the split is asserted to produce 3 pieces.
3340 fn split_unicode_ascii(b: &mut Bencher) {
3341 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3343 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits multi-byte text on 'V' through a locally defined `CharEq`
// wrapper, `NotAscii`, whose `only_ascii()` reports `false`. The text contains
// two 'V's, so the split is asserted to produce 3 pieces.
3347 fn split_unicode_not_ascii(b: &mut Bencher) {
3348 struct NotAscii(char);
3349 impl CharEq for NotAscii {
3350 fn matches(&mut self, c: char) -> bool {
// Unpack the wrapped char; the comparison that uses it is on a line not
// visible in this view.
3351 let NotAscii(cc) = *self;
// Always `false`, even though the wrapped char used below ('V') is ASCII;
// presumably this keeps the splitter off an ASCII-only path — TODO confirm.
3354 fn only_ascii(&self) -> bool { false }
3356 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3358 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space char and asserts that the
// number of pieces matches a count taken up front.
3363 fn split_ascii(b: &mut Bencher) {
3364 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count, computed once outside the closure given to `b.iter`.
3365 let len = s.split(' ').count();
3367 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on a space wrapped in a locally defined
// `CharEq` type, `NotAscii`, whose `only_ascii()` reports `false`. The piece
// count must equal the one obtained by splitting on the plain ' ' char.
3371 fn split_not_ascii(b: &mut Bencher) {
3372 struct NotAscii(char);
3373 impl CharEq for NotAscii {
3375 fn matches(&mut self, c: char) -> bool {
// Unpack the wrapped char; the comparison that uses it is on a line not
// visible in this view.
3376 let NotAscii(cc) = *self;
// Always `false`, even though the wrapped char used below (' ') is ASCII;
// presumably this keeps the splitter off an ASCII-only path — TODO confirm.
3379 fn only_ascii(&self) -> bool { false }
3381 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain-char split, computed outside `b.iter`.
3382 let len = s.split(' ').count();
3384 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function (`pred`) as the
// separator test, and asserts the piece count equals that of splitting on ' '.
3388 fn split_extern_fn(b: &mut Bencher) {
3389 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain-char split, computed outside `b.iter`.
3390 let len = s.split(' ').count();
// Separator predicate: true only for the space char.
3391 fn pred(c: char) -> bool { c == ' ' }
3393 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the separator test,
// and asserts the piece count equals that of splitting on ' '.
3397 fn split_closure(b: &mut Bencher) {
3398 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain-char split, computed outside `b.iter`.
3399 let len = s.split(' ').count();
// The closure is true only for the space char.
3401 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a `&[char]` slice as the
// separator set, and asserts the piece count equals that of splitting on ' '.
3405 fn split_slice(b: &mut Bencher) {
3406 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count from the plain-char split, computed outside `b.iter`.
3407 let len = s.split(' ').count();
// One-element separator set containing only the space char.
3409 let c: &[char] = &[' '];
3410 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark fixture: a byte-string literal of ASCII text that is asserted to
// be exactly 100 bytes long.
// NOTE(review): the timed call itself is not visible in this view; judging by
// the function name it is a UTF-8 validity check — confirm.
3414 fn is_utf8_100_ascii(b: &mut Bencher) {
3416 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
3417 Lorem ipsum dolor sit amet, consectetur. ";
3419 assert_eq!(100, s.len());
// Benchmark fixture: non-ASCII text from several scripts, taken as bytes and
// asserted to be exactly 100 bytes long.
// NOTE(review): the timed call itself is not visible in this view; judging by
// the function name it is a UTF-8 validity check — confirm.
3426 fn is_utf8_100_multibyte(b: &mut Bencher) {
3427 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
3428 assert_eq!(100, s.len());
// Benchmark: joins ten copies of a mixed multi-byte/ASCII string with
// `connect(sep)` and checks the byte length of the result.
// NOTE(review): the binding of `sep` and the enclosing `b.iter` call are on
// lines not visible in this view.
3435 fn bench_connect(b: &mut Bencher) {
3436 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3438 let v = vec![s, s, s, s, s, s, s, s, s, s];
// Ten pieces joined by nine separators.
3440 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a one-sentence haystack; the needle is asserted to
// be present.
// NOTE(review): the binding of `needle` and the enclosing `b.iter` call are on
// lines not visible in this view.
3445 fn bench_contains_short_short(b: &mut Bencher) {
3446 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3450 assert!(haystack.contains(needle));
// Benchmark: `contains` with the short needle "english" against a haystack of
// several paragraphs of lorem-ipsum text; the needle is asserted to be absent.
// NOTE(review): the line that opens the haystack string literal and the
// enclosing `b.iter` call are not visible in this view.
3455 fn bench_contains_short_long(b: &mut Bencher) {
3457 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
3458 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
3459 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
3460 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
3461 tempus vel, gravida nec quam.
3463 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
3464 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
3465 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
3466 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
3467 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
3468 interdum. Curabitur ut nisi justo.
3470 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
3471 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3472 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3473 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3474 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3475 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3476 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3477 Aliquam sit amet placerat lorem.
3479 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3480 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3481 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3482 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3483 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3486 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3487 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3488 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3489 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3490 malesuada sollicitudin quam eu fermentum.";
3491 let needle = "english";
3494 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack consists only of `a`s and the
// needle is a run of `a`s ending in a single `b`, so each candidate alignment
// matches the leading `a`s and can only fail at the trailing `b`. The needle
// is asserted to be absent.
// NOTE(review): the enclosing `b.iter` call is not visible in this view.
3499 fn bench_contains_bad_naive(b: &mut Bencher) {
3500 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3501 let needle = "aaaaaaaab";
3504 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the haystack;
// the needle is asserted to be present.
// NOTE(review): the enclosing `b.iter` call is not visible in this view.
3509 fn bench_contains_equal(b: &mut Bencher) {
3510 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3511 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3514 assert!(haystack.contains(needle));