1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 use self::MaybeOwned::*;
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, Cow, ToOwned};
60 use core::clone::Clone;
61 use core::cmp::{Equiv, PartialEq, Eq, PartialOrd, Ord, Ordering};
63 use core::default::Default;
66 use core::iter::AdditiveIterator;
67 use core::iter::{mod, range, Iterator, IteratorExt};
68 use core::kinds::Sized;
70 use core::option::Option::{mod, Some, None};
71 use core::slice::AsSlice;
72 use core::str as core_str;
73 use unicode::str::{UnicodeStr, Utf16Encoder};
75 use ring_buf::RingBuf;
80 use slice::SliceConcatExt;
82 pub use core::str::{from_utf8, CharEq, Chars, CharIndices};
83 pub use core::str::{Bytes, CharSplits, is_utf8};
84 pub use core::str::{CharSplitsN, Lines, LinesAny, MatchIndices, StrSplits, SplitStr};
85 pub use core::str::{CharRange};
86 pub use core::str::{FromStr, from_str, Utf8Error};
87 pub use core::str::Str;
88 pub use core::str::{from_utf8_unchecked, from_c_str};
89 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
90 pub use core::str::{Split, SplitTerminator};
91 pub use core::str::{SplitN, RSplitN};
94 Section: Creating a string
// Concatenation and joining for slices whose elements can be viewed as `&str`
// (the `Str` bound is what supplies `as_slice()` on each element).
97 impl<S: Str> SliceConcatExt<str, String> for [S] {
// Appends every element to a single `String`, with nothing in between.
98 fn concat(&self) -> String {
99 let s = self.as_slice();
// NOTE(review): the condition guarding this early return is not visible in
// this extract; presumably it is taken when `s` is empty -- confirm.
102 return String::new();
105 // `len` calculation may overflow but push_str will check boundaries
// Total byte length of all elements, used only to pre-size `result`.
106 let len = s.iter().map(|s| s.as_slice().len()).sum();
107 let mut result = String::with_capacity(len);
110 result.push_str(s.as_slice())
// Appends every element to a single `String`, using `sep` as the separator.
116 fn connect(&self, sep: &str) -> String {
117 let s = self.as_slice();
// NOTE(review): as in `concat`, the guard for this early return is not visible
// here; the comment below implies it covers the empty-slice case -- confirm.
120 return String::new();
128 // this is wrong without the guarantee that `self` is non-empty
// (`s.len() - 1` below would underflow if `s` had no elements)
129 // `len` calculation may overflow but push_str will check boundaries
130 let len = sep.len() * (s.len() - 1)
131 + s.iter().map(|s| s.as_slice().len()).sum();
132 let mut result = String::with_capacity(len);
// `first` presumably suppresses the separator before the first element; the
// branch that reads it is not visible in this extract.
133 let mut first = true;
139 result.push_str(sep);
141 result.push_str(s.as_slice());
151 // Helper functions used for Unicode normalization
// Reorders `comb` in place, comparing the `u8` of adjacent entries. Callers in
// this file fill that `u8` with `canonical_combining_class` of the `char`.
152 fn canonical_sort(comb: &mut [(char, u8)]) {
153 let len = comb.len();
// Repeated passes over adjacent pairs; the inner range shrinks by `i` per pass.
154 for i in range(0, len) {
155 let mut swapped = false;
156 for j in range(1, len-i) {
157 let class_a = comb[j-1].1;
158 let class_b = comb[j].1;
// Only a pair whose classes are both non-zero and out of order qualifies, so a
// class of 0 on either side leaves the pair as it is.
// NOTE(review): the body of this `if` is not visible here; presumably it swaps
// the pair and sets `swapped` -- confirm.
159 if class_a != 0 && class_b != 0 && class_a > class_b {
// Stop early once a pass finishes with `swapped` still false.
164 if !swapped { break; }
169 enum DecompositionType {
174 /// External iterator for a string's decomposition's characters.
175 /// Use with the `std::iter` module.
177 pub struct Decompositions<'a> {
// Which decomposition to apply. `next` calls either `decompose_canonical` or
// `decompose_compatible`; presumably it chooses based on this field -- confirm.
178 kind: DecompositionType,
// Pending output: each decomposed char paired with its canonical combining
// class, as pushed by the callback in `next`.
// NOTE(review): `next` also uses `self.iter` and `self.sorted`; their
// declarations are not visible in this extract.
180 buffer: Vec<(char, u8)>,
// Yields decomposed characters one at a time. Decomposed chars are staged in
// `self.buffer` together with their combining class so they can be reordered
// with `canonical_sort` before being handed out.
// NOTE(review): many lines of this impl are missing from this extract; the
// comments below describe only what the visible lines do.
184 impl<'a> Iterator for Decompositions<'a> {
188 fn next(&mut self) -> Option<char> {
// Look at the front of the buffer before pulling any more input.
189 match self.buffer.first() {
// `Vec::remove(0)` shifts every remaining element down by one.
192 self.buffer.remove(0);
// When the buffer is flagged as sorted, its front entry is removed here
// (presumably to be returned -- the return itself is not visible).
195 Some(&(c, _)) if self.sorted => {
196 self.buffer.remove(0);
// Any other case clears the `sorted` flag.
199 _ => self.sorted = false
// Pull chars from the underlying iterator and decompose each one; `callback`
// is handed to the decompose functions below and receives a value `d`.
203 for ch in self.iter {
204 let buffer = &mut self.buffer;
205 let sorted = &mut self.sorted;
207 let callback = |&mut: d| {
// NOTE(review): the `let class =` that this call initialises is on a line
// missing from this extract.
209 unicode::char::canonical_combining_class(d);
// A class-0 char arriving while the buffer is unsorted triggers a sort of
// what has been buffered so far.
210 if class == 0 && !*sorted {
211 canonical_sort(buffer.as_mut_slice());
214 buffer.push((d, class));
// Canonical versus compatibility decomposition; the code selecting between
// these two calls is not visible here.
218 unicode::char::decompose_canonical(ch, callback)
221 unicode::char::decompose_compatible(ch, callback)
// Sort whatever is buffered at this point.
// NOTE(review): the condition around this call is not visible -- confirm.
232 canonical_sort(self.buffer.as_mut_slice());
// Empty-buffer check, followed by taking the front entry; the values returned
// in each case are not visible in this extract.
236 if self.buffer.is_empty() {
239 match self.buffer.remove(0) {
// Only the lower bound of the underlying iterator's hint is read here; the
// tuple actually returned is not visible in this extract.
249 fn size_hint(&self) -> (uint, Option<uint>) {
250 let (lower, _) = self.iter.size_hint();
// Phase of the `Recompositions` iterator. The variant list is not visible in
// this extract; `Recompositions::next` assigns `Composing`, `Purging` and
// `Finished` to its `state` field.
256 enum RecompositionState {
262 /// External iterator for a string's recomposition's characters.
263 /// Use with the `std::iter` module.
265 pub struct Recompositions<'a> {
// Source of already-decomposed chars that `next` iterates over.
266 iter: Decompositions<'a>,
// Current phase; `next` assigns `Composing`, `Purging` or `Finished`.
267 state: RecompositionState,
// Chars queued with `push_back` while composing and drained with `pop_front`.
268 buffer: RingBuf<char>,
// The char that incoming chars are passed to `unicode::char::compose` with;
// it is overwritten when composition produces a new char.
// NOTE(review): `next` also uses `self.last_ccc`, whose declaration is not
// visible in this extract.
269 composee: Option<char>,
// Yields recomposed characters. It consumes the decomposed stream in
// `self.iter`, tries to merge each char into `self.composee`, and queues chars
// in `self.buffer` along the paths shown below.
// NOTE(review): the `match`/`loop` scaffolding and several arms are missing
// from this extract; the comments below describe only the visible lines.
273 impl<'a> Iterator for Recompositions<'a> {
277 fn next(&mut self) -> Option<char> {
281 for ch in self.iter {
// Combining class of the incoming char; compared against `last_ccc` below.
282 let ch_class = unicode::char::canonical_combining_class(ch);
// No composee yet: `ch` is stored as the one to compose onto (lines between
// the check and the assignment are missing and may add conditions).
283 if self.composee.is_none() {
287 self.composee = Some(ch);
// `k` is the current composee. The `unwrap` presumably relies on the
// `is_none` branch above not falling through -- confirm.
290 let k = self.composee.clone().unwrap();
// Behaviour depends on the class recorded for the previous char.
292 match self.last_ccc {
// Try to compose `k` with `ch`; `r` is stored back as the new composee.
294 match unicode::char::compose(k, ch) {
296 self.composee = Some(r);
// Otherwise `ch` either becomes the composee or is queued with its class
// recorded; the conditions choosing between these are not visible here.
301 self.composee = Some(ch);
304 self.buffer.push_back(ch);
305 self.last_ccc = Some(ch_class);
// Previous class is not lower than this one: `ch` cannot combine.
310 if l_class >= ch_class {
311 // `ch` is blocked from `composee`
// `ch` becomes the new composee, `last_ccc` is reset and the state switches
// to `Purging`.
// NOTE(review): presumably more conditions apply on the missing lines.
313 self.composee = Some(ch);
314 self.last_ccc = None;
315 self.state = Purging;
318 self.buffer.push_back(ch);
319 self.last_ccc = Some(ch_class);
// Second composition attempt: success replaces the composee, while the other
// visible path queues `ch` and records its class.
322 match unicode::char::compose(k, ch) {
324 self.composee = Some(r);
328 self.buffer.push_back(ch);
329 self.last_ccc = Some(ch_class);
// Switch to `Finished`; if a composee is pending, hand it out now.
335 self.state = Finished;
336 if self.composee.is_some() {
337 return self.composee.take();
// Drain the queue; once it is empty the state goes back to `Composing`.
341 match self.buffer.pop_front() {
342 None => self.state = Composing,
// Drain the queue; once it is empty, return whatever composee is left.
347 match self.buffer.pop_front() {
348 None => return self.composee.take(),
357 /// External iterator for a string's UTF-16 code units.
358 /// Use with the `std::iter` module.
360 pub struct Utf16Units<'a> {
// Encoder wrapped around the string's `Chars` iterator; the `Iterator` impl
// for this type forwards to it.
361 encoder: Utf16Encoder<Chars<'a>>
// Both methods forward directly to the wrapped `Utf16Encoder`.
364 impl<'a> Iterator for Utf16Units<'a> {
368 fn next(&mut self) -> Option<u16> { self.encoder.next() }
371 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
374 /// Replaces all occurrences of one string with another.
378 /// * s - The string containing substrings to replace
379 /// * from - The string to replace
380 /// * to - The replacement string
384 /// The original string with all occurrences of `from` replaced with `to`.
389 /// # #![allow(deprecated)]
391 /// let string = "orange";
392 /// let new_string = str::replace(string, "or", "str");
393 /// assert_eq!(new_string.as_slice(), "strange");
395 #[deprecated = "call the inherent method instead"]
396 pub fn replace(s: &str, from: &str, to: &str) -> String {
404 // Return the initial codepoint accumulator for the first byte.
405 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
406 // for width 3, and 3 bits for width 4
// The mask `0x7F >> $width` evaluates to 0x1F, 0x0F and 0x07 for widths 2, 3
// and 4, i.e. exactly the 5, 4 and 3 low bits described above. The masked
// byte is then widened to `u32`.
407 macro_rules! utf8_first_byte {
408 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
411 // return the value of $ch updated with continuation byte $byte
// Shifts the accumulator `$ch` left by 6 bits and ORs in the low 6 bits of
// `$byte` (`63u8` is 0b0011_1111), widened to `u32`.
412 macro_rules! utf8_acc_cont_byte {
413 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
420 /// A string type that can hold either a `String` or a `&str`.
421 /// This can be useful as an optimization when an allocation is sometimes
422 /// needed but not always.
423 #[deprecated = "use std::string::CowString"]
424 pub enum MaybeOwned<'a> {
425 /// A borrowed string.
431 /// A specialization of `CowString` to be sendable.
432 #[deprecated = "use std::string::CowString<'static>"]
433 pub type SendStr = CowString<'static>;
435 #[deprecated = "use std::string::CowString"]
436 impl<'a> MaybeOwned<'a> {
437 /// Returns `true` if this `MaybeOwned` wraps an owned string.
442 /// let string = String::from_str("orange");
443 /// let maybe_owned_string = string.into_maybe_owned();
444 /// assert_eq!(true, maybe_owned_string.is_owned());
447 pub fn is_owned(&self) -> bool {
454 /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
459 /// let string = "orange";
460 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
461 /// assert_eq!(true, maybe_owned_string.is_slice());
464 pub fn is_slice(&self) -> bool {
471 /// Return the number of bytes in this string.
474 pub fn len(&self) -> uint { self.as_slice().len() }
476 /// Returns true if the string contains no bytes
479 pub fn is_empty(&self) -> bool { self.len() == 0 }
482 #[deprecated = "use std::borrow::IntoCow"]
483 /// Trait for moving into a `MaybeOwned`.
484 pub trait IntoMaybeOwned<'a> {
485 /// Moves `self` into a `MaybeOwned`.
486 fn into_maybe_owned(self) -> MaybeOwned<'a>;
489 #[deprecated = "use std::borrow::IntoCow"]
491 impl<'a> IntoMaybeOwned<'a> for String {
495 /// let owned_string = String::from_str("orange");
496 /// let maybe_owned_string = owned_string.into_maybe_owned();
497 /// assert_eq!(true, maybe_owned_string.is_owned());
501 fn into_maybe_owned(self) -> MaybeOwned<'a> {
506 #[deprecated = "use std::borrow::IntoCow"]
508 impl<'a> IntoMaybeOwned<'a> for &'a str {
512 /// let string = "orange";
513 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
514 /// assert_eq!(false, maybe_owned_str.is_owned());
518 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
522 #[deprecated = "use std::borrow::IntoCow"]
523 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
527 /// let str = "orange";
528 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
529 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
530 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
533 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
536 #[deprecated = "use std::string::CowString"]
// Equality is decided on the `&str` views returned by `as_slice()`, not on
// which variant each side happens to be.
538 impl<'a> PartialEq for MaybeOwned<'a> {
540 fn eq(&self, other: &MaybeOwned) -> bool {
541 self.as_slice() == other.as_slice()
545 #[deprecated = "use std::string::CowString"]
546 impl<'a> Eq for MaybeOwned<'a> {}
548 #[deprecated = "use std::string::CowString"]
// Delegates to `cmp`, so the result is always `Some`.
549 impl<'a> PartialOrd for MaybeOwned<'a> {
551 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
552 Some(self.cmp(other))
556 #[deprecated = "use std::string::CowString"]
// Ordering is that of the underlying `&str` views returned by `as_slice()`.
557 impl<'a> Ord for MaybeOwned<'a> {
560 fn cmp(&self, other: &MaybeOwned) -> Ordering {
561 self.as_slice().cmp(other.as_slice())
566 #[deprecated = "use std::string::CowString"]
567 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
569 fn equiv(&self, other: &S) -> bool {
570 self.as_slice() == other.as_slice()
574 #[deprecated = "use std::string::CowString"]
576 impl<'a> Str for MaybeOwned<'a> {
578 fn as_slice<'b>(&'b self) -> &'b str {
581 Owned(ref s) => s.as_slice()
586 #[deprecated = "use std::string::CowString"]
// Cloning preserves the variant.
587 impl<'a> Clone for MaybeOwned<'a> {
590 fn clone(&self) -> MaybeOwned<'a> {
// A borrowed slice is re-wrapped as is; no new `String` is built.
592 Slice(s) => Slice(s),
// An owned string is rebuilt as a fresh `String` from its contents.
593 Owned(ref s) => Owned(String::from_str(s.as_slice()))
598 #[deprecated = "use std::string::CowString"]
599 impl<'a> Default for MaybeOwned<'a> {
602 fn default() -> MaybeOwned<'a> { Slice("") }
605 #[deprecated = "use std::string::CowString"]
607 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
609 fn hash(&self, hasher: &mut H) {
610 self.as_slice().hash(hasher)
614 #[deprecated = "use std::string::CowString"]
615 impl<'a> fmt::Show for MaybeOwned<'a> {
617 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
619 Slice(ref s) => s.fmt(f),
620 Owned(ref s) => s.fmt(f)
625 #[unstable = "trait is unstable"]
// Lets a `&String` be borrowed as `&str`.
626 impl BorrowFrom<String> for str {
// `owned[]` is the full-range slice of the `String`, returned here as `&str`.
627 fn borrow_from(owned: &String) -> &str { owned[] }
630 #[unstable = "trait is unstable"]
// Produces an owned `String` from a borrowed `str`.
631 impl ToOwned<String> for str {
632 fn to_owned(&self) -> String {
// Copies the bytes and wraps them without re-validating. The bytes come from
// a `str`, which the module docs describe as always valid UTF-8.
// NOTE(review): the enclosing `unsafe` block is on lines missing from this
// extract.
634 String::from_utf8_unchecked(self.as_bytes().to_owned())
639 /// Unsafe string operations.
642 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
643 pub use core::str::raw::{slice_unchecked};
650 /// A clone-on-write string
651 #[deprecated = "use std::string::CowString instead"]
652 pub type CowString<'a> = Cow<'a, String, str>;
655 Section: Trait implementations
658 /// Any string that can be represented as a slice.
659 pub trait StrExt for Sized?: ops::Slice<uint, str> {
660 /// Escapes each char in `s` with `char::escape_default`.
661 #[unstable = "return type may change to be an iterator"]
662 fn escape_default(&self) -> String {
663 self.chars().flat_map(|c| c.escape_default()).collect()
666 /// Escapes each char in `s` with `char::escape_unicode`.
667 #[unstable = "return type may change to be an iterator"]
668 fn escape_unicode(&self) -> String {
669 self.chars().flat_map(|c| c.escape_unicode()).collect()
672 /// Replaces all occurrences of one string with another.
676 /// * `from` - The string to replace
677 /// * `to` - The replacement string
681 /// A new `String` with all occurrences of `from` replaced with `to`.
686 /// let s = "Do you know the muffin man,
687 /// The muffin man, the muffin man, ...".to_string();
689 /// assert_eq!(s.replace("muffin man", "little lamb"),
690 /// "Do you know the little lamb,
691 /// The little lamb, the little lamb, ...".to_string());
693 /// // not found, so no change.
694 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
697 fn replace(&self, from: &str, to: &str) -> String {
698 let mut result = String::new();
// Byte offset where the text not yet copied into `result` begins.
699 let mut last_end = 0;
700 for (start, end) in self.match_indices(from) {
// Copy the unmatched text that precedes this occurrence. The offsets come
// from `match_indices`, so they are presumably in bounds and on char
// boundaries, which is what the unchecked slice requires.
// NOTE(review): the lines that append `to` and advance `last_end` are
// missing from this extract.
701 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
// Copy the text from `last_end` to the end of the string.
705 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
709 /// Given a string, makes a new string with repeated copies of it.
710 #[deprecated = "use repeat(self).take(n).collect() instead"]
711 fn repeat(&self, nn: uint) -> String {
712 iter::repeat(self[]).take(nn).collect()
715 /// Returns the Levenshtein Distance between two strings.
716 #[deprecated = "this function will be removed"]
717 fn lev_distance(&self, t: &str) -> uint {
// NOTE(review): the binding of `me` is on a line missing from this extract;
// presumably it is `self` viewed as `&str` -- confirm.
// If either string is empty, the result is the char count of the other.
719 if me.is_empty() { return t.chars().count(); }
720 if t.is_empty() { return me.chars().count(); }
// Working column, initialised to 0..=t.len(). It is sized by byte length but
// indexed by char position below; a char is at least one byte, so `j + 1`
// never exceeds `t.len()`.
722 let mut dcol: Vec<_> = range(0, t.len() + 1).collect();
725 for (i, sc) in me.chars().enumerate() {
// NOTE(review): `current` is declared on a missing line (presumably from `i`).
728 dcol[0] = current + 1;
730 for (j, tc) in t.chars().enumerate() {
// Remember the old value of this cell before it is overwritten.
732 let next = dcol[j + 1];
// One visible path stores `current` unchanged. The other takes the minimum
// of `current`, `next` and `dcol[j]` and adds one. The `sc`/`tc` comparison
// choosing between them is on lines missing from this extract.
735 dcol[j + 1] = current;
737 dcol[j + 1] = cmp::min(current, next);
738 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
749 /// Returns an iterator over the string in Unicode Normalization Form D
750 /// (canonical decomposition).
752 #[unstable = "this functionality may be moved to libunicode"]
753 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
755 iter: self[].chars(),
762 /// Returns an iterator over the string in Unicode Normalization Form KD
763 /// (compatibility decomposition).
765 #[unstable = "this functionality may be moved to libunicode"]
766 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
768 iter: self[].chars(),
775 /// An Iterator over the string in Unicode Normalization Form C
776 /// (canonical decomposition followed by canonical composition).
778 #[unstable = "this functionality may be moved to libunicode"]
779 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
781 iter: self.nfd_chars(),
783 buffer: RingBuf::new(),
789 /// An Iterator over the string in Unicode Normalization Form KC
790 /// (compatibility decomposition followed by canonical composition).
792 #[unstable = "this functionality may be moved to libunicode"]
793 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
795 iter: self.nfkd_chars(),
797 buffer: RingBuf::new(),
803 /// Returns true if a string contains a string pattern.
807 /// - pat - The string pattern to look for
812 /// assert!("bananas".contains("nana"));
815 fn contains(&self, pat: &str) -> bool {
816 core_str::StrExt::contains(self[], pat)
819 /// Returns true if a string contains a char pattern.
823 /// - pat - The char pattern to look for
828 /// assert!("hello".contains_char('e'));
830 #[unstable = "might get removed in favour of a more generic contains()"]
831 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
832 core_str::StrExt::contains_char(self[], pat)
835 /// An iterator over the characters of `self`. Note, this iterates
836 /// over Unicode code-points, not Unicode graphemes.
841 /// let v: Vec<char> = "abc åäö".chars().collect();
842 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
845 fn chars(&self) -> Chars {
846 core_str::StrExt::chars(self[])
849 /// An iterator over the bytes of `self`
854 /// let v: Vec<u8> = "bors".bytes().collect();
855 /// assert_eq!(v, b"bors".to_vec());
858 fn bytes(&self) -> Bytes {
859 core_str::StrExt::bytes(self[])
862 /// An iterator over the characters of `self` and their byte offsets.
864 fn char_indices(&self) -> CharIndices {
865 core_str::StrExt::char_indices(self[])
868 /// An iterator over substrings of `self`, separated by characters
869 /// matched by the pattern `pat`.
874 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
875 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
877 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
878 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
880 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
881 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
883 /// let v: Vec<&str> = "".split('X').collect();
884 /// assert_eq!(v, vec![""]);
887 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
888 core_str::StrExt::split(self[], pat)
891 /// An iterator over substrings of `self`, separated by characters
892 /// matched by the pattern `pat`, restricted to splitting at most `count`
898 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
899 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
901 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
902 /// assert_eq!(v, vec!["abc", "def2ghi"]);
904 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
905 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
907 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
908 /// assert_eq!(v, vec!["abcXdef"]);
910 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
911 /// assert_eq!(v, vec![""]);
914 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
915 core_str::StrExt::splitn(self[], count, pat)
918 /// An iterator over substrings of `self`, separated by characters
919 /// matched by the pattern `pat`.
921 /// Equivalent to `split`, except that the trailing substring
922 /// is skipped if empty (terminator semantics).
927 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
928 /// assert_eq!(v, vec!["A", "B"]);
930 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
931 /// assert_eq!(v, vec!["A", "", "B", ""]);
933 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
934 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
936 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
937 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
939 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
940 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
942 #[unstable = "might get removed"]
943 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
944 core_str::StrExt::split_terminator(self[], pat)
947 /// An iterator over substrings of `self`, separated by characters
948 /// matched by the pattern `pat`, starting from the end of the string.
949 /// Restricted to splitting at most `count` times.
954 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
955 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
957 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
958 /// assert_eq!(v, vec!["ghi", "abc1def"]);
960 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
961 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
964 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
965 core_str::StrExt::rsplitn(self[], count, pat)
968 /// An iterator over the start and end indices of the disjoint
969 /// matches of the pattern `pat` within `self`.
971 /// That is, each returned value `(start, end)` satisfies
972 /// `self.slice(start, end) == pat`. For matches of `pat` within
973 /// `self` that overlap, only the indices corresponding to the
974 /// first match are returned.
979 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
980 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
982 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
983 /// assert_eq!(v, vec![(1,4), (4,7)]);
985 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
986 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
988 #[unstable = "might have its iterator type changed"]
989 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
990 core_str::StrExt::match_indices(self[], pat)
993 /// An iterator over the substrings of `self` separated by the pattern `pat`.
998 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
999 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
1001 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
1002 /// assert_eq!(v, vec!["1", "", "2"]);
1004 #[unstable = "might get removed in the future in favor of a more generic split()"]
1005 fn split_str<'a>(&'a self, pat: &'a str) -> StrSplits<'a> {
1006 core_str::StrExt::split_str(self[], pat)
1009 /// An iterator over the lines of a string (subsequences separated
1010 /// by `\n`). This does not include the empty string after a
1016 /// let four_lines = "foo\nbar\n\nbaz\n";
1017 /// let v: Vec<&str> = four_lines.lines().collect();
1018 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1021 fn lines(&self) -> Lines {
1022 core_str::StrExt::lines(self[])
1025 /// An iterator over the lines of a string, separated by either
1026 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
1027 /// empty trailing line.
1032 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
1033 /// let v: Vec<&str> = four_lines.lines_any().collect();
1034 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
1037 fn lines_any(&self) -> LinesAny {
1038 core_str::StrExt::lines_any(self[])
1041 /// Returns the number of Unicode code points (`char`) that a
1044 /// This does not perform any normalization, and is `O(n)`, since
1045 /// UTF-8 is a variable width encoding of code points.
1047 /// *Warning*: The number of code points in a string does not directly
1048 /// correspond to the number of visible characters or width of the
1049 /// visible text due to composing characters, and double- and
1050 /// zero-width ones.
1052 /// See also `.len()` for the byte length.
1057 /// # #![allow(deprecated)]
1058 /// // composed forms of `ö` and `é`
1059 /// let c = "Löwe 老虎 Léopard"; // German, Simplified Chinese, French
1060 /// // decomposed forms of `ö` and `é`
1061 /// let d = "Lo\u{0308}we 老虎 Le\u{0301}opard";
1063 /// assert_eq!(c.char_len(), 15);
1064 /// assert_eq!(d.char_len(), 17);
1066 /// assert_eq!(c.len(), 21);
1067 /// assert_eq!(d.len(), 23);
1069 /// // the two strings *look* the same
1070 /// println!("{}", c);
1071 /// println!("{}", d);
1073 #[deprecated = "call .chars().count() instead"]
1074 fn char_len(&self) -> uint {
1075 core_str::StrExt::char_len(self[])
1078 /// Returns a slice of the given string from the byte range
1079 /// [`begin`..`end`).
1081 /// This operation is `O(1)`.
1083 /// Panics when `begin` and `end` do not point to valid characters
1084 /// or point beyond the last character of the string.
1086 /// See also `slice_to` and `slice_from` for slicing prefixes and
1087 /// suffixes of strings, and `slice_chars` for slicing based on
1088 /// code point counts.
1093 /// let s = "Löwe 老虎 Léopard";
1094 /// assert_eq!(s.slice(0, 1), "L");
1096 /// assert_eq!(s.slice(1, 9), "öwe 老");
1098 /// // these will panic:
1099 /// // byte 2 lies within `ö`:
1100 /// // s.slice(2, 3);
1102 /// // byte 8 lies within `老`
1103 /// // s.slice(1, 8);
1105 /// // byte 100 is outside the string
1106 /// // s.slice(3, 100);
1108 #[unstable = "use slice notation [a..b] instead"]
1109 fn slice(&self, begin: uint, end: uint) -> &str {
1110 core_str::StrExt::slice(self[], begin, end)
1113 /// Returns a slice of the string from `begin` to its end.
1115 /// Equivalent to `self.slice(begin, self.len())`.
1117 /// Panics when `begin` does not point to a valid character, or is
1120 /// See also `slice`, `slice_to` and `slice_chars`.
1121 #[unstable = "use slice notation [a..] instead"]
1122 fn slice_from(&self, begin: uint) -> &str {
1123 core_str::StrExt::slice_from(self[], begin)
1126 /// Returns a slice of the string from the beginning to byte
1129 /// Equivalent to `self.slice(0, end)`.
1131 /// Panics when `end` does not point to a valid character, or is
1134 /// See also `slice`, `slice_from` and `slice_chars`.
1135 #[unstable = "use slice notation [0..a] instead"]
1136 fn slice_to(&self, end: uint) -> &str {
1137 core_str::StrExt::slice_to(self[], end)
1140 /// Returns a slice of the string from the character range
1141 /// [`begin`..`end`).
1143 /// That is, start at the `begin`-th code point of the string and
1144 /// continue to the `end`-th code point. This does not detect or
1145 /// handle edge cases such as leaving a combining character as the
1146 /// first code point of the string.
1148 /// Due to the design of UTF-8, this operation is `O(end)`.
1149 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
1150 /// variants that use byte indices rather than code point
1153 /// Panics if `begin` > `end` or if either `begin` or `end` is
1154 /// beyond the last character of the string.
1159 /// let s = "Löwe 老虎 Léopard";
1160 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
1161 /// assert_eq!(s.slice_chars(5, 7), "老虎");
1163 #[unstable = "may have yet to prove its worth"]
1164 fn slice_chars(&self, begin: uint, end: uint) -> &str {
1165 core_str::StrExt::slice_chars(self[], begin, end)
1168 /// Takes a bytewise (not UTF-8) slice from a string.
1170 /// Returns the substring from [`begin`..`end`).
1172 /// Caller must check both UTF-8 character boundaries and the boundaries of
1173 /// the entire slice as well.
1175 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
1176 core_str::StrExt::slice_unchecked(self[], begin, end)
1179 /// Returns true if the pattern `pat` is a prefix of the string.
1184 /// assert!("banana".starts_with("ba"));
1187 fn starts_with(&self, pat: &str) -> bool {
1188 core_str::StrExt::starts_with(self[], pat)
1191 /// Returns true if the pattern `pat` is a suffix of the string.
1196 /// assert!("banana".ends_with("nana"));
1199 fn ends_with(&self, pat: &str) -> bool {
1200 core_str::StrExt::ends_with(self[], pat)
1203 /// Returns a string with all pre- and suffixes that match
1204 /// the pattern `pat` repeatedly removed.
1208 /// * pat - a string pattern
1213 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1214 /// let x: &[_] = &['1', '2'];
1215 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
1216 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
1219 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
1220 core_str::StrExt::trim_matches(self[], pat)
1224 #[deprecated = "Replaced by `trim_matches`"]
1225 fn trim_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1226 self.trim_matches(to_trim)
1229 /// Returns a string with all prefixes that match
1230 /// the pattern `pat` repeatedly removed.
1234 /// * pat - a string pattern
1239 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1240 /// let x: &[_] = &['1', '2'];
1241 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
1242 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1245 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
1246 core_str::StrExt::trim_left_matches(self[], pat)
1250 #[deprecated = "Replaced by `trim_left_matches`"]
1251 fn trim_left_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1252 self.trim_left_matches(to_trim)
1255 /// Returns a string with all suffixes that match
1256 /// the pattern `pat` repeatedly removed.
1260 /// * pat - a string pattern
1265 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1266 /// let x: &[_] = &['1', '2'];
1267 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
1268 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1271 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
1272 core_str::StrExt::trim_right_matches(self[], pat)
1276 #[deprecated = "Replaced by `trim_right_matches`"]
1277 fn trim_right_chars<'a, C: CharEq>(&'a self, to_trim: C) -> &'a str {
1278 self.trim_right_matches(to_trim)
1281 /// Check that `index`-th byte lies at the start and/or end of a
1282 /// UTF-8 code point sequence.
1284 /// The start and end of the string (when `index == self.len()`)
1285 /// are considered to be boundaries.
1287 /// Panics if `index` is greater than `self.len()`.
1292 /// let s = "Löwe 老虎 Léopard";
1293 /// assert!(s.is_char_boundary(0));
1295 /// assert!(s.is_char_boundary(6));
1296 /// assert!(s.is_char_boundary(s.len()));
1298 /// // second byte of `ö`
1299 /// assert!(!s.is_char_boundary(2));
1301 /// // third byte of `老`
1302 /// assert!(!s.is_char_boundary(8));
1304 #[unstable = "naming is uncertain with container conventions"]
1305 fn is_char_boundary(&self, index: uint) -> bool {
1306 core_str::StrExt::is_char_boundary(self[], index)
1309 /// Pluck a character out of a string and return the index of the next
1312 /// This function can be used to iterate over the Unicode characters of a
1317 /// This example manually iterates through the characters of a
1318 /// string; this should normally be done by `.chars()` or
1319 /// `.char_indices`.
1322 /// use std::str::CharRange;
1324 /// let s = "中华Việt Nam";
1326 /// while i < s.len() {
1327 /// let CharRange {ch, next} = s.char_range_at(i);
1328 /// println!("{}: {}", i, ch);
1350 /// * self - The string
1351 /// * start - The byte offset of the char to extract
1355 /// A record {ch: char, next: uint} containing the char value and the byte
1356 /// index of the next Unicode character.
1360 /// If `start` is greater than or equal to the length of the string.
1361 /// If `start` is not the index of the beginning of a valid UTF-8 character.
1362 #[unstable = "naming is uncertain with container conventions"]
1363 fn char_range_at(&self, start: uint) -> CharRange {
1364 core_str::StrExt::char_range_at(self[], start)
1367 /// Given a byte position and a str, return the previous char and its position.
1369 /// This function can be used to iterate over a Unicode string in reverse.
1371 /// Returns 0 for next index if called on start index 0.
1375 /// If `start` is greater than the length of the string.
1376 /// If `start` is not an index following a valid UTF-8 character.
1377 #[unstable = "naming is uncertain with container conventions"]
1378 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1379 core_str::StrExt::char_range_at_reverse(self[], start)
1382 /// Plucks the character starting at the `i`th byte of a string.
1388 /// assert_eq!(s.char_at(1), 'b');
1389 /// assert_eq!(s.char_at(2), 'π');
1390 /// assert_eq!(s.char_at(4), 'c');
1395 /// If `i` is greater than or equal to the length of the string.
1396 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1397 #[unstable = "naming is uncertain with container conventions"]
1398 fn char_at(&self, i: uint) -> char {
1399 core_str::StrExt::char_at(self[], i)
1402 /// Plucks the character ending at the `i`th byte of a string.
1406 /// If `i` is greater than the length of the string.
1407 /// If `i` is not an index following a valid UTF-8 character.
1408 #[unstable = "naming is uncertain with container conventions"]
1409 fn char_at_reverse(&self, i: uint) -> char {
1410 core_str::StrExt::char_at_reverse(self[], i)
1413 /// Work with the byte buffer of a string as a byte slice.
1418 /// assert_eq!("bors".as_bytes(), b"bors");
1421 fn as_bytes(&self) -> &[u8] {
1422 core_str::StrExt::as_bytes(self[])
1425 /// Returns the byte index of the first character of `self` that
1426 /// matches the pattern `pat`.
1430 /// `Some` containing the byte index of the first matching character
1431 /// or `None` if there is no match.
1436 /// let s = "Löwe 老虎 Léopard";
1438 /// assert_eq!(s.find('L'), Some(0));
1439 /// assert_eq!(s.find('é'), Some(14));
1441 /// // the first space
1442 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1444 /// // neither are found
1445 /// let x: &[_] = &['1', '2'];
1446 /// assert_eq!(s.find(x), None);
1449 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1450 core_str::StrExt::find(self[], pat)
1453 /// Returns the byte index of the last character of `self` that
1454 /// matches the pattern `pat`.
1458 /// `Some` containing the byte index of the last matching character
1459 /// or `None` if there is no match.
1464 /// let s = "Löwe 老虎 Léopard";
1466 /// assert_eq!(s.rfind('L'), Some(13));
1467 /// assert_eq!(s.rfind('é'), Some(14));
1469 /// // the second space
1470 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1472 /// // searches for an occurrence of either `1` or `2`, but neither are found
1473 /// let x: &[_] = &['1', '2'];
1474 /// assert_eq!(s.rfind(x), None);
1477 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1478 core_str::StrExt::rfind(self[], pat)
1481 /// Returns the byte index of the first matching substring
1485 /// * `needle` - The string to search for
1489 /// `Some` containing the byte index of the first matching substring
1490 /// or `None` if there is no match.
1495 /// let s = "Löwe 老虎 Léopard";
1497 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1498 /// assert_eq!(s.find_str("muffin man"), None);
1500 #[unstable = "might get removed in favor of a more generic find in the future"]
1501 fn find_str(&self, needle: &str) -> Option<uint> {
1502 core_str::StrExt::find_str(self[], needle)
1505 /// Retrieves the first character from a string slice and returns
1506 /// it. This does not allocate a new string; instead, it returns a
1507 /// slice that points one character beyond the character that was
1508 /// shifted. If the string does not contain any characters,
1509 /// None is returned instead.
1514 /// let s = "Löwe 老虎 Léopard";
1515 /// let (c, s1) = s.slice_shift_char().unwrap();
1516 /// assert_eq!(c, 'L');
1517 /// assert_eq!(s1, "öwe 老虎 Léopard");
1519 /// let (c, s2) = s1.slice_shift_char().unwrap();
1520 /// assert_eq!(c, 'ö');
1521 /// assert_eq!(s2, "we 老虎 Léopard");
1523 #[unstable = "awaiting conventions about shifting and slices"]
1524 fn slice_shift_char(&self) -> Option<(char, &str)> {
1525 core_str::StrExt::slice_shift_char(self[])
1528 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1530 /// Panics if `inner` is not a direct slice contained within self.
1535 /// let string = "a\nb\nc";
1536 /// let lines: Vec<&str> = string.lines().collect();
1538 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1539 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1540 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1542 #[unstable = "awaiting convention about comparability of arbitrary slices"]
1543 fn subslice_offset(&self, inner: &str) -> uint {
1544 core_str::StrExt::subslice_offset(self[], inner)
1547 /// Return an unsafe pointer to the string's buffer.
1549 /// The caller must ensure that the string outlives this pointer,
1550 /// and that it is not reallocated (e.g. by pushing to the
1554 fn as_ptr(&self) -> *const u8 {
1555 core_str::StrExt::as_ptr(self[])
1558 /// Return an iterator of `u16` over the string encoded as UTF-16.
1559 #[unstable = "this functionality may only be provided by libunicode"]
1560 fn utf16_units(&self) -> Utf16Units {
1561 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1564 /// Return the number of bytes in this string
1569 /// assert_eq!("foo".len(), 3);
1570 /// assert_eq!("ƒoo".len(), 4);
1574 fn len(&self) -> uint {
1575 core_str::StrExt::len(self[])
1578 /// Returns true if this slice contains no bytes
1583 /// assert!("".is_empty());
1587 fn is_empty(&self) -> bool {
1588 core_str::StrExt::is_empty(self[])
1591 /// Parse this string into the specified type.
1596 /// assert_eq!("4".parse::<u32>(), Some(4));
1597 /// assert_eq!("j".parse::<u32>(), None);
1600 #[unstable = "this method was just created"]
1601 fn parse<F: FromStr>(&self) -> Option<F> {
1602 FromStr::from_str(self[])
1605 /// Returns an iterator over the
1606 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1609 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1610 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1611 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1612 /// recommends extended grapheme cluster boundaries for general processing.
1617 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1618 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1619 /// assert_eq!(gr1.as_slice(), b);
1620 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1621 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1622 /// assert_eq!(gr2.as_slice(), b);
1624 #[unstable = "this functionality may only be provided by libunicode"]
1625 fn graphemes(&self, is_extended: bool) -> Graphemes {
1626 UnicodeStr::graphemes(self[], is_extended)
1629 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1630 /// See `graphemes()` method for more information.
1635 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1636 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1637 /// assert_eq!(gr_inds.as_slice(), b);
1639 #[unstable = "this functionality may only be provided by libunicode"]
1640 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1641 UnicodeStr::grapheme_indices(self[], is_extended)
1644 /// An iterator over the words of a string (subsequences separated
1645 /// by any sequence of whitespace). Sequences of whitespace are
1646 /// collapsed, so empty "words" are not included.
1651 /// let some_words = " Mary had\ta little \n\t lamb";
1652 /// let v: Vec<&str> = some_words.words().collect();
1653 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1656 fn words(&self) -> Words {
1657 UnicodeStr::words(self[])
1660 /// Returns true if the string contains only whitespace.
1662 /// Whitespace characters are determined by `char::is_whitespace`.
1667 /// # #![allow(deprecated)]
1668 /// assert!(" \t\n".is_whitespace());
1669 /// assert!("".is_whitespace());
1671 /// assert!( !"abc".is_whitespace());
1673 #[deprecated = "use .chars().all(|c| c.is_whitespace())"]
1674 fn is_whitespace(&self) -> bool {
1675 UnicodeStr::is_whitespace(self[])
1678 /// Returns true if the string contains only alphanumeric code
1681 /// Alphanumeric characters are determined by `char::is_alphanumeric`.
1686 /// # #![allow(deprecated)]
1687 /// assert!("Löwe老虎Léopard123".is_alphanumeric());
1688 /// assert!("".is_alphanumeric());
1690 /// assert!( !" &*~".is_alphanumeric());
1692 #[deprecated = "use .chars().all(|c| c.is_alphanumeric())"]
1693 fn is_alphanumeric(&self) -> bool {
1694 UnicodeStr::is_alphanumeric(self[])
1697 /// Returns a string's displayed width in columns, treating control
1698 /// characters as zero-width.
1700 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1701 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1702 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1703 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1704 /// recommends that these characters be treated as 1 column (i.e.,
1705 /// `is_cjk` = `false`) if the locale is unknown.
1706 #[unstable = "this functionality may only be provided by libunicode"]
1707 fn width(&self, is_cjk: bool) -> uint {
1708 UnicodeStr::width(self[], is_cjk)
1711 /// Returns a string with leading and trailing whitespace removed.
1713 fn trim(&self) -> &str {
1714 UnicodeStr::trim(self[])
1717 /// Returns a string with leading whitespace removed.
1719 fn trim_left(&self) -> &str {
1720 UnicodeStr::trim_left(self[])
1723 /// Returns a string with trailing whitespace removed.
1725 fn trim_right(&self) -> &str {
1726 UnicodeStr::trim_right(self[])
1729 /// Deprecated, call `.to_owned()` instead from the `std::borrow::ToOwned`
1731 #[deprecated = "call `.to_owned()` on `std::borrow::ToOwned` instead"]
1732 fn into_string(&self) -> String {
1737 impl StrExt for str {}
1743 use core::default::Default;
1744 use core::iter::AdditiveIterator;
1745 use super::{from_utf8, is_utf8, raw};
1746 use super::MaybeOwned::{Owned, Slice};
1747 use super::Utf8Error;
1752 assert!("" <= "foo");
1753 assert!("foo" <= "foo");
1754 assert!("foo" != "bar");
1759 assert_eq!("".len(), 0u);
1760 assert_eq!("hello world".len(), 11u);
1761 assert_eq!("\x63".len(), 1u);
1762 assert_eq!("\u{a2}".len(), 2u);
1763 assert_eq!("\u{3c0}".len(), 2u);
1764 assert_eq!("\u{2620}".len(), 3u);
1765 assert_eq!("\u{1d11e}".len(), 4u);
1767 assert_eq!("".char_len(), 0u);
1768 assert_eq!("hello world".char_len(), 11u);
1769 assert_eq!("\x63".char_len(), 1u);
1770 assert_eq!("\u{a2}".char_len(), 1u);
1771 assert_eq!("\u{3c0}".char_len(), 1u);
1772 assert_eq!("\u{2620}".char_len(), 1u);
1773 assert_eq!("\u{1d11e}".char_len(), 1u);
1774 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
1776 assert_eq!("\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}".width(false), 10u); // fullwidth "hello": 2 columns per char
1777 assert_eq!("\u{ff48}\u{ff45}\u{ff4c}\u{ff4c}\u{ff4f}".width(true), 10u); // fullwidth forms are wide regardless of `is_cjk`
1778 assert_eq!("\0\0\0\0\0".width(false), 0u);
1779 assert_eq!("\0\0\0\0\0".width(true), 0u);
1780 assert_eq!("".width(false), 0u);
1781 assert_eq!("".width(true), 0u);
1782 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1783 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1788 assert_eq!("hello".find('l'), Some(2u));
1789 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1790 assert!("hello".find('x').is_none());
1791 assert!("hello".find(|&: c:char| c == 'x').is_none());
1792 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1793 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1798 assert_eq!("hello".rfind('l'), Some(3u));
1799 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1800 assert!("hello".rfind('x').is_none());
1801 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1802 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1803 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1808 let empty = String::from_str("");
1809 let s: String = empty.chars().collect();
1810 assert_eq!(empty, s);
1811 let data = String::from_str("ประเทศไทย中");
1812 let s: String = data.chars().collect();
1813 assert_eq!(data, s);
1817 fn test_into_bytes() {
1818 let data = String::from_str("asdf");
1819 let buf = data.into_bytes();
1820 assert_eq!(b"asdf", buf);
1824 fn test_find_str() {
1826 assert_eq!("".find_str(""), Some(0u));
1827 assert!("banana".find_str("apple pie").is_none());
1829 let data = "abcabc";
1830 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1831 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1832 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1834 let string = "ประเทศไทย中华Việt Nam";
1835 let mut data = String::from_str(string);
1836 data.push_str(string);
1837 assert!(data.find_str("ไท华").is_none());
1838 assert_eq!(data.slice(0u, 43u).find_str(""), Some(0u));
1839 assert_eq!(data.slice(6u, 43u).find_str(""), Some(6u - 6u));
1841 assert_eq!(data.slice(0u, 43u).find_str("ประ"), Some( 0u));
1842 assert_eq!(data.slice(0u, 43u).find_str("ทศไ"), Some(12u));
1843 assert_eq!(data.slice(0u, 43u).find_str("ย中"), Some(24u));
1844 assert_eq!(data.slice(0u, 43u).find_str("iệt"), Some(34u));
1845 assert_eq!(data.slice(0u, 43u).find_str("Nam"), Some(40u));
1847 assert_eq!(data.slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1848 assert_eq!(data.slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1849 assert_eq!(data.slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1850 assert_eq!(data.slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1851 assert_eq!(data.slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1855 fn test_slice_chars() {
1856 fn t(a: &str, b: &str, start: uint) {
1857 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1860 t("hello", "llo", 2);
1861 t("hello", "el", 1);
1864 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1867 fn s(x: &str) -> String { x.into_string() }
1869 macro_rules! test_concat {
1870 ($expected: expr, $string: expr) => {
1872 let s: String = $string.concat();
1873 assert_eq!($expected, s);
1879 fn test_concat_for_different_types() {
1880 test_concat!("ab", vec![s("a"), s("b")]);
1881 test_concat!("ab", vec!["a", "b"]);
1882 test_concat!("ab", vec!["a", "b"].as_slice());
1883 test_concat!("ab", vec![s("a"), s("b")]);
1887 fn test_concat_for_different_lengths() {
1888 let empty: &[&str] = &[];
1889 test_concat!("", empty);
1890 test_concat!("a", ["a"]);
1891 test_concat!("ab", ["a", "b"]);
1892 test_concat!("abc", ["", "a", "bc"]);
1895 macro_rules! test_connect {
1896 ($expected: expr, $string: expr, $delim: expr) => {
1898 let s = $string.connect($delim);
1899 assert_eq!($expected, s);
1905 fn test_connect_for_different_types() {
1906 test_connect!("a-b", ["a", "b"], "-");
1907 let hyphen = "-".into_string();
1908 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1909 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1910 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1911 test_connect!("a-b", vec![s("a"), s("b")], "-");
1915 fn test_connect_for_different_lengths() {
1916 let empty: &[&str] = &[];
1917 test_connect!("", empty, "-");
1918 test_connect!("a", ["a"], "-");
1919 test_connect!("a-b", ["a", "b"], "-");
1920 test_connect!("-a-bc", ["", "a", "bc"], "-");
1925 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1926 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1927 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1928 assert_eq!("".repeat(4), String::from_str(""));
1929 assert_eq!("hi".repeat(0), String::from_str(""));
1933 fn test_unsafe_slice() {
1934 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1935 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1936 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1937 fn a_million_letter_a() -> String {
1939 let mut rs = String::new();
1941 rs.push_str("aaaaaaaaaa");
1946 fn half_a_million_letter_a() -> String {
1948 let mut rs = String::new();
1950 rs.push_str("aaaaa");
1955 let letters = a_million_letter_a();
1956 assert!(half_a_million_letter_a() ==
1957 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1963 fn test_starts_with() {
1964 assert!(("".starts_with("")));
1965 assert!(("abc".starts_with("")));
1966 assert!(("abc".starts_with("a")));
1967 assert!((!"a".starts_with("abc")));
1968 assert!((!"".starts_with("abc")));
1969 assert!((!"ödd".starts_with("-")));
1970 assert!(("ödd".starts_with("öd")));
1974 fn test_ends_with() {
1975 assert!(("".ends_with("")));
1976 assert!(("abc".ends_with("")));
1977 assert!(("abc".ends_with("c")));
1978 assert!((!"a".ends_with("abc")));
1979 assert!((!"".ends_with("abc")));
1980 assert!((!"ddö".ends_with("-")));
1981 assert!(("ddö".ends_with("dö")));
1985 fn test_is_empty() {
1986 assert!("".is_empty());
1987 assert!(!"a".is_empty());
1993 assert_eq!("".replace(a, "b"), String::from_str(""));
1994 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1995 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1997 assert!(" test test ".replace(test, "toast") ==
1998 String::from_str(" toast toast "));
1999 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
2003 fn test_replace_2a() {
2004 let data = "ประเทศไทย中华";
2005 let repl = "دولة الكويت";
2008 let a2 = "دولة الكويتทศไทย中华";
2009 assert_eq!(data.replace(a, repl), a2);
2013 fn test_replace_2b() {
2014 let data = "ประเทศไทย中华";
2015 let repl = "دولة الكويت";
2018 let b2 = "ปรدولة الكويتทศไทย中华";
2019 assert_eq!(data.replace(b, repl), b2);
2023 fn test_replace_2c() {
2024 let data = "ประเทศไทย中华";
2025 let repl = "دولة الكويت";
2028 let c2 = "ประเทศไทยدولة الكويت";
2029 assert_eq!(data.replace(c, repl), c2);
2033 fn test_replace_2d() {
2034 let data = "ประเทศไทย中华";
2035 let repl = "دولة الكويت";
2038 assert_eq!(data.replace(d, repl), data);
2043 assert_eq!("ab", "abc".slice(0, 2));
2044 assert_eq!("bc", "abc".slice(1, 3));
2045 assert_eq!("", "abc".slice(1, 1));
2046 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
2048 let data = "ประเทศไทย中华";
2049 assert_eq!("ป", data.slice(0, 3));
2050 assert_eq!("ร", data.slice(3, 6));
2051 assert_eq!("", data.slice(3, 3));
2052 assert_eq!("华", data.slice(30, 33));
2054 fn a_million_letter_x() -> String {
2056 let mut rs = String::new();
2058 rs.push_str("华华华华华华华华华华");
2063 fn half_a_million_letter_x() -> String {
2065 let mut rs = String::new();
2067 rs.push_str("华华华华华");
2072 let letters = a_million_letter_x();
2073 assert!(half_a_million_letter_x() ==
2074 String::from_str(letters.slice(0u, 3u * 500000u)));
2079 let ss = "中华Việt Nam";
2081 assert_eq!("华", ss.slice(3u, 6u));
2082 assert_eq!("Việt Nam", ss.slice(6u, 16u));
2084 assert_eq!("ab", "abc".slice(0u, 2u));
2085 assert_eq!("bc", "abc".slice(1u, 3u));
2086 assert_eq!("", "abc".slice(1u, 1u));
2088 assert_eq!("中", ss.slice(0u, 3u));
2089 assert_eq!("华V", ss.slice(3u, 7u));
2090 assert_eq!("", ss.slice(3u, 3u));
2105 fn test_slice_fail() {
2106 "中华Việt Nam".slice(0u, 2u);
2110 fn test_slice_from() {
2111 assert_eq!("abcd".slice_from(0), "abcd");
2112 assert_eq!("abcd".slice_from(2), "cd");
2113 assert_eq!("abcd".slice_from(4), "");
2116 fn test_slice_to() {
2117 assert_eq!("abcd".slice_to(0), "");
2118 assert_eq!("abcd".slice_to(2), "ab");
2119 assert_eq!("abcd".slice_to(4), "abcd");
2123 fn test_trim_left_chars() {
2124 let v: &[char] = &[];
2125 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
2126 let chars: &[char] = &['*', ' '];
2127 assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
2128 assert_eq!(" *** *** ".trim_left_chars(chars), "");
2129 assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
2131 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
2132 let chars: &[char] = &['1', '2'];
2133 assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
2134 assert_eq!("123foo1bar123".trim_left_chars(|&: c: char| c.is_numeric()), "foo1bar123");
2138 fn test_trim_right_chars() {
2139 let v: &[char] = &[];
2140 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
2141 let chars: &[char] = &['*', ' '];
2142 assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
2143 assert_eq!(" *** *** ".trim_right_chars(chars), "");
2144 assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
2146 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
2147 let chars: &[char] = &['1', '2'];
2148 assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
2149 assert_eq!("123foo1bar123".trim_right_chars(|&: c: char| c.is_numeric()), "123foo1bar");
2153 fn test_trim_chars() {
2154 let v: &[char] = &[];
2155 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
2156 let chars: &[char] = &['*', ' '];
2157 assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
2158 assert_eq!(" *** *** ".trim_chars(chars), "");
2159 assert_eq!("foo".trim_chars(chars), "foo");
2161 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
2162 let chars: &[char] = &['1', '2'];
2163 assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
2164 assert_eq!("123foo1bar123".trim_chars(|&: c: char| c.is_numeric()), "foo1bar");
2168 fn test_trim_left() {
2169 assert_eq!("".trim_left(), "");
2170 assert_eq!("a".trim_left(), "a");
2171 assert_eq!(" ".trim_left(), "");
2172 assert_eq!(" blah".trim_left(), "blah");
2173 assert_eq!(" \u{3000} wut".trim_left(), "wut");
2174 assert_eq!("hey ".trim_left(), "hey ");
2178 fn test_trim_right() {
2179 assert_eq!("".trim_right(), "");
2180 assert_eq!("a".trim_right(), "a");
2181 assert_eq!(" ".trim_right(), "");
2182 assert_eq!("blah ".trim_right(), "blah");
2183 assert_eq!("wut \u{3000} ".trim_right(), "wut");
2184 assert_eq!(" hey".trim_right(), " hey");
2189 assert_eq!("".trim(), "");
2190 assert_eq!("a".trim(), "a");
2191 assert_eq!(" ".trim(), "");
2192 assert_eq!(" blah ".trim(), "blah");
2193 assert_eq!("\nwut \u{3000} ".trim(), "wut");
2194 assert_eq!(" hey dude ".trim(), "hey dude");
2198 fn test_is_whitespace() {
2199 assert!("".is_whitespace());
2200 assert!(" ".is_whitespace());
2201 assert!("\u{2009}".is_whitespace()); // Thin space
2202 assert!(" \n\t ".is_whitespace());
2203 assert!(!" _ ".is_whitespace());
2207 fn test_slice_shift_char() {
2208 let data = "ประเทศไทย中";
2209 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
2213 fn test_slice_shift_char_2() {
2215 assert_eq!(empty.slice_shift_char(), None);
2220 // deny overlong encodings
2221 assert!(!is_utf8(&[0xc0, 0x80]));
2222 assert!(!is_utf8(&[0xc0, 0xae]));
2223 assert!(!is_utf8(&[0xe0, 0x80, 0x80]));
2224 assert!(!is_utf8(&[0xe0, 0x80, 0xaf]));
2225 assert!(!is_utf8(&[0xe0, 0x81, 0x81]));
2226 assert!(!is_utf8(&[0xf0, 0x82, 0x82, 0xac]));
2227 assert!(!is_utf8(&[0xf4, 0x90, 0x80, 0x80]));
2230 assert!(!is_utf8(&[0xED, 0xA0, 0x80]));
2231 assert!(!is_utf8(&[0xED, 0xBF, 0xBF]));
2233 assert!(is_utf8(&[0xC2, 0x80]));
2234 assert!(is_utf8(&[0xDF, 0xBF]));
2235 assert!(is_utf8(&[0xE0, 0xA0, 0x80]));
2236 assert!(is_utf8(&[0xED, 0x9F, 0xBF]));
2237 assert!(is_utf8(&[0xEE, 0x80, 0x80]));
2238 assert!(is_utf8(&[0xEF, 0xBF, 0xBF]));
2239 assert!(is_utf8(&[0xF0, 0x90, 0x80, 0x80]));
2240 assert!(is_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]));
2244 fn test_is_utf16() {
2245 use unicode::str::is_utf16;
2246 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
2254 // surrogate pairs (randomly generated with Python 3's
2255 // .encode('utf-16be'))
2256 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
2257 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
2258 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
2260 // mixtures (also random)
2261 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
2262 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
2263 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
2266 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
2269 // surrogate + regular unit
2271 // surrogate + lead surrogate
2273 // unterminated surrogate
2275 // trail surrogate without a lead
2278 // random byte sequences that Python 3's .decode('utf-16be')
2280 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
2281 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
2282 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
2283 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
2284 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
2285 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
2286 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
2287 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
2288 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
2289 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
2290 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
2291 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
2292 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
2293 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
2294 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
2295 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
2296 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
2297 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
2298 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
2299 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
2300 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
2304 fn test_as_bytes() {
2307 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2308 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2312 assert_eq!("".as_bytes(), b);
2313 assert_eq!("abc".as_bytes(), b"abc");
2314 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
2319 fn test_as_bytes_fail() {
2320 // Don't double free. (I'm not sure if this exercises the
2321 // original problem code path anymore.)
2322 let s = String::from_str("");
2323 let _bytes = s.as_bytes();
2329 let buf = "hello".as_ptr();
2331 assert_eq!(*buf.offset(0), b'h');
2332 assert_eq!(*buf.offset(1), b'e');
2333 assert_eq!(*buf.offset(2), b'l');
2334 assert_eq!(*buf.offset(3), b'l');
2335 assert_eq!(*buf.offset(4), b'o');
2340 fn test_subslice_offset() {
2341 let a = "kernelsprite";
2342 let b = a.slice(7, a.len());
2343 let c = a.slice(0, a.len() - 6);
2344 assert_eq!(a.subslice_offset(b), 7);
2345 assert_eq!(a.subslice_offset(c), 0);
2347 let string = "a\nb\nc";
2348 let lines: Vec<&str> = string.lines().collect();
2349 assert_eq!(string.subslice_offset(lines[0]), 0);
2350 assert_eq!(string.subslice_offset(lines[1]), 2);
2351 assert_eq!(string.subslice_offset(lines[2]), 4);
2356 fn test_subslice_offset_2() {
2357 let a = "alchemiter";
2358 let b = "cruxtruder";
2359 a.subslice_offset(b);
2363 fn vec_str_conversions() {
2364 let s1: String = String::from_str("All mimsy were the borogoves");
2366 let v: Vec<u8> = s1.as_bytes().to_vec();
2367 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
2368 let mut i: uint = 0u;
2369 let n1: uint = s1.len();
2370 let n2: uint = v.len();
2373 let a: u8 = s1.as_bytes()[i];
2374 let b: u8 = s2.as_bytes()[i];
2383 fn test_contains() {
2384 assert!("abcde".contains("bcd"));
2385 assert!("abcde".contains("abcd"));
2386 assert!("abcde".contains("bcde"));
2387 assert!("abcde".contains(""));
2388 assert!("".contains(""));
2389 assert!(!"abcde".contains("def"));
2390 assert!(!"".contains("a"));
2392 let data = "ประเทศไทย中华Việt Nam";
2393 assert!(data.contains("ประเ"));
2394 assert!(data.contains("ะเ"));
2395 assert!(data.contains("中华"));
2396 assert!(!data.contains("ไท华"));
2400 fn test_contains_char() {
2401 assert!("abc".contains_char('b'));
2402 assert!("a".contains_char('a'));
2403 assert!(!"abc".contains_char('d'));
2404 assert!(!"".contains_char('a'));
2409 let s = "ศไทย中华Việt Nam";
2410 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2412 for ch in v.iter() {
2413 assert!(s.char_at(pos) == *ch);
2414 pos += String::from_char(1, *ch).len();
2419 fn test_char_at_reverse() {
2420 let s = "ศไทย中华Việt Nam";
2421 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2422 let mut pos = s.len();
2423 for ch in v.iter().rev() {
2424 assert!(s.char_at_reverse(pos) == *ch);
2425 pos -= String::from_char(1, *ch).len();
2430 fn test_escape_unicode() {
2431 assert_eq!("abc".escape_unicode(),
2432 String::from_str("\\u{61}\\u{62}\\u{63}"));
2433 assert_eq!("a c".escape_unicode(),
2434 String::from_str("\\u{61}\\u{20}\\u{63}"));
2435 assert_eq!("\r\n\t".escape_unicode(),
2436 String::from_str("\\u{d}\\u{a}\\u{9}"));
2437 assert_eq!("'\"\\".escape_unicode(),
2438 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2439 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2440 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2441 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2442 String::from_str("\\u{100}\\u{ffff}"));
2443 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2444 String::from_str("\\u{10000}\\u{10ffff}"));
2445 assert_eq!("ab\u{fb00}".escape_unicode(),
2446 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2447 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2448 String::from_str("\\u{1d4ea}\\u{d}"));
2452 fn test_escape_default() {
2453 assert_eq!("abc".escape_default(), String::from_str("abc"));
2454 assert_eq!("a c".escape_default(), String::from_str("a c"));
2455 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2456 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2457 assert_eq!("\u{100}\u{ffff}".escape_default(),
2458 String::from_str("\\u{100}\\u{ffff}"));
2459 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2460 String::from_str("\\u{10000}\\u{10ffff}"));
2461 assert_eq!("ab\u{fb00}".escape_default(),
2462 String::from_str("ab\\u{fb00}"));
2463 assert_eq!("\u{1d4ea}\r".escape_default(),
2464 String::from_str("\\u{1d4ea}\\r"));
2468 fn test_total_ord() { // each comparison must be asserted; a bare `==` expression is evaluated and discarded
2469 assert!("1234".cmp("123") == Greater); // longer string with equal prefix sorts after
2470 assert!("123".cmp("1234") == Less);
2471 assert!("1234".cmp("1234") == Equal);
2472 assert!("12345555".cmp("123456") == Less); // decided at the first differing byte ('5' < '6'), not by length
2473 assert!("22".cmp("1234") == Greater);
2477 fn test_char_range_at() {
2478 let data = "b¢€𤭢𤭢€¢b";
2479 assert_eq!('b', data.char_range_at(0).ch);
2480 assert_eq!('¢', data.char_range_at(1).ch);
2481 assert_eq!('€', data.char_range_at(3).ch);
2482 assert_eq!('𤭢', data.char_range_at(6).ch);
2483 assert_eq!('𤭢', data.char_range_at(10).ch);
2484 assert_eq!('€', data.char_range_at(14).ch);
2485 assert_eq!('¢', data.char_range_at(17).ch);
2486 assert_eq!('b', data.char_range_at(19).ch);
2490 fn test_char_range_at_reverse_underflow() {
2491 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2495 fn test_iterator() {
2496 let s = "ศไทย中华Việt Nam";
2497 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2500 let mut it = s.chars();
2503 assert_eq!(c, v[pos]);
2506 assert_eq!(pos, v.len());
2510 fn test_rev_iterator() {
2511 let s = "ศไทย中华Việt Nam";
2512 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2515 let mut it = s.chars().rev();
2518 assert_eq!(c, v[pos]);
2521 assert_eq!(pos, v.len());
2525 fn test_chars_decoding() {
2526 let mut bytes = [0u8; 4];
2527 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2528 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2529 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2530 if Some(c) != s.chars().next() {
2531 panic!("character {:x}={} does not decode correctly", c as u32, c);
2537 fn test_chars_rev_decoding() {
2538 let mut bytes = [0u8; 4];
2539 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2540 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2541 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
2542 if Some(c) != s.chars().rev().next() {
2543 panic!("character {:x}={} does not decode correctly", c as u32, c);
2549 fn test_iterator_clone() {
2550 let s = "ศไทย中华Việt Nam";
2551 let mut it = s.chars();
2553 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
2557 fn test_bytesator() {
2558 let s = "ศไทย中华Việt Nam";
2560 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2561 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2566 for b in s.bytes() {
2567 assert_eq!(b, v[pos]);
// Backward iteration with `bytes().rev()` against the same expected byte table,
// walked from its end towards its start.
// NOTE(review): the `v` opener and the final table row are not part of this listing.
2573 fn test_bytes_revator() {
2574 let s = "ศไทย中华Việt Nam";
2576 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2577 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
// Starts one past the last element; presumably decremented before each comparison
// (that line is not visible here), since `v[v.len()]` would be out of bounds.
2580 let mut pos = v.len();
2582 for b in s.bytes().rev() {
2584 assert_eq!(b, v[pos]);
// Forward iteration with `char_indices()`: every item must be the pair
// `(p[pos], v[pos])`, where `p` holds the byte offset at which each char of `s`
// starts and `v` holds the chars themselves.
// NOTE(review): the loop header and the `pos` updates are not part of this listing.
2589 fn test_char_indicesator() {
2590 let s = "ศไทย中华Việt Nam";
// Offsets advance by 3 for the three-byte chars and by 1 for the ASCII ones.
2591 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2592 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2595 let mut it = s.char_indices();
2598 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
2601 assert_eq!(pos, v.len());
2602 assert_eq!(pos, p.len());
// Backward iteration with `char_indices().rev()`: `p` and `v` list the expected
// byte offsets and chars in reverse order, and every item must be the pair
// `(p[pos], v[pos])`.
// NOTE(review): the loop header and the `pos` updates are not part of this listing.
2606 fn test_char_indices_revator() {
2607 let s = "ศไทย中华Việt Nam";
2608 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2609 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2612 let mut it = s.char_indices().rev();
2615 assert_eq!(c, (p[pos], v[pos]));
// Both tables must have been consumed completely.
2618 assert_eq!(pos, v.len());
2619 assert_eq!(pos, p.len());
// `splitn(3, sep)` performs at most three splits: the expected vectors hold four
// pieces, the last being the unsplit remainder of the input. Each separator is
// tried both as a `char` and as an equivalent closure predicate.
2623 fn test_splitn_char_iterator() {
2624 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
2626 let split: Vec<&str> = data.splitn(3, ' ').collect();
2627 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2629 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2630 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Non-ASCII (multi-byte) separator.
2633 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2634 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2636 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2637 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Contrasts `split` with `split_terminator` on input that ends with the separator:
// `split('\n')` reports a trailing empty piece, `split_terminator('\n')` does not.
// The leading empty piece (input starts with '\n') is kept by both.
2641 fn test_split_char_iterator_no_trailing() {
2642 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2644 let split: Vec<&str> = data.split('\n').collect();
2645 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2647 let split: Vec<&str> = data.split_terminator('\n').collect();
2648 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// `words()` check (the enclosing `fn` line is not part of this listing): the input
// mixes '\n', ' ' and '\t' as separators, including a leading run and a trailing
// '\n'; the result must contain only the non-empty words, in order.
2653 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2654 let words: Vec<&str> = data.words().collect();
2655 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Table-driven test for `nfd_chars()`.
2659 fn test_nfd_chars() {
// `t!(input, expected)`: collect `input.nfd_chars()` into a `String` and compare it with `expected`.
2661 ($input: expr, $expected: expr) => {
2662 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
// U+1E0B is split into 'd' + U+0307, while U+01C4 and U+2026 pass through unchanged.
2666 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2667 t!("\u{2026}", "\u{2026}");
2668 t!("\u{2126}", "\u{3a9}");
// Both spellings below end up as the same sequence: 'd', U+0323, U+0307.
2669 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2670 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
// Already-decomposed input, and a leading combining mark, are left as they are.
2671 t!("a\u{301}", "a\u{301}");
2672 t!("\u{301}a", "\u{301}a");
// Single precomposed code points expand to sequences of three and two code points.
2673 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2674 t!("\u{ac1c}", "\u{1100}\u{1162}");
// Table-driven test for `nfkd_chars()`.
2678 fn test_nfkd_chars() {
// `t!(input, expected)`: collect `input.nfkd_chars()` into a `String` and compare it with `expected`.
2680 ($input: expr, $expected: expr) => {
2681 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
// Here U+01C4 becomes "DZ" + U+030C and U+2026 becomes three ASCII dots.
2685 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2686 t!("\u{2026}", "...");
2687 t!("\u{2126}", "\u{3a9}");
// Both spellings below end up as the same sequence: 'd', U+0323, U+0307.
2688 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2689 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
// Already-decomposed input, and a leading combining mark, are left as they are.
2690 t!("a\u{301}", "a\u{301}");
2691 t!("\u{301}a", "\u{301}a");
// Single precomposed code points expand to sequences of three and two code points.
2692 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2693 t!("\u{ac1c}", "\u{1100}\u{1162}");
// Table-driven test for `nfc_chars()`.
2697 fn test_nfc_chars() {
// `t!(input, expected)`: collect `input.nfc_chars()` into a `String` and compare it with `expected`.
2699 ($input: expr, $expected: expr) => {
2700 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
// Precomposed input stays precomposed; U+2126 is still replaced by U+03A9.
2704 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2705 t!("\u{2026}", "\u{2026}");
2706 t!("\u{2126}", "\u{3a9}");
// Both spellings below end up as U+1E0D followed by U+0307.
2707 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2708 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
// 'a' + U+0301 composes to U+00E1; a leading combining mark has nothing to attach to.
2709 t!("a\u{301}", "\u{e1}");
2710 t!("\u{301}a", "\u{301}a");
2711 t!("\u{d4db}", "\u{d4db}");
2712 t!("\u{ac1c}", "\u{ac1c}");
// Several marks on one base: the base composes with U+0300 and the remaining marks come out reordered.
2713 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// Table-driven test for `nfkc_chars()`.
2717 fn test_nfkc_chars() {
// `t!(input, expected)`: collect `input.nfkc_chars()` into a `String` and compare it with `expected`.
2719 ($input: expr, $expected: expr) => {
2720 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
// Here U+01C4 becomes 'D' + U+017D and U+2026 becomes three ASCII dots.
2724 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2725 t!("\u{2026}", "...");
2726 t!("\u{2126}", "\u{3a9}");
// Both spellings below end up as U+1E0D followed by U+0307.
2727 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2728 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
// 'a' + U+0301 composes to U+00E1; a leading combining mark has nothing to attach to.
2729 t!("a\u{301}", "\u{e1}");
2730 t!("\u{301}a", "\u{301}a");
2731 t!("\u{d4db}", "\u{d4db}");
2732 t!("\u{ac1c}", "\u{ac1c}");
// Several marks on one base: the base composes with U+0300 and the remaining marks come out reordered.
2733 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
// `lines()` check (the enclosing `fn` line is not part of this listing): empty
// lines are reported as "", and the result is the same four lines whether or not
// the input ends with a final '\n'.
2738 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2739 let lines: Vec<&str> = data.lines().collect();
2740 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2742 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2743 let lines: Vec<&str> = data.lines().collect();
2744 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
// Grapheme-cluster segmentation test driven by two data tables, followed by
// checks of `grapheme_indices`, `size_hint` and reverse iteration.
2748 fn test_graphemes() {
2749 use core::iter::order;
2750 // official Unicode test data
2751 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
// `test_same`: 325 rows of `(input, expected clusters)`. The comparison loop later in
// this function requires the same clusters from both `graphemes(true)` and
// `graphemes(false)` for these rows.
2752 let test_same: [(_, &[_]); 325] = [
2753 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2754 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2755 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2756 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2757 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2758 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2759 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2760 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2761 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2762 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2763 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2764 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2765 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2766 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2767 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2768 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2769 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2770 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2771 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2772 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2773 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2774 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2775 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2776 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2777 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2778 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2779 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2780 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2781 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2782 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2783 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2784 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2785 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2786 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2787 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2788 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2789 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2790 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2791 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2792 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2793 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2794 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2795 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2796 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2797 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2798 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2799 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2800 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2801 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2802 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2803 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2804 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2805 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2806 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2807 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2808 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2809 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2810 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2811 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2812 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2813 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2814 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2815 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2816 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2817 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2818 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2819 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2820 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2821 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2822 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2823 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2824 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2825 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2826 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2827 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2828 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2829 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2830 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2831 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2832 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2833 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2834 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2835 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2836 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2837 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2838 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2839 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2840 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2841 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2842 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2843 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2844 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2845 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2846 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2847 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2848 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2849 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2850 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2851 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2852 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2853 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2854 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2855 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2856 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2857 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2858 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2859 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2860 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2861 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2862 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2863 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2864 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2865 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2866 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2867 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2868 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2869 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2870 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2871 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2872 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2873 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2874 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2875 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2876 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2877 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2878 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2879 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2880 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2881 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2882 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2883 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2884 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2885 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2886 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2887 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2888 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2889 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2890 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2891 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2892 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2893 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2894 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2895 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2896 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2897 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2898 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2899 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2900 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2901 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2902 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2903 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2904 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2905 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2906 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2907 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2908 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2909 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2910 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2911 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2912 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2913 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2914 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2915 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2916 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2917 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2918 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2919 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2920 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2921 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2922 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2923 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2924 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2925 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2926 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2927 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2928 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2929 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2930 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2931 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2932 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2933 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2934 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2935 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2936 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2937 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2938 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2939 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2940 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2941 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2942 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2943 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2944 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2945 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2946 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2947 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2948 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2949 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2950 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2951 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2952 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2953 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2954 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2955 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2956 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2957 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2958 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2959 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2960 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2961 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2962 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2963 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2964 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2965 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2966 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2967 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2968 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2969 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2970 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2971 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2972 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2973 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2974 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2975 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2976 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2977 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2978 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2979 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2980 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2981 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2982 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2983 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2984 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2985 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2986 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2987 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2988 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2989 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2990 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2991 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2992 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2993 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2994 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2995 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2996 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2997 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2998 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2999 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
3000 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
3001 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
3002 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
3003 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
3004 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
3005 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
3006 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
3007 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
3008 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
3009 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
3010 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
3011 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
3012 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
3013 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
3014 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
3015 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
3016 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
3017 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
3018 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
3019 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
3020 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
3021 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
3022 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
3023 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
3024 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
3025 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
3026 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
3027 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
3028 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
3029 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
3030 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
3031 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
3032 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
3033 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
3034 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
3035 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
3036 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
3037 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
3038 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
3039 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
3040 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
3041 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
3042 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
3043 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
3044 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
3045 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
3046 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
3047 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
3048 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
3049 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
3050 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
3051 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
3052 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
3053 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
3054 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
3055 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
3056 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
3057 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
3058 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
3059 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
3060 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
3061 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
3062 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
3063 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
3064 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
3065 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
3066 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
3067 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
3068 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
3069 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
3070 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
3071 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
3072 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
3073 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
3074 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
3075 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
3076 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
3077 "\u{1F1E7}\u{1F1E8}"]),
3078 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
3079 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
3080 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
3081 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
// `test_diff`: 23 rows of `(input, clusters for graphemes(true), clusters for graphemes(false))`.
// Every input ends in U+0903: with `true` it stays attached to the preceding cluster,
// with `false` it is split off as a cluster of its own.
// Rows are packed several to a line, so a tuple may start on one line and end on the next.
3084 let test_diff: [(_, &[_], &[_]); 23] = [
3085 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
3086 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
3087 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
3088 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
3089 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
3090 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
3091 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
3092 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
3093 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
3094 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
3095 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
3096 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
3097 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
3098 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
3099 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
3100 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
3101 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
3102 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
3103 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
3104 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
3105 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
3106 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
3107 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
// `test_same` rows: `graphemes(true)` and `graphemes(false)` must both yield exactly `g`,
// forwards and, via `.rev()`, backwards.
// NOTE(review): the boolean presumably selects extended vs. legacy clusters — confirm
// against the `graphemes` documentation.
3110 for &(s, g) in test_same.iter() {
3111 // test forward iterator
3112 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
3113 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
3115 // test reverse iterator
3116 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
3117 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
// `test_diff` rows: `gt` is the expectation for `graphemes(true)`, `gf` for `graphemes(false)`.
3120 for &(s, gt, gf) in test_diff.iter() {
3121 // test forward iterator
3122 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
3123 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
3125 // test reverse iterator
3126 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
3127 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
3130 // test the indices iterators
// `grapheme_indices(true)` yields `(byte offset, cluster)` pairs; the input is three
// base-letter-plus-combining-mark clusters followed by "\r\n" as a single cluster.
3131 let s = "a̐éö̲\r\n";
3132 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
3133 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
3134 assert_eq!(gr_inds, b);
// Same pairs, in reverse order, from the back of the string.
3135 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
3136 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
3137 assert_eq!(gr_inds, b);
// `size_hint` before and after the iterator is drained through `by_ref()`.
3138 let mut gr_inds_iter = s.grapheme_indices(true);
3140 let gr_inds = gr_inds_iter.by_ref();
3141 let e1 = gr_inds.size_hint();
// Fresh iterator: at least 1 item, at most 13 (13 is also the end offset of the
// last cluster above: it starts at 11 and "\r\n" is two bytes).
3142 assert_eq!(e1, (1, Some(13)));
3143 let c = gr_inds.count();
3146 let e2 = gr_inds_iter.size_hint();
// After `count()` consumed everything through the `by_ref()` borrow, nothing is left.
3147 assert_eq!(e2, (0, Some(0)));
3149 // make sure the reverse iterator does the right thing with "\n" at beginning of string
// NOTE(review): the line that rebinds `s` for this check and the final assertion are
// not part of this listing; `b` lists the expected clusters in reverse order.
3151 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
3152 let b: &[_] = &["\r", "\r\n", "\n"];
// Table-driven test for `split_str` (splitting on a string separator).
3157 fn test_split_strator() {
// Helper: collect `s.split_str(sep)`; `u` is the expected list of pieces.
// NOTE(review): the comparison against `u` is not part of this listing.
3158 fn t(s: &str, sep: &str, u: &[&str]) {
3159 let v: Vec<&str> = s.split_str(sep).collect();
// Separator absent: the whole input comes back as one piece.
3162 t("--1233345--", "12345", &["--1233345--"]);
// A separator at the very start or end produces an empty piece on that side.
3163 t("abc::hello::there", "::", &["abc", "hello", "there"]);
3164 t("::hello::there", "::", &["", "hello", "there"]);
3165 t("hello::there::", "::", &["hello", "there", ""]);
3166 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
3167 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
3168 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
3169 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
3170 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Inputs made only of separator characters: matches are taken left to right
// without overlapping, so any leftover 'z' remains as a piece.
3172 t("zz", "zz", &["",""]);
3173 t("ok", "z", &["ok"]);
3174 t("zzz", "zz", &["","z"]);
3175 t("zzzzz", "zz", &["","","z"]);
// The `Default` value of a string-like type must be the empty string.
// NOTE(review): the concrete types `t` is instantiated with are not part of this listing.
3179 fn test_str_default() {
3180 use core::default::Default;
// Generic helper: build `S::default()` and compare its slice view with "".
3181 fn t<S: Default + Str>() {
3182 let s: S = Default::default();
3183 assert_eq!(s.as_slice(), "");
// Sums `len()` over slices of `&str` built from literals and from `String`s;
// every combination below must total 5 bytes.
3191 fn test_str_container() {
// Total byte length of all strings in `v`.
3192 fn sum_len(v: &[&str]) -> uint {
3193 v.iter().map(|x| x.len()).sum()
3196 let s = String::from_str("01234");
// Literals, including an empty one.
3197 assert_eq!(5, sum_len(&["012", "", "34"]));
// Slices borrowed from temporary `String`s.
3198 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
3199 String::from_str("2").as_slice(),
3200 String::from_str("34").as_slice(),
3201 String::from_str("").as_slice()]));
// A single slice borrowed from a named `String`.
3202 assert_eq!(5, sum_len(&[s.as_slice()]));
// `from_utf8` on valid and invalid byte input.
// NOTE(review): the binding of the first `xs` is not part of this listing.
3206 fn test_str_from_utf8() {
3208 assert_eq!(from_utf8(xs), Ok("hello"));
// Valid multi-byte input round-trips to the same string.
3210 let xs = "ศไทย中华Việt Nam".as_bytes();
3211 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// A trailing 0xFF byte is rejected; the expected error variant is `TooShort`.
3213 let xs = b"hello\xFF";
3214 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
// Trait behaviour of both `MaybeOwned` variants (`Slice` and `Owned`): length,
// slice view, formatting, ordering, `Default`, and comparison across variants.
3218 fn test_maybe_owned_traits() {
// Borrowed variant.
3219 let s = Slice("abcde");
3220 assert_eq!(s.len(), 5);
3221 assert_eq!(s.as_slice(), "abcde");
3222 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
3223 assert_eq!(format!("{}", s).as_slice(), "abcde");
3224 assert!(s.lt(&Owned(String::from_str("bcdef"))));
3225 assert_eq!(Slice(""), Default::default());
// Owned variant: the same checks must give the same answers.
3227 let o = Owned(String::from_str("abcde"));
3228 assert_eq!(o.len(), 5);
3229 assert_eq!(o.as_slice(), "abcde");
3230 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
3231 assert_eq!(format!("{}", o).as_slice(), "abcde");
3232 assert!(o.lt(&Slice("bcdef")));
3233 assert_eq!(Owned(String::from_str("")), Default::default());
// Same content in different variants compares as equal, in both directions.
3235 assert!(s.cmp(&o) == Equal);
3236 assert!(s.equiv(&o));
3238 assert!(o.cmp(&s) == Equal);
3239 assert!(o.equiv(&s));
// `is_slice()` / `is_owned()` must report the variant actually held.
3243 fn test_maybe_owned_methods() {
3244 let s = Slice("abcde");
3245 assert!(s.is_slice());
3246 assert!(!s.is_owned());
3248 let o = Owned(String::from_str("abcde"));
3249 assert!(!o.is_slice());
3250 assert!(o.is_owned());
// Cloning either variant yields a value equal to both an `Owned` and a `Slice`
// holding the same text, so equality here does not depend on the variant.
3254 fn test_maybe_owned_clone() {
3255 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
3256 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
3257 assert_eq!(Slice("abcde"), Slice("abcde").clone());
3258 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// `to_string()` on either variant must produce a `String` with the same text.
3262 fn test_maybe_owned_into_string() {
3263 assert_eq!(Slice("abcde").to_string(), String::from_str("abcde"));
3264 assert_eq!(Owned(String::from_str("abcde")).to_string(),
3265 String::from_str("abcde"));
// `into_maybe_owned()` from both `&str` and `String`. Each result is compared with
// both a `Slice` and an `Owned` of the same text, so these assertions check the
// content only, not which variant the conversion produced.
3269 fn test_into_maybe_owned() {
3270 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
3271 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
3272 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
3273 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
3274 Owned(String::from_str("abcde")));
3281 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
3283 use test::black_box;
// Benchmark: count the chars of a string mixing multi-byte and ASCII text via `chars().count()`.
3286 fn char_iterator(b: &mut Bencher) {
3287 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3289 b.iter(|| s.chars().count());
// Benchmark: walk the same mixed string with an explicit `for` loop over `chars()`.
// NOTE(review): the enclosing `b.iter(...)` call is not part of this listing.
3293 fn char_iterator_for(b: &mut Bencher) {
3294 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is passed to `black_box`, presumably so the loop body is not optimized away.
3297 for ch in s.chars() { black_box(ch) }
// Benchmark: `chars().count()` over a multi-line string literal that contains only ASCII text.
3302 fn char_iterator_ascii(b: &mut Bencher) {
3303 let s = "Mary had a little lamb, Little lamb
3304 Mary had a little lamb, Little lamb
3305 Mary had a little lamb, Little lamb
3306 Mary had a little lamb, Little lamb
3307 Mary had a little lamb, Little lamb
3308 Mary had a little lamb, Little lamb";
3310 b.iter(|| s.chars().count());
// Benchmark: count the chars of the mixed string while iterating from the back (`chars().rev()`).
3314 fn char_iterator_rev(b: &mut Bencher) {
3315 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3317 b.iter(|| s.chars().rev().count());
// Benchmark: walk the mixed string backwards with an explicit `for` loop over `chars().rev()`.
// NOTE(review): the enclosing `b.iter(...)` call is not part of this listing.
3321 fn char_iterator_rev_for(b: &mut Bencher) {
3322 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is passed to `black_box`, presumably so the loop body is not optimized away.
3325 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: `char_indices().count()` over the mixed string. The char count is
// computed once up front with `char_len()` and asserted on every iteration.
3330 fn char_indicesator(b: &mut Bencher) {
3331 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
3332 let len = s.char_len();
3334 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: drains `char_indices()` from the back over a string that mixes
// multi-byte and ASCII characters and checks the number of items produced.
3338 fn char_indicesator_rev(b: &mut Bencher) {
3339 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected item count, computed once outside the measured closure.
3340 let len = s.char_len();
3342 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly multi-byte string on the ASCII char 'V'.
// 'V' occurs twice in `s`, hence the expected 3 pieces.
3346 fn split_unicode_ascii(b: &mut Bencher) {
3347 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3349 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits a mostly multi-byte string on 'V', but passes the
// separator through a local `CharEq` wrapper instead of a bare `char`.
// The expected piece count is 3.
3353 fn split_unicode_not_ascii(b: &mut Bencher) {
// Newtype around the separator char so a custom `CharEq` impl can be supplied.
3354 struct NotAscii(char);
3355 impl CharEq for NotAscii {
3356 fn matches(&mut self, c: char) -> bool {
// Unpack the wrapped separator char.
3357 let NotAscii(cc) = *self;
// Always reports `false`, even though the wrapped char used below is ASCII.
// NOTE(review): presumably this keeps `split` off any ASCII-only fast path;
// confirm against the code that consumes `CharEq::only_ascii`.
3360 fn only_ascii(&self) -> bool { false }
3362 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
3364 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on a single space given as a `char`.
3369 fn split_ascii(b: &mut Bencher) {
3370 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, computed once outside the measured closure.
3371 let len = s.split(' ').count();
3373 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on a space, but passes the separator
// through a local `CharEq` wrapper instead of a bare `char`.
3377 fn split_not_ascii(b: &mut Bencher) {
// Newtype around the separator char so a custom `CharEq` impl can be supplied.
3378 struct NotAscii(char);
3379 impl CharEq for NotAscii {
3381 fn matches(&mut self, c: char) -> bool {
// Unpack the wrapped separator char.
3382 let NotAscii(cc) = *self;
// Always reports `false`, even though the wrapped char used below is ASCII.
// NOTE(review): presumably this keeps `split` off any ASCII-only fast path;
// confirm against the code that consumes `CharEq::only_ascii`.
3385 fn only_ascii(&self) -> bool { false }
3387 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from splitting on the plain `char`.
3388 let len = s.split(' ').count();
3390 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence on spaces using a named function as
// the predicate.
3394 fn split_extern_fn(b: &mut Bencher) {
3395 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from splitting on the plain `char`.
3396 let len = s.split(' ').count();
// Predicate passed to `split`: true exactly for the space character.
3397 fn pred(c: char) -> bool { c == ' ' }
3399 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence on spaces using an inline closure as
// the predicate.
3403 fn split_closure(b: &mut Bencher) {
3404 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from splitting on the plain `char`.
3405 let len = s.split(' ').count();
3407 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a `&[char]` slice as the
// separator set.
3411 fn split_slice(b: &mut Bencher) {
3412 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count comes from splitting on the plain `char`.
3413 let len = s.split(' ').count();
// One-element separator set containing only the space character.
3415 let c: &[char] = &[' '];
3416 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark input: a byte-string literal containing only ASCII text.
3420 fn is_utf8_100_ascii(b: &mut Bencher) {
3422 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
3423 Lorem ipsum dolor sit amet, consectetur. ";
// Guard: the input must be exactly 100 bytes, matching the "100" in the name.
3425 assert_eq!(100, s.len());
// Benchmark input: the UTF-8 bytes of a string built from multi-byte
// characters of several scripts.
3432 fn is_utf8_100_multibyte(b: &mut Bencher) {
3433 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guard: the input must be exactly 100 bytes, matching the "100" in the name.
3434 assert_eq!(100, s.len());
// Benchmark: joins ten copies of a string that mixes multi-byte and ASCII
// characters with `connect(sep)`.
3441 fn bench_connect(b: &mut Bencher) {
3442 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Ten references to the same string are the pieces to join.
3444 let v = vec![s, s, s, s, s, s, s, s, s, s];
// Joined length = ten pieces plus the nine separators between them (in bytes).
3446 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a one-sentence haystack; the needle is expected
// to be found.
3451 fn bench_contains_short_short(b: &mut Bencher) {
3452 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3456 assert!(haystack.contains(needle));
// Benchmark: `contains` with the short needle "english" against a haystack of
// several paragraphs of lorem-ipsum text. The needle is expected NOT to be
// found (the assertion is negated).
3461 fn bench_contains_short_long(b: &mut Bencher) {
3463 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
3464 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
3465 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
3466 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
3467 tempus vel, gravida nec quam.
3469 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
3470 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
3471 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
3472 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
3473 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
3474 interdum. Curabitur ut nisi justo.
3476 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
3477 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
3478 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
3479 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
3480 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
3481 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
3482 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
3483 Aliquam sit amet placerat lorem.
3485 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3486 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3487 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3488 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3489 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3492 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3493 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3494 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3495 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3496 malesuada sollicitudin quam eu fermentum.";
3497 let needle = "english";
3500 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is a long run of 'a' and the
// needle is a shorter run of 'a' ending in 'b'. The needle keeps matching the
// haystack until its final byte, which is the costly case for a naive scan
// (hence the name). The needle is expected NOT to be found.
3505 fn bench_contains_bad_naive(b: &mut Bencher) {
3506 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3507 let needle = "aaaaaaaab";
3510 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the haystack,
// so the only possible match covers the whole string.
3515 fn bench_contains_equal(b: &mut Bencher) {
3516 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3517 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3520 assert!(haystack.contains(needle));