1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can see that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
39 //! String literals are allocated statically in the rodata of the
40 //! executable/library. The string then has the type `&'static str` meaning that
41 //! the string is valid for the `'static` lifetime, otherwise known as the
42 //! lifetime of the entire program. As can be inferred from the type, these static
43 //! strings are not mutable.
47 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
48 //! stream of UTF-8 bytes. All strings are guaranteed to be validly encoded UTF-8
49 //! sequences. Additionally, strings are not null-terminated and can contain null
52 //! The actual representation of strings has a direct mapping to slices: `&str`
53 //! is the same as `&[u8]`.
55 #![doc(primitive = "str")]
57 use core::default::Default;
60 use core::iter::AdditiveIterator;
61 use core::kinds::Sized;
62 use core::prelude::{Char, Clone, Collection, Eq, Equiv, ImmutableSlice};
63 use core::prelude::{Iterator, MutableSlice, None, Option, Ord, Ordering};
64 use core::prelude::{PartialEq, PartialOrd, Result, AsSlice, Some, Tuple2};
65 use core::prelude::{range};
67 use {Deque, MutableSeq};
74 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
75 pub use core::str::{Bytes, CharSplits};
76 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
77 pub use core::str::{Utf16CodeUnits, eq_slice, is_utf8, is_utf16, Utf16Items};
78 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
79 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
80 pub use core::str::{Str, StrSlice};
81 pub use unicode::str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
84 Section: Creating a string
87 /// Methods for vectors of strings.
88 pub trait StrVector for Sized? {
89 /// Concatenates a vector of strings.
94 /// let first = "Restaurant at the End of the".to_string();
95 /// let second = " Universe".to_string();
96 /// let string_vec = vec![first, second];
97 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
99 fn concat(&self) -> String;
101 /// Concatenates a vector of strings, placing a given separator between each.
106 /// let first = "Roast".to_string();
107 /// let second = "Sirloin Steak".to_string();
108 /// let string_vec = vec![first, second];
109 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
111 fn connect(&self, sep: &str) -> String;
114 impl<S: Str> StrVector for [S] {
115 fn concat(&self) -> String {
117 return String::new();
120 // `len` calculation may overflow but push_str will check boundaries
121 let len = self.iter().map(|s| s.as_slice().len()).sum();
123 let mut result = String::with_capacity(len);
125 for s in self.iter() {
126 result.push_str(s.as_slice())
132 fn connect(&self, sep: &str) -> String {
134 return String::new();
139 return self.concat();
142 // this is wrong without the guarantee that `self` is non-empty
143 // `len` calculation may overflow but push_str will check boundaries
144 let len = sep.len() * (self.len() - 1)
145 + self.iter().map(|s| s.as_slice().len()).sum();
146 let mut result = String::with_capacity(len);
147 let mut first = true;
149 for s in self.iter() {
153 result.push_str(sep);
155 result.push_str(s.as_slice());
161 impl<S: Str> StrVector for Vec<S> {
163 fn concat(&self) -> String {
164 self.as_slice().concat()
168 fn connect(&self, sep: &str) -> String {
169 self.as_slice().connect(sep)
177 // Helper functions used for Unicode normalization
178 fn canonical_sort(comb: &mut [(char, u8)]) {
179 let len = comb.len();
180 for i in range(0, len) {
181 let mut swapped = false;
182 for j in range(1, len-i) {
183 let class_a = *comb[j-1].ref1();
184 let class_b = *comb[j].ref1();
185 if class_a != 0 && class_b != 0 && class_a > class_b {
190 if !swapped { break; }
195 enum DecompositionType {
200 /// External iterator over the characters of a string's decomposition.
201 /// Use with the `std::iter` module.
203 pub struct Decompositions<'a> {
204 kind: DecompositionType,
206 buffer: Vec<(char, u8)>,
210 impl<'a> Iterator<char> for Decompositions<'a> {
212 fn next(&mut self) -> Option<char> {
213 match self.buffer.as_slice().head() {
216 self.buffer.remove(0);
219 Some(&(c, _)) if self.sorted => {
220 self.buffer.remove(0);
223 _ => self.sorted = false
226 let decomposer = match self.kind {
227 Canonical => unicode::char::decompose_canonical,
228 Compatible => unicode::char::decompose_compatible
232 for ch in self.iter {
233 let buffer = &mut self.buffer;
234 let sorted = &mut self.sorted;
236 let class = unicode::char::canonical_combining_class(d);
237 if class == 0 && !*sorted {
238 canonical_sort(buffer.as_mut_slice());
241 buffer.push((d, class));
248 canonical_sort(self.buffer.as_mut_slice());
252 match self.buffer.remove(0) {
257 Some((c, _)) => Some(c),
262 fn size_hint(&self) -> (uint, Option<uint>) {
263 let (lower, _) = self.iter.size_hint();
269 enum RecompositionState {
275 /// External iterator over the characters of a string's recomposition.
276 /// Use with the `std::iter` module.
278 pub struct Recompositions<'a> {
279 iter: Decompositions<'a>,
280 state: RecompositionState,
281 buffer: RingBuf<char>,
282 composee: Option<char>,
286 impl<'a> Iterator<char> for Recompositions<'a> {
288 fn next(&mut self) -> Option<char> {
292 for ch in self.iter {
293 let ch_class = unicode::char::canonical_combining_class(ch);
294 if self.composee.is_none() {
298 self.composee = Some(ch);
301 let k = self.composee.clone().unwrap();
303 match self.last_ccc {
305 match unicode::char::compose(k, ch) {
307 self.composee = Some(r);
312 self.composee = Some(ch);
315 self.buffer.push(ch);
316 self.last_ccc = Some(ch_class);
321 if l_class >= ch_class {
322 // `ch` is blocked from `composee`
324 self.composee = Some(ch);
325 self.last_ccc = None;
326 self.state = Purging;
329 self.buffer.push(ch);
330 self.last_ccc = Some(ch_class);
333 match unicode::char::compose(k, ch) {
335 self.composee = Some(r);
339 self.buffer.push(ch);
340 self.last_ccc = Some(ch_class);
346 self.state = Finished;
347 if self.composee.is_some() {
348 return self.composee.take();
352 match self.buffer.pop_front() {
353 None => self.state = Composing,
358 match self.buffer.pop_front() {
359 None => return self.composee.take(),
368 /// Replaces all occurrences of one string with another.
372 /// * s - The string containing substrings to replace
373 /// * from - The string to replace
374 /// * to - The replacement string
378 /// The original string with all occurrences of `from` replaced with `to`.
384 /// let string = "orange";
385 /// let new_string = str::replace(string, "or", "str");
386 /// assert_eq!(new_string.as_slice(), "strange");
388 pub fn replace(s: &str, from: &str, to: &str) -> String {
389 let mut result = String::new();
390 let mut last_end = 0;
391 for (start, end) in s.match_indices(from) {
392 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
396 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
404 // Return the initial codepoint accumulator for the first byte.
405 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
406 // for width 3, and 3 bits for width 4
407 macro_rules! utf8_first_byte(
408 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
411 // return the value of $ch updated with continuation byte $byte
412 macro_rules! utf8_acc_cont_byte(
413 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
420 /// A string type that can hold either a `String` or a `&str`.
421 /// This can be useful as an optimization when an allocation is sometimes
422 /// needed but not always.
423 pub enum MaybeOwned<'a> {
424 /// A borrowed string.
430 /// A specialization of `MaybeOwned` to be sendable.
431 pub type SendStr = MaybeOwned<'static>;
433 impl<'a> MaybeOwned<'a> {
434 /// Returns `true` if this `MaybeOwned` wraps an owned string.
439 /// let string = String::from_str("orange");
440 /// let maybe_owned_string = string.into_maybe_owned();
441 /// assert_eq!(true, maybe_owned_string.is_owned());
444 pub fn is_owned(&self) -> bool {
451 /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
456 /// let string = "orange";
457 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
458 /// assert_eq!(true, maybe_owned_string.is_slice());
461 pub fn is_slice(&self) -> bool {
469 /// Trait for moving into a `MaybeOwned`.
470 pub trait IntoMaybeOwned<'a> {
471 /// Moves `self` into a `MaybeOwned`.
472 fn into_maybe_owned(self) -> MaybeOwned<'a>;
475 impl<'a> IntoMaybeOwned<'a> for String {
479 /// let owned_string = String::from_str("orange");
480 /// let maybe_owned_string = owned_string.into_maybe_owned();
481 /// assert_eq!(true, maybe_owned_string.is_owned());
484 fn into_maybe_owned(self) -> MaybeOwned<'a> {
489 impl<'a> IntoMaybeOwned<'a> for &'a str {
493 /// let string = "orange";
494 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
495 /// assert_eq!(false, maybe_owned_str.is_owned());
498 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) } // wraps `self` in the `Slice` variant; no new `String` is built here
501 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
505 /// let str = "orange";
506 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
507 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
508 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
511 fn into_maybe_owned(self) -> MaybeOwned<'a> { self } // already a `MaybeOwned`: the value is returned unchanged
514 impl<'a> PartialEq for MaybeOwned<'a> {
516 fn eq(&self, other: &MaybeOwned) -> bool {
517 self.as_slice() == other.as_slice()
521 impl<'a> Eq for MaybeOwned<'a> {}
523 impl<'a> PartialOrd for MaybeOwned<'a> {
525 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
526 Some(self.cmp(other))
530 impl<'a> Ord for MaybeOwned<'a> {
532 fn cmp(&self, other: &MaybeOwned) -> Ordering {
533 self.as_slice().cmp(&other.as_slice())
537 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
539 fn equiv(&self, other: &S) -> bool {
540 self.as_slice() == other.as_slice()
544 impl<'a> Str for MaybeOwned<'a> {
546 fn as_slice<'b>(&'b self) -> &'b str {
549 Owned(ref s) => s.as_slice()
554 impl<'a> StrAllocating for MaybeOwned<'a> {
556 fn into_string(self) -> String {
558 Slice(s) => String::from_str(s),
564 impl<'a> Collection for MaybeOwned<'a> {
566 fn len(&self) -> uint { self.as_slice().len() } // delegates to the length of the underlying string slice
569 impl<'a> Clone for MaybeOwned<'a> {
571 fn clone(&self) -> MaybeOwned<'a> {
573 Slice(s) => Slice(s),
574 Owned(ref s) => Owned(String::from_str(s.as_slice()))
579 impl<'a> Default for MaybeOwned<'a> {
581 fn default() -> MaybeOwned<'a> { Slice("") } // default value is the `Slice` variant holding the empty string literal
584 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
586 fn hash(&self, hasher: &mut H) {
587 self.as_slice().hash(hasher)
591 impl<'a> fmt::Show for MaybeOwned<'a> {
593 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
595 Slice(ref s) => s.fmt(f),
596 Owned(ref s) => s.fmt(f)
601 /// Unsafe string operations.
603 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
604 pub use core::str::raw::{slice_unchecked};
608 Section: Trait implementations
611 /// Any string that can be represented as a slice.
612 pub trait StrAllocating: Str {
613 /// Converts `self` into a `String`, not making a copy if possible.
614 fn into_string(self) -> String;
616 /// Escapes each char in `self` with `char::escape_default`.
617 fn escape_default(&self) -> String {
618 let me = self.as_slice();
619 let mut out = String::with_capacity(me.len());
620 for c in me.chars() {
621 c.escape_default(|c| out.push(c));
626 /// Escapes each char in `self` with `char::escape_unicode`.
627 fn escape_unicode(&self) -> String {
628 let me = self.as_slice();
629 let mut out = String::with_capacity(me.len());
630 for c in me.chars() {
631 c.escape_unicode(|c| out.push(c));
636 /// Replaces all occurrences of one string with another.
640 /// * `from` - The string to replace
641 /// * `to` - The replacement string
645 /// The original string with all occurrences of `from` replaced with `to`.
650 /// let s = "Do you know the muffin man,
651 /// The muffin man, the muffin man, ...".to_string();
653 /// assert_eq!(s.replace("muffin man", "little lamb"),
654 /// "Do you know the little lamb,
655 /// The little lamb, the little lamb, ...".to_string());
657 /// // not found, so no change.
658 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
660 fn replace(&self, from: &str, to: &str) -> String {
661 let me = self.as_slice();
662 let mut result = String::new();
663 let mut last_end = 0;
664 for (start, end) in me.match_indices(from) {
665 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
669 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
673 /// Given a string, makes a new string with repeated copies of it.
674 fn repeat(&self, nn: uint) -> String {
675 let me = self.as_slice();
676 let mut ret = String::with_capacity(nn * me.len());
677 for _ in range(0, nn) {
683 /// Returns the Levenshtein Distance between two strings.
684 fn lev_distance(&self, t: &str) -> uint {
685 let me = self.as_slice();
686 if me.is_empty() { return t.char_len(); }
687 if t.is_empty() { return me.char_len(); }
689 let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
692 for (i, sc) in me.chars().enumerate() {
695 *dcol.get_mut(0) = current + 1;
697 for (j, tc) in t.chars().enumerate() {
699 let next = dcol[j + 1];
702 *dcol.get_mut(j + 1) = current;
704 *dcol.get_mut(j + 1) = cmp::min(current, next);
705 *dcol.get_mut(j + 1) = cmp::min(dcol[j + 1], dcol[j]) + 1;
716 /// Returns an iterator over the string in Unicode Normalization Form D
717 /// (canonical decomposition).
719 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
721 iter: self.as_slice().chars(),
728 /// Returns an iterator over the string in Unicode Normalization Form KD
729 /// (compatibility decomposition).
731 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
733 iter: self.as_slice().chars(),
740 /// Returns an iterator over the string in Unicode Normalization Form C
741 /// (canonical decomposition followed by canonical composition).
743 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
745 iter: self.nfd_chars(),
747 buffer: RingBuf::new(),
753 /// Returns an iterator over the string in Unicode Normalization Form KC
754 /// (compatibility decomposition followed by canonical composition).
756 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
758 iter: self.nfkd_chars(),
760 buffer: RingBuf::new(),
767 impl<'a> StrAllocating for &'a str {
769 fn into_string(self) -> String {
770 String::from_str(self)
776 use std::iter::AdditiveIterator;
777 use std::iter::range;
778 use std::default::Default;
780 use std::clone::Clone;
781 use std::cmp::{Equal, Greater, Less, Ord, PartialOrd, Equiv};
782 use std::option::{Some, None};
783 use std::ptr::RawPtr;
784 use std::iter::{Iterator, DoubleEndedIterator};
785 use {Collection, MutableSeq};
788 use std::slice::{AsSlice, ImmutableSlice};
791 use slice::CloneableVector;
793 use unicode::char::UnicodeChar;
797 assert!((eq_slice("foobar".slice(0, 3), "foo")));
798 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
799 assert!((!eq_slice("foo1", "foo2")));
805 assert!("" <= "foo");
806 assert!("foo" <= "foo");
807 assert!("foo" != "bar");
812 assert_eq!("".len(), 0u);
813 assert_eq!("hello world".len(), 11u);
814 assert_eq!("\x63".len(), 1u);
815 assert_eq!("\xa2".len(), 2u);
816 assert_eq!("\u03c0".len(), 2u);
817 assert_eq!("\u2620".len(), 3u);
818 assert_eq!("\U0001d11e".len(), 4u);
820 assert_eq!("".char_len(), 0u);
821 assert_eq!("hello world".char_len(), 11u);
822 assert_eq!("\x63".char_len(), 1u);
823 assert_eq!("\xa2".char_len(), 1u);
824 assert_eq!("\u03c0".char_len(), 1u);
825 assert_eq!("\u2620".char_len(), 1u);
826 assert_eq!("\U0001d11e".char_len(), 1u);
827 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
829 assert_eq!("hello".width(false), 10u);
830 assert_eq!("hello".width(true), 10u);
831 assert_eq!("\0\0\0\0\0".width(false), 0u);
832 assert_eq!("\0\0\0\0\0".width(true), 0u);
833 assert_eq!("".width(false), 0u);
834 assert_eq!("".width(true), 0u);
835 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
836 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
841 assert_eq!("hello".find('l'), Some(2u));
842 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
843 assert!("hello".find('x').is_none());
844 assert!("hello".find(|c:char| c == 'x').is_none());
845 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
846 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
851 assert_eq!("hello".rfind('l'), Some(3u));
852 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
853 assert!("hello".rfind('x').is_none());
854 assert!("hello".rfind(|c:char| c == 'x').is_none());
855 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
856 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
861 let empty = String::from_str("");
862 let s: String = empty.as_slice().chars().collect();
863 assert_eq!(empty, s);
864 let data = String::from_str("ประเทศไทย中");
865 let s: String = data.as_slice().chars().collect();
870 fn test_into_bytes() {
871 let data = String::from_str("asdf");
872 let buf = data.into_bytes();
873 assert_eq!(b"asdf", buf.as_slice());
879 assert_eq!("".find_str(""), Some(0u));
880 assert!("banana".find_str("apple pie").is_none());
883 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
884 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
885 assert!(data.slice(2u, 4u).find_str("ab").is_none());
887 let string = "ประเทศไทย中华Việt Nam";
888 let mut data = String::from_str(string);
889 data.push_str(string);
890 assert!(data.as_slice().find_str("ไท华").is_none());
891 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
892 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
894 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
895 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
896 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
897 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
898 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
900 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
901 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
902 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
903 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
904 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
908 fn test_slice_chars() {
909 fn t(a: &str, b: &str, start: uint) {
910 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
913 t("hello", "llo", 2);
917 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
922 fn t(v: &[String], s: &str) {
923 assert_eq!(v.concat().as_slice(), s);
925 t([String::from_str("you"), String::from_str("know"),
926 String::from_str("I'm"),
927 String::from_str("no"), String::from_str("good")],
929 let v: &[String] = [];
931 t([String::from_str("hi")], "hi");
936 fn t(v: &[String], sep: &str, s: &str) {
937 assert_eq!(v.connect(sep).as_slice(), s);
939 t([String::from_str("you"), String::from_str("know"),
940 String::from_str("I'm"),
941 String::from_str("no"), String::from_str("good")],
942 " ", "you know I'm no good");
943 let v: &[String] = [];
945 t([String::from_str("hi")], " ", "hi");
949 fn test_concat_slices() {
950 fn t(v: &[&str], s: &str) {
951 assert_eq!(v.concat().as_slice(), s);
953 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
960 fn test_connect_slices() {
961 fn t(v: &[&str], sep: &str, s: &str) {
962 assert_eq!(v.connect(sep).as_slice(), s);
964 t(["you", "know", "I'm", "no", "good"],
965 " ", "you know I'm no good");
967 t(["hi"], " ", "hi");
972 assert_eq!("x".repeat(4), String::from_str("xxxx"));
973 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
974 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
975 assert_eq!("".repeat(4), String::from_str(""));
976 assert_eq!("hi".repeat(0), String::from_str(""));
980 fn test_unsafe_slice() {
981 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
982 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
983 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
984 fn a_million_letter_a() -> String {
986 let mut rs = String::new();
988 rs.push_str("aaaaaaaaaa");
993 fn half_a_million_letter_a() -> String {
995 let mut rs = String::new();
997 rs.push_str("aaaaa");
1002 let letters = a_million_letter_a();
1003 assert!(half_a_million_letter_a() ==
1004 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1010 fn test_starts_with() {
1011 assert!(("".starts_with("")));
1012 assert!(("abc".starts_with("")));
1013 assert!(("abc".starts_with("a")));
1014 assert!((!"a".starts_with("abc")));
1015 assert!((!"".starts_with("abc")));
1016 assert!((!"ödd".starts_with("-")));
1017 assert!(("ödd".starts_with("öd")));
1021 fn test_ends_with() {
1022 assert!(("".ends_with("")));
1023 assert!(("abc".ends_with("")));
1024 assert!(("abc".ends_with("c")));
1025 assert!((!"a".ends_with("abc")));
1026 assert!((!"".ends_with("abc")));
1027 assert!((!"ddö".ends_with("-")));
1028 assert!(("ddö".ends_with("dö")));
1032 fn test_is_empty() {
1033 assert!("".is_empty());
1034 assert!(!"a".is_empty());
1040 assert_eq!("".replace(a, "b"), String::from_str(""));
1041 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1042 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1044 assert!(" test test ".replace(test, "toast") ==
1045 String::from_str(" toast toast "));
1046 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1050 fn test_replace_2a() {
1051 let data = "ประเทศไทย中华";
1052 let repl = "دولة الكويت";
1055 let a2 = "دولة الكويتทศไทย中华";
1056 assert_eq!(data.replace(a, repl).as_slice(), a2);
1060 fn test_replace_2b() {
1061 let data = "ประเทศไทย中华";
1062 let repl = "دولة الكويت";
1065 let b2 = "ปรدولة الكويتทศไทย中华";
1066 assert_eq!(data.replace(b, repl).as_slice(), b2);
1070 fn test_replace_2c() {
1071 let data = "ประเทศไทย中华";
1072 let repl = "دولة الكويت";
1075 let c2 = "ประเทศไทยدولة الكويت";
1076 assert_eq!(data.replace(c, repl).as_slice(), c2);
1080 fn test_replace_2d() {
1081 let data = "ประเทศไทย中华";
1082 let repl = "دولة الكويت";
1085 assert_eq!(data.replace(d, repl).as_slice(), data);
1090 assert_eq!("ab", "abc".slice(0, 2));
1091 assert_eq!("bc", "abc".slice(1, 3));
1092 assert_eq!("", "abc".slice(1, 1));
1093 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1095 let data = "ประเทศไทย中华";
1096 assert_eq!("ป", data.slice(0, 3));
1097 assert_eq!("ร", data.slice(3, 6));
1098 assert_eq!("", data.slice(3, 3));
1099 assert_eq!("华", data.slice(30, 33));
1101 fn a_million_letter_x() -> String {
1103 let mut rs = String::new();
1105 rs.push_str("华华华华华华华华华华");
1110 fn half_a_million_letter_x() -> String {
1112 let mut rs = String::new();
1114 rs.push_str("华华华华华");
1119 let letters = a_million_letter_x();
1120 assert!(half_a_million_letter_x() ==
1121 String::from_str(letters.as_slice().slice(0u, 3u * 500000u)));
1126 let ss = "中华Việt Nam";
1128 assert_eq!("华", ss.slice(3u, 6u));
1129 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1131 assert_eq!("ab", "abc".slice(0u, 2u));
1132 assert_eq!("bc", "abc".slice(1u, 3u));
1133 assert_eq!("", "abc".slice(1u, 1u));
1135 assert_eq!("中", ss.slice(0u, 3u));
1136 assert_eq!("华V", ss.slice(3u, 7u));
1137 assert_eq!("", ss.slice(3u, 3u));
1152 fn test_slice_fail() {
1153 "中华Việt Nam".slice(0u, 2u);
1157 fn test_slice_from() {
1158 assert_eq!("abcd".slice_from(0), "abcd");
1159 assert_eq!("abcd".slice_from(2), "cd");
1160 assert_eq!("abcd".slice_from(4), "");
1163 fn test_slice_to() {
1164 assert_eq!("abcd".slice_to(0), "");
1165 assert_eq!("abcd".slice_to(2), "ab");
1166 assert_eq!("abcd".slice_to(4), "abcd");
1170 fn test_trim_left_chars() {
1171 let v: &[char] = &[];
1172 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1173 let chars: &[char] = &['*', ' '];
1174 assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
1175 assert_eq!(" *** *** ".trim_left_chars(chars), "");
1176 assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
1178 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1179 let chars: &[char] = &['1', '2'];
1180 assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
1181 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1185 fn test_trim_right_chars() {
1186 let v: &[char] = &[];
1187 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1188 let chars: &[char] = &['*', ' '];
1189 assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
1190 assert_eq!(" *** *** ".trim_right_chars(chars), "");
1191 assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
1193 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1194 let chars: &[char] = &['1', '2'];
1195 assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
1196 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1200 fn test_trim_chars() {
1201 let v: &[char] = &[];
1202 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1203 let chars: &[char] = &['*', ' '];
1204 assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
1205 assert_eq!(" *** *** ".trim_chars(chars), "");
1206 assert_eq!("foo".trim_chars(chars), "foo");
1208 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1209 let chars: &[char] = &['1', '2'];
1210 assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
1211 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1215 fn test_trim_left() {
1216 assert_eq!("".trim_left(), "");
1217 assert_eq!("a".trim_left(), "a");
1218 assert_eq!(" ".trim_left(), "");
1219 assert_eq!(" blah".trim_left(), "blah");
1220 assert_eq!(" \u3000 wut".trim_left(), "wut");
1221 assert_eq!("hey ".trim_left(), "hey ");
1225 fn test_trim_right() {
1226 assert_eq!("".trim_right(), "");
1227 assert_eq!("a".trim_right(), "a");
1228 assert_eq!(" ".trim_right(), "");
1229 assert_eq!("blah ".trim_right(), "blah");
1230 assert_eq!("wut \u3000 ".trim_right(), "wut");
1231 assert_eq!(" hey".trim_right(), " hey");
1236 assert_eq!("".trim(), "");
1237 assert_eq!("a".trim(), "a");
1238 assert_eq!(" ".trim(), "");
1239 assert_eq!(" blah ".trim(), "blah");
1240 assert_eq!("\nwut \u3000 ".trim(), "wut");
1241 assert_eq!(" hey dude ".trim(), "hey dude");
1245 fn test_is_whitespace() {
1246 assert!("".is_whitespace());
1247 assert!(" ".is_whitespace());
1248 assert!("\u2009".is_whitespace()); // Thin space
1249 assert!(" \n\t ".is_whitespace());
1250 assert!(!" _ ".is_whitespace());
1254 fn test_slice_shift_char() {
1255 let data = "ประเทศไทย中";
1256 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1260 fn test_slice_shift_char_2() {
1262 assert_eq!(empty.slice_shift_char(), (None, ""));
1267 // deny overlong encodings
1268 assert!(!is_utf8([0xc0, 0x80]));
1269 assert!(!is_utf8([0xc0, 0xae]));
1270 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1271 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1272 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1273 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1274 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1277 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1278 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1280 assert!(is_utf8([0xC2, 0x80]));
1281 assert!(is_utf8([0xDF, 0xBF]));
1282 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1283 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1284 assert!(is_utf8([0xEE, 0x80, 0x80]));
1285 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1286 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1287 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1291 fn test_is_utf16() {
1292 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1300 // surrogate pairs (randomly generated with Python 3's
1301 // .encode('utf-16be'))
1302 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1303 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1304 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1306 // mixtures (also random)
1307 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1308 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1309 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1312 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1315 // surrogate + regular unit
1317 // surrogate + lead surrogate
1319 // unterminated surrogate
1321 // trail surrogate without a lead
1324 // random byte sequences that Python 3's .decode('utf-16be')
1326 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1327 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1328 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1329 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1330 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1331 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1332 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1333 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1334 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1335 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1336 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1337 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1338 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1339 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1340 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1341 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1342 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1343 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1344 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1345 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1346 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1350 fn test_as_bytes() {
1353 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1354 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1358 assert_eq!("".as_bytes(), b);
1359 assert_eq!("abc".as_bytes(), b"abc");
1360 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1365 fn test_as_bytes_fail() {
1366 // Don't double free. (I'm not sure if this exercises the
1367 // original problem code path anymore.)
1368 let s = String::from_str("");
1369 let _bytes = s.as_bytes();
1375 let buf = "hello".as_ptr();
1377 assert_eq!(*buf.offset(0), b'h');
1378 assert_eq!(*buf.offset(1), b'e');
1379 assert_eq!(*buf.offset(2), b'l');
1380 assert_eq!(*buf.offset(3), b'l');
1381 assert_eq!(*buf.offset(4), b'o');
// Checks that `subslice_offset` reports the byte offset of a slice within the
// string it was taken from.
1386 fn test_subslice_offset() {
1387 let a = "kernelsprite";
// `b` is the tail starting at byte 7; `c` is the head that stops 6 bytes
// before the end, so it starts at offset 0.
1388 let b = a.slice(7, a.len());
1389 let c = a.slice(0, a.len() - 6);
1390 assert_eq!(a.subslice_offset(b), 7);
1391 assert_eq!(a.subslice_offset(c), 0);
// Same check on slices produced by `lines()`: each line is one byte long and
// followed by '\n', hence the expected offsets 0, 2 and 4.
1393 let string = "a\nb\nc";
1394 let lines: Vec<&str> = string.lines().collect();
1395 let lines = lines.as_slice();
1396 assert_eq!(string.subslice_offset(lines[0]), 0);
1397 assert_eq!(string.subslice_offset(lines[1]), 2);
1398 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with an argument that is a separate literal rather
// than a slice taken from the receiver; the result is discarded.
// NOTE(review): presumably this call is expected to fail, but the attribute
// that would declare that is not visible in this view — confirm.
1403 fn test_subslice_offset_2() {
1404 let a = "alchemiter";
1405 let b = "cruxtruder";
1406 a.subslice_offset(b);
// Round-trips a `String` through a byte vector and back via `from_utf8`, then
// reads the bytes of both strings at the same index.
// NOTE(review): the loop header and the comparison of `a` with `b` (and any
// use of `n1`/`n2`) are not visible in this view.
1410 fn vec_str_conversions() {
1411 let s1: String = String::from_str("All mimsy were the borogoves");
// Copy the bytes out, then rebuild a second string from that copy.
1413 let v: Vec<u8> = s1.as_bytes().to_vec();
1414 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1415 let mut i: uint = 0u;
1416 let n1: uint = s1.len();
1417 let n2: uint = v.len();
// Byte `i` of the original and of the rebuilt string.
1420 let a: u8 = s1.as_bytes()[i];
1421 let b: u8 = s2.as_bytes()[i];
// Checks substring search with `contains` on ASCII and multi-script input.
1430 fn test_contains() {
// Needle in the middle, at the start and at the end of the haystack.
1431 assert!("abcde".contains("bcd"));
1432 assert!("abcde".contains("abcd"));
1433 assert!("abcde".contains("bcde"));
// The empty needle is expected to be found in any haystack, including "".
1434 assert!("abcde".contains(""));
1435 assert!("".contains(""));
// Needles that do not occur.
1436 assert!(!"abcde".contains("def"));
1437 assert!(!"".contains("a"));
// Multi-byte characters: needles at the start and in the interior.
1439 let data = "ประเทศไทย中华Việt Nam";
1440 assert!(data.contains("ประเ"));
1441 assert!(data.contains("ะเ"));
1442 assert!(data.contains("中华"));
// Every character of this needle occurs in `data`, but not contiguously in
// this order, so it must not match.
1443 assert!(!data.contains("ไท华"));
// Checks `contains_char` for a present character, a single-character string,
// an absent character, and the empty string.
1447 fn test_contains_char() {
1448 assert!("abc".contains_char('b'));
1449 assert!("a".contains_char('a'));
1450 assert!(!"abc".contains_char('d'));
1451 assert!(!"".contains_char('a'));
// Compares `truncate_utf16_at_nul(v)` with an expected slice `b` for a series
// of inputs; the expected results visible here are [], [1], [1, 2], [1, 2, 3].
// NOTE(review): the `let v = ...` input for each case is not visible in this
// view, so the inputs themselves cannot be described from here.
1455 fn test_truncate_utf16_at_nul() {
1457 let b: &[u16] = &[];
1458 assert_eq!(truncate_utf16_at_nul(v), b);
// No new `b` binding is visible before this assertion, so it appears to
// reuse the empty expected slice from above.
1461 assert_eq!(truncate_utf16_at_nul(v), b);
1464 let b: &[u16] = &[1];
1465 assert_eq!(truncate_utf16_at_nul(v), b);
1468 let b: &[u16] = &[1, 2];
1469 assert_eq!(truncate_utf16_at_nul(v), b);
1472 let b: &[u16] = &[1, 2, 3];
1473 assert_eq!(truncate_utf16_at_nul(v), b);
// Walks `s` front to back, checking that `char_at(pos)` matches the expected
// character list `v` at each position.
// NOTE(review): the enclosing `fn` header and the initialisation of `pos` are
// not visible in this view.
1478 let s = "ศไทย中华Việt Nam";
1479 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1481 for ch in v.iter() {
1482 assert!(s.char_at(pos) == *ch);
// Advance by the byte length of a one-character `String` built from `ch`.
1483 pos += String::from_char(1, *ch).len();
// Walks `s` back to front, checking that `char_at_reverse(pos)` matches the
// expected characters taken in reverse order.
1488 fn test_char_at_reverse() {
1489 let s = "ศไทย中华Việt Nam";
1490 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
// Start at the end of the string (its byte length).
1491 let mut pos = s.len();
1492 for ch in v.iter().rev() {
1493 assert!(s.char_at_reverse(pos) == *ch);
// Step back by the byte length of a one-character `String` built from `ch`.
1494 pos -= String::from_char(1, *ch).len();
// Checks the output of `escape_unicode`: per the expectations below, code
// points up to 0xff appear as `\xNN`, larger four-digit ones as `\uNNNN`, and
// those above 0xffff as `\UNNNNNNNN`.
1499 fn test_escape_unicode() {
// Letters, space, control characters and quote/backslash all use `\xNN`.
1500 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1501 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1502 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1503 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1504 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), String::from_str("\\x00\\x01\\xfe\\xff"));
// Wider code points.
1505 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1506 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1507 String::from_str("\\U00010000\\U0010ffff"));
// Mixed widths within one string.
1508 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1509 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
// Checks the output of `escape_default`: per the expectations below, letters
// and spaces pass through unchanged, `\r \n \t`, quotes and backslash get
// short backslash escapes, and non-ASCII code points become `\u`/`\U` escapes.
1513 fn test_escape_default() {
1514 assert_eq!("abc".escape_default(), String::from_str("abc"));
1515 assert_eq!("a c".escape_default(), String::from_str("a c"));
1516 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1517 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1518 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1519 assert_eq!("\U00010000\U0010ffff".escape_default(),
1520 String::from_str("\\U00010000\\U0010ffff"));
// Mixed: unescaped ASCII next to an escaped code point, and vice versa.
1521 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1522 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Checks the total ordering (`cmp`) of string slices: a longer string with a
// shared prefix is greater, a shorter one is less, identical strings are
// equal, and the first differing byte decides regardless of length.
fn test_total_ord() {
    // Each comparison used to be a bare `... == Ordering;` statement whose
    // boolean result was thrown away, so this test could never fail. Assert
    // on the ordering instead.
    assert_eq!("1234".cmp(&("123")), Greater);
    assert_eq!("123".cmp(&("1234")), Less);
    assert_eq!("1234".cmp(&("1234")), Equal);
    // '5' < '6' at the first differing position, despite the longer receiver.
    assert_eq!("12345555".cmp(&("123456")), Less);
    // '2' > '1' at the first position, despite the shorter receiver.
    assert_eq!("22".cmp(&("1234")), Greater);
}
// Checks the `ch` field returned by `char_range_at` at successive byte
// offsets of a string whose characters are expected to start at offsets
// 0, 1, 3, 6, 10, 14, 17 and 19 (i.e. 1-, 2-, 3- and 4-byte characters).
1535 fn test_char_range_at() {
1536 let data = "b¢€𤭢𤭢€¢b";
1537 assert_eq!('b', data.char_range_at(0).ch);
1538 assert_eq!('¢', data.char_range_at(1).ch);
1539 assert_eq!('€', data.char_range_at(3).ch);
1540 assert_eq!('𤭢', data.char_range_at(6).ch);
1541 assert_eq!('𤭢', data.char_range_at(10).ch);
1542 assert_eq!('€', data.char_range_at(14).ch);
1543 assert_eq!('¢', data.char_range_at(17).ch);
1544 assert_eq!('b', data.char_range_at(19).ch);
// Asserts that `char_range_at_reverse` called at offset 0 reports `next == 0`
// (the name indicates this guards against stepping below the start).
1548 fn test_char_range_at_reverse_underflow() {
1549 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Compares the characters yielded by `chars()` with the expected list `v`,
// then checks that exactly `v.len()` characters were seen.
// NOTE(review): the loop header and the `pos` bookkeeping are not visible in
// this view.
1553 fn test_iterator() {
1554 let s = "ศไทย中华Việt Nam";
1555 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1558 let mut it = s.chars();
// `c` is the current character from the iterator; `pos` indexes `v`.
1561 assert_eq!(c, v[pos]);
1564 assert_eq!(pos, v.len());
// Compares the characters yielded by `chars().rev()` with `v`, which lists the
// expected characters last-to-first, then checks the count.
// NOTE(review): the loop header and the `pos` bookkeeping are not visible in
// this view.
1568 fn test_rev_iterator() {
1569 let s = "ศไทย中华Việt Nam";
1570 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1573 let mut it = s.chars().rev();
1576 assert_eq!(c, v[pos]);
1579 assert_eq!(pos, v.len());
// For every value in 0..0x110000 that `from_u32` accepts as a `char`, encodes
// it as UTF-8 into a 4-byte buffer and checks that the first item of `chars()`
// on the encoded text is that same character.
1583 fn test_chars_decoding() {
1584 let mut bytes = [0u8, ..4];
1585 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
// If encoding yields no length, 0 is used; the slice is then empty and the
// comparison below fails, so the problem still surfaces as a panic.
1586 let len = c.encode_utf8(bytes).unwrap_or(0);
1587 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1588 if Some(c) != s.chars().next() {
1589 panic!("character {:x}={} does not decode correctly", c as u32, c);
// Same sweep as the forward decoding test, but checks the first item of
// `chars().rev()`: for every value in 0..0x110000 that `from_u32` accepts,
// the encoded single character must decode back to itself from the end.
1595 fn test_chars_rev_decoding() {
1596 let mut bytes = [0u8, ..4];
1597 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
// A missing length becomes 0, which makes the comparison below fail.
1598 let len = c.encode_utf8(bytes).unwrap_or(0);
1599 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1600 if Some(c) != s.chars().rev().next() {
1601 panic!("character {:x}={} does not decode correctly", c as u32, c);
// Zips a `chars()` iterator with a clone of itself and asserts that both
// yield equal characters pairwise.
// NOTE(review): the embedded numbering skips one line between creating `it`
// and the assertion; whatever it does to `it` is not visible here.
1607 fn test_iterator_clone() {
1608 let s = "ศไทย中华Việt Nam";
1609 let mut it = s.chars();
1611 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Compares each byte yielded by `bytes()` with the expected byte at the same
// position in `v`.
// NOTE(review): the binding of `v`, the initialisation of `pos` and its
// increment are not visible in this view; the numeric rows below are
// presumably the elements of `v`.
1615 fn test_bytesator() {
1616 let s = "ศไทย中华Việt Nam";
1618 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1619 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1624 for b in s.bytes() {
1625 assert_eq!(b, v[pos]);
// Compares each byte yielded by `bytes().rev()` with the expected byte in `v`,
// indexing `v` from its end.
// NOTE(review): the binding of `v` and the decrement of `pos` are not visible
// in this view; the numeric rows below are presumably the elements of `v`.
1631 fn test_bytes_revator() {
1632 let s = "ศไทย中华Việt Nam";
1634 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1635 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
// Start one past the last index of `v`.
1638 let mut pos = v.len();
1640 for b in s.bytes().rev() {
1642 assert_eq!(b, v[pos]);
// Compares each `(byte offset, char)` pair yielded by `char_indices()` with
// the expected offsets `p` and characters `v`, then checks that the number of
// pairs seen equals the length of both tables.
// NOTE(review): the loop header and the `pos` bookkeeping are not visible in
// this view.
1647 fn test_char_indicesator() {
1648 let s = "ศไทย中华Việt Nam";
1649 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1650 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1653 let mut it = s.char_indices();
1656 assert_eq!(c, (p[pos], v[pos]));
1659 assert_eq!(pos, v.len());
1660 assert_eq!(pos, p.len());
// Compares each `(byte offset, char)` pair yielded by `char_indices().rev()`
// with `p` and `v`, which list the expected offsets and characters
// last-to-first, then checks the count against both tables.
// NOTE(review): the loop header and the `pos` bookkeeping are not visible in
// this view.
1664 fn test_char_indices_revator() {
1665 let s = "ศไทย中华Việt Nam";
1666 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1667 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1670 let mut it = s.char_indices().rev();
1673 assert_eq!(c, (p[pos], v[pos]));
1676 assert_eq!(pos, v.len());
1677 assert_eq!(pos, p.len());
// Checks `split` with a `char` separator and with an equivalent closure, for
// an ASCII separator (' ') and a multi-byte one ('ä'), in forward and
// reversed iteration.
// NOTE(review): each `rsplit` vector is collected from `.rev()` yet asserted
// in forward order; the embedded numbering skips a line before each of those
// assertions, presumably one that reverses the vector — confirm.
1681 fn test_split_char_iterator() {
1682 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator given as a char.
1684 let split: Vec<&str> = data.split(' ').collect();
1685 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1687 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1689 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// ASCII separator given as a predicate.
1691 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1692 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1694 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1696 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
// Multi-byte separator given as a char.
1699 let split: Vec<&str> = data.split('ä').collect();
1700 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1702 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1704 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Multi-byte separator given as a predicate.
1706 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1707 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1709 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1711 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn(3, sep)`: per the expectations below it yields the first
// three pieces followed by the unsplit remainder (four items in total), for a
// char separator and an equivalent closure, ASCII and multi-byte.
1715 fn test_splitn_char_iterator() {
1716 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
1718 let split: Vec<&str> = data.splitn(3, ' ').collect();
1719 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1721 let split: Vec<&str> = data.splitn(3, |c: char| c == ' ').collect();
1722 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Multi-byte separator; later occurrences stay inside the remainder.
1725 let split: Vec<&str> = data.splitn(3, 'ä').collect();
1726 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1728 let split: Vec<&str> = data.splitn(3, |c: char| c == 'ä').collect();
1729 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Checks `rsplitn(3, sep)`: per the expectations below the three splits are
// taken from the end of the string and the unsplit remainder is the leading
// part, for a char separator and an equivalent closure.
// NOTE(review): the vectors are declared `mut` and asserted in front-to-back
// order; the embedded numbering skips a line before each assertion,
// presumably one that reverses the vector — confirm.
1733 fn test_rsplitn_char_iterator() {
1734 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// ASCII separator.
1736 let mut split: Vec<&str> = data.rsplitn(3, ' ').collect();
1738 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1740 let mut split: Vec<&str> = data.rsplitn(3, |c: char| c == ' ').collect();
1742 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
// Multi-byte separator.
1745 let mut split: Vec<&str> = data.rsplitn(3, 'ä').collect();
1747 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1749 let mut split: Vec<&str> = data.rsplitn(3, |c: char| c == 'ä').collect();
1751 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on input that begins and ends
// with the separator.
1755 fn test_split_char_iterator_no_trailing() {
1756 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
// `split` is expected to yield an empty piece at both ends.
1758 let split: Vec<&str> = data.split('\n').collect();
1759 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
// `split_terminator` is expected to drop only the trailing empty piece.
1761 let split: Vec<&str> = data.split_terminator('\n').collect();
1762 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Same contrast between `split` and `split_terminator` as the forward test,
// but iterating the pieces through `.rev()`.
// NOTE(review): the vectors are collected in reverse yet asserted in forward
// order; the embedded numbering skips a line before each assertion,
// presumably one that reverses the vector — confirm.
1766 fn test_rev_split_char_iterator_no_trailing() {
1767 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1769 let mut split: Vec<&str> = data.split('\n').rev().collect();
1771 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1773 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1775 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Checks that `words()` yields the seven words of `data`, with leading,
// trailing and repeated whitespace (spaces, tabs, newlines) producing no
// empty items.
// NOTE(review): the enclosing `fn` header is not visible in this view.
1780 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1781 let words: Vec<&str> = data.words().collect();
1782 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `lev_distance`: every single-character string is at distance 0 from
// itself, and three near-identical sentences have the expected pairwise
// distances in both argument orders.
1786 fn test_lev_distance() {
1787 use std::char::{ from_u32, MAX };
1788 // Test bytelength agnosticity
// NOTE(review): the sweep's upper bound is `MAX as u32`; if `range` is
// half-open, `MAX` itself is never exercised — confirm whether that is
// intended (the decoding tests sweep up to 0x110000).
1789 for c in range(0u32, MAX as u32)
1790 .filter_map(|i| from_u32(i))
1791 .map(|i| String::from_char(1, i)) {
1792 assert_eq!(c[].lev_distance(c[]), 0);
// `b` replaces the first 'ä' of `a` with 'a'; `c` is `b` without the leading
// newline. So a↔b and b↔c differ by one edit and a↔c by two.
1795 let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1796 let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
1797 let c = "Mary häd ä little lämb\n\nLittle lämb\n";
1798 assert_eq!(a.lev_distance(b), 1);
1799 assert_eq!(b.lev_distance(a), 1);
1800 assert_eq!(a.lev_distance(c), 2);
1801 assert_eq!(c.lev_distance(a), 2);
1802 assert_eq!(b.lev_distance(c), 1);
1803 assert_eq!(c.lev_distance(b), 1);
// Table-driven check of `nfd_chars()`: each `t!(input, expected)` case asserts
// that collecting `input.nfd_chars()` into a `String` equals `expected`.
// NOTE(review): the `macro_rules!` header of the local helper is not visible
// in this view; the two lines after the `fn` header are its single arm.
1807 fn test_nfd_chars() {
1809 ($input: expr, $expected: expr) => {
1810 assert_eq!($input.nfd_chars().collect::<String>(), $expected.into_string());
1814 t!("\u1e0b\u01c4", "d\u0307\u01c4");
1815 t!("\u2026", "\u2026");
1816 t!("\u2126", "\u03a9");
1817 t!("\u1e0b\u0323", "d\u0323\u0307");
1818 t!("\u1e0d\u0307", "d\u0323\u0307");
1819 t!("a\u0301", "a\u0301");
1820 t!("\u0301a", "\u0301a");
1821 t!("\ud4db", "\u1111\u1171\u11b6");
1822 t!("\uac1c", "\u1100\u1162");
// Table-driven check of `nfkd_chars()`: each `t!(input, expected)` case
// asserts that collecting `input.nfkd_chars()` into a `String` equals
// `expected`. Unlike the NFD table, U+2026 is expected to become "..." and
// U+01C4 to become "DZ" followed by U+030C.
// NOTE(review): the `macro_rules!` header of the local helper is not visible
// in this view.
1826 fn test_nfkd_chars() {
1828 ($input: expr, $expected: expr) => {
1829 assert_eq!($input.nfkd_chars().collect::<String>(), $expected.into_string());
1833 t!("\u1e0b\u01c4", "d\u0307DZ\u030c");
1834 t!("\u2026", "...");
1835 t!("\u2126", "\u03a9");
1836 t!("\u1e0b\u0323", "d\u0323\u0307");
1837 t!("\u1e0d\u0307", "d\u0323\u0307");
1838 t!("a\u0301", "a\u0301");
1839 t!("\u0301a", "\u0301a");
1840 t!("\ud4db", "\u1111\u1171\u11b6");
1841 t!("\uac1c", "\u1100\u1162");
// Table-driven check of `nfc_chars()`: each `t!(input, expected)` case asserts
// that collecting `input.nfc_chars()` into a `String` equals `expected`.
// NOTE(review): the `macro_rules!` header of the local helper is not visible
// in this view.
1845 fn test_nfc_chars() {
1847 ($input: expr, $expected: expr) => {
1848 assert_eq!($input.nfc_chars().collect::<String>(), $expected.into_string());
1852 t!("\u1e0b\u01c4", "\u1e0b\u01c4");
1853 t!("\u2026", "\u2026");
1854 t!("\u2126", "\u03a9");
1855 t!("\u1e0b\u0323", "\u1e0d\u0307");
1856 t!("\u1e0d\u0307", "\u1e0d\u0307");
// 'a' followed by U+0301 is expected to compose to U+00E1; the same mark
// placed before 'a' is expected to stay as is.
1857 t!("a\u0301", "\xe1");
1858 t!("\u0301a", "\u0301a");
1859 t!("\ud4db", "\ud4db");
1860 t!("\uac1c", "\uac1c");
// Several marks after 'a': the expected output composes U+0300 into U+00E0
// and lists the remaining marks in a different order than the input.
1861 t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b");
// Table-driven check of `nfkc_chars()`: each `t!(input, expected)` case
// asserts that collecting `input.nfkc_chars()` into a `String` equals
// `expected`. Unlike the NFC table, U+2026 is expected to become "..." and
// U+01C4 to become "D" followed by U+017D.
// NOTE(review): the `macro_rules!` header of the local helper is not visible
// in this view.
1865 fn test_nfkc_chars() {
1867 ($input: expr, $expected: expr) => {
1868 assert_eq!($input.nfkc_chars().collect::<String>(), $expected.into_string());
1872 t!("\u1e0b\u01c4", "\u1e0bD\u017d");
1873 t!("\u2026", "...");
1874 t!("\u2126", "\u03a9");
1875 t!("\u1e0b\u0323", "\u1e0d\u0307");
1876 t!("\u1e0d\u0307", "\u1e0d\u0307");
1877 t!("a\u0301", "\xe1");
1878 t!("\u0301a", "\u0301a");
1879 t!("\ud4db", "\ud4db");
1880 t!("\uac1c", "\uac1c");
1881 t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b");
// Checks that `lines()` yields the same four lines (including the empty
// first line and the empty line in the middle) whether or not the input ends
// with a newline.
// NOTE(review): the enclosing `fn` header is not visible in this view.
1886 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1887 let lines: Vec<&str> = data.lines().collect();
1888 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1890 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1891 let lines: Vec<&str> = data.lines().collect();
1892 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1896 fn test_graphemes() {
1897 use std::iter::order;
1898 // official Unicode test data
1899 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1900 let test_same: [(_, &[_]), .. 325] = [
1901 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1902 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1903 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1904 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1905 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1906 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1907 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1908 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1909 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1910 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1911 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1912 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1913 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1914 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1915 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1916 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1917 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1918 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1919 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1920 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1921 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1922 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1923 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
1924 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
1925 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
1926 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
1927 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
1928 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
1929 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
1930 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
1931 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
1932 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
1933 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
1934 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
1935 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
1936 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
1937 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
1938 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
1939 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
1940 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
1941 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
1942 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
1943 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
1944 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
1945 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
1946 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
1947 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
1948 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
1949 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
1950 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
1951 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
1952 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
1953 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
1954 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
1955 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
1956 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
1957 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
1958 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
1959 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
1960 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
1961 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
1962 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
1963 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
1964 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
1965 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
1966 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
1967 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
1968 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
1969 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
1970 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
1971 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
1972 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
1973 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
1974 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
1975 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
1976 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
1977 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
1978 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
1979 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
1980 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
1981 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
1982 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
1983 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
1984 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
1985 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
1986 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
1987 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
1988 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
1989 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
1990 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
1991 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
1992 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
1993 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
1994 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
1995 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
1996 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
1997 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
1998 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
1999 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
2000 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
2001 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
2002 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
2003 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
2004 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
2005 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
2006 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
2007 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
2008 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
2009 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
2010 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
2011 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
2012 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
2013 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
2014 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
2015 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
2016 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
2017 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
2018 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
2019 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
2020 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
2021 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
2022 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
2023 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
2024 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
2025 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
2026 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
2027 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
2028 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
2029 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
2030 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
2031 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
2032 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
2033 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
2034 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
2035 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
2036 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
2037 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
2038 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
2039 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
2040 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
2041 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
2042 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
2043 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
2044 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
2045 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
2046 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
2047 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
2048 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
2049 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
2050 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
2051 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
2052 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
2053 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
2054 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2055 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2056 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2057 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2058 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2059 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2060 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2061 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2062 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2063 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2064 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2065 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2066 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2067 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2068 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2069 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2070 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2071 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2072 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2073 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2074 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2075 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2076 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2077 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2078 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2079 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2080 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2081 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2082 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2083 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2084 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2085 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2086 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2087 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2088 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2089 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2090 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2091 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2092 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2093 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2094 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2095 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2096 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2097 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2098 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2099 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2100 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2101 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2102 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2103 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2104 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2105 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2106 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2109 let test_diff: [(_, &[_], &[_]), .. 23] = [
2110 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2111 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2112 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2113 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2114 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2115 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2116 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2117 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2118 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2119 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2120 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2121 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2122 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2123 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2124 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2125 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2126 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2127 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2128 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2129 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2130 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2131 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2132 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2135 for &(s, g) in test_same.iter() {
2136 // test forward iterator
2137 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2138 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2140 // test reverse iterator
2141 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2142 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2145 for &(s, gt, gf) in test_diff.iter() {
2146 // test forward iterator
2147 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2148 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2150 // test reverse iterator
2151 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2152 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2155 // test the indices iterators
2156 let s = "a̐éö̲\r\n";
2157 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2158 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2159 assert_eq!(gr_inds.as_slice(), b);
2160 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2161 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2162 assert_eq!(gr_inds.as_slice(), b);
2163 let mut gr_inds = s.grapheme_indices(true);
2164 let e1 = gr_inds.size_hint();
2165 assert_eq!(e1, (1, Some(13)));
2166 let c = gr_inds.count();
2168 let e2 = gr_inds.size_hint();
2169 assert_eq!(e2, (0, Some(0)));
2171 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2173 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2174 let b: &[_] = &["\r", "\r\n", "\n"];
2175 assert_eq!(gr.as_slice(), b);
// Checks `split_str` against hand-written expected pieces for a variety of
// haystack/separator pairs.
2179 fn test_split_strator() {
// Helper: collects `s.split_str(sep)` into a `Vec<&str>` and asserts that it
// equals the expected pieces `u`.
2180 fn t(s: &str, sep: &str, u: &[&str]) {
2181 let v: Vec<&str> = s.split_str(sep).collect();
2182 assert_eq!(v.as_slice(), u.as_slice());
// Separator absent from the input: the input is expected back as one piece.
2184 t("--1233345--", "12345", ["--1233345--"]);
// Separator in the middle, at the start, at the end, and at both ends; a
// separator at an edge is expected to produce an empty piece on that side.
2185 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2186 t("::hello::there", "::", ["", "hello", "there"]);
2187 t("hello::there::", "::", ["hello", "there", ""]);
2188 t("::hello::there::", "::", ["", "hello", "there", ""]);
// Multi-byte separator embedded in multi-byte text.
2189 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2190 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2191 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2192 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Short inputs, including ones made only of separator characters; the
// expected pieces show matches being taken left to right without overlap
// (e.g. "zzz" split on "zz" leaves a trailing "z").
2194 t("zz", "zz", ["",""]);
2195 t("ok", "z", ["ok"]);
2196 t("zzz", "zz", ["","z"]);
2197 t("zzzzz", "zz", ["","","z"]);
// Checks that the `Default` value of a string-like type is the empty string.
// NOTE(review): the calls that instantiate `t` for concrete types are not
// visible in this listing.
2201 fn test_str_default() {
2202 use std::default::Default;
// Generic over any `S: Default + Str`: builds the default value and asserts
// that its slice view equals "".
2203 fn t<S: Default + Str>() {
2204 let s: S = Default::default();
2205 assert_eq!(s.as_slice(), "");
// Checks that `len()` can be used generically through the `Collection` bound
// for both `&str` and `String` elements.
2213 fn test_str_container() {
// Adds up `len()` over every element of `v`.
2214 fn sum_len<S: Collection>(v: &[S]) -> uint {
2215 v.iter().map(|x| x.len()).sum()
// Each array below holds text whose lengths are asserted to total 5:
// string literals, owned `String`s, and a slice borrowed from a `String`.
2218 let s = String::from_str("01234");
2219 assert_eq!(5, sum_len(["012", "", "34"]));
2220 assert_eq!(5, sum_len([String::from_str("01"), String::from_str("2"),
2221 String::from_str("34"), String::from_str("")]));
2222 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8` on accepted and rejected byte sequences.
2226 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by this first assertion is not
// visible in this listing; the expected result is `Some("hello")`.
2228 assert_eq!(from_utf8(xs), Some("hello"));
// Bytes taken from a multi-byte string literal are expected to decode back to
// the same text.
2230 let xs = "ศไทย中华Việt Nam".as_bytes();
2231 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// A trailing 0xFF byte is expected to make the whole input be rejected.
2233 let xs = b"hello\xFF";
2234 assert_eq!(from_utf8(xs), None);
// Exercises length, slicing, formatting, ordering, default and equivalence
// operations on a `Slice` value and an `Owned` value that both hold "abcde".
2238 fn test_maybe_owned_traits() {
// Borrowed variant.
2239 let s = Slice("abcde");
2240 assert_eq!(s.len(), 5);
2241 assert_eq!(s.as_slice(), "abcde");
2242 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2243 assert_eq!(format!("{}", s).as_slice(), "abcde");
// Ordering is checked against the other variant.
2244 assert!(s.lt(&Owned(String::from_str("bcdef"))));
// An empty `Slice` is expected to equal the default value.
2245 assert_eq!(Slice(""), Default::default());
// Owned variant: the same checks, mirrored.
2247 let o = Owned(String::from_str("abcde"));
2248 assert_eq!(o.len(), 5);
2249 assert_eq!(o.as_slice(), "abcde");
2250 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2251 assert_eq!(format!("{}", o).as_slice(), "abcde");
2252 assert!(o.lt(&Slice("bcdef")));
2253 assert_eq!(Owned(String::from_str("")), Default::default());
// Comparison and equivalence are expected to hold in both directions between
// the two variants when the text is the same.
2255 assert!(s.cmp(&o) == Equal);
2256 assert!(s.equiv(&o));
2258 assert!(o.cmp(&s) == Equal);
2259 assert!(o.equiv(&s));
// Checks that `is_slice` / `is_owned` report the variant a value was built with.
2263 fn test_maybe_owned_methods() {
// A `Slice` value: `is_slice` true, `is_owned` false.
2264 let s = Slice("abcde");
2265 assert!(s.is_slice());
2266 assert!(!s.is_owned());
// An `Owned` value: the opposite answers.
2268 let o = Owned(String::from_str("abcde"));
2269 assert!(!o.is_slice());
2270 assert!(o.is_owned());
// Checks that a clone compares equal to a value holding the same text,
// whether the two sides are the same variant or different variants.
2274 fn test_maybe_owned_clone() {
2275 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2276 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2277 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2278 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `into_string` on either variant yields a `String` with the
// same text.
2282 fn test_maybe_owned_into_string() {
2283 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2284 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2285 String::from_str("abcde"));
// Checks `into_maybe_owned` on a string literal and on a `String`. Each
// result is compared against both a `Slice` and an `Owned` holding the same
// text, so the assertions depend on equality being by content rather than by
// variant.
2289 fn test_into_maybe_owned() {
2290 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2291 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2292 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2293 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2294 Owned(String::from_str("abcde")));
2301 use test::black_box;
2303 use std::iter::{Iterator, DoubleEndedIterator};
2304 use std::collections::Collection;
// Benchmark: counts the chars of a string that mixes multi-byte and ASCII
// characters by running `chars().count()` inside `b.iter`.
2307 fn char_iterator(b: &mut Bencher) {
2308 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2310 b.iter(|| s.chars().count());
// Benchmark variant that walks the chars with an explicit `for` loop instead
// of `count()`.
// NOTE(review): the line wrapping the loop in the `b.iter` closure is not
// visible in this listing.
2314 fn char_iterator_for(b: &mut Bencher) {
2315 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is handed to `black_box`; presumably so the loop is not optimized
// away — confirm against the `test::black_box` documentation.
2318 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of an ASCII-only, multi-line string literal
// with `chars().count()`.
2323 fn char_iterator_ascii(b: &mut Bencher) {
2324 let s = "Mary had a little lamb, Little lamb
2325 Mary had a little lamb, Little lamb
2326 Mary had a little lamb, Little lamb
2327 Mary had a little lamb, Little lamb
2328 Mary had a little lamb, Little lamb
2329 Mary had a little lamb, Little lamb";
// The literal above spans six lines, so it contains embedded newlines.
2331 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed multi-byte/ASCII string while
// iterating from the back (`chars().rev().count()`).
2335 fn char_iterator_rev(b: &mut Bencher) {
2336 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2338 b.iter(|| s.chars().rev().count());
// Benchmark variant that walks the chars back to front with an explicit
// `for` loop.
// NOTE(review): the line wrapping the loop in the `b.iter` closure is not
// visible in this listing.
2342 fn char_iterator_rev_for(b: &mut Bencher) {
2343 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Each char is handed to `black_box`; presumably so the loop is not optimized
// away — confirm against the `test::black_box` documentation.
2346 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: iterates `char_indices()` over a mixed multi-byte/ASCII string
// and asserts that the number of items equals the string's char count.
2351 fn char_indicesator(b: &mut Bencher) {
2352 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected item count, computed once outside the measured closure.
2353 let len = s.char_len();
2355 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: iterates `char_indices()` back to front over a mixed
// multi-byte/ASCII string and asserts that the item count equals the string's
// char count.
2359 fn char_indicesator_rev(b: &mut Bencher) {
2360 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected item count, computed once outside the measured closure.
2361 let len = s.char_len();
2363 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly multi-byte string on the ASCII char 'V'. The
// text contains 'V' twice, so three pieces are asserted.
2367 fn split_unicode_ascii(b: &mut Bencher) {
2368 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2370 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: same split as a plain 'V' separator over mostly multi-byte text,
// but the separator is supplied through a custom `CharEq` implementation.
2374 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a char that implements `CharEq` by hand.
2375 struct NotAscii(char);
2376 impl CharEq for NotAscii {
// NOTE(review): the comparison expression that forms the body of `matches`
// is not visible in this listing.
2377 fn matches(&mut self, c: char) -> bool {
2378 let NotAscii(cc) = *self;
// Always reports false; presumably this keeps `split` off any ASCII-only
// fast path — confirm against the `CharEq` trait documentation.
2381 fn only_ascii(&self) -> bool { false }
2383 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2385 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space character and asserts the
// piece count.
2390 fn split_ascii(b: &mut Bencher) {
2391 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Expected piece count, computed once outside the measured closure.
2392 let len = s.split(' ').count();
2394 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on a space supplied through a custom
// `CharEq` implementation, asserting the same piece count as a plain ' '
// separator produces.
2398 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a char that implements `CharEq` by hand.
2399 struct NotAscii(char);
2400 impl CharEq for NotAscii {
// NOTE(review): the comparison expression that forms the body of `matches`
// is not visible in this listing.
2402 fn matches(&mut self, c: char) -> bool {
2403 let NotAscii(cc) = *self;
// Always reports false; presumably this keeps `split` off any ASCII-only
// fast path — confirm against the `CharEq` trait documentation.
2406 fn only_ascii(&self) -> bool { false }
2408 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain char separator.
2409 let len = s.split(' ').count();
2411 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the
// separator predicate, asserting the same piece count as a plain ' ' separator.
2415 fn split_extern_fn(b: &mut Bencher) {
2416 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain char separator.
2417 let len = s.split(' ').count();
// Predicate: true exactly for the space character.
2418 fn pred(c: char) -> bool { c == ' ' }
2420 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the separator
// predicate, asserting the same piece count as a plain ' ' separator.
2424 fn split_closure(b: &mut Bencher) {
2425 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain char separator.
2426 let len = s.split(' ').count();
2428 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a slice of chars as the
// separator, asserting the same piece count as a plain ' ' separator.
2432 fn split_slice(b: &mut Bencher) {
2433 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference count obtained with the plain char separator.
2434 let len = s.split(' ').count();
// One-element separator set containing only the space character.
2436 let c: &[char] = &[' '];
2437 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark set-up over an ASCII byte-string literal whose length is asserted
// to be exactly 100 bytes.
// NOTE(review): the measured call itself is not visible in this listing; the
// function name suggests a UTF-8 validity check — confirm.
2441 fn is_utf8_100_ascii(b: &mut Bencher) {
2443 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2444 Lorem ipsum dolor sit amet, consectetur. ";
// Guards the "100" in the benchmark name.
2446 assert_eq!(100, s.len());
// Benchmark set-up over the bytes of a multi-script string literal whose
// byte length is asserted to be exactly 100.
// NOTE(review): the measured call itself is not visible in this listing; the
// function name suggests a UTF-8 validity check — confirm.
2453 fn is_utf8_100_multibyte(b: &mut Bencher) {
2454 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guards the "100" in the benchmark name.
2455 assert_eq!(100, s.len());
// Benchmark: joins ten copies of the same string with `connect` and checks
// the length of the result.
// NOTE(review): the binding of `sep` and the `b.iter` wrapper are not visible
// in this listing.
2462 fn bench_connect(b: &mut Bencher) {
2463 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Ten references to the same text.
2465 let v = [s, s, s, s, s, s, s, s, s, s];
// Ten pieces plus the nine separators between them.
2467 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a short haystack, with a needle that the
// assertion expects to be found.
// NOTE(review): the binding of `needle` and the `b.iter` wrapper are not
// visible in this listing.
2472 fn bench_contains_short_short(b: &mut Bencher) {
2473 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2477 assert!(haystack.contains(needle));
// Benchmark: `contains` with a short needle over a long, multi-paragraph
// filler text; the assertion expects the needle to be absent.
// NOTE(review): the line that opens the `haystack` literal and the `b.iter`
// wrapper are not visible in this listing. Everything up to the closing quote
// is string content, so no comments are placed inside it.
2482 fn bench_contains_short_long(b: &mut Bencher) {
2484 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2485 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2486 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2487 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2488 tempus vel, gravida nec quam.
2490 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2491 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2492 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2493 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2494 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2495 interdum. Curabitur ut nisi justo.
2497 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2498 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2499 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2500 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2501 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2502 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2503 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2504 Aliquam sit amet placerat lorem.
2506 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2507 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2508 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2509 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2510 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2513 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2514 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2515 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2516 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2517 malesuada sollicitudin quam eu fermentum.";
// The needle is expected not to occur anywhere in the text (asserted below).
2518 let needle = "english";
2521 assert!(!haystack.contains(needle));
// Benchmark: `contains` on an input where the needle shares a long prefix
// with every position of the haystack but can never match.
// NOTE(review): the `b.iter` wrapper is not visible in this listing.
2526 fn bench_contains_bad_naive(b: &mut Bencher) {
// Haystack consists solely of the letter 'a'.
2527 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
// Needle is a run of 'a' ending in 'b', so the assertion below expects no match.
2528 let needle = "aaaaaaaab";
2531 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the haystack,
// so the assertion expects a match.
// NOTE(review): the `b.iter` wrapper and the closing lines of this function
// are not visible in this listing.
2536 fn bench_contains_equal(b: &mut Bencher) {
2537 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2538 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2541 assert!(haystack.contains(needle));