1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a `'static` lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has a direct mapping to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
54 pub use self::MaybeOwned::*;
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
57 use core::borrow::{BorrowFrom, Cow, ToOwned};
58 use core::default::Default;
61 use core::iter::AdditiveIterator;
62 use core::kinds::Sized;
63 use core::prelude::{Char, Clone, Eq, Equiv};
64 use core::prelude::{Iterator, IteratorExt, SlicePrelude, None, Option, Ord, Ordering};
65 use core::prelude::{PartialEq, PartialOrd, Result, AsSlice, Some, Tuple2};
66 use core::prelude::{range};
69 use ring_buf::RingBuf;
74 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
75 pub use core::str::{Bytes, CharSplits};
76 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
77 pub use core::str::{Utf16Encoder, Utf16CodeUnits};
78 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
79 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
80 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
81 pub use core::str::{FromStr, from_str};
82 pub use core::str::{Str, StrPrelude};
83 pub use core::str::{from_utf8_unchecked, from_c_str};
84 pub use unicode::str::{UnicodeStrPrelude, Words, Graphemes, GraphemeIndices};
86 // FIXME(conventions): ensure bit/char conventions are followed by str's API
89 Section: Creating a string
92 /// Methods for vectors of strings.
93 pub trait StrVector for Sized? {
94 /// Concatenates a vector of strings.
99 /// let first = "Restaurant at the End of the".to_string();
100 /// let second = " Universe".to_string();
101 /// let string_vec = vec![first, second];
102 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
104 fn concat(&self) -> String;
106 /// Concatenates a vector of strings, placing a given separator between each.
111 /// let first = "Roast".to_string();
112 /// let second = "Sirloin Steak".to_string();
113 /// let string_vec = vec![first, second];
114 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
116 fn connect(&self, sep: &str) -> String;
119 impl<S: Str> StrVector for [S] {
120 fn concat(&self) -> String {
122 return String::new();
125 // `len` calculation may overflow but push_str will check boundaries
126 let len = self.iter().map(|s| s.as_slice().len()).sum();
128 let mut result = String::with_capacity(len);
130 for s in self.iter() {
131 result.push_str(s.as_slice())
137 fn connect(&self, sep: &str) -> String {
139 return String::new();
144 return self.concat();
147 // this is wrong without the guarantee that `self` is non-empty
148 // `len` calculation may overflow but push_str will check boundaries
149 let len = sep.len() * (self.len() - 1)
150 + self.iter().map(|s| s.as_slice().len()).sum();
151 let mut result = String::with_capacity(len);
152 let mut first = true;
154 for s in self.iter() {
158 result.push_str(sep);
160 result.push_str(s.as_slice());
166 impl<S: Str, T: AsSlice<S>> StrVector for T {
168 fn concat(&self) -> String {
169 self.as_slice().concat()
173 fn connect(&self, sep: &str) -> String {
174 self.as_slice().connect(sep)
182 // Helper functions used for Unicode normalization
183 fn canonical_sort(comb: &mut [(char, u8)]) {
184 let len = comb.len();
185 for i in range(0, len) {
186 let mut swapped = false;
187 for j in range(1, len-i) {
188 let class_a = *comb[j-1].ref1();
189 let class_b = *comb[j].ref1();
190 if class_a != 0 && class_b != 0 && class_a > class_b {
195 if !swapped { break; }
200 enum DecompositionType {
205 /// External iterator for a string's decomposition's characters.
206 /// Use with the `std::iter` module.
208 pub struct Decompositions<'a> {
209 kind: DecompositionType,
211 buffer: Vec<(char, u8)>,
215 impl<'a> Iterator<char> for Decompositions<'a> {
217 fn next(&mut self) -> Option<char> {
218 match self.buffer.as_slice().head() {
221 self.buffer.remove(0);
224 Some(&(c, _)) if self.sorted => {
225 self.buffer.remove(0);
228 _ => self.sorted = false
231 let decomposer = match self.kind {
232 Canonical => unicode::char::decompose_canonical,
233 Compatible => unicode::char::decompose_compatible
237 for ch in self.iter {
238 let buffer = &mut self.buffer;
239 let sorted = &mut self.sorted;
241 let class = unicode::char::canonical_combining_class(d);
242 if class == 0 && !*sorted {
243 canonical_sort(buffer.as_mut_slice());
246 buffer.push((d, class));
253 canonical_sort(self.buffer.as_mut_slice());
257 match self.buffer.remove(0) {
262 Some((c, _)) => Some(c),
267 fn size_hint(&self) -> (uint, Option<uint>) {
268 let (lower, _) = self.iter.size_hint();
274 enum RecompositionState {
280 /// External iterator for a string's recomposition's characters.
281 /// Use with the `std::iter` module.
283 pub struct Recompositions<'a> {
284 iter: Decompositions<'a>,
285 state: RecompositionState,
286 buffer: RingBuf<char>,
287 composee: Option<char>,
291 impl<'a> Iterator<char> for Recompositions<'a> {
293 fn next(&mut self) -> Option<char> {
297 for ch in self.iter {
298 let ch_class = unicode::char::canonical_combining_class(ch);
299 if self.composee.is_none() {
303 self.composee = Some(ch);
306 let k = self.composee.clone().unwrap();
308 match self.last_ccc {
310 match unicode::char::compose(k, ch) {
312 self.composee = Some(r);
317 self.composee = Some(ch);
320 self.buffer.push_back(ch);
321 self.last_ccc = Some(ch_class);
326 if l_class >= ch_class {
327 // `ch` is blocked from `composee`
329 self.composee = Some(ch);
330 self.last_ccc = None;
331 self.state = Purging;
334 self.buffer.push_back(ch);
335 self.last_ccc = Some(ch_class);
338 match unicode::char::compose(k, ch) {
340 self.composee = Some(r);
344 self.buffer.push_back(ch);
345 self.last_ccc = Some(ch_class);
351 self.state = Finished;
352 if self.composee.is_some() {
353 return self.composee.take();
357 match self.buffer.pop_front() {
358 None => self.state = Composing,
363 match self.buffer.pop_front() {
364 None => return self.composee.take(),
373 /// Replaces all occurrences of one string with another.
377 /// * s - The string containing substrings to replace
378 /// * from - The string to replace
379 /// * to - The replacement string
383 /// The original string with all occurrences of `from` replaced with `to`.
389 /// let string = "orange";
390 /// let new_string = str::replace(string, "or", "str");
391 /// assert_eq!(new_string.as_slice(), "strange");
393 pub fn replace(s: &str, from: &str, to: &str) -> String {
394 let mut result = String::new();
395 let mut last_end = 0;
396 for (start, end) in s.match_indices(from) {
397 result.push_str(unsafe { s.slice_unchecked(last_end, start) });
401 result.push_str(unsafe { s.slice_unchecked(last_end, s.len()) });
409 // Return the initial codepoint accumulator for the first byte.
410 // The first byte is special: we only want the bottom 5 bits for width 2,
411 // 4 bits for width 3, and 3 bits for width 4.
412 macro_rules! utf8_first_byte(
413 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
416 // Return the value of $ch updated with continuation byte $byte.
417 macro_rules! utf8_acc_cont_byte(
418 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
425 /// A string type that can hold either a `String` or a `&str`.
426 /// This can be useful as an optimization when an allocation is sometimes
427 /// needed but not always.
428 #[deprecated = "use std::str::CowString"]
429 pub enum MaybeOwned<'a> {
430 /// A borrowed string.
436 /// A specialization of `CowString` to be sendable.
437 pub type SendStr = CowString<'static>;
439 #[deprecated = "use std::str::CowString"]
440 impl<'a> MaybeOwned<'a> {
441 /// Returns `true` if this `MaybeOwned` wraps an owned string.
446 /// let string = String::from_str("orange");
447 /// let maybe_owned_string = string.into_maybe_owned();
448 /// assert_eq!(true, maybe_owned_string.is_owned());
451 pub fn is_owned(&self) -> bool {
458 /// Returns `true` if this `MaybeOwned` wraps a borrowed string.
463 /// let string = "orange";
464 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
465 /// assert_eq!(true, maybe_owned_string.is_slice());
468 pub fn is_slice(&self) -> bool {
475 /// Return the number of bytes in this string.
477 pub fn len(&self) -> uint { self.as_slice().len() }
479 /// Returns `true` if the string contains no bytes.
482 pub fn is_empty(&self) -> bool { self.len() == 0 }
485 #[deprecated = "use std::borrow::IntoCow"]
486 /// Trait for moving into a `MaybeOwned`.
487 pub trait IntoMaybeOwned<'a> {
488 /// Moves `self` into a `MaybeOwned`.
489 fn into_maybe_owned(self) -> MaybeOwned<'a>;
492 #[deprecated = "use std::borrow::IntoCow"]
494 impl<'a> IntoMaybeOwned<'a> for String {
498 /// let owned_string = String::from_str("orange");
499 /// let maybe_owned_string = owned_string.into_maybe_owned();
500 /// assert_eq!(true, maybe_owned_string.is_owned());
504 fn into_maybe_owned(self) -> MaybeOwned<'a> {
509 #[deprecated = "use std::borrow::IntoCow"]
511 impl<'a> IntoMaybeOwned<'a> for &'a str {
515 /// let string = "orange";
516 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
517 /// assert_eq!(false, maybe_owned_str.is_owned());
521 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
525 #[deprecated = "use std::borrow::IntoCow"]
526 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
530 /// let str = "orange";
531 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
532 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
533 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
536 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
539 #[deprecated = "use std::str::CowString"]
540 impl<'a> PartialEq for MaybeOwned<'a> {
542 fn eq(&self, other: &MaybeOwned) -> bool {
543 self.as_slice() == other.as_slice()
547 #[deprecated = "use std::str::CowString"]
548 impl<'a> Eq for MaybeOwned<'a> {}
550 #[deprecated = "use std::str::CowString"]
551 impl<'a> PartialOrd for MaybeOwned<'a> {
553 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
554 Some(self.cmp(other))
558 #[deprecated = "use std::str::CowString"]
559 impl<'a> Ord for MaybeOwned<'a> {
561 fn cmp(&self, other: &MaybeOwned) -> Ordering {
562 self.as_slice().cmp(other.as_slice())
567 #[deprecated = "use std::str::CowString"]
568 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
570 fn equiv(&self, other: &S) -> bool {
571 self.as_slice() == other.as_slice()
575 #[deprecated = "use std::str::CowString"]
576 impl<'a> Str for MaybeOwned<'a> {
579 fn as_slice<'b>(&'b self) -> &'b str {
582 Owned(ref s) => s.as_slice()
587 #[deprecated = "use std::str::CowString"]
588 impl<'a> StrAllocating for MaybeOwned<'a> {
591 fn into_string(self) -> String {
593 Slice(s) => String::from_str(s),
599 #[deprecated = "use std::str::CowString"]
600 impl<'a> Clone for MaybeOwned<'a> {
603 fn clone(&self) -> MaybeOwned<'a> {
605 Slice(s) => Slice(s),
606 Owned(ref s) => Owned(String::from_str(s.as_slice()))
611 #[deprecated = "use std::str::CowString"]
612 impl<'a> Default for MaybeOwned<'a> {
/// The default `MaybeOwned` is the borrowed empty string, `Slice("")`.
615 fn default() -> MaybeOwned<'a> { Slice("") }
618 #[deprecated = "use std::str::CowString"]
619 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
621 fn hash(&self, hasher: &mut H) {
622 self.as_slice().hash(hasher)
626 #[deprecated = "use std::str::CowString"]
627 impl<'a> fmt::Show for MaybeOwned<'a> {
629 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
631 Slice(ref s) => s.fmt(f),
632 Owned(ref s) => s.fmt(f)
637 #[unstable = "trait is unstable"]
638 impl BorrowFrom<String> for str {
/// Borrows the given `String` as a `str` by slicing it with `[]`.
639 fn borrow_from(owned: &String) -> &str { owned[] }
642 #[unstable = "trait is unstable"]
643 impl ToOwned<String> for str {
/// Produces an owned `String` from this slice by delegating to `into_string`.
644 fn to_owned(&self) -> String { self.into_string() }
647 /// Unsafe string operations.
649 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
650 pub use core::str::raw::{slice_unchecked};
657 /// A clone-on-write string
658 pub type CowString<'a> = Cow<'a, String, str>;
660 impl<'a> Str for CowString<'a> {
662 fn as_slice<'b>(&'b self) -> &'b str {
668 Section: Trait implementations
671 /// Any string that can be represented as a slice.
672 pub trait StrAllocating: Str {
673 /// Converts `self` into a `String`, not making a copy if possible.
674 fn into_string(self) -> String;
676 /// Escapes each char in `self` with `char::escape_default`.
677 fn escape_default(&self) -> String {
678 let me = self.as_slice();
679 let mut out = String::with_capacity(me.len());
680 for c in me.chars() {
681 for c in c.escape_default() {
688 /// Escapes each char in `self` with `char::escape_unicode`.
689 fn escape_unicode(&self) -> String {
690 let me = self.as_slice();
691 let mut out = String::with_capacity(me.len());
692 for c in me.chars() {
693 for c in c.escape_unicode() {
700 /// Replaces all occurrences of one string with another.
704 /// * `from` - The string to replace
705 /// * `to` - The replacement string
709 /// The original string with all occurrences of `from` replaced with `to`.
714 /// let s = "Do you know the muffin man,
715 /// The muffin man, the muffin man, ...".to_string();
717 /// assert_eq!(s.replace("muffin man", "little lamb"),
718 /// "Do you know the little lamb,
719 /// The little lamb, the little lamb, ...".to_string());
721 /// // not found, so no change.
722 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
724 fn replace(&self, from: &str, to: &str) -> String {
725 replace(self.as_slice(), from, to)
728 /// Given a string, makes a new string with repeated copies of it.
729 fn repeat(&self, nn: uint) -> String {
730 let me = self.as_slice();
731 let mut ret = String::with_capacity(nn * me.len());
732 for _ in range(0, nn) {
738 /// Returns the Levenshtein Distance between two strings.
739 fn lev_distance(&self, t: &str) -> uint {
740 let me = self.as_slice();
741 if me.is_empty() { return t.char_len(); }
742 if t.is_empty() { return me.char_len(); }
744 let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
747 for (i, sc) in me.chars().enumerate() {
750 dcol[0] = current + 1;
752 for (j, tc) in t.chars().enumerate() {
754 let next = dcol[j + 1];
757 dcol[j + 1] = current;
759 dcol[j + 1] = cmp::min(current, next);
760 dcol[j + 1] = cmp::min(dcol[j + 1], dcol[j]) + 1;
771 /// Returns an iterator over the string in Unicode Normalization Form D
772 /// (canonical decomposition).
774 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
776 iter: self.as_slice().chars(),
783 /// Returns an iterator over the string in Unicode Normalization Form KD
784 /// (compatibility decomposition).
786 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
788 iter: self.as_slice().chars(),
795 /// Returns an iterator over the string in Unicode Normalization Form C
796 /// (canonical decomposition followed by canonical composition).
798 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
800 iter: self.nfd_chars(),
802 buffer: RingBuf::new(),
808 /// Returns an iterator over the string in Unicode Normalization Form KC
809 /// (compatibility decomposition followed by canonical composition).
811 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
813 iter: self.nfkd_chars(),
815 buffer: RingBuf::new(),
822 impl<'a> StrAllocating for &'a str {
824 fn into_string(self) -> String {
825 String::from_str(self)
831 use std::iter::AdditiveIterator;
832 use std::iter::range;
833 use std::default::Default;
835 use std::clone::Clone;
836 use std::cmp::{Ord, PartialOrd, Equiv};
837 use std::cmp::Ordering::{Equal, Greater, Less};
838 use std::option::Option;
839 use std::option::Option::{Some, None};
840 use std::ptr::RawPtr;
841 use std::iter::{Iterator, IteratorExt, DoubleEndedIteratorExt};
844 use std::slice::{AsSlice, SlicePrelude};
847 use slice::CloneSliceAllocPrelude;
849 use unicode::char::UnicodeChar;
853 assert!((eq_slice("foobar".slice(0, 3), "foo")));
854 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
855 assert!((!eq_slice("foo1", "foo2")));
861 assert!("" <= "foo");
862 assert!("foo" <= "foo");
863 assert!("foo" != "bar");
868 assert_eq!("".len(), 0u);
869 assert_eq!("hello world".len(), 11u);
870 assert_eq!("\x63".len(), 1u);
871 assert_eq!("\u00a2".len(), 2u);
872 assert_eq!("\u03c0".len(), 2u);
873 assert_eq!("\u2620".len(), 3u);
874 assert_eq!("\U0001d11e".len(), 4u);
876 assert_eq!("".char_len(), 0u);
877 assert_eq!("hello world".char_len(), 11u);
878 assert_eq!("\x63".char_len(), 1u);
879 assert_eq!("\u00a2".char_len(), 1u);
880 assert_eq!("\u03c0".char_len(), 1u);
881 assert_eq!("\u2620".char_len(), 1u);
882 assert_eq!("\U0001d11e".char_len(), 1u);
883 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
885 assert_eq!("hello".width(false), 10u);
886 assert_eq!("hello".width(true), 10u);
887 assert_eq!("\0\0\0\0\0".width(false), 0u);
888 assert_eq!("\0\0\0\0\0".width(true), 0u);
889 assert_eq!("".width(false), 0u);
890 assert_eq!("".width(true), 0u);
891 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
892 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
897 assert_eq!("hello".find('l'), Some(2u));
898 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
899 assert!("hello".find('x').is_none());
900 assert!("hello".find(|c:char| c == 'x').is_none());
901 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
902 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
907 assert_eq!("hello".rfind('l'), Some(3u));
908 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
909 assert!("hello".rfind('x').is_none());
910 assert!("hello".rfind(|c:char| c == 'x').is_none());
911 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
912 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
917 let empty = String::from_str("");
918 let s: String = empty.as_slice().chars().collect();
919 assert_eq!(empty, s);
920 let data = String::from_str("ประเทศไทย中");
921 let s: String = data.as_slice().chars().collect();
926 fn test_into_bytes() {
927 let data = String::from_str("asdf");
928 let buf = data.into_bytes();
929 assert_eq!(b"asdf", buf.as_slice());
935 assert_eq!("".find_str(""), Some(0u));
936 assert!("banana".find_str("apple pie").is_none());
939 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
940 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
941 assert!(data.slice(2u, 4u).find_str("ab").is_none());
943 let string = "ประเทศไทย中华Việt Nam";
944 let mut data = String::from_str(string);
945 data.push_str(string);
946 assert!(data.as_slice().find_str("ไท华").is_none());
947 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
948 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
950 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
951 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
952 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
953 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
954 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
956 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
957 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
958 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
959 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
960 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
964 fn test_slice_chars() {
965 fn t(a: &str, b: &str, start: uint) {
966 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
969 t("hello", "llo", 2);
973 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
980 impl AsSlice<String> for S {
981 fn as_slice<'a> (&'a self) -> &'a [String] {
// Test helper: turns a `&str` into an owned `String` via `into_string`.
986 fn s(x: &str) -> String { x.into_string() }
988 macro_rules! test_concat {
989 ($expected: expr, $string: expr) => {
991 let s = $string.concat();
992 assert_eq!($expected, s.as_slice());
998 fn test_concat_for_different_types() {
999 test_concat!("ab", ["a", "b"]);
1000 test_concat!("ab", [s("a"), s("b")]);
1001 test_concat!("ab", vec!["a", "b"]);
1002 test_concat!("ab", vec!["a", "b"].as_slice());
1003 test_concat!("ab", vec![s("a"), s("b")]);
1005 let mut v0 = ["a", "b"];
1006 let mut v1 = [s("a"), s("b")];
1008 use std::c_vec::CVec;
1010 test_concat!("ab", CVec::new(v0.as_mut_ptr(), v0.len()));
1011 test_concat!("ab", CVec::new(v1.as_mut_ptr(), v1.len()));
1014 test_concat!("ab", S { x: [s("a"), s("b")] });
1018 fn test_concat_for_different_lengths() {
1019 let empty: &[&str] = &[];
1020 test_concat!("", empty);
1021 test_concat!("a", ["a"]);
1022 test_concat!("ab", ["a", "b"]);
1023 test_concat!("abc", ["", "a", "bc"]);
1026 macro_rules! test_connect {
1027 ($expected: expr, $string: expr, $delim: expr) => {
1029 let s = $string.connect($delim);
1030 assert_eq!($expected, s.as_slice());
1036 fn test_connect_for_different_types() {
1037 test_connect!("a-b", ["a", "b"], "-");
1038 let hyphen = "-".into_string();
1039 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1040 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1041 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1042 test_connect!("a-b", vec![s("a"), s("b")], "-");
1044 let mut v0 = ["a", "b"];
1045 let mut v1 = [s("a"), s("b")];
1047 use std::c_vec::CVec;
1049 test_connect!("a-b", CVec::new(v0.as_mut_ptr(), v0.len()), "-");
1050 test_connect!("a-b", CVec::new(v1.as_mut_ptr(), v1.len()), hyphen.as_slice());
1053 test_connect!("a-b", S { x: [s("a"), s("b")] }, "-");
1057 fn test_connect_for_different_lengths() {
1058 let empty: &[&str] = &[];
1059 test_connect!("", empty, "-");
1060 test_connect!("a", ["a"], "-");
1061 test_connect!("a-b", ["a", "b"], "-");
1062 test_connect!("-a-bc", ["", "a", "bc"], "-");
1067 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1068 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1069 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1070 assert_eq!("".repeat(4), String::from_str(""));
1071 assert_eq!("hi".repeat(0), String::from_str(""));
1075 fn test_unsafe_slice() {
1076 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1077 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1078 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1079 fn a_million_letter_a() -> String {
1081 let mut rs = String::new();
1083 rs.push_str("aaaaaaaaaa");
1088 fn half_a_million_letter_a() -> String {
1090 let mut rs = String::new();
1092 rs.push_str("aaaaa");
1097 let letters = a_million_letter_a();
1098 assert!(half_a_million_letter_a() ==
1099 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1105 fn test_starts_with() {
1106 assert!(("".starts_with("")));
1107 assert!(("abc".starts_with("")));
1108 assert!(("abc".starts_with("a")));
1109 assert!((!"a".starts_with("abc")));
1110 assert!((!"".starts_with("abc")));
1111 assert!((!"ödd".starts_with("-")));
1112 assert!(("ödd".starts_with("öd")));
1116 fn test_ends_with() {
1117 assert!(("".ends_with("")));
1118 assert!(("abc".ends_with("")));
1119 assert!(("abc".ends_with("c")));
1120 assert!((!"a".ends_with("abc")));
1121 assert!((!"".ends_with("abc")));
1122 assert!((!"ddö".ends_with("-")));
1123 assert!(("ddö".ends_with("dö")));
1127 fn test_is_empty() {
1128 assert!("".is_empty());
1129 assert!(!"a".is_empty());
1135 assert_eq!("".replace(a, "b"), String::from_str(""));
1136 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1137 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1139 assert!(" test test ".replace(test, "toast") ==
1140 String::from_str(" toast toast "));
1141 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1145 fn test_replace_2a() {
1146 let data = "ประเทศไทย中华";
1147 let repl = "دولة الكويت";
1150 let a2 = "دولة الكويتทศไทย中华";
1151 assert_eq!(data.replace(a, repl).as_slice(), a2);
1155 fn test_replace_2b() {
1156 let data = "ประเทศไทย中华";
1157 let repl = "دولة الكويت";
1160 let b2 = "ปรدولة الكويتทศไทย中华";
1161 assert_eq!(data.replace(b, repl).as_slice(), b2);
1165 fn test_replace_2c() {
1166 let data = "ประเทศไทย中华";
1167 let repl = "دولة الكويت";
1170 let c2 = "ประเทศไทยدولة الكويت";
1171 assert_eq!(data.replace(c, repl).as_slice(), c2);
1175 fn test_replace_2d() {
1176 let data = "ประเทศไทย中华";
1177 let repl = "دولة الكويت";
1180 assert_eq!(data.replace(d, repl).as_slice(), data);
1185 assert_eq!("ab", "abc".slice(0, 2));
1186 assert_eq!("bc", "abc".slice(1, 3));
1187 assert_eq!("", "abc".slice(1, 1));
1188 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1190 let data = "ประเทศไทย中华";
1191 assert_eq!("ป", data.slice(0, 3));
1192 assert_eq!("ร", data.slice(3, 6));
1193 assert_eq!("", data.slice(3, 3));
1194 assert_eq!("华", data.slice(30, 33));
1196 fn a_million_letter_x() -> String {
1198 let mut rs = String::new();
1200 rs.push_str("华华华华华华华华华华");
1205 fn half_a_million_letter_x() -> String {
1207 let mut rs = String::new();
1209 rs.push_str("华华华华华");
1214 let letters = a_million_letter_x();
1215 assert!(half_a_million_letter_x() ==
1216 String::from_str(letters.as_slice().slice(0u, 3u * 500000u)));
1221 let ss = "中华Việt Nam";
1223 assert_eq!("华", ss.slice(3u, 6u));
1224 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1226 assert_eq!("ab", "abc".slice(0u, 2u));
1227 assert_eq!("bc", "abc".slice(1u, 3u));
1228 assert_eq!("", "abc".slice(1u, 1u));
1230 assert_eq!("中", ss.slice(0u, 3u));
1231 assert_eq!("华V", ss.slice(3u, 7u));
1232 assert_eq!("", ss.slice(3u, 3u));
1247 fn test_slice_fail() {
1248 "中华Việt Nam".slice(0u, 2u);
1252 fn test_slice_from() {
1253 assert_eq!("abcd".slice_from(0), "abcd");
1254 assert_eq!("abcd".slice_from(2), "cd");
1255 assert_eq!("abcd".slice_from(4), "");
1258 fn test_slice_to() {
1259 assert_eq!("abcd".slice_to(0), "");
1260 assert_eq!("abcd".slice_to(2), "ab");
1261 assert_eq!("abcd".slice_to(4), "abcd");
1265 fn test_trim_left_chars() {
1266 let v: &[char] = &[];
1267 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1268 let chars: &[char] = &['*', ' '];
1269 assert_eq!(" *** foo *** ".trim_left_chars(chars), "foo *** ");
1270 assert_eq!(" *** *** ".trim_left_chars(chars), "");
1271 assert_eq!("foo *** ".trim_left_chars(chars), "foo *** ");
1273 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1274 let chars: &[char] = &['1', '2'];
1275 assert_eq!("12foo1bar12".trim_left_chars(chars), "foo1bar12");
1276 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_numeric()), "foo1bar123");
1280 fn test_trim_right_chars() {
1281 let v: &[char] = &[];
1282 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1283 let chars: &[char] = &['*', ' '];
1284 assert_eq!(" *** foo *** ".trim_right_chars(chars), " *** foo");
1285 assert_eq!(" *** *** ".trim_right_chars(chars), "");
1286 assert_eq!(" *** foo".trim_right_chars(chars), " *** foo");
1288 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1289 let chars: &[char] = &['1', '2'];
1290 assert_eq!("12foo1bar12".trim_right_chars(chars), "12foo1bar");
1291 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_numeric()), "123foo1bar");
1295 fn test_trim_chars() {
1296 let v: &[char] = &[];
1297 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1298 let chars: &[char] = &['*', ' '];
1299 assert_eq!(" *** foo *** ".trim_chars(chars), "foo");
1300 assert_eq!(" *** *** ".trim_chars(chars), "");
1301 assert_eq!("foo".trim_chars(chars), "foo");
1303 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1304 let chars: &[char] = &['1', '2'];
1305 assert_eq!("12foo1bar12".trim_chars(chars), "foo1bar");
1306 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_numeric()), "foo1bar");
1310 fn test_trim_left() {
1311 assert_eq!("".trim_left(), "");
1312 assert_eq!("a".trim_left(), "a");
1313 assert_eq!(" ".trim_left(), "");
1314 assert_eq!(" blah".trim_left(), "blah");
1315 assert_eq!(" \u3000 wut".trim_left(), "wut");
1316 assert_eq!("hey ".trim_left(), "hey ");
1320 fn test_trim_right() {
1321 assert_eq!("".trim_right(), "");
1322 assert_eq!("a".trim_right(), "a");
1323 assert_eq!(" ".trim_right(), "");
1324 assert_eq!("blah ".trim_right(), "blah");
1325 assert_eq!("wut \u3000 ".trim_right(), "wut");
1326 assert_eq!(" hey".trim_right(), " hey");
1331 assert_eq!("".trim(), "");
1332 assert_eq!("a".trim(), "a");
1333 assert_eq!(" ".trim(), "");
1334 assert_eq!(" blah ".trim(), "blah");
1335 assert_eq!("\nwut \u3000 ".trim(), "wut");
1336 assert_eq!(" hey dude ".trim(), "hey dude");
1340 fn test_is_whitespace() {
1341 assert!("".is_whitespace());
1342 assert!(" ".is_whitespace());
1343 assert!("\u2009".is_whitespace()); // Thin space
1344 assert!(" \n\t ".is_whitespace());
1345 assert!(!" _ ".is_whitespace());
1349 fn test_slice_shift_char() {
1350 let data = "ประเทศไทย中";
1351 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1355 fn test_slice_shift_char_2() {
1357 assert_eq!(empty.slice_shift_char(), None);
1362 // deny overlong encodings
1363 assert!(!is_utf8(&[0xc0, 0x80]));
1364 assert!(!is_utf8(&[0xc0, 0xae]));
1365 assert!(!is_utf8(&[0xe0, 0x80, 0x80]));
1366 assert!(!is_utf8(&[0xe0, 0x80, 0xaf]));
1367 assert!(!is_utf8(&[0xe0, 0x81, 0x81]));
1368 assert!(!is_utf8(&[0xf0, 0x82, 0x82, 0xac]));
1369 assert!(!is_utf8(&[0xf4, 0x90, 0x80, 0x80]));
1372 assert!(!is_utf8(&[0xED, 0xA0, 0x80]));
1373 assert!(!is_utf8(&[0xED, 0xBF, 0xBF]));
1375 assert!(is_utf8(&[0xC2, 0x80]));
1376 assert!(is_utf8(&[0xDF, 0xBF]));
1377 assert!(is_utf8(&[0xE0, 0xA0, 0x80]));
1378 assert!(is_utf8(&[0xED, 0x9F, 0xBF]));
1379 assert!(is_utf8(&[0xEE, 0x80, 0x80]));
1380 assert!(is_utf8(&[0xEF, 0xBF, 0xBF]));
1381 assert!(is_utf8(&[0xF0, 0x90, 0x80, 0x80]));
1382 assert!(is_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]));
// Checks `is_utf16` against slices of 16-bit code units that must be accepted
// (via `pos!`) and slices that must be rejected (via `neg!`).
1386 fn test_is_utf16() {
// `pos!` asserts that `is_utf16` returns true for every slice passed to it.
1387 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1395 // surrogate pairs (randomly generated with Python 3's
1396 // .encode('utf-16be'))
1397 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1398 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1399 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1401 // mixtures (also random)
1402 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1403 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1404 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
// `neg!` asserts that `is_utf16` returns false for every slice passed to it.
1407 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1410 // surrogate + regular unit
1412 // surrogate + lead surrogate
1414 // unterminated surrogate
1416 // trail surrogate without a lead
1419 // random byte sequences that Python 3's .decode('utf-16be')
// Each row below contains at least one surrogate unit that is not part of a
// well-formed lead/trail pair.
1421 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1422 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1423 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1424 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1425 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1426 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1427 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1428 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1429 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1430 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1431 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1432 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1433 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1434 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1435 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1436 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1437 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1438 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1439 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1440 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1441 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// Compares `as_bytes` with the expected byte sequences for an empty string, an
// ASCII string and a multi-byte string.
// NOTE(review): the bindings `v` and `b` are declared on lines not visible in
// this excerpt; the two numeric rows below appear to be part of `v`.
1445 fn test_as_bytes() {
1448 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1449 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1453 assert_eq!("".as_bytes(), b);
1454 assert_eq!("abc".as_bytes(), b"abc");
1455 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
// Borrows the bytes of an empty owned `String`; the borrowed slice itself is
// unused (`_bytes`).
// NOTE(review): the name suggests a failure is expected, but nothing visible
// here shows one — confirm against the hidden lines and attributes.
1460 fn test_as_bytes_fail() {
1461 // Don't double free. (I'm not sure if this exercises the
1462 // original problem code path anymore.)
1463 let s = String::from_str("");
1464 let _bytes = s.as_bytes();
// Reads five bytes through the raw pointer returned by `as_ptr` and checks
// that they are `h`, `e`, `l`, `l`, `o` in order.
1470 let buf = "hello".as_ptr();
1472 assert_eq!(*buf.offset(0), b'h');
1473 assert_eq!(*buf.offset(1), b'e');
1474 assert_eq!(*buf.offset(2), b'l');
1475 assert_eq!(*buf.offset(3), b'l');
1476 assert_eq!(*buf.offset(4), b'o');
// `subslice_offset` reports the byte position at which a subslice starts
// inside the string it was taken from.
1481 fn test_subslice_offset() {
1482 let a = "kernelsprite";
1483 let b = a.slice(7, a.len());
1484 let c = a.slice(0, a.len() - 6);
1485 assert_eq!(a.subslice_offset(b), 7);
1486 assert_eq!(a.subslice_offset(c), 0);
// Slices produced by `lines()` are checked the same way: "a" starts at 0,
// "b" at 2 and "c" at 4.
1488 let string = "a\nb\nc";
1489 let lines: Vec<&str> = string.lines().collect();
1490 let lines = lines.as_slice();
1491 assert_eq!(string.subslice_offset(lines[0]), 0);
1492 assert_eq!(string.subslice_offset(lines[1]), 2);
1493 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with a separate string literal rather than a slice
// taken from the receiver; the returned value is discarded.
// NOTE(review): presumably the call is expected to fail — confirm against the
// test attributes, which are not visible in this excerpt.
1498 fn test_subslice_offset_2() {
1499 let a = "alchemiter";
1500 let b = "cruxtruder";
1501 a.subslice_offset(b);
// Round-trips a `String` through its bytes: copies them into a `Vec<u8>`,
// rebuilds a second `String` with `from_utf8`, then reads both strings byte by
// byte at index `i`.
// NOTE(review): the loop and the comparison around the indexed reads are on
// lines not visible in this excerpt.
1505 fn vec_str_conversions() {
1506 let s1: String = String::from_str("All mimsy were the borogoves");
1508 let v: Vec<u8> = s1.as_bytes().to_vec();
1509 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1510 let mut i: uint = 0u;
1511 let n1: uint = s1.len();
1512 let n2: uint = v.len();
1515 let a: u8 = s1.as_bytes()[i];
1516 let b: u8 = s2.as_bytes()[i];
// Substring search with `contains`: matches in the middle, at the start and at
// the end are found; the empty needle is found in any haystack, including the
// empty one; absent needles are not found.
1525 fn test_contains() {
1526 assert!("abcde".contains("bcd"));
1527 assert!("abcde".contains("abcd"));
1528 assert!("abcde".contains("bcde"));
1529 assert!("abcde".contains(""));
1530 assert!("".contains(""));
1531 assert!(!"abcde".contains("def"));
1532 assert!(!"".contains("a"));
// Same checks on multi-byte text; the last needle is built from characters
// that all occur in `data`, but never in that sequence.
1534 let data = "ประเทศไทย中华Việt Nam";
1535 assert!(data.contains("ประเ"));
1536 assert!(data.contains("ะเ"));
1537 assert!(data.contains("中华"));
1538 assert!(!data.contains("ไท华"));
// `contains_char` finds a character that is present and reports false for an
// absent character or an empty haystack.
1542 fn test_contains_char() {
1543 assert!("abc".contains_char('b'));
1544 assert!("a".contains_char('a'));
1545 assert!(!"abc".contains_char('d'));
1546 assert!(!"".contains_char('a'));
// Compares the result of `truncate_utf16_at_nul` with the expected slices
// `[]`, `[]` again, `[1]`, `[1, 2]` and `[1, 2, 3]`.
// NOTE(review): each input `v` is bound on a line not visible in this excerpt.
1550 fn test_truncate_utf16_at_nul() {
1552 let b: &[u16] = &[];
1553 assert_eq!(truncate_utf16_at_nul(&v), b);
// Reuses the empty expectation `b` from above for the next input.
1556 assert_eq!(truncate_utf16_at_nul(&v), b);
1559 let b: &[u16] = &[1];
1560 assert_eq!(truncate_utf16_at_nul(&v), b);
1563 let b: &[u16] = &[1, 2];
1564 assert_eq!(truncate_utf16_at_nul(&v), b);
1567 let b: &[u16] = &[1, 2, 3];
1568 assert_eq!(truncate_utf16_at_nul(&v), b);
// Walks `s` front to back: `char_at(pos)` must equal the next expected
// character, after which `pos` advances by the length of a one-character
// `String` holding it (i.e. that character's encoded size).
// NOTE(review): the initialisation of `pos` is not visible in this excerpt.
1573 let s = "ศไทย中华Việt Nam";
1574 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1576 for ch in v.iter() {
1577 assert!(s.char_at(pos) == *ch);
1578 pos += String::from_char(1, *ch).len();
// Walks `s` back to front: starting with `pos` at `s.len()`,
// `char_at_reverse(pos)` must equal the expected character, after which `pos`
// steps back by the length of a one-character `String` holding it.
1583 fn test_char_at_reverse() {
1584 let s = "ศไทย中华Việt Nam";
1585 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1586 let mut pos = s.len();
1587 for ch in v.iter().rev() {
1588 assert!(s.char_at_reverse(pos) == *ch);
1589 pos -= String::from_char(1, *ch).len();
// `escape_unicode` escapes every character. In the cases below the ASCII
// characters become `\xNN`, the characters from U+00FE to U+FFFF become
// `\uNNNN`, and the characters above U+FFFF become `\UNNNNNNNN`.
1594 fn test_escape_unicode() {
1595 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1596 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1597 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1598 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1599 assert_eq!("\x00\x01\u00fe\u00ff".escape_unicode(),
1600 String::from_str("\\x00\\x01\\u00fe\\u00ff"));
1601 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1602 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1603 String::from_str("\\U00010000\\U0010ffff"));
1604 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1605 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
// `escape_default` leaves letters and spaces untouched, turns `\r`, `\n`,
// `\t`, quotes and the backslash into backslash escapes, and renders the
// non-ASCII characters below as `\uNNNN` or `\UNNNNNNNN`.
1609 fn test_escape_default() {
1610 assert_eq!("abc".escape_default(), String::from_str("abc"));
1611 assert_eq!("a c".escape_default(), String::from_str("a c"));
1612 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1613 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1614 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1615 assert_eq!("\U00010000\U0010ffff".escape_default(),
1616 String::from_str("\\U00010000\\U0010ffff"));
1617 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1618 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Checks the total ordering of string slices through `cmp`: a string sorts
// after any of its proper prefixes, identical strings compare `Equal`, and
// otherwise the first differing byte decides, whatever the lengths are.
fn test_total_ord() {
    // Every comparison is wrapped in `assert_eq!` so that a wrong ordering
    // fails the test. A bare `a.cmp(b) == X;` statement only computes a
    // boolean and throws it away, which made this test pass unconditionally.
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    assert_eq!("12345555".cmp("123456"), Less);
    assert_eq!("22".cmp("1234"), Greater);
}
// `char_range_at(i).ch` is the character starting at byte offset `i`. The
// probed offsets 0, 1, 3, 6, 10, 14, 17 and 19 are the starts of characters
// that are 1, 2, 3, 4, 4, 3, 2 and 1 bytes long.
1631 fn test_char_range_at() {
1632 let data = "b¢€𤭢𤭢€¢b";
1633 assert_eq!('b', data.char_range_at(0).ch);
1634 assert_eq!('¢', data.char_range_at(1).ch);
1635 assert_eq!('€', data.char_range_at(3).ch);
1636 assert_eq!('𤭢', data.char_range_at(6).ch);
1637 assert_eq!('𤭢', data.char_range_at(10).ch);
1638 assert_eq!('€', data.char_range_at(14).ch);
1639 assert_eq!('¢', data.char_range_at(17).ch);
1640 assert_eq!('b', data.char_range_at(19).ch);
// Asking for the character before offset 0 must report `next == 0`.
// NOTE(review): per the test name this guards against the offset underflowing.
1644 fn test_char_range_at_reverse_underflow() {
1645 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Compares the characters yielded by `chars()` with the expected array `v`,
// then checks that `pos` ended up equal to `v.len()`.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are on
// lines not visible in this excerpt.
1649 fn test_iterator() {
1650 let s = "ศไทย中华Việt Nam";
1651 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1654 let mut it = s.chars();
1657 assert_eq!(c, v[pos]);
1660 assert_eq!(pos, v.len());
// Compares the characters yielded by `chars().rev()` with `v`, which lists the
// characters of `s` from last to first, then checks that `pos` ended up equal
// to `v.len()`.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are on
// lines not visible in this excerpt.
1664 fn test_rev_iterator() {
1665 let s = "ศไทย中华Việt Nam";
1666 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1669 let mut it = s.chars().rev();
1672 assert_eq!(c, v[pos]);
1675 assert_eq!(pos, v.len());
// Round-trip check over every value in 0..0x110000 that `from_u32` accepts as
// a `char`: encode it as UTF-8 into a 4-byte buffer, view the written bytes as
// a `str`, and require `chars().next()` to give the same character back.
1679 fn test_chars_decoding() {
1680 let mut bytes = [0u8, ..4];
1681 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
// `encode_utf8` returns an `Option`; a `None` is treated as zero bytes written.
1682 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
1683 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1684 if Some(c) != s.chars().next() {
1685 panic!("character {:x}={} does not decode correctly", c as u32, c);
// Same round trip as the forward decoding test, but the character is read back
// through the reversed iterator (`chars().rev().next()`).
1691 fn test_chars_rev_decoding() {
1692 let mut bytes = [0u8, ..4];
1693 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
// `encode_utf8` returns an `Option`; a `None` is treated as zero bytes written.
1694 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
1695 let s = ::core::str::from_utf8(bytes[..len]).unwrap();
1696 if Some(c) != s.chars().rev().next() {
1697 panic!("character {:x}={} does not decode correctly", c as u32, c);
// A cloned `chars` iterator must yield the same items as the iterator it was
// cloned from: the two are zipped and compared pairwise.
// NOTE(review): one line between the creation of `it` and the assertion is not
// visible in this excerpt.
1703 fn test_iterator_clone() {
1704 let s = "ศไทย中华Việt Nam";
1705 let mut it = s.chars();
1707 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Compares each byte yielded by `bytes()` with `v[pos]`.
// NOTE(review): the opening of the `v` literal, the declaration of `pos` and
// its update are on lines not visible in this excerpt; the two numeric rows
// below appear to be part of `v`.
1711 fn test_bytesator() {
1712 let s = "ศไทย中华Việt Nam";
1714 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1715 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1720 for b in s.bytes() {
1721 assert_eq!(b, v[pos]);
// Compares each byte yielded by `bytes().rev()` with `v[pos]`, where `pos`
// starts at `v.len()`.
// NOTE(review): the opening of the `v` literal and the update of `pos` inside
// the loop are on lines not visible in this excerpt.
1727 fn test_bytes_revator() {
1728 let s = "ศไทย中华Việt Nam";
1730 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1731 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1734 let mut pos = v.len();
1736 for b in s.bytes().rev() {
1738 assert_eq!(b, v[pos]);
// `char_indices()` must yield `(byte offset, char)` pairs: `p` holds the
// expected offsets and `v` the expected characters, and both must be fully
// consumed at the end.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are on
// lines not visible in this excerpt.
1743 fn test_char_indicesator() {
1744 let s = "ศไทย中华Việt Nam";
1745 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1746 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1749 let mut it = s.char_indices();
1752 assert_eq!(c, (p[pos], v[pos]));
1755 assert_eq!(pos, v.len());
1756 assert_eq!(pos, p.len());
// `char_indices().rev()` must yield the `(byte offset, char)` pairs from last
// to first: `p` and `v` list the expected offsets and characters in that
// order, and both must be fully consumed at the end.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are on
// lines not visible in this excerpt.
1760 fn test_char_indices_revator() {
1761 let s = "ศไทย中华Việt Nam";
1762 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1763 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1766 let mut it = s.char_indices().rev();
1769 assert_eq!(c, (p[pos], v[pos]));
1772 assert_eq!(pos, v.len());
1773 assert_eq!(pos, p.len());
// `splitn(3, sep)` performs at most three splits, so each result has four
// pieces and the last piece keeps every remaining separator. Both a `char` and
// a closure are exercised as the separator.
1777 fn test_splitn_char_iterator() {
1778 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1780 let split: Vec<&str> = data.splitn(3, ' ').collect();
1781 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1783 let split: Vec<&str> = data.splitn(3, |c: char| c == ' ').collect();
1784 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Same again with a non-ASCII separator.
1787 let split: Vec<&str> = data.splitn(3, 'ä').collect();
1788 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1790 let split: Vec<&str> = data.splitn(3, |c: char| c == 'ä').collect();
1791 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Contrasts `split` and `split_terminator` on input that ends with the
// separator: `split` yields a final empty piece, `split_terminator` does not.
1795 fn test_split_char_iterator_no_trailing() {
1796 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1798 let split: Vec<&str> = data.split('\n').collect();
1799 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1801 let split: Vec<&str> = data.split_terminator('\n').collect();
1802 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// `words()` yields the pieces between runs of newlines, tabs and spaces, with
// no empty pieces for the leading or trailing whitespace.
1807 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1808 let words: Vec<&str> = data.words().collect();
1809 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `lev_distance`: zero between a one-character string and itself, for
// every `char` that `from_u32` produces below `MAX`, and the expected
// symmetric distances between three nearly identical texts.
1813 fn test_lev_distance() {
1814 use std::char::{ from_u32, MAX };
1815 // Test bytelength agnosticity
1816 for c in range(0u32, MAX as u32)
1817 .filter_map(|i| from_u32(i))
1818 .map(|i| String::from_char(1, i)) {
1819 assert_eq!(c[].lev_distance(c[]), 0);
// `b` is `a` with the first "ä" replaced by "a"; `c` is `b` without the
// leading newline. Each pair is checked in both argument orders.
1822 let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1823 let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
1824 let c = "Mary häd ä little lämb\n\nLittle lämb\n";
1825 assert_eq!(a.lev_distance(b), 1);
1826 assert_eq!(b.lev_distance(a), 1);
1827 assert_eq!(a.lev_distance(c), 2);
1828 assert_eq!(c.lev_distance(a), 2);
1829 assert_eq!(b.lev_distance(c), 1);
1830 assert_eq!(c.lev_distance(b), 1);
// Checks `nfd_chars()` against expected output strings.
// NOTE(review): the line that opens the `t!` macro definition is not visible in
// this excerpt; the visible arm collects `nfd_chars()` into a `String` and
// compares it with the expected text.
1834 fn test_nfd_chars() {
1836 ($input: expr, $expected: expr) => {
1837 assert_eq!($input.nfd_chars().collect::<String>(), $expected.into_string());
// Each call is `t!(input, expected)`. Here U+2026 and U+01C4 are expected to
// stay unchanged.
1841 t!("\u1e0b\u01c4", "d\u0307\u01c4");
1842 t!("\u2026", "\u2026");
1843 t!("\u2126", "\u03a9");
1844 t!("\u1e0b\u0323", "d\u0323\u0307");
1845 t!("\u1e0d\u0307", "d\u0323\u0307");
1846 t!("a\u0301", "a\u0301");
1847 t!("\u0301a", "\u0301a");
1848 t!("\ud4db", "\u1111\u1171\u11b6");
1849 t!("\uac1c", "\u1100\u1162");
// Checks `nfkd_chars()` against expected output strings.
// NOTE(review): the line that opens the `t!` macro definition is not visible in
// this excerpt; the visible arm collects `nfkd_chars()` into a `String` and
// compares it with the expected text.
1853 fn test_nfkd_chars() {
1855 ($input: expr, $expected: expr) => {
1856 assert_eq!($input.nfkd_chars().collect::<String>(), $expected.into_string());
// Each call is `t!(input, expected)`. Here U+2026 is expected to become "..."
// and U+01C4 to become "DZ" followed by U+030C.
1860 t!("\u1e0b\u01c4", "d\u0307DZ\u030c");
1861 t!("\u2026", "...");
1862 t!("\u2126", "\u03a9");
1863 t!("\u1e0b\u0323", "d\u0323\u0307");
1864 t!("\u1e0d\u0307", "d\u0323\u0307");
1865 t!("a\u0301", "a\u0301");
1866 t!("\u0301a", "\u0301a");
1867 t!("\ud4db", "\u1111\u1171\u11b6");
1868 t!("\uac1c", "\u1100\u1162");
// Checks `nfc_chars()` against expected output strings.
// NOTE(review): the line that opens the `t!` macro definition is not visible in
// this excerpt; the visible arm collects `nfc_chars()` into a `String` and
// compares it with the expected text.
1872 fn test_nfc_chars() {
1874 ($input: expr, $expected: expr) => {
1875 assert_eq!($input.nfc_chars().collect::<String>(), $expected.into_string());
// Each call is `t!(input, expected)`. Here "a" + U+0301 is expected to combine
// into U+00E1, while U+0301 placed before "a" stays as it is.
1879 t!("\u1e0b\u01c4", "\u1e0b\u01c4");
1880 t!("\u2026", "\u2026");
1881 t!("\u2126", "\u03a9");
1882 t!("\u1e0b\u0323", "\u1e0d\u0307");
1883 t!("\u1e0d\u0307", "\u1e0d\u0307");
1884 t!("a\u0301", "\u00e1");
1885 t!("\u0301a", "\u0301a");
1886 t!("\ud4db", "\ud4db");
1887 t!("\uac1c", "\uac1c");
1888 t!("a\u0300\u0305\u0315\u05aeb", "\u00e0\u05ae\u0305\u0315b");
// Checks `nfkc_chars()` against expected output strings.
// NOTE(review): the line that opens the `t!` macro definition is not visible in
// this excerpt; the visible arm collects `nfkc_chars()` into a `String` and
// compares it with the expected text.
1892 fn test_nfkc_chars() {
1894 ($input: expr, $expected: expr) => {
1895 assert_eq!($input.nfkc_chars().collect::<String>(), $expected.into_string());
// Each call is `t!(input, expected)`. Here U+2026 is expected to become "..."
// and U+01C4 to become "D" followed by U+017D.
1899 t!("\u1e0b\u01c4", "\u1e0bD\u017d");
1900 t!("\u2026", "...");
1901 t!("\u2126", "\u03a9");
1902 t!("\u1e0b\u0323", "\u1e0d\u0307");
1903 t!("\u1e0d\u0307", "\u1e0d\u0307");
1904 t!("a\u0301", "\u00e1");
1905 t!("\u0301a", "\u0301a");
1906 t!("\ud4db", "\ud4db");
1907 t!("\uac1c", "\uac1c");
1908 t!("a\u0300\u0305\u0315\u05aeb", "\u00e0\u05ae\u0305\u0315b");
// `lines()` yields the same four pieces whether or not the input ends with a
// newline: a trailing "\n" does not produce a final empty line, while the
// leading and the doubled newline each produce an empty one.
1913 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1914 let lines: Vec<&str> = data.lines().collect();
1915 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1917 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1918 let lines: Vec<&str> = data.lines().collect();
1919 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1923 fn test_graphemes() {
1924 use std::iter::order;
1925 // official Unicode test data
1926 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1927 let test_same: [(_, &[_]), .. 325] = [
1928 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1929 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1930 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1931 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1932 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1933 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1934 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1935 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1936 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1937 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1938 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1939 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1940 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1941 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1942 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1943 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1944 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1945 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1946 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1947 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1948 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1949 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1950 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
1951 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
1952 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
1953 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
1954 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
1955 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
1956 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
1957 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
1958 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
1959 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
1960 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
1961 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
1962 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
1963 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
1964 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
1965 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
1966 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
1967 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
1968 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
1969 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
1970 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
1971 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
1972 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
1973 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
1974 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
1975 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
1976 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
1977 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
1978 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
1979 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
1980 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
1981 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
1982 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
1983 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
1984 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
1985 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
1986 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
1987 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
1988 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
1989 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
1990 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
1991 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
1992 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
1993 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
1994 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
1995 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
1996 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
1997 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
1998 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
1999 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
2000 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
2001 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
2002 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
2003 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
2004 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
2005 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
2006 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
2007 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
2008 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
2009 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
2010 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
2011 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
2012 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
2013 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
2014 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
2015 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
2016 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
2017 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
2018 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
2019 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
2020 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
2021 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
2022 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
2023 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
2024 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
2025 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
2026 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
2027 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
2028 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
2029 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
2030 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
2031 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
2032 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
2033 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
2034 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
2035 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
2036 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
2037 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
2038 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
2039 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
2040 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
2041 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
2042 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
2043 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
2044 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
2045 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
2046 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
2047 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
2048 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
2049 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
2050 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
2051 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
2052 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
2053 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
2054 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
2055 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
2056 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
2057 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
2058 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
2059 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
2060 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
2061 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
2062 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
2063 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
2064 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
2065 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
2066 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
2067 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
2068 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
2069 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
2070 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
2071 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
2072 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
2073 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
2074 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
2075 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
2076 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
2077 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
2078 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
2079 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
2080 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
2081 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2082 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2083 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2084 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2085 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2086 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2087 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2088 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2089 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2090 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2091 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2092 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2093 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2094 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2095 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2096 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2097 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2098 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2099 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2100 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2101 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2102 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2103 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2104 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2105 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2106 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2107 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2108 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2109 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2110 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2111 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2112 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2113 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2114 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2115 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2116 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2117 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2118 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2119 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2120 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2121 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2122 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2123 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2124 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2125 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2126 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2127 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2128 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2129 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2130 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2131 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2132 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2133 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2136 let test_diff: [(_, &[_], &[_]), .. 23] = [
2137 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2138 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2139 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2140 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2141 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2142 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2143 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2144 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2145 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2146 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2147 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2148 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2149 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2150 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2151 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2152 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2153 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2154 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2155 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2156 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2157 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2158 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2159 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2162 for &(s, g) in test_same.iter() {
2163 // test forward iterator
2164 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2165 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2167 // test reverse iterator
2168 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2169 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2172 for &(s, gt, gf) in test_diff.iter() {
2173 // test forward iterator
2174 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2175 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2177 // test reverse iterator
2178 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2179 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2182 // test the indices iterators
2183 let s = "a̐éö̲\r\n";
2184 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2185 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2186 assert_eq!(gr_inds.as_slice(), b);
2187 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2188 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2189 assert_eq!(gr_inds.as_slice(), b);
2190 let mut gr_inds_iter = s.grapheme_indices(true);
2192 let gr_inds = gr_inds_iter.by_ref();
2193 let e1 = gr_inds.size_hint();
2194 assert_eq!(e1, (1, Some(13)));
2195 let c = gr_inds.count();
2198 let e2 = gr_inds_iter.size_hint();
2199 assert_eq!(e2, (0, Some(0)));
2201 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2203 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2204 let b: &[_] = &["\r", "\r\n", "\n"];
2205 assert_eq!(gr.as_slice(), b);
// Exercises `split_str`. The helper `t` collects `s.split_str(sep)` into a
// `Vec<&str>` and asserts that it equals the expected pieces `u`.
2209 fn test_split_strator() {
2210 fn t(s: &str, sep: &str, u: &[&str]) {
2211 let v: Vec<&str> = s.split_str(sep).collect();
2212 assert_eq!(v.as_slice(), u.as_slice());
// Separator that never occurs: the whole input is expected back as one piece.
2214 t("--1233345--", "12345", &["--1233345--"]);
// Separators in the middle, at the start and/or at the end; a separator at an
// edge is expected to produce an empty piece on that side.
2215 t("abc::hello::there", "::", &["abc", "hello", "there"]);
2216 t("::hello::there", "::", &["", "hello", "there"]);
2217 t("hello::there::", "::", &["hello", "there", ""]);
2218 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
2219 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2220 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2221 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2222 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Input equal to the separator, separator absent, and inputs made only of
// repeated separator characters (expected pieces reflect left-to-right,
// non-overlapping matches).
2224 t("zz", "zz", &["",""]);
2225 t("ok", "z", &["ok"]);
2226 t("zzz", "zz", &["","z"]);
2227 t("zzzzz", "zz", &["","","z"]);
// Checks that the `Default` value of a string-like type is the empty string:
// the generic helper `t` builds `S`'s default and compares `as_slice()` to "".
// NOTE(review): the calls that instantiate `t` are not visible in this listing.
2231 fn test_str_default() {
2232 use std::default::Default;
2233 fn t<S: Default + Str>() {
2234 let s: S = Default::default();
2235 assert_eq!(s.as_slice(), "");
// Checks `len()` over a slice of string slices. `sum_len` adds up the length
// of every element; each input below is expected to total 5.
2243 fn test_str_container() {
2244 fn sum_len(v: &[&str]) -> uint {
2245 v.iter().map(|x| x.len()).sum()
2248 let s = String::from_str("01234");
// String literals, including an empty one.
2249 assert_eq!(5, sum_len(&["012", "", "34"]));
// Slices borrowed from temporary `String` values.
2250 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2251 String::from_str("2").as_slice(),
2252 String::from_str("34").as_slice(),
2253 String::from_str("").as_slice()]));
// A single slice borrowed from the named `String` `s`.
2254 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on byte slices: it is expected to return `Some(&str)`
// for the inputs asserted as valid and `None` for `b"hello\xFF"`.
2258 fn test_str_from_utf8() {
// NOTE(review): the `xs` binding used by this first assertion is not visible
// in this listing (the embedded numbering skips a line here).
2260 assert_eq!(from_utf8(xs), Some("hello"));
// Multi-byte text round-trips through `as_bytes()` and `from_utf8`.
2262 let xs = "ศไทย中华Việt Nam".as_bytes();
2263 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// Input ending in the byte 0xFF is expected to be rejected.
2265 let xs = b"hello\xFF";
2266 assert_eq!(from_utf8(xs), None);
// Runs the same set of checks on a `Slice` value and an `Owned` value holding
// "abcde": length, `as_slice`, formatting, ordering and `Default` equality,
// then compares the two variants against each other.
2270 fn test_maybe_owned_traits() {
// Borrowed variant.
2271 let s = Slice("abcde");
2272 assert_eq!(s.len(), 5);
2273 assert_eq!(s.as_slice(), "abcde");
2274 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2275 assert_eq!(format!("{}", s).as_slice(), "abcde");
2276 assert!(s.lt(&Owned(String::from_str("bcdef"))));
2277 assert_eq!(Slice(""), Default::default());
// Owned variant: same checks, with the `lt` operand being a `Slice`.
2279 let o = Owned(String::from_str("abcde"));
2280 assert_eq!(o.len(), 5);
2281 assert_eq!(o.as_slice(), "abcde");
2282 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2283 assert_eq!(format!("{}", o).as_slice(), "abcde");
2284 assert!(o.lt(&Slice("bcdef")));
2285 assert_eq!(Owned(String::from_str("")), Default::default());
// Cross-variant comparison, in both directions.
2287 assert!(s.cmp(&o) == Equal);
2288 assert!(s.equiv(&o));
2290 assert!(o.cmp(&s) == Equal);
2291 assert!(o.equiv(&s));
// Checks the variant predicates: a `Slice` value reports `is_slice()` and not
// `is_owned()`, and an `Owned` value reports the opposite.
2295 fn test_maybe_owned_methods() {
2296 let s = Slice("abcde");
2297 assert!(s.is_slice());
2298 assert!(!s.is_owned());
2300 let o = Owned(String::from_str("abcde"));
2301 assert!(!o.is_slice());
2302 assert!(o.is_owned());
// Checks that a clone of either variant compares equal to both a `Slice` and
// an `Owned` value holding the same text "abcde".
2306 fn test_maybe_owned_clone() {
2307 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2308 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2309 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2310 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `into_string()` on either variant yields a `String` equal to
// `String::from_str("abcde")`.
2314 fn test_maybe_owned_into_string() {
2315 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2316 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2317 String::from_str("abcde"));
// Checks `into_maybe_owned()` called on a string literal and on a `String`:
// each result is asserted equal to both `Slice("abcde")` and
// `Owned(String::from_str("abcde"))`.
2321 fn test_into_maybe_owned() {
2322 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2323 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2324 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2325 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2326 Owned(String::from_str("abcde")));
2333 use test::black_box;
2335 use std::iter::{IteratorExt, DoubleEndedIteratorExt};
2336 use std::str::StrPrelude;
2337 use std::slice::SlicePrelude;
// Benchmark: counts the chars of a string mixing multi-byte and ASCII text
// via `s.chars().count()`.
2340 fn char_iterator(b: &mut Bencher) {
2341 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2343 b.iter(|| s.chars().count());
// Benchmark: walks `s.chars()` with a `for` loop, handing every char to
// `black_box`.
// NOTE(review): the enclosing `b.iter` call is not visible in this listing.
2347 fn char_iterator_for(b: &mut Bencher) {
2348 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2351 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a multi-line string literal made of the same
// ASCII sentence repeated, via `s.chars().count()`.
2356 fn char_iterator_ascii(b: &mut Bencher) {
2357 let s = "Mary had a little lamb, Little lamb
2358 Mary had a little lamb, Little lamb
2359 Mary had a little lamb, Little lamb
2360 Mary had a little lamb, Little lamb
2361 Mary had a little lamb, Little lamb
2362 Mary had a little lamb, Little lamb";
// Only the closure below is timed by the bencher.
2364 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed multi-byte/ASCII string while
// iterating in reverse (`s.chars().rev().count()`).
2368 fn char_iterator_rev(b: &mut Bencher) {
2369 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2371 b.iter(|| s.chars().rev().count());
// Benchmark: walks `s.chars().rev()` with a `for` loop, handing every char to
// `black_box`.
// NOTE(review): the enclosing `b.iter` call is not visible in this listing.
2375 fn char_iterator_rev_for(b: &mut Bencher) {
2376 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2379 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `s.char_indices()` and asserts the count
// equals `s.char_len()`, which is computed once outside the timed closure.
2384 fn char_indicesator(b: &mut Bencher) {
2385 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2386 let len = s.char_len();
2388 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of `s.char_indices().rev()` and asserts the
// count equals `s.char_len()`, which is computed once outside the timed closure.
2392 fn char_indicesator_rev(b: &mut Bencher) {
2393 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2394 let len = s.char_len();
2396 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits multi-byte text on the ASCII char 'V' and asserts that
// three pieces result ('V' occurs twice in `s`).
2400 fn split_unicode_ascii(b: &mut Bencher) {
2401 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2403 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits multi-byte text using a custom `CharEq` implementor,
// `NotAscii`, whose `only_ascii()` returns `false`, and asserts that three
// pieces result.
2407 fn split_unicode_not_ascii(b: &mut Bencher) {
2408 struct NotAscii(char);
2409 impl CharEq for NotAscii {
2410 fn matches(&mut self, c: char) -> bool {
// Unpacks the wrapped char.
// NOTE(review): the comparison that produces the `bool` result is not visible
// in this listing.
2411 let NotAscii(cc) = *self;
2414 fn only_ascii(&self) -> bool { false }
2416 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2418 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on ' '. The expected piece count `len`
// is computed once up front with the same call, then asserted inside the
// timed closure.
2423 fn split_ascii(b: &mut Bencher) {
2424 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2425 let len = s.split(' ').count();
2427 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence using a custom `CharEq` implementor,
// `NotAscii`, whose `only_ascii()` returns `false`. The piece count is
// asserted equal to `len`, obtained from a plain `s.split(' ')`.
2431 fn split_not_ascii(b: &mut Bencher) {
2432 struct NotAscii(char);
2433 impl CharEq for NotAscii {
2435 fn matches(&mut self, c: char) -> bool {
// Unpacks the wrapped char.
// NOTE(review): the comparison that produces the `bool` result is not visible
// in this listing.
2436 let NotAscii(cc) = *self;
2439 fn only_ascii(&self) -> bool { false }
2441 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2442 let len = s.split(' ').count();
2444 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using the nested function `pred`
// (true for ' ') as the separator predicate. The piece count is asserted
// equal to `len`, obtained from a plain `s.split(' ')`.
2448 fn split_extern_fn(b: &mut Bencher) {
2449 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2450 let len = s.split(' ').count();
2451 fn pred(c: char) -> bool { c == ' ' }
2453 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure (true for ' ') as the
// separator predicate. The piece count is asserted equal to `len`, obtained
// from a plain `s.split(' ')`.
2457 fn split_closure(b: &mut Bencher) {
2458 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2459 let len = s.split(' ').count();
2461 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a `&[char]` slice containing only
// ' ' as the separator. The piece count is asserted equal to `len`, obtained
// from a plain `s.split(' ')`.
2465 fn split_slice(b: &mut Bencher) {
2466 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2467 let len = s.split(' ').count();
2469 let c: &[char] = &[' '];
2470 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark setup: builds an ASCII byte-string literal and asserts it is
// exactly 100 bytes long.
// NOTE(review): the benchmarked call itself is not visible in this listing;
// the function name suggests a UTF-8 validity check — confirm.
2474 fn is_utf8_100_ascii(b: &mut Bencher) {
2476 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2477 Lorem ipsum dolor sit amet, consectetur. ";
2479 assert_eq!(100, s.len());
// Benchmark setup: takes the bytes of a multi-byte string and asserts the
// encoding is exactly 100 bytes long.
// NOTE(review): the benchmarked call itself is not visible in this listing;
// the function name suggests a UTF-8 validity check — confirm.
2486 fn is_utf8_100_multibyte(b: &mut Bencher) {
2487 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2488 assert_eq!(100, s.len());
// Benchmark: joins ten copies of `s` with `connect(sep)` and asserts the
// result's length is ten times `s.len()` plus nine times `sep.len()`.
// NOTE(review): the `sep` binding and the enclosing `b.iter` call are not
// visible in this listing.
2495 fn bench_connect(b: &mut Bencher) {
2496 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2498 let v = [s, s, s, s, s, s, s, s, s, s];
2500 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: asserts that a short haystack contains `needle`.
// NOTE(review): the `needle` binding and the enclosing `b.iter` call are not
// visible in this listing.
2505 fn bench_contains_short_short(b: &mut Bencher) {
2506 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2510 assert!(haystack.contains(needle));
// Benchmark: asserts that a long multi-paragraph filler text does NOT contain
// the needle "english".
// NOTE(review): the line that opens the `haystack` string literal and the
// enclosing `b.iter` call are not visible in this listing.
2515 fn bench_contains_short_long(b: &mut Bencher) {
2517 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2518 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2519 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2520 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2521 tempus vel, gravida nec quam.
2523 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2524 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2525 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2526 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2527 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2528 interdum. Curabitur ut nisi justo.
2530 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2531 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2532 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2533 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2534 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2535 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2536 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2537 Aliquam sit amet placerat lorem.
2539 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2540 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2541 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2542 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2543 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2546 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2547 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2548 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2549 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2550 malesuada sollicitudin quam eu fermentum.";
2551 let needle = "english";
2554 assert!(!haystack.contains(needle));
// Benchmark: searches a haystack made only of 'a' characters for a needle of
// several 'a's followed by 'b', and asserts the needle is NOT found. Every
// candidate position matches the needle's leading 'a's before failing at 'b'.
// NOTE(review): the enclosing `b.iter` call is not visible in this listing.
2559 fn bench_contains_bad_naive(b: &mut Bencher) {
2560 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2561 let needle = "aaaaaaaab";
2564 assert!(!haystack.contains(needle));
// Benchmark: haystack and needle are the same text; asserts that the haystack
// contains the needle.
// NOTE(review): the enclosing `b.iter` call is not visible in this listing.
2569 fn bench_contains_equal(b: &mut Bencher) {
2570 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2571 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2574 assert!(haystack.contains(needle));