1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
15 Unicode string manipulation (`str` type)
19 Rust's string type is one of the core primitive types of the language. While
20 represented by the name `str`, the name `str` is not actually a valid type in
21 Rust. Each string must also be decorated with a pointer. `String` is used
22 for an owned string, so there is only one commonly-used `str` type in Rust:
25 `&str` is the borrowed string type. This type of string can only be created
26 from other strings, unless it is a static string (see below). As the word
27 "borrowed" implies, this type of string is owned elsewhere, and this string
28 cannot be moved out of.
30 As an example, here's some code that uses a string.
34 let borrowed_string = "This string is borrowed with the 'static lifetime";
38 From the example above, you can see that Rust's string literals have the
39 `'static` lifetime. This is akin to C's concept of a static string.
41 String literals are allocated statically in the rodata of the
42 executable/library. The string then has the type `&'static str` meaning that
43 the string is valid for the `'static` lifetime, otherwise known as the
44 lifetime of the entire program. As can be inferred from the type, these static
45 strings are not mutable.
49 Many languages have immutable strings by default, and Rust has a particular
50 flavor of this idea. As with the rest of Rust's types, strings are immutable by
51 default. If a string is declared as `mut`, however, it may be mutated. This
52 works the same way as the rest of Rust's type system in the sense that if
53 there's a mutable reference to a string, there may only be one mutable reference
54 to that string. With these guarantees, strings can easily transition between
55 being mutable/immutable with the same benefits of having mutable strings in
60 Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
61 stream of UTF-8 bytes. All strings are guaranteed to be validly encoded UTF-8
62 sequences. Additionally, strings are not null-terminated and can contain null
65 The actual representation of strings has direct mappings to vectors: `&str`
66 is the same as `&[u8]`.
70 #![doc(primitive = "str")]
74 use core::default::Default;
77 use core::iter::AdditiveIterator;
80 use {Collection, Deque, MutableSeq};
87 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
88 pub use core::str::{Bytes, CharSplits};
89 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
90 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
91 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
92 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
93 pub use core::str::{Str, StrSlice};
94 pub use unicode::str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
97 Section: Creating a string
100 /// Deprecated. Replaced by `String::from_utf8`
101 #[deprecated = "Replaced by `String::from_utf8`"]
102 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
103 String::from_utf8(vv)
106 /// Deprecated. Replaced by `String::from_byte`
107 #[deprecated = "Replaced by String::from_byte"]
108 pub fn from_byte(b: u8) -> String {
110 String::from_char(1, b as char)
113 /// Deprecated. Use `String::from_char` or `char::to_string()` instead
114 #[deprecated = "use String::from_char or char.to_string()"]
115 pub fn from_char(ch: char) -> String {
116 String::from_char(1, ch)
119 /// Deprecated. Replaced by `String::from_chars`
120 #[deprecated = "use String::from_chars instead"]
121 pub fn from_chars(chs: &[char]) -> String {
122 chs.iter().map(|c| *c).collect()
125 /// Methods for vectors of strings
126 pub trait StrVector {
127 /// Concatenate a vector of strings.
132 /// let first = "Restaurant at the End of the".to_string();
133 /// let second = " Universe".to_string();
134 /// let string_vec = vec![first, second];
135 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
137 fn concat(&self) -> String;
139 /// Concatenate a vector of strings, placing a given separator between each.
144 /// let first = "Roast".to_string();
145 /// let second = "Sirloin Steak".to_string();
146 /// let string_vec = vec![first, second];
147 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
149 fn connect(&self, sep: &str) -> String;
152 impl<'a, S: Str> StrVector for &'a [S] {
153 fn concat(&self) -> String {
155 return String::new();
158 // `len` calculation may overflow but push_str will check boundaries
159 let len = self.iter().map(|s| s.as_slice().len()).sum();
161 let mut result = String::with_capacity(len);
163 for s in self.iter() {
164 result.push_str(s.as_slice())
170 fn connect(&self, sep: &str) -> String {
172 return String::new();
177 return self.concat();
180 // this is wrong without the guarantee that `self` is non-empty
181 // `len` calculation may overflow but push_str will check boundaries
182 let len = sep.len() * (self.len() - 1)
183 + self.iter().map(|s| s.as_slice().len()).sum();
184 let mut result = String::with_capacity(len);
185 let mut first = true;
187 for s in self.iter() {
191 result.push_str(sep);
193 result.push_str(s.as_slice());
199 impl<'a, S: Str> StrVector for Vec<S> {
201 fn concat(&self) -> String {
202 self.as_slice().concat()
206 fn connect(&self, sep: &str) -> String {
207 self.as_slice().connect(sep)
215 // Helper functions used for Unicode normalization
216 fn canonical_sort(comb: &mut [(char, u8)]) {
217 let len = comb.len();
218 for i in range(0, len) {
219 let mut swapped = false;
220 for j in range(1, len-i) {
221 let class_a = *comb[j-1].ref1();
222 let class_b = *comb[j].ref1();
223 if class_a != 0 && class_b != 0 && class_a > class_b {
228 if !swapped { break; }
233 enum DecompositionType {
238 /// External iterator for a string's decomposition's characters.
239 /// Use with the `std::iter` module.
241 pub struct Decompositions<'a> {
242 kind: DecompositionType,
244 buffer: Vec<(char, u8)>,
248 impl<'a> Iterator<char> for Decompositions<'a> {
250 fn next(&mut self) -> Option<char> {
251 match self.buffer.as_slice().head() {
254 self.buffer.remove(0);
257 Some(&(c, _)) if self.sorted => {
258 self.buffer.remove(0);
261 _ => self.sorted = false
264 let decomposer = match self.kind {
265 Canonical => unicode::char::decompose_canonical,
266 Compatible => unicode::char::decompose_compatible
270 for ch in self.iter {
271 let buffer = &mut self.buffer;
272 let sorted = &mut self.sorted;
274 let class = unicode::char::canonical_combining_class(d);
275 if class == 0 && !*sorted {
276 canonical_sort(buffer.as_mut_slice());
279 buffer.push((d, class));
286 canonical_sort(self.buffer.as_mut_slice());
290 match self.buffer.remove(0) {
295 Some((c, _)) => Some(c),
300 fn size_hint(&self) -> (uint, Option<uint>) {
301 let (lower, _) = self.iter.size_hint();
307 enum RecompositionState {
313 /// External iterator for a string's recomposition's characters.
314 /// Use with the `std::iter` module.
316 pub struct Recompositions<'a> {
317 iter: Decompositions<'a>,
318 state: RecompositionState,
319 buffer: RingBuf<char>,
320 composee: Option<char>,
324 impl<'a> Iterator<char> for Recompositions<'a> {
326 fn next(&mut self) -> Option<char> {
330 for ch in self.iter {
331 let ch_class = unicode::char::canonical_combining_class(ch);
332 if self.composee.is_none() {
336 self.composee = Some(ch);
339 let k = self.composee.clone().unwrap();
341 match self.last_ccc {
343 match unicode::char::compose(k, ch) {
345 self.composee = Some(r);
350 self.composee = Some(ch);
353 self.buffer.push(ch);
354 self.last_ccc = Some(ch_class);
359 if l_class >= ch_class {
360 // `ch` is blocked from `composee`
362 self.composee = Some(ch);
363 self.last_ccc = None;
364 self.state = Purging;
367 self.buffer.push(ch);
368 self.last_ccc = Some(ch_class);
371 match unicode::char::compose(k, ch) {
373 self.composee = Some(r);
377 self.buffer.push(ch);
378 self.last_ccc = Some(ch_class);
384 self.state = Finished;
385 if self.composee.is_some() {
386 return self.composee.take();
390 match self.buffer.pop_front() {
391 None => self.state = Composing,
396 match self.buffer.pop_front() {
397 None => return self.composee.take(),
406 /// Replace all occurrences of one string with another
410 /// * s - The string containing substrings to replace
411 /// * from - The string to replace
412 /// * to - The replacement string
416 /// The original string with all occurrences of `from` replaced with `to`
422 /// let string = "orange";
423 /// let new_string = str::replace(string, "or", "str");
424 /// assert_eq!(new_string.as_slice(), "strange");
426 pub fn replace(s: &str, from: &str, to: &str) -> String {
427 let mut result = String::new();
428 let mut last_end = 0;
429 for (start, end) in s.match_indices(from) {
430 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
434 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
442 /// Deprecated. Use `String::from_utf16`.
443 #[deprecated = "Replaced by String::from_utf16"]
444 pub fn from_utf16(v: &[u16]) -> Option<String> {
445 String::from_utf16(v)
448 /// Deprecated. Use `String::from_utf16_lossy`.
449 #[deprecated = "Replaced by String::from_utf16_lossy"]
450 pub fn from_utf16_lossy(v: &[u16]) -> String {
451 String::from_utf16_lossy(v)
454 // Return the initial codepoint accumulator for the first byte.
455 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
456 // for width 3, and 3 bits for width 4
457 macro_rules! utf8_first_byte(
458 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
461 // return the value of $ch updated with continuation byte $byte
462 macro_rules! utf8_acc_cont_byte(
463 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
466 /// Deprecated. Use `String::from_utf8_lossy`.
467 #[deprecated = "Replaced by String::from_utf8_lossy"]
468 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
469 String::from_utf8_lossy(v)
476 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
477 /// This can be useful as an optimization when an allocation is sometimes
478 /// needed but not always.
479 pub enum MaybeOwned<'a> {
480 /// A borrowed string
486 /// `SendStr` is a specialization of `MaybeOwned` with the `'static` lifetime, so that it is sendable
487 pub type SendStr = MaybeOwned<'static>;
489 impl<'a> MaybeOwned<'a> {
490 /// Returns `true` if this `MaybeOwned` wraps an owned string
495 /// let string = String::from_str("orange");
496 /// let maybe_owned_string = string.into_maybe_owned();
497 /// assert_eq!(true, maybe_owned_string.is_owned());
500 pub fn is_owned(&self) -> bool {
507 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
512 /// let string = "orange";
513 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
514 /// assert_eq!(true, maybe_owned_string.is_slice());
517 pub fn is_slice(&self) -> bool {
525 /// Trait for moving into a `MaybeOwned`
526 pub trait IntoMaybeOwned<'a> {
527 /// Moves self into a `MaybeOwned`
528 fn into_maybe_owned(self) -> MaybeOwned<'a>;
534 /// let owned_string = String::from_str("orange");
535 /// let maybe_owned_string = owned_string.into_maybe_owned();
536 /// assert_eq!(true, maybe_owned_string.is_owned());
538 impl<'a> IntoMaybeOwned<'a> for String {
540 fn into_maybe_owned(self) -> MaybeOwned<'a> {
548 /// let string = "orange";
549 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
550 /// assert_eq!(false, maybe_owned_str.is_owned());
552 impl<'a> IntoMaybeOwned<'a> for &'a str {
554 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
560 /// let str = "orange";
561 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
562 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
563 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
565 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
567 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
570 impl<'a> PartialEq for MaybeOwned<'a> {
572 fn eq(&self, other: &MaybeOwned) -> bool {
573 self.as_slice() == other.as_slice()
577 impl<'a> Eq for MaybeOwned<'a> {}
579 impl<'a> PartialOrd for MaybeOwned<'a> {
581 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
582 Some(self.cmp(other))
586 impl<'a> Ord for MaybeOwned<'a> {
588 fn cmp(&self, other: &MaybeOwned) -> Ordering {
589 self.as_slice().cmp(&other.as_slice())
593 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
595 fn equiv(&self, other: &S) -> bool {
596 self.as_slice() == other.as_slice()
600 impl<'a> Str for MaybeOwned<'a> {
602 fn as_slice<'b>(&'b self) -> &'b str {
605 Owned(ref s) => s.as_slice()
610 impl<'a> StrAllocating for MaybeOwned<'a> {
612 fn into_string(self) -> String {
614 Slice(s) => String::from_str(s),
620 impl<'a> Collection for MaybeOwned<'a> {
622 fn len(&self) -> uint { self.as_slice().len() }
625 impl<'a> Clone for MaybeOwned<'a> {
627 fn clone(&self) -> MaybeOwned<'a> {
629 Slice(s) => Slice(s),
630 Owned(ref s) => Owned(String::from_str(s.as_slice()))
635 impl<'a> Default for MaybeOwned<'a> {
637 fn default() -> MaybeOwned<'a> { Slice("") }
640 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
642 fn hash(&self, hasher: &mut H) {
643 self.as_slice().hash(hasher)
647 impl<'a> fmt::Show for MaybeOwned<'a> {
649 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
651 Slice(ref s) => s.fmt(f),
652 Owned(ref s) => s.fmt(f)
657 /// Unsafe operations
665 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
666 pub use core::str::raw::{slice_unchecked};
668 /// Deprecated. Replaced by `string::raw::from_buf_len`
669 #[deprecated = "Use string::raw::from_buf_len"]
670 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
671 string::raw::from_buf_len(buf, len)
674 /// Deprecated. Use `string::raw::from_buf`
675 #[deprecated = "Use string::raw::from_buf"]
676 pub unsafe fn from_c_str(c_string: *const i8) -> String {
677 string::raw::from_buf(c_string as *const u8)
680 /// Deprecated. Replaced by `string::raw::from_utf8`
681 #[deprecated = "Use string::raw::from_utf8"]
682 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
683 string::raw::from_utf8(v)
686 /// Deprecated. Use `string::raw::from_utf8`
687 #[deprecated = "Use string::raw::from_utf8"]
688 pub unsafe fn from_byte(u: u8) -> String {
689 string::raw::from_utf8(vec![u])
694 Section: Trait implementations
697 /// Any string that can be represented as a slice
698 pub trait StrAllocating: Str {
699 /// Convert `self` into a `String`, not making a copy if possible.
700 fn into_string(self) -> String;
702 #[allow(missing_doc)]
703 #[deprecated = "replaced by .into_string()"]
704 fn into_owned(self) -> String {
708 /// Escape each char in `s` with `char::escape_default`.
709 fn escape_default(&self) -> String {
710 let me = self.as_slice();
711 let mut out = String::with_capacity(me.len());
712 for c in me.chars() {
713 c.escape_default(|c| out.push_char(c));
718 /// Escape each char in `s` with `char::escape_unicode`.
719 fn escape_unicode(&self) -> String {
720 let me = self.as_slice();
721 let mut out = String::with_capacity(me.len());
722 for c in me.chars() {
723 c.escape_unicode(|c| out.push_char(c));
728 /// Replace all occurrences of one string with another.
732 /// * `from` - The string to replace
733 /// * `to` - The replacement string
737 /// The original string with all occurrences of `from` replaced with `to`.
742 /// let s = "Do you know the muffin man,
743 /// The muffin man, the muffin man, ...".to_string();
745 /// assert_eq!(s.replace("muffin man", "little lamb"),
746 /// "Do you know the little lamb,
747 /// The little lamb, the little lamb, ...".to_string());
749 /// // not found, so no change.
750 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
752 fn replace(&self, from: &str, to: &str) -> String {
753 let me = self.as_slice();
754 let mut result = String::new();
755 let mut last_end = 0;
756 for (start, end) in me.match_indices(from) {
757 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
761 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
765 #[allow(missing_doc)]
766 #[deprecated = "obsolete, use `to_string`"]
768 fn to_owned(&self) -> String {
770 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
774 /// Converts to a vector of `u16` encoded as UTF-16.
775 #[deprecated = "use `utf16_units` instead"]
776 fn to_utf16(&self) -> Vec<u16> {
777 self.as_slice().utf16_units().collect::<Vec<u16>>()
780 /// Given a string, make a new string with repeated copies of it.
781 fn repeat(&self, nn: uint) -> String {
782 let me = self.as_slice();
783 let mut ret = String::with_capacity(nn * me.len());
784 for _ in range(0, nn) {
790 /// Levenshtein Distance between two strings.
791 fn lev_distance(&self, t: &str) -> uint {
792 let me = self.as_slice();
796 if slen == 0 { return tlen; }
797 if tlen == 0 { return slen; }
799 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
801 for (i, sc) in me.chars().enumerate() {
804 *dcol.get_mut(0) = current + 1;
806 for (j, tc) in t.chars().enumerate() {
808 let next = dcol[j + 1];
811 *dcol.get_mut(j + 1) = current;
813 *dcol.get_mut(j + 1) = cmp::min(current, next);
814 *dcol.get_mut(j + 1) = cmp::min(dcol[j + 1],
825 /// An Iterator over the string in Unicode Normalization Form D
826 /// (canonical decomposition).
828 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
830 iter: self.as_slice().chars(),
837 /// An Iterator over the string in Unicode Normalization Form KD
838 /// (compatibility decomposition).
840 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
842 iter: self.as_slice().chars(),
849 /// An Iterator over the string in Unicode Normalization Form C
850 /// (canonical decomposition followed by canonical composition).
852 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
854 iter: self.nfd_chars(),
856 buffer: RingBuf::new(),
862 /// An Iterator over the string in Unicode Normalization Form KC
863 /// (compatibility decomposition followed by canonical composition).
865 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
867 iter: self.nfkd_chars(),
869 buffer: RingBuf::new(),
876 impl<'a> StrAllocating for &'a str {
878 fn into_string(self) -> String {
879 String::from_str(self)
885 use std::iter::AdditiveIterator;
886 use std::iter::range;
887 use std::default::Default;
889 use std::clone::Clone;
890 use std::cmp::{Equal, Greater, Less, Ord, PartialOrd, Equiv};
891 use std::option::{Some, None};
892 use std::ptr::RawPtr;
893 use std::iter::{Iterator, DoubleEndedIterator};
894 use {Collection, MutableSeq};
897 use std::slice::{Vector, ImmutableVector};
901 use unicode::char::UnicodeChar;
905 assert!((eq_slice("foobar".slice(0, 3), "foo")));
906 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
907 assert!((!eq_slice("foo1", "foo2")));
913 assert!("" <= "foo");
914 assert!("foo" <= "foo");
915 assert!("foo" != "bar");
920 assert_eq!("".len(), 0u);
921 assert_eq!("hello world".len(), 11u);
922 assert_eq!("\x63".len(), 1u);
923 assert_eq!("\xa2".len(), 2u);
924 assert_eq!("\u03c0".len(), 2u);
925 assert_eq!("\u2620".len(), 3u);
926 assert_eq!("\U0001d11e".len(), 4u);
928 assert_eq!("".char_len(), 0u);
929 assert_eq!("hello world".char_len(), 11u);
930 assert_eq!("\x63".char_len(), 1u);
931 assert_eq!("\xa2".char_len(), 1u);
932 assert_eq!("\u03c0".char_len(), 1u);
933 assert_eq!("\u2620".char_len(), 1u);
934 assert_eq!("\U0001d11e".char_len(), 1u);
935 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
937 assert_eq!("hello".width(false), 10u);
938 assert_eq!("hello".width(true), 10u);
939 assert_eq!("\0\0\0\0\0".width(false), 0u);
940 assert_eq!("\0\0\0\0\0".width(true), 0u);
941 assert_eq!("".width(false), 0u);
942 assert_eq!("".width(true), 0u);
943 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
944 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
949 assert_eq!("hello".find('l'), Some(2u));
950 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
951 assert!("hello".find('x').is_none());
952 assert!("hello".find(|c:char| c == 'x').is_none());
953 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
954 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
959 assert_eq!("hello".rfind('l'), Some(3u));
960 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
961 assert!("hello".rfind('x').is_none());
962 assert!("hello".rfind(|c:char| c == 'x').is_none());
963 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
964 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
969 let empty = String::from_str("");
970 let s: String = empty.as_slice().chars().collect();
971 assert_eq!(empty, s);
972 let data = String::from_str("ประเทศไทย中");
973 let s: String = data.as_slice().chars().collect();
978 fn test_into_bytes() {
979 let data = String::from_str("asdf");
980 let buf = data.into_bytes();
981 assert_eq!(b"asdf", buf.as_slice());
987 assert_eq!("".find_str(""), Some(0u));
988 assert!("banana".find_str("apple pie").is_none());
991 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
992 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
993 assert!(data.slice(2u, 4u).find_str("ab").is_none());
995 let string = "ประเทศไทย中华Việt Nam";
996 let mut data = String::from_str(string);
997 data.push_str(string);
998 assert!(data.as_slice().find_str("ไท华").is_none());
999 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
1000 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
1002 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
1003 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
1004 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
1005 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
1006 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
1008 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1009 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1010 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1011 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1012 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1016 fn test_slice_chars() {
1017 fn t(a: &str, b: &str, start: uint) {
1018 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1021 t("hello", "llo", 2);
1022 t("hello", "el", 1);
1025 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1030 fn t(v: &[String], s: &str) {
1031 assert_eq!(v.concat().as_slice(), s);
1033 t([String::from_str("you"), String::from_str("know"),
1034 String::from_str("I'm"),
1035 String::from_str("no"), String::from_str("good")],
1036 "youknowI'mnogood");
1037 let v: &[String] = [];
1039 t([String::from_str("hi")], "hi");
1044 fn t(v: &[String], sep: &str, s: &str) {
1045 assert_eq!(v.connect(sep).as_slice(), s);
1047 t([String::from_str("you"), String::from_str("know"),
1048 String::from_str("I'm"),
1049 String::from_str("no"), String::from_str("good")],
1050 " ", "you know I'm no good");
1051 let v: &[String] = [];
1053 t([String::from_str("hi")], " ", "hi");
1057 fn test_concat_slices() {
1058 fn t(v: &[&str], s: &str) {
1059 assert_eq!(v.concat().as_slice(), s);
1061 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1062 let v: &[&str] = [];
1068 fn test_connect_slices() {
1069 fn t(v: &[&str], sep: &str, s: &str) {
1070 assert_eq!(v.connect(sep).as_slice(), s);
1072 t(["you", "know", "I'm", "no", "good"],
1073 " ", "you know I'm no good");
1075 t(["hi"], " ", "hi");
1080 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1081 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1082 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1083 assert_eq!("".repeat(4), String::from_str(""));
1084 assert_eq!("hi".repeat(0), String::from_str(""));
1088 fn test_unsafe_slice() {
1089 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1090 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1091 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1092 fn a_million_letter_a() -> String {
1094 let mut rs = String::new();
1096 rs.push_str("aaaaaaaaaa");
1101 fn half_a_million_letter_a() -> String {
1103 let mut rs = String::new();
1105 rs.push_str("aaaaa");
1110 let letters = a_million_letter_a();
1111 assert!(half_a_million_letter_a() ==
1112 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1118 fn test_starts_with() {
1119 assert!(("".starts_with("")));
1120 assert!(("abc".starts_with("")));
1121 assert!(("abc".starts_with("a")));
1122 assert!((!"a".starts_with("abc")));
1123 assert!((!"".starts_with("abc")));
1124 assert!((!"ödd".starts_with("-")));
1125 assert!(("ödd".starts_with("öd")));
1129 fn test_ends_with() {
1130 assert!(("".ends_with("")));
1131 assert!(("abc".ends_with("")));
1132 assert!(("abc".ends_with("c")));
1133 assert!((!"a".ends_with("abc")));
1134 assert!((!"".ends_with("abc")));
1135 assert!((!"ddö".ends_with("-")));
1136 assert!(("ddö".ends_with("dö")));
1140 fn test_is_empty() {
1141 assert!("".is_empty());
1142 assert!(!"a".is_empty());
1148 assert_eq!("".replace(a, "b"), String::from_str(""));
1149 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1150 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1152 assert!(" test test ".replace(test, "toast") ==
1153 String::from_str(" toast toast "));
1154 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1158 fn test_replace_2a() {
1159 let data = "ประเทศไทย中华";
1160 let repl = "دولة الكويت";
1163 let a2 = "دولة الكويتทศไทย中华";
1164 assert_eq!(data.replace(a, repl).as_slice(), a2);
1168 fn test_replace_2b() {
1169 let data = "ประเทศไทย中华";
1170 let repl = "دولة الكويت";
1173 let b2 = "ปรدولة الكويتทศไทย中华";
1174 assert_eq!(data.replace(b, repl).as_slice(), b2);
1178 fn test_replace_2c() {
1179 let data = "ประเทศไทย中华";
1180 let repl = "دولة الكويت";
1183 let c2 = "ประเทศไทยدولة الكويت";
1184 assert_eq!(data.replace(c, repl).as_slice(), c2);
1188 fn test_replace_2d() {
1189 let data = "ประเทศไทย中华";
1190 let repl = "دولة الكويت";
1193 assert_eq!(data.replace(d, repl).as_slice(), data);
1198 assert_eq!("ab", "abc".slice(0, 2));
1199 assert_eq!("bc", "abc".slice(1, 3));
1200 assert_eq!("", "abc".slice(1, 1));
1201 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1203 let data = "ประเทศไทย中华";
1204 assert_eq!("ป", data.slice(0, 3));
1205 assert_eq!("ร", data.slice(3, 6));
1206 assert_eq!("", data.slice(3, 3));
1207 assert_eq!("华", data.slice(30, 33));
1209 fn a_million_letter_x() -> String {
1211 let mut rs = String::new();
1213 rs.push_str("华华华华华华华华华华");
1218 fn half_a_million_letter_x() -> String {
1220 let mut rs = String::new();
1222 rs.push_str("华华华华华");
1227 let letters = a_million_letter_x();
1228 assert!(half_a_million_letter_x() ==
1229 String::from_str(letters.as_slice().slice(0u, 3u * 500000u)));
1234 let ss = "中华Việt Nam";
1236 assert_eq!("华", ss.slice(3u, 6u));
1237 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1239 assert_eq!("ab", "abc".slice(0u, 2u));
1240 assert_eq!("bc", "abc".slice(1u, 3u));
1241 assert_eq!("", "abc".slice(1u, 1u));
1243 assert_eq!("中", ss.slice(0u, 3u));
1244 assert_eq!("华V", ss.slice(3u, 7u));
1245 assert_eq!("", ss.slice(3u, 3u));
1260 fn test_slice_fail() {
1261 "中华Việt Nam".slice(0u, 2u);
1265 fn test_slice_from() {
1266 assert_eq!("abcd".slice_from(0), "abcd");
1267 assert_eq!("abcd".slice_from(2), "cd");
1268 assert_eq!("abcd".slice_from(4), "");
1271 fn test_slice_to() {
1272 assert_eq!("abcd".slice_to(0), "");
1273 assert_eq!("abcd".slice_to(2), "ab");
1274 assert_eq!("abcd".slice_to(4), "abcd");
1278 fn test_trim_left_chars() {
1279 let v: &[char] = &[];
1280 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1281 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1282 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1283 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1285 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1286 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1287 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1291 fn test_trim_right_chars() {
1292 let v: &[char] = &[];
1293 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1294 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1295 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1296 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1298 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1299 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1300 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1304 fn test_trim_chars() {
1305 let v: &[char] = &[];
1306 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1307 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1308 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1309 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1311 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1312 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1313 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1317 fn test_trim_left() {
1318 assert_eq!("".trim_left(), "");
1319 assert_eq!("a".trim_left(), "a");
1320 assert_eq!(" ".trim_left(), "");
1321 assert_eq!(" blah".trim_left(), "blah");
1322 assert_eq!(" \u3000 wut".trim_left(), "wut");
1323 assert_eq!("hey ".trim_left(), "hey ");
1327 fn test_trim_right() {
1328 assert_eq!("".trim_right(), "");
1329 assert_eq!("a".trim_right(), "a");
1330 assert_eq!(" ".trim_right(), "");
1331 assert_eq!("blah ".trim_right(), "blah");
1332 assert_eq!("wut \u3000 ".trim_right(), "wut");
1333 assert_eq!(" hey".trim_right(), " hey");
1338 assert_eq!("".trim(), "");
1339 assert_eq!("a".trim(), "a");
1340 assert_eq!(" ".trim(), "");
1341 assert_eq!(" blah ".trim(), "blah");
1342 assert_eq!("\nwut \u3000 ".trim(), "wut");
1343 assert_eq!(" hey dude ".trim(), "hey dude");
1347 fn test_is_whitespace() {
1348 assert!("".is_whitespace());
1349 assert!(" ".is_whitespace());
1350 assert!("\u2009".is_whitespace()); // Thin space
1351 assert!(" \n\t ".is_whitespace());
1352 assert!(!" _ ".is_whitespace());
1356 fn test_slice_shift_char() {
1357 let data = "ประเทศไทย中";
1358 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1362 fn test_slice_shift_char_2() {
1364 assert_eq!(empty.slice_shift_char(), (None, ""));
1369 // deny overlong encodings
1370 assert!(!is_utf8([0xc0, 0x80]));
1371 assert!(!is_utf8([0xc0, 0xae]));
1372 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1373 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1374 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1375 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1376 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1379 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1380 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1382 assert!(is_utf8([0xC2, 0x80]));
1383 assert!(is_utf8([0xDF, 0xBF]));
1384 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1385 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1386 assert!(is_utf8([0xEE, 0x80, 0x80]));
1387 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1388 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1389 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
// Exercises `is_utf16` through two local macros: `pos!` asserts that every
// argument is accepted, `neg!` asserts that every argument is rejected.
// NOTE(review): several `pos!`/`neg!` invocations announced by the comments
// below are not visible in this extract.
1393 fn test_is_utf16() {
1394 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1402 // surrogate pairs (randomly generated with Python 3's
1403 // .encode('utf-16be'))
1404 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1405 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1406 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1408 // mixtures (also random)
1409 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1410 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1411 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1414 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1417 // surrogate + regular unit
1419 // surrogate + lead surrogate
1421 // unterminated surrogate
1423 // trail surrogate without a lead
1426 // random byte sequences that Python 3's .decode('utf-16be')
1428 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1429 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1430 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1431 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1432 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1433 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1434 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1435 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1436 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1437 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1438 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1439 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1440 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1441 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1442 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1443 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1444 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1445 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1446 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1447 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1448 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// Compares `as_bytes` against expected byte values for an empty string, an
// ASCII string and a multi-byte string (expected bytes held in `v`).
// NOTE(review): the opening and closing lines of the `v` literal are not
// visible in this extract.
1452 fn test_as_bytes() {
1455 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1456 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1459 assert_eq!("".as_bytes(), &[]);
1460 assert_eq!("abc".as_bytes(), b"abc");
1461 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
// Calls `as_bytes` on an empty owned `String` and keeps the result alive in
// `_bytes`; no assertion is visible here.
// NOTE(review): the name suggests an expected failure — the attribute and any
// failing statement are not visible in this extract; confirm in the full file.
1466 fn test_as_bytes_fail() {
1467 // Don't double free. (I'm not sure if this exercises the
1468 // original problem code path anymore.)
1469 let s = String::from_str("");
1470 let _bytes = s.as_bytes();
// Reads the bytes of "hello" back through the raw pointer returned by
// `as_ptr`, one offset at a time.
// NOTE(review): the enclosing `fn` header and the `unsafe` block these raw
// dereferences require are not visible in this extract.
1476 let buf = "hello".as_ptr();
1478 assert_eq!(*buf.offset(0), b'h');
1479 assert_eq!(*buf.offset(1), b'e');
1480 assert_eq!(*buf.offset(2), b'l');
1481 assert_eq!(*buf.offset(3), b'l');
1482 assert_eq!(*buf.offset(4), b'o');
// `subslice_offset` reports where a slice starts inside its parent string.
// Checked for slices taken with `slice` and for slices produced by `lines()`.
1487 fn test_subslice_offset() {
1488 let a = "kernelsprite";
1489 let b = a.slice(7, a.len());
1490 let c = a.slice(0, a.len() - 6);
1491 assert_eq!(a.subslice_offset(b), 7);
1492 assert_eq!(a.subslice_offset(c), 0);
// Each line of "a\nb\nc" starts two positions after the previous one.
1494 let string = "a\nb\nc";
1495 let lines: Vec<&str> = string.lines().collect();
1496 let lines = lines.as_slice();
1497 assert_eq!(string.subslice_offset(lines[0]), 0);
1498 assert_eq!(string.subslice_offset(lines[1]), 2);
1499 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with a literal that is not a slice of the receiver.
// NOTE(review): presumably this is expected to fail; no `should_fail`
// attribute is visible in this extract — confirm in the full file.
1504 fn test_subslice_offset_2() {
1505 let a = "alchemiter";
1506 let b = "cruxtruder";
1507 a.subslice_offset(b);
// Round-trips a `String` through `Vec<u8>` and `from_utf8`, then sets up an
// index and the two lengths for a byte-by-byte comparison of `s1` and `s2`.
// NOTE(review): the loop header and the assertions on `n1`, `n2`, `a` and `b`
// are not visible in this extract.
1511 fn vec_str_conversions() {
1512 let s1: String = String::from_str("All mimsy were the borogoves");
1514 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1515 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1516 let mut i: uint = 0u;
1517 let n1: uint = s1.len();
1518 let n2: uint = v.len();
// Bytes at the same index in the original and the round-tripped string.
1521 let a: u8 = s1.as_bytes()[i];
1522 let b: u8 = s2.as_bytes()[i];
// Substring search with `contains`: needles in the middle, at the start and
// at the end are found, the empty needle matches any haystack (including the
// empty one), and absent needles are reported as missing.
1531 fn test_contains() {
1532 assert!("abcde".contains("bcd"));
1533 assert!("abcde".contains("abcd"));
1534 assert!("abcde".contains("bcde"));
1535 assert!("abcde".contains(""));
1536 assert!("".contains(""));
1537 assert!(!"abcde".contains("def"));
1538 assert!(!"".contains("a"));
// Same checks on multi-byte text; the last needle joins characters that are
// not adjacent in `data`, so it must not match.
1540 let data = "ประเทศไทย中华Việt Nam";
1541 assert!(data.contains("ประเ"));
1542 assert!(data.contains("ะเ"));
1543 assert!(data.contains("中华"));
1544 assert!(!data.contains("ไท华"));
// `contains_char` is true when the character occurs in the string and false
// otherwise, including for the empty string.
1548 fn test_contains_char() {
1549 assert!("abc".contains_char('b'));
1550 assert!("a".contains_char('a'));
1551 assert!(!"abc".contains_char('d'));
1552 assert!(!"".contains_char('a'));
// Expected results of `truncate_utf16_at_nul` for a series of inputs `v`,
// ranging from an empty result up to `[1, 2, 3]`.
// NOTE(review): the `v` bindings preceding each assertion are not visible in
// this extract, so the inputs cannot be confirmed here.
1556 fn test_truncate_utf16_at_nul() {
1558 assert_eq!(truncate_utf16_at_nul(v), &[]);
1561 assert_eq!(truncate_utf16_at_nul(v), &[]);
1564 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1567 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1570 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
// Walks `s` front to back: `char_at(pos)` must equal each expected character,
// and `pos` then advances by the length of a one-character `String` built
// from that character.
// NOTE(review): the enclosing `fn` header and the initialisation of `pos` are
// not visible in this extract.
1575 let s = "ศไทย中华Việt Nam";
1576 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1578 for ch in v.iter() {
1579 assert!(s.char_at(pos) == *ch);
1580 pos += String::from_char(1, *ch).len();
// Walks `s` back to front: starting at `s.len()`, `char_at_reverse(pos)` must
// equal each expected character (taken in reverse order), and `pos` then
// moves back by the length of a one-character `String` built from it.
1585 fn test_char_at_reverse() {
1586 let s = "ศไทย中华Việt Nam";
1587 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1588 let mut pos = s.len();
1589 for ch in v.iter().rev() {
1590 assert!(s.char_at_reverse(pos) == *ch);
1591 pos -= String::from_char(1, *ch).len();
// `escape_unicode` renders every character as an escape sequence: `\xNN` for
// code points up to 0xff, `\uNNNN` up to 0xffff and `\UNNNNNNNN` above that.
// Letters, spaces, control characters and quotes are all escaped alike.
1596 fn test_escape_unicode() {
1597 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1598 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1599 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1600 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1601 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), String::from_str("\\x00\\x01\\xfe\\xff"));
1602 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1603 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1604 String::from_str("\\U00010000\\U0010ffff"));
1605 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1606 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
// `escape_default` leaves ASCII letters and spaces untouched, uses the short
// escapes `\r`, `\n`, `\t`, `\'`, `\"` and `\\`, and falls back to `\uNNNN` /
// `\UNNNNNNNN` for the non-ASCII characters tested here.
1610 fn test_escape_default() {
1611 assert_eq!("abc".escape_default(), String::from_str("abc"));
1612 assert_eq!("a c".escape_default(), String::from_str("a c"));
1613 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1614 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1615 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1616 assert_eq!("\U00010000\U0010ffff".escape_default(),
1617 String::from_str("\\U00010000\\U0010ffff"));
1618 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1619 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
// Checks the total ordering (`cmp`) of string slices: a string sorts after
// its own proper prefix, identical strings compare `Equal`, and otherwise the
// first difference decides the result regardless of length.
fn test_total_ord() {
    // Every comparison is wrapped in `assert_eq!`. A bare `a.cmp(b) == X;`
    // statement only computes a bool and throws it away, so it can never
    // make the test fail.
    assert_eq!("1234".cmp(&("123")), Greater);
    assert_eq!("123".cmp(&("1234")), Less);
    assert_eq!("1234".cmp(&("1234")), Equal);
    assert_eq!("12345555".cmp(&("123456")), Less);
    assert_eq!("22".cmp(&("1234")), Greater);
}
// `char_range_at(i).ch` queried at the start of every character of a string
// mixing characters that occupy 1, 2, 3 and 4 positions each (the indices
// used below advance by exactly those amounts).
1632 fn test_char_range_at() {
1633 let data = "b¢€𤭢𤭢€¢b";
1634 assert_eq!('b', data.char_range_at(0).ch);
1635 assert_eq!('¢', data.char_range_at(1).ch);
1636 assert_eq!('€', data.char_range_at(3).ch);
1637 assert_eq!('𤭢', data.char_range_at(6).ch);
1638 assert_eq!('𤭢', data.char_range_at(10).ch);
1639 assert_eq!('€', data.char_range_at(14).ch);
1640 assert_eq!('¢', data.char_range_at(17).ch);
1641 assert_eq!('b', data.char_range_at(19).ch);
// Asking for the character before position 0 must report `next == 0`
// (i.e. the index must not wrap below zero).
1645 fn test_char_range_at_reverse_underflow() {
1646 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Forward iteration with `chars()`: every yielded character is compared with
// `v[pos]`, and at the end `pos` must equal `v.len()`.
// NOTE(review): the declaration of `pos` and the loop that binds `c` and
// advances `pos` are not visible in this extract.
1650 fn test_iterator() {
1651 let s = "ศไทย中华Việt Nam";
1652 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1655 let mut it = s.chars();
1658 assert_eq!(c, v[pos]);
1661 assert_eq!(pos, v.len());
// Reverse iteration with `chars().rev()`: `v` lists the characters of `s`
// back to front; every yielded character is compared with `v[pos]`, and at
// the end `pos` must equal `v.len()`.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are not
// visible in this extract.
1665 fn test_rev_iterator() {
1666 let s = "ศไทย中华Việt Nam";
1667 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1670 let mut it = s.chars().rev();
1673 assert_eq!(c, v[pos]);
1676 assert_eq!(pos, v.len());
// Exhaustive round-trip: for every value in 0..0x110000 that `from_u32`
// accepts as a `char`, encode it into `bytes` as UTF-8, view the encoded
// prefix as a `str`, and require `chars().next()` to give the same `char`.
1680 fn test_chars_decoding() {
1681 let mut bytes = [0u8, ..4];
1682 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1683 let len = c.encode_utf8(bytes);
1684 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1685 if Some(c) != s.chars().next() {
1686 fail!("character {:x}={} does not decode correctly", c as u32, c);
// Exhaustive round-trip through the reverse decoder: for every value in
// 0..0x110000 that `from_u32` accepts as a `char`, encode it as UTF-8 and
// require `chars().rev().next()` to give the same `char` back.
1692 fn test_chars_rev_decoding() {
1693 let mut bytes = [0u8, ..4];
1694 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1695 let len = c.encode_utf8(bytes);
1696 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1697 if Some(c) != s.chars().rev().next() {
1698 fail!("character {:x}={} does not decode correctly", c as u32, c);
// Zips a `chars()` iterator with a clone of itself and requires every pair of
// items to be equal, i.e. the clone continues from the same position.
// NOTE(review): a statement between the `let` and the assertion is not
// visible in this extract.
1704 fn test_iterator_clone() {
1705 let s = "ศไทย中华Việt Nam";
1706 let mut it = s.chars();
1708 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Forward iteration with `bytes()`: each yielded byte is compared with the
// expected value `v[pos]`.
// NOTE(review): the opening/closing lines of the `v` literal and the
// handling of `pos` are not visible in this extract.
1712 fn test_bytesator() {
1713 let s = "ศไทย中华Việt Nam";
1715 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1716 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1721 for b in s.bytes() {
1722 assert_eq!(b, v[pos]);
// Reverse iteration with `bytes().rev()`: `pos` starts at `v.len()` and each
// yielded byte is compared with `v[pos]`.
// NOTE(review): the opening/closing lines of the `v` literal and the
// statement that moves `pos` are not visible in this extract.
1728 fn test_bytes_revator() {
1729 let s = "ศไทย中华Việt Nam";
1731 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1732 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1735 let mut pos = v.len();
1737 for b in s.bytes().rev() {
1739 assert_eq!(b, v[pos]);
// Forward iteration with `char_indices()`: each item must be the pair
// `(p[pos], v[pos])` of expected index and expected character, and the
// number of items must match the lengths of both `v` and `p`.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are not
// visible in this extract.
1744 fn test_char_indicesator() {
1745 let s = "ศไทย中华Việt Nam";
1746 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1747 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1750 let mut it = s.char_indices();
1753 assert_eq!(c, (p[pos], v[pos]));
1756 assert_eq!(pos, v.len());
1757 assert_eq!(pos, p.len());
// Reverse iteration with `char_indices().rev()`: `p` and `v` list the
// expected indices and characters back to front; each item must be the pair
// `(p[pos], v[pos])`, and the item count must match both lengths.
// NOTE(review): the declaration of `pos` and the loop that binds `c` are not
// visible in this extract.
1761 fn test_char_indices_revator() {
1762 let s = "ศไทย中华Việt Nam";
1763 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1764 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1767 let mut it = s.char_indices().rev();
1770 assert_eq!(c, (p[pos], v[pos]));
1773 assert_eq!(pos, v.len());
1774 assert_eq!(pos, p.len());
// `split` with a `char` separator and with an equivalent closure, iterated
// forwards and via `.rev()`, for an ASCII separator (' ') and a non-ASCII
// one ('ä'). All variants are compared against the same expected pieces.
// NOTE(review): the `rsplit` vectors are declared `mut` and compared in
// forward order, so a reversing statement presumably sits on the lines
// missing from this extract — confirm in the full file.
1778 fn test_split_char_iterator() {
1779 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1781 let split: Vec<&str> = data.split(' ').collect();
1782 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1784 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1786 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1788 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1789 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1791 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1793 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1796 let split: Vec<&str> = data.split('ä').collect();
1797 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1799 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1801 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1803 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1804 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1806 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1808 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// `splitn(sep, 3)` splits at the first three separators only: the result has
// four items and the last one holds the unsplit remainder. Checked with a
// `char` and an equivalent closure, for ' ' and for the non-ASCII 'ä'.
1812 fn test_splitn_char_iterator() {
1813 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1815 let split: Vec<&str> = data.splitn(' ', 3).collect();
1816 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1818 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1819 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1822 let split: Vec<&str> = data.splitn('ä', 3).collect();
1823 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1825 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1826 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// `rsplitn(sep, 3)` splits at the last three separators only, so the unsplit
// remainder is the leading part of the string. Checked with a `char` and an
// equivalent closure, for ' ' and for the non-ASCII 'ä'.
// NOTE(review): the vectors are declared `mut` and compared in forward
// order, so a reversing statement presumably sits on the lines missing from
// this extract — confirm in the full file.
1830 fn test_rsplitn_char_iterator() {
1831 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1833 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1835 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1837 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1839 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1842 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1844 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1846 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1848 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` and `split_terminator` on input that ends with the
// separator: `split` yields a final empty string, `split_terminator` does
// not. The leading empty string is produced by both.
1852 fn test_split_char_iterator_no_trailing() {
1853 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1855 let split: Vec<&str> = data.split('\n').collect();
1856 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1858 let split: Vec<&str> = data.split_terminator('\n').collect();
1859 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Same contrast between `split` and `split_terminator` as for forward
// iteration, but with the pieces collected through `.rev()`.
// NOTE(review): the vectors are declared `mut` and compared in forward
// order, so a reversing statement presumably sits on the lines missing from
// this extract — confirm in the full file.
1863 fn test_rev_split_char_iterator_no_trailing() {
1864 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1866 let mut split: Vec<&str> = data.split('\n').rev().collect();
1868 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1870 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1872 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// `words()` splits on runs of whitespace (spaces, tabs and newlines here)
// and yields no empty items for leading, trailing or repeated whitespace.
// NOTE(review): the enclosing `fn` header is not visible in this extract.
1877 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1878 let words: Vec<&str> = data.words().collect();
1879 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Each `t!(input, expected)` case asserts that `input.nfd_chars()`, collected
// into a `String`, equals `expected`. The cases cover precomposed letters
// being split into base + combining marks, combining marks being reordered,
// already-decomposed input staying as is, and two Hangul syllables
// (U+D4DB, U+AC1C) being broken into jamo.
// NOTE(review): the `macro_rules! t` header line is not visible in this extract.
1883 fn test_nfd_chars() {
1885 ($input: expr, $expected: expr) => {
1886 assert_eq!($input.nfd_chars().collect::<String>(), $expected.into_string());
1890 t!("\u1e0b\u01c4", "d\u0307\u01c4");
1891 t!("\u2026", "\u2026");
1892 t!("\u2126", "\u03a9");
1893 t!("\u1e0b\u0323", "d\u0323\u0307");
1894 t!("\u1e0d\u0307", "d\u0323\u0307");
1895 t!("a\u0301", "a\u0301");
1896 t!("\u0301a", "\u0301a");
1897 t!("\ud4db", "\u1111\u1171\u11b6");
1898 t!("\uac1c", "\u1100\u1162");
// Each `t!(input, expected)` case asserts that `input.nfkd_chars()`,
// collected into a `String`, equals `expected`. In these cases U+2026 expands
// to "..." and U+01C4 expands to "DZ" followed by U+030C, in addition to the
// splitting and reordering of combining marks and Hangul syllables.
// NOTE(review): the `macro_rules! t` header line is not visible in this extract.
1902 fn test_nfkd_chars() {
1904 ($input: expr, $expected: expr) => {
1905 assert_eq!($input.nfkd_chars().collect::<String>(), $expected.into_string());
1909 t!("\u1e0b\u01c4", "d\u0307DZ\u030c");
1910 t!("\u2026", "...");
1911 t!("\u2126", "\u03a9");
1912 t!("\u1e0b\u0323", "d\u0323\u0307");
1913 t!("\u1e0d\u0307", "d\u0323\u0307");
1914 t!("a\u0301", "a\u0301");
1915 t!("\u0301a", "\u0301a");
1916 t!("\ud4db", "\u1111\u1171\u11b6");
1917 t!("\uac1c", "\u1100\u1162");
// Each `t!(input, expected)` case asserts that `input.nfc_chars()`, collected
// into a `String`, equals `expected`. The cases cover base + combining mark
// being composed ("a" + U+0301 becomes "\xe1"), a leading combining mark
// staying separate, Hangul syllables staying composed, and a sequence of
// several combining marks being reordered before composition.
// NOTE(review): the `macro_rules! t` header line is not visible in this extract.
1921 fn test_nfc_chars() {
1923 ($input: expr, $expected: expr) => {
1924 assert_eq!($input.nfc_chars().collect::<String>(), $expected.into_string());
1928 t!("\u1e0b\u01c4", "\u1e0b\u01c4");
1929 t!("\u2026", "\u2026");
1930 t!("\u2126", "\u03a9");
1931 t!("\u1e0b\u0323", "\u1e0d\u0307");
1932 t!("\u1e0d\u0307", "\u1e0d\u0307");
1933 t!("a\u0301", "\xe1");
1934 t!("\u0301a", "\u0301a");
1935 t!("\ud4db", "\ud4db");
1936 t!("\uac1c", "\uac1c");
1937 t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b");
// Each `t!(input, expected)` case asserts that `input.nfkc_chars()`,
// collected into a `String`, equals `expected`. In these cases U+2026 becomes
// "..." and U+01C4 becomes "D" followed by U+017D, while base + combining
// mark pairs are composed and Hangul syllables stay composed.
// NOTE(review): the `macro_rules! t` header line is not visible in this extract.
1941 fn test_nfkc_chars() {
1943 ($input: expr, $expected: expr) => {
1944 assert_eq!($input.nfkc_chars().collect::<String>(), $expected.into_string());
1948 t!("\u1e0b\u01c4", "\u1e0bD\u017d");
1949 t!("\u2026", "...");
1950 t!("\u2126", "\u03a9");
1951 t!("\u1e0b\u0323", "\u1e0d\u0307");
1952 t!("\u1e0d\u0307", "\u1e0d\u0307");
1953 t!("a\u0301", "\xe1");
1954 t!("\u0301a", "\u0301a");
1955 t!("\ud4db", "\ud4db");
1956 t!("\uac1c", "\uac1c");
1957 t!("a\u0300\u0305\u0315\u05aeb", "\xe0\u05ae\u0305\u0315b");
// `lines()` keeps empty lines (leading and interior) and yields the same
// items whether or not the input ends with a final "\n".
// NOTE(review): the enclosing `fn` header is not visible in this extract.
1962 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1963 let lines: Vec<&str> = data.lines().collect();
1964 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1966 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1967 let lines: Vec<&str> = data.lines().collect();
1968 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1972 fn test_graphemes() {
1973 use std::iter::order;
1974 // official Unicode test data
1975 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1977 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1978 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1979 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1980 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1981 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1982 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1983 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1984 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1985 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1986 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1987 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1988 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1989 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1990 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1991 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1992 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1993 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1994 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1995 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1996 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1997 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1998 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1999 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
2000 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
2001 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
2002 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
2003 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
2004 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
2005 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
2006 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
2007 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
2008 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
2009 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
2010 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
2011 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
2012 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
2013 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
2014 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
2015 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
2016 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
2017 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
2018 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
2019 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
2020 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
2021 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
2022 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
2023 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
2024 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
2025 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
2026 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
2027 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
2028 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
2029 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
2030 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
2031 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
2032 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
2033 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
2034 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
2035 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
2036 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
2037 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
2038 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
2039 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
2040 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
2041 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
2042 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
2043 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
2044 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
2045 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
2046 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
2047 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
2048 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
2049 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
2050 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
2051 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
2052 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
2053 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
2054 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
2055 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
2056 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
2057 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
2058 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
2059 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
2060 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
2061 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
2062 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
2063 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
2064 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
2065 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
2066 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
2067 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
2068 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
2069 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
2070 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
2071 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
2072 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
2073 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
2074 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
2075 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
2076 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
2077 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
2078 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
2079 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
2080 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
2081 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
2082 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
2083 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
2084 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
2085 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
2086 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
2087 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
2088 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
2089 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
2090 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
2091 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
2092 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
2093 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
2094 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
2095 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
2096 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
2097 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
2098 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
2099 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
2100 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
2101 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
2102 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
2103 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
2104 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
2105 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
2106 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
2107 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
2108 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
2109 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
2110 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
2111 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
2112 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
2113 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
2114 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
2115 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
2116 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
2117 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
2118 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
2119 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
2120 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
2121 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
2122 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
2123 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
2124 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
2125 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
2126 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
2127 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
2128 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
2129 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
2130 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2131 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2132 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2133 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2134 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2135 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2136 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2137 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2138 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2139 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2140 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2141 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2142 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2143 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2144 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2145 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2146 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2147 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2148 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2149 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2150 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2151 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2152 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2153 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2154 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2155 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2156 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2157 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2158 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2159 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2160 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2161 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2162 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2163 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2164 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2165 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2166 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2167 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2168 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2169 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2170 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2171 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2172 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2173 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2174 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2175 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2176 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2177 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2178 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2179 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2180 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2181 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2182 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2186 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2187 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2188 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2189 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2190 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2191 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2192 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2193 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2194 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2195 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2196 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2197 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2198 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2199 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2200 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2201 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2202 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2203 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2204 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2205 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2206 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2207 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2208 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2211 for &(s, g) in test_same.iter() {
2212 // test forward iterator
2213 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2214 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2216 // test reverse iterator
2217 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2218 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2221 for &(s, gt, gf) in test_diff.iter() {
2222 // test forward iterator
2223 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2224 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2226 // test reverse iterator
2227 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2228 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2231 // test the indices iterators
2232 let s = "a̐éö̲\r\n";
2233 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2234 assert_eq!(gr_inds.as_slice(), &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]);
2235 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2236 assert_eq!(gr_inds.as_slice(), &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")]);
2237 let mut gr_inds = s.grapheme_indices(true);
2238 let e1 = gr_inds.size_hint();
2239 assert_eq!(e1, (1, Some(13)));
2240 let c = gr_inds.count();
2242 let e2 = gr_inds.size_hint();
2243 assert_eq!(e2, (0, Some(0)));
2245 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2247 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2248 assert_eq!(gr.as_slice(), &["\r", "\r\n", "\n"]);
// Checks `split_str` (splitting on a string separator) against hand-written expected pieces.
2252 fn test_split_strator() {
// Helper: collects the pieces of `s` split on `sep` and asserts they equal `u`.
2253 fn t(s: &str, sep: &str, u: &[&str]) {
2254 let v: Vec<&str> = s.split_str(sep).collect();
2255 assert_eq!(v.as_slice(), u.as_slice());
// Separator never occurs in the input: the whole input is expected back as one piece.
2257 t("--1233345--", "12345", ["--1233345--"]);
2258 t("abc::hello::there", "::", ["abc", "hello", "there"]);
// A separator at the start and/or end of the input is expected to yield an empty piece there.
2259 t("::hello::there", "::", ["", "hello", "there"]);
2260 t("hello::there::", "::", ["hello", "there", ""]);
2261 t("::hello::there::", "::", ["", "hello", "there", ""]);
// Multi-byte separator inside multi-byte text.
2262 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2263 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2264 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2265 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Inputs made of repeated separator characters: the expected pieces correspond to
// non-overlapping matches taken from the left, with any leftover character kept as the tail.
2267 t("zz", "zz", ["",""]);
2268 t("ok", "z", ["ok"]);
2269 t("zzz", "zz", ["","z"]);
2270 t("zzzzz", "zz", ["","","z"]);
// Checks that the `Default` value of a `Str` type views as the empty string.
// NOTE(review): the calls that instantiate `t` are not visible in this excerpt.
2274 fn test_str_default() {
2275 use std::default::Default;
// Helper: builds an `S` via `Default::default()` and asserts its `as_slice()` is empty.
2276 fn t<S: Default + Str>() {
2277 let s: S = Default::default();
2278 assert_eq!(s.as_slice(), "");
// Checks `len()` through the `Collection` bound for string slices and `String` values.
2286 fn test_str_container() {
// Helper: adds up `len()` over every element of `v`.
2287 fn sum_len<S: Collection>(v: &[S]) -> uint {
2288 v.iter().map(|x| x.len()).sum()
2291 let s = String::from_str("01234");
// Each input below is expected to total 5: string literals, owned strings, and a slice of `s`.
2292 assert_eq!(5, sum_len(["012", "", "34"]));
2293 assert_eq!(5, sum_len([String::from_str("01"), String::from_str("2"),
2294 String::from_str("34"), String::from_str("")]));
2295 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8`: `Some(..)` carrying the text for accepted input, `None` for rejected input.
2299 fn test_str_from_utf8() {
// NOTE(review): the `xs` binding used by this first assertion is not visible in this excerpt.
2301 assert_eq!(from_utf8(xs), Some("hello"));
// Multi-byte text is expected to round-trip through its byte representation.
2303 let xs = "ศไทย中华Việt Nam".as_bytes();
2304 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// A byte string ending in `\xFF` is expected to be rejected.
2306 let xs = b"hello\xFF";
2307 assert_eq!(from_utf8(xs), None);
// Exercises trait-provided behaviour on maybe-owned string values (`Slice` and `Owned`):
// length, slicing, formatting, ordering, defaults, comparison and equivalence.
2311 fn test_maybe_owned_traits() {
// Borrowed (`Slice`) value.
2312 let s = Slice("abcde");
2313 assert_eq!(s.len(), 5);
2314 assert_eq!(s.as_slice(), "abcde");
2315 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2316 assert_eq!(format!("{}", s).as_slice(), "abcde");
2317 assert!(s.lt(&Owned(String::from_str("bcdef"))));
2318 assert_eq!(Slice(""), Default::default());
// Owned value holding the same text; the same checks are repeated with the variants swapped.
2320 let o = Owned(String::from_str("abcde"));
2321 assert_eq!(o.len(), 5);
2322 assert_eq!(o.as_slice(), "abcde");
2323 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2324 assert_eq!(format!("{}", o).as_slice(), "abcde");
2325 assert!(o.lt(&Slice("bcdef")));
2326 assert_eq!(Owned(String::from_str("")), Default::default());
// With equal text the two variants are expected to compare `Equal` and be `equiv`,
// in both directions.
2328 assert!(s.cmp(&o) == Equal);
2329 assert!(s.equiv(&o));
2331 assert!(o.cmp(&s) == Equal);
2332 assert!(o.equiv(&s));
// Checks the `is_slice` / `is_owned` predicates on each variant.
2336 fn test_maybe_owned_methods() {
// A `Slice` value reports as a slice and not as owned.
2337 let s = Slice("abcde");
2338 assert!(s.is_slice());
2339 assert!(!s.is_owned());
// An `Owned` value reports the opposite.
2341 let o = Owned(String::from_str("abcde"));
2342 assert!(!o.is_slice());
2343 assert!(o.is_owned());
// Checks that cloning either variant gives a value equal to the same text held in
// either variant (all four `Owned`/`Slice` pairings are asserted equal).
2347 fn test_maybe_owned_clone() {
2348 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2349 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2350 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2351 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `into_string` on either variant yields a `String` with the same text.
2355 fn test_maybe_owned_into_string() {
2356 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2357 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2358 String::from_str("abcde"));
// Checks `into_maybe_owned` on a string literal and on a `String`.
// Each result is compared against both a `Slice` and an `Owned` holding the same text and all
// four assertions expect equality, so the comparison does not depend on the variant.
2362 fn test_into_maybe_owned() {
2363 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2364 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2365 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2366 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2367 Owned(String::from_str("abcde")));
2374 use test::black_box;
2376 use std::option::{None, Some};
2377 use std::iter::{Iterator, DoubleEndedIterator};
2378 use std::collections::Collection;
// Benchmark: counts the `char`s of a mixed multi-byte and ASCII string on every iteration.
2381 fn char_iterator(b: &mut Bencher) {
2382 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2384 b.iter(|| s.chars().count());
// Benchmark: walks `s.chars()` with a `for` loop, handing every `char` to `black_box`.
// NOTE(review): the enclosing `b.iter` call is not visible in this excerpt.
2388 fn char_iterator_for(b: &mut Bencher) {
2389 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2392 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the `char`s of a multi-line string literal made of ASCII text.
2397 fn char_iterator_ascii(b: &mut Bencher) {
2398 let s = "Mary had a little lamb, Little lamb
2399 Mary had a little lamb, Little lamb
2400 Mary had a little lamb, Little lamb
2401 Mary had a little lamb, Little lamb
2402 Mary had a little lamb, Little lamb
2403 Mary had a little lamb, Little lamb";
2405 b.iter(|| s.chars().count());
// Benchmark: counts the `char`s of a mixed multi-byte and ASCII string, iterating in reverse.
2409 fn char_iterator_rev(b: &mut Bencher) {
2410 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2412 b.iter(|| s.chars().rev().count());
// Benchmark: walks `s.chars().rev()` with a `for` loop, handing every `char` to `black_box`.
// NOTE(review): the enclosing `b.iter` call is not visible in this excerpt.
2416 fn char_iterator_rev_for(b: &mut Bencher) {
2417 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2420 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `char_indices()` and asserts the count matches `char_len()`.
2425 fn char_indicesator(b: &mut Bencher) {
2426 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count, computed once outside the timed closure.
2427 let len = s.char_len();
2429 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: same as the forward `char_indices()` count, but iterating in reverse.
2433 fn char_indicesator_rev(b: &mut Bencher) {
2434 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count, computed once outside the timed closure.
2435 let len = s.char_len();
2437 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits multi-byte text on the ASCII char `V` and asserts three pieces result.
2441 fn split_unicode_ascii(b: &mut Bencher) {
2442 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2444 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits multi-byte text with a custom `CharEq` matcher and asserts three pieces.
2448 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single `char`, used as the split matcher below.
2449 struct NotAscii(char);
2450 impl CharEq for NotAscii {
// NOTE(review): the comparison performed inside `matches` is not visible in this excerpt.
2451 fn matches(&mut self, c: char) -> bool {
2452 let NotAscii(cc) = *self;
// Always reports `false`; presumably this keeps `split` off any ASCII-only path (confirm).
2455 fn only_ascii(&self) -> bool { false }
2457 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2459 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on a space `char` and checks the piece count.
2464 fn split_ascii(b: &mut Bencher) {
2465 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed once outside the timed closure.
2466 let len = s.split(' ').count();
2468 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence with a custom `CharEq` matcher and checks that the piece
// count equals the count obtained by splitting on a plain space `char`.
2472 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single `char`, used as the split matcher below.
2473 struct NotAscii(char);
2474 impl CharEq for NotAscii {
// NOTE(review): the comparison performed inside `matches` is not visible in this excerpt.
2476 fn matches(&mut self, c: char) -> bool {
2477 let NotAscii(cc) = *self;
// Always reports `false`; presumably this keeps `split` off any ASCII-only path (confirm).
2480 fn only_ascii(&self) -> bool { false }
2482 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain `char` separator.
2483 let len = s.split(' ').count();
2485 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the separator predicate.
2489 fn split_extern_fn(b: &mut Bencher) {
2490 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain `char` separator.
2491 let len = s.split(' ').count();
// Predicate equivalent to the space separator used for the reference count.
2492 fn pred(c: char) -> bool { c == ' ' }
2494 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the separator predicate.
2498 fn split_closure(b: &mut Bencher) {
2499 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain `char` separator.
2500 let len = s.split(' ').count();
2502 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a one-element `char` slice as the separator set.
2506 fn split_slice(b: &mut Bencher) {
2507 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from the plain `char` separator.
2508 let len = s.split(' ').count();
2510 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
// Benchmark set-up over a byte-string literal of ASCII text.
// NOTE(review): the timed call is not visible in this excerpt; going by the function name it is
// presumably a UTF-8 validity check over `s` (confirm).
2514 fn is_utf8_100_ascii(b: &mut Bencher) {
2516 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2517 Lorem ipsum dolor sit amet, consectetur. ";
// Guards the input size the benchmark name advertises.
2519 assert_eq!(100, s.len());
// Benchmark set-up over the bytes of a string made of multi-byte characters.
// NOTE(review): the timed call is not visible in this excerpt; going by the function name it is
// presumably a UTF-8 validity check over `s` (confirm).
2526 fn is_utf8_100_multibyte(b: &mut Bencher) {
2527 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guards the input size the benchmark name advertises.
2528 assert_eq!(100, s.len());
// Benchmark: joins ten copies of `s` with `connect` and checks the length of the result.
// NOTE(review): the `sep` binding and the enclosing `b.iter` call are not visible in this excerpt.
2535 fn bench_connect(b: &mut Bencher) {
2536 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2538 let v = [s, s, s, s, s, s, s, s, s, s];
// Ten pieces plus nine separators between them.
2540 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a short haystack, asserting that the needle is found.
// NOTE(review): the `needle` binding and the enclosing `b.iter` call are not visible in this
// excerpt.
2545 fn bench_contains_short_short(b: &mut Bencher) {
2546 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2550 assert!(haystack.contains(needle));
// Benchmark: `contains` over a long multi-paragraph filler text with the short needle `english`,
// asserting that the needle is NOT found.
// NOTE(review): the line that opens the `haystack` literal and the enclosing `b.iter` call are
// not visible in this excerpt.
2555 fn bench_contains_short_long(b: &mut Bencher) {
2557 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2558 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2559 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2560 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2561 tempus vel, gravida nec quam.
2563 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2564 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2565 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2566 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2567 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2568 interdum. Curabitur ut nisi justo.
2570 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2571 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2572 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2573 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2574 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2575 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2576 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2577 Aliquam sit amet placerat lorem.
2579 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2580 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2581 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2582 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2583 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2586 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2587 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2588 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2589 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2590 malesuada sollicitudin quam eu fermentum.";
2591 let needle = "english";
2594 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is one long run of `a` and the needle is a shorter
// run of `a` ending in `b`, so every candidate position matches until the last needle character.
// The needle is asserted NOT to be found.
// NOTE(review): the enclosing `b.iter` call is not visible in this excerpt.
2599 fn bench_contains_bad_naive(b: &mut Bencher) {
2600 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2601 let needle = "aaaaaaaab";
2604 assert!(!haystack.contains(needle));
2609 fn bench_contains_equal(b: &mut Bencher) {
2610 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2611 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2614 assert!(haystack.contains(needle));