1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with a pointer. `String` is used
20 for an owned string, so there is only one commonly-used `str` type in Rust:
23 `&str` is the borrowed string type. This type of string can only be created
24 from other strings, unless it is a static string (see below). As the word
25 "borrowed" implies, this type of string is owned elsewhere, and this string
26 cannot be moved out of.
28 As an example, here's some code that uses a string.
32 let borrowed_string = "This string is borrowed with the 'static lifetime";
36 From the example above, you can see that Rust's string literals have the
37 `'static` lifetime. This is akin to C's concept of a static string.
39 String literals are allocated statically in the rodata of the
40 executable/library. The string then has the type `&'static str` meaning that
41 the string is valid for the `'static` lifetime, otherwise known as the
42 lifetime of the entire program. As can be inferred from the type, these static
43 strings are not mutable.
47 Many languages have immutable strings by default, and Rust has a particular
48 flavor of this idea. As with the rest of Rust's types, strings are immutable by
49 default. If a string is declared as `mut`, however, it may be mutated. This
50 works the same way as the rest of Rust's type system in the sense that if
51 there's a mutable reference to a string, there may only be one mutable reference
52 to that string. With these guarantees, strings can easily transition between
53 being mutable/immutable with the same benefits of having mutable strings in
58 Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
59 stream of UTF-8 bytes. All strings are guaranteed to be validly encoded UTF-8
60 sequences. Additionally, strings are not null-terminated and can contain null
63 The actual representation of strings has a direct mapping to vectors: `&str`
64 is the same as `&[u8]`.
68 #![doc(primitive = "str")]
72 use core::default::Default;
75 use core::iter::AdditiveIterator;
84 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
85 pub use core::str::{Bytes, CharSplits};
86 pub use core::str::{CharSplitsN, AnyLines, MatchIndices, StrSplits};
87 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
88 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
89 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
90 pub use core::str::{Str, StrSlice};
91 pub use unicode::str::{UnicodeStrSlice, Words, Graphemes, GraphemeIndices};
94 Section: Creating a string
97 /// Deprecated. Replaced by `String::from_utf8`
98 #[deprecated = "Replaced by `String::from_utf8`"]
99 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
100 String::from_utf8(vv)
103 /// Deprecated. Replaced by `String::from_byte`
104 #[deprecated = "Replaced by String::from_byte"]
105 pub fn from_byte(b: u8) -> String {
107 String::from_char(1, b as char)
110 /// Deprecated. Use `String::from_char` or `char::to_string()` instead
111 #[deprecated = "use String::from_char or char.to_string()"]
112 pub fn from_char(ch: char) -> String {
113 String::from_char(1, ch)
116 /// Deprecated. Replaced by `String::from_chars`
117 #[deprecated = "use String::from_chars instead"]
118 pub fn from_chars(chs: &[char]) -> String {
119 chs.iter().map(|c| *c).collect()
122 /// Methods for vectors of strings
123 pub trait StrVector {
124 /// Concatenate a vector of strings.
129 /// let first = "Restaurant at the End of the".to_string();
130 /// let second = " Universe".to_string();
131 /// let string_vec = vec![first, second];
132 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
134 fn concat(&self) -> String;
136 /// Concatenate a vector of strings, placing a given separator between each.
141 /// let first = "Roast".to_string();
142 /// let second = "Sirloin Steak".to_string();
143 /// let string_vec = vec![first, second];
144 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
146 fn connect(&self, sep: &str) -> String;
149 impl<'a, S: Str> StrVector for &'a [S] {
150 fn concat(&self) -> String {
152 return String::new();
155 // `len` calculation may overflow but push_str will check boundaries
156 let len = self.iter().map(|s| s.as_slice().len()).sum();
158 let mut result = String::with_capacity(len);
160 for s in self.iter() {
161 result.push_str(s.as_slice())
167 fn connect(&self, sep: &str) -> String {
169 return String::new();
174 return self.concat();
177 // this is wrong without the guarantee that `self` is non-empty
178 // `len` calculation may overflow, but push_str will check boundaries
179 let len = sep.len() * (self.len() - 1)
180 + self.iter().map(|s| s.as_slice().len()).sum();
181 let mut result = String::with_capacity(len);
182 let mut first = true;
184 for s in self.iter() {
188 result.push_str(sep);
190 result.push_str(s.as_slice());
196 impl<'a, S: Str> StrVector for Vec<S> {
198 fn concat(&self) -> String {
199 self.as_slice().concat()
203 fn connect(&self, sep: &str) -> String {
204 self.as_slice().connect(sep)
212 // Helper functions used for Unicode normalization
213 fn canonical_sort(comb: &mut [(char, u8)]) {
214 let len = comb.len();
215 for i in range(0, len) {
216 let mut swapped = false;
217 for j in range(1, len-i) {
218 let class_a = *comb[j-1].ref1();
219 let class_b = *comb[j].ref1();
220 if class_a != 0 && class_b != 0 && class_a > class_b {
225 if !swapped { break; }
230 enum DecompositionType {
235 /// External iterator for a string's decomposition's characters.
236 /// Use with the `std::iter` module.
238 pub struct Decompositions<'a> {
239 kind: DecompositionType,
241 buffer: Vec<(char, u8)>,
245 impl<'a> Iterator<char> for Decompositions<'a> {
247 fn next(&mut self) -> Option<char> {
248 match self.buffer.as_slice().head() {
254 Some(&(c, _)) if self.sorted => {
258 _ => self.sorted = false
261 let decomposer = match self.kind {
262 Canonical => unicode::char::decompose_canonical,
263 Compatible => unicode::char::decompose_compatible
267 for ch in self.iter {
268 let buffer = &mut self.buffer;
269 let sorted = &mut self.sorted;
271 let class = unicode::char::canonical_combining_class(d);
272 if class == 0 && !*sorted {
273 canonical_sort(buffer.as_mut_slice());
276 buffer.push((d, class));
283 canonical_sort(self.buffer.as_mut_slice());
287 match self.buffer.shift() {
292 Some((c, _)) => Some(c),
297 fn size_hint(&self) -> (uint, Option<uint>) {
298 let (lower, _) = self.iter.size_hint();
303 /// Replace all occurrences of one string with another
307 /// * s - The string containing substrings to replace
308 /// * from - The string to replace
309 /// * to - The replacement string
313 /// The original string with all occurrences of `from` replaced with `to`
319 /// let string = "orange";
320 /// let new_string = str::replace(string, "or", "str");
321 /// assert_eq!(new_string.as_slice(), "strange");
323 pub fn replace(s: &str, from: &str, to: &str) -> String {
324 let mut result = String::new();
325 let mut last_end = 0;
326 for (start, end) in s.match_indices(from) {
327 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
331 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
339 /// Deprecated. Use `String::from_utf16`.
340 #[deprecated = "Replaced by String::from_utf16"]
341 pub fn from_utf16(v: &[u16]) -> Option<String> {
342 String::from_utf16(v)
345 /// Deprecated. Use `String::from_utf16_lossy`.
346 #[deprecated = "Replaced by String::from_utf16_lossy"]
347 pub fn from_utf16_lossy(v: &[u16]) -> String {
348 String::from_utf16_lossy(v)
351 // Return the initial codepoint accumulator for the first byte.
352 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
353 // for width 3, and 3 bits for width 4
354 macro_rules! utf8_first_byte(
355 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
358 // return the value of $ch updated with continuation byte $byte
359 macro_rules! utf8_acc_cont_byte(
360 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
363 /// Deprecated. Use `String::from_utf8_lossy`.
364 #[deprecated = "Replaced by String::from_utf8_lossy"]
365 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
366 String::from_utf8_lossy(v)
373 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
374 /// This can be useful as an optimization when an allocation is sometimes
375 /// needed but not always.
376 pub enum MaybeOwned<'a> {
377 /// A borrowed string
383 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
384 pub type SendStr = MaybeOwned<'static>;
386 impl<'a> MaybeOwned<'a> {
387 /// Returns `true` if this `MaybeOwned` wraps an owned string
392 /// let string = String::from_str("orange");
393 /// let maybe_owned_string = string.into_maybe_owned();
394 /// assert_eq!(true, maybe_owned_string.is_owned());
397 pub fn is_owned(&self) -> bool {
404 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
409 /// let string = "orange";
410 /// let maybe_owned_string = string.as_slice().into_maybe_owned();
411 /// assert_eq!(true, maybe_owned_string.is_slice());
414 pub fn is_slice(&self) -> bool {
422 /// Trait for moving into a `MaybeOwned`
423 pub trait IntoMaybeOwned<'a> {
424 /// Moves self into a `MaybeOwned`
425 fn into_maybe_owned(self) -> MaybeOwned<'a>;
431 /// let owned_string = String::from_str("orange");
432 /// let maybe_owned_string = owned_string.into_maybe_owned();
433 /// assert_eq!(true, maybe_owned_string.is_owned());
435 impl<'a> IntoMaybeOwned<'a> for String {
437 fn into_maybe_owned(self) -> MaybeOwned<'a> {
445 /// let string = "orange";
446 /// let maybe_owned_str = string.as_slice().into_maybe_owned();
447 /// assert_eq!(false, maybe_owned_str.is_owned());
449 impl<'a> IntoMaybeOwned<'a> for &'a str {
451 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
457 /// let str = "orange";
458 /// let maybe_owned_str = str.as_slice().into_maybe_owned();
459 /// let maybe_maybe_owned_str = maybe_owned_str.into_maybe_owned();
460 /// assert_eq!(false, maybe_maybe_owned_str.is_owned());
462 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
464 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
467 impl<'a> PartialEq for MaybeOwned<'a> {
469 fn eq(&self, other: &MaybeOwned) -> bool {
470 self.as_slice() == other.as_slice()
474 impl<'a> Eq for MaybeOwned<'a> {}
476 impl<'a> PartialOrd for MaybeOwned<'a> {
478 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
479 Some(self.cmp(other))
483 impl<'a> Ord for MaybeOwned<'a> {
485 fn cmp(&self, other: &MaybeOwned) -> Ordering {
486 self.as_slice().cmp(&other.as_slice())
490 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
492 fn equiv(&self, other: &S) -> bool {
493 self.as_slice() == other.as_slice()
497 impl<'a> Str for MaybeOwned<'a> {
499 fn as_slice<'b>(&'b self) -> &'b str {
502 Owned(ref s) => s.as_slice()
507 impl<'a> StrAllocating for MaybeOwned<'a> {
509 fn into_string(self) -> String {
511 Slice(s) => String::from_str(s),
517 impl<'a> Collection for MaybeOwned<'a> {
519 fn len(&self) -> uint { self.as_slice().len() }
522 impl<'a> Clone for MaybeOwned<'a> {
524 fn clone(&self) -> MaybeOwned<'a> {
526 Slice(s) => Slice(s),
527 Owned(ref s) => Owned(String::from_str(s.as_slice()))
532 impl<'a> Default for MaybeOwned<'a> {
534 fn default() -> MaybeOwned<'a> { Slice("") }
537 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
539 fn hash(&self, hasher: &mut H) {
540 self.as_slice().hash(hasher)
544 impl<'a> fmt::Show for MaybeOwned<'a> {
546 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
548 Slice(ref s) => s.fmt(f),
549 Owned(ref s) => s.fmt(f)
554 /// Unsafe operations
556 use core::prelude::*;
558 use core::raw::Slice;
563 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
564 pub use core::str::raw::{slice_unchecked};
566 /// Create a Rust string from a `*const u8` buffer of the given length
567 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
568 let mut result = String::new();
569 result.push_bytes(mem::transmute(Slice {
576 /// Create a Rust string from a null-terminated C string
577 pub unsafe fn from_c_str(c_string: *const i8) -> String {
578 let mut buf = String::new();
580 while *c_string.offset(len) != 0 {
583 buf.push_bytes(mem::transmute(Slice {
590 /// Converts an owned vector of bytes to a new owned string. This assumes
591 /// that the vector has already been validated as UTF-8
593 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
597 /// Converts a byte to a string.
598 pub unsafe fn from_byte(u: u8) -> String {
599 from_utf8_owned(vec![u])
602 /// Sets the length of a string
604 /// This will explicitly set the size of the string, without actually
605 /// modifying its buffers, so it is up to the caller to ensure that
606 /// the string is actually the specified size.
608 fn test_from_buf_len() {
609 use slice::ImmutableVector;
612 let a = vec![65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
614 let c = from_buf_len(b, 3u);
615 assert_eq!(c, String::from_str("AAA"));
621 Section: Trait implementations
624 /// Any string that can be represented as a slice
625 pub trait StrAllocating: Str {
626 /// Convert `self` into a `String`, avoiding a copy if possible.
627 fn into_string(self) -> String;
629 #[allow(missing_doc)]
630 #[deprecated = "replaced by .into_string()"]
631 fn into_owned(self) -> String {
635 /// Escape each char in `self` with `char::escape_default`.
636 fn escape_default(&self) -> String {
637 let me = self.as_slice();
638 let mut out = String::with_capacity(me.len());
639 for c in me.chars() {
640 c.escape_default(|c| out.push_char(c));
645 /// Escape each char in `self` with `char::escape_unicode`.
646 fn escape_unicode(&self) -> String {
647 let me = self.as_slice();
648 let mut out = String::with_capacity(me.len());
649 for c in me.chars() {
650 c.escape_unicode(|c| out.push_char(c));
655 /// Replace all occurrences of one string with another.
659 /// * `from` - The string to replace
660 /// * `to` - The replacement string
664 /// The original string with all occurrences of `from` replaced with `to`.
669 /// let s = "Do you know the muffin man,
670 /// The muffin man, the muffin man, ...".to_string();
672 /// assert_eq!(s.replace("muffin man", "little lamb"),
673 /// "Do you know the little lamb,
674 /// The little lamb, the little lamb, ...".to_string());
676 /// // not found, so no change.
677 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
679 fn replace(&self, from: &str, to: &str) -> String {
680 let me = self.as_slice();
681 let mut result = String::new();
682 let mut last_end = 0;
683 for (start, end) in me.match_indices(from) {
684 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
688 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
692 #[allow(missing_doc)]
693 #[deprecated = "obsolete, use `to_string`"]
695 fn to_owned(&self) -> String {
697 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
701 /// Converts to a vector of `u16` encoded as UTF-16.
702 #[deprecated = "use `utf16_units` instead"]
703 fn to_utf16(&self) -> Vec<u16> {
704 self.as_slice().utf16_units().collect::<Vec<u16>>()
707 /// Given a string, make a new string with repeated copies of it.
708 fn repeat(&self, nn: uint) -> String {
709 let me = self.as_slice();
710 let mut ret = String::with_capacity(nn * me.len());
711 for _ in range(0, nn) {
717 /// Levenshtein Distance between two strings.
718 fn lev_distance(&self, t: &str) -> uint {
719 let me = self.as_slice();
723 if slen == 0 { return tlen; }
724 if tlen == 0 { return slen; }
726 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
728 for (i, sc) in me.chars().enumerate() {
731 *dcol.get_mut(0) = current + 1;
733 for (j, tc) in t.chars().enumerate() {
735 let next = *dcol.get(j + 1);
738 *dcol.get_mut(j + 1) = current;
740 *dcol.get_mut(j + 1) = cmp::min(current, next);
741 *dcol.get_mut(j + 1) = cmp::min(*dcol.get(j + 1),
749 return *dcol.get(tlen);
752 /// An Iterator over the string in Unicode Normalization Form D
753 /// (canonical decomposition).
755 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
757 iter: self.as_slice().chars(),
764 /// An Iterator over the string in Unicode Normalization Form KD
765 /// (compatibility decomposition).
767 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
769 iter: self.as_slice().chars(),
777 impl<'a> StrAllocating for &'a str {
779 fn into_string(self) -> String {
780 String::from_str(self)
784 /// Methods for owned strings
786 /// Consumes the string, returning the underlying byte buffer.
788 /// The buffer does not have a null terminator.
789 fn into_bytes(self) -> Vec<u8>;
791 /// Pushes the given string onto this string, returning the concatenation of the two strings.
792 fn append(self, rhs: &str) -> String;
795 impl OwnedStr for String {
797 fn into_bytes(self) -> Vec<u8> {
798 unsafe { mem::transmute(self) }
802 fn append(mut self, rhs: &str) -> String {
810 use std::iter::AdditiveIterator;
811 use std::iter::range;
812 use std::default::Default;
814 use std::clone::Clone;
815 use std::cmp::{Equal, Greater, Less, Ord, PartialOrd, Equiv};
816 use std::option::{Some, None};
817 use std::ptr::RawPtr;
818 use std::iter::{Iterator, DoubleEndedIterator};
822 use std::slice::{Vector, ImmutableVector};
826 use unicode::char::UnicodeChar;
830 assert!((eq_slice("foobar".slice(0, 3), "foo")));
831 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
832 assert!((!eq_slice("foo1", "foo2")));
838 assert!("" <= "foo");
839 assert!("foo" <= "foo");
840 assert!("foo" != "bar");
845 assert_eq!("".len(), 0u);
846 assert_eq!("hello world".len(), 11u);
847 assert_eq!("\x63".len(), 1u);
848 assert_eq!("\xa2".len(), 2u);
849 assert_eq!("\u03c0".len(), 2u);
850 assert_eq!("\u2620".len(), 3u);
851 assert_eq!("\U0001d11e".len(), 4u);
853 assert_eq!("".char_len(), 0u);
854 assert_eq!("hello world".char_len(), 11u);
855 assert_eq!("\x63".char_len(), 1u);
856 assert_eq!("\xa2".char_len(), 1u);
857 assert_eq!("\u03c0".char_len(), 1u);
858 assert_eq!("\u2620".char_len(), 1u);
859 assert_eq!("\U0001d11e".char_len(), 1u);
860 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
862 assert_eq!("hello".width(false), 10u);
863 assert_eq!("hello".width(true), 10u);
864 assert_eq!("\0\0\0\0\0".width(false), 0u);
865 assert_eq!("\0\0\0\0\0".width(true), 0u);
866 assert_eq!("".width(false), 0u);
867 assert_eq!("".width(true), 0u);
868 assert_eq!("\u2081\u2082\u2083\u2084".width(false), 4u);
869 assert_eq!("\u2081\u2082\u2083\u2084".width(true), 8u);
874 assert_eq!("hello".find('l'), Some(2u));
875 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
876 assert!("hello".find('x').is_none());
877 assert!("hello".find(|c:char| c == 'x').is_none());
878 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
879 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
884 assert_eq!("hello".rfind('l'), Some(3u));
885 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
886 assert!("hello".rfind('x').is_none());
887 assert!("hello".rfind(|c:char| c == 'x').is_none());
888 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
889 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
894 let empty = String::from_str("");
895 let s: String = empty.as_slice().chars().collect();
896 assert_eq!(empty, s);
897 let data = String::from_str("ประเทศไทย中");
898 let s: String = data.as_slice().chars().collect();
903 fn test_into_bytes() {
904 let data = String::from_str("asdf");
905 let buf = data.into_bytes();
906 assert_eq!(b"asdf", buf.as_slice());
912 assert_eq!("".find_str(""), Some(0u));
913 assert!("banana".find_str("apple pie").is_none());
916 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
917 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
918 assert!(data.slice(2u, 4u).find_str("ab").is_none());
920 let string = "ประเทศไทย中华Việt Nam";
921 let mut data = String::from_str(string);
922 data.push_str(string);
923 assert!(data.as_slice().find_str("ไท华").is_none());
924 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
925 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
927 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
928 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
929 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
930 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
931 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
933 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
934 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
935 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
936 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
937 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
941 fn test_slice_chars() {
942 fn t(a: &str, b: &str, start: uint) {
943 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
946 t("hello", "llo", 2);
950 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
955 fn t(v: &[String], s: &str) {
956 assert_eq!(v.concat().as_slice(), s);
958 t([String::from_str("you"), String::from_str("know"),
959 String::from_str("I'm"),
960 String::from_str("no"), String::from_str("good")],
962 let v: &[String] = [];
964 t([String::from_str("hi")], "hi");
969 fn t(v: &[String], sep: &str, s: &str) {
970 assert_eq!(v.connect(sep).as_slice(), s);
972 t([String::from_str("you"), String::from_str("know"),
973 String::from_str("I'm"),
974 String::from_str("no"), String::from_str("good")],
975 " ", "you know I'm no good");
976 let v: &[String] = [];
978 t([String::from_str("hi")], " ", "hi");
982 fn test_concat_slices() {
983 fn t(v: &[&str], s: &str) {
984 assert_eq!(v.concat().as_slice(), s);
986 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
993 fn test_connect_slices() {
994 fn t(v: &[&str], sep: &str, s: &str) {
995 assert_eq!(v.connect(sep).as_slice(), s);
997 t(["you", "know", "I'm", "no", "good"],
998 " ", "you know I'm no good");
1000 t(["hi"], " ", "hi");
1005 assert_eq!("x".repeat(4), String::from_str("xxxx"));
1006 assert_eq!("hi".repeat(4), String::from_str("hihihihi"));
1007 assert_eq!("ไท华".repeat(3), String::from_str("ไท华ไท华ไท华"));
1008 assert_eq!("".repeat(4), String::from_str(""));
1009 assert_eq!("hi".repeat(0), String::from_str(""));
1013 fn test_unsafe_slice() {
1014 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1015 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1016 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1017 fn a_million_letter_a() -> String {
1019 let mut rs = String::new();
1021 rs.push_str("aaaaaaaaaa");
1026 fn half_a_million_letter_a() -> String {
1028 let mut rs = String::new();
1030 rs.push_str("aaaaa");
1035 let letters = a_million_letter_a();
1036 assert!(half_a_million_letter_a() ==
1037 unsafe {String::from_str(raw::slice_bytes(letters.as_slice(),
1043 fn test_starts_with() {
1044 assert!(("".starts_with("")));
1045 assert!(("abc".starts_with("")));
1046 assert!(("abc".starts_with("a")));
1047 assert!((!"a".starts_with("abc")));
1048 assert!((!"".starts_with("abc")));
1049 assert!((!"ödd".starts_with("-")));
1050 assert!(("ödd".starts_with("öd")));
1054 fn test_ends_with() {
1055 assert!(("".ends_with("")));
1056 assert!(("abc".ends_with("")));
1057 assert!(("abc".ends_with("c")));
1058 assert!((!"a".ends_with("abc")));
1059 assert!((!"".ends_with("abc")));
1060 assert!((!"ddö".ends_with("-")));
1061 assert!(("ddö".ends_with("dö")));
1065 fn test_is_empty() {
1066 assert!("".is_empty());
1067 assert!(!"a".is_empty());
1073 assert_eq!("".replace(a, "b"), String::from_str(""));
1074 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1075 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1077 assert!(" test test ".replace(test, "toast") ==
1078 String::from_str(" toast toast "));
1079 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1083 fn test_replace_2a() {
1084 let data = "ประเทศไทย中华";
1085 let repl = "دولة الكويت";
1088 let a2 = "دولة الكويتทศไทย中华";
1089 assert_eq!(data.replace(a, repl).as_slice(), a2);
1093 fn test_replace_2b() {
1094 let data = "ประเทศไทย中华";
1095 let repl = "دولة الكويت";
1098 let b2 = "ปรدولة الكويتทศไทย中华";
1099 assert_eq!(data.replace(b, repl).as_slice(), b2);
1103 fn test_replace_2c() {
1104 let data = "ประเทศไทย中华";
1105 let repl = "دولة الكويت";
1108 let c2 = "ประเทศไทยدولة الكويت";
1109 assert_eq!(data.replace(c, repl).as_slice(), c2);
1113 fn test_replace_2d() {
1114 let data = "ประเทศไทย中华";
1115 let repl = "دولة الكويت";
1118 assert_eq!(data.replace(d, repl).as_slice(), data);
1123 assert_eq!("ab", "abc".slice(0, 2));
1124 assert_eq!("bc", "abc".slice(1, 3));
1125 assert_eq!("", "abc".slice(1, 1));
1126 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1128 let data = "ประเทศไทย中华";
1129 assert_eq!("ป", data.slice(0, 3));
1130 assert_eq!("ร", data.slice(3, 6));
1131 assert_eq!("", data.slice(3, 3));
1132 assert_eq!("华", data.slice(30, 33));
1134 fn a_million_letter_x() -> String {
1136 let mut rs = String::new();
1138 rs.push_str("华华华华华华华华华华");
1143 fn half_a_million_letter_x() -> String {
1145 let mut rs = String::new();
1147 rs.push_str("华华华华华");
1152 let letters = a_million_letter_x();
1153 assert!(half_a_million_letter_x() ==
1154 String::from_str(letters.as_slice().slice(0u, 3u * 500000u)));
1159 let ss = "中华Việt Nam";
1161 assert_eq!("华", ss.slice(3u, 6u));
1162 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1164 assert_eq!("ab", "abc".slice(0u, 2u));
1165 assert_eq!("bc", "abc".slice(1u, 3u));
1166 assert_eq!("", "abc".slice(1u, 1u));
1168 assert_eq!("中", ss.slice(0u, 3u));
1169 assert_eq!("华V", ss.slice(3u, 7u));
1170 assert_eq!("", ss.slice(3u, 3u));
1185 fn test_slice_fail() {
1186 "中华Việt Nam".slice(0u, 2u);
1190 fn test_slice_from() {
1191 assert_eq!("abcd".slice_from(0), "abcd");
1192 assert_eq!("abcd".slice_from(2), "cd");
1193 assert_eq!("abcd".slice_from(4), "");
1196 fn test_slice_to() {
1197 assert_eq!("abcd".slice_to(0), "");
1198 assert_eq!("abcd".slice_to(2), "ab");
1199 assert_eq!("abcd".slice_to(4), "abcd");
1203 fn test_trim_left_chars() {
1204 let v: &[char] = &[];
1205 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1206 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1207 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1208 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1210 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1211 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1212 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1216 fn test_trim_right_chars() {
1217 let v: &[char] = &[];
1218 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1219 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1220 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1221 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1223 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1224 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1225 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1229 fn test_trim_chars() {
1230 let v: &[char] = &[];
1231 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1232 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1233 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1234 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1236 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1237 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1238 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1242 fn test_trim_left() {
1243 assert_eq!("".trim_left(), "");
1244 assert_eq!("a".trim_left(), "a");
1245 assert_eq!(" ".trim_left(), "");
1246 assert_eq!(" blah".trim_left(), "blah");
1247 assert_eq!(" \u3000 wut".trim_left(), "wut");
1248 assert_eq!("hey ".trim_left(), "hey ");
1252 fn test_trim_right() {
1253 assert_eq!("".trim_right(), "");
1254 assert_eq!("a".trim_right(), "a");
1255 assert_eq!(" ".trim_right(), "");
1256 assert_eq!("blah ".trim_right(), "blah");
1257 assert_eq!("wut \u3000 ".trim_right(), "wut");
1258 assert_eq!(" hey".trim_right(), " hey");
1263 assert_eq!("".trim(), "");
1264 assert_eq!("a".trim(), "a");
1265 assert_eq!(" ".trim(), "");
1266 assert_eq!(" blah ".trim(), "blah");
1267 assert_eq!("\nwut \u3000 ".trim(), "wut");
1268 assert_eq!(" hey dude ".trim(), "hey dude");
1272 fn test_is_whitespace() {
1273 assert!("".is_whitespace());
1274 assert!(" ".is_whitespace());
1275 assert!("\u2009".is_whitespace()); // Thin space
1276 assert!(" \n\t ".is_whitespace());
1277 assert!(!" _ ".is_whitespace());
1281 fn test_slice_shift_char() {
1282 let data = "ประเทศไทย中";
1283 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1287 fn test_slice_shift_char_2() {
1289 assert_eq!(empty.slice_shift_char(), (None, ""));
1294 // deny overlong encodings
1295 assert!(!is_utf8([0xc0, 0x80]));
1296 assert!(!is_utf8([0xc0, 0xae]));
1297 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1298 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1299 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1300 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1301 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1304 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1305 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1307 assert!(is_utf8([0xC2, 0x80]));
1308 assert!(is_utf8([0xDF, 0xBF]));
1309 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1310 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1311 assert!(is_utf8([0xEE, 0x80, 0x80]));
1312 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1313 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1314 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1318 fn test_is_utf16() {
1319 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1327 // surrogate pairs (randomly generated with Python 3's
1328 // .encode('utf-16be'))
1329 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1330 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1331 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1333 // mixtures (also random)
1334 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1335 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1336 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1339 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1342 // surrogate + regular unit
1344 // surrogate + lead surrogate
1346 // unterminated surrogate
1348 // trail surrogate without a lead
1351 // random byte sequences that Python 3's .decode('utf-16be')
1353 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1354 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1355 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1356 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1357 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1358 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1359 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1360 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1361 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1362 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1363 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1364 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1365 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1366 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1367 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1368 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1369 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1370 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1371 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1372 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1373 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1377 fn test_raw_from_c_str() {
1379 let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
1381 let c = raw::from_c_str(b);
1382 assert_eq!(c, String::from_str("AAAAAAA"));
1387 fn test_as_bytes() {
1390 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1391 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1394 assert_eq!("".as_bytes(), &[]);
1395 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1396 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1401 fn test_as_bytes_fail() {
1402 // Don't double free. (I'm not sure if this exercises the
1403 // original problem code path anymore.)
1404 let s = String::from_str("");
1405 let _bytes = s.as_bytes();
1411 let buf = "hello".as_ptr();
1413 assert_eq!(*buf.offset(0), 'h' as u8);
1414 assert_eq!(*buf.offset(1), 'e' as u8);
1415 assert_eq!(*buf.offset(2), 'l' as u8);
1416 assert_eq!(*buf.offset(3), 'l' as u8);
1417 assert_eq!(*buf.offset(4), 'o' as u8);
1422 fn test_subslice_offset() {
1423 let a = "kernelsprite";
1424 let b = a.slice(7, a.len());
1425 let c = a.slice(0, a.len() - 6);
1426 assert_eq!(a.subslice_offset(b), 7);
1427 assert_eq!(a.subslice_offset(c), 0);
1429 let string = "a\nb\nc";
1430 let lines: Vec<&str> = string.lines().collect();
1431 let lines = lines.as_slice();
1432 assert_eq!(string.subslice_offset(lines[0]), 0);
1433 assert_eq!(string.subslice_offset(lines[1]), 2);
1434 assert_eq!(string.subslice_offset(lines[2]), 4);
1439 fn test_subslice_offset_2() {
1440 let a = "alchemiter";
1441 let b = "cruxtruder";
1442 a.subslice_offset(b);
1446 fn vec_str_conversions() {
1447 let s1: String = String::from_str("All mimsy were the borogoves");
1449 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1450 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1451 let mut i: uint = 0u;
1452 let n1: uint = s1.len();
1453 let n2: uint = v.len();
1456 let a: u8 = s1.as_bytes()[i];
1457 let b: u8 = s2.as_bytes()[i];
/// Checks substring search with `contains` on ASCII and multi-byte text,
/// including the empty needle and the empty haystack.
fn test_contains() {
    // ASCII needles that must be found (the empty needle always matches).
    let ascii_hits = [("abcde", "bcd"), ("abcde", "abcd"), ("abcde", "bcde"),
                      ("abcde", ""), ("", "")];
    for &(haystack, needle) in ascii_hits.iter() {
        assert!(haystack.contains(needle), "'{}' should contain '{}'", haystack, needle);
    }

    // ASCII needles that must not be found.
    let ascii_misses = [("abcde", "def"), ("", "a")];
    for &(haystack, needle) in ascii_misses.iter() {
        assert!(!haystack.contains(needle), "'{}' should not contain '{}'", haystack, needle);
    }

    // Multi-byte text: needles at the start, in the middle, and spanning scripts.
    let data = "ประเทศไทย中华Việt Nam";
    let unicode_hits = ["ประเ", "ะเ", "中华"];
    for &needle in unicode_hits.iter() {
        assert!(data.contains(needle), "'{}' should contain '{}'", data, needle);
    }
    // Each character occurs in `data`, but never in this order.
    assert!(!data.contains("ไท华"));
}
1483 fn test_contains_char() {
1484 assert!("abc".contains_char('b'));
1485 assert!("a".contains_char('a'));
1486 assert!(!"abc".contains_char('d'));
1487 assert!(!"".contains_char('a'));
1491 fn test_truncate_utf16_at_nul() {
1493 assert_eq!(truncate_utf16_at_nul(v), &[]);
1496 assert_eq!(truncate_utf16_at_nul(v), &[]);
1499 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1502 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1505 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1510 let s = "ศไทย中华Việt Nam";
1511 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1513 for ch in v.iter() {
1514 assert!(s.char_at(pos) == *ch);
1515 pos += String::from_char(1, *ch).len();
1520 fn test_char_at_reverse() {
1521 let s = "ศไทย中华Việt Nam";
1522 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1523 let mut pos = s.len();
1524 for ch in v.iter().rev() {
1525 assert!(s.char_at_reverse(pos) == *ch);
1526 pos -= String::from_char(1, *ch).len();
1531 fn test_escape_unicode() {
1532 assert_eq!("abc".escape_unicode(), String::from_str("\\x61\\x62\\x63"));
1533 assert_eq!("a c".escape_unicode(), String::from_str("\\x61\\x20\\x63"));
1534 assert_eq!("\r\n\t".escape_unicode(), String::from_str("\\x0d\\x0a\\x09"));
1535 assert_eq!("'\"\\".escape_unicode(), String::from_str("\\x27\\x22\\x5c"));
1536 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), String::from_str("\\x00\\x01\\xfe\\xff"));
1537 assert_eq!("\u0100\uffff".escape_unicode(), String::from_str("\\u0100\\uffff"));
1538 assert_eq!("\U00010000\U0010ffff".escape_unicode(),
1539 String::from_str("\\U00010000\\U0010ffff"));
1540 assert_eq!("ab\ufb00".escape_unicode(), String::from_str("\\x61\\x62\\ufb00"));
1541 assert_eq!("\U0001d4ea\r".escape_unicode(), String::from_str("\\U0001d4ea\\x0d"));
1545 fn test_escape_default() {
1546 assert_eq!("abc".escape_default(), String::from_str("abc"));
1547 assert_eq!("a c".escape_default(), String::from_str("a c"));
1548 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
1549 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
1550 assert_eq!("\u0100\uffff".escape_default(), String::from_str("\\u0100\\uffff"));
1551 assert_eq!("\U00010000\U0010ffff".escape_default(),
1552 String::from_str("\\U00010000\\U0010ffff"));
1553 assert_eq!("ab\ufb00".escape_default(), String::from_str("ab\\ufb00"));
1554 assert_eq!("\U0001d4ea\r".escape_default(), String::from_str("\\U0001d4ea\\r"));
/// Checks the total ordering of string slices via `cmp`.
///
/// Each comparison is wrapped in `assert_eq!`. Previously the
/// `cmp(..) == Ordering` expressions were bare statements whose results were
/// discarded, so the test could never fail.
fn test_total_ord() {
    // A string is greater than its own proper prefix, and vice versa.
    assert_eq!("1234".cmp(&("123")), Greater);
    assert_eq!("123".cmp(&("1234")), Less);
    assert_eq!("1234".cmp(&("1234")), Equal);
    // The first differing character decides ('5' < '6'), not the length.
    assert_eq!("12345555".cmp(&("123456")), Less);
    // Likewise '2' > '1' outweighs the shorter length.
    assert_eq!("22".cmp(&("1234")), Greater);
}
1567 fn test_char_range_at() {
1568 let data = "b¢€𤭢𤭢€¢b";
1569 assert_eq!('b', data.char_range_at(0).ch);
1570 assert_eq!('¢', data.char_range_at(1).ch);
1571 assert_eq!('€', data.char_range_at(3).ch);
1572 assert_eq!('𤭢', data.char_range_at(6).ch);
1573 assert_eq!('𤭢', data.char_range_at(10).ch);
1574 assert_eq!('€', data.char_range_at(14).ch);
1575 assert_eq!('¢', data.char_range_at(17).ch);
1576 assert_eq!('b', data.char_range_at(19).ch);
1580 fn test_char_range_at_reverse_underflow() {
1581 assert_eq!("abc".char_range_at_reverse(0).next, 0);
1585 fn test_iterator() {
1586 let s = "ศไทย中华Việt Nam";
1587 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1590 let mut it = s.chars();
1593 assert_eq!(c, v[pos]);
1596 assert_eq!(pos, v.len());
1600 fn test_rev_iterator() {
1601 let s = "ศไทย中华Việt Nam";
1602 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1605 let mut it = s.chars().rev();
1608 assert_eq!(c, v[pos]);
1611 assert_eq!(pos, v.len());
1615 fn test_chars_decoding() {
1616 let mut bytes = [0u8, ..4];
1617 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1618 let len = c.encode_utf8(bytes);
1619 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1620 if Some(c) != s.chars().next() {
1621 fail!("character {:x}={} does not decode correctly", c as u32, c);
1627 fn test_chars_rev_decoding() {
1628 let mut bytes = [0u8, ..4];
1629 for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
1630 let len = c.encode_utf8(bytes);
1631 let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
1632 if Some(c) != s.chars().rev().next() {
1633 fail!("character {:x}={} does not decode correctly", c as u32, c);
1639 fn test_iterator_clone() {
1640 let s = "ศไทย中华Việt Nam";
1641 let mut it = s.chars();
1643 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1647 fn test_bytesator() {
1648 let s = "ศไทย中华Việt Nam";
1650 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1651 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1656 for b in s.bytes() {
1657 assert_eq!(b, v[pos]);
1663 fn test_bytes_revator() {
1664 let s = "ศไทย中华Việt Nam";
1666 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1667 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1670 let mut pos = v.len();
1672 for b in s.bytes().rev() {
1674 assert_eq!(b, v[pos]);
1679 fn test_char_indicesator() {
1680 let s = "ศไทย中华Việt Nam";
1681 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1682 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1685 let mut it = s.char_indices();
1688 assert_eq!(c, (p[pos], v[pos]));
1691 assert_eq!(pos, v.len());
1692 assert_eq!(pos, p.len());
1696 fn test_char_indices_revator() {
1697 let s = "ศไทย中华Việt Nam";
1698 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1699 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1702 let mut it = s.char_indices().rev();
1705 assert_eq!(c, (p[pos], v[pos]));
1708 assert_eq!(pos, v.len());
1709 assert_eq!(pos, p.len());
1713 fn test_split_char_iterator() {
1714 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1716 let split: Vec<&str> = data.split(' ').collect();
1717 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1719 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1721 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1723 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1724 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1726 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1728 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1731 let split: Vec<&str> = data.split('ä').collect();
1732 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1734 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1736 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1738 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1739 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1741 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1743 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1747 fn test_splitn_char_iterator() {
1748 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1750 let split: Vec<&str> = data.splitn(' ', 3).collect();
1751 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1753 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1754 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1757 let split: Vec<&str> = data.splitn('ä', 3).collect();
1758 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1760 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1761 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1765 fn test_rsplitn_char_iterator() {
1766 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1768 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1770 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1772 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1774 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1777 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1779 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1781 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1783 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
/// Contrasts `split` and `split_terminator` on a string that both starts and
/// ends with the separator.
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    // `split` yields an empty piece on each side of the outer separators.
    let pieces: Vec<&str> = data.split('\n').collect();
    let expected = vec!["", "Märy häd ä little lämb", "Little lämb", ""];
    assert_eq!(pieces, expected);

    // `split_terminator` yields no empty piece after the final separator.
    let pieces: Vec<&str> = data.split_terminator('\n').collect();
    let expected = vec!["", "Märy häd ä little lämb", "Little lämb"];
    assert_eq!(pieces, expected);
}
1798 fn test_rev_split_char_iterator_no_trailing() {
1799 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1801 let mut split: Vec<&str> = data.split('\n').rev().collect();
1803 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1805 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1807 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
1812 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1813 let words: Vec<&str> = data.words().collect();
1814 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
1818 fn test_nfd_chars() {
1819 assert_eq!("abc".nfd_chars().collect::<String>(), String::from_str("abc"));
1820 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(),
1821 String::from_str("d\u0307\u01c4"));
1822 assert_eq!("\u2026".nfd_chars().collect::<String>(), String::from_str("\u2026"));
1823 assert_eq!("\u2126".nfd_chars().collect::<String>(), String::from_str("\u03a9"));
1824 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(),
1825 String::from_str("d\u0323\u0307"));
1826 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(),
1827 String::from_str("d\u0323\u0307"));
1828 assert_eq!("a\u0301".nfd_chars().collect::<String>(), String::from_str("a\u0301"));
1829 assert_eq!("\u0301a".nfd_chars().collect::<String>(), String::from_str("\u0301a"));
1830 assert_eq!("\ud4db".nfd_chars().collect::<String>(),
1831 String::from_str("\u1111\u1171\u11b6"));
1832 assert_eq!("\uac1c".nfd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
1836 fn test_nfkd_chars() {
1837 assert_eq!("abc".nfkd_chars().collect::<String>(), String::from_str("abc"));
1838 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(),
1839 String::from_str("d\u0307DZ\u030c"));
1840 assert_eq!("\u2026".nfkd_chars().collect::<String>(), String::from_str("..."));
1841 assert_eq!("\u2126".nfkd_chars().collect::<String>(), String::from_str("\u03a9"));
1842 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(),
1843 String::from_str("d\u0323\u0307"));
1844 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(),
1845 String::from_str("d\u0323\u0307"));
1846 assert_eq!("a\u0301".nfkd_chars().collect::<String>(), String::from_str("a\u0301"));
1847 assert_eq!("\u0301a".nfkd_chars().collect::<String>(),
1848 String::from_str("\u0301a"));
1849 assert_eq!("\ud4db".nfkd_chars().collect::<String>(),
1850 String::from_str("\u1111\u1171\u11b6"));
1851 assert_eq!("\uac1c".nfkd_chars().collect::<String>(), String::from_str("\u1100\u1162"));
1856 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1857 let lines: Vec<&str> = data.lines().collect();
1858 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1860 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
1861 let lines: Vec<&str> = data.lines().collect();
1862 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1866 fn test_graphemes() {
1867 use std::iter::order;
1868 // official Unicode test data
1869 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
1871 ("\u0020\u0020", &["\u0020", "\u0020"]), ("\u0020\u0308\u0020", &["\u0020\u0308",
1872 "\u0020"]), ("\u0020\u000D", &["\u0020", "\u000D"]), ("\u0020\u0308\u000D",
1873 &["\u0020\u0308", "\u000D"]), ("\u0020\u000A", &["\u0020", "\u000A"]),
1874 ("\u0020\u0308\u000A", &["\u0020\u0308", "\u000A"]), ("\u0020\u0001", &["\u0020",
1875 "\u0001"]), ("\u0020\u0308\u0001", &["\u0020\u0308", "\u0001"]), ("\u0020\u0300",
1876 &["\u0020\u0300"]), ("\u0020\u0308\u0300", &["\u0020\u0308\u0300"]), ("\u0020\u1100",
1877 &["\u0020", "\u1100"]), ("\u0020\u0308\u1100", &["\u0020\u0308", "\u1100"]),
1878 ("\u0020\u1160", &["\u0020", "\u1160"]), ("\u0020\u0308\u1160", &["\u0020\u0308",
1879 "\u1160"]), ("\u0020\u11A8", &["\u0020", "\u11A8"]), ("\u0020\u0308\u11A8",
1880 &["\u0020\u0308", "\u11A8"]), ("\u0020\uAC00", &["\u0020", "\uAC00"]),
1881 ("\u0020\u0308\uAC00", &["\u0020\u0308", "\uAC00"]), ("\u0020\uAC01", &["\u0020",
1882 "\uAC01"]), ("\u0020\u0308\uAC01", &["\u0020\u0308", "\uAC01"]), ("\u0020\U0001F1E6",
1883 &["\u0020", "\U0001F1E6"]), ("\u0020\u0308\U0001F1E6", &["\u0020\u0308",
1884 "\U0001F1E6"]), ("\u0020\u0378", &["\u0020", "\u0378"]), ("\u0020\u0308\u0378",
1885 &["\u0020\u0308", "\u0378"]), ("\u000D\u0020", &["\u000D", "\u0020"]),
1886 ("\u000D\u0308\u0020", &["\u000D", "\u0308", "\u0020"]), ("\u000D\u000D", &["\u000D",
1887 "\u000D"]), ("\u000D\u0308\u000D", &["\u000D", "\u0308", "\u000D"]), ("\u000D\u000A",
1888 &["\u000D\u000A"]), ("\u000D\u0308\u000A", &["\u000D", "\u0308", "\u000A"]),
1889 ("\u000D\u0001", &["\u000D", "\u0001"]), ("\u000D\u0308\u0001", &["\u000D", "\u0308",
1890 "\u0001"]), ("\u000D\u0300", &["\u000D", "\u0300"]), ("\u000D\u0308\u0300",
1891 &["\u000D", "\u0308\u0300"]), ("\u000D\u0903", &["\u000D", "\u0903"]),
1892 ("\u000D\u1100", &["\u000D", "\u1100"]), ("\u000D\u0308\u1100", &["\u000D", "\u0308",
1893 "\u1100"]), ("\u000D\u1160", &["\u000D", "\u1160"]), ("\u000D\u0308\u1160",
1894 &["\u000D", "\u0308", "\u1160"]), ("\u000D\u11A8", &["\u000D", "\u11A8"]),
1895 ("\u000D\u0308\u11A8", &["\u000D", "\u0308", "\u11A8"]), ("\u000D\uAC00", &["\u000D",
1896 "\uAC00"]), ("\u000D\u0308\uAC00", &["\u000D", "\u0308", "\uAC00"]), ("\u000D\uAC01",
1897 &["\u000D", "\uAC01"]), ("\u000D\u0308\uAC01", &["\u000D", "\u0308", "\uAC01"]),
1898 ("\u000D\U0001F1E6", &["\u000D", "\U0001F1E6"]), ("\u000D\u0308\U0001F1E6",
1899 &["\u000D", "\u0308", "\U0001F1E6"]), ("\u000D\u0378", &["\u000D", "\u0378"]),
1900 ("\u000D\u0308\u0378", &["\u000D", "\u0308", "\u0378"]), ("\u000A\u0020", &["\u000A",
1901 "\u0020"]), ("\u000A\u0308\u0020", &["\u000A", "\u0308", "\u0020"]), ("\u000A\u000D",
1902 &["\u000A", "\u000D"]), ("\u000A\u0308\u000D", &["\u000A", "\u0308", "\u000D"]),
1903 ("\u000A\u000A", &["\u000A", "\u000A"]), ("\u000A\u0308\u000A", &["\u000A", "\u0308",
1904 "\u000A"]), ("\u000A\u0001", &["\u000A", "\u0001"]), ("\u000A\u0308\u0001",
1905 &["\u000A", "\u0308", "\u0001"]), ("\u000A\u0300", &["\u000A", "\u0300"]),
1906 ("\u000A\u0308\u0300", &["\u000A", "\u0308\u0300"]), ("\u000A\u0903", &["\u000A",
1907 "\u0903"]), ("\u000A\u1100", &["\u000A", "\u1100"]), ("\u000A\u0308\u1100",
1908 &["\u000A", "\u0308", "\u1100"]), ("\u000A\u1160", &["\u000A", "\u1160"]),
1909 ("\u000A\u0308\u1160", &["\u000A", "\u0308", "\u1160"]), ("\u000A\u11A8", &["\u000A",
1910 "\u11A8"]), ("\u000A\u0308\u11A8", &["\u000A", "\u0308", "\u11A8"]), ("\u000A\uAC00",
1911 &["\u000A", "\uAC00"]), ("\u000A\u0308\uAC00", &["\u000A", "\u0308", "\uAC00"]),
1912 ("\u000A\uAC01", &["\u000A", "\uAC01"]), ("\u000A\u0308\uAC01", &["\u000A", "\u0308",
1913 "\uAC01"]), ("\u000A\U0001F1E6", &["\u000A", "\U0001F1E6"]),
1914 ("\u000A\u0308\U0001F1E6", &["\u000A", "\u0308", "\U0001F1E6"]), ("\u000A\u0378",
1915 &["\u000A", "\u0378"]), ("\u000A\u0308\u0378", &["\u000A", "\u0308", "\u0378"]),
1916 ("\u0001\u0020", &["\u0001", "\u0020"]), ("\u0001\u0308\u0020", &["\u0001", "\u0308",
1917 "\u0020"]), ("\u0001\u000D", &["\u0001", "\u000D"]), ("\u0001\u0308\u000D",
1918 &["\u0001", "\u0308", "\u000D"]), ("\u0001\u000A", &["\u0001", "\u000A"]),
1919 ("\u0001\u0308\u000A", &["\u0001", "\u0308", "\u000A"]), ("\u0001\u0001", &["\u0001",
1920 "\u0001"]), ("\u0001\u0308\u0001", &["\u0001", "\u0308", "\u0001"]), ("\u0001\u0300",
1921 &["\u0001", "\u0300"]), ("\u0001\u0308\u0300", &["\u0001", "\u0308\u0300"]),
1922 ("\u0001\u0903", &["\u0001", "\u0903"]), ("\u0001\u1100", &["\u0001", "\u1100"]),
1923 ("\u0001\u0308\u1100", &["\u0001", "\u0308", "\u1100"]), ("\u0001\u1160", &["\u0001",
1924 "\u1160"]), ("\u0001\u0308\u1160", &["\u0001", "\u0308", "\u1160"]), ("\u0001\u11A8",
1925 &["\u0001", "\u11A8"]), ("\u0001\u0308\u11A8", &["\u0001", "\u0308", "\u11A8"]),
1926 ("\u0001\uAC00", &["\u0001", "\uAC00"]), ("\u0001\u0308\uAC00", &["\u0001", "\u0308",
1927 "\uAC00"]), ("\u0001\uAC01", &["\u0001", "\uAC01"]), ("\u0001\u0308\uAC01",
1928 &["\u0001", "\u0308", "\uAC01"]), ("\u0001\U0001F1E6", &["\u0001", "\U0001F1E6"]),
1929 ("\u0001\u0308\U0001F1E6", &["\u0001", "\u0308", "\U0001F1E6"]), ("\u0001\u0378",
1930 &["\u0001", "\u0378"]), ("\u0001\u0308\u0378", &["\u0001", "\u0308", "\u0378"]),
1931 ("\u0300\u0020", &["\u0300", "\u0020"]), ("\u0300\u0308\u0020", &["\u0300\u0308",
1932 "\u0020"]), ("\u0300\u000D", &["\u0300", "\u000D"]), ("\u0300\u0308\u000D",
1933 &["\u0300\u0308", "\u000D"]), ("\u0300\u000A", &["\u0300", "\u000A"]),
1934 ("\u0300\u0308\u000A", &["\u0300\u0308", "\u000A"]), ("\u0300\u0001", &["\u0300",
1935 "\u0001"]), ("\u0300\u0308\u0001", &["\u0300\u0308", "\u0001"]), ("\u0300\u0300",
1936 &["\u0300\u0300"]), ("\u0300\u0308\u0300", &["\u0300\u0308\u0300"]), ("\u0300\u1100",
1937 &["\u0300", "\u1100"]), ("\u0300\u0308\u1100", &["\u0300\u0308", "\u1100"]),
1938 ("\u0300\u1160", &["\u0300", "\u1160"]), ("\u0300\u0308\u1160", &["\u0300\u0308",
1939 "\u1160"]), ("\u0300\u11A8", &["\u0300", "\u11A8"]), ("\u0300\u0308\u11A8",
1940 &["\u0300\u0308", "\u11A8"]), ("\u0300\uAC00", &["\u0300", "\uAC00"]),
1941 ("\u0300\u0308\uAC00", &["\u0300\u0308", "\uAC00"]), ("\u0300\uAC01", &["\u0300",
1942 "\uAC01"]), ("\u0300\u0308\uAC01", &["\u0300\u0308", "\uAC01"]), ("\u0300\U0001F1E6",
1943 &["\u0300", "\U0001F1E6"]), ("\u0300\u0308\U0001F1E6", &["\u0300\u0308",
1944 "\U0001F1E6"]), ("\u0300\u0378", &["\u0300", "\u0378"]), ("\u0300\u0308\u0378",
1945 &["\u0300\u0308", "\u0378"]), ("\u0903\u0020", &["\u0903", "\u0020"]),
1946 ("\u0903\u0308\u0020", &["\u0903\u0308", "\u0020"]), ("\u0903\u000D", &["\u0903",
1947 "\u000D"]), ("\u0903\u0308\u000D", &["\u0903\u0308", "\u000D"]), ("\u0903\u000A",
1948 &["\u0903", "\u000A"]), ("\u0903\u0308\u000A", &["\u0903\u0308", "\u000A"]),
1949 ("\u0903\u0001", &["\u0903", "\u0001"]), ("\u0903\u0308\u0001", &["\u0903\u0308",
1950 "\u0001"]), ("\u0903\u0300", &["\u0903\u0300"]), ("\u0903\u0308\u0300",
1951 &["\u0903\u0308\u0300"]), ("\u0903\u1100", &["\u0903", "\u1100"]),
1952 ("\u0903\u0308\u1100", &["\u0903\u0308", "\u1100"]), ("\u0903\u1160", &["\u0903",
1953 "\u1160"]), ("\u0903\u0308\u1160", &["\u0903\u0308", "\u1160"]), ("\u0903\u11A8",
1954 &["\u0903", "\u11A8"]), ("\u0903\u0308\u11A8", &["\u0903\u0308", "\u11A8"]),
1955 ("\u0903\uAC00", &["\u0903", "\uAC00"]), ("\u0903\u0308\uAC00", &["\u0903\u0308",
1956 "\uAC00"]), ("\u0903\uAC01", &["\u0903", "\uAC01"]), ("\u0903\u0308\uAC01",
1957 &["\u0903\u0308", "\uAC01"]), ("\u0903\U0001F1E6", &["\u0903", "\U0001F1E6"]),
1958 ("\u0903\u0308\U0001F1E6", &["\u0903\u0308", "\U0001F1E6"]), ("\u0903\u0378",
1959 &["\u0903", "\u0378"]), ("\u0903\u0308\u0378", &["\u0903\u0308", "\u0378"]),
1960 ("\u1100\u0020", &["\u1100", "\u0020"]), ("\u1100\u0308\u0020", &["\u1100\u0308",
1961 "\u0020"]), ("\u1100\u000D", &["\u1100", "\u000D"]), ("\u1100\u0308\u000D",
1962 &["\u1100\u0308", "\u000D"]), ("\u1100\u000A", &["\u1100", "\u000A"]),
1963 ("\u1100\u0308\u000A", &["\u1100\u0308", "\u000A"]), ("\u1100\u0001", &["\u1100",
1964 "\u0001"]), ("\u1100\u0308\u0001", &["\u1100\u0308", "\u0001"]), ("\u1100\u0300",
1965 &["\u1100\u0300"]), ("\u1100\u0308\u0300", &["\u1100\u0308\u0300"]), ("\u1100\u1100",
1966 &["\u1100\u1100"]), ("\u1100\u0308\u1100", &["\u1100\u0308", "\u1100"]),
1967 ("\u1100\u1160", &["\u1100\u1160"]), ("\u1100\u0308\u1160", &["\u1100\u0308",
1968 "\u1160"]), ("\u1100\u11A8", &["\u1100", "\u11A8"]), ("\u1100\u0308\u11A8",
1969 &["\u1100\u0308", "\u11A8"]), ("\u1100\uAC00", &["\u1100\uAC00"]),
1970 ("\u1100\u0308\uAC00", &["\u1100\u0308", "\uAC00"]), ("\u1100\uAC01",
1971 &["\u1100\uAC01"]), ("\u1100\u0308\uAC01", &["\u1100\u0308", "\uAC01"]),
1972 ("\u1100\U0001F1E6", &["\u1100", "\U0001F1E6"]), ("\u1100\u0308\U0001F1E6",
1973 &["\u1100\u0308", "\U0001F1E6"]), ("\u1100\u0378", &["\u1100", "\u0378"]),
1974 ("\u1100\u0308\u0378", &["\u1100\u0308", "\u0378"]), ("\u1160\u0020", &["\u1160",
1975 "\u0020"]), ("\u1160\u0308\u0020", &["\u1160\u0308", "\u0020"]), ("\u1160\u000D",
1976 &["\u1160", "\u000D"]), ("\u1160\u0308\u000D", &["\u1160\u0308", "\u000D"]),
1977 ("\u1160\u000A", &["\u1160", "\u000A"]), ("\u1160\u0308\u000A", &["\u1160\u0308",
1978 "\u000A"]), ("\u1160\u0001", &["\u1160", "\u0001"]), ("\u1160\u0308\u0001",
1979 &["\u1160\u0308", "\u0001"]), ("\u1160\u0300", &["\u1160\u0300"]),
1980 ("\u1160\u0308\u0300", &["\u1160\u0308\u0300"]), ("\u1160\u1100", &["\u1160",
1981 "\u1100"]), ("\u1160\u0308\u1100", &["\u1160\u0308", "\u1100"]), ("\u1160\u1160",
1982 &["\u1160\u1160"]), ("\u1160\u0308\u1160", &["\u1160\u0308", "\u1160"]),
1983 ("\u1160\u11A8", &["\u1160\u11A8"]), ("\u1160\u0308\u11A8", &["\u1160\u0308",
1984 "\u11A8"]), ("\u1160\uAC00", &["\u1160", "\uAC00"]), ("\u1160\u0308\uAC00",
1985 &["\u1160\u0308", "\uAC00"]), ("\u1160\uAC01", &["\u1160", "\uAC01"]),
1986 ("\u1160\u0308\uAC01", &["\u1160\u0308", "\uAC01"]), ("\u1160\U0001F1E6", &["\u1160",
1987 "\U0001F1E6"]), ("\u1160\u0308\U0001F1E6", &["\u1160\u0308", "\U0001F1E6"]),
1988 ("\u1160\u0378", &["\u1160", "\u0378"]), ("\u1160\u0308\u0378", &["\u1160\u0308",
1989 "\u0378"]), ("\u11A8\u0020", &["\u11A8", "\u0020"]), ("\u11A8\u0308\u0020",
1990 &["\u11A8\u0308", "\u0020"]), ("\u11A8\u000D", &["\u11A8", "\u000D"]),
1991 ("\u11A8\u0308\u000D", &["\u11A8\u0308", "\u000D"]), ("\u11A8\u000A", &["\u11A8",
1992 "\u000A"]), ("\u11A8\u0308\u000A", &["\u11A8\u0308", "\u000A"]), ("\u11A8\u0001",
1993 &["\u11A8", "\u0001"]), ("\u11A8\u0308\u0001", &["\u11A8\u0308", "\u0001"]),
1994 ("\u11A8\u0300", &["\u11A8\u0300"]), ("\u11A8\u0308\u0300", &["\u11A8\u0308\u0300"]),
1995 ("\u11A8\u1100", &["\u11A8", "\u1100"]), ("\u11A8\u0308\u1100", &["\u11A8\u0308",
1996 "\u1100"]), ("\u11A8\u1160", &["\u11A8", "\u1160"]), ("\u11A8\u0308\u1160",
1997 &["\u11A8\u0308", "\u1160"]), ("\u11A8\u11A8", &["\u11A8\u11A8"]),
1998 ("\u11A8\u0308\u11A8", &["\u11A8\u0308", "\u11A8"]), ("\u11A8\uAC00", &["\u11A8",
1999 "\uAC00"]), ("\u11A8\u0308\uAC00", &["\u11A8\u0308", "\uAC00"]), ("\u11A8\uAC01",
2000 &["\u11A8", "\uAC01"]), ("\u11A8\u0308\uAC01", &["\u11A8\u0308", "\uAC01"]),
2001 ("\u11A8\U0001F1E6", &["\u11A8", "\U0001F1E6"]), ("\u11A8\u0308\U0001F1E6",
2002 &["\u11A8\u0308", "\U0001F1E6"]), ("\u11A8\u0378", &["\u11A8", "\u0378"]),
2003 ("\u11A8\u0308\u0378", &["\u11A8\u0308", "\u0378"]), ("\uAC00\u0020", &["\uAC00",
2004 "\u0020"]), ("\uAC00\u0308\u0020", &["\uAC00\u0308", "\u0020"]), ("\uAC00\u000D",
2005 &["\uAC00", "\u000D"]), ("\uAC00\u0308\u000D", &["\uAC00\u0308", "\u000D"]),
2006 ("\uAC00\u000A", &["\uAC00", "\u000A"]), ("\uAC00\u0308\u000A", &["\uAC00\u0308",
2007 "\u000A"]), ("\uAC00\u0001", &["\uAC00", "\u0001"]), ("\uAC00\u0308\u0001",
2008 &["\uAC00\u0308", "\u0001"]), ("\uAC00\u0300", &["\uAC00\u0300"]),
2009 ("\uAC00\u0308\u0300", &["\uAC00\u0308\u0300"]), ("\uAC00\u1100", &["\uAC00",
2010 "\u1100"]), ("\uAC00\u0308\u1100", &["\uAC00\u0308", "\u1100"]), ("\uAC00\u1160",
2011 &["\uAC00\u1160"]), ("\uAC00\u0308\u1160", &["\uAC00\u0308", "\u1160"]),
2012 ("\uAC00\u11A8", &["\uAC00\u11A8"]), ("\uAC00\u0308\u11A8", &["\uAC00\u0308",
2013 "\u11A8"]), ("\uAC00\uAC00", &["\uAC00", "\uAC00"]), ("\uAC00\u0308\uAC00",
2014 &["\uAC00\u0308", "\uAC00"]), ("\uAC00\uAC01", &["\uAC00", "\uAC01"]),
2015 ("\uAC00\u0308\uAC01", &["\uAC00\u0308", "\uAC01"]), ("\uAC00\U0001F1E6", &["\uAC00",
2016 "\U0001F1E6"]), ("\uAC00\u0308\U0001F1E6", &["\uAC00\u0308", "\U0001F1E6"]),
2017 ("\uAC00\u0378", &["\uAC00", "\u0378"]), ("\uAC00\u0308\u0378", &["\uAC00\u0308",
2018 "\u0378"]), ("\uAC01\u0020", &["\uAC01", "\u0020"]), ("\uAC01\u0308\u0020",
2019 &["\uAC01\u0308", "\u0020"]), ("\uAC01\u000D", &["\uAC01", "\u000D"]),
2020 ("\uAC01\u0308\u000D", &["\uAC01\u0308", "\u000D"]), ("\uAC01\u000A", &["\uAC01",
2021 "\u000A"]), ("\uAC01\u0308\u000A", &["\uAC01\u0308", "\u000A"]), ("\uAC01\u0001",
2022 &["\uAC01", "\u0001"]), ("\uAC01\u0308\u0001", &["\uAC01\u0308", "\u0001"]),
2023 ("\uAC01\u0300", &["\uAC01\u0300"]), ("\uAC01\u0308\u0300", &["\uAC01\u0308\u0300"]),
2024 ("\uAC01\u1100", &["\uAC01", "\u1100"]), ("\uAC01\u0308\u1100", &["\uAC01\u0308",
2025 "\u1100"]), ("\uAC01\u1160", &["\uAC01", "\u1160"]), ("\uAC01\u0308\u1160",
2026 &["\uAC01\u0308", "\u1160"]), ("\uAC01\u11A8", &["\uAC01\u11A8"]),
2027 ("\uAC01\u0308\u11A8", &["\uAC01\u0308", "\u11A8"]), ("\uAC01\uAC00", &["\uAC01",
2028 "\uAC00"]), ("\uAC01\u0308\uAC00", &["\uAC01\u0308", "\uAC00"]), ("\uAC01\uAC01",
2029 &["\uAC01", "\uAC01"]), ("\uAC01\u0308\uAC01", &["\uAC01\u0308", "\uAC01"]),
2030 ("\uAC01\U0001F1E6", &["\uAC01", "\U0001F1E6"]), ("\uAC01\u0308\U0001F1E6",
2031 &["\uAC01\u0308", "\U0001F1E6"]), ("\uAC01\u0378", &["\uAC01", "\u0378"]),
2032 ("\uAC01\u0308\u0378", &["\uAC01\u0308", "\u0378"]), ("\U0001F1E6\u0020",
2033 &["\U0001F1E6", "\u0020"]), ("\U0001F1E6\u0308\u0020", &["\U0001F1E6\u0308",
2034 "\u0020"]), ("\U0001F1E6\u000D", &["\U0001F1E6", "\u000D"]),
2035 ("\U0001F1E6\u0308\u000D", &["\U0001F1E6\u0308", "\u000D"]), ("\U0001F1E6\u000A",
2036 &["\U0001F1E6", "\u000A"]), ("\U0001F1E6\u0308\u000A", &["\U0001F1E6\u0308",
2037 "\u000A"]), ("\U0001F1E6\u0001", &["\U0001F1E6", "\u0001"]),
2038 ("\U0001F1E6\u0308\u0001", &["\U0001F1E6\u0308", "\u0001"]), ("\U0001F1E6\u0300",
2039 &["\U0001F1E6\u0300"]), ("\U0001F1E6\u0308\u0300", &["\U0001F1E6\u0308\u0300"]),
2040 ("\U0001F1E6\u1100", &["\U0001F1E6", "\u1100"]), ("\U0001F1E6\u0308\u1100",
2041 &["\U0001F1E6\u0308", "\u1100"]), ("\U0001F1E6\u1160", &["\U0001F1E6", "\u1160"]),
2042 ("\U0001F1E6\u0308\u1160", &["\U0001F1E6\u0308", "\u1160"]), ("\U0001F1E6\u11A8",
2043 &["\U0001F1E6", "\u11A8"]), ("\U0001F1E6\u0308\u11A8", &["\U0001F1E6\u0308",
2044 "\u11A8"]), ("\U0001F1E6\uAC00", &["\U0001F1E6", "\uAC00"]),
2045 ("\U0001F1E6\u0308\uAC00", &["\U0001F1E6\u0308", "\uAC00"]), ("\U0001F1E6\uAC01",
2046 &["\U0001F1E6", "\uAC01"]), ("\U0001F1E6\u0308\uAC01", &["\U0001F1E6\u0308",
2047 "\uAC01"]), ("\U0001F1E6\U0001F1E6", &["\U0001F1E6\U0001F1E6"]),
2048 ("\U0001F1E6\u0308\U0001F1E6", &["\U0001F1E6\u0308", "\U0001F1E6"]),
2049 ("\U0001F1E6\u0378", &["\U0001F1E6", "\u0378"]), ("\U0001F1E6\u0308\u0378",
2050 &["\U0001F1E6\u0308", "\u0378"]), ("\u0378\u0020", &["\u0378", "\u0020"]),
2051 ("\u0378\u0308\u0020", &["\u0378\u0308", "\u0020"]), ("\u0378\u000D", &["\u0378",
2052 "\u000D"]), ("\u0378\u0308\u000D", &["\u0378\u0308", "\u000D"]), ("\u0378\u000A",
2053 &["\u0378", "\u000A"]), ("\u0378\u0308\u000A", &["\u0378\u0308", "\u000A"]),
2054 ("\u0378\u0001", &["\u0378", "\u0001"]), ("\u0378\u0308\u0001", &["\u0378\u0308",
2055 "\u0001"]), ("\u0378\u0300", &["\u0378\u0300"]), ("\u0378\u0308\u0300",
2056 &["\u0378\u0308\u0300"]), ("\u0378\u1100", &["\u0378", "\u1100"]),
2057 ("\u0378\u0308\u1100", &["\u0378\u0308", "\u1100"]), ("\u0378\u1160", &["\u0378",
2058 "\u1160"]), ("\u0378\u0308\u1160", &["\u0378\u0308", "\u1160"]), ("\u0378\u11A8",
2059 &["\u0378", "\u11A8"]), ("\u0378\u0308\u11A8", &["\u0378\u0308", "\u11A8"]),
2060 ("\u0378\uAC00", &["\u0378", "\uAC00"]), ("\u0378\u0308\uAC00", &["\u0378\u0308",
2061 "\uAC00"]), ("\u0378\uAC01", &["\u0378", "\uAC01"]), ("\u0378\u0308\uAC01",
2062 &["\u0378\u0308", "\uAC01"]), ("\u0378\U0001F1E6", &["\u0378", "\U0001F1E6"]),
2063 ("\u0378\u0308\U0001F1E6", &["\u0378\u0308", "\U0001F1E6"]), ("\u0378\u0378",
2064 &["\u0378", "\u0378"]), ("\u0378\u0308\u0378", &["\u0378\u0308", "\u0378"]),
2065 ("\u0061\U0001F1E6\u0062", &["\u0061", "\U0001F1E6", "\u0062"]),
2066 ("\U0001F1F7\U0001F1FA", &["\U0001F1F7\U0001F1FA"]),
2067 ("\U0001F1F7\U0001F1FA\U0001F1F8", &["\U0001F1F7\U0001F1FA\U0001F1F8"]),
2068 ("\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA",
2069 &["\U0001F1F7\U0001F1FA\U0001F1F8\U0001F1EA"]),
2070 ("\U0001F1F7\U0001F1FA\u200B\U0001F1F8\U0001F1EA", &["\U0001F1F7\U0001F1FA", "\u200B",
2071 "\U0001F1F8\U0001F1EA"]), ("\U0001F1E6\U0001F1E7\U0001F1E8",
2072 &["\U0001F1E6\U0001F1E7\U0001F1E8"]), ("\U0001F1E6\u200D\U0001F1E7\U0001F1E8",
2073 &["\U0001F1E6\u200D", "\U0001F1E7\U0001F1E8"]),
2074 ("\U0001F1E6\U0001F1E7\u200D\U0001F1E8", &["\U0001F1E6\U0001F1E7\u200D",
2075 "\U0001F1E8"]), ("\u0020\u200D\u0646", &["\u0020\u200D", "\u0646"]),
2076 ("\u0646\u200D\u0020", &["\u0646\u200D", "\u0020"]),
2080 ("\u0020\u0903", &["\u0020\u0903"], &["\u0020", "\u0903"]), ("\u0020\u0308\u0903",
2081 &["\u0020\u0308\u0903"], &["\u0020\u0308", "\u0903"]), ("\u000D\u0308\u0903",
2082 &["\u000D", "\u0308\u0903"], &["\u000D", "\u0308", "\u0903"]), ("\u000A\u0308\u0903",
2083 &["\u000A", "\u0308\u0903"], &["\u000A", "\u0308", "\u0903"]), ("\u0001\u0308\u0903",
2084 &["\u0001", "\u0308\u0903"], &["\u0001", "\u0308", "\u0903"]), ("\u0300\u0903",
2085 &["\u0300\u0903"], &["\u0300", "\u0903"]), ("\u0300\u0308\u0903",
2086 &["\u0300\u0308\u0903"], &["\u0300\u0308", "\u0903"]), ("\u0903\u0903",
2087 &["\u0903\u0903"], &["\u0903", "\u0903"]), ("\u0903\u0308\u0903",
2088 &["\u0903\u0308\u0903"], &["\u0903\u0308", "\u0903"]), ("\u1100\u0903",
2089 &["\u1100\u0903"], &["\u1100", "\u0903"]), ("\u1100\u0308\u0903",
2090 &["\u1100\u0308\u0903"], &["\u1100\u0308", "\u0903"]), ("\u1160\u0903",
2091 &["\u1160\u0903"], &["\u1160", "\u0903"]), ("\u1160\u0308\u0903",
2092 &["\u1160\u0308\u0903"], &["\u1160\u0308", "\u0903"]), ("\u11A8\u0903",
2093 &["\u11A8\u0903"], &["\u11A8", "\u0903"]), ("\u11A8\u0308\u0903",
2094 &["\u11A8\u0308\u0903"], &["\u11A8\u0308", "\u0903"]), ("\uAC00\u0903",
2095 &["\uAC00\u0903"], &["\uAC00", "\u0903"]), ("\uAC00\u0308\u0903",
2096 &["\uAC00\u0308\u0903"], &["\uAC00\u0308", "\u0903"]), ("\uAC01\u0903",
2097 &["\uAC01\u0903"], &["\uAC01", "\u0903"]), ("\uAC01\u0308\u0903",
2098 &["\uAC01\u0308\u0903"], &["\uAC01\u0308", "\u0903"]), ("\U0001F1E6\u0903",
2099 &["\U0001F1E6\u0903"], &["\U0001F1E6", "\u0903"]), ("\U0001F1E6\u0308\u0903",
2100 &["\U0001F1E6\u0308\u0903"], &["\U0001F1E6\u0308", "\u0903"]), ("\u0378\u0903",
2101 &["\u0378\u0903"], &["\u0378", "\u0903"]), ("\u0378\u0308\u0903",
2102 &["\u0378\u0308\u0903"], &["\u0378\u0308", "\u0903"]),
2105 for &(s, g) in test_same.iter() {
2106 // test forward iterator
2107 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2108 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2110 // test reverse iterator
2111 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2112 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2115 for &(s, gt, gf) in test_diff.iter() {
2116 // test forward iterator
2117 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2118 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2120 // test reverse iterator
2121 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2122 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2125 // test the indices iterators
2126 let s = "a̐éö̲\r\n";
2127 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2128 assert_eq!(gr_inds.as_slice(), &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")]);
2129 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2130 assert_eq!(gr_inds.as_slice(), &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")]);
2131 let mut gr_inds = s.grapheme_indices(true);
2132 let e1 = gr_inds.size_hint();
2133 assert_eq!(e1, (1, Some(13)));
2134 let c = gr_inds.count();
2136 let e2 = gr_inds.size_hint();
2137 assert_eq!(e2, (0, Some(0)));
2139 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2141 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2142 assert_eq!(gr.as_slice(), &["\r", "\r\n", "\n"]);
// Checks `split_str`: each case splits a string on a string separator and
// compares the collected pieces with the expected list.
2146 fn test_split_strator() {
// Helper: split `s` on `sep`, collect the pieces into a `Vec<&str>`, and assert
// that they equal `u`.
2147 fn t(s: &str, sep: &str, u: &[&str]) {
2148 let v: Vec<&str> = s.split_str(sep).collect();
2149 assert_eq!(v.as_slice(), u.as_slice());
// Separator that does not occur in the input: the whole input is the only piece.
2151 t("--1233345--", "12345", ["--1233345--"]);
2152 t("abc::hello::there", "::", ["abc", "hello", "there"]);
// A separator at the start and/or end of the input yields an empty piece there.
2153 t("::hello::there", "::", ["", "hello", "there"]);
2154 t("hello::there::", "::", ["hello", "there", ""]);
2155 t("::hello::there::", "::", ["", "hello", "there", ""]);
// Non-ASCII separator inside non-ASCII text.
2156 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2157 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2158 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2159 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Input equal to the separator: two empty pieces.
2161 t("zz", "zz", ["",""]);
2162 t("ok", "z", ["ok"]);
// Runs of the separator's character: per the expected values, matches are taken
// left to right without overlapping, leaving the odd `z` as the last piece.
2163 t("zzz", "zz", ["","z"]);
2164 t("zzzzz", "zz", ["","","z"]);
// Checks that the `Default` value of a string-like type is the empty string.
2168 fn test_str_default() {
2169 use std::default::Default;
// Generic helper: builds the default value of `S` and asserts that its
// `as_slice()` is "".
2170 fn t<S: Default + Str>() {
2171 let s: S = Default::default();
2172 assert_eq!(s.as_slice(), "");
// NOTE(review): no call instantiating `t` with a concrete type is visible in
// this listing -- confirm against the full file.
// Checks that `len()` is reachable through the `Collection` bound for string
// slices and for `String` values.
2180 fn test_str_container() {
// Helper: sum of `len()` over every element of `v`.
2181 fn sum_len<S: Collection>(v: &[S]) -> uint {
2182 v.iter().map(|x| x.len()).sum()
2185 let s = String::from_str("01234");
// Every group below has lengths adding up to 5; empty strings contribute 0.
2186 assert_eq!(5, sum_len(["012", "", "34"]));
2187 assert_eq!(5, sum_len([String::from_str("01"), String::from_str("2"),
2188 String::from_str("34"), String::from_str("")]));
2189 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8`: the first two inputs are expected to give `Some` of the
// decoded string, the last one `None`.
2193 fn test_str_from_utf8() {
// NOTE(review): the binding of `xs` used by this first check is not visible in
// this listing.
2195 assert_eq!(from_utf8(xs), Some("hello"));
// Non-ASCII text: the bytes from `as_bytes()` decode back to the same string.
2197 let xs = "ศไทย中华Việt Nam".as_bytes();
2198 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
// Byte string ending in 0xFF: the expected result is `None`.
2200 let xs = b"hello\xFF";
2201 assert_eq!(from_utf8(xs), None);
// Exercises the trait-provided operations on values built with `Slice(..)` and
// `Owned(..)`: length, `as_slice`, formatting, ordering, `Default`, and
// comparison between the two kinds of value.
2205 fn test_maybe_owned_traits() {
// Value built from a borrowed string.
2206 let s = Slice("abcde");
2207 assert_eq!(s.len(), 5);
2208 assert_eq!(s.as_slice(), "abcde");
2209 assert_eq!(String::from_str(s.as_slice()).as_slice(), "abcde");
2210 assert_eq!(format!("{}", s).as_slice(), "abcde");
// Ordering works against a value built with `Owned`.
2211 assert!(s.lt(&Owned(String::from_str("bcdef"))));
// The default value compares equal to an empty `Slice`.
2212 assert_eq!(Slice(""), Default::default());
// The same checks for a value built from an owned `String`.
2214 let o = Owned(String::from_str("abcde"));
2215 assert_eq!(o.len(), 5);
2216 assert_eq!(o.as_slice(), "abcde");
2217 assert_eq!(String::from_str(o.as_slice()).as_slice(), "abcde");
2218 assert_eq!(format!("{}", o).as_slice(), "abcde");
2219 assert!(o.lt(&Slice("bcdef")));
2220 assert_eq!(Owned(String::from_str("")), Default::default());
// `s` and `o` hold the same text, so they compare as equal and equivalent in
// both directions.
2222 assert!(s.cmp(&o) == Equal);
2223 assert!(s.equiv(&o));
2225 assert!(o.cmp(&s) == Equal);
2226 assert!(o.equiv(&s));
// Checks the `is_slice()` / `is_owned()` predicates on both kinds of value.
2230 fn test_maybe_owned_methods() {
// Built with `Slice`: reports slice, not owned.
2231 let s = Slice("abcde");
2232 assert!(s.is_slice());
2233 assert!(!s.is_owned());
// Built with `Owned`: reports owned, not slice.
2235 let o = Owned(String::from_str("abcde"));
2236 assert!(!o.is_slice());
2237 assert!(o.is_owned());
// Checks that a clone compares equal to a value holding the same text, for
// every pairing of `Owned` and `Slice` on the two sides of the comparison.
2241 fn test_maybe_owned_clone() {
2242 assert_eq!(Owned(String::from_str("abcde")), Slice("abcde").clone());
2243 assert_eq!(Owned(String::from_str("abcde")), Owned(String::from_str("abcde")).clone());
2244 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2245 assert_eq!(Slice("abcde"), Owned(String::from_str("abcde")).clone());
// Checks that `into_string()` yields a `String` with the same text whether the
// value was built with `Slice` or with `Owned`.
2249 fn test_maybe_owned_into_string() {
2250 assert_eq!(Slice("abcde").into_string(), String::from_str("abcde"));
2251 assert_eq!(Owned(String::from_str("abcde")).into_string(),
2252 String::from_str("abcde"));
// Checks `into_maybe_owned()` on a string literal and on a `String`: in each
// case the result compares equal to both `Slice("abcde")` and an `Owned` value
// holding "abcde".
2256 fn test_into_maybe_owned() {
2257 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2258 assert_eq!((String::from_str("abcde")).into_maybe_owned(), Slice("abcde"));
2259 assert_eq!("abcde".into_maybe_owned(), Owned(String::from_str("abcde")));
2260 assert_eq!((String::from_str("abcde")).into_maybe_owned(),
2261 Owned(String::from_str("abcde")));
2268 use test::black_box;
2270 use std::option::{None, Some};
2271 use std::iter::{Iterator, DoubleEndedIterator};
2272 use std::collections::Collection;
// Benchmark: counts the chars of a string mixing non-ASCII and ASCII text.
2275 fn char_iterator(b: &mut Bencher) {
2276 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2278 b.iter(|| s.chars().count());
// Benchmark: walks the chars of a mixed non-ASCII / ASCII string with an
// explicit `for` loop, handing each char to `black_box`.
// NOTE(review): the `b.iter(..)` call that presumably wraps the loop is not
// visible in this listing.
2282 fn char_iterator_for(b: &mut Bencher) {
2283 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2286 for ch in s.chars() { black_box(ch) }
// Benchmark: counts the chars of a multi-line string literal that contains only
// ASCII text.
2291 fn char_iterator_ascii(b: &mut Bencher) {
2292 let s = "Mary had a little lamb, Little lamb
2293 Mary had a little lamb, Little lamb
2294 Mary had a little lamb, Little lamb
2295 Mary had a little lamb, Little lamb
2296 Mary had a little lamb, Little lamb
2297 Mary had a little lamb, Little lamb";
2299 b.iter(|| s.chars().count());
// Benchmark: counts the chars of a mixed non-ASCII / ASCII string while
// iterating from the back (`rev()`).
2303 fn char_iterator_rev(b: &mut Bencher) {
2304 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2306 b.iter(|| s.chars().rev().count());
// Benchmark: walks the chars of a mixed non-ASCII / ASCII string back to front
// with an explicit `for` loop, handing each char to `black_box`.
// NOTE(review): the `b.iter(..)` call that presumably wraps the loop is not
// visible in this listing.
2310 fn char_iterator_rev_for(b: &mut Bencher) {
2311 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2314 for ch in s.chars().rev() { black_box(ch) }
// Benchmark: counts the items of `char_indices()` and asserts that the count
// equals the string's `char_len()`.
2319 fn char_indicesator(b: &mut Bencher) {
2320 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count, computed once outside the measured closure.
2321 let len = s.char_len();
2323 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of `char_indices()` iterated from the back and
// asserts that the count equals the string's `char_len()`.
2327 fn char_indicesator_rev(b: &mut Bencher) {
2328 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
// Expected count, computed once outside the measured closure.
2329 let len = s.char_len();
2331 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a string containing non-ASCII text on the ASCII char 'V'
// and asserts that three pieces result (the input contains 'V' twice).
2335 fn split_unicode_ascii(b: &mut Bencher) {
2336 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2338 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: same split as on 'V' directly, but through a custom `CharEq`
// wrapper whose `only_ascii()` returns false; asserts that three pieces result.
2342 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single char used as the split pattern.
2343 struct NotAscii(char);
2344 impl CharEq for NotAscii {
// NOTE(review): only the destructuring of `self` is visible here; the
// expression that produces the `bool` result is not in this listing.
2345 fn matches(&mut self, c: char) -> bool {
2346 let NotAscii(cc) = *self;
// Always reports that the pattern is not ASCII-only.
2349 fn only_ascii(&self) -> bool { false }
2351 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2353 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on ' ' and asserts that the number of
// pieces matches a count computed once up front.
2358 fn split_ascii(b: &mut Bencher) {
2359 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed outside the measured closure.
2360 let len = s.split(' ').count();
2362 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on ' ' through a custom `CharEq` wrapper
// whose `only_ascii()` returns false, and asserts that the number of pieces
// matches the count obtained by splitting on ' ' directly.
2366 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single char used as the split pattern.
2367 struct NotAscii(char);
2368 impl CharEq for NotAscii {
// NOTE(review): only the destructuring of `self` is visible here; the
// expression that produces the `bool` result is not in this listing.
2370 fn matches(&mut self, c: char) -> bool {
2371 let NotAscii(cc) = *self;
// Always reports that the pattern is not ASCII-only.
2374 fn only_ascii(&self) -> bool { false }
2376 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count from a plain char split, computed outside the closure.
2377 let len = s.split(' ').count();
2379 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the predicate
// and asserts that the number of pieces matches a plain split on ' '.
2383 fn split_extern_fn(b: &mut Bencher) {
2384 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed outside the measured closure.
2385 let len = s.split(' ').count();
// Predicate: true exactly for the space character.
2386 fn pred(c: char) -> bool { c == ' ' }
2388 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the predicate and
// asserts that the number of pieces matches a plain split on ' '.
2392 fn split_closure(b: &mut Bencher) {
2393 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed outside the measured closure.
2394 let len = s.split(' ').count();
2396 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a one-element slice of chars as the
// pattern and asserts that the number of pieces matches a plain split on ' '.
2400 fn split_slice(b: &mut Bencher) {
2401 let s = "Mary had a little lamb, Little lamb, little-lamb.";
// Reference piece count, computed outside the measured closure.
2402 let len = s.split(' ').count();
2404 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
// Benchmark over a 100-byte ASCII input; going by the name it presumably
// measures UTF-8 validation.
// NOTE(review): the measured `b.iter(..)` body is not visible in this listing;
// only the input setup and its length check are.
2408 fn is_utf8_100_ascii(b: &mut Bencher) {
2410 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2411 Lorem ipsum dolor sit amet, consectetur. ";
// Guard: the input must be exactly 100 bytes long.
2413 assert_eq!(100, s.len());
// Benchmark over the bytes of a non-ASCII string totalling 100 bytes; going by
// the name it presumably measures UTF-8 validation.
// NOTE(review): the measured `b.iter(..)` body is not visible in this listing;
// only the input setup and its length check are.
2420 fn is_utf8_100_multibyte(b: &mut Bencher) {
2421 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
// Guard: the input must be exactly 100 bytes long.
2422 assert_eq!(100, s.len());
// Benchmark: joins ten copies of a string with `connect(sep)` and checks the
// length of the result.
// NOTE(review): the binding of `sep` and the enclosing `b.iter(..)` call are
// not visible in this listing.
2429 fn bench_connect(b: &mut Bencher) {
2430 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2432 let v = [s, s, s, s, s, s, s, s, s, s];
// Ten copies of `s` joined by nine separators.
2434 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: asserts that a one-sentence haystack contains `needle`.
// NOTE(review): the binding of `needle` and the enclosing `b.iter(..)` call are
// not visible in this listing.
2439 fn bench_contains_short_short(b: &mut Bencher) {
2440 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2444 assert!(haystack.contains(needle));
// Benchmark: asserts that a haystack of several paragraphs of filler text does
// NOT contain the short needle "english".
// NOTE(review): the `let haystack = ...` line that opens the literal below and
// the enclosing `b.iter(..)` call are not visible in this listing. No comments
// are placed between the lines of the literal because they would become part
// of the string.
2449 fn bench_contains_short_long(b: &mut Bencher) {
2451 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2452 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2453 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2454 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2455 tempus vel, gravida nec quam.
2457 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2458 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2459 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2460 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2461 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2462 interdum. Curabitur ut nisi justo.
2464 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2465 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2466 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2467 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2468 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2469 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2470 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2471 Aliquam sit amet placerat lorem.
2473 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2474 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2475 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2476 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2477 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2480 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2481 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2482 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2483 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2484 malesuada sollicitudin quam eu fermentum.";
2485 let needle = "english";
2488 assert!(!haystack.contains(needle));
// Benchmark: asserts that a haystack made only of 'a' characters does NOT
// contain a needle of 'a's ending in 'b'. Going by the name, this input is
// presumably chosen as a bad case for a naive substring search.
// NOTE(review): the enclosing `b.iter(..)` call is not visible in this listing.
2493 fn bench_contains_bad_naive(b: &mut Bencher) {
2494 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2495 let needle = "aaaaaaaab";
2498 assert!(!haystack.contains(needle));
// Benchmark: asserts that a haystack contains a needle that is the very same
// text as the haystack.
// NOTE(review): the enclosing `b.iter(..)` call is not visible in this listing.
2503 fn bench_contains_equal(b: &mut Bencher) {
2504 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2505 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2508 assert!(haystack.contains(needle));