1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with a pointer. `String` is used
20 for an owned string, so there is only one commonly-used `str` type in Rust:
23 `&str` is the borrowed string type. This type of string can only be created
24 from other strings, unless it is a static string (see below). As the word
25 "borrowed" implies, this type of string is owned elsewhere, and this string
26 cannot be moved out of.
28 As an example, here's some code that uses a string.
32 let borrowed_string = "This string is borrowed with the 'static lifetime";
36 From the example above, you can see that Rust's string literals have the
37 `'static` lifetime. This is akin to C's concept of a static string.
39 String literals are allocated statically in the rodata of the
40 executable/library. The string then has the type `&'static str` meaning that
41 the string is valid for the `'static` lifetime, otherwise known as the
42 lifetime of the entire program. As can be inferred from the type, these static
43 strings are not mutable.
47 Many languages have immutable strings by default, and Rust has a particular
48 flavor of this idea. As with the rest of Rust's types, strings are immutable by
49 default. If a string is declared as `mut`, however, it may be mutated. This
50 works the same way as the rest of Rust's type system in the sense that if
51 there's a mutable reference to a string, there may only be one mutable reference
52 to that string. With these guarantees, strings can easily transition between
53 being mutable/immutable with the same benefits of having mutable strings in
58 Rust's string type, `str`, is a sequence of Unicode codepoints encoded as a
59 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
60 encoded UTF-8 sequences. Additionally, strings are not null-terminated
61 and can contain null codepoints.
63 The actual representation of strings has a direct mapping to vectors: `&str`
64 is the same as `&[u8]`.
68 #![doc(primitive = "str")]
73 use core::default::Default;
76 use core::iter::AdditiveIterator;
84 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
85 pub use core::str::{Bytes, CharSplits};
86 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
87 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
88 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
89 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
90 pub use core::str::{Str, StrSlice};
93 Section: Creating a string
96 /// Consumes a vector of bytes to create a new utf-8 string.
98 /// Returns `Err` with the original vector if the vector contains invalid
105 /// let hello_vec = vec![104, 101, 108, 108, 111];
106 /// let string = str::from_utf8_owned(hello_vec);
107 /// assert_eq!(string, Ok("hello".to_string()));
109 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
// Thin wrapper: `vv` is moved into `String::from_utf8` and the result of
// that call is returned unchanged.
110 String::from_utf8(vv)
113 /// Convert a byte to a UTF-8 string
117 /// Fails if invalid UTF-8
123 /// let string = str::from_byte(104);
124 /// assert_eq!(string.as_slice(), "h");
126 pub fn from_byte(b: u8) -> String {
// NOTE(review): the check behind the documented failure is not visible in
// this excerpt (the embedded numbering skips 127) -- confirm it exists.
// The result is built by `String::from_char` from a count of 1 and the byte
// cast to `char`.
128 String::from_char(1, b as char)
131 /// Convert a char to a string
137 /// let string = str::from_char('b');
138 /// assert_eq!(string.as_slice(), "b");
140 pub fn from_char(ch: char) -> String {
// Starts from an empty String.
// NOTE(review): the statements that add `ch` to `buf` and return it are not
// visible in this excerpt.
141 let mut buf = String::new();
146 /// Convert a vector of chars to a string
152 /// let chars = ['h', 'e', 'l', 'l', 'o'];
153 /// let string = str::from_chars(chars);
154 /// assert_eq!(string.as_slice(), "hello");
156 pub fn from_chars(chs: &[char]) -> String {
// Dereference each borrowed char and collect the copies, in slice order,
// into the returned String.
157 chs.iter().map(|c| *c).collect()
160 /// Methods for vectors of strings
161 pub trait StrVector {
// Implemented in this file for `&[S]` and `Vec<S>` where `S: Str`.
162 /// Concatenate a vector of strings.
167 /// let first = "Restaurant at the End of the".to_string();
168 /// let second = " Universe".to_string();
169 /// let string_vec = vec![first, second];
170 /// assert_eq!(string_vec.concat(), "Restaurant at the End of the Universe".to_string());
172 fn concat(&self) -> String;
174 /// Concatenate a vector of strings, placing a given separator between each.
179 /// let first = "Roast".to_string();
180 /// let second = "Sirloin Steak".to_string();
181 /// let string_vec = vec![first, second];
182 /// assert_eq!(string_vec.connect(", "), "Roast, Sirloin Steak".to_string());
// As the example shows, `sep` appears only between neighbouring elements,
// not before the first or after the last one.
184 fn connect(&self, sep: &str) -> String;
187 impl<'a, S: Str> StrVector for &'a [S] {
188 fn concat(&self) -> String {
// Early exit with an empty String.
// NOTE(review): the condition guarding this return is not visible in this
// excerpt (embedded line 189 is absent).
190 return String::new();
193 // `len` calculation may overflow but push_str will check boundaries
// Sum of `len()` over every piece; used only as the initial capacity.
194 let len = self.iter().map(|s| s.as_slice().len()).sum();
196 let mut result = String::with_capacity(len);
// Append the pieces in iteration order.
198 for s in self.iter() {
199 result.push_str(s.as_slice())
205 fn connect(&self, sep: &str) -> String {
// Two early exits follow: an empty String, and a plain `concat()`.
// NOTE(review): their guarding conditions are not visible in this excerpt.
207 return String::new();
212 return self.concat();
215 // this is wrong without the guarantee that `self` is non-empty
216 // `len` calculation may overflow but push_str will check boundaries
// One separator per gap (`self.len() - 1` gaps) plus the summed `len()` of
// all pieces.
217 let len = sep.len() * (self.len() - 1)
218 + self.iter().map(|s| s.as_slice().len()).sum();
219 let mut result = String::with_capacity(len);
// Flag declared for the loop below.
// NOTE(review): the branch that reads and clears `first` is not visible in
// this excerpt; only the two `push_str` calls are.
220 let mut first = true;
222 for s in self.iter() {
226 result.push_str(sep);
228 result.push_str(s.as_slice());
// Both methods forward to the same-named method on `self.as_slice()`.
234 impl<'a, S: Str> StrVector for Vec<S> {
236 fn concat(&self) -> String {
237 self.as_slice().concat()
241 fn connect(&self, sep: &str) -> String {
242 self.as_slice().connect(sep)
250 // Helper functions used for Unicode normalization
// Works on a mutable slice of `(char, u8)` pairs and compares the `u8` half
// (read through `ref1()`) of neighbouring entries.
// NOTE(review): the statements that perform the swap and set `swapped`
// (embedded lines 259-262) are not visible in this excerpt.
251 fn canonical_sort(comb: &mut [(char, u8)]) {
252 let len = comb.len();
253 for i in range(0, len) {
254 let mut swapped = false;
// Each outer iteration shortens the inner range by one (`len-i`).
255 for j in range(1, len-i) {
256 let class_a = *comb[j-1].ref1();
257 let class_b = *comb[j].ref1();
// Only pairs whose values are both non-zero and in descending order satisfy
// this condition.
258 if class_a != 0 && class_b != 0 && class_a > class_b {
// Stop as soon as `swapped` is still false after the inner loop.
263 if !swapped { break; }
// Type of the `kind` field of `Decompositions`; `Decompositions::next`
// matches on `Canonical` and `Compatible` to choose a decomposition function.
// NOTE(review): the variant declarations themselves are not visible in this
// excerpt.
268 enum DecompositionType {
273 /// External iterator for a string's decomposition's characters.
274 /// Use with the `std::iter` module.
276 pub struct Decompositions<'a> {
// Read by `next` to select between `char::decompose_canonical` and
// `char::decompose_compatible`.
277 kind: DecompositionType,
// Pending `(char, class)` pairs: `next` pushes entries whose `u8` comes from
// `canonical_combining_class` and removes them again with `shift()`.
// NOTE(review): `next` also uses `self.iter` and `self.sorted`; their field
// declarations are not visible in this excerpt.
279 buffer: Vec<(char, u8)>,
// NOTE(review): many short lines of this impl are absent from this excerpt
// (see the gaps in the embedded numbering); the comments below describe only
// the statements that are visible.
283 impl<'a> Iterator<char> for Decompositions<'a> {
285 fn next(&mut self) -> Option<char> {
286 use unicode::normalization::canonical_combining_class;
// First look at the front of the buffer; the `sorted` flag gates the
// `Some` arm and is cleared in the fallback arm.
288 match self.buffer.as_slice().head() {
294 Some(&(c, _)) if self.sorted => {
298 _ => self.sorted = false
// Choose the decomposition function that matches `self.kind`.
301 let decomposer = match self.kind {
302 Canonical => char::decompose_canonical,
303 Compatible => char::decompose_compatible
// Pull characters from the underlying iterator.
307 for ch in self.iter {
308 let buffer = &mut self.buffer;
309 let sorted = &mut self.sorted;
// `d` is looked up for its combining class; a class of 0 arriving while the
// buffer is not yet marked sorted triggers `canonical_sort` on the buffer.
311 let class = canonical_combining_class(d);
312 if class == 0 && !*sorted {
313 canonical_sort(buffer.as_mut_slice());
// Every `d` is appended together with its class.
316 buffer.push((d, class));
// Sort the buffer again outside the per-character path.
323 canonical_sort(self.buffer.as_mut_slice());
// Remove the front entry and yield its char.
327 match self.buffer.shift() {
332 Some((c, _)) => Some(c),
337 fn size_hint(&self) -> (uint, Option<uint>) {
// Only the lower bound of the underlying iterator's hint is read here.
338 let (lower, _) = self.iter.size_hint();
343 /// Replace all occurrences of one string with another
347 /// * s - The string containing substrings to replace
348 /// * from - The string to replace
349 /// * to - The replacement string
353 /// The original string with all occurrences of `from` replaced with `to`
354 pub fn replace(s: &str, from: &str, to: &str) -> String {
355 let mut result = String::new();
// Position in `s` up to which text has already been handled.
356 let mut last_end = 0;
357 for (start, end) in s.match_indices(from) {
// Copy the text between `last_end` and the start of this match.
// NOTE(review): the statements that append `to` and advance `last_end` are
// not visible in this excerpt.
358 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
// Copy the remainder of `s` from `last_end` to its end.
362 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
370 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
371 /// if `v` contains any invalid data.
379 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
380 /// 0x0073, 0x0069, 0x0063];
381 /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_string()));
383 /// // 𝄞mu<invalid>ic
385 /// assert_eq!(str::from_utf16(v), None);
387 pub fn from_utf16(v: &[u16]) -> Option<String> {
// The initial capacity is half the number of input code units.
388 let mut s = String::with_capacity(v.len() / 2);
389 for c in utf16_items(v) {
// A scalar value is appended; a lone surrogate makes the whole call return
// `None` immediately.
391 ScalarValue(c) => s.push_char(c),
392 LoneSurrogate(_) => return None
398 /// Decode a UTF-16 encoded vector `v` into a string, replacing
399 /// invalid data with the replacement character (U+FFFD).
405 /// // 𝄞mus<invalid>ic<invalid>
406 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
407 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
410 /// assert_eq!(str::from_utf16_lossy(v),
411 /// "𝄞mus\uFFFDic\uFFFD".to_string());
413 pub fn from_utf16_lossy(v: &[u16]) -> String {
// Every item produced by `utf16_items` is converted with `to_char_lossy`
// and the resulting chars are collected into the String.
414 utf16_items(v).map(|c| c.to_char_lossy()).collect()
417 // Return the initial codepoint accumulator for the first byte.
418 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
419 // for width 3, and 3 bits for width 4
// `0x7F >> $width` is the mask that keeps those bits: 0x1F for width 2,
// 0x0F for width 3 and 0x07 for width 4. The masked byte is widened to u32.
420 macro_rules! utf8_first_byte(
421 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
424 // return the value of $ch updated with continuation byte $byte
// The accumulator is shifted left by 6 and the low six bits of the byte
// (mask 63) are OR-ed in.
425 macro_rules! utf8_acc_cont_byte(
426 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
// 128 is 0b1000_0000. `from_utf8_lossy` compares `byte & 192` against this
// value, i.e. it tests whether the top two bits of a byte are `10`.
429 static TAG_CONT_U8: u8 = 128u8;
431 /// Converts a vector of bytes to a new utf-8 string.
432 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
437 /// let input = b"Hello \xF0\x90\x80World";
438 /// let output = std::str::from_utf8_lossy(input);
439 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
// NOTE(review): many short lines of this function are absent from this
// excerpt (see the gaps in the embedded numbering); the comments below
// describe only the statements that are visible.
441 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
// Borrowing path: the input bytes are reinterpreted as a string slice and
// returned in the `Slice` variant. The guarding condition is not visible here.
443 return Slice(unsafe { mem::transmute(v) })
446 static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
// Element read through `unsafe_ref`, inside an `unsafe` block.
449 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
450 unsafe { *xs.unsafe_ref(i) }
// Counterpart taking the total length as a third argument; its body is not
// visible in this excerpt.
452 fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
// Owning path: the output buffer starts with capacity `total`.
460 let mut res = String::with_capacity(total);
// Copy the first `i` bytes of the input in one go.
464 res.push_bytes(v.slice_to(i))
468 // subseqidx is the index of the first byte of the subsequence we're looking at.
469 // It's used to copy a bunch of contiguous good codepoints at once instead of copying
471 let mut subseqidx = 0;
475 let byte = unsafe_get(v, i);
// Local `error!()` helper: copies the pending bytes `subseqidx..i_` and then
// appends the encoded replacement character.
478 macro_rules! error(() => ({
481 res.push_bytes(v.slice(subseqidx, i_));
484 res.push_bytes(REPLACEMENT);
489 // subseqidx handles this
491 let w = utf8_char_width(byte);
// A following byte must have its top two bits equal to `10`.
495 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
// Accepted (lead byte, next byte) pairs for leads 0xE0..0xEF: 0xE0 needs
// 0xA0..0xBF, 0xED needs 0x80..0x9F, all other leads take 0x80..0xBF.
502 match (byte, safe_get(v, i, total)) {
503 (0xE0 , 0xA0 .. 0xBF) => (),
504 (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
505 (0xED , 0x80 .. 0x9F) => (),
506 (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
513 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
// Accepted (lead byte, next byte) pairs for leads 0xF0..0xF4: 0xF0 needs
// 0x90..0xBF, 0xF4 needs 0x80..0x8F, 0xF1..0xF3 take 0x80..0xBF.
520 match (byte, safe_get(v, i, total)) {
521 (0xF0 , 0x90 .. 0xBF) => (),
522 (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
523 (0xF4 , 0x80 .. 0x8F) => (),
530 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
535 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
// Copy the trailing bytes from `subseqidx` to the end, if any remain.
548 if subseqidx < total {
550 res.push_bytes(v.slice(subseqidx, total))
// The repaired text is returned in the `Owned` variant.
553 Owned(res.into_string())
560 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
561 /// This can be useful as an optimization when an allocation is sometimes
562 /// needed but not always.
// NOTE(review): the variant declarations are not visible in this excerpt;
// the rest of the file constructs and matches `Slice(..)` and `Owned(..)`.
563 pub enum MaybeOwned<'a> {
564 /// A borrowed string
570 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
// Plain type alias: `MaybeOwned` with its lifetime fixed to `'static`.
571 pub type SendStr = MaybeOwned<'static>;
// Inherent predicates for telling the two cases apart.
// NOTE(review): both method bodies are not visible in this excerpt.
573 impl<'a> MaybeOwned<'a> {
574 /// Returns `true` if this `MaybeOwned` wraps an owned string
576 pub fn is_owned(&self) -> bool {
583 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
585 pub fn is_slice(&self) -> bool {
593 /// Trait for moving into a `MaybeOwned`
// Implemented in this file for `String`, `&'a str` and `MaybeOwned<'a>`.
594 pub trait IntoMaybeOwned<'a> {
595 /// Moves self into a `MaybeOwned`
// Takes `self` by value, so the source is consumed by the conversion.
596 fn into_maybe_owned(self) -> MaybeOwned<'a>;
// Conversion from an owned `String`.
// NOTE(review): the method body is not visible in this excerpt.
599 impl<'a> IntoMaybeOwned<'a> for String {
601 fn into_maybe_owned(self) -> MaybeOwned<'a> {
606 impl<'a> IntoMaybeOwned<'a> for &'a str {
// The borrowed slice is wrapped directly in the `Slice` variant.
608 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
611 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
// Identity conversion: the value is returned as it is.
613 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
616 impl<'a> PartialEq for MaybeOwned<'a> {
618 fn eq(&self, other: &MaybeOwned) -> bool {
// Equality compares the `as_slice()` views of both sides, so which variant
// holds the text plays no part in the comparison.
619 self.as_slice() == other.as_slice()
// Marker impl with an empty body.
623 impl<'a> Eq for MaybeOwned<'a> {}
625 impl<'a> PartialOrd for MaybeOwned<'a> {
627 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
// Always `Some`: the answer comes from `cmp`.
628 Some(self.cmp(other))
632 impl<'a> Ord for MaybeOwned<'a> {
634 fn cmp(&self, other: &MaybeOwned) -> Ordering {
// Ordering is that of the two `as_slice()` views.
635 self.as_slice().cmp(&other.as_slice())
// Lets a `MaybeOwned` be compared with any `S: Str`.
639 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
641 fn equiv(&self, other: &S) -> bool {
// Both sides are compared through their `as_slice()` views.
642 self.as_slice() == other.as_slice()
646 impl<'a> Str for MaybeOwned<'a> {
648 fn as_slice<'b>(&'b self) -> &'b str {
// For the owned case the inner string's own `as_slice()` is returned.
// NOTE(review): the `Slice` arm is not visible in this excerpt.
651 Owned(ref s) => s.as_slice()
656 impl<'a> StrAllocating for MaybeOwned<'a> {
658 fn into_string(self) -> String {
// A borrowed slice is turned into a `String` with `to_string()`.
// NOTE(review): the `Owned` arm is not visible in this excerpt.
660 Slice(s) => s.to_string(),
666 impl<'a> Collection for MaybeOwned<'a> {
// The length reported is the length of the `as_slice()` view.
668 fn len(&self) -> uint { self.as_slice().len() }
671 impl<'a> Clone for MaybeOwned<'a> {
673 fn clone(&self) -> MaybeOwned<'a> {
// A borrowed slice is re-wrapped as it is; an owned string is duplicated
// through `to_string()`, so the clone keeps the variant of the source.
675 Slice(s) => Slice(s),
676 Owned(ref s) => Owned(s.to_string())
681 impl<'a> Default for MaybeOwned<'a> {
// The default value is the `Slice` variant holding the empty literal.
683 fn default() -> MaybeOwned<'a> { Slice("") }
686 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
688 fn hash(&self, hasher: &mut H) {
// Hashing is delegated to the `as_slice()` view.
689 self.as_slice().hash(hasher)
693 impl<'a> fmt::Show for MaybeOwned<'a> {
695 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
// Both variants forward formatting to the string they hold.
697 Slice(ref s) => s.fmt(f),
698 Owned(ref s) => s.fmt(f)
703 /// Unsafe operations
705 use core::prelude::*;
707 use core::raw::Slice;
712 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
713 pub use core::str::raw::{slice_unchecked};
715 /// Create a Rust string from a *u8 buffer of the given length
// Declared `unsafe`: presumably the caller must guarantee that `buf` points
// at `len` readable bytes -- TODO confirm against callers.
716 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
717 let mut result = String::new();
// A `Slice` value is transmuted into the argument of `push_bytes`, which
// appends to the fresh String.
// NOTE(review): the `Slice` field initialisers are not visible in this
// excerpt.
718 result.push_bytes(mem::transmute(Slice {
725 /// Create a Rust string from a null-terminated C string
726 pub unsafe fn from_c_str(c_string: *const i8) -> String {
727 let mut buf = String::new();
// Probe successive offsets from `c_string` until a zero value is read.
// NOTE(review): the declaration and increment of `len` are not visible in
// this excerpt.
729 while *c_string.offset(len) != 0 {
// A `Slice` value is transmuted into the argument of `push_bytes`; its
// field initialisers are not visible here.
732 buf.push_bytes(mem::transmute(Slice {
739 /// Converts an owned vector of bytes to a new owned string. This assumes
740 /// that the utf-8-ness of the vector has already been validated
// NOTE(review): the body is not visible in this excerpt, so how the
// conversion is carried out cannot be stated here.
742 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
746 /// Converts a byte to a string.
747 pub unsafe fn from_byte(u: u8) -> String {
// Wraps the byte in a one-element vector and hands it to `from_utf8_owned`
// (presumably the unchecked variant declared just above -- confirm); no
// check on `u` is visible here.
748 from_utf8_owned(vec![u])
751 /// Checks `from_buf_len` against a vector of seven 65 (`A`) bytes and a zero.
753 /// A length of 3 is requested, so the expected result is the string "AAA".
754 /// NOTE(review): this comment used to describe setting the length of a
755 /// string, which does not match the function it is attached to.
757 fn test_from_buf_len() {
758 use slice::ImmutableVector;
759 use str::StrAllocating;
762 let a = vec![65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
// NOTE(review): the definition of `b` is not visible in this excerpt;
// presumably it is a pointer into `a`.
764 let c = from_buf_len(b, 3u);
765 assert_eq!(c, "AAA".to_string());
771 Section: Trait implementations
774 /// Any string that can be represented as a slice
// NOTE(review): closing braces, return expressions and other short lines of
// the default methods are absent from this excerpt; the comments below
// describe only the statements that are visible.
775 pub trait StrAllocating: Str {
776 /// Convert `self` into a `String`, not making a copy if possible.
777 fn into_string(self) -> String;
779 /// Convert `self` into a `String`.
781 fn to_string(&self) -> String {
// Builds a new String from the slice view.
782 String::from_str(self.as_slice())
785 #[allow(missing_doc)]
786 #[deprecated = "replaced by .into_string()"]
787 fn into_owned(self) -> String {
791 /// Escape each char in `self` with `char::escape_default`.
792 fn escape_default(&self) -> String {
793 let me = self.as_slice();
// Capacity starts at the input's `len()`.
794 let mut out = String::with_capacity(me.len());
795 for c in me.chars() {
// The callback receives each char of the escaped form and appends it.
796 c.escape_default(|c| out.push_char(c));
801 /// Escape each char in `self` with `char::escape_unicode`.
802 fn escape_unicode(&self) -> String {
803 let me = self.as_slice();
804 let mut out = String::with_capacity(me.len());
805 for c in me.chars() {
806 c.escape_unicode(|c| out.push_char(c));
811 /// Replace all occurrences of one string with another.
815 /// * `from` - The string to replace
816 /// * `to` - The replacement string
820 /// The original string with all occurrences of `from` replaced with `to`.
825 /// let s = "Do you know the muffin man,
826 /// The muffin man, the muffin man, ...".to_string();
828 /// assert_eq!(s.replace("muffin man", "little lamb"),
829 /// "Do you know the little lamb,
830 /// The little lamb, the little lamb, ...".to_string());
832 /// // not found, so no change.
833 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
835 fn replace(&self, from: &str, to: &str) -> String {
836 let me = self.as_slice();
837 let mut result = String::new();
// Position in `me` up to which text has already been handled.
838 let mut last_end = 0;
839 for (start, end) in me.match_indices(from) {
// Copy the text between `last_end` and the start of this match.
840 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
// Copy the remainder of `me` from `last_end` to its end.
844 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
848 #[allow(missing_doc)]
849 #[deprecated = "obsolete, use `to_string`"]
851 fn to_owned(&self) -> String {
// The bytes are copied into a new `Vec` and that `Vec` is transmuted into
// the return type.
853 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
857 /// Converts to a vector of `u16` encoded as UTF-16.
858 #[deprecated = "use `utf16_units` instead"]
859 fn to_utf16(&self) -> Vec<u16> {
// Collects the units yielded by `utf16_units()`.
860 self.as_slice().utf16_units().collect::<Vec<u16>>()
863 /// Given a string, make a new string with repeated copies of it.
864 fn repeat(&self, nn: uint) -> String {
865 let me = self.as_slice();
// Capacity for `nn` copies is requested up front.
866 let mut ret = String::with_capacity(nn * me.len());
867 for _ in range(0, nn) {
873 /// Levenshtein Distance between two strings.
874 fn lev_distance(&self, t: &str) -> uint {
875 let me = self.as_slice();
// When either length is zero, the other length is the answer.
// NOTE(review): the definitions of `slen` and `tlen` are not visible in
// this excerpt.
879 if slen == 0 { return tlen; }
880 if tlen == 0 { return slen; }
// `dcol` starts out as 0, 1, ..., tlen.
882 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
884 for (i, sc) in me.chars().enumerate() {
887 *dcol.get_mut(0) = current + 1;
889 for (j, tc) in t.chars().enumerate() {
891 let next = *dcol.get(j + 1);
894 *dcol.get_mut(j + 1) = current;
// Cell `j + 1` is narrowed down through successive `cmp::min` calls.
896 *dcol.get_mut(j + 1) = cmp::min(current, next);
897 *dcol.get_mut(j + 1) = cmp::min(*dcol.get(j + 1),
// The result is read from the last cell of `dcol`.
905 return *dcol.get(tlen);
908 /// An Iterator over the string in Unicode Normalization Form D
909 /// (canonical decomposition).
911 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
// The iterator is fed from the chars of the slice view.
913 iter: self.as_slice().chars(),
920 /// An Iterator over the string in Unicode Normalization Form KD
921 /// (compatibility decomposition).
923 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
925 iter: self.as_slice().chars(),
// Supplies the one required method of `StrAllocating` for borrowed slices.
// NOTE(review): the method body is not visible in this excerpt.
933 impl<'a> StrAllocating for &'a str {
935 fn into_string(self) -> String {
940 /// Methods for owned strings
// NOTE(review): the trait header line is not visible in this excerpt; the
// name `OwnedStr` is taken from the `impl OwnedStr for String` that follows.
942 /// Consumes the string, returning the underlying byte buffer.
944 /// The buffer does not have a null terminator.
945 fn into_bytes(self) -> Vec<u8>;
947 /// Pushes the given string onto this string, returning the concatenation of the two strings.
// Takes `self` by value, so the receiver is consumed by the call.
948 fn append(self, rhs: &str) -> String;
951 impl OwnedStr for String {
953 fn into_bytes(self) -> Vec<u8> {
// The `String` value is transmuted into the return type.
// NOTE(review): this relies on `String` and `Vec<u8>` having the same
// layout -- confirm.
954 unsafe { mem::transmute(self) }
// `self` is bound mutably; NOTE(review): the body is not visible in this
// excerpt.
958 fn append(mut self, rhs: &str) -> String {
967 use std::iter::AdditiveIterator;
968 use std::default::Default;
976 assert!((eq_slice("foobar".slice(0, 3), "foo")));
977 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
978 assert!((!eq_slice("foo1", "foo2")));
984 assert!("" <= "foo");
985 assert!("foo" <= "foo");
986 assert!("foo" != "bar");
991 assert_eq!("".len(), 0u);
992 assert_eq!("hello world".len(), 11u);
993 assert_eq!("\x63".len(), 1u);
994 assert_eq!("\xa2".len(), 2u);
995 assert_eq!("\u03c0".len(), 2u);
996 assert_eq!("\u2620".len(), 3u);
997 assert_eq!("\U0001d11e".len(), 4u);
999 assert_eq!("".char_len(), 0u);
1000 assert_eq!("hello world".char_len(), 11u);
1001 assert_eq!("\x63".char_len(), 1u);
1002 assert_eq!("\xa2".char_len(), 1u);
1003 assert_eq!("\u03c0".char_len(), 1u);
1004 assert_eq!("\u2620".char_len(), 1u);
1005 assert_eq!("\U0001d11e".char_len(), 1u);
1006 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
1011 assert_eq!("hello".find('l'), Some(2u));
1012 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
1013 assert!("hello".find('x').is_none());
1014 assert!("hello".find(|c:char| c == 'x').is_none());
1015 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1016 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
1021 assert_eq!("hello".rfind('l'), Some(3u));
1022 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
1023 assert!("hello".rfind('x').is_none());
1024 assert!("hello".rfind(|c:char| c == 'x').is_none());
1025 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1026 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
1031 let empty = "".to_string();
1032 let s: String = empty.as_slice().chars().collect();
1033 assert_eq!(empty, s);
1034 let data = "ประเทศไทย中".to_string();
1035 let s: String = data.as_slice().chars().collect();
1036 assert_eq!(data, s);
// `into_bytes` on "asdf" must give the same bytes as the literal b"asdf".
1040 fn test_into_bytes() {
1041 let data = "asdf".to_string();
1042 let buf = data.into_bytes();
1043 assert_eq!(b"asdf", buf.as_slice());
// Exercises `find_str` on empty, ASCII and multi-byte inputs. Expected
// offsets are relative to the start of the slice being searched, hence the
// `- 2u`, `- 6u` and `- 43u` terms.
1047 fn test_find_str() {
1049 assert_eq!("".find_str(""), Some(0u));
1050 assert!("banana".find_str("apple pie").is_none());
1052 let data = "abcabc";
1053 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1054 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1055 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1057 let string = "ประเทศไทย中华Việt Nam";
// `data` is `string` followed by a second copy of itself.
1058 let mut data = string.to_string();
1059 data.push_str(string);
1060 assert!(data.as_slice().find_str("ไท华").is_none());
1061 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
1062 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
// Searches within the first copy (bytes 0..43).
1064 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
1065 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
1066 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
1067 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
1068 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
// The same searches within the second copy (bytes 43..86).
1070 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1071 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1072 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1073 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1074 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1078 fn test_slice_chars() {
// Helper: `b` must equal the `b.char_len()` characters of `a` that begin at
// character index `start`.
1079 fn t(a: &str, b: &str, start: uint) {
1080 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1083 t("hello", "llo", 2);
1084 t("hello", "el", 1);
// Character indices 2..8 of a multi-byte string.
1087 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1092 fn t(v: &[String], s: &str) {
1093 assert_eq!(v.concat().as_slice(), s);
1095 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1096 "no".to_string(), "good".to_string()], "youknowI'mnogood");
1097 let v: &[String] = [];
1099 t(["hi".to_string()], "hi");
1104 fn t(v: &[String], sep: &str, s: &str) {
1105 assert_eq!(v.connect(sep).as_slice(), s);
1107 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1108 "no".to_string(), "good".to_string()],
1109 " ", "you know I'm no good");
1110 let v: &[String] = [];
1112 t(["hi".to_string()], " ", "hi");
// `concat` on slices of `&str`.
1116 fn test_concat_slices() {
// Helper: the concatenation of `v` must equal `s`.
1117 fn t(v: &[&str], s: &str) {
1118 assert_eq!(v.concat().as_slice(), s);
1120 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
// Empty input; NOTE(review): the call that uses `v` is not visible in this
// excerpt.
1121 let v: &[&str] = [];
// `connect` on slices of `&str`.
1127 fn test_connect_slices() {
// Helper: joining `v` with `sep` must equal `s`.
1128 fn t(v: &[&str], sep: &str, s: &str) {
1129 assert_eq!(v.connect(sep).as_slice(), s);
1131 t(["you", "know", "I'm", "no", "good"],
1132 " ", "you know I'm no good");
// A single element is expected back without any separator.
1134 t(["hi"], " ", "hi");
1139 assert_eq!("x".repeat(4), "xxxx".to_string());
1140 assert_eq!("hi".repeat(4), "hihihihi".to_string());
1141 assert_eq!("ไท华".repeat(3), "ไท华ไท华ไท华".to_string());
1142 assert_eq!("".repeat(4), "".to_string());
1143 assert_eq!("hi".repeat(0), "".to_string());
// `raw::slice_bytes` on small literals and on a large generated string.
1147 fn test_unsafe_slice() {
1148 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1149 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1150 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
// NOTE(review): the loops inside the two helpers below are not visible in
// this excerpt; only the `push_str` calls are.
1151 fn a_million_letter_a() -> String {
1153 let mut rs = String::new();
1155 rs.push_str("aaaaaaaaaa");
1160 fn half_a_million_letter_a() -> String {
1162 let mut rs = String::new();
1164 rs.push_str("aaaaa");
1169 let letters = a_million_letter_a();
// A slice of the long string ending at 500000 must equal the shorter
// generated string.
1170 assert!(half_a_million_letter_a() ==
1171 unsafe {raw::slice_bytes(letters.as_slice(),
1173 500000)}.to_string());
// `starts_with`: the empty prefix always matches, a prefix longer than the
// string never does, and a multi-byte prefix is handled.
1177 fn test_starts_with() {
1178 assert!(("".starts_with("")));
1179 assert!(("abc".starts_with("")));
1180 assert!(("abc".starts_with("a")));
1181 assert!((!"a".starts_with("abc")));
1182 assert!((!"".starts_with("abc")));
1183 assert!((!"ödd".starts_with("-")));
1184 assert!(("ödd".starts_with("öd")));
// `ends_with`: the empty suffix always matches, a suffix longer than the
// string never does, and a multi-byte suffix is handled.
1188 fn test_ends_with() {
1189 assert!(("".ends_with("")));
1190 assert!(("abc".ends_with("")));
1191 assert!(("abc".ends_with("c")));
1192 assert!((!"a".ends_with("abc")));
1193 assert!((!"".ends_with("abc")));
1194 assert!((!"ddö".ends_with("-")));
1195 assert!(("ddö".ends_with("dö")));
// `is_empty` is true for "" and false for a one-character string.
1199 fn test_is_empty() {
1200 assert!("".is_empty());
1201 assert!(!"a".is_empty());
1207 assert_eq!("".replace(a, "b"), "".to_string());
1208 assert_eq!("a".replace(a, "b"), "b".to_string());
1209 assert_eq!("ab".replace(a, "b"), "bb".to_string());
1211 assert!(" test test ".replace(test, "toast") ==
1212 " toast toast ".to_string());
1213 assert_eq!(" test test ".replace(test, ""), " ".to_string());
// `replace` with an Arabic replacement inside Thai/Chinese text.
// NOTE(review): the definition of the pattern `a` is not visible in this
// excerpt.
1217 fn test_replace_2a() {
1218 let data = "ประเทศไทย中华";
1219 let repl = "دولة الكويت";
1222 let a2 = "دولة الكويتทศไทย中华";
1223 assert_eq!(data.replace(a, repl).as_slice(), a2);
// Same data and replacement as `test_replace_2a`, with a different pattern.
// NOTE(review): the definition of the pattern `b` is not visible in this
// excerpt.
1227 fn test_replace_2b() {
1228 let data = "ประเทศไทย中华";
1229 let repl = "دولة الكويت";
1232 let b2 = "ปรدولة الكويتทศไทย中华";
1233 assert_eq!(data.replace(b, repl).as_slice(), b2);
// Same data and replacement as `test_replace_2a`, with a different pattern.
// NOTE(review): the definition of the pattern `c` is not visible in this
// excerpt.
1237 fn test_replace_2c() {
1238 let data = "ประเทศไทย中华";
1239 let repl = "دولة الكويت";
1242 let c2 = "ประเทศไทยدولة الكويت";
1243 assert_eq!(data.replace(c, repl).as_slice(), c2);
// The expected output equals the input, i.e. the pattern `d` is expected not
// to occur in `data`.
// NOTE(review): the definition of `d` is not visible in this excerpt.
1247 fn test_replace_2d() {
1248 let data = "ประเทศไทย中华";
1249 let repl = "دولة الكويت";
1252 assert_eq!(data.replace(d, repl).as_slice(), data);
1257 assert_eq!("ab", "abc".slice(0, 2));
1258 assert_eq!("bc", "abc".slice(1, 3));
1259 assert_eq!("", "abc".slice(1, 1));
1260 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1262 let data = "ประเทศไทย中华";
1263 assert_eq!("ป", data.slice(0, 3));
1264 assert_eq!("ร", data.slice(3, 6));
1265 assert_eq!("", data.slice(3, 3));
1266 assert_eq!("华", data.slice(30, 33));
1268 fn a_million_letter_x() -> String {
1270 let mut rs = String::new();
1272 rs.push_str("华华华华华华华华华华");
1277 fn half_a_million_letter_x() -> String {
1279 let mut rs = String::new();
1281 rs.push_str("华华华华华");
1286 let letters = a_million_letter_x();
1287 assert!(half_a_million_letter_x() ==
1288 letters.as_slice().slice(0u, 3u * 500000u).to_string());
1293 let ss = "中华Việt Nam";
1295 assert_eq!("华", ss.slice(3u, 6u));
1296 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1298 assert_eq!("ab", "abc".slice(0u, 2u));
1299 assert_eq!("bc", "abc".slice(1u, 3u));
1300 assert_eq!("", "abc".slice(1u, 1u));
1302 assert_eq!("中", ss.slice(0u, 3u));
1303 assert_eq!("华V", ss.slice(3u, 7u));
1304 assert_eq!("", ss.slice(3u, 3u));
// Slices bytes 0..2 of a string whose first character spans bytes 0..3
// (other tests in this file assert that `slice(0u, 3u)` of it is "中").
// NOTE(review): the test name says this is expected to fail; the attribute
// that would declare that is not visible in this excerpt.
1319 fn test_slice_fail() {
1320 "中华Việt Nam".slice(0u, 2u);
// `slice_from` at the start, in the middle and at the end of "abcd".
1324 fn test_slice_from() {
1325 assert_eq!("abcd".slice_from(0), "abcd");
1326 assert_eq!("abcd".slice_from(2), "cd");
1327 assert_eq!("abcd".slice_from(4), "");
// `slice_to` at the start, in the middle and at the end of "abcd".
1330 fn test_slice_to() {
1331 assert_eq!("abcd".slice_to(0), "");
1332 assert_eq!("abcd".slice_to(2), "ab");
1333 assert_eq!("abcd".slice_to(4), "abcd");
// `trim_left_chars` with an empty set, a char slice, a single char and a
// closure; only the left side of the input is expected to change.
1337 fn test_trim_left_chars() {
1338 let v: &[char] = &[];
1339 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1340 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1341 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1342 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1344 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1345 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1346 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
// `trim_right_chars` with an empty set, a char slice, a single char and a
// closure; only the right side of the input is expected to change.
1350 fn test_trim_right_chars() {
1351 let v: &[char] = &[];
1352 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1353 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1354 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1355 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1357 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1358 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1359 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
// `trim_chars` with an empty set, a char slice, a single char and a closure;
// matching characters inside the string are expected to stay.
1363 fn test_trim_chars() {
1364 let v: &[char] = &[];
1365 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1366 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1367 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1368 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1370 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1371 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1372 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
// `trim_left`: leading whitespace (including U+3000) is expected to go,
// trailing whitespace to stay.
1376 fn test_trim_left() {
1377 assert_eq!("".trim_left(), "");
1378 assert_eq!("a".trim_left(), "a");
1379 assert_eq!(" ".trim_left(), "");
1380 assert_eq!(" blah".trim_left(), "blah");
1381 assert_eq!(" \u3000 wut".trim_left(), "wut");
1382 assert_eq!("hey ".trim_left(), "hey ");
// `trim_right`: trailing whitespace (including U+3000) is expected to go,
// leading whitespace to stay.
1386 fn test_trim_right() {
1387 assert_eq!("".trim_right(), "");
1388 assert_eq!("a".trim_right(), "a");
1389 assert_eq!(" ".trim_right(), "");
1390 assert_eq!("blah ".trim_right(), "blah");
1391 assert_eq!("wut \u3000 ".trim_right(), "wut");
1392 assert_eq!(" hey".trim_right(), " hey");
1397 assert_eq!("".trim(), "");
1398 assert_eq!("a".trim(), "a");
1399 assert_eq!(" ".trim(), "");
1400 assert_eq!(" blah ".trim(), "blah");
1401 assert_eq!("\nwut \u3000 ".trim(), "wut");
1402 assert_eq!(" hey dude ".trim(), "hey dude");
// `is_whitespace` is expected to hold for the empty string and for
// whitespace-only strings, and to fail once another character is present.
1406 fn test_is_whitespace() {
1407 assert!("".is_whitespace());
1408 assert!(" ".is_whitespace());
1409 assert!("\u2009".is_whitespace()); // Thin space
1410 assert!(" \n\t ".is_whitespace());
1411 assert!(!" _ ".is_whitespace());
// `slice_shift_char` is expected to return the first character together
// with the rest of the string.
1415 fn test_slice_shift_char() {
1416 let data = "ประเทศไทย中";
1417 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
// The expected result here is `None` with an empty remainder.
// NOTE(review): the definition of `empty` is not visible in this excerpt.
1421 fn test_slice_shift_char_2() {
1423 assert_eq!(empty.slice_shift_char(), (None, ""));
1428 // deny overlong encodings
1429 assert!(!is_utf8([0xc0, 0x80]));
1430 assert!(!is_utf8([0xc0, 0xae]));
1431 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1432 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1433 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1434 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1435 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1438 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1439 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1441 assert!(is_utf8([0xC2, 0x80]));
1442 assert!(is_utf8([0xDF, 0xBF]));
1443 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1444 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1445 assert!(is_utf8([0xEE, 0x80, 0x80]));
1446 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1447 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1448 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
// `pos!` asserts that `is_utf16` holds for each argument; `neg!` asserts
// that it does not.
// NOTE(review): several short `pos!`/`neg!` invocations are not visible in
// this excerpt; only their leading comments remain.
1452 fn test_is_utf16() {
1453 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1461 // surrogate pairs (randomly generated with Python 3's
1462 // .encode('utf-16be'))
1463 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1464 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1465 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1467 // mixtures (also random)
1468 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1469 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1470 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1473 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1476 // surrogate + regular unit
1478 // surrogate + lead surrogate
1480 // unterminated surrogate
1482 // trail surrogate without a lead
1485 // random byte sequences that Python 3's .decode('utf-16be')
1487 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1488 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1489 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1490 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1491 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1492 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1493 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1494 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1495 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1496 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1497 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1498 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1499 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1500 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1501 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1502 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1503 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1504 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1505 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1506 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1507 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1511 fn test_raw_from_c_str() {
1513 let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
1515 let c = raw::from_c_str(b);
1516 assert_eq!(c, "AAAAAAA".to_string());
1521 fn test_as_bytes() {
1524 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1525 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1528 assert_eq!("".as_bytes(), &[]);
1529 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1530 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1535 fn test_as_bytes_fail() {
1536 // Don't double free. (I'm not sure if this exercises the
1537 // original problem code path anymore.)
1538 let s = "".to_string();
1539 let _bytes = s.as_bytes();
1545 let buf = "hello".as_ptr();
1547 assert_eq!(*buf.offset(0), 'h' as u8);
1548 assert_eq!(*buf.offset(1), 'e' as u8);
1549 assert_eq!(*buf.offset(2), 'l' as u8);
1550 assert_eq!(*buf.offset(3), 'l' as u8);
1551 assert_eq!(*buf.offset(4), 'o' as u8);
1556 fn test_subslice_offset() {
1557 let a = "kernelsprite";
1558 let b = a.slice(7, a.len());
1559 let c = a.slice(0, a.len() - 6);
1560 assert_eq!(a.subslice_offset(b), 7);
1561 assert_eq!(a.subslice_offset(c), 0);
1563 let string = "a\nb\nc";
1564 let lines: Vec<&str> = string.lines().collect();
1565 let lines = lines.as_slice();
1566 assert_eq!(string.subslice_offset(lines[0]), 0);
1567 assert_eq!(string.subslice_offset(lines[1]), 2);
1568 assert_eq!(string.subslice_offset(lines[2]), 4);
1573 fn test_subslice_offset_2() {
1574 let a = "alchemiter";
1575 let b = "cruxtruder";
1576 a.subslice_offset(b);
1580 fn vec_str_conversions() {
1581 let s1: String = "All mimsy were the borogoves".to_string();
1583 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1584 let s2: String = from_utf8(v.as_slice()).unwrap().to_string();
1585 let mut i: uint = 0u;
1586 let n1: uint = s1.len();
1587 let n2: uint = v.len();
1590 let a: u8 = s1.as_bytes()[i];
1591 let b: u8 = s2.as_bytes()[i];
// Substring search: positive and negative cases for ASCII and multi-byte text.
fn test_contains() {
    // (haystack, needle) pairs where the needle is present.
    let present = [
        ("abcde", "bcd"),
        ("abcde", "abcd"),
        ("abcde", "bcde"),
        ("abcde", ""),
        ("", ""),
    ];
    for pair in present.iter() {
        let (haystack, needle) = *pair;
        assert!(haystack.contains(needle));
    }

    // (haystack, needle) pairs where the needle is absent.
    let absent = [("abcde", "def"), ("", "a")];
    for pair in absent.iter() {
        let (haystack, needle) = *pair;
        assert!(!haystack.contains(needle));
    }

    let data = "ประเทศไทย中华Việt Nam";
    let found = ["ประเ", "ะเ", "中华"];
    for needle in found.iter() {
        assert!(data.contains(*needle));
    }
    assert!(!data.contains("ไท华"));
}
1617 fn test_contains_char() {
1618 assert!("abc".contains_char('b'));
1619 assert!("a".contains_char('a'));
1620 assert!(!"abc".contains_char('d'));
1621 assert!(!"".contains_char('a'));
1627 [("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_string(),
1628 vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
1629 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
1630 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
1631 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
1633 ("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_string(),
1634 vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
1635 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
1636 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
1637 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
1638 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
1641 ("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_string(),
1642 vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
1643 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
1644 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
1645 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
1646 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
1647 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
1648 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
1650 ("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_string(),
1651 vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
1652 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
1653 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
1654 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
1655 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
1656 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
1657 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
1658 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
1659 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
1660 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
1662 // Issue #12318, even-numbered non-BMP planes
1663 ("\U00020000".to_string(),
1664 vec![0xD840, 0xDC00])];
1666 for p in pairs.iter() {
1667 let (s, u) = (*p).clone();
1668 let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
1669 let u_as_string = from_utf16(u.as_slice()).unwrap();
1671 assert!(is_utf16(u.as_slice()));
1672 assert_eq!(s_as_utf16, u);
1674 assert_eq!(u_as_string, s);
1675 assert_eq!(from_utf16_lossy(u.as_slice()), s);
1677 assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
1678 assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
1683 fn test_utf16_invalid() {
1684 // completely positive cases tested above.
1686 assert_eq!(from_utf16([0xD800]), None);
1688 assert_eq!(from_utf16([0xD800, 0xD800]), None);
1691 assert_eq!(from_utf16([0x0061, 0xDC00]), None);
1694 assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
1698 fn test_utf16_lossy() {
1699 // completely positive cases tested above.
1701 assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_string());
1703 assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_string());
1706 assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_string());
1709 assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]),
1710 "\uFFFD𐒋\uFFFD".to_string());
1714 fn test_truncate_utf16_at_nul() {
1716 assert_eq!(truncate_utf16_at_nul(v), &[]);
1719 assert_eq!(truncate_utf16_at_nul(v), &[]);
1722 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1725 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1728 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1733 let s = "ศไทย中华Việt Nam";
1734 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1736 for ch in v.iter() {
1737 assert!(s.char_at(pos) == *ch);
1738 pos += from_char(*ch).len();
1743 fn test_char_at_reverse() {
1744 let s = "ศไทย中华Việt Nam";
1745 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1746 let mut pos = s.len();
1747 for ch in v.iter().rev() {
1748 assert!(s.char_at_reverse(pos) == *ch);
1749 pos -= from_char(*ch).len();
1754 fn test_escape_unicode() {
1755 assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_string());
1756 assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_string());
1757 assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_string());
1758 assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_string());
1759 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_string());
1760 assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_string());
1761 assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_string());
1762 assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_string());
1763 assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_string());
1767 fn test_escape_default() {
1768 assert_eq!("abc".escape_default(), "abc".to_string());
1769 assert_eq!("a c".escape_default(), "a c".to_string());
1770 assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_string());
1771 assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_string());
1772 assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_string());
1773 assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_string());
1774 assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_string());
1775 assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_string());
// Checks `cmp` on string slices: a string against its own prefix (both
// directions), equal strings, and strings that differ part-way through.
fn test_total_ord() {
    // Each comparison has to be wrapped in `assert!`. A bare
    // `a.cmp(b) == X;` statement only evaluates the expression and throws
    // the result away, so the test could never fail.
    assert!("1234".cmp(&("123")) == Greater);
    assert!("123".cmp(&("1234")) == Less);
    assert!("1234".cmp(&("1234")) == Equal);
    assert!("12345555".cmp(&("123456")) == Less);
    assert!("22".cmp(&("1234")) == Greater);
}
1788 fn test_char_range_at() {
1789 let data = "b¢€𤭢𤭢€¢b";
1790 assert_eq!('b', data.char_range_at(0).ch);
1791 assert_eq!('¢', data.char_range_at(1).ch);
1792 assert_eq!('€', data.char_range_at(3).ch);
1793 assert_eq!('𤭢', data.char_range_at(6).ch);
1794 assert_eq!('𤭢', data.char_range_at(10).ch);
1795 assert_eq!('€', data.char_range_at(14).ch);
1796 assert_eq!('¢', data.char_range_at(17).ch);
1797 assert_eq!('b', data.char_range_at(19).ch);
1801 fn test_char_range_at_reverse_underflow() {
1802 assert_eq!("abc".char_range_at_reverse(0).next, 0);
1806 fn test_iterator() {
1807 let s = "ศไทย中华Việt Nam";
1808 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1811 let mut it = s.chars();
1814 assert_eq!(c, v[pos]);
1817 assert_eq!(pos, v.len());
1821 fn test_rev_iterator() {
1822 let s = "ศไทย中华Việt Nam";
1823 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1826 let mut it = s.chars().rev();
1829 assert_eq!(c, v[pos]);
1832 assert_eq!(pos, v.len());
1836 fn test_iterator_clone() {
1837 let s = "ศไทย中华Việt Nam";
1838 let mut it = s.chars();
1840 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1844 fn test_bytesator() {
1845 let s = "ศไทย中华Việt Nam";
1847 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1848 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1853 for b in s.bytes() {
1854 assert_eq!(b, v[pos]);
1860 fn test_bytes_revator() {
1861 let s = "ศไทย中华Việt Nam";
1863 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1864 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1867 let mut pos = v.len();
1869 for b in s.bytes().rev() {
1871 assert_eq!(b, v[pos]);
1876 fn test_char_indicesator() {
1877 let s = "ศไทย中华Việt Nam";
1878 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1879 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1882 let mut it = s.char_indices();
1885 assert_eq!(c, (p[pos], v[pos]));
1888 assert_eq!(pos, v.len());
1889 assert_eq!(pos, p.len());
1893 fn test_char_indices_revator() {
1894 let s = "ศไทย中华Việt Nam";
1895 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1896 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1899 let mut it = s.char_indices().rev();
1902 assert_eq!(c, (p[pos], v[pos]));
1905 assert_eq!(pos, v.len());
1906 assert_eq!(pos, p.len());
1910 fn test_split_char_iterator() {
1911 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1913 let split: Vec<&str> = data.split(' ').collect();
1914 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1916 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1918 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1920 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1921 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1923 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1925 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1928 let split: Vec<&str> = data.split('ä').collect();
1929 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1931 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1933 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1935 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1936 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1938 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1940 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1944 fn test_splitn_char_iterator() {
1945 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1947 let split: Vec<&str> = data.splitn(' ', 3).collect();
1948 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1950 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1951 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1954 let split: Vec<&str> = data.splitn('ä', 3).collect();
1955 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1957 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1958 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1962 fn test_rsplitn_char_iterator() {
1963 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1965 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1967 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1969 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1971 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1974 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1976 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1978 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1980 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split`, which yields a final empty piece after a trailing
// separator, with `split_terminator`, which does not.
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing = data.split('\n').collect::<Vec<&str>>();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let without_trailing = data.split_terminator('\n').collect::<Vec<&str>>();
    assert_eq!(without_trailing, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
1995 fn test_rev_split_char_iterator_no_trailing() {
1996 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1998 let mut split: Vec<&str> = data.split('\n').rev().collect();
2000 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
2002 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
2004 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
2009 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2010 let words: Vec<&str> = data.words().collect();
2011 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2015 fn test_nfd_chars() {
2016 assert_eq!("abc".nfd_chars().collect::<String>(), "abc".to_string());
2017 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(), "d\u0307\u01c4".to_string());
2018 assert_eq!("\u2026".nfd_chars().collect::<String>(), "\u2026".to_string());
2019 assert_eq!("\u2126".nfd_chars().collect::<String>(), "\u03a9".to_string());
2020 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
2021 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
2022 assert_eq!("a\u0301".nfd_chars().collect::<String>(), "a\u0301".to_string());
2023 assert_eq!("\u0301a".nfd_chars().collect::<String>(), "\u0301a".to_string());
2024 assert_eq!("\ud4db".nfd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
2025 assert_eq!("\uac1c".nfd_chars().collect::<String>(), "\u1100\u1162".to_string());
2029 fn test_nfkd_chars() {
2030 assert_eq!("abc".nfkd_chars().collect::<String>(), "abc".to_string());
2031 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(), "d\u0307DZ\u030c".to_string());
2032 assert_eq!("\u2026".nfkd_chars().collect::<String>(), "...".to_string());
2033 assert_eq!("\u2126".nfkd_chars().collect::<String>(), "\u03a9".to_string());
2034 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
2035 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
2036 assert_eq!("a\u0301".nfkd_chars().collect::<String>(), "a\u0301".to_string());
2037 assert_eq!("\u0301a".nfkd_chars().collect::<String>(), "\u0301a".to_string());
2038 assert_eq!("\ud4db".nfkd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
2039 assert_eq!("\uac1c".nfkd_chars().collect::<String>(), "\u1100\u1162".to_string());
2044 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2045 let lines: Vec<&str> = data.lines().collect();
2046 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2048 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2049 let lines: Vec<&str> = data.lines().collect();
2050 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2054 fn test_split_strator() {
2055 fn t(s: &str, sep: &str, u: &[&str]) {
2056 let v: Vec<&str> = s.split_str(sep).collect();
2057 assert_eq!(v.as_slice(), u.as_slice());
2059 t("--1233345--", "12345", ["--1233345--"]);
2060 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2061 t("::hello::there", "::", ["", "hello", "there"]);
2062 t("hello::there::", "::", ["hello", "there", ""]);
2063 t("::hello::there::", "::", ["", "hello", "there", ""]);
2064 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2065 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2066 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2067 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
2069 t("zz", "zz", ["",""]);
2070 t("ok", "z", ["ok"]);
2071 t("zzz", "zz", ["","z"]);
2072 t("zzzzz", "zz", ["","","z"]);
2076 fn test_str_default() {
2077 use std::default::Default;
2078 fn t<S: Default + Str>() {
2079 let s: S = Default::default();
2080 assert_eq!(s.as_slice(), "");
2088 fn test_str_container() {
2089 fn sum_len<S: Collection>(v: &[S]) -> uint {
2090 v.iter().map(|x| x.len()).sum()
2093 let s = "01234".to_string();
2094 assert_eq!(5, sum_len(["012", "", "34"]));
2095 assert_eq!(5, sum_len(["01".to_string(), "2".to_string(),
2096 "34".to_string(), "".to_string()]));
2097 assert_eq!(5, sum_len([s.as_slice()]));
2101 fn test_str_from_utf8() {
2103 assert_eq!(from_utf8(xs), Some("hello"));
2105 let xs = "ศไทย中华Việt Nam".as_bytes();
2106 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
2108 let xs = b"hello\xFF";
2109 assert_eq!(from_utf8(xs), None);
2113 fn test_str_from_utf8_owned() {
2114 let xs = Vec::from_slice(b"hello");
2115 assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));
2117 let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
2118 assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));
2120 let xs = Vec::from_slice(b"hello\xFF");
2121 assert_eq!(from_utf8_owned(xs),
2122 Err(Vec::from_slice(b"hello\xFF")));
2126 fn test_str_from_utf8_lossy() {
2128 assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2130 let xs = "ศไทย中华Việt Nam".as_bytes();
2131 assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
2133 let xs = b"Hello\xC2 There\xFF Goodbye";
2134 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));
2136 let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
2137 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));
2139 let xs = b"\xF5foo\xF5\x80bar";
2140 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));
2142 let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
2143 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));
2145 let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
2146 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));
2148 let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
2149 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2150 foo\U00010000bar".to_string()));
2153 let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
2154 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2155 \uFFFD\uFFFD\uFFFDbar".to_string()));
2159 fn test_from_str() {
2160 let owned: Option<::std::string::String> = from_str("string");
2161 assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string"));
2165 fn test_maybe_owned_traits() {
2166 let s = Slice("abcde");
2167 assert_eq!(s.len(), 5);
2168 assert_eq!(s.as_slice(), "abcde");
2169 assert_eq!(s.to_str().as_slice(), "abcde");
2170 assert_eq!(format!("{}", s).as_slice(), "abcde");
2171 assert!(s.lt(&Owned("bcdef".to_string())));
2172 assert_eq!(Slice(""), Default::default());
2174 let o = Owned("abcde".to_string());
2175 assert_eq!(o.len(), 5);
2176 assert_eq!(o.as_slice(), "abcde");
2177 assert_eq!(o.to_str().as_slice(), "abcde");
2178 assert_eq!(format!("{}", o).as_slice(), "abcde");
2179 assert!(o.lt(&Slice("bcdef")));
2180 assert_eq!(Owned("".to_string()), Default::default());
2182 assert!(s.cmp(&o) == Equal);
2183 assert!(s.equiv(&o));
2185 assert!(o.cmp(&s) == Equal);
2186 assert!(o.equiv(&s));
2190 fn test_maybe_owned_methods() {
2191 let s = Slice("abcde");
2192 assert!(s.is_slice());
2193 assert!(!s.is_owned());
2195 let o = Owned("abcde".to_string());
2196 assert!(!o.is_slice());
2197 assert!(o.is_owned());
2201 fn test_maybe_owned_clone() {
2202 assert_eq!(Owned("abcde".to_string()), Slice("abcde").clone());
2203 assert_eq!(Owned("abcde".to_string()), Owned("abcde".to_string()).clone());
2204 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2205 assert_eq!(Slice("abcde"), Owned("abcde".to_string()).clone());
2209 fn test_maybe_owned_into_string() {
2210 assert_eq!(Slice("abcde").into_string(), "abcde".to_string());
2211 assert_eq!(Owned("abcde".to_string()).into_string(), "abcde".to_string());
2215 fn test_into_maybe_owned() {
2216 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2217 assert_eq!(("abcde".to_string()).into_maybe_owned(), Slice("abcde"));
2218 assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_string()));
2219 assert_eq!(("abcde".to_string()).into_maybe_owned(), Owned("abcde".to_string()));
2227 use std::prelude::*;
2230 fn char_iterator(b: &mut Bencher) {
2231 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2232 let len = s.char_len();
2234 b.iter(|| assert_eq!(s.chars().count(), len));
2238 fn char_iterator_ascii(b: &mut Bencher) {
2239 let s = "Mary had a little lamb, Little lamb
2240 Mary had a little lamb, Little lamb
2241 Mary had a little lamb, Little lamb
2242 Mary had a little lamb, Little lamb
2243 Mary had a little lamb, Little lamb
2244 Mary had a little lamb, Little lamb";
2245 let len = s.char_len();
2247 b.iter(|| assert_eq!(s.chars().count(), len));
2251 fn char_iterator_rev(b: &mut Bencher) {
2252 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2253 let len = s.char_len();
2255 b.iter(|| assert_eq!(s.chars().rev().count(), len));
2259 fn char_indicesator(b: &mut Bencher) {
2260 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2261 let len = s.char_len();
2263 b.iter(|| assert_eq!(s.char_indices().count(), len));
2267 fn char_indicesator_rev(b: &mut Bencher) {
2268 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2269 let len = s.char_len();
2271 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
2275 fn split_unicode_ascii(b: &mut Bencher) {
2276 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2278 b.iter(|| assert_eq!(s.split('V').count(), 3));
2282 fn split_unicode_not_ascii(b: &mut Bencher) {
2283 struct NotAscii(char);
2284 impl CharEq for NotAscii {
2285 fn matches(&mut self, c: char) -> bool {
2286 let NotAscii(cc) = *self;
2289 fn only_ascii(&self) -> bool { false }
2291 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2293 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2298 fn split_ascii(b: &mut Bencher) {
2299 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2300 let len = s.split(' ').count();
2302 b.iter(|| assert_eq!(s.split(' ').count(), len));
2306 fn split_not_ascii(b: &mut Bencher) {
2307 struct NotAscii(char);
2308 impl CharEq for NotAscii {
2310 fn matches(&mut self, c: char) -> bool {
2311 let NotAscii(cc) = *self;
2314 fn only_ascii(&self) -> bool { false }
2316 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2317 let len = s.split(' ').count();
2319 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2323 fn split_extern_fn(b: &mut Bencher) {
2324 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2325 let len = s.split(' ').count();
2326 fn pred(c: char) -> bool { c == ' ' }
2328 b.iter(|| assert_eq!(s.split(pred).count(), len));
2332 fn split_closure(b: &mut Bencher) {
2333 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2334 let len = s.split(' ').count();
2336 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
2340 fn split_slice(b: &mut Bencher) {
2341 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2342 let len = s.split(' ').count();
2344 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
2348 fn is_utf8_100_ascii(b: &mut Bencher) {
2350 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2351 Lorem ipsum dolor sit amet, consectetur. ";
2353 assert_eq!(100, s.len());
2360 fn is_utf8_100_multibyte(b: &mut Bencher) {
2361 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2362 assert_eq!(100, s.len());
2369 fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2370 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2371 Lorem ipsum dolor sit amet, consectetur. ";
2373 assert_eq!(100, s.len());
2375 let _ = from_utf8_lossy(s);
2380 fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2381 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2382 assert_eq!(100, s.len());
2384 let _ = from_utf8_lossy(s);
2389 fn from_utf8_lossy_invalid(b: &mut Bencher) {
2390 let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
2392 let _ = from_utf8_lossy(s);
2397 fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2398 let s = Vec::from_elem(100, 0xF5u8);
2400 let _ = from_utf8_lossy(s.as_slice());
2405 fn bench_connect(b: &mut Bencher) {
2406 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2408 let v = [s, s, s, s, s, s, s, s, s, s];
2410 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2415 fn bench_contains_short_short(b: &mut Bencher) {
2416 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2420 assert!(haystack.contains(needle));
2425 fn bench_contains_short_long(b: &mut Bencher) {
2427 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2428 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2429 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2430 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2431 tempus vel, gravida nec quam.
2433 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2434 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2435 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2436 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2437 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2438 interdum. Curabitur ut nisi justo.
2440 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2441 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2442 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2443 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2444 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2445 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2446 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2447 Aliquam sit amet placerat lorem.
2449 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2450 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2451 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2452 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2453 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2456 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2457 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2458 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2459 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2460 malesuada sollicitudin quam eu fermentum.";
2461 let needle = "english";
2464 assert!(!haystack.contains(needle));
2469 fn bench_contains_bad_naive(b: &mut Bencher) {
2470 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2471 let needle = "aaaaaaaab";
2474 assert!(!haystack.contains(needle));
2479 fn bench_contains_equal(b: &mut Bencher) {
2480 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2481 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2484 assert!(haystack.contains(needle));