1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with a pointer. `String` is used
20 for an owned string, so there is only one commonly-used `str` type in Rust:
23 `&str` is the borrowed string type. This type of string can only be created
24 from other strings, unless it is a static string (see below). As the word
25 "borrowed" implies, this type of string is owned elsewhere, and this string
26 cannot be moved out of.
28 As an example, here's some code that uses a string.
32 let borrowed_string = "This string is borrowed with the 'static lifetime";
36 From the example above, you can see that Rust's string literals have the
37 `'static` lifetime. This is akin to C's concept of a static string.
39 String literals are allocated statically in the rodata of the
40 executable/library. The string then has the type `&'static str` meaning that
41 the string is valid for the `'static` lifetime, otherwise known as the
42 lifetime of the entire program. As can be inferred from the type, these static
43 strings are not mutable.
47 Many languages have immutable strings by default, and Rust has a particular
48 flavor on this idea. As with the rest of Rust types, strings are immutable by
49 default. If a string is declared as `mut`, however, it may be mutated. This
50 works the same way as the rest of Rust's type system in the sense that if
51 there's a mutable reference to a string, there may only be one mutable reference
52 to that string. With these guarantees, strings can easily transition between
53 being mutable/immutable with the same benefits of having mutable strings in
58 Rust's string type, `str`, is a sequence of unicode codepoints encoded as a
59 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
60 encoded UTF-8 sequences. Additionally, strings are not null-terminated
61 and can contain null codepoints.
The actual representation of a string maps directly onto a vector of bytes:
`&str` has the same representation as `&[u8]`.
68 #![doc(primitive = "str")]
73 use core::default::Default;
76 use core::iter::AdditiveIterator;
84 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
85 pub use core::str::{Bytes, CharSplits};
86 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
87 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
88 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
89 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
90 pub use core::str::{Str, StrSlice};
93 Section: Creating a string
96 /// Consumes a vector of bytes to create a new utf-8 string.
98 /// Returns `Err` with the original vector if the vector contains invalid
105 /// let hello_vec = vec![104, 101, 108, 108, 111];
106 /// let string = str::from_utf8_owned(hello_vec);
107 /// assert_eq!(string, Ok("hello".to_string()));
109 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
110 String::from_utf8(vv)
113 /// Convert a byte to a UTF-8 string
117 /// Fails if invalid UTF-8
123 /// let string = str::from_byte(104);
124 /// assert_eq!(string.as_slice(), "h");
126 pub fn from_byte(b: u8) -> String {
128 String::from_char(1, b as char)
131 /// Convert a char to a string
137 /// let string = str::from_char('b');
138 /// assert_eq!(string.as_slice(), "b");
140 pub fn from_char(ch: char) -> String {
141 let mut buf = String::new();
/// Builds a `String` by collecting every `char` of the slice, in order.
///
/// # Example
///
/// ```rust
/// let chars = ['h', 'e', 'l', 'l', 'o'];
/// let string = str::from_chars(chars);
/// assert_eq!(string.as_slice(), "hello");
/// ```
pub fn from_chars(chs: &[char]) -> String {
    // Destructure the reference in the closure pattern to copy each char out.
    let collected: String = chs.iter().map(|&ch| ch).collect();
    collected
}
/// Methods for vectors of strings
///
/// Both operations return their result as a newly built `String`.
pub trait StrVector {
    /// Concatenate a vector of strings.
    ///
    /// Returns the concatenation as a `String`.
    fn concat(&self) -> String;

    /// Concatenate a vector of strings, placing a given separator between each.
    ///
    /// `sep` is the separator text; the joined text is returned as a `String`.
    fn connect(&self, sep: &str) -> String;
}
169 impl<'a, S: Str> StrVector for &'a [S] {
170 fn concat(&self) -> String {
172 return String::new();
// `len` calculation may overflow, but `push_str` will check boundaries
176 let len = self.iter().map(|s| s.as_slice().len()).sum();
178 let mut result = String::with_capacity(len);
180 for s in self.iter() {
181 result.push_str(s.as_slice())
187 fn connect(&self, sep: &str) -> String {
189 return String::new();
194 return self.concat();
197 // this is wrong without the guarantee that `self` is non-empty
// `len` calculation may overflow, but `push_str` will check boundaries
199 let len = sep.len() * (self.len() - 1)
200 + self.iter().map(|s| s.as_slice().len()).sum();
201 let mut result = String::with_capacity(len);
202 let mut first = true;
204 for s in self.iter() {
208 result.push_str(sep);
210 result.push_str(s.as_slice());
216 impl<'a, S: Str> StrVector for Vec<S> {
218 fn concat(&self) -> String {
219 self.as_slice().concat()
223 fn connect(&self, sep: &str) -> String {
224 self.as_slice().connect(sep)
232 // Helper functions used for Unicode normalization
233 fn canonical_sort(comb: &mut [(char, u8)]) {
234 let len = comb.len();
235 for i in range(0, len) {
236 let mut swapped = false;
237 for j in range(1, len-i) {
238 let class_a = *comb[j-1].ref1();
239 let class_b = *comb[j].ref1();
240 if class_a != 0 && class_b != 0 && class_a > class_b {
245 if !swapped { break; }
250 enum DecompositionType {
255 /// External iterator for a string's decomposition's characters.
256 /// Use with the `std::iter` module.
258 pub struct Decompositions<'a> {
259 kind: DecompositionType,
261 buffer: Vec<(char, u8)>,
265 impl<'a> Iterator<char> for Decompositions<'a> {
267 fn next(&mut self) -> Option<char> {
268 use unicode::normalization::canonical_combining_class;
270 match self.buffer.as_slice().head() {
276 Some(&(c, _)) if self.sorted => {
280 _ => self.sorted = false
283 let decomposer = match self.kind {
284 Canonical => char::decompose_canonical,
285 Compatible => char::decompose_compatible
289 for ch in self.iter {
290 let buffer = &mut self.buffer;
291 let sorted = &mut self.sorted;
293 let class = canonical_combining_class(d);
294 if class == 0 && !*sorted {
295 canonical_sort(buffer.as_mut_slice());
298 buffer.push((d, class));
305 canonical_sort(self.buffer.as_mut_slice());
309 match self.buffer.shift() {
314 Some((c, _)) => Some(c),
319 fn size_hint(&self) -> (uint, Option<uint>) {
320 let (lower, _) = self.iter.size_hint();
325 /// Replace all occurrences of one string with another
329 /// * s - The string containing substrings to replace
330 /// * from - The string to replace
331 /// * to - The replacement string
335 /// The original string with all occurrences of `from` replaced with `to`
336 pub fn replace(s: &str, from: &str, to: &str) -> String {
337 let mut result = String::new();
338 let mut last_end = 0;
339 for (start, end) in s.match_indices(from) {
340 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
344 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
352 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
353 /// if `v` contains any invalid data.
361 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
362 /// 0x0073, 0x0069, 0x0063];
363 /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_string()));
365 /// // 𝄞mu<invalid>ic
367 /// assert_eq!(str::from_utf16(v), None);
369 pub fn from_utf16(v: &[u16]) -> Option<String> {
370 let mut s = String::with_capacity(v.len() / 2);
371 for c in utf16_items(v) {
373 ScalarValue(c) => s.push_char(c),
374 LoneSurrogate(_) => return None
380 /// Decode a UTF-16 encoded vector `v` into a string, replacing
381 /// invalid data with the replacement character (U+FFFD).
387 /// // 𝄞mus<invalid>ic<invalid>
388 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
389 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
392 /// assert_eq!(str::from_utf16_lossy(v),
393 /// "𝄞mus\uFFFDic\uFFFD".to_string());
395 pub fn from_utf16_lossy(v: &[u16]) -> String {
396 utf16_items(v).map(|c| c.to_char_lossy()).collect()
399 // Return the initial codepoint accumulator for the first byte.
400 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
401 // for width 3, and 3 bits for width 4
402 macro_rules! utf8_first_byte(
403 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
406 // return the value of $ch updated with continuation byte $byte
407 macro_rules! utf8_acc_cont_byte(
408 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
411 static TAG_CONT_U8: u8 = 128u8;
413 /// Converts a vector of bytes to a new utf-8 string.
414 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
419 /// let input = b"Hello \xF0\x90\x80World";
420 /// let output = std::str::from_utf8_lossy(input);
421 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
423 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
425 return Slice(unsafe { mem::transmute(v) })
428 static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
431 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
432 unsafe { *xs.unsafe_ref(i) }
434 fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
442 let mut res = String::with_capacity(total);
446 res.push_bytes(v.slice_to(i))
450 // subseqidx is the index of the first byte of the subsequence we're looking at.
451 // It's used to copy a bunch of contiguous good codepoints at once instead of copying
453 let mut subseqidx = 0;
457 let byte = unsafe_get(v, i);
460 macro_rules! error(() => ({
463 res.push_bytes(v.slice(subseqidx, i_));
466 res.push_bytes(REPLACEMENT);
471 // subseqidx handles this
473 let w = utf8_char_width(byte);
477 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
484 match (byte, safe_get(v, i, total)) {
485 (0xE0 , 0xA0 .. 0xBF) => (),
486 (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
487 (0xED , 0x80 .. 0x9F) => (),
488 (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
495 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
502 match (byte, safe_get(v, i, total)) {
503 (0xF0 , 0x90 .. 0xBF) => (),
504 (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
505 (0xF4 , 0x80 .. 0x8F) => (),
512 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
517 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
530 if subseqidx < total {
532 res.push_bytes(v.slice(subseqidx, total))
535 Owned(res.into_string())
542 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
543 /// This can be useful as an optimization when an allocation is sometimes
544 /// needed but not always.
545 pub enum MaybeOwned<'a> {
546 /// A borrowed string
552 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
553 pub type SendStr = MaybeOwned<'static>;
555 impl<'a> MaybeOwned<'a> {
556 /// Returns `true` if this `MaybeOwned` wraps an owned string
558 pub fn is_owned(&self) -> bool {
565 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
567 pub fn is_slice(&self) -> bool {
/// Trait for moving into a `MaybeOwned`
///
/// The lifetime `'a` is the lifetime parameter of the resulting `MaybeOwned`.
pub trait IntoMaybeOwned<'a> {
    /// Moves self into a `MaybeOwned`
    ///
    /// Takes `self` by value, so the receiver is consumed by the conversion.
    fn into_maybe_owned(self) -> MaybeOwned<'a>;
}
581 impl<'a> IntoMaybeOwned<'a> for String {
583 fn into_maybe_owned(self) -> MaybeOwned<'a> {
588 impl<'a> IntoMaybeOwned<'a> for &'a str {
590 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
593 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
595 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
598 impl<'a> PartialEq for MaybeOwned<'a> {
600 fn eq(&self, other: &MaybeOwned) -> bool {
601 self.as_slice() == other.as_slice()
605 impl<'a> Eq for MaybeOwned<'a> {}
607 impl<'a> PartialOrd for MaybeOwned<'a> {
609 fn partial_cmp(&self, other: &MaybeOwned) -> Option<Ordering> {
610 Some(self.cmp(other))
614 impl<'a> Ord for MaybeOwned<'a> {
616 fn cmp(&self, other: &MaybeOwned) -> Ordering {
617 self.as_slice().cmp(&other.as_slice())
621 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
623 fn equiv(&self, other: &S) -> bool {
624 self.as_slice() == other.as_slice()
628 impl<'a> Str for MaybeOwned<'a> {
630 fn as_slice<'b>(&'b self) -> &'b str {
633 Owned(ref s) => s.as_slice()
638 impl<'a> StrAllocating for MaybeOwned<'a> {
640 fn into_string(self) -> String {
642 Slice(s) => s.to_string(),
648 impl<'a> Collection for MaybeOwned<'a> {
650 fn len(&self) -> uint { self.as_slice().len() }
653 impl<'a> Clone for MaybeOwned<'a> {
655 fn clone(&self) -> MaybeOwned<'a> {
657 Slice(s) => Slice(s),
658 Owned(ref s) => Owned(s.to_string())
663 impl<'a> Default for MaybeOwned<'a> {
665 fn default() -> MaybeOwned<'a> { Slice("") }
668 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
670 fn hash(&self, hasher: &mut H) {
671 self.as_slice().hash(hasher)
675 impl<'a> fmt::Show for MaybeOwned<'a> {
677 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
679 Slice(ref s) => s.fmt(f),
680 Owned(ref s) => s.fmt(f)
685 /// Unsafe operations
687 use core::prelude::*;
689 use core::raw::Slice;
694 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
695 pub use core::str::raw::{slice_unchecked};
697 /// Create a Rust string from a *u8 buffer of the given length
698 pub unsafe fn from_buf_len(buf: *const u8, len: uint) -> String {
699 let mut result = String::new();
700 result.push_bytes(mem::transmute(Slice {
707 /// Create a Rust string from a null-terminated C string
708 pub unsafe fn from_c_str(c_string: *const i8) -> String {
709 let mut buf = String::new();
711 while *c_string.offset(len) != 0 {
714 buf.push_bytes(mem::transmute(Slice {
721 /// Converts an owned vector of bytes to a new owned string. This assumes
722 /// that the utf-8-ness of the vector has already been validated
724 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
728 /// Converts a byte to a string.
729 pub unsafe fn from_byte(u: u8) -> String {
730 from_utf8_owned(vec![u])
733 /// Sets the length of a string
735 /// This will explicitly set the size of the string, without actually
736 /// modifying its buffers, so it is up to the caller to ensure that
737 /// the string is actually the specified size.
739 fn test_from_buf_len() {
740 use slice::ImmutableVector;
741 use str::StrAllocating;
744 let a = vec![65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
746 let c = from_buf_len(b, 3u);
747 assert_eq!(c, "AAA".to_string());
753 Section: Trait implementations
756 /// Any string that can be represented as a slice
757 pub trait StrAllocating: Str {
758 /// Convert `self` into a `String`, not making a copy if possible.
759 fn into_string(self) -> String;
761 /// Convert `self` into a `String`.
763 fn to_string(&self) -> String {
764 String::from_str(self.as_slice())
767 #[allow(missing_doc)]
768 #[deprecated = "replaced by .into_string()"]
769 fn into_owned(self) -> String {
773 /// Escape each char in `s` with `char::escape_default`.
774 fn escape_default(&self) -> String {
775 let me = self.as_slice();
776 let mut out = String::with_capacity(me.len());
777 for c in me.chars() {
778 c.escape_default(|c| out.push_char(c));
783 /// Escape each char in `s` with `char::escape_unicode`.
784 fn escape_unicode(&self) -> String {
785 let me = self.as_slice();
786 let mut out = String::with_capacity(me.len());
787 for c in me.chars() {
788 c.escape_unicode(|c| out.push_char(c));
793 /// Replace all occurrences of one string with another.
797 /// * `from` - The string to replace
798 /// * `to` - The replacement string
802 /// The original string with all occurrences of `from` replaced with `to`.
807 /// let s = "Do you know the muffin man,
808 /// The muffin man, the muffin man, ...".to_string();
810 /// assert_eq!(s.replace("muffin man", "little lamb"),
811 /// "Do you know the little lamb,
812 /// The little lamb, the little lamb, ...".to_string());
814 /// // not found, so no change.
815 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
817 fn replace(&self, from: &str, to: &str) -> String {
818 let me = self.as_slice();
819 let mut result = String::new();
820 let mut last_end = 0;
821 for (start, end) in me.match_indices(from) {
822 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
826 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
830 #[allow(missing_doc)]
831 #[deprecated = "obsolete, use `to_string`"]
833 fn to_owned(&self) -> String {
835 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
839 /// Converts to a vector of `u16` encoded as UTF-16.
840 #[deprecated = "use `utf16_units` instead"]
841 fn to_utf16(&self) -> Vec<u16> {
842 self.as_slice().utf16_units().collect::<Vec<u16>>()
845 /// Given a string, make a new string with repeated copies of it.
846 fn repeat(&self, nn: uint) -> String {
847 let me = self.as_slice();
848 let mut ret = String::with_capacity(nn * me.len());
849 for _ in range(0, nn) {
855 /// Levenshtein Distance between two strings.
856 fn lev_distance(&self, t: &str) -> uint {
857 let me = self.as_slice();
861 if slen == 0 { return tlen; }
862 if tlen == 0 { return slen; }
864 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
866 for (i, sc) in me.chars().enumerate() {
869 *dcol.get_mut(0) = current + 1;
871 for (j, tc) in t.chars().enumerate() {
873 let next = *dcol.get(j + 1);
876 *dcol.get_mut(j + 1) = current;
878 *dcol.get_mut(j + 1) = cmp::min(current, next);
879 *dcol.get_mut(j + 1) = cmp::min(*dcol.get(j + 1),
887 return *dcol.get(tlen);
890 /// An Iterator over the string in Unicode Normalization Form D
891 /// (canonical decomposition).
893 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
895 iter: self.as_slice().chars(),
902 /// An Iterator over the string in Unicode Normalization Form KD
903 /// (compatibility decomposition).
905 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
907 iter: self.as_slice().chars(),
915 impl<'a> StrAllocating for &'a str {
917 fn into_string(self) -> String {
922 /// Methods for owned strings
924 /// Consumes the string, returning the underlying byte buffer.
926 /// The buffer does not have a null terminator.
927 fn into_bytes(self) -> Vec<u8>;
929 /// Pushes the given string onto this string, returning the concatenation of the two strings.
930 fn append(self, rhs: &str) -> String;
933 impl OwnedStr for String {
935 fn into_bytes(self) -> Vec<u8> {
936 unsafe { mem::transmute(self) }
940 fn append(mut self, rhs: &str) -> String {
949 use std::iter::AdditiveIterator;
950 use std::default::Default;
958 assert!((eq_slice("foobar".slice(0, 3), "foo")));
959 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
960 assert!((!eq_slice("foo1", "foo2")));
966 assert!("" <= "foo");
967 assert!("foo" <= "foo");
968 assert!("foo" != "bar");
973 assert_eq!("".len(), 0u);
974 assert_eq!("hello world".len(), 11u);
975 assert_eq!("\x63".len(), 1u);
976 assert_eq!("\xa2".len(), 2u);
977 assert_eq!("\u03c0".len(), 2u);
978 assert_eq!("\u2620".len(), 3u);
979 assert_eq!("\U0001d11e".len(), 4u);
981 assert_eq!("".char_len(), 0u);
982 assert_eq!("hello world".char_len(), 11u);
983 assert_eq!("\x63".char_len(), 1u);
984 assert_eq!("\xa2".char_len(), 1u);
985 assert_eq!("\u03c0".char_len(), 1u);
986 assert_eq!("\u2620".char_len(), 1u);
987 assert_eq!("\U0001d11e".char_len(), 1u);
988 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
993 assert_eq!("hello".find('l'), Some(2u));
994 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
995 assert!("hello".find('x').is_none());
996 assert!("hello".find(|c:char| c == 'x').is_none());
997 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
998 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
1003 assert_eq!("hello".rfind('l'), Some(3u));
1004 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
1005 assert!("hello".rfind('x').is_none());
1006 assert!("hello".rfind(|c:char| c == 'x').is_none());
1007 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1008 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
1013 let empty = "".to_string();
1014 let s: String = empty.as_slice().chars().collect();
1015 assert_eq!(empty, s);
1016 let data = "ประเทศไทย中".to_string();
1017 let s: String = data.as_slice().chars().collect();
1018 assert_eq!(data, s);
// Consuming a `String` must yield exactly the bytes of its contents.
fn test_into_bytes() {
    let bytes = "asdf".to_string().into_bytes();
    assert_eq!(b"asdf", bytes.as_slice());
}
1029 fn test_find_str() {
1031 assert_eq!("".find_str(""), Some(0u));
1032 assert!("banana".find_str("apple pie").is_none());
1034 let data = "abcabc";
1035 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1036 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1037 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1039 let string = "ประเทศไทย中华Việt Nam";
1040 let mut data = string.to_string();
1041 data.push_str(string);
1042 assert!(data.as_slice().find_str("ไท华").is_none());
1043 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
1044 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
1046 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
1047 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
1048 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
1049 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
1050 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
1052 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1053 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1054 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1055 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1056 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1060 fn test_slice_chars() {
1061 fn t(a: &str, b: &str, start: uint) {
1062 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1065 t("hello", "llo", 2);
1066 t("hello", "el", 1);
1069 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1074 fn t(v: &[String], s: &str) {
1075 assert_eq!(v.concat().as_slice(), s);
1077 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1078 "no".to_string(), "good".to_string()], "youknowI'mnogood");
1079 let v: &[String] = [];
1081 t(["hi".to_string()], "hi");
1086 fn t(v: &[String], sep: &str, s: &str) {
1087 assert_eq!(v.connect(sep).as_slice(), s);
1089 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1090 "no".to_string(), "good".to_string()],
1091 " ", "you know I'm no good");
1092 let v: &[String] = [];
1094 t(["hi".to_string()], " ", "hi");
1098 fn test_concat_slices() {
1099 fn t(v: &[&str], s: &str) {
1100 assert_eq!(v.concat().as_slice(), s);
1102 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1103 let v: &[&str] = [];
1109 fn test_connect_slices() {
1110 fn t(v: &[&str], sep: &str, s: &str) {
1111 assert_eq!(v.connect(sep).as_slice(), s);
1113 t(["you", "know", "I'm", "no", "good"],
1114 " ", "you know I'm no good");
1116 t(["hi"], " ", "hi");
1121 assert_eq!("x".repeat(4), "xxxx".to_string());
1122 assert_eq!("hi".repeat(4), "hihihihi".to_string());
1123 assert_eq!("ไท华".repeat(3), "ไท华ไท华ไท华".to_string());
1124 assert_eq!("".repeat(4), "".to_string());
1125 assert_eq!("hi".repeat(0), "".to_string());
1129 fn test_unsafe_slice() {
1130 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1131 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1132 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1133 fn a_million_letter_a() -> String {
1135 let mut rs = String::new();
1137 rs.push_str("aaaaaaaaaa");
1142 fn half_a_million_letter_a() -> String {
1144 let mut rs = String::new();
1146 rs.push_str("aaaaa");
1151 let letters = a_million_letter_a();
1152 assert!(half_a_million_letter_a() ==
1153 unsafe {raw::slice_bytes(letters.as_slice(),
1155 500000)}.to_string());
// Checks `starts_with` against (text, prefix) pairs, including the empty
// prefix and a multi-byte leading character.
fn test_starts_with() {
    let matching = [("", ""), ("abc", ""), ("abc", "a"), ("ödd", "öd")];
    for &(text, prefix) in matching.iter() {
        assert!(text.starts_with(prefix));
    }

    let rejected = [("a", "abc"), ("", "abc"), ("ödd", "-")];
    for &(text, prefix) in rejected.iter() {
        assert!(!text.starts_with(prefix));
    }
}
// Checks `ends_with` against (text, suffix) pairs, including the empty
// suffix and a multi-byte trailing character.
fn test_ends_with() {
    let matching = [("", ""), ("abc", ""), ("abc", "c"), ("ddö", "dö")];
    for &(text, suffix) in matching.iter() {
        assert!(text.ends_with(suffix));
    }

    let rejected = [("a", "abc"), ("", "abc"), ("ddö", "-")];
    for &(text, suffix) in rejected.iter() {
        assert!(!text.ends_with(suffix));
    }
}
// The empty literal is empty; a one-character literal is not.
fn test_is_empty() {
    let blank = "";
    let single = "a";
    assert!(blank.is_empty());
    assert!(!single.is_empty());
}
1189 assert_eq!("".replace(a, "b"), "".to_string());
1190 assert_eq!("a".replace(a, "b"), "b".to_string());
1191 assert_eq!("ab".replace(a, "b"), "bb".to_string());
1193 assert!(" test test ".replace(test, "toast") ==
1194 " toast toast ".to_string());
1195 assert_eq!(" test test ".replace(test, ""), " ".to_string());
1199 fn test_replace_2a() {
1200 let data = "ประเทศไทย中华";
1201 let repl = "دولة الكويت";
1204 let a2 = "دولة الكويتทศไทย中华";
1205 assert_eq!(data.replace(a, repl).as_slice(), a2);
1209 fn test_replace_2b() {
1210 let data = "ประเทศไทย中华";
1211 let repl = "دولة الكويت";
1214 let b2 = "ปรدولة الكويتทศไทย中华";
1215 assert_eq!(data.replace(b, repl).as_slice(), b2);
1219 fn test_replace_2c() {
1220 let data = "ประเทศไทย中华";
1221 let repl = "دولة الكويت";
1224 let c2 = "ประเทศไทยدولة الكويت";
1225 assert_eq!(data.replace(c, repl).as_slice(), c2);
1229 fn test_replace_2d() {
1230 let data = "ประเทศไทย中华";
1231 let repl = "دولة الكويت";
1234 assert_eq!(data.replace(d, repl).as_slice(), data);
1239 assert_eq!("ab", "abc".slice(0, 2));
1240 assert_eq!("bc", "abc".slice(1, 3));
1241 assert_eq!("", "abc".slice(1, 1));
1242 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1244 let data = "ประเทศไทย中华";
1245 assert_eq!("ป", data.slice(0, 3));
1246 assert_eq!("ร", data.slice(3, 6));
1247 assert_eq!("", data.slice(3, 3));
1248 assert_eq!("华", data.slice(30, 33));
1250 fn a_million_letter_x() -> String {
1252 let mut rs = String::new();
1254 rs.push_str("华华华华华华华华华华");
1259 fn half_a_million_letter_x() -> String {
1261 let mut rs = String::new();
1263 rs.push_str("华华华华华");
1268 let letters = a_million_letter_x();
1269 assert!(half_a_million_letter_x() ==
1270 letters.as_slice().slice(0u, 3u * 500000u).to_string());
1275 let ss = "中华Việt Nam";
1277 assert_eq!("华", ss.slice(3u, 6u));
1278 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1280 assert_eq!("ab", "abc".slice(0u, 2u));
1281 assert_eq!("bc", "abc".slice(1u, 3u));
1282 assert_eq!("", "abc".slice(1u, 1u));
1284 assert_eq!("中", ss.slice(0u, 3u));
1285 assert_eq!("华V", ss.slice(3u, 7u));
1286 assert_eq!("", ss.slice(3u, 3u));
1301 fn test_slice_fail() {
1302 "中华Việt Nam".slice(0u, 2u);
1306 fn test_slice_from() {
1307 assert_eq!("abcd".slice_from(0), "abcd");
1308 assert_eq!("abcd".slice_from(2), "cd");
1309 assert_eq!("abcd".slice_from(4), "");
1312 fn test_slice_to() {
1313 assert_eq!("abcd".slice_to(0), "");
1314 assert_eq!("abcd".slice_to(2), "ab");
1315 assert_eq!("abcd".slice_to(4), "abcd");
1319 fn test_trim_left_chars() {
1320 let v: &[char] = &[];
1321 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1322 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1323 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1324 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1326 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1327 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1328 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1332 fn test_trim_right_chars() {
1333 let v: &[char] = &[];
1334 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1335 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1336 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1337 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1339 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1340 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1341 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1345 fn test_trim_chars() {
1346 let v: &[char] = &[];
1347 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1348 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1349 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1350 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1352 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1353 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1354 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1358 fn test_trim_left() {
1359 assert_eq!("".trim_left(), "");
1360 assert_eq!("a".trim_left(), "a");
1361 assert_eq!(" ".trim_left(), "");
1362 assert_eq!(" blah".trim_left(), "blah");
1363 assert_eq!(" \u3000 wut".trim_left(), "wut");
1364 assert_eq!("hey ".trim_left(), "hey ");
1368 fn test_trim_right() {
1369 assert_eq!("".trim_right(), "");
1370 assert_eq!("a".trim_right(), "a");
1371 assert_eq!(" ".trim_right(), "");
1372 assert_eq!("blah ".trim_right(), "blah");
1373 assert_eq!("wut \u3000 ".trim_right(), "wut");
1374 assert_eq!(" hey".trim_right(), " hey");
1379 assert_eq!("".trim(), "");
1380 assert_eq!("a".trim(), "a");
1381 assert_eq!(" ".trim(), "");
1382 assert_eq!(" blah ".trim(), "blah");
1383 assert_eq!("\nwut \u3000 ".trim(), "wut");
1384 assert_eq!(" hey dude ".trim(), "hey dude");
1388 fn test_is_whitespace() {
1389 assert!("".is_whitespace());
1390 assert!(" ".is_whitespace());
1391 assert!("\u2009".is_whitespace()); // Thin space
1392 assert!(" \n\t ".is_whitespace());
1393 assert!(!" _ ".is_whitespace());
1397 fn test_slice_shift_char() {
1398 let data = "ประเทศไทย中";
1399 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1403 fn test_slice_shift_char_2() {
1405 assert_eq!(empty.slice_shift_char(), (None, ""));
1410 // deny overlong encodings
1411 assert!(!is_utf8([0xc0, 0x80]));
1412 assert!(!is_utf8([0xc0, 0xae]));
1413 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1414 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1415 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1416 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1417 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1420 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1421 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1423 assert!(is_utf8([0xC2, 0x80]));
1424 assert!(is_utf8([0xDF, 0xBF]));
1425 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1426 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1427 assert!(is_utf8([0xEE, 0x80, 0x80]));
1428 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1429 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1430 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1434 fn test_is_utf16() {
1435 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1443 // surrogate pairs (randomly generated with Python 3's
1444 // .encode('utf-16be'))
1445 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1446 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1447 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1449 // mixtures (also random)
1450 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1451 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1452 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1455 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1458 // surrogate + regular unit
1460 // surrogate + lead surrogate
1462 // unterminated surrogate
1464 // trail surrogate without a lead
1467 // random byte sequences that Python 3's .decode('utf-16be')
1469 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1470 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1471 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1472 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1473 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1474 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1475 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1476 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1477 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1478 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1479 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1480 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1481 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1482 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1483 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1484 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1485 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1486 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1487 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1488 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1489 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1493 fn test_raw_from_c_str() {
1495 let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
1497 let c = raw::from_c_str(b);
1498 assert_eq!(c, "AAAAAAA".to_string());
1503 fn test_as_bytes() {
1506 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1507 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1510 assert_eq!("".as_bytes(), &[]);
1511 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1512 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1517 fn test_as_bytes_fail() {
1518 // Don't double free. (I'm not sure if this exercises the
1519 // original problem code path anymore.)
1520 let s = "".to_string();
1521 let _bytes = s.as_bytes();
1527 let buf = "hello".as_ptr();
1529 assert_eq!(*buf.offset(0), 'h' as u8);
1530 assert_eq!(*buf.offset(1), 'e' as u8);
1531 assert_eq!(*buf.offset(2), 'l' as u8);
1532 assert_eq!(*buf.offset(3), 'l' as u8);
1533 assert_eq!(*buf.offset(4), 'o' as u8);
// `subslice_offset` reports the byte offset of a slice taken from the
// receiver, both for explicit `slice` calls and for slices yielded by `lines`.
#[test]
fn test_subslice_offset() {
    let a = "kernelsprite";
    let b = a.slice(7, a.len());
    let c = a.slice(0, a.len() - 6);
    assert_eq!(a.subslice_offset(b), 7);
    assert_eq!(a.subslice_offset(c), 0);

    // Each line is a subslice of `string`; the offsets skip over the "\n"s.
    let string = "a\nb\nc";
    let lines: Vec<&str> = string.lines().collect();
    let lines = lines.as_slice();
    assert_eq!(string.subslice_offset(lines[0]), 0);
    assert_eq!(string.subslice_offset(lines[1]), 2);
    assert_eq!(string.subslice_offset(lines[2]), 4);
}
// Calls `subslice_offset` with a string that is not a slice of the receiver.
// NOTE(review): nothing is asserted, so this is presumably a negative test
// that expects the call to fail -- confirm `#[should_fail]` is intended.
#[test]
#[should_fail]
fn test_subslice_offset_2() {
    let a = "alchemiter";
    let b = "cruxtruder";
    a.subslice_offset(b);
}
1562 fn vec_str_conversions() {
1563 let s1: String = "All mimsy were the borogoves".to_string();
1565 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1566 let s2: String = from_utf8(v.as_slice()).unwrap().to_string();
1567 let mut i: uint = 0u;
1568 let n1: uint = s1.len();
1569 let n2: uint = v.len();
1572 let a: u8 = s1.as_slice()[i];
1573 let b: u8 = s2.as_slice()[i];
// Substring search: matches at the start, middle and end, the empty needle,
// the empty haystack, and multi-byte haystacks/needles.
#[test]
fn test_contains() {
    assert!("abcde".contains("bcd"));
    assert!("abcde".contains("abcd"));
    assert!("abcde".contains("bcde"));
    // The empty needle is contained in every string, including the empty one.
    assert!("abcde".contains(""));
    assert!("".contains(""));
    assert!(!"abcde".contains("def"));
    assert!(!"".contains("a"));

    let data = "ประเทศไทย中华Việt Nam";
    assert!(data.contains("ประเ"));
    assert!(data.contains("ะเ"));
    assert!(data.contains("中华"));
    // Every character occurs in `data`, but not adjacently in this order.
    assert!(!data.contains("ไท华"));
}
// Single-character search, including the single-char and empty haystacks.
#[test]
fn test_contains_char() {
    assert!("abc".contains_char('b'));
    assert!("a".contains_char('a'));
    assert!(!"abc".contains_char('d'));
    assert!(!"".contains_char('a'));
}
1609 [("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_string(),
1610 vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
1611 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
1612 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
1613 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
1615 ("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_string(),
1616 vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
1617 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
1618 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
1619 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
1620 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
1623 ("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_string(),
1624 vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
1625 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
1626 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
1627 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
1628 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
1629 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
1630 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
1632 ("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_string(),
1633 vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
1634 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
1635 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
1636 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
1637 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
1638 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
1639 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
1640 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
1641 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
1642 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
1644 // Issue #12318, even-numbered non-BMP planes
1645 ("\U00020000".to_string(),
1646 vec![0xD840, 0xDC00])];
1648 for p in pairs.iter() {
1649 let (s, u) = (*p).clone();
1650 let s_as_utf16 = s.as_slice().utf16_units().collect::<Vec<u16>>();
1651 let u_as_string = from_utf16(u.as_slice()).unwrap();
1653 assert!(is_utf16(u.as_slice()));
1654 assert_eq!(s_as_utf16, u);
1656 assert_eq!(u_as_string, s);
1657 assert_eq!(from_utf16_lossy(u.as_slice()), s);
1659 assert_eq!(from_utf16(s_as_utf16.as_slice()).unwrap(), s);
1660 assert_eq!(u_as_string.as_slice().utf16_units().collect::<Vec<u16>>(), u);
1665 fn test_utf16_invalid() {
1666 // completely positive cases tested above.
1668 assert_eq!(from_utf16([0xD800]), None);
1670 assert_eq!(from_utf16([0xD800, 0xD800]), None);
1673 assert_eq!(from_utf16([0x0061, 0xDC00]), None);
1676 assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
1680 fn test_utf16_lossy() {
1681 // completely positive cases tested above.
1683 assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_string());
1685 assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_string());
1688 assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_string());
1691 assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]),
1692 "\uFFFD𐒋\uFFFD".to_string());
1696 fn test_truncate_utf16_at_nul() {
1698 assert_eq!(truncate_utf16_at_nul(v), &[]);
1701 assert_eq!(truncate_utf16_at_nul(v), &[]);
1704 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1707 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1710 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1715 let s = "ศไทย中华Việt Nam";
1716 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1718 for ch in v.iter() {
1719 assert!(s.char_at(pos) == *ch);
1720 pos += from_char(*ch).len();
// Walks the string backwards from its end: `char_at_reverse(pos)` must yield
// the character ending at byte offset `pos`, after which `pos` steps back by
// that character's encoded length.
#[test]
fn test_char_at_reverse() {
    let s = "ศไทย中华Việt Nam";
    let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
    let mut pos = s.len();
    for ch in v.iter().rev() {
        assert!(s.char_at_reverse(pos) == *ch);
        pos -= from_char(*ch).len();
    }
}
// `escape_unicode` escapes every character: `\x..` for the one-byte range,
// `\u....` up to U+FFFF and `\U........` beyond that.
#[test]
fn test_escape_unicode() {
    assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_string());
    assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_string());
    assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_string());
    assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_string());
    assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_string());
    assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_string());
    assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_string());
    assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_string());
    assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_string());
}
// `escape_default` leaves printable ASCII untouched, uses the short escapes
// for control characters, quotes and backslash, and `\u`/`\U` escapes for
// the non-ASCII characters exercised here.
#[test]
fn test_escape_default() {
    assert_eq!("abc".escape_default(), "abc".to_string());
    assert_eq!("a c".escape_default(), "a c".to_string());
    assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_string());
    assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_string());
    assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_string());
    assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_string());
    assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_string());
    assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_string());
}
// Total ordering of string slices is lexicographic: a strict prefix sorts
// before the longer string, and the first differing character decides
// otherwise.
//
// Each comparison is wrapped in `assert!`; as bare expression statements the
// results were computed and then thrown away, so nothing was actually tested.
#[test]
fn test_total_ord() {
    assert!("1234".cmp(&"123") == Greater);
    assert!("123".cmp(&"1234") == Less);
    assert!("1234".cmp(&"1234") == Equal);
    assert!("12345555".cmp(&"123456") == Less);
    assert!("22".cmp(&"1234") == Greater);
}
// `char_range_at` decodes the character starting at a given byte offset; the
// offsets step through characters of 1, 2, 3 and 4 encoded bytes.
#[test]
fn test_char_range_at() {
    let data = "b¢€𤭢𤭢€¢b";
    assert_eq!('b', data.char_range_at(0).ch);
    assert_eq!('¢', data.char_range_at(1).ch);
    assert_eq!('€', data.char_range_at(3).ch);
    assert_eq!('𤭢', data.char_range_at(6).ch);
    assert_eq!('𤭢', data.char_range_at(10).ch);
    assert_eq!('€', data.char_range_at(14).ch);
    assert_eq!('¢', data.char_range_at(17).ch);
    assert_eq!('b', data.char_range_at(19).ch);
}
// Asking for the character before offset 0 must report `next == 0` rather
// than wrapping around below zero.
#[test]
fn test_char_range_at_reverse_underflow() {
    assert_eq!("abc".char_range_at_reverse(0).next, 0);
}
1788 fn test_iterator() {
1789 let s = "ศไทย中华Việt Nam";
1790 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1793 let mut it = s.chars();
1796 assert_eq!(c, v[pos]);
1799 assert_eq!(pos, v.len());
1803 fn test_rev_iterator() {
1804 let s = "ศไทย中华Việt Nam";
1805 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1808 let mut it = s.chars().rev();
1811 assert_eq!(c, v[pos]);
1814 assert_eq!(pos, v.len());
1818 fn test_iterator_clone() {
1819 let s = "ศไทย中华Việt Nam";
1820 let mut it = s.chars();
1822 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1826 fn test_bytesator() {
1827 let s = "ศไทย中华Việt Nam";
1829 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1830 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1835 for b in s.bytes() {
1836 assert_eq!(b, v[pos]);
1842 fn test_bytes_revator() {
1843 let s = "ศไทย中华Việt Nam";
1845 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1846 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1849 let mut pos = v.len();
1851 for b in s.bytes().rev() {
1853 assert_eq!(b, v[pos]);
1858 fn test_char_indicesator() {
1859 let s = "ศไทย中华Việt Nam";
1860 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1861 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1864 let mut it = s.char_indices();
1867 assert_eq!(c, (p[pos], v[pos]));
1870 assert_eq!(pos, v.len());
1871 assert_eq!(pos, p.len());
1875 fn test_char_indices_revator() {
1876 let s = "ศไทย中华Việt Nam";
1877 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1878 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1881 let mut it = s.char_indices().rev();
1884 assert_eq!(c, (p[pos], v[pos]));
1887 assert_eq!(pos, v.len());
1888 assert_eq!(pos, p.len());
1892 fn test_split_char_iterator() {
1893 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1895 let split: Vec<&str> = data.split(' ').collect();
1896 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1898 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1900 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1902 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1903 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1905 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1907 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1910 let split: Vec<&str> = data.split('ä').collect();
1911 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1913 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1915 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1917 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1918 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1920 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1922 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// `splitn(sep, 3)` performs at most three splits from the front and leaves the
// rest of the string as the final piece; a `char` and an equivalent closure
// must behave identically.
#[test]
fn test_splitn_char_iterator() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let split: Vec<&str> = data.splitn(' ', 3).collect();
    assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);

    let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
    assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);

    // Same again with a non-ASCII separator.
    let split: Vec<&str> = data.splitn('ä', 3).collect();
    assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);

    let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
    assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
}
1944 fn test_rsplitn_char_iterator() {
1945 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1947 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1949 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1951 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1953 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1956 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1958 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1960 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1962 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// `split` keeps the empty piece after a trailing separator, whereas
// `split_terminator` treats the separator as a terminator and drops it.
#[test]
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let split: Vec<&str> = data.split('\n').collect();
    assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let split: Vec<&str> = data.split_terminator('\n').collect();
    assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
1977 fn test_rev_split_char_iterator_no_trailing() {
1978 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1980 let mut split: Vec<&str> = data.split('\n').rev().collect();
1982 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1984 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1986 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
1991 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1992 let words: Vec<&str> = data.words().collect();
1993 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Canonical decomposition via `nfd_chars`: precomposed characters are split
// and combining marks reordered, while compatibility characters such as
// U+2026 and U+01C4 are left as they are.
#[test]
fn test_nfd_chars() {
    assert_eq!("abc".nfd_chars().collect::<String>(), "abc".to_string());
    assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(), "d\u0307\u01c4".to_string());
    assert_eq!("\u2026".nfd_chars().collect::<String>(), "\u2026".to_string());
    assert_eq!("\u2126".nfd_chars().collect::<String>(), "\u03a9".to_string());
    assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
    assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
    assert_eq!("a\u0301".nfd_chars().collect::<String>(), "a\u0301".to_string());
    assert_eq!("\u0301a".nfd_chars().collect::<String>(), "\u0301a".to_string());
    assert_eq!("\ud4db".nfd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
    assert_eq!("\uac1c".nfd_chars().collect::<String>(), "\u1100\u1162".to_string());
}
// Compatibility decomposition via `nfkd_chars`: same inputs as the NFD test,
// but U+2026 and U+01C4 are now expanded as well.
#[test]
fn test_nfkd_chars() {
    assert_eq!("abc".nfkd_chars().collect::<String>(), "abc".to_string());
    assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(), "d\u0307DZ\u030c".to_string());
    assert_eq!("\u2026".nfkd_chars().collect::<String>(), "...".to_string());
    assert_eq!("\u2126".nfkd_chars().collect::<String>(), "\u03a9".to_string());
    assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
    assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
    assert_eq!("a\u0301".nfkd_chars().collect::<String>(), "a\u0301".to_string());
    assert_eq!("\u0301a".nfkd_chars().collect::<String>(), "\u0301a".to_string());
    assert_eq!("\ud4db".nfkd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
    assert_eq!("\uac1c".nfkd_chars().collect::<String>(), "\u1100\u1162".to_string());
}
2026 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2027 let lines: Vec<&str> = data.lines().collect();
2028 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2030 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2031 let lines: Vec<&str> = data.lines().collect();
2032 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2036 fn test_split_strator() {
2037 fn t(s: &str, sep: &str, u: &[&str]) {
2038 let v: Vec<&str> = s.split_str(sep).collect();
2039 assert_eq!(v.as_slice(), u.as_slice());
2041 t("--1233345--", "12345", ["--1233345--"]);
2042 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2043 t("::hello::there", "::", ["", "hello", "there"]);
2044 t("hello::there::", "::", ["hello", "there", ""]);
2045 t("::hello::there::", "::", ["", "hello", "there", ""]);
2046 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2047 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2048 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2049 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
2051 t("zz", "zz", ["",""]);
2052 t("ok", "z", ["ok"]);
2053 t("zzz", "zz", ["","z"]);
2054 t("zzzzz", "zz", ["","","z"]);
2058 fn test_str_default() {
2059 use std::default::Default;
2060 fn t<S: Default + Str>() {
2061 let s: S = Default::default();
2062 assert_eq!(s.as_slice(), "");
// Both borrowed and owned strings can be used through the `Collection`
// trait: the generic helper sums `len()` over slices of either kind.
#[test]
fn test_str_container() {
    // Total length of every element in `v`.
    fn sum_len<S: Collection>(v: &[S]) -> uint {
        v.iter().map(|x| x.len()).sum()
    }

    let s = "01234".to_string();
    assert_eq!(5, sum_len(["012", "", "34"]));
    assert_eq!(5, sum_len(["01".to_string(), "2".to_string(),
                           "34".to_string(), "".to_string()]));
    assert_eq!(5, sum_len([s.as_slice()]));
}
2083 fn test_str_from_utf8() {
2085 assert_eq!(from_utf8(xs), Some("hello"));
2087 let xs = "ศไทย中华Việt Nam".as_bytes();
2088 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
2090 let xs = b"hello\xFF";
2091 assert_eq!(from_utf8(xs), None);
// `from_utf8_owned` turns a valid byte vector into a `String` and hands the
// original bytes back in `Err` when they are not valid UTF-8.
#[test]
fn test_str_from_utf8_owned() {
    let xs = Vec::from_slice(b"hello");
    assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));

    let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
    assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));

    // 0xFF is rejected, so the input vector comes back unchanged.
    let xs = Vec::from_slice(b"hello\xFF");
    assert_eq!(from_utf8_owned(xs),
               Err(Vec::from_slice(b"hello\xFF")));
}
2108 fn test_str_from_utf8_lossy() {
2110 assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2112 let xs = "ศไทย中华Việt Nam".as_bytes();
2113 assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
2115 let xs = b"Hello\xC2 There\xFF Goodbye";
2116 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));
2118 let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
2119 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));
2121 let xs = b"\xF5foo\xF5\x80bar";
2122 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));
2124 let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
2125 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));
2127 let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
2128 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));
2130 let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
2131 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2132 foo\U00010000bar".to_string()));
2135 let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
2136 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2137 \uFFFD\uFFFD\uFFFDbar".to_string()));
// `from_str` can produce an owned `String` from a string slice.
#[test]
fn test_from_str() {
    let owned: Option<::std::string::String> = from_str("string");
    assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string"));
}
// Exercises the trait surface of `MaybeOwned` for both variants: length,
// slicing, string conversion, formatting, ordering, `Default`, and
// cross-variant comparison.
#[test]
fn test_maybe_owned_traits() {
    let s = Slice("abcde");
    assert_eq!(s.len(), 5);
    assert_eq!(s.as_slice(), "abcde");
    assert_eq!(s.to_str().as_slice(), "abcde");
    assert_eq!(format!("{}", s).as_slice(), "abcde");
    assert!(s.lt(&Owned("bcdef".to_string())));
    assert_eq!(Slice(""), Default::default());

    let o = Owned("abcde".to_string());
    assert_eq!(o.len(), 5);
    assert_eq!(o.as_slice(), "abcde");
    assert_eq!(o.to_str().as_slice(), "abcde");
    assert_eq!(format!("{}", o).as_slice(), "abcde");
    assert!(o.lt(&Slice("bcdef")));
    assert_eq!(Owned("".to_string()), Default::default());

    // The variant does not matter for comparison, only the contents.
    assert!(s.cmp(&o) == Equal);
    assert!(s.equiv(&o));

    assert!(o.cmp(&s) == Equal);
    assert!(o.equiv(&s));
}
// `is_slice` / `is_owned` report which variant a `MaybeOwned` holds.
#[test]
fn test_maybe_owned_methods() {
    let s = Slice("abcde");
    assert!(s.is_slice());
    assert!(!s.is_owned());

    let o = Owned("abcde".to_string());
    assert!(!o.is_slice());
    assert!(o.is_owned());
}
// A clone compares equal to a value with the same contents, whichever
// variant either side happens to be.
#[test]
fn test_maybe_owned_clone() {
    assert_eq!(Owned("abcde".to_string()), Slice("abcde").clone());
    assert_eq!(Owned("abcde".to_string()), Owned("abcde".to_string()).clone());
    assert_eq!(Slice("abcde"), Slice("abcde").clone());
    assert_eq!(Slice("abcde"), Owned("abcde".to_string()).clone());
}
// `into_string` yields the same `String` from either variant.
#[test]
fn test_maybe_owned_into_string() {
    assert_eq!(Slice("abcde").into_string(), "abcde".to_string());
    assert_eq!(Owned("abcde".to_string()).into_string(), "abcde".to_string());
}
// `into_maybe_owned` on a slice or a `String` compares equal to both the
// `Slice` and the `Owned` form of the same text.
#[test]
fn test_into_maybe_owned() {
    assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
    assert_eq!(("abcde".to_string()).into_maybe_owned(), Slice("abcde"));
    assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_string()));
    assert_eq!(("abcde".to_string()).into_maybe_owned(), Owned("abcde".to_string()));
}
2209 use std::prelude::*;
2212 fn char_iterator(b: &mut Bencher) {
2213 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2214 let len = s.char_len();
2216 b.iter(|| assert_eq!(s.chars().count(), len));
2220 fn char_iterator_ascii(b: &mut Bencher) {
2221 let s = "Mary had a little lamb, Little lamb
2222 Mary had a little lamb, Little lamb
2223 Mary had a little lamb, Little lamb
2224 Mary had a little lamb, Little lamb
2225 Mary had a little lamb, Little lamb
2226 Mary had a little lamb, Little lamb";
2227 let len = s.char_len();
2229 b.iter(|| assert_eq!(s.chars().count(), len));
2233 fn char_iterator_rev(b: &mut Bencher) {
2234 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2235 let len = s.char_len();
2237 b.iter(|| assert_eq!(s.chars().rev().count(), len));
2241 fn char_indicesator(b: &mut Bencher) {
2242 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2243 let len = s.char_len();
2245 b.iter(|| assert_eq!(s.char_indices().count(), len));
2249 fn char_indicesator_rev(b: &mut Bencher) {
2250 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2251 let len = s.char_len();
2253 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
2257 fn split_unicode_ascii(b: &mut Bencher) {
2258 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2260 b.iter(|| assert_eq!(s.split('V').count(), 3));
2264 fn split_unicode_not_ascii(b: &mut Bencher) {
2265 struct NotAscii(char);
2266 impl CharEq for NotAscii {
2267 fn matches(&mut self, c: char) -> bool {
2268 let NotAscii(cc) = *self;
2271 fn only_ascii(&self) -> bool { false }
2273 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2275 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
2280 fn split_ascii(b: &mut Bencher) {
2281 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2282 let len = s.split(' ').count();
2284 b.iter(|| assert_eq!(s.split(' ').count(), len));
2288 fn split_not_ascii(b: &mut Bencher) {
2289 struct NotAscii(char);
2290 impl CharEq for NotAscii {
2292 fn matches(&mut self, c: char) -> bool {
2293 let NotAscii(cc) = *self;
2296 fn only_ascii(&self) -> bool { false }
2298 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2299 let len = s.split(' ').count();
2301 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
2305 fn split_extern_fn(b: &mut Bencher) {
2306 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2307 let len = s.split(' ').count();
2308 fn pred(c: char) -> bool { c == ' ' }
2310 b.iter(|| assert_eq!(s.split(pred).count(), len));
2314 fn split_closure(b: &mut Bencher) {
2315 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2316 let len = s.split(' ').count();
2318 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
2322 fn split_slice(b: &mut Bencher) {
2323 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2324 let len = s.split(' ').count();
2326 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
2330 fn is_utf8_100_ascii(b: &mut Bencher) {
2332 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2333 Lorem ipsum dolor sit amet, consectetur. ";
2335 assert_eq!(100, s.len());
2342 fn is_utf8_100_multibyte(b: &mut Bencher) {
2343 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2344 assert_eq!(100, s.len());
2351 fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2352 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2353 Lorem ipsum dolor sit amet, consectetur. ";
2355 assert_eq!(100, s.len());
2357 let _ = from_utf8_lossy(s);
2362 fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2363 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2364 assert_eq!(100, s.len());
2366 let _ = from_utf8_lossy(s);
2371 fn from_utf8_lossy_invalid(b: &mut Bencher) {
2372 let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
2374 let _ = from_utf8_lossy(s);
2379 fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2380 let s = Vec::from_elem(100, 0xF5u8);
2382 let _ = from_utf8_lossy(s.as_slice());
2387 fn bench_connect(b: &mut Bencher) {
2388 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2390 let v = [s, s, s, s, s, s, s, s, s, s];
2392 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2397 fn bench_contains_short_short(b: &mut Bencher) {
2398 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2402 assert!(haystack.contains(needle));
2407 fn bench_contains_short_long(b: &mut Bencher) {
2409 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2410 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2411 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2412 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2413 tempus vel, gravida nec quam.
2415 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2416 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2417 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2418 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2419 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2420 interdum. Curabitur ut nisi justo.
2422 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2423 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2424 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2425 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2426 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2427 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2428 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2429 Aliquam sit amet placerat lorem.
2431 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2432 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2433 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2434 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2435 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2438 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2439 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2440 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2441 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2442 malesuada sollicitudin quam eu fermentum.";
2443 let needle = "english";
2446 assert!(!haystack.contains(needle));
2451 fn bench_contains_bad_naive(b: &mut Bencher) {
2452 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2453 let needle = "aaaaaaaab";
2456 assert!(!haystack.contains(needle));
2461 fn bench_contains_equal(b: &mut Bencher) {
2462 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2463 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2466 assert!(haystack.contains(needle));