1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with a pointer. `String` is used
20 for an owned string, so there is only one commonly-used `str` type in Rust:
23 `&str` is the borrowed string type. This type of string can only be created
24 from other strings, unless it is a static string (see below). As the word
25 "borrowed" implies, this type of string is owned elsewhere, and this string
26 cannot be moved out of.
28 As an example, here's some code that uses a string.
32 let borrowed_string = "This string is borrowed with the 'static lifetime";
36 From the example above, you can see that Rust's string literals have the
37 `'static` lifetime. This is akin to C's concept of a static string.
39 String literals are allocated statically in the rodata of the
40 executable/library. The string then has the type `&'static str` meaning that
41 the string is valid for the `'static` lifetime, otherwise known as the
42 lifetime of the entire program. As can be inferred from the type, these static
43 strings are not mutable.
47 Many languages have immutable strings by default, and Rust has a particular
48 flavor of this idea. As with the rest of Rust's types, strings are immutable by
49 default. If a string is declared as `mut`, however, it may be mutated. This
50 works the same way as the rest of Rust's type system in the sense that if
51 there's a mutable reference to a string, there may only be one mutable reference
52 to that string. With these guarantees, strings can easily transition between
53 being mutable/immutable with the same benefits of having mutable strings in
58 Rust's string type, `str`, is a sequence of Unicode codepoints encoded as a
59 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
60 encoded UTF-8 sequences. Additionally, strings are not null-terminated
61 and can contain null codepoints.
63 The actual representation of strings has a direct mapping to vectors: `&str`
64 is the same as `&[u8]`.
68 #![doc(primitive = "str")]
73 use core::default::Default;
76 use core::iter::AdditiveIterator;
84 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
85 pub use core::str::{Bytes, CharSplits};
86 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
87 pub use core::str::{eq_slice, is_utf8, is_utf16, Utf16Items};
88 pub use core::str::{Utf16Item, ScalarValue, LoneSurrogate, utf16_items};
89 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
90 pub use core::str::{Str, StrSlice};
93 Section: Creating a string
96 /// Consumes a vector of bytes to create a new utf-8 string.
98 /// Returns `Err` with the original vector if the vector contains invalid
100 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
101 String::from_utf8(vv)
104 /// Convert a byte to a UTF-8 string
108 /// Fails if invalid UTF-8
109 pub fn from_byte(b: u8) -> String {
111 String::from_char(1, b as char)
114 /// Convert a char to a string
115 pub fn from_char(ch: char) -> String {
116 let mut buf = String::new();
121 /// Convert a vector of chars to a string
122 pub fn from_chars(chs: &[char]) -> String {
123 chs.iter().map(|c| *c).collect()
126 /// Methods for vectors of strings
127 pub trait StrVector {
128 /// Concatenate a vector of strings.
129 fn concat(&self) -> String;
131 /// Concatenate a vector of strings, placing a given separator between each.
132 fn connect(&self, sep: &str) -> String;
135 impl<'a, S: Str> StrVector for &'a [S] {
136 fn concat(&self) -> String {
138 return String::new();
141 // `len` calculation may overflow, but push_str will check boundaries
142 let len = self.iter().map(|s| s.as_slice().len()).sum();
144 let mut result = String::with_capacity(len);
146 for s in self.iter() {
147 result.push_str(s.as_slice())
153 fn connect(&self, sep: &str) -> String {
155 return String::new();
160 return self.concat();
163 // this is wrong without the guarantee that `self` is non-empty
164 // `len` calculation may overflow, but push_str will check boundaries
165 let len = sep.len() * (self.len() - 1)
166 + self.iter().map(|s| s.as_slice().len()).sum();
167 let mut result = String::with_capacity(len);
168 let mut first = true;
170 for s in self.iter() {
174 result.push_str(sep);
176 result.push_str(s.as_slice());
182 impl<'a, S: Str> StrVector for Vec<S> {
184 fn concat(&self) -> String {
185 self.as_slice().concat()
189 fn connect(&self, sep: &str) -> String {
190 self.as_slice().connect(sep)
198 // Helper functions used for Unicode normalization
199 fn canonical_sort(comb: &mut [(char, u8)]) {
200 let len = comb.len();
201 for i in range(0, len) {
202 let mut swapped = false;
203 for j in range(1, len-i) {
204 let class_a = *comb[j-1].ref1();
205 let class_b = *comb[j].ref1();
206 if class_a != 0 && class_b != 0 && class_a > class_b {
211 if !swapped { break; }
216 enum DecompositionType {
221 /// External iterator for a string's decomposition's characters.
222 /// Use with the `std::iter` module.
224 pub struct Decompositions<'a> {
225 kind: DecompositionType,
227 buffer: Vec<(char, u8)>,
231 impl<'a> Iterator<char> for Decompositions<'a> {
233 fn next(&mut self) -> Option<char> {
234 use unicode::normalization::canonical_combining_class;
236 match self.buffer.as_slice().head() {
242 Some(&(c, _)) if self.sorted => {
246 _ => self.sorted = false
249 let decomposer = match self.kind {
250 Canonical => char::decompose_canonical,
251 Compatible => char::decompose_compatible
255 for ch in self.iter {
256 let buffer = &mut self.buffer;
257 let sorted = &mut self.sorted;
259 let class = canonical_combining_class(d);
260 if class == 0 && !*sorted {
261 canonical_sort(buffer.as_mut_slice());
264 buffer.push((d, class));
271 canonical_sort(self.buffer.as_mut_slice());
275 match self.buffer.shift() {
280 Some((c, _)) => Some(c),
285 fn size_hint(&self) -> (uint, Option<uint>) {
286 let (lower, _) = self.iter.size_hint();
291 /// Replace all occurrences of one string with another
295 /// * s - The string containing substrings to replace
296 /// * from - The string to replace
297 /// * to - The replacement string
301 /// The original string with all occurrences of `from` replaced with `to`
302 pub fn replace(s: &str, from: &str, to: &str) -> String {
303 let mut result = String::new();
304 let mut last_end = 0;
305 for (start, end) in s.match_indices(from) {
306 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
310 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
318 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
319 /// if `v` contains any invalid data.
327 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
328 /// 0x0073, 0x0069, 0x0063];
329 /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_string()));
331 /// // 𝄞mu<invalid>ic
333 /// assert_eq!(str::from_utf16(v), None);
335 pub fn from_utf16(v: &[u16]) -> Option<String> {
336 let mut s = String::with_capacity(v.len() / 2);
337 for c in utf16_items(v) {
339 ScalarValue(c) => s.push_char(c),
340 LoneSurrogate(_) => return None
346 /// Decode a UTF-16 encoded vector `v` into a string, replacing
347 /// invalid data with the replacement character (U+FFFD).
353 /// // 𝄞mus<invalid>ic<invalid>
354 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
355 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
358 /// assert_eq!(str::from_utf16_lossy(v),
359 /// "𝄞mus\uFFFDic\uFFFD".to_string());
361 pub fn from_utf16_lossy(v: &[u16]) -> String {
362 utf16_items(v).map(|c| c.to_char_lossy()).collect()
365 // Return the initial codepoint accumulator for the first byte.
366 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
367 // for width 3, and 3 bits for width 4
368 macro_rules! utf8_first_byte(
369 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
372 // return the value of $ch updated with continuation byte $byte
373 macro_rules! utf8_acc_cont_byte(
374 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
// 128 is 0b1000_0000. Bytes are masked with `192u8` (the top two bits) and
// compared against this value when checking for a UTF-8 continuation byte.
377 static TAG_CONT_U8: u8 = 128u8;
379 /// Converts a vector of bytes to a new utf-8 string.
380 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
385 /// let input = b"Hello \xF0\x90\x80World";
386 /// let output = std::str::from_utf8_lossy(input);
387 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
389 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
391 return Slice(unsafe { mem::transmute(v) })
394 static REPLACEMENT: &'static [u8] = b"\xEF\xBF\xBD"; // U+FFFD in UTF-8
397 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
398 unsafe { *xs.unsafe_ref(i) }
400 fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
408 let mut res = String::with_capacity(total);
412 res.push_bytes(v.slice_to(i))
416 // subseqidx is the index of the first byte of the subsequence we're looking at.
417 // It's used to copy a bunch of contiguous good codepoints at once instead of copying
419 let mut subseqidx = 0;
423 let byte = unsafe_get(v, i);
426 macro_rules! error(() => ({
429 res.push_bytes(v.slice(subseqidx, i_));
432 res.push_bytes(REPLACEMENT);
437 // subseqidx handles this
439 let w = utf8_char_width(byte);
443 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
450 match (byte, safe_get(v, i, total)) {
451 (0xE0 , 0xA0 .. 0xBF) => (),
452 (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
453 (0xED , 0x80 .. 0x9F) => (),
454 (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
461 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
468 match (byte, safe_get(v, i, total)) {
469 (0xF0 , 0x90 .. 0xBF) => (),
470 (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
471 (0xF4 , 0x80 .. 0x8F) => (),
478 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
483 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
496 if subseqidx < total {
498 res.push_bytes(v.slice(subseqidx, total))
501 Owned(res.into_string())
508 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
509 /// This can be useful as an optimization when an allocation is sometimes
510 /// needed but not always.
511 pub enum MaybeOwned<'a> {
512 /// A borrowed string
518 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
519 pub type SendStr = MaybeOwned<'static>;
521 impl<'a> MaybeOwned<'a> {
522 /// Returns `true` if this `MaybeOwned` wraps an owned string
524 pub fn is_owned(&self) -> bool {
531 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
533 pub fn is_slice(&self) -> bool {
541 /// Trait for moving into a `MaybeOwned`
542 pub trait IntoMaybeOwned<'a> {
543 /// Moves self into a `MaybeOwned`
544 fn into_maybe_owned(self) -> MaybeOwned<'a>;
547 impl<'a> IntoMaybeOwned<'a> for String {
549 fn into_maybe_owned(self) -> MaybeOwned<'a> {
554 impl<'a> IntoMaybeOwned<'a> for &'a str {
// A borrowed string slice is wrapped directly in the `Slice` variant; the
// string data itself is not copied.
556 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
559 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
// Already a `MaybeOwned`: hand the value back unchanged.
561 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
564 impl<'a> PartialEq for MaybeOwned<'a> {
566 fn eq(&self, other: &MaybeOwned) -> bool {
567 self.as_slice() == other.as_slice()
// Empty marker impl: no methods are defined here; the comparison itself comes
// from the `PartialEq` impl for `MaybeOwned`.
571 impl<'a> Eq for MaybeOwned<'a> {}
573 impl<'a> PartialOrd for MaybeOwned<'a> {
575 fn lt(&self, other: &MaybeOwned) -> bool {
576 self.as_slice().lt(&other.as_slice())
580 impl<'a> Ord for MaybeOwned<'a> {
582 fn cmp(&self, other: &MaybeOwned) -> Ordering {
583 self.as_slice().cmp(&other.as_slice())
587 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
589 fn equiv(&self, other: &S) -> bool {
590 self.as_slice() == other.as_slice()
594 impl<'a> Str for MaybeOwned<'a> {
596 fn as_slice<'b>(&'b self) -> &'b str {
599 Owned(ref s) => s.as_slice()
604 impl<'a> StrAllocating for MaybeOwned<'a> {
606 fn into_string(self) -> String {
608 Slice(s) => s.to_string(),
614 impl<'a> Collection for MaybeOwned<'a> {
// Delegates to the length of the string slice returned by `as_slice()`.
616 fn len(&self) -> uint { self.as_slice().len() }
619 impl<'a> Clone for MaybeOwned<'a> {
621 fn clone(&self) -> MaybeOwned<'a> {
623 Slice(s) => Slice(s),
624 Owned(ref s) => Owned(s.to_string())
629 impl<'a> Default for MaybeOwned<'a> {
// The default value is the borrowed empty string literal, wrapped in `Slice`.
631 fn default() -> MaybeOwned<'a> { Slice("") }
634 impl<'a, H: hash::Writer> hash::Hash<H> for MaybeOwned<'a> {
636 fn hash(&self, hasher: &mut H) {
637 self.as_slice().hash(hasher)
641 impl<'a> fmt::Show for MaybeOwned<'a> {
643 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
645 Slice(ref s) => s.fmt(f),
646 Owned(ref s) => s.fmt(f)
651 /// Unsafe operations
653 use core::prelude::*;
655 use core::raw::Slice;
660 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
661 pub use core::str::raw::{slice_unchecked};
663 /// Create a Rust string from a *u8 buffer of the given length
664 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> String {
665 let mut result = String::new();
666 result.push_bytes(mem::transmute(Slice {
673 /// Create a Rust string from a null-terminated C string
674 pub unsafe fn from_c_str(c_string: *i8) -> String {
675 let mut buf = String::new();
677 while *c_string.offset(len) != 0 {
680 buf.push_bytes(mem::transmute(Slice {
687 /// Converts an owned vector of bytes to a new owned string. This assumes
688 /// that the utf-8-ness of the vector has already been validated
690 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
694 /// Converts a byte to a string.
695 pub unsafe fn from_byte(u: u8) -> String {
696 from_utf8_owned(vec![u])
699 /// Sets the length of a string
701 /// This will explicitly set the size of the string, without actually
702 /// modifying its buffers, so it is up to the caller to ensure that
703 /// the string is actually the specified size.
705 fn test_from_buf_len() {
706 use slice::ImmutableVector;
707 use str::StrAllocating;
710 let a = vec![65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
712 let c = from_buf_len(b, 3u);
713 assert_eq!(c, "AAA".to_string());
719 Section: Trait implementations
722 /// Any string that can be represented as a slice
723 pub trait StrAllocating: Str {
724 /// Convert `self` into a `String`, not making a copy if possible.
725 fn into_string(self) -> String;
727 /// Convert `self` into a `String`.
729 fn to_string(&self) -> String {
730 String::from_str(self.as_slice())
733 #[allow(missing_doc)]
734 #[deprecated = "replaced by .into_string()"]
735 fn into_owned(self) -> String {
739 /// Escape each char in `self` with `char::escape_default`.
740 fn escape_default(&self) -> String {
741 let me = self.as_slice();
742 let mut out = String::with_capacity(me.len());
743 for c in me.chars() {
744 c.escape_default(|c| out.push_char(c));
749 /// Escape each char in `self` with `char::escape_unicode`.
750 fn escape_unicode(&self) -> String {
751 let me = self.as_slice();
752 let mut out = String::with_capacity(me.len());
753 for c in me.chars() {
754 c.escape_unicode(|c| out.push_char(c));
759 /// Replace all occurrences of one string with another.
763 /// * `from` - The string to replace
764 /// * `to` - The replacement string
768 /// The original string with all occurrences of `from` replaced with `to`.
773 /// let s = "Do you know the muffin man,
774 /// The muffin man, the muffin man, ...".to_string();
776 /// assert_eq!(s.replace("muffin man", "little lamb"),
777 /// "Do you know the little lamb,
778 /// The little lamb, the little lamb, ...".to_string());
780 /// // not found, so no change.
781 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
783 fn replace(&self, from: &str, to: &str) -> String {
784 let me = self.as_slice();
785 let mut result = String::new();
786 let mut last_end = 0;
787 for (start, end) in me.match_indices(from) {
788 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
792 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
796 #[allow(missing_doc)]
797 #[deprecated = "obsolete, use `to_string`"]
799 fn to_owned(&self) -> String {
801 mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
805 /// Converts to a vector of `u16` encoded as UTF-16.
806 fn to_utf16(&self) -> Vec<u16> {
807 let me = self.as_slice();
808 let mut u = Vec::new();
809 for ch in me.chars() {
810 let mut buf = [0u16, ..2];
811 let n = ch.encode_utf16(buf /* as mut slice! */);
812 u.push_all(buf.slice_to(n));
817 /// Given a string, make a new string with repeated copies of it.
818 fn repeat(&self, nn: uint) -> String {
819 let me = self.as_slice();
820 let mut ret = String::with_capacity(nn * me.len());
821 for _ in range(0, nn) {
827 /// Levenshtein Distance between two strings.
828 fn lev_distance(&self, t: &str) -> uint {
829 let me = self.as_slice();
833 if slen == 0 { return tlen; }
834 if tlen == 0 { return slen; }
836 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
838 for (i, sc) in me.chars().enumerate() {
841 *dcol.get_mut(0) = current + 1;
843 for (j, tc) in t.chars().enumerate() {
845 let next = *dcol.get(j + 1);
848 *dcol.get_mut(j + 1) = current;
850 *dcol.get_mut(j + 1) = cmp::min(current, next);
851 *dcol.get_mut(j + 1) = cmp::min(*dcol.get(j + 1),
859 return *dcol.get(tlen);
862 /// An Iterator over the string in Unicode Normalization Form D
863 /// (canonical decomposition).
865 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
867 iter: self.as_slice().chars(),
874 /// An Iterator over the string in Unicode Normalization Form KD
875 /// (compatibility decomposition).
877 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
879 iter: self.as_slice().chars(),
887 impl<'a> StrAllocating for &'a str {
889 fn into_string(self) -> String {
894 /// Methods for owned strings
896 /// Consumes the string, returning the underlying byte buffer.
898 /// The buffer does not have a null terminator.
899 fn into_bytes(self) -> Vec<u8>;
901 /// Pushes the given string onto this string, returning the concatenation of the two strings.
902 fn append(self, rhs: &str) -> String;
905 impl OwnedStr for String {
907 fn into_bytes(self) -> Vec<u8> {
908 unsafe { mem::transmute(self) }
912 fn append(mut self, rhs: &str) -> String {
921 use std::iter::AdditiveIterator;
922 use std::default::Default;
930 assert!((eq_slice("foobar".slice(0, 3), "foo")));
931 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
932 assert!((!eq_slice("foo1", "foo2")));
938 assert!("" <= "foo");
939 assert!("foo" <= "foo");
940 assert!("foo" != "bar");
945 assert_eq!("".len(), 0u);
946 assert_eq!("hello world".len(), 11u);
947 assert_eq!("\x63".len(), 1u);
948 assert_eq!("\xa2".len(), 2u);
949 assert_eq!("\u03c0".len(), 2u);
950 assert_eq!("\u2620".len(), 3u);
951 assert_eq!("\U0001d11e".len(), 4u);
953 assert_eq!("".char_len(), 0u);
954 assert_eq!("hello world".char_len(), 11u);
955 assert_eq!("\x63".char_len(), 1u);
956 assert_eq!("\xa2".char_len(), 1u);
957 assert_eq!("\u03c0".char_len(), 1u);
958 assert_eq!("\u2620".char_len(), 1u);
959 assert_eq!("\U0001d11e".char_len(), 1u);
960 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
965 assert_eq!("hello".find('l'), Some(2u));
966 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
967 assert!("hello".find('x').is_none());
968 assert!("hello".find(|c:char| c == 'x').is_none());
969 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
970 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
975 assert_eq!("hello".rfind('l'), Some(3u));
976 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
977 assert!("hello".rfind('x').is_none());
978 assert!("hello".rfind(|c:char| c == 'x').is_none());
979 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
980 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
985 let empty = "".to_string();
986 let s: String = empty.as_slice().chars().collect();
987 assert_eq!(empty, s);
988 let data = "ประเทศไทย中".to_string();
989 let s: String = data.as_slice().chars().collect();
994 fn test_into_bytes() {
995 let data = "asdf".to_string();
996 let buf = data.into_bytes();
997 assert_eq!(b"asdf", buf.as_slice());
1001 fn test_find_str() {
1003 assert_eq!("".find_str(""), Some(0u));
1004 assert!("banana".find_str("apple pie").is_none());
1006 let data = "abcabc";
1007 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1008 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1009 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1011 let string = "ประเทศไทย中华Việt Nam";
1012 let mut data = string.to_string();
1013 data.push_str(string);
1014 assert!(data.as_slice().find_str("ไท华").is_none());
1015 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
1016 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
1018 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
1019 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
1020 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
1021 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
1022 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
1024 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1025 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1026 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1027 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1028 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1032 fn test_slice_chars() {
1033 fn t(a: &str, b: &str, start: uint) {
1034 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1037 t("hello", "llo", 2);
1038 t("hello", "el", 1);
1041 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1046 fn t(v: &[String], s: &str) {
1047 assert_eq!(v.concat().as_slice(), s);
1049 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1050 "no".to_string(), "good".to_string()], "youknowI'mnogood");
1051 let v: &[String] = [];
1053 t(["hi".to_string()], "hi");
1058 fn t(v: &[String], sep: &str, s: &str) {
1059 assert_eq!(v.connect(sep).as_slice(), s);
1061 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1062 "no".to_string(), "good".to_string()],
1063 " ", "you know I'm no good");
1064 let v: &[String] = [];
1066 t(["hi".to_string()], " ", "hi");
1070 fn test_concat_slices() {
1071 fn t(v: &[&str], s: &str) {
1072 assert_eq!(v.concat().as_slice(), s);
1074 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1075 let v: &[&str] = [];
1081 fn test_connect_slices() {
1082 fn t(v: &[&str], sep: &str, s: &str) {
1083 assert_eq!(v.connect(sep).as_slice(), s);
1085 t(["you", "know", "I'm", "no", "good"],
1086 " ", "you know I'm no good");
1088 t(["hi"], " ", "hi");
1093 assert_eq!("x".repeat(4), "xxxx".to_string());
1094 assert_eq!("hi".repeat(4), "hihihihi".to_string());
1095 assert_eq!("ไท华".repeat(3), "ไท华ไท华ไท华".to_string());
1096 assert_eq!("".repeat(4), "".to_string());
1097 assert_eq!("hi".repeat(0), "".to_string());
1101 fn test_unsafe_slice() {
1102 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1103 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1104 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1105 fn a_million_letter_a() -> String {
1107 let mut rs = String::new();
1109 rs.push_str("aaaaaaaaaa");
1114 fn half_a_million_letter_a() -> String {
1116 let mut rs = String::new();
1118 rs.push_str("aaaaa");
1123 let letters = a_million_letter_a();
1124 assert!(half_a_million_letter_a() ==
1125 unsafe {raw::slice_bytes(letters.as_slice(),
1127 500000)}.to_string());
1131 fn test_starts_with() {
1132 assert!(("".starts_with("")));
1133 assert!(("abc".starts_with("")));
1134 assert!(("abc".starts_with("a")));
1135 assert!((!"a".starts_with("abc")));
1136 assert!((!"".starts_with("abc")));
1137 assert!((!"ödd".starts_with("-")));
1138 assert!(("ödd".starts_with("öd")));
1142 fn test_ends_with() {
1143 assert!(("".ends_with("")));
1144 assert!(("abc".ends_with("")));
1145 assert!(("abc".ends_with("c")));
1146 assert!((!"a".ends_with("abc")));
1147 assert!((!"".ends_with("abc")));
1148 assert!((!"ddö".ends_with("-")));
1149 assert!(("ddö".ends_with("dö")));
1153 fn test_is_empty() {
1154 assert!("".is_empty());
1155 assert!(!"a".is_empty());
1161 assert_eq!("".replace(a, "b"), "".to_string());
1162 assert_eq!("a".replace(a, "b"), "b".to_string());
1163 assert_eq!("ab".replace(a, "b"), "bb".to_string());
1165 assert!(" test test ".replace(test, "toast") ==
1166 " toast toast ".to_string());
1167 assert_eq!(" test test ".replace(test, ""), " ".to_string());
1171 fn test_replace_2a() {
1172 let data = "ประเทศไทย中华";
1173 let repl = "دولة الكويت";
1176 let a2 = "دولة الكويتทศไทย中华";
1177 assert_eq!(data.replace(a, repl).as_slice(), a2);
1181 fn test_replace_2b() {
1182 let data = "ประเทศไทย中华";
1183 let repl = "دولة الكويت";
1186 let b2 = "ปรدولة الكويتทศไทย中华";
1187 assert_eq!(data.replace(b, repl).as_slice(), b2);
1191 fn test_replace_2c() {
1192 let data = "ประเทศไทย中华";
1193 let repl = "دولة الكويت";
1196 let c2 = "ประเทศไทยدولة الكويت";
1197 assert_eq!(data.replace(c, repl).as_slice(), c2);
1201 fn test_replace_2d() {
1202 let data = "ประเทศไทย中华";
1203 let repl = "دولة الكويت";
1206 assert_eq!(data.replace(d, repl).as_slice(), data);
1211 assert_eq!("ab", "abc".slice(0, 2));
1212 assert_eq!("bc", "abc".slice(1, 3));
1213 assert_eq!("", "abc".slice(1, 1));
1214 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1216 let data = "ประเทศไทย中华";
1217 assert_eq!("ป", data.slice(0, 3));
1218 assert_eq!("ร", data.slice(3, 6));
1219 assert_eq!("", data.slice(3, 3));
1220 assert_eq!("华", data.slice(30, 33));
1222 fn a_million_letter_x() -> String {
1224 let mut rs = String::new();
1226 rs.push_str("华华华华华华华华华华");
1231 fn half_a_million_letter_x() -> String {
1233 let mut rs = String::new();
1235 rs.push_str("华华华华华");
1240 let letters = a_million_letter_x();
1241 assert!(half_a_million_letter_x() ==
1242 letters.as_slice().slice(0u, 3u * 500000u).to_string());
1247 let ss = "中华Việt Nam";
1249 assert_eq!("华", ss.slice(3u, 6u));
1250 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1252 assert_eq!("ab", "abc".slice(0u, 2u));
1253 assert_eq!("bc", "abc".slice(1u, 3u));
1254 assert_eq!("", "abc".slice(1u, 1u));
1256 assert_eq!("中", ss.slice(0u, 3u));
1257 assert_eq!("华V", ss.slice(3u, 7u));
1258 assert_eq!("", ss.slice(3u, 3u));
1273 fn test_slice_fail() {
1274 "中华Việt Nam".slice(0u, 2u);
1278 fn test_slice_from() {
1279 assert_eq!("abcd".slice_from(0), "abcd");
1280 assert_eq!("abcd".slice_from(2), "cd");
1281 assert_eq!("abcd".slice_from(4), "");
1284 fn test_slice_to() {
1285 assert_eq!("abcd".slice_to(0), "");
1286 assert_eq!("abcd".slice_to(2), "ab");
1287 assert_eq!("abcd".slice_to(4), "abcd");
1291 fn test_trim_left_chars() {
1292 let v: &[char] = &[];
1293 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1294 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1295 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1296 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1298 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1299 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1300 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1304 fn test_trim_right_chars() {
1305 let v: &[char] = &[];
1306 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1307 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1308 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1309 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1311 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1312 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1313 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1317 fn test_trim_chars() {
1318 let v: &[char] = &[];
1319 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1320 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1321 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1322 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1324 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1325 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1326 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1330 fn test_trim_left() {
1331 assert_eq!("".trim_left(), "");
1332 assert_eq!("a".trim_left(), "a");
1333 assert_eq!(" ".trim_left(), "");
1334 assert_eq!(" blah".trim_left(), "blah");
1335 assert_eq!(" \u3000 wut".trim_left(), "wut");
1336 assert_eq!("hey ".trim_left(), "hey ");
1340 fn test_trim_right() {
1341 assert_eq!("".trim_right(), "");
1342 assert_eq!("a".trim_right(), "a");
1343 assert_eq!(" ".trim_right(), "");
1344 assert_eq!("blah ".trim_right(), "blah");
1345 assert_eq!("wut \u3000 ".trim_right(), "wut");
1346 assert_eq!(" hey".trim_right(), " hey");
1351 assert_eq!("".trim(), "");
1352 assert_eq!("a".trim(), "a");
1353 assert_eq!(" ".trim(), "");
1354 assert_eq!(" blah ".trim(), "blah");
1355 assert_eq!("\nwut \u3000 ".trim(), "wut");
1356 assert_eq!(" hey dude ".trim(), "hey dude");
1360 fn test_is_whitespace() {
1361 assert!("".is_whitespace());
1362 assert!(" ".is_whitespace());
1363 assert!("\u2009".is_whitespace()); // Thin space
1364 assert!(" \n\t ".is_whitespace());
1365 assert!(!" _ ".is_whitespace());
1369 fn test_slice_shift_char() {
1370 let data = "ประเทศไทย中";
1371 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1375 fn test_slice_shift_char_2() {
1377 assert_eq!(empty.slice_shift_char(), (None, ""));
1382 // deny overlong encodings
1383 assert!(!is_utf8([0xc0, 0x80]));
1384 assert!(!is_utf8([0xc0, 0xae]));
1385 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1386 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1387 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1388 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1389 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1392 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1393 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1395 assert!(is_utf8([0xC2, 0x80]));
1396 assert!(is_utf8([0xDF, 0xBF]));
1397 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1398 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1399 assert!(is_utf8([0xEE, 0x80, 0x80]));
1400 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1401 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1402 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1406 fn test_is_utf16() {
// Positive cases: asserts that `is_utf16` accepts every sequence passed in.
1407 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1415 // surrogate pairs (randomly generated with Python 3's
1416 // .encode('utf-16be'))
1417 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1418 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1419 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1421 // mixtures (also random)
1422 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1423 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1424 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
// Negative cases: asserts that `is_utf16` rejects every sequence passed in.
1427 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1430 // surrogate + regular unit
1432 // surrogate + lead surrogate
1434 // unterminated surrogate
1436 // trail surrogate without a lead
1439 // random byte sequences that Python 3's .decode('utf-16be')
1441 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1442 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1443 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1444 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1445 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1446 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1447 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1448 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1449 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1450 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1451 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1452 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1453 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1454 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1455 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1456 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1457 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1458 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1459 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1460 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1461 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
// Checks that `raw::from_c_str` turns a NUL-terminated buffer of seven 'A'
// bytes (65) into the string "AAAAAAA".
1465 fn test_raw_from_c_str() {
// Seven 'A' bytes followed by the terminating 0.
1467 let a = vec![65, 65, 65, 65, 65, 65, 65, 0];
// NOTE(review): `b` is bound on a line not present in this listing;
// presumably a raw pointer into `a` - confirm.
1469 let c = raw::from_c_str(b);
1470 assert_eq!(c, "AAAAAAA".to_string());
// Compares `as_bytes()` with literal byte values for an empty string, an
// ASCII string and a multi-byte string. The numeric rows below belong to the
// expected-byte array `v` (its opening and closing lines are not shown here).
1475 fn test_as_bytes() {
1478 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1479 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1482 assert_eq!("".as_bytes(), &[]);
1483 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1484 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
// Regression test around borrowing the bytes of an owned empty string (see
// the original note below about a double free).
// NOTE(review): the name suggests the test is expected to fail/panic; the
// statement that triggers that is not visible in this listing - confirm.
1489 fn test_as_bytes_fail() {
1490 // Don't double free. (I'm not sure if this exercises the
1491 // original problem code path anymore.)
1492 let s = "".to_string();
// The borrow is only held, never inspected.
1493 let _bytes = s.as_bytes();
// Reads the five bytes of "hello" through the raw pointer returned by
// `as_ptr()` and compares each one with the expected ASCII byte.
1499 let buf = "hello".as_ptr();
1501 assert_eq!(*buf.offset(0), 'h' as u8);
1502 assert_eq!(*buf.offset(1), 'e' as u8);
1503 assert_eq!(*buf.offset(2), 'l' as u8);
1504 assert_eq!(*buf.offset(3), 'l' as u8);
1505 assert_eq!(*buf.offset(4), 'o' as u8);
// Checks `subslice_offset` for slices taken from the same string: the result
// is the byte index at which the subslice starts inside the outer string.
1510 fn test_subslice_offset() {
1511 let a = "kernelsprite";
// `b` starts at byte 7, `c` starts at byte 0.
1512 let b = a.slice(7, a.len());
1513 let c = a.slice(0, a.len() - 6);
1514 assert_eq!(a.subslice_offset(b), 7);
1515 assert_eq!(a.subslice_offset(c), 0);
// Each line yielded by `lines()` is a subslice of `string`; the one-byte
// lines "a", "b", "c" start at bytes 0, 2 and 4.
1517 let string = "a\nb\nc";
1518 let lines: Vec<&str> = string.lines().collect();
1519 let lines = lines.as_slice();
1520 assert_eq!(string.subslice_offset(lines[0]), 0);
1521 assert_eq!(string.subslice_offset(lines[1]), 2);
1522 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with an argument that is a separate literal, not a
// slice taken from the receiver. The result is not inspected.
// NOTE(review): presumably this call is expected to fail (an attribute saying
// so is not visible in this listing) - confirm.
1527 fn test_subslice_offset_2() {
1528 let a = "alchemiter";
1529 let b = "cruxtruder";
1530 a.subslice_offset(b);
// Round-trips a string through a byte vector: String -> Vec<u8> ->
// `from_utf8` -> String, then reads the bytes of both strings at index `i`.
// NOTE(review): the loop header and the comparisons that use `n1`, `n2`, `a`
// and `b` are not visible in this listing.
1534 fn vec_str_conversions() {
1535 let s1: String = "All mimsy were the borogoves".to_string();
// Copy the bytes out, then rebuild a second string from that copy.
1537 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1538 let s2: String = from_utf8(v.as_slice()).unwrap().to_string();
1539 let mut i: uint = 0u;
1540 let n1: uint = s1.len();
1541 let n2: uint = v.len();
// Byte at position `i` of the original and of the rebuilt string.
1544 let a: u8 = s1.as_slice()[i];
1545 let b: u8 = s2.as_slice()[i];
// Exercises substring search through `contains`, first on ASCII inputs
// (including empty haystacks and needles), then on a multi-byte string.
fn test_contains() {
    // (haystack, needle) pairs in which the needle must be found. The empty
    // needle is expected to match every haystack, including the empty one.
    let present = [("abcde", "bcd"),
                   ("abcde", "abcd"),
                   ("abcde", "bcde"),
                   ("abcde", ""),
                   ("", "")];
    for &(haystack, needle) in present.iter() {
        assert!(haystack.contains(needle));
    }

    // (haystack, needle) pairs in which the needle must not be found.
    let absent = [("abcde", "def"), ("", "a")];
    for &(haystack, needle) in absent.iter() {
        assert!(!haystack.contains(needle));
    }

    // Multi-byte haystack: needles made of whole characters taken from it
    // must match; a needle mixing characters from different places must not.
    let text = "ประเทศไทย中华Việt Nam";
    let found = ["ประเ", "ะเ", "中华"];
    for needle in found.iter() {
        assert!(text.contains(*needle));
    }
    assert!(!text.contains("ไท华"));
}
// Checks `contains_char` for a char that is present, a single-char string,
// a char that is absent, and an empty haystack.
1571 fn test_contains_char() {
1572 assert!("abc".contains_char('b'));
1573 assert!("a".contains_char('a'));
1574 assert!(!"abc".contains_char('d'));
1575 assert!(!"".contains_char('a'));
// Table of (string, expected UTF-16 code units) pairs, iterated below as
// `pairs`. Most strings consist of characters that encode as two-unit
// surrogate pairs (0xd8xx followed by 0xdcxx..0xdfxx), mixed with a few
// single-unit characters such as space (0x0020) and newline (0x000a).
1581 [("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_string(),
1582 vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
1583 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
1584 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
1585 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
1587 ("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_string(),
1588 vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
1589 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
1590 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
1591 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
1592 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
1595 ("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_string(),
1596 vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
1597 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
1598 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
1599 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
1600 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
1601 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
1602 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
1604 ("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_string(),
1605 vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
1606 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
1607 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
1608 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
1609 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
1610 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
1611 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
1612 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
1613 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
1614 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
1616 // Issue #12318, even-numbered non-BMP planes
1617 ("\U00020000".to_string(),
1618 vec![0xD840, 0xDC00])];
// For every pair: the units must be valid UTF-16, encoding the string must
// give exactly the units, decoding the units (strictly and lossily) must
// give back the string, and both round trips must be lossless.
1620 for p in pairs.iter() {
1621 let (s, u) = (*p).clone();
1622 assert!(is_utf16(u.as_slice()));
1623 assert_eq!(s.to_utf16(), u);
1625 assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
1626 assert_eq!(from_utf16_lossy(u.as_slice()), s);
1628 assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
1629 assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
// Checks that strict decoding with `from_utf16` returns `None` for inputs
// containing surrogates that do not form a valid pair.
1634 fn test_utf16_invalid() {
1635 // completely positive cases tested above.
// A single lead surrogate with nothing after it.
1637 assert_eq!(from_utf16([0xD800]), None);
// Two lead surrogates in a row.
1639 assert_eq!(from_utf16([0xD800, 0xD800]), None);
// A trail surrogate (0xDC00) that follows an ordinary unit instead of a lead.
1642 assert_eq!(from_utf16([0x0061, 0xDC00]), None);
// A valid pair (0xd801, 0xdc8b) surrounded by two stray lead surrogates.
1645 assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
// Checks that `from_utf16_lossy` substitutes U+FFFD for every surrogate that
// is not part of a valid pair while keeping the valid units around it. The
// inputs are the same four sequences that strict decoding rejects.
1649 fn test_utf16_lossy() {
1650 // completely positive cases tested above.
1652 assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_string());
// Each stray surrogate gets its own replacement character.
1654 assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_string());
1657 assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_string());
// The valid pair in the middle still decodes to its character.
1660 assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]),
1661 "\uFFFD𐒋\uFFFD".to_string());
// Checks the slices returned by `truncate_utf16_at_nul` for five inputs.
// NOTE(review): each assertion uses a `v` bound on a line that is not present
// in this listing, so the inputs themselves cannot be seen here; judging by
// the function name the result is the prefix before the first 0 unit -
// confirm against the definition.
1665 fn test_truncate_utf16_at_nul() {
1667 assert_eq!(truncate_utf16_at_nul(v), &[]);
1670 assert_eq!(truncate_utf16_at_nul(v), &[]);
1673 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1676 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1679 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
// Walks `s` front to back: `char_at(pos)` must return each expected char in
// turn, with `pos` advanced by that char's encoded byte length.
// NOTE(review): the initialisation of `pos` is not visible in this listing.
1684 let s = "ศไทย中华Việt Nam";
1685 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1687 for ch in v.iter() {
1688 assert!(s.char_at(pos) == *ch);
// `from_char(*ch).len()` is the number of bytes the char occupies as a string.
1689 pos += from_char(*ch).len();
// Walks `s` back to front: starting from `s.len()`, `char_at_reverse(pos)`
// must return each expected char (taken in reverse order), with `pos` moved
// back by that char's encoded byte length.
1694 fn test_char_at_reverse() {
1695 let s = "ศไทย中华Việt Nam";
1696 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1697 let mut pos = s.len();
1698 for ch in v.iter().rev() {
1699 assert!(s.char_at_reverse(pos) == *ch);
1700 pos -= from_char(*ch).len();
// Checks the output of `escape_unicode`. In the expectations below every
// char is escaped: `\xNN` for chars up to 0xff, `\uNNNN` for chars up to
// 0xffff and `\UNNNNNNNN` above that.
1705 fn test_escape_unicode() {
1706 assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_string());
1707 assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_string());
1708 assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_string());
1709 assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_string());
1710 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_string());
1711 assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_string());
1712 assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_string());
1713 assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_string());
1714 assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_string());
// Checks the output of `escape_default`. In the expectations below plain
// ASCII text is left alone, `\r`, `\n`, `\t`, quotes and backslash get their
// short backslash escapes, and the non-ASCII chars tested get `\uNNNN` or
// `\UNNNNNNNN` escapes.
1718 fn test_escape_default() {
1719 assert_eq!("abc".escape_default(), "abc".to_string());
1720 assert_eq!("a c".escape_default(), "a c".to_string());
1721 assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_string());
1722 assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_string());
1723 assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_string());
1724 assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_string());
1725 assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_string());
1726 assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_string());
// Checks the total ordering (`cmp`) of string slices: a proper prefix sorts
// before the longer string, identical strings compare `Equal`, and otherwise
// the first differing byte decides, regardless of length.
//
// Every comparison is wrapped in `assert!`. A bare `a.cmp(&b) == Greater;`
// statement only computes a boolean and throws it away, so it could never
// make the test fail.
fn test_total_ord() {
    // Longer string with the other as a proper prefix is greater.
    assert!("1234".cmp(&("123")) == Greater);
    assert!("123".cmp(&("1234")) == Less);
    assert!("1234".cmp(&("1234")) == Equal);
    // First difference is at index 5 ('5' < '6'); the extra length is irrelevant.
    assert!("12345555".cmp(&("123456")) == Less);
    // First byte decides ('2' > '1') even though the left string is shorter.
    assert!("22".cmp(&("1234")) == Greater);
}
// Checks the `ch` field returned by `char_range_at` at the byte offset where
// each char of `data` starts. The chars occupy 1, 2, 3, 4, 4, 3, 2 and 1
// bytes, which gives the offsets 0, 1, 3, 6, 10, 14, 17 and 19.
1739 fn test_char_range_at() {
1740 let data = "b¢€𤭢𤭢€¢b";
1741 assert_eq!('b', data.char_range_at(0).ch);
1742 assert_eq!('¢', data.char_range_at(1).ch);
1743 assert_eq!('€', data.char_range_at(3).ch);
1744 assert_eq!('𤭢', data.char_range_at(6).ch);
1745 assert_eq!('𤭢', data.char_range_at(10).ch);
1746 assert_eq!('€', data.char_range_at(14).ch);
1747 assert_eq!('¢', data.char_range_at(17).ch);
1748 assert_eq!('b', data.char_range_at(19).ch);
// Edge case: `char_range_at_reverse` called at offset 0 must report `next`
// as 0 (the name of the test indicates the concern is index underflow).
1752 fn test_char_range_at_reverse_underflow() {
1753 assert_eq!("abc".char_range_at_reverse(0).next, 0);
// Compares the chars produced by `s.chars()` with the expected array `v`,
// element by element, and finally checks that `pos` equals `v.len()`.
// NOTE(review): the loop header and the lines that initialise and advance
// `pos` are not visible in this listing.
1757 fn test_iterator() {
1758 let s = "ศไทย中华Việt Nam";
1759 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1762 let mut it = s.chars();
1765 assert_eq!(c, v[pos]);
1768 assert_eq!(pos, v.len());
// Same comparison as the forward char-iterator test, but on
// `s.chars().rev()`; `v` therefore lists the chars of `s` in reverse order.
// NOTE(review): the loop header and the lines that initialise and advance
// `pos` are not visible in this listing.
1772 fn test_rev_iterator() {
1773 let s = "ศไทย中华Việt Nam";
1774 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1777 let mut it = s.chars().rev();
1780 assert_eq!(c, v[pos]);
1783 assert_eq!(pos, v.len());
// Checks that a cloned `chars()` iterator yields the same sequence as the
// iterator it was cloned from, by zipping the two and comparing each pair.
1787 fn test_iterator_clone() {
1788 let s = "ศไทย中华Việt Nam";
1789 let mut it = s.chars();
1791 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
// Compares every byte produced by `s.bytes()` with the expected value in
// `v`. The numeric rows below belong to the array `v` (its opening and
// closing lines are not shown here).
// NOTE(review): the lines that initialise and advance `pos` are not visible.
1795 fn test_bytesator() {
1796 let s = "ศไทย中华Việt Nam";
1798 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1799 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1804 for b in s.bytes() {
1805 assert_eq!(b, v[pos]);
// Compares every byte produced by `s.bytes().rev()` with the expected array
// `v`, indexing `v` with a `pos` that starts at `v.len()`.
// NOTE(review): the line that decrements `pos` before each comparison is not
// visible in this listing.
1811 fn test_bytes_revator() {
1812 let s = "ศไทย中华Việt Nam";
1814 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1815 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1818 let mut pos = v.len();
1820 for b in s.bytes().rev() {
1822 assert_eq!(b, v[pos]);
// Checks that `char_indices()` yields (byte offset, char) tuples matching
// the parallel arrays `p` (offsets) and `v` (chars), and that the number of
// items seen equals the length of both arrays.
// NOTE(review): the loop header and the `pos` bookkeeping are not visible in
// this listing.
1827 fn test_char_indicesator() {
1828 let s = "ศไทย中华Việt Nam";
// Byte offset at which each char of `s` starts.
1829 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1830 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1833 let mut it = s.char_indices();
1836 assert_eq!(c, (p[pos], v[pos]));
1839 assert_eq!(pos, v.len());
1840 assert_eq!(pos, p.len());
// Reverse counterpart of the `char_indices()` test: `char_indices().rev()`
// must yield the (byte offset, char) tuples listed in `p` and `v`, which are
// written in back-to-front order.
// NOTE(review): the loop header and the `pos` bookkeeping are not visible in
// this listing.
1844 fn test_char_indices_revator() {
1845 let s = "ศไทย中华Việt Nam";
1846 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1847 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1850 let mut it = s.char_indices().rev();
1853 assert_eq!(c, (p[pos], v[pos]));
1856 assert_eq!(pos, v.len());
1857 assert_eq!(pos, p.len());
// Exercises `split` with a char separator and with an equivalent closure
// predicate, forwards and via `.rev()`, for an ASCII separator (' ') and a
// non-ASCII one ('ä'). All variants of one separator share the same
// expected pieces.
// NOTE(review): the `rsplit` vectors are compared against the forward-order
// expectation; a statement between each `.rev().collect()` and its assertion
// is missing from this listing (presumably it reverses the vector) - confirm.
1861 fn test_split_char_iterator() {
1862 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1864 let split: Vec<&str> = data.split(' ').collect();
1865 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1867 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1869 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1871 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1872 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1874 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1876 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1879 let split: Vec<&str> = data.split('ä').collect();
1880 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1882 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1884 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1886 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1887 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1889 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1891 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Exercises `splitn(sep, 3)` with char and closure separators. In the
// expectations below a count of 3 produces three pieces cut from the front
// plus the unsplit remainder as a fourth piece.
1895 fn test_splitn_char_iterator() {
1896 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1898 let split: Vec<&str> = data.splitn(' ', 3).collect();
1899 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1901 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1902 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
// Same checks with a non-ASCII separator.
1905 let split: Vec<&str> = data.splitn('ä', 3).collect();
1906 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1908 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1909 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
// Exercises `rsplitn(sep, 3)` with char and closure separators. In the
// expectations below three pieces are cut from the back and the unsplit
// remainder is the front of the string.
// NOTE(review): the expectations are written in front-to-back order and a
// statement between each `collect()` and its assertion is missing from this
// listing (presumably it reverses the vector) - confirm.
1913 fn test_rsplitn_char_iterator() {
1914 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1916 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1918 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1920 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1922 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
// Same checks with a non-ASCII separator.
1925 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1927 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1929 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1931 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on text that both starts and
// ends with the separator: `split` reports an empty piece after the final
// '\n', `split_terminator` does not. Both keep the leading empty piece.
fn test_split_char_iterator_no_trailing() {
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";
    let first_line = "Märy häd ä little lämb";
    let second_line = "Little lämb";

    let with_trailing: Vec<&str> = text.split('\n').collect();
    assert_eq!(with_trailing, vec!["", first_line, second_line, ""]);

    let without_trailing: Vec<&str> = text.split_terminator('\n').collect();
    assert_eq!(without_trailing, vec!["", first_line, second_line]);
}
// Reverse-iteration counterpart of the `split` / `split_terminator`
// comparison: the pieces are collected through `.rev()`.
// NOTE(review): the expectations are written in forward order and a
// statement between each `collect()` and its assertion is missing from this
// listing (presumably it reverses the vector) - confirm.
1946 fn test_rev_split_char_iterator_no_trailing() {
1947 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1949 let mut split: Vec<&str> = data.split('\n').rev().collect();
1951 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1953 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1955 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
// Checks `words()`: the input mixes spaces, tabs and newlines (including
// leading and trailing ones) and the expected result contains only the
// non-whitespace runs, with no empty entries.
1960 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1961 let words: Vec<&str> = data.words().collect();
1962 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `nfd_chars()` by collecting its output into a `String` and
// comparing with a hand-written decomposed form. Cases covered below: plain
// ASCII, chars with and without a decomposition, base+mark combinations that
// come out as `d\u0323\u0307` whichever mark was precomposed, sequences that
// are returned unchanged, and two Hangul syllables split into jamo.
1966 fn test_nfd_chars() {
1967 assert_eq!("abc".nfd_chars().collect::<String>(), "abc".to_string());
1968 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(), "d\u0307\u01c4".to_string());
1969 assert_eq!("\u2026".nfd_chars().collect::<String>(), "\u2026".to_string());
1970 assert_eq!("\u2126".nfd_chars().collect::<String>(), "\u03a9".to_string());
1971 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1972 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1973 assert_eq!("a\u0301".nfd_chars().collect::<String>(), "a\u0301".to_string());
1974 assert_eq!("\u0301a".nfd_chars().collect::<String>(), "\u0301a".to_string());
1975 assert_eq!("\ud4db".nfd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
1976 assert_eq!("\uac1c".nfd_chars().collect::<String>(), "\u1100\u1162".to_string());
// Checks `nfkd_chars()` on the same inputs as the `nfd_chars()` test. The
// expectations differ from that test in two places: `\u01c4` is expanded to
// "DZ" plus `\u030c`, and `\u2026` is expanded to three ASCII dots.
1980 fn test_nfkd_chars() {
1981 assert_eq!("abc".nfkd_chars().collect::<String>(), "abc".to_string());
1982 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(), "d\u0307DZ\u030c".to_string());
1983 assert_eq!("\u2026".nfkd_chars().collect::<String>(), "...".to_string());
1984 assert_eq!("\u2126".nfkd_chars().collect::<String>(), "\u03a9".to_string());
1985 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1986 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1987 assert_eq!("a\u0301".nfkd_chars().collect::<String>(), "a\u0301".to_string());
1988 assert_eq!("\u0301a".nfkd_chars().collect::<String>(), "\u0301a".to_string());
1989 assert_eq!("\ud4db".nfkd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
1990 assert_eq!("\uac1c".nfkd_chars().collect::<String>(), "\u1100\u1162".to_string());
// Checks `lines()`: empty lines inside the text are kept as "" entries, and
// the result is the same whether or not the text ends with a final '\n'.
1995 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
1996 let lines: Vec<&str> = data.lines().collect();
1997 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
1999 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2000 let lines: Vec<&str> = data.lines().collect();
2001 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
// Exercises `split_str` (splitting on a string separator) through the local
// helper `t(s, sep, u)`, which splits `s` on `sep` and compares the pieces
// with the expected slice `u`.
2005 fn test_split_strator() {
2006 fn t(s: &str, sep: &str, u: &[&str]) {
2007 let v: Vec<&str> = s.split_str(sep).collect();
2008 assert_eq!(v.as_slice(), u.as_slice());
// Separator absent: the whole input comes back as a single piece.
2010 t("--1233345--", "12345", ["--1233345--"]);
// Separators at the start or end produce empty pieces at that end.
2011 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2012 t("::hello::there", "::", ["", "hello", "there"]);
2013 t("hello::there::", "::", ["hello", "there", ""]);
2014 t("::hello::there::", "::", ["", "hello", "there", ""]);
2015 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2016 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2017 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2018 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
// Inputs made only of the separator's character; matches are taken left to
// right without overlapping, so an odd leftover "z" stays as the last piece.
2020 t("zz", "zz", ["",""]);
2021 t("ok", "z", ["ok"]);
2022 t("zzz", "zz", ["","z"]);
2023 t("zzzzz", "zz", ["","","z"]);
// Checks that the `Default` value of a string-like type is the empty string.
// The generic helper `t` builds `S::default()` for any `S: Default + Str`
// and compares its slice view with "".
// NOTE(review): the calls that instantiate `t` for concrete types are not
// visible in this listing.
2027 fn test_str_default() {
2028 use std::default::Default;
2029 fn t<S: Default + Str>() {
2030 let s: S = Default::default();
2031 assert_eq!(s.as_slice(), "");
// Checks that string slices and owned strings can both be used through the
// `Collection` trait: the generic helper `sum_len` adds up `len()` over a
// slice of any `S: Collection`.
2039 fn test_str_container() {
2040 fn sum_len<S: Collection>(v: &[S]) -> uint {
2041 v.iter().map(|x| x.len()).sum()
2044 let s = "01234".to_string();
// Each call passes strings whose lengths add up to 5.
2045 assert_eq!(5, sum_len(["012", "", "34"]));
2046 assert_eq!(5, sum_len(["01".to_string(), "2".to_string(),
2047 "34".to_string(), "".to_string()]));
2048 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8`: valid input yields `Some(&str)`, input containing the
// byte 0xFF yields `None`.
// NOTE(review): the first `xs` binding is on a line not present in this
// listing.
2052 fn test_str_from_utf8() {
2054 assert_eq!(from_utf8(xs), Some("hello"));
2056 let xs = "ศไทย中华Việt Nam".as_bytes();
2057 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
2059 let xs = b"hello\xFF";
2060 assert_eq!(from_utf8(xs), None);
// Checks `from_utf8_owned`, which takes a byte vector by value: valid input
// yields `Ok(String)`; invalid input yields `Err` carrying a vector equal to
// the original bytes.
2064 fn test_str_from_utf8_owned() {
2065 let xs = Vec::from_slice(b"hello");
2066 assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));
2068 let xs = Vec::from_slice("ศไทย中华Việt Nam".as_bytes());
2069 assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));
2071 let xs = Vec::from_slice(b"hello\xFF");
2072 assert_eq!(from_utf8_owned(xs),
2073 Err(Vec::from_slice(b"hello\xFF")));
// Checks `from_utf8_lossy`. Valid input is expected back as a borrowed
// `Slice`; input with invalid bytes is expected as an `Owned` string in
// which the invalid parts are replaced by U+FFFD. The cases below pin down
// how many replacement characters each kind of invalid sequence produces.
// NOTE(review): the first `xs` binding is on a line not present in this
// listing.
2077 fn test_str_from_utf8_lossy() {
2079 assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2081 let xs = "ศไทย中华Việt Nam".as_bytes();
2082 assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
2084 let xs = b"Hello\xC2 There\xFF Goodbye";
2085 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));
// \xC0\x80 yields two replacements; the cut-short \xE6\x83 yields one.
2087 let xs = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
2088 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));
2090 let xs = b"\xF5foo\xF5\x80bar";
2091 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));
2093 let xs = b"\xF1foo\xF1\x80bar\xF1\x80\x80baz";
2094 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));
2096 let xs = b"\xF4foo\xF4\x80bar\xF4\xBFbaz";
2097 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));
// \xF0\x80\x80\x80 yields four replacements, while \xF0\x90\x80\x80 is the
// valid encoding of U+10000 and is kept.
2099 let xs = b"\xF0\x80\x80\x80foo\xF0\x90\x80\x80bar";
2100 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2101 foo\U00010000bar".to_string()));
// Three-byte encodings of the surrogates U+D800 / U+DFFF yield three
// replacements each.
2104 let xs = b"\xED\xA0\x80foo\xED\xBF\xBFbar";
2105 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2106 \uFFFD\uFFFD\uFFFDbar".to_string()));
// Checks that `from_str` can produce an owned `String` from a string slice
// and that the result holds the same text.
2110 fn test_from_str() {
2111 let owned: Option<::std::string::String> = from_str("string");
2112 assert_eq!(owned.as_ref().map(|s| s.as_slice()), Some("string"));
// Exercises the traits used on the two-variant string type whose
// constructors are `Slice` (wrapping a `&str`) and `Owned` (wrapping a
// `String`): length, slice view, `to_str`, formatting, ordering, default
// value, and comparison across the two variants.
2116 fn test_maybe_owned_traits() {
2117 let s = Slice("abcde");
2118 assert_eq!(s.len(), 5);
2119 assert_eq!(s.as_slice(), "abcde");
2120 assert_eq!(s.to_str().as_slice(), "abcde");
2121 assert_eq!(format!("{}", s).as_slice(), "abcde");
2122 assert!(s.lt(&Owned("bcdef".to_string())));
2123 assert_eq!(Slice(""), Default::default());
// The same checks starting from the `Owned` variant.
2125 let o = Owned("abcde".to_string());
2126 assert_eq!(o.len(), 5);
2127 assert_eq!(o.as_slice(), "abcde");
2128 assert_eq!(o.to_str().as_slice(), "abcde");
2129 assert_eq!(format!("{}", o).as_slice(), "abcde");
2130 assert!(o.lt(&Slice("bcdef")));
2131 assert_eq!(Owned("".to_string()), Default::default());
// A `Slice` and an `Owned` with the same text compare as equal, both ways.
2133 assert!(s.cmp(&o) == Equal);
2134 assert!(s.equiv(&o));
2136 assert!(o.cmp(&s) == Equal);
2137 assert!(o.equiv(&s));
// Checks the variant predicates: `is_slice()` is true only for `Slice`,
// `is_owned()` is true only for `Owned`.
2141 fn test_maybe_owned_methods() {
2142 let s = Slice("abcde");
2143 assert!(s.is_slice());
2144 assert!(!s.is_owned());
2146 let o = Owned("abcde".to_string());
2147 assert!(!o.is_slice());
2148 assert!(o.is_owned());
// Checks that cloning either variant gives a value equal to both an `Owned`
// and a `Slice` holding the same text (all four combinations).
2152 fn test_maybe_owned_clone() {
2153 assert_eq!(Owned("abcde".to_string()), Slice("abcde").clone());
2154 assert_eq!(Owned("abcde".to_string()), Owned("abcde".to_string()).clone());
2155 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2156 assert_eq!(Slice("abcde"), Owned("abcde".to_string()).clone());
// Checks that `into_string()` on either variant yields a `String` with the
// same text.
2160 fn test_maybe_owned_into_string() {
2161 assert_eq!(Slice("abcde").into_string(), "abcde".to_string());
2162 assert_eq!(Owned("abcde".to_string()).into_string(), "abcde".to_string());
// Checks `into_maybe_owned()` on a `&str` and on a `String`. Each result is
// compared against both a `Slice` and an `Owned` expectation, so these
// assertions rely on equality comparing the text rather than the variant.
2166 fn test_into_maybe_owned() {
2167 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2168 assert_eq!(("abcde".to_string()).into_maybe_owned(), Slice("abcde"));
2169 assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_string()));
2170 assert_eq!(("abcde".to_string()).into_maybe_owned(), Owned("abcde".to_string()));
2178 use std::prelude::*;
// Benchmark: counts the chars of a string mixing multi-byte and ASCII text
// with `chars().count()` on every iteration. The expected count comes from
// `char_len()`, computed once outside the timed closure.
2181 fn char_iterator(b: &mut Bencher) {
2182 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2183 let len = s.char_len();
2185 b.iter(|| assert_eq!(s.chars().count(), len));
// Benchmark: same measurement as `chars().count()` on mixed text, but on an
// ASCII-only input built from six repetitions of one sentence in a
// multi-line string literal.
2189 fn char_iterator_ascii(b: &mut Bencher) {
2190 let s = "Mary had a little lamb, Little lamb
2191 Mary had a little lamb, Little lamb
2192 Mary had a little lamb, Little lamb
2193 Mary had a little lamb, Little lamb
2194 Mary had a little lamb, Little lamb
2195 Mary had a little lamb, Little lamb";
2196 let len = s.char_len();
2198 b.iter(|| assert_eq!(s.chars().count(), len));
// Benchmark: counts chars through the reversed iterator
// (`chars().rev().count()`) on a string mixing multi-byte and ASCII text.
2202 fn char_iterator_rev(b: &mut Bencher) {
2203 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2204 let len = s.char_len();
2206 b.iter(|| assert_eq!(s.chars().rev().count(), len));
// Benchmark: counts the items of `char_indices()` on a string mixing
// multi-byte and ASCII text; the count must equal `char_len()`.
2210 fn char_indicesator(b: &mut Bencher) {
2211 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2212 let len = s.char_len();
2214 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: counts the items of `char_indices().rev()` on a string mixing
// multi-byte and ASCII text; the count must equal `char_len()`.
2218 fn char_indicesator_rev(b: &mut Bencher) {
2219 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2220 let len = s.char_len();
2222 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly non-ASCII string on the ASCII char 'V', which
// occurs twice, so three pieces are expected.
2226 fn split_unicode_ascii(b: &mut Bencher) {
2227 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2229 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: same split as on the char 'V' directly, but the separator is
// wrapped in the local `NotAscii` type, a `CharEq` implementation whose
// `only_ascii()` returns false.
// NOTE(review): presumably this steers `split` away from an ASCII-only fast
// path so the general path is measured - confirm against `split`.
2233 fn split_unicode_not_ascii(b: &mut Bencher) {
2234 struct NotAscii(char);
2235 impl CharEq for NotAscii {
2236 fn matches(&mut self, c: char) -> bool {
// Unwrap the stored char; the comparison itself is on a line not shown here.
2237 let NotAscii(cc) = *self;
2240 fn only_ascii(&self) -> bool { false }
2242 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2244 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on ' '. The expected piece count is
// computed once, outside the timed closure, with the same call.
2249 fn split_ascii(b: &mut Bencher) {
2250 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2251 let len = s.split(' ').count();
2253 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence on ' ' wrapped in the local `NotAscii`
// type, a `CharEq` implementation whose `only_ascii()` returns false. The
// expected piece count comes from a plain `split(' ')`.
// NOTE(review): presumably this measures the non-ASCII code path of `split`
// on ASCII data - confirm against `split`.
2257 fn split_not_ascii(b: &mut Bencher) {
2258 struct NotAscii(char);
2259 impl CharEq for NotAscii {
2261 fn matches(&mut self, c: char) -> bool {
// Unwrap the stored char; the comparison itself is on a line not shown here.
2262 let NotAscii(cc) = *self;
2265 fn only_ascii(&self) -> bool { false }
2267 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2268 let len = s.split(' ').count();
2270 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function (`pred`) as the
// separator predicate instead of a char.
2274 fn split_extern_fn(b: &mut Bencher) {
2275 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2276 let len = s.split(' ').count();
// Matches exactly the space character.
2277 fn pred(c: char) -> bool { c == ' ' }
2279 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using a closure as the separator
// predicate instead of a char.
2283 fn split_closure(b: &mut Bencher) {
2284 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2285 let len = s.split(' ').count();
2287 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a one-element slice of chars
// (`&[' ']`) as the separator.
2291 fn split_slice(b: &mut Bencher) {
2292 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2293 let len = s.split(' ').count();
2295 b.iter(|| assert_eq!(s.split(&[' ']).count(), len));
// Benchmark set-up on a 100-byte ASCII input; the assertion pins the input
// length so that the "100" in the name stays true.
// NOTE(review): the timed closure is not visible in this listing; by the
// name it measures `is_utf8` - confirm.
2299 fn is_utf8_100_ascii(b: &mut Bencher) {
2301 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2302 Lorem ipsum dolor sit amet, consectetur. ";
2304 assert_eq!(100, s.len());
// Benchmark set-up on a 100-byte input made of multi-byte characters; the
// assertion pins the byte length.
// NOTE(review): the timed closure is not visible in this listing; by the
// name it measures `is_utf8` - confirm.
2311 fn is_utf8_100_multibyte(b: &mut Bencher) {
2312 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2313 assert_eq!(100, s.len());
// Benchmark: runs `from_utf8_lossy` on a 100-byte ASCII input (length pinned
// by the assertion) and discards the result.
2320 fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2321 let s = b"Hello there, the quick brown fox jumped over the lazy dog! \
2322 Lorem ipsum dolor sit amet, consectetur. ";
2324 assert_eq!(100, s.len());
2326 let _ = from_utf8_lossy(s);
// Benchmark: runs `from_utf8_lossy` on a 100-byte input made of multi-byte
// characters (length pinned by the assertion) and discards the result.
2331 fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2332 let s = "𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰".as_bytes();
2333 assert_eq!(100, s.len());
2335 let _ = from_utf8_lossy(s);
// Benchmark: runs `from_utf8_lossy` on a short input that contains two
// invalid byte sequences (\xC0\x80 and \xE6\x83) and discards the result.
2340 fn from_utf8_lossy_invalid(b: &mut Bencher) {
2341 let s = b"Hello\xC0\x80 There\xE6\x83 Goodbye";
2343 let _ = from_utf8_lossy(s);
// Benchmark: runs `from_utf8_lossy` on 100 copies of the byte 0xF5, an input
// that consists only of invalid bytes, and discards the result.
2348 fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2349 let s = Vec::from_elem(100, 0xF5u8);
2351 let _ = from_utf8_lossy(s.as_slice());
// Benchmark: joins ten copies of `s` with `connect(sep)` and checks the
// length of the result: ten strings plus nine separators.
// NOTE(review): `sep` is bound on a line not present in this listing.
2356 fn bench_connect(b: &mut Bencher) {
2357 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2359 let v = [s, s, s, s, s, s, s, s, s, s];
2361 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: searches a one-sentence haystack with `contains`; the needle
// is expected to be found.
// NOTE(review): `needle` is bound on a line not present in this listing.
2366 fn bench_contains_short_short(b: &mut Bencher) {
2367 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2371 assert!(haystack.contains(needle));
// Benchmark: searches a multi-paragraph haystack with `contains` for a
// short needle. The paragraphs below are the body of the `haystack` string
// literal (its opening line is not shown here).
2376 fn bench_contains_short_long(b: &mut Bencher) {
2378 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2379 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2380 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2381 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2382 tempus vel, gravida nec quam.
2384 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2385 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2386 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2387 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2388 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2389 interdum. Curabitur ut nisi justo.
2391 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2392 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2393 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2394 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2395 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2396 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2397 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2398 Aliquam sit amet placerat lorem.
2400 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2401 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2402 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2403 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2404 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2407 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2408 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2409 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2410 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2411 malesuada sollicitudin quam eu fermentum.";
// The assertion below expects this needle to be absent from the haystack.
2412 let needle = "english";
2415 assert!(!haystack.contains(needle));
// Benchmark: searches a haystack made only of 'a' characters for a needle
// of eight 'a's followed by 'b'. The needle is absent, but every position of
// the haystack matches a long prefix of it. The name suggests this is meant
// as a bad case for a naive search.
2420 fn bench_contains_bad_naive(b: &mut Bencher) {
2421 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2422 let needle = "aaaaaaaab";
2425 assert!(!haystack.contains(needle));
2430 fn bench_contains_equal(b: &mut Bencher) {
2431 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2432 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2435 assert!(haystack.contains(needle));