1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
13 Unicode string manipulation (`str` type)
17 Rust's string type is one of the core primitive types of the language. While
18 represented by the name `str`, the name `str` is not actually a valid type in
19 Rust. Each string must also be decorated with a pointer. `String` is used
20 for an owned string, so there is only one commonly-used `str` type in Rust:
23 `&str` is the borrowed string type. This type of string can only be created
24 from other strings, unless it is a static string (see below). As the word
25 "borrowed" implies, this type of string is owned elsewhere, and this string
26 cannot be moved out of.
28 As an example, here's some code that uses a string.
32 let borrowed_string = "This string is borrowed with the 'static lifetime";
36 From the example above, you can see that Rust's string literals have the
37 `'static` lifetime. This is akin to C's concept of a static string.
39 String literals are allocated statically in the rodata of the
40 executable/library. The string then has the type `&'static str` meaning that
41 the string is valid for the `'static` lifetime, otherwise known as the
42 lifetime of the entire program. As can be inferred from the type, these static
43 strings are not mutable.
47 Many languages have immutable strings by default, and Rust has its own
48 take on this idea. As with the rest of Rust's types, strings are immutable by
49 default. If a string is declared as `mut`, however, it may be mutated. This
50 works the same way as the rest of Rust's type system in the sense that if
51 there's a mutable reference to a string, there may only be one mutable reference
52 to that string. With these guarantees, strings can easily transition between
53 being mutable/immutable with the same benefits of having mutable strings in
58 Rust's string type, `str`, is a sequence of Unicode codepoints encoded as a
59 stream of UTF-8 bytes. All safely-created strings are guaranteed to be validly
60 encoded UTF-8 sequences. Additionally, strings are not null-terminated
61 and can contain null codepoints.
63 The actual representation of strings has a direct mapping to vectors: `&str`
64 is the same as `&[u8]`.
71 use cmp::{Eq, TotalEq, Ord, TotalOrd, Equiv, Ordering};
72 use container::Container;
76 use iter::{Iterator, range, AdditiveIterator};
79 use option::{None, Option, Some};
82 use slice::{ImmutableVector, MutableVector, CloneableVector};
86 pub use core::str::{from_utf8, CharEq, Chars, CharOffsets};
87 pub use core::str::{Bytes, CharSplits};
88 pub use core::str::{CharSplitsN, Words, AnyLines, MatchIndices, StrSplits};
89 pub use core::str::{eq_slice, is_utf8, is_utf16, UTF16Items};
90 pub use core::str::{UTF16Item, ScalarValue, LoneSurrogate, utf16_items};
91 pub use core::str::{truncate_utf16_at_nul, utf8_char_width, CharRange};
92 pub use core::str::{Str, StrSlice};
95 Section: Creating a string
98 /// Consumes a vector of bytes to create a new utf-8 string.
100 /// Returns `Err` with the original vector if the vector contains invalid
102 pub fn from_utf8_owned(vv: Vec<u8>) -> Result<String, Vec<u8>> {
103 String::from_utf8(vv)
106 /// Convert a byte to a UTF-8 string
110 /// Fails if invalid UTF-8
111 pub fn from_byte(b: u8) -> String {
113 String::from_char(1, b as char)
116 /// Convert a char to a string
117 pub fn from_char(ch: char) -> String {
118 let mut buf = String::new();
123 /// Convert a vector of chars to a string
124 pub fn from_chars(chs: &[char]) -> String {
125 chs.iter().map(|c| *c).collect()
128 /// Methods for vectors of strings
129 pub trait StrVector {
130 /// Concatenate a vector of strings.
131 fn concat(&self) -> String;
133 /// Concatenate a vector of strings, placing a given separator between each.
134 fn connect(&self, sep: &str) -> String;
137 impl<'a, S: Str> StrVector for &'a [S] {
138 fn concat(&self) -> String {
140 return String::new();
143 // `len` calculation may overflow, but push_str will check boundaries
144 let len = self.iter().map(|s| s.as_slice().len()).sum();
146 let mut result = String::with_capacity(len);
148 for s in self.iter() {
149 result.push_str(s.as_slice())
155 fn connect(&self, sep: &str) -> String {
157 return String::new();
162 return self.concat();
165 // this is wrong without the guarantee that `self` is non-empty
166 // `len` calculation may overflow, but push_str will check boundaries
167 let len = sep.len() * (self.len() - 1)
168 + self.iter().map(|s| s.as_slice().len()).sum();
169 let mut result = String::with_capacity(len);
170 let mut first = true;
172 for s in self.iter() {
176 result.push_str(sep);
178 result.push_str(s.as_slice());
184 impl<'a, S: Str> StrVector for Vec<S> {
186 fn concat(&self) -> String {
187 self.as_slice().concat()
191 fn connect(&self, sep: &str) -> String {
192 self.as_slice().connect(sep)
200 // Helper functions used for Unicode normalization
201 fn canonical_sort(comb: &mut [(char, u8)]) {
205 let len = comb.len();
206 for i in range(0, len) {
207 let mut swapped = false;
208 for j in range(1, len-i) {
209 let class_a = *comb[j-1].ref1();
210 let class_b = *comb[j].ref1();
211 if class_a != 0 && class_b != 0 && class_a > class_b {
216 if !swapped { break; }
221 enum DecompositionType {
226 /// External iterator for a string's decomposition's characters.
227 /// Use with the `std::iter` module.
229 pub struct Decompositions<'a> {
230 kind: DecompositionType,
232 buffer: Vec<(char, u8)>,
236 impl<'a> Iterator<char> for Decompositions<'a> {
238 fn next(&mut self) -> Option<char> {
239 use unicode::normalization::canonical_combining_class;
241 match self.buffer.as_slice().head() {
247 Some(&(c, _)) if self.sorted => {
251 _ => self.sorted = false
254 let decomposer = match self.kind {
255 Canonical => char::decompose_canonical,
256 Compatible => char::decompose_compatible
260 for ch in self.iter {
261 let buffer = &mut self.buffer;
262 let sorted = &mut self.sorted;
264 let class = canonical_combining_class(d);
265 if class == 0 && !*sorted {
266 canonical_sort(buffer.as_mut_slice());
269 buffer.push((d, class));
276 canonical_sort(self.buffer.as_mut_slice());
280 match self.buffer.shift() {
285 Some((c, _)) => Some(c),
290 fn size_hint(&self) -> (uint, Option<uint>) {
291 let (lower, _) = self.iter.size_hint();
296 /// Replace all occurrences of one string with another
300 /// * s - The string containing substrings to replace
301 /// * from - The string to replace
302 /// * to - The replacement string
306 /// The original string with all occurrences of `from` replaced with `to`
307 pub fn replace(s: &str, from: &str, to: &str) -> String {
308 let mut result = String::new();
309 let mut last_end = 0;
310 for (start, end) in s.match_indices(from) {
311 result.push_str(unsafe{raw::slice_bytes(s, last_end, start)});
315 result.push_str(unsafe{raw::slice_bytes(s, last_end, s.len())});
323 /// Decode a UTF-16 encoded vector `v` into a string, returning `None`
324 /// if `v` contains any invalid data.
332 /// let mut v = [0xD834, 0xDD1E, 0x006d, 0x0075,
333 /// 0x0073, 0x0069, 0x0063];
334 /// assert_eq!(str::from_utf16(v), Some("𝄞music".to_string()));
336 /// // 𝄞mu<invalid>ic
338 /// assert_eq!(str::from_utf16(v), None);
340 pub fn from_utf16(v: &[u16]) -> Option<String> {
341 let mut s = String::with_capacity(v.len() / 2);
342 for c in utf16_items(v) {
344 ScalarValue(c) => s.push_char(c),
345 LoneSurrogate(_) => return None
351 /// Decode a UTF-16 encoded vector `v` into a string, replacing
352 /// invalid data with the replacement character (U+FFFD).
358 /// // 𝄞mus<invalid>ic<invalid>
359 /// let v = [0xD834, 0xDD1E, 0x006d, 0x0075,
360 /// 0x0073, 0xDD1E, 0x0069, 0x0063,
363 /// assert_eq!(str::from_utf16_lossy(v),
364 /// "𝄞mus\uFFFDic\uFFFD".to_string());
366 pub fn from_utf16_lossy(v: &[u16]) -> String {
367 utf16_items(v).map(|c| c.to_char_lossy()).collect()
370 // Return the initial codepoint accumulator for the first byte.
371 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
372 // for width 3, and 3 bits for width 4
373 macro_rules! utf8_first_byte(
374 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
377 // return the value of $ch updated with continuation byte $byte
378 macro_rules! utf8_acc_cont_byte(
379 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
382 static TAG_CONT_U8: u8 = 128u8;
384 /// Converts a vector of bytes to a new utf-8 string.
385 /// Any invalid utf-8 sequences are replaced with U+FFFD REPLACEMENT CHARACTER.
390 /// let input = bytes!("Hello ", 0xF0, 0x90, 0x80, "World");
391 /// let output = std::str::from_utf8_lossy(input);
392 /// assert_eq!(output.as_slice(), "Hello \uFFFDWorld");
394 pub fn from_utf8_lossy<'a>(v: &'a [u8]) -> MaybeOwned<'a> {
396 return Slice(unsafe { mem::transmute(v) })
399 static REPLACEMENT: &'static [u8] = bytes!(0xEF, 0xBF, 0xBD); // U+FFFD in UTF-8
402 fn unsafe_get(xs: &[u8], i: uint) -> u8 {
403 unsafe { *xs.unsafe_ref(i) }
405 fn safe_get(xs: &[u8], i: uint, total: uint) -> u8 {
413 let mut res = String::with_capacity(total);
417 res.push_bytes(v.slice_to(i))
421 // subseqidx is the index of the first byte of the subsequence we're looking at.
422 // It's used to copy a bunch of contiguous good codepoints at once instead of copying
424 let mut subseqidx = 0;
428 let byte = unsafe_get(v, i);
431 macro_rules! error(() => ({
434 res.push_bytes(v.slice(subseqidx, i_));
437 res.push_bytes(REPLACEMENT);
442 // subseqidx handles this
444 let w = utf8_char_width(byte);
448 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
455 match (byte, safe_get(v, i, total)) {
456 (0xE0 , 0xA0 .. 0xBF) => (),
457 (0xE1 .. 0xEC, 0x80 .. 0xBF) => (),
458 (0xED , 0x80 .. 0x9F) => (),
459 (0xEE .. 0xEF, 0x80 .. 0xBF) => (),
466 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
473 match (byte, safe_get(v, i, total)) {
474 (0xF0 , 0x90 .. 0xBF) => (),
475 (0xF1 .. 0xF3, 0x80 .. 0xBF) => (),
476 (0xF4 , 0x80 .. 0x8F) => (),
483 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
488 if safe_get(v, i, total) & 192u8 != TAG_CONT_U8 {
501 if subseqidx < total {
503 res.push_bytes(v.slice(subseqidx, total))
506 Owned(res.into_owned())
513 /// A `MaybeOwned` is a string that can hold either a `String` or a `&str`.
514 /// This can be useful as an optimization when an allocation is sometimes
515 /// needed but not always.
516 pub enum MaybeOwned<'a> {
517 /// A borrowed string
523 /// `SendStr` is a specialization of `MaybeOwned` to be sendable
524 pub type SendStr = MaybeOwned<'static>;
526 impl<'a> MaybeOwned<'a> {
527 /// Returns `true` if this `MaybeOwned` wraps an owned string
529 pub fn is_owned(&self) -> bool {
536 /// Returns `true` if this `MaybeOwned` wraps a borrowed string
538 pub fn is_slice(&self) -> bool {
546 /// Trait for moving into a `MaybeOwned`
547 pub trait IntoMaybeOwned<'a> {
548 /// Moves self into a `MaybeOwned`
549 fn into_maybe_owned(self) -> MaybeOwned<'a>;
552 impl<'a> IntoMaybeOwned<'a> for String {
554 fn into_maybe_owned(self) -> MaybeOwned<'a> {
559 impl<'a> IntoMaybeOwned<'a> for &'a str {
561 fn into_maybe_owned(self) -> MaybeOwned<'a> { Slice(self) }
564 impl<'a> IntoMaybeOwned<'a> for MaybeOwned<'a> {
566 fn into_maybe_owned(self) -> MaybeOwned<'a> { self }
569 impl<'a> Eq for MaybeOwned<'a> {
571 fn eq(&self, other: &MaybeOwned) -> bool {
572 self.as_slice() == other.as_slice()
576 impl<'a> TotalEq for MaybeOwned<'a> {}
578 impl<'a> Ord for MaybeOwned<'a> {
580 fn lt(&self, other: &MaybeOwned) -> bool {
581 self.as_slice().lt(&other.as_slice())
585 impl<'a> TotalOrd for MaybeOwned<'a> {
587 fn cmp(&self, other: &MaybeOwned) -> Ordering {
588 self.as_slice().cmp(&other.as_slice())
592 impl<'a, S: Str> Equiv<S> for MaybeOwned<'a> {
594 fn equiv(&self, other: &S) -> bool {
595 self.as_slice() == other.as_slice()
599 impl<'a> Str for MaybeOwned<'a> {
601 fn as_slice<'b>(&'b self) -> &'b str {
604 Owned(ref s) => s.as_slice()
609 impl<'a> StrAllocating for MaybeOwned<'a> {
611 fn into_owned(self) -> String {
613 Slice(s) => s.to_string(),
619 impl<'a> Container for MaybeOwned<'a> {
621 fn len(&self) -> uint { self.as_slice().len() }
624 impl<'a> Clone for MaybeOwned<'a> {
626 fn clone(&self) -> MaybeOwned<'a> {
628 Slice(s) => Slice(s),
629 Owned(ref s) => Owned(s.to_string())
634 impl<'a> Default for MaybeOwned<'a> {
636 fn default() -> MaybeOwned<'a> { Slice("") }
639 impl<'a, H: Writer> ::hash::Hash<H> for MaybeOwned<'a> {
641 fn hash(&self, hasher: &mut H) {
643 Slice(s) => s.hash(hasher),
644 Owned(ref s) => s.as_slice().hash(hasher),
649 impl<'a> fmt::Show for MaybeOwned<'a> {
651 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
653 Slice(ref s) => s.fmt(f),
654 Owned(ref s) => s.fmt(f)
659 /// Unsafe operations
668 pub use core::str::raw::{from_utf8, c_str_to_static_slice, slice_bytes};
669 pub use core::str::raw::{slice_unchecked};
671 /// Create a Rust string from a *u8 buffer of the given length
672 pub unsafe fn from_buf_len(buf: *u8, len: uint) -> String {
673 let mut result = String::new();
674 result.push_bytes(mem::transmute(Slice {
681 /// Create a Rust string from a null-terminated C string
682 pub unsafe fn from_c_str(c_string: *libc::c_char) -> String {
683 let mut buf = String::new();
684 buf.push_bytes(CString::new(c_string, false).as_bytes_no_nul());
688 /// Converts an owned vector of bytes to a new owned string. This assumes
689 /// that the utf-8-ness of the vector has already been validated
691 pub unsafe fn from_utf8_owned(v: Vec<u8>) -> String {
695 /// Converts a byte to a string.
696 pub unsafe fn from_byte(u: u8) -> String {
697 from_utf8_owned(vec![u])
700 /// Sets the length of a string
702 /// This will explicitly set the size of the string, without actually
703 /// modifying its buffers, so it is up to the caller to ensure that
704 /// the string is actually the specified size.
706 fn test_from_buf_len() {
707 use slice::ImmutableVector;
708 use str::StrAllocating;
711 let a = ~[65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 65u8, 0u8];
713 let c = from_buf_len(b, 3u);
714 assert_eq!(c, "AAA".to_string());
720 Section: Trait implementations
723 /// Any string that can be represented as a slice
724 pub trait StrAllocating: Str {
725 /// Convert `self` into a `String`, not making a copy if possible.
726 fn into_owned(self) -> String;
728 /// Convert `self` into a `String`.
730 fn to_string(&self) -> String {
731 String::from_str(self.as_slice())
734 /// Convert `self` into a `String`, not making a copy if possible.
736 fn into_string(self) -> String {
740 /// Escape each char in `self` with `char::escape_default`.
741 fn escape_default(&self) -> String {
742 let me = self.as_slice();
743 let mut out = String::with_capacity(me.len());
744 for c in me.chars() {
745 c.escape_default(|c| out.push_char(c));
750 /// Escape each char in `self` with `char::escape_unicode`.
751 fn escape_unicode(&self) -> String {
752 let me = self.as_slice();
753 let mut out = String::with_capacity(me.len());
754 for c in me.chars() {
755 c.escape_unicode(|c| out.push_char(c));
760 /// Replace all occurrences of one string with another.
764 /// * `from` - The string to replace
765 /// * `to` - The replacement string
769 /// The original string with all occurrences of `from` replaced with `to`.
774 /// let s = "Do you know the muffin man,
775 /// The muffin man, the muffin man, ...".to_string();
777 /// assert_eq!(s.replace("muffin man", "little lamb"),
778 /// "Do you know the little lamb,
779 /// The little lamb, the little lamb, ...".to_string());
781 /// // not found, so no change.
782 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
784 fn replace(&self, from: &str, to: &str) -> String {
785 let me = self.as_slice();
786 let mut result = String::new();
787 let mut last_end = 0;
788 for (start, end) in me.match_indices(from) {
789 result.push_str(unsafe{raw::slice_bytes(me, last_end, start)});
793 result.push_str(unsafe{raw::slice_bytes(me, last_end, me.len())});
797 #[allow(missing_doc)]
798 #[deprecated = "obsolete, use `to_string`"]
800 fn to_owned(&self) -> String {
804 ::mem::transmute(Vec::from_slice(self.as_slice().as_bytes()))
808 /// Converts to a vector of `u16` encoded as UTF-16.
809 fn to_utf16(&self) -> Vec<u16> {
810 let me = self.as_slice();
811 let mut u = Vec::new();
812 for ch in me.chars() {
813 let mut buf = [0u16, ..2];
814 let n = ch.encode_utf16(buf /* as mut slice! */);
815 u.push_all(buf.slice_to(n));
820 /// Given a string, make a new string with repeated copies of it.
821 fn repeat(&self, nn: uint) -> String {
822 let me = self.as_slice();
823 let mut ret = String::with_capacity(nn * me.len());
824 for _ in range(0, nn) {
830 /// Levenshtein Distance between two strings.
831 fn lev_distance(&self, t: &str) -> uint {
832 let me = self.as_slice();
836 if slen == 0 { return tlen; }
837 if tlen == 0 { return slen; }
839 let mut dcol = Vec::from_fn(tlen + 1, |x| x);
841 for (i, sc) in me.chars().enumerate() {
844 *dcol.get_mut(0) = current + 1;
846 for (j, tc) in t.chars().enumerate() {
848 let next = *dcol.get(j + 1);
851 *dcol.get_mut(j + 1) = current;
853 *dcol.get_mut(j + 1) = ::cmp::min(current, next);
854 *dcol.get_mut(j + 1) = ::cmp::min(*dcol.get(j + 1),
862 return *dcol.get(tlen);
865 /// An Iterator over the string in Unicode Normalization Form D
866 /// (canonical decomposition).
868 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
870 iter: self.as_slice().chars(),
877 /// An Iterator over the string in Unicode Normalization Form KD
878 /// (compatibility decomposition).
880 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
882 iter: self.as_slice().chars(),
890 impl<'a> StrAllocating for &'a str {
892 fn into_owned(self) -> String {
897 /// Methods for owned strings
899 /// Consumes the string, returning the underlying byte buffer.
901 /// The buffer does not have a null terminator.
902 fn into_bytes(self) -> Vec<u8>;
904 /// Pushes the given string onto this string, returning the concatenation of the two strings.
905 fn append(self, rhs: &str) -> String;
908 impl OwnedStr for String {
910 fn into_bytes(self) -> Vec<u8> {
911 unsafe { mem::transmute(self) }
915 fn append(mut self, rhs: &str) -> String {
923 use iter::AdditiveIterator;
924 use default::Default;
931 assert!((eq_slice("foobar".slice(0, 3), "foo")));
932 assert!((eq_slice("barfoo".slice(3, 6), "foo")));
933 assert!((!eq_slice("foo1", "foo2")));
939 assert!("" <= "foo");
940 assert!("foo" <= "foo");
941 assert!("foo" != "bar");
946 assert_eq!("".len(), 0u);
947 assert_eq!("hello world".len(), 11u);
948 assert_eq!("\x63".len(), 1u);
949 assert_eq!("\xa2".len(), 2u);
950 assert_eq!("\u03c0".len(), 2u);
951 assert_eq!("\u2620".len(), 3u);
952 assert_eq!("\U0001d11e".len(), 4u);
954 assert_eq!("".char_len(), 0u);
955 assert_eq!("hello world".char_len(), 11u);
956 assert_eq!("\x63".char_len(), 1u);
957 assert_eq!("\xa2".char_len(), 1u);
958 assert_eq!("\u03c0".char_len(), 1u);
959 assert_eq!("\u2620".char_len(), 1u);
960 assert_eq!("\U0001d11e".char_len(), 1u);
961 assert_eq!("ประเทศไทย中华Việt Nam".char_len(), 19u);
966 assert_eq!("hello".find('l'), Some(2u));
967 assert_eq!("hello".find(|c:char| c == 'o'), Some(4u));
968 assert!("hello".find('x').is_none());
969 assert!("hello".find(|c:char| c == 'x').is_none());
970 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
971 assert_eq!("ประเทศไทย中华Việt Nam".find(|c: char| c == '华'), Some(30u));
976 assert_eq!("hello".rfind('l'), Some(3u));
977 assert_eq!("hello".rfind(|c:char| c == 'o'), Some(4u));
978 assert!("hello".rfind('x').is_none());
979 assert!("hello".rfind(|c:char| c == 'x').is_none());
980 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
981 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|c: char| c == '华'), Some(30u));
986 let empty = "".to_string();
987 let s: String = empty.as_slice().chars().collect();
988 assert_eq!(empty, s);
989 let data = "ประเทศไทย中".to_string();
990 let s: String = data.as_slice().chars().collect();
995 fn test_into_bytes() {
996 let data = "asdf".to_string();
997 let buf = data.into_bytes();
998 assert_eq!(bytes!("asdf"), buf.as_slice());
1002 fn test_find_str() {
1004 assert_eq!("".find_str(""), Some(0u));
1005 assert!("banana".find_str("apple pie").is_none());
1007 let data = "abcabc";
1008 assert_eq!(data.slice(0u, 6u).find_str("ab"), Some(0u));
1009 assert_eq!(data.slice(2u, 6u).find_str("ab"), Some(3u - 2u));
1010 assert!(data.slice(2u, 4u).find_str("ab").is_none());
1012 let string = "ประเทศไทย中华Việt Nam";
1013 let mut data = string.to_string();
1014 data.push_str(string);
1015 assert!(data.as_slice().find_str("ไท华").is_none());
1016 assert_eq!(data.as_slice().slice(0u, 43u).find_str(""), Some(0u));
1017 assert_eq!(data.as_slice().slice(6u, 43u).find_str(""), Some(6u - 6u));
1019 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ประ"), Some( 0u));
1020 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ทศไ"), Some(12u));
1021 assert_eq!(data.as_slice().slice(0u, 43u).find_str("ย中"), Some(24u));
1022 assert_eq!(data.as_slice().slice(0u, 43u).find_str("iệt"), Some(34u));
1023 assert_eq!(data.as_slice().slice(0u, 43u).find_str("Nam"), Some(40u));
1025 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ประ"), Some(43u - 43u));
1026 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ทศไ"), Some(55u - 43u));
1027 assert_eq!(data.as_slice().slice(43u, 86u).find_str("ย中"), Some(67u - 43u));
1028 assert_eq!(data.as_slice().slice(43u, 86u).find_str("iệt"), Some(77u - 43u));
1029 assert_eq!(data.as_slice().slice(43u, 86u).find_str("Nam"), Some(83u - 43u));
1033 fn test_slice_chars() {
1034 fn t(a: &str, b: &str, start: uint) {
1035 assert_eq!(a.slice_chars(start, start + b.char_len()), b);
1038 t("hello", "llo", 2);
1039 t("hello", "el", 1);
1042 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
1047 fn t(v: &[String], s: &str) {
1048 assert_eq!(v.concat(), s.to_str().into_owned());
1050 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1051 "no".to_string(), "good".to_string()], "youknowI'mnogood");
1052 let v: &[String] = [];
1054 t(["hi".to_string()], "hi");
1059 fn t(v: &[String], sep: &str, s: &str) {
1060 assert_eq!(v.connect(sep), s.to_str().into_owned());
1062 t(["you".to_string(), "know".to_string(), "I'm".to_string(),
1063 "no".to_string(), "good".to_string()],
1064 " ", "you know I'm no good");
1065 let v: &[String] = [];
1067 t(["hi".to_string()], " ", "hi");
1071 fn test_concat_slices() {
1072 fn t(v: &[&str], s: &str) {
1073 assert_eq!(v.concat(), s.to_str().into_owned());
1075 t(["you", "know", "I'm", "no", "good"], "youknowI'mnogood");
1076 let v: &[&str] = [];
1082 fn test_connect_slices() {
1083 fn t(v: &[&str], sep: &str, s: &str) {
1084 assert_eq!(v.connect(sep), s.to_str().into_owned());
1086 t(["you", "know", "I'm", "no", "good"],
1087 " ", "you know I'm no good");
1089 t(["hi"], " ", "hi");
1094 assert_eq!("x".repeat(4), "xxxx".to_string());
1095 assert_eq!("hi".repeat(4), "hihihihi".to_string());
1096 assert_eq!("ไท华".repeat(3), "ไท华ไท华ไท华".to_string());
1097 assert_eq!("".repeat(4), "".to_string());
1098 assert_eq!("hi".repeat(0), "".to_string());
1102 fn test_unsafe_slice() {
1103 assert_eq!("ab", unsafe {raw::slice_bytes("abc", 0, 2)});
1104 assert_eq!("bc", unsafe {raw::slice_bytes("abc", 1, 3)});
1105 assert_eq!("", unsafe {raw::slice_bytes("abc", 1, 1)});
1106 fn a_million_letter_a() -> String {
1108 let mut rs = String::new();
1110 rs.push_str("aaaaaaaaaa");
1115 fn half_a_million_letter_a() -> String {
1117 let mut rs = String::new();
1119 rs.push_str("aaaaa");
1124 let letters = a_million_letter_a();
1125 assert!(half_a_million_letter_a() ==
1126 unsafe {raw::slice_bytes(letters.as_slice(),
1128 500000)}.to_owned());
1132 fn test_starts_with() {
1133 assert!(("".starts_with("")));
1134 assert!(("abc".starts_with("")));
1135 assert!(("abc".starts_with("a")));
1136 assert!((!"a".starts_with("abc")));
1137 assert!((!"".starts_with("abc")));
1138 assert!((!"ödd".starts_with("-")));
1139 assert!(("ödd".starts_with("öd")));
1143 fn test_ends_with() {
1144 assert!(("".ends_with("")));
1145 assert!(("abc".ends_with("")));
1146 assert!(("abc".ends_with("c")));
1147 assert!((!"a".ends_with("abc")));
1148 assert!((!"".ends_with("abc")));
1149 assert!((!"ddö".ends_with("-")));
1150 assert!(("ddö".ends_with("dö")));
1154 fn test_is_empty() {
1155 assert!("".is_empty());
1156 assert!(!"a".is_empty());
1162 assert_eq!("".replace(a, "b"), "".to_string());
1163 assert_eq!("a".replace(a, "b"), "b".to_string());
1164 assert_eq!("ab".replace(a, "b"), "bb".to_string());
1166 assert!(" test test ".replace(test, "toast") ==
1167 " toast toast ".to_string());
1168 assert_eq!(" test test ".replace(test, ""), " ".to_string());
1172 fn test_replace_2a() {
1173 let data = "ประเทศไทย中华";
1174 let repl = "دولة الكويت";
1177 let a2 = "دولة الكويتทศไทย中华";
1178 assert_eq!(data.replace(a, repl).as_slice(), a2);
1182 fn test_replace_2b() {
1183 let data = "ประเทศไทย中华";
1184 let repl = "دولة الكويت";
1187 let b2 = "ปรدولة الكويتทศไทย中华";
1188 assert_eq!(data.replace(b, repl).as_slice(), b2);
1192 fn test_replace_2c() {
1193 let data = "ประเทศไทย中华";
1194 let repl = "دولة الكويت";
1197 let c2 = "ประเทศไทยدولة الكويت";
1198 assert_eq!(data.replace(c, repl).as_slice(), c2);
1202 fn test_replace_2d() {
1203 let data = "ประเทศไทย中华";
1204 let repl = "دولة الكويت";
1207 assert_eq!(data.replace(d, repl).as_slice(), data);
1212 assert_eq!("ab", "abc".slice(0, 2));
1213 assert_eq!("bc", "abc".slice(1, 3));
1214 assert_eq!("", "abc".slice(1, 1));
1215 assert_eq!("\u65e5", "\u65e5\u672c".slice(0, 3));
1217 let data = "ประเทศไทย中华";
1218 assert_eq!("ป", data.slice(0, 3));
1219 assert_eq!("ร", data.slice(3, 6));
1220 assert_eq!("", data.slice(3, 3));
1221 assert_eq!("华", data.slice(30, 33));
1223 fn a_million_letter_X() -> String {
1225 let mut rs = String::new();
1227 rs.push_str("华华华华华华华华华华");
1232 fn half_a_million_letter_X() -> String {
1234 let mut rs = String::new();
1236 rs.push_str("华华华华华");
1241 let letters = a_million_letter_X();
1242 assert!(half_a_million_letter_X() ==
1243 letters.as_slice().slice(0u, 3u * 500000u).to_owned());
1248 let ss = "中华Việt Nam";
1250 assert_eq!("华", ss.slice(3u, 6u));
1251 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1253 assert_eq!("ab", "abc".slice(0u, 2u));
1254 assert_eq!("bc", "abc".slice(1u, 3u));
1255 assert_eq!("", "abc".slice(1u, 1u));
1257 assert_eq!("中", ss.slice(0u, 3u));
1258 assert_eq!("华V", ss.slice(3u, 7u));
1259 assert_eq!("", ss.slice(3u, 3u));
1274 fn test_slice_fail() {
1275 "中华Việt Nam".slice(0u, 2u);
1279 fn test_slice_from() {
1280 assert_eq!("abcd".slice_from(0), "abcd");
1281 assert_eq!("abcd".slice_from(2), "cd");
1282 assert_eq!("abcd".slice_from(4), "");
1285 fn test_slice_to() {
1286 assert_eq!("abcd".slice_to(0), "");
1287 assert_eq!("abcd".slice_to(2), "ab");
1288 assert_eq!("abcd".slice_to(4), "abcd");
1292 fn test_trim_left_chars() {
1293 let v: &[char] = &[];
1294 assert_eq!(" *** foo *** ".trim_left_chars(v), " *** foo *** ");
1295 assert_eq!(" *** foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1296 assert_eq!(" *** *** ".trim_left_chars(&['*', ' ']), "");
1297 assert_eq!("foo *** ".trim_left_chars(&['*', ' ']), "foo *** ");
1299 assert_eq!("11foo1bar11".trim_left_chars('1'), "foo1bar11");
1300 assert_eq!("12foo1bar12".trim_left_chars(&['1', '2']), "foo1bar12");
1301 assert_eq!("123foo1bar123".trim_left_chars(|c: char| c.is_digit()), "foo1bar123");
1305 fn test_trim_right_chars() {
1306 let v: &[char] = &[];
1307 assert_eq!(" *** foo *** ".trim_right_chars(v), " *** foo *** ");
1308 assert_eq!(" *** foo *** ".trim_right_chars(&['*', ' ']), " *** foo");
1309 assert_eq!(" *** *** ".trim_right_chars(&['*', ' ']), "");
1310 assert_eq!(" *** foo".trim_right_chars(&['*', ' ']), " *** foo");
1312 assert_eq!("11foo1bar11".trim_right_chars('1'), "11foo1bar");
1313 assert_eq!("12foo1bar12".trim_right_chars(&['1', '2']), "12foo1bar");
1314 assert_eq!("123foo1bar123".trim_right_chars(|c: char| c.is_digit()), "123foo1bar");
1318 fn test_trim_chars() {
1319 let v: &[char] = &[];
1320 assert_eq!(" *** foo *** ".trim_chars(v), " *** foo *** ");
1321 assert_eq!(" *** foo *** ".trim_chars(&['*', ' ']), "foo");
1322 assert_eq!(" *** *** ".trim_chars(&['*', ' ']), "");
1323 assert_eq!("foo".trim_chars(&['*', ' ']), "foo");
1325 assert_eq!("11foo1bar11".trim_chars('1'), "foo1bar");
1326 assert_eq!("12foo1bar12".trim_chars(&['1', '2']), "foo1bar");
1327 assert_eq!("123foo1bar123".trim_chars(|c: char| c.is_digit()), "foo1bar");
1331 fn test_trim_left() {
1332 assert_eq!("".trim_left(), "");
1333 assert_eq!("a".trim_left(), "a");
1334 assert_eq!(" ".trim_left(), "");
1335 assert_eq!(" blah".trim_left(), "blah");
1336 assert_eq!(" \u3000 wut".trim_left(), "wut");
1337 assert_eq!("hey ".trim_left(), "hey ");
1341 fn test_trim_right() {
1342 assert_eq!("".trim_right(), "");
1343 assert_eq!("a".trim_right(), "a");
1344 assert_eq!(" ".trim_right(), "");
1345 assert_eq!("blah ".trim_right(), "blah");
1346 assert_eq!("wut \u3000 ".trim_right(), "wut");
1347 assert_eq!(" hey".trim_right(), " hey");
1352 assert_eq!("".trim(), "");
1353 assert_eq!("a".trim(), "a");
1354 assert_eq!(" ".trim(), "");
1355 assert_eq!(" blah ".trim(), "blah");
1356 assert_eq!("\nwut \u3000 ".trim(), "wut");
1357 assert_eq!(" hey dude ".trim(), "hey dude");
1361 fn test_is_whitespace() {
1362 assert!("".is_whitespace());
1363 assert!(" ".is_whitespace());
1364 assert!("\u2009".is_whitespace()); // Thin space
1365 assert!(" \n\t ".is_whitespace());
1366 assert!(!" _ ".is_whitespace());
1370 fn test_slice_shift_char() {
1371 let data = "ประเทศไทย中";
1372 assert_eq!(data.slice_shift_char(), (Some('ป'), "ระเทศไทย中"));
1376 fn test_slice_shift_char_2() {
1378 assert_eq!(empty.slice_shift_char(), (None, ""));
1383 // deny overlong encodings
1384 assert!(!is_utf8([0xc0, 0x80]));
1385 assert!(!is_utf8([0xc0, 0xae]));
1386 assert!(!is_utf8([0xe0, 0x80, 0x80]));
1387 assert!(!is_utf8([0xe0, 0x80, 0xaf]));
1388 assert!(!is_utf8([0xe0, 0x81, 0x81]));
1389 assert!(!is_utf8([0xf0, 0x82, 0x82, 0xac]));
1390 assert!(!is_utf8([0xf4, 0x90, 0x80, 0x80]));
1393 assert!(!is_utf8([0xED, 0xA0, 0x80]));
1394 assert!(!is_utf8([0xED, 0xBF, 0xBF]));
1396 assert!(is_utf8([0xC2, 0x80]));
1397 assert!(is_utf8([0xDF, 0xBF]));
1398 assert!(is_utf8([0xE0, 0xA0, 0x80]));
1399 assert!(is_utf8([0xED, 0x9F, 0xBF]));
1400 assert!(is_utf8([0xEE, 0x80, 0x80]));
1401 assert!(is_utf8([0xEF, 0xBF, 0xBF]));
1402 assert!(is_utf8([0xF0, 0x90, 0x80, 0x80]));
1403 assert!(is_utf8([0xF4, 0x8F, 0xBF, 0xBF]));
1407 fn test_is_utf16() {
1408 macro_rules! pos ( ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } });
1416 // surrogate pairs (randomly generated with Python 3's
1417 // .encode('utf-16be'))
1418 pos!([0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1419 [0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1420 [0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1422 // mixtures (also random)
1423 pos!([0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1424 [0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1425 [0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1428 macro_rules! neg ( ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } });
1431 // surrogate + regular unit
1433 // surrogate + lead surrogate
1435 // unterminated surrogate
1437 // trail surrogate without a lead
1440 // random byte sequences that Python 3's .decode('utf-16be')
1442 neg!([0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1443 [0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1444 [0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1445 [0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1446 [0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1447 [0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1448 [0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1449 [0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1450 [0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1451 [0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1452 [0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1453 [0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1454 [0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1455 [0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1456 [0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1457 [0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1458 [0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1459 [0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1460 [0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1461 [0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1462 [0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1466 fn test_raw_from_c_str() {
1468 let a = box [65, 65, 65, 65, 65, 65, 65, 0];
1470 let c = raw::from_c_str(b);
1471 assert_eq!(c, "AAAAAAA".to_string());
1476 fn test_as_bytes() {
1479 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1480 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1483 assert_eq!("".as_bytes(), &[]);
1484 assert_eq!("abc".as_bytes(), &['a' as u8, 'b' as u8, 'c' as u8]);
1485 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v.as_slice());
1490 fn test_as_bytes_fail() {
1491 // Don't double free. (I'm not sure if this exercises the
1492 // original problem code path anymore.)
1493 let s = "".to_string();
1494 let _bytes = s.as_bytes();
1500 let buf = "hello".as_ptr();
1502 assert_eq!(*buf.offset(0), 'h' as u8);
1503 assert_eq!(*buf.offset(1), 'e' as u8);
1504 assert_eq!(*buf.offset(2), 'l' as u8);
1505 assert_eq!(*buf.offset(3), 'l' as u8);
1506 assert_eq!(*buf.offset(4), 'o' as u8);
// Checks `subslice_offset` for slices taken with `slice` and for the pieces
// produced by `lines`: each result is compared with the expected offset of
// the piece inside its parent string.
1511 fn test_subslice_offset() {
1512 let a = "kernelsprite";
1513 let b = a.slice(7, a.len());
1514 let c = a.slice(0, a.len() - 6);
1515 assert_eq!(a.subslice_offset(b), 7);
1516 assert_eq!(a.subslice_offset(c), 0);
// The three lines of "a\nb\nc" are expected at offsets 0, 2 and 4.
1518 let string = "a\nb\nc";
1519 let lines: Vec<&str> = string.lines().collect();
1520 let lines = lines.as_slice();
1521 assert_eq!(string.subslice_offset(lines[0]), 0);
1522 assert_eq!(string.subslice_offset(lines[1]), 2);
1523 assert_eq!(string.subslice_offset(lines[2]), 4);
// Calls `subslice_offset` with two separate string literals; the visible
// code asserts nothing about the returned value.
// NOTE(review): presumably the call itself is expected to fail because `b`
// is not a slice of `a`; no attribute saying so is visible here -- confirm.
1528 fn test_subslice_offset_2() {
1529 let a = "alchemiter";
1530 let b = "cruxtruder";
1531 a.subslice_offset(b);
1535 fn vec_str_conversions() {
1536 let s1: String = "All mimsy were the borogoves".to_string();
1538 let v: Vec<u8> = Vec::from_slice(s1.as_bytes());
1539 let s2: String = from_utf8(v.as_slice()).unwrap().to_string();
1540 let mut i: uint = 0u;
1541 let n1: uint = s1.len();
1542 let n2: uint = v.len();
1545 let a: u8 = s1.as_slice()[i];
1546 let b: u8 = s2.as_slice()[i];
// Exercises `contains` with table-driven cases: substrings at the start,
// middle and end, the empty needle (found even in an empty haystack),
// needles that are absent, and multi-byte (Thai, CJK) text.
fn test_contains() {
    // (haystack, needle) pairs where the needle must be found.
    let present = [("abcde", "bcd"), ("abcde", "abcd"), ("abcde", "bcde"),
                   ("abcde", ""), ("", "")];
    for &(haystack, needle) in present.iter() {
        assert!(haystack.contains(needle));
    }

    // (haystack, needle) pairs where the needle must not be found.
    let absent = [("abcde", "def"), ("", "a")];
    for &(haystack, needle) in absent.iter() {
        assert!(!haystack.contains(needle));
    }

    let data = "ประเทศไทย中华Việt Nam";
    let found = ["ประเ", "ะเ", "中华"];
    for &needle in found.iter() {
        assert!(data.contains(needle));
    }
    // Both halves of this needle occur in `data`, but never side by side.
    assert!(!data.contains("ไท华"));
}
// Checks `contains_char`: a character present in the string is reported,
// a character that is absent is not, and an empty string contains nothing.
1572 fn test_contains_char() {
1573 assert!("abc".contains_char('b'));
1574 assert!("a".contains_char('a'));
1575 assert!(!"abc".contains_char('d'));
1576 assert!(!"".contains_char('a'));
1582 [("𐍅𐌿𐌻𐍆𐌹𐌻𐌰\n".to_string(),
1583 vec![0xd800_u16, 0xdf45_u16, 0xd800_u16, 0xdf3f_u16,
1584 0xd800_u16, 0xdf3b_u16, 0xd800_u16, 0xdf46_u16,
1585 0xd800_u16, 0xdf39_u16, 0xd800_u16, 0xdf3b_u16,
1586 0xd800_u16, 0xdf30_u16, 0x000a_u16]),
1588 ("𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍\n".to_string(),
1589 vec![0xd801_u16, 0xdc12_u16, 0xd801_u16,
1590 0xdc49_u16, 0xd801_u16, 0xdc2e_u16, 0xd801_u16,
1591 0xdc40_u16, 0xd801_u16, 0xdc32_u16, 0xd801_u16,
1592 0xdc4b_u16, 0x0020_u16, 0xd801_u16, 0xdc0f_u16,
1593 0xd801_u16, 0xdc32_u16, 0xd801_u16, 0xdc4d_u16,
1596 ("𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑\n".to_string(),
1597 vec![0xd800_u16, 0xdf00_u16, 0xd800_u16, 0xdf16_u16,
1598 0xd800_u16, 0xdf0b_u16, 0xd800_u16, 0xdf04_u16,
1599 0xd800_u16, 0xdf11_u16, 0xd800_u16, 0xdf09_u16,
1600 0x00b7_u16, 0xd800_u16, 0xdf0c_u16, 0xd800_u16,
1601 0xdf04_u16, 0xd800_u16, 0xdf15_u16, 0xd800_u16,
1602 0xdf04_u16, 0xd800_u16, 0xdf0b_u16, 0xd800_u16,
1603 0xdf09_u16, 0xd800_u16, 0xdf11_u16, 0x000a_u16 ]),
1605 ("𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓 𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆 𐒕𐒆\n".to_string(),
1606 vec![0xd801_u16, 0xdc8b_u16, 0xd801_u16, 0xdc98_u16,
1607 0xd801_u16, 0xdc88_u16, 0xd801_u16, 0xdc91_u16,
1608 0xd801_u16, 0xdc9b_u16, 0xd801_u16, 0xdc92_u16,
1609 0x0020_u16, 0xd801_u16, 0xdc95_u16, 0xd801_u16,
1610 0xdc93_u16, 0x0020_u16, 0xd801_u16, 0xdc88_u16,
1611 0xd801_u16, 0xdc9a_u16, 0xd801_u16, 0xdc8d_u16,
1612 0x0020_u16, 0xd801_u16, 0xdc8f_u16, 0xd801_u16,
1613 0xdc9c_u16, 0xd801_u16, 0xdc92_u16, 0xd801_u16,
1614 0xdc96_u16, 0xd801_u16, 0xdc86_u16, 0x0020_u16,
1615 0xd801_u16, 0xdc95_u16, 0xd801_u16, 0xdc86_u16,
1617 // Issue #12318, even-numbered non-BMP planes
1618 ("\U00020000".to_string(),
1619 vec![0xD840, 0xDC00])];
1621 for p in pairs.iter() {
1622 let (s, u) = (*p).clone();
1623 assert!(is_utf16(u.as_slice()));
1624 assert_eq!(s.to_utf16(), u);
1626 assert_eq!(from_utf16(u.as_slice()).unwrap(), s);
1627 assert_eq!(from_utf16_lossy(u.as_slice()), s);
1629 assert_eq!(from_utf16(s.to_utf16().as_slice()).unwrap(), s);
1630 assert_eq!(from_utf16(u.as_slice()).unwrap().to_utf16(), u);
// Checks that `from_utf16` returns `None` for inputs that contain an
// unpaired surrogate code unit.
1635 fn test_utf16_invalid() {
1636 // completely positive cases tested above.
// 0xD800 is a lead surrogate with nothing following it.
1638 assert_eq!(from_utf16([0xD800]), None);
// Two lead surrogates in a row.
1640 assert_eq!(from_utf16([0xD800, 0xD800]), None);
// 0xDC00 is a trail surrogate, here preceded by an ordinary unit.
1643 assert_eq!(from_utf16([0x0061, 0xDC00]), None);
// A well-formed pair (0xd801, 0xdc8b) surrounded by lone lead surrogates.
1646 assert_eq!(from_utf16([0xD800, 0xd801, 0xdc8b, 0xD800]), None);
// Checks `from_utf16_lossy` on inputs containing unpaired surrogates: the
// expected strings have one U+FFFD per unpaired unit, while ordinary units
// and well-formed surrogate pairs are decoded normally.
1650 fn test_utf16_lossy() {
1651 // completely positive cases tested above.
1653 assert_eq!(from_utf16_lossy([0xD800]), "\uFFFD".to_string());
1655 assert_eq!(from_utf16_lossy([0xD800, 0xD800]), "\uFFFD\uFFFD".to_string());
1658 assert_eq!(from_utf16_lossy([0x0061, 0xDC00]), "a\uFFFD".to_string());
1661 assert_eq!(from_utf16_lossy([0xD800, 0xd801, 0xdc8b, 0xD800]),
1662 "\uFFFD𐒋\uFFFD".to_string());
1666 fn test_truncate_utf16_at_nul() {
1668 assert_eq!(truncate_utf16_at_nul(v), &[]);
1671 assert_eq!(truncate_utf16_at_nul(v), &[]);
1674 assert_eq!(truncate_utf16_at_nul(v), &[1]);
1677 assert_eq!(truncate_utf16_at_nul(v), &[1, 2]);
1680 assert_eq!(truncate_utf16_at_nul(v), &[1, 2, 3]);
1685 let s = "ศไทย中华Việt Nam";
1686 let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1688 for ch in v.iter() {
1689 assert!(s.char_at(pos) == *ch);
1690 pos += from_char(*ch).len();
// Walks the string backwards with `char_at_reverse`, starting at `s.len()`.
// For every expected character (taken from `v` in reverse order) the
// character reported at `pos` must match; `pos` is then moved back by the
// length of that character's string form.
1695 fn test_char_at_reverse() {
1696 let s = "ศไทย中华Việt Nam";
1697 let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1698 let mut pos = s.len();
1699 for ch in v.iter().rev() {
1700 assert!(s.char_at_reverse(pos) == *ch);
1701 pos -= from_char(*ch).len();
// Checks `escape_unicode` against expected output. In these cases every
// character is escaped: `\xNN` for characters up to U+00FF, `\uNNNN` for
// characters up to U+FFFF and `\UNNNNNNNN` for characters above that.
1706 fn test_escape_unicode() {
1707 assert_eq!("abc".escape_unicode(), "\\x61\\x62\\x63".to_string());
1708 assert_eq!("a c".escape_unicode(), "\\x61\\x20\\x63".to_string());
1709 assert_eq!("\r\n\t".escape_unicode(), "\\x0d\\x0a\\x09".to_string());
1710 assert_eq!("'\"\\".escape_unicode(), "\\x27\\x22\\x5c".to_string());
1711 assert_eq!("\x00\x01\xfe\xff".escape_unicode(), "\\x00\\x01\\xfe\\xff".to_string());
1712 assert_eq!("\u0100\uffff".escape_unicode(), "\\u0100\\uffff".to_string());
1713 assert_eq!("\U00010000\U0010ffff".escape_unicode(), "\\U00010000\\U0010ffff".to_string());
1714 assert_eq!("ab\ufb00".escape_unicode(), "\\x61\\x62\\ufb00".to_string());
1715 assert_eq!("\U0001d4ea\r".escape_unicode(), "\\U0001d4ea\\x0d".to_string());
// Checks `escape_default` against expected output. In these cases plain
// ASCII letters and spaces are left alone; `\r`, `\n`, `\t`, quotes and the
// backslash get backslash escapes; characters from U+0100 upwards are
// written as `\uNNNN` or `\UNNNNNNNN`.
1719 fn test_escape_default() {
1720 assert_eq!("abc".escape_default(), "abc".to_string());
1721 assert_eq!("a c".escape_default(), "a c".to_string());
1722 assert_eq!("\r\n\t".escape_default(), "\\r\\n\\t".to_string());
1723 assert_eq!("'\"\\".escape_default(), "\\'\\\"\\\\".to_string());
1724 assert_eq!("\u0100\uffff".escape_default(), "\\u0100\\uffff".to_string());
1725 assert_eq!("\U00010000\U0010ffff".escape_default(), "\\U00010000\\U0010ffff".to_string());
1726 assert_eq!("ab\ufb00".escape_default(), "ab\\ufb00".to_string());
1727 assert_eq!("\U0001d4ea\r".escape_default(), "\\U0001d4ea\\r".to_string());
// Checks the total ordering (`cmp`) of string slices: a strict prefix sorts
// before the longer string, identical strings compare `Equal`, and otherwise
// the first differing character decides.
fn test_total_ord() {
    // Every comparison is wrapped in `assert!`. Written as bare expression
    // statements the results were computed and thrown away, so the test
    // could never fail.
    assert!("1234".cmp(&("123")) == Greater);
    assert!("123".cmp(&("1234")) == Less);
    assert!("1234".cmp(&("1234")) == Equal);
    // '5' < '6' at the first differing position, regardless of length.
    assert!("12345555".cmp(&("123456")) == Less);
    assert!("22".cmp(&("1234")) == Greater);
}
// Checks the `.ch` field returned by `char_range_at` at the start index of
// every character in a string that mixes 1-, 2-, 3- and 4-byte characters
// (indices 0, 1, 3, 6, 10, 14, 17 and 19).
1740 fn test_char_range_at() {
1741 let data = "b¢€𤭢𤭢€¢b";
1742 assert_eq!('b', data.char_range_at(0).ch);
1743 assert_eq!('¢', data.char_range_at(1).ch);
1744 assert_eq!('€', data.char_range_at(3).ch);
1745 assert_eq!('𤭢', data.char_range_at(6).ch);
1746 assert_eq!('𤭢', data.char_range_at(10).ch);
1747 assert_eq!('€', data.char_range_at(14).ch);
1748 assert_eq!('¢', data.char_range_at(17).ch);
1749 assert_eq!('b', data.char_range_at(19).ch);
// Checks that `char_range_at_reverse(0)` reports `next == 0`, i.e. asking
// for the character before the start of the string leaves the index at 0.
1753 fn test_char_range_at_reverse_underflow() {
1754 assert_eq!("abc".char_range_at_reverse(0).next, 0);
1758 fn test_iterator() {
1760 let s = "ศไทย中华Việt Nam";
1761 let v = box ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1764 let mut it = s.chars();
1767 assert_eq!(c, v[pos]);
1770 assert_eq!(pos, v.len());
1774 fn test_rev_iterator() {
1776 let s = "ศไทย中华Việt Nam";
1777 let v = box ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1780 let mut it = s.chars().rev();
1783 assert_eq!(c, v[pos]);
1786 assert_eq!(pos, v.len());
1790 fn test_iterator_clone() {
1791 let s = "ศไทย中华Việt Nam";
1792 let mut it = s.chars();
1794 assert!(it.zip(it.clone()).all(|(x,y)| x == y));
1798 fn test_bytesator() {
1799 let s = "ศไทย中华Việt Nam";
1801 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1802 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1807 for b in s.bytes() {
1808 assert_eq!(b, v[pos]);
1814 fn test_bytes_revator() {
1815 let s = "ศไทย中华Việt Nam";
1817 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1818 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1821 let mut pos = v.len();
1823 for b in s.bytes().rev() {
1825 assert_eq!(b, v[pos]);
1830 fn test_char_indicesator() {
1832 let s = "ศไทย中华Việt Nam";
1833 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
1834 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
1837 let mut it = s.char_indices();
1840 assert_eq!(c, (p[pos], v[pos]));
1843 assert_eq!(pos, v.len());
1844 assert_eq!(pos, p.len());
1848 fn test_char_indices_revator() {
1850 let s = "ศไทย中华Việt Nam";
1851 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
1852 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
1855 let mut it = s.char_indices().rev();
1858 assert_eq!(c, (p[pos], v[pos]));
1861 assert_eq!(pos, v.len());
1862 assert_eq!(pos, p.len());
1866 fn test_split_char_iterator() {
1867 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1869 let split: Vec<&str> = data.split(' ').collect();
1870 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1872 let mut rsplit: Vec<&str> = data.split(' ').rev().collect();
1874 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1876 let split: Vec<&str> = data.split(|c: char| c == ' ').collect();
1877 assert_eq!( split, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1879 let mut rsplit: Vec<&str> = data.split(|c: char| c == ' ').rev().collect();
1881 assert_eq!(rsplit, vec!["\nMäry", "häd", "ä", "little", "lämb\nLittle", "lämb\n"]);
1884 let split: Vec<&str> = data.split('ä').collect();
1885 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1887 let mut rsplit: Vec<&str> = data.split('ä').rev().collect();
1889 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1891 let split: Vec<&str> = data.split(|c: char| c == 'ä').collect();
1892 assert_eq!( split, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
1894 let mut rsplit: Vec<&str> = data.split(|c: char| c == 'ä').rev().collect();
1896 assert_eq!(rsplit, vec!["\nM", "ry h", "d ", " little l", "mb\nLittle l", "mb\n"]);
// Checks `splitn(sep, 3)` with a `char` separator and with an equivalent
// closure, for an ASCII separator (' ') and a non-ASCII one ('ä'). The
// expected vectors hold the first three pieces followed by the unsplit
// remainder of the string.
1900 fn test_splitn_char_iterator() {
1901 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1903 let split: Vec<&str> = data.splitn(' ', 3).collect();
1904 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1906 let split: Vec<&str> = data.splitn(|c: char| c == ' ', 3).collect();
1907 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
1910 let split: Vec<&str> = data.splitn('ä', 3).collect();
1911 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1913 let split: Vec<&str> = data.splitn(|c: char| c == 'ä', 3).collect();
1914 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
1918 fn test_rsplitn_char_iterator() {
1919 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1921 let mut split: Vec<&str> = data.rsplitn(' ', 3).collect();
1923 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1925 let mut split: Vec<&str> = data.rsplitn(|c: char| c == ' ', 3).collect();
1927 assert_eq!(split, vec!["\nMäry häd ä", "little", "lämb\nLittle", "lämb\n"]);
1930 let mut split: Vec<&str> = data.rsplitn('ä', 3).collect();
1932 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
1934 let mut split: Vec<&str> = data.rsplitn(|c: char| c == 'ä', 3).collect();
1936 assert_eq!(split, vec!["\nMäry häd ", " little l", "mb\nLittle l", "mb\n"]);
// Contrasts `split` with `split_terminator` on text that begins and ends
// with the separator: `split` yields an empty piece after the final '\n',
// `split_terminator` does not. Both keep the leading empty piece.
fn test_split_char_iterator_no_trailing() {
    let data = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing = data.split('\n').collect::<Vec<&str>>();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let without_trailing = data.split_terminator('\n').collect::<Vec<&str>>();
    assert_eq!(without_trailing, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
1951 fn test_rev_split_char_iterator_no_trailing() {
1952 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
1954 let mut split: Vec<&str> = data.split('\n').rev().collect();
1956 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);
1958 let mut split: Vec<&str> = data.split_terminator('\n').rev().collect();
1960 assert_eq!(split, vec!["", "Märy häd ä little lämb", "Little lämb"]);
1965 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
1966 let words: Vec<&str> = data.words().collect();
1967 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
// Checks `nfd_chars` (collected into a `String`) against expected output.
// The cases cover: text that stays unchanged, a precomposed letter split
// into base letter plus combining mark, combining marks being reordered
// (U+0323 ends up before U+0307 for both input orders), and Hangul
// syllables split into their jamo.
1971 fn test_nfd_chars() {
1972 assert_eq!("abc".nfd_chars().collect::<String>(), "abc".to_string());
1973 assert_eq!("\u1e0b\u01c4".nfd_chars().collect::<String>(), "d\u0307\u01c4".to_string());
1974 assert_eq!("\u2026".nfd_chars().collect::<String>(), "\u2026".to_string());
1975 assert_eq!("\u2126".nfd_chars().collect::<String>(), "\u03a9".to_string());
1976 assert_eq!("\u1e0b\u0323".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1977 assert_eq!("\u1e0d\u0307".nfd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1978 assert_eq!("a\u0301".nfd_chars().collect::<String>(), "a\u0301".to_string());
1979 assert_eq!("\u0301a".nfd_chars().collect::<String>(), "\u0301a".to_string());
1980 assert_eq!("\ud4db".nfd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
1981 assert_eq!("\uac1c".nfd_chars().collect::<String>(), "\u1100\u1162".to_string());
// Checks `nfkd_chars` (collected into a `String`) against expected output.
// Besides splitting precomposed letters and Hangul syllables and reordering
// combining marks, the expected strings show compatibility mappings:
// U+01C4 becomes "DZ" + U+030C and U+2026 becomes three full stops.
1985 fn test_nfkd_chars() {
1986 assert_eq!("abc".nfkd_chars().collect::<String>(), "abc".to_string());
1987 assert_eq!("\u1e0b\u01c4".nfkd_chars().collect::<String>(), "d\u0307DZ\u030c".to_string());
1988 assert_eq!("\u2026".nfkd_chars().collect::<String>(), "...".to_string());
1989 assert_eq!("\u2126".nfkd_chars().collect::<String>(), "\u03a9".to_string());
1990 assert_eq!("\u1e0b\u0323".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1991 assert_eq!("\u1e0d\u0307".nfkd_chars().collect::<String>(), "d\u0323\u0307".to_string());
1992 assert_eq!("a\u0301".nfkd_chars().collect::<String>(), "a\u0301".to_string());
1993 assert_eq!("\u0301a".nfkd_chars().collect::<String>(), "\u0301a".to_string());
1994 assert_eq!("\ud4db".nfkd_chars().collect::<String>(), "\u1111\u1171\u11b6".to_string());
1995 assert_eq!("\uac1c".nfkd_chars().collect::<String>(), "\u1100\u1162".to_string());
2000 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2001 let lines: Vec<&str> = data.lines().collect();
2002 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2004 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2005 let lines: Vec<&str> = data.lines().collect();
2006 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
// Checks `split_str` through the local helper `t(s, sep, u)`, which splits
// `s` on the string `sep`, collects the pieces and compares them with `u`.
// Cases include: separator absent, separator at the start and/or end
// (giving empty pieces), a multi-byte separator, and runs of the separator
// character where the expected pieces show matches consumed left to right
// without overlap ("zzz" split on "zz" gives "" and "z").
2010 fn test_split_strator() {
2011 fn t(s: &str, sep: &str, u: &[&str]) {
2012 let v: Vec<&str> = s.split_str(sep).collect();
2013 assert_eq!(v.as_slice(), u.as_slice());
2015 t("--1233345--", "12345", ["--1233345--"]);
2016 t("abc::hello::there", "::", ["abc", "hello", "there"]);
2017 t("::hello::there", "::", ["", "hello", "there"]);
2018 t("hello::there::", "::", ["hello", "there", ""]);
2019 t("::hello::there::", "::", ["", "hello", "there", ""]);
2020 t("ประเทศไทย中华Việt Nam", "中华", ["ประเทศไทย", "Việt Nam"]);
2021 t("zzXXXzzYYYzz", "zz", ["", "XXX", "YYY", ""]);
2022 t("zzXXXzYYYz", "XXX", ["zz", "zYYYz"]);
2023 t(".XXX.YYY.", ".", ["", "XXX", "YYY", ""]);
2025 t("zz", "zz", ["",""]);
2026 t("ok", "z", ["ok"]);
2027 t("zzz", "zz", ["","z"]);
2028 t("zzzzz", "zz", ["","","z"]);
2032 fn test_str_default() {
2033 use default::Default;
2034 fn t<S: Default + Str>() {
2035 let s: S = Default::default();
2036 assert_eq!(s.as_slice(), "");
// Checks that string types can be used through the `Container` bound. The
// local helper `sum_len` adds up `len()` over a slice of any `Container`,
// and must give 5 for an array of `&str`, an array of `String`, and a
// one-element array holding a slice borrowed from a `String`.
2044 fn test_str_container() {
2045 fn sum_len<S: Container>(v: &[S]) -> uint {
2046 v.iter().map(|x| x.len()).sum()
2049 let s = "01234".to_string();
2050 assert_eq!(5, sum_len(["012", "", "34"]));
2051 assert_eq!(5, sum_len(["01".to_string(), "2".to_string(),
2052 "34".to_string(), "".to_string()]));
2053 assert_eq!(5, sum_len([s.as_slice()]));
// Checks `from_utf8`: byte strings built from valid text (ASCII and
// multi-byte) come back as `Some` of that text, while input ending in the
// byte 0xff yields `None`.
2057 fn test_str_from_utf8() {
2058 let xs = bytes!("hello");
2059 assert_eq!(from_utf8(xs), Some("hello"));
2061 let xs = bytes!("ศไทย中华Việt Nam");
2062 assert_eq!(from_utf8(xs), Some("ศไทย中华Việt Nam"));
2064 let xs = bytes!("hello", 0xff);
2065 assert_eq!(from_utf8(xs), None);
// Checks `from_utf8_owned`: a vector of valid bytes becomes `Ok` of the
// equivalent `String`; a vector ending in the byte 0xff is expected back
// unchanged inside `Err`.
2069 fn test_str_from_utf8_owned() {
2070 let xs = Vec::from_slice(bytes!("hello"));
2071 assert_eq!(from_utf8_owned(xs), Ok("hello".to_string()));
2073 let xs = Vec::from_slice(bytes!("ศไทย中华Việt Nam"));
2074 assert_eq!(from_utf8_owned(xs), Ok("ศไทย中华Việt Nam".to_string()));
2076 let xs = Vec::from_slice(bytes!("hello", 0xff));
2077 assert_eq!(from_utf8_owned(xs),
2078 Err(Vec::from_slice(bytes!("hello", 0xff))));
// Checks `from_utf8_lossy`. Valid input is expected back as `Slice` of the
// same text; input with invalid bytes is expected as `Owned` text in which
// the invalid parts are replaced by U+FFFD. The invalid cases cover stray
// and truncated lead bytes, the overlong pair 0xC0 0x80, lead bytes 0xF5 /
// 0xF1 / 0xF4 with missing or bad continuation bytes, an overlong 4-byte
// form next to a valid one, and encoded surrogates (0xED 0xA0 0x80,
// 0xED 0xBF 0xBF), each byte of which is expected to become its own U+FFFD.
2082 fn test_str_from_utf8_lossy() {
2083 let xs = bytes!("hello");
2084 assert_eq!(from_utf8_lossy(xs), Slice("hello"));
2086 let xs = bytes!("ศไทย中华Việt Nam");
2087 assert_eq!(from_utf8_lossy(xs), Slice("ศไทย中华Việt Nam"));
2089 let xs = bytes!("Hello", 0xC2, " There", 0xFF, " Goodbye");
2090 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD There\uFFFD Goodbye".to_string()));
2092 let xs = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2093 assert_eq!(from_utf8_lossy(xs), Owned("Hello\uFFFD\uFFFD There\uFFFD Goodbye".to_string()));
2095 let xs = bytes!(0xF5, "foo", 0xF5, 0x80, "bar");
2096 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFD\uFFFDbar".to_string()));
2098 let xs = bytes!(0xF1, "foo", 0xF1, 0x80, "bar", 0xF1, 0x80, 0x80, "baz");
2099 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFDbaz".to_string()));
2101 let xs = bytes!(0xF4, "foo", 0xF4, 0x80, "bar", 0xF4, 0xBF, "baz");
2102 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFDfoo\uFFFDbar\uFFFD\uFFFDbaz".to_string()));
2104 let xs = bytes!(0xF0, 0x80, 0x80, 0x80, "foo", 0xF0, 0x90, 0x80, 0x80, "bar");
2105 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFD\uFFFD\
2106 foo\U00010000bar".to_string()));
2109 let xs = bytes!(0xED, 0xA0, 0x80, "foo", 0xED, 0xBF, 0xBF, "bar");
2110 assert_eq!(from_utf8_lossy(xs), Owned("\uFFFD\uFFFD\uFFFDfoo\
2111 \uFFFD\uFFFD\uFFFDbar".to_string()));
// Checks that `from_str` can produce an `Option<String>` and that parsing
// "string" gives `Some` of the same text.
2115 fn test_from_str() {
2116 let owned: Option<String> = from_str("string");
2117 assert_eq!(owned, Some("string".to_string()));
// Exercises the operations available on both `Slice` and `Owned` values
// holding the same text: `len`, `as_slice`, `to_str`, formatting with "{}",
// `lt` against the other variant, and equality with `Default::default()`
// for the empty string. Finally checks that the two variants compare
// `Equal` with `cmp` and are `equiv` in both directions.
2121 fn test_maybe_owned_traits() {
2122 let s = Slice("abcde");
2123 assert_eq!(s.len(), 5);
2124 assert_eq!(s.as_slice(), "abcde");
2125 assert_eq!(s.to_str(), "abcde".to_string());
2126 assert_eq!(format_strbuf!("{}", s), "abcde".to_string());
2127 assert!(s.lt(&Owned("bcdef".to_string())));
2128 assert_eq!(Slice(""), Default::default());
2130 let o = Owned("abcde".to_string());
2131 assert_eq!(o.len(), 5);
2132 assert_eq!(o.as_slice(), "abcde");
2133 assert_eq!(o.to_str(), "abcde".to_string());
2134 assert_eq!(format_strbuf!("{}", o), "abcde".to_string());
2135 assert!(o.lt(&Slice("bcdef")));
2136 assert_eq!(Owned("".to_string()), Default::default());
2138 assert!(s.cmp(&o) == Equal);
2139 assert!(s.equiv(&o));
2141 assert!(o.cmp(&s) == Equal);
2142 assert!(o.equiv(&s));
// Checks the variant predicates: a `Slice` value answers `is_slice()` and
// not `is_owned()`, and an `Owned` value answers the opposite.
2146 fn test_maybe_owned_methods() {
2147 let s = Slice("abcde");
2148 assert!(s.is_slice());
2149 assert!(!s.is_owned());
2151 let o = Owned("abcde".to_string());
2152 assert!(!o.is_slice());
2153 assert!(o.is_owned());
// Checks that a clone of either variant compares equal to both a `Slice`
// and an `Owned` holding the same text (all four combinations).
2157 fn test_maybe_owned_clone() {
2158 assert_eq!(Owned("abcde".to_string()), Slice("abcde").clone());
2159 assert_eq!(Owned("abcde".to_string()), Owned("abcde".to_string()).clone());
2160 assert_eq!(Slice("abcde"), Slice("abcde").clone());
2161 assert_eq!(Slice("abcde"), Owned("abcde".to_string()).clone());
// Checks that `into_owned` on either variant gives a `String` with the
// same text.
2165 fn test_maybe_owned_into_owned() {
2166 assert_eq!(Slice("abcde").into_owned(), "abcde".to_string());
2167 assert_eq!(Owned("abcde".to_string()).into_owned(), "abcde".to_string());
// Checks `into_maybe_owned` on a string literal and on a `String`: each
// result must compare equal to both `Slice("abcde")` and an `Owned` holding
// the same text, so the assertions only pin down the text, not the variant.
2171 fn test_into_maybe_owned() {
2172 assert_eq!("abcde".into_maybe_owned(), Slice("abcde"));
2173 assert_eq!(("abcde".to_string()).into_maybe_owned(), Slice("abcde"));
2174 assert_eq!("abcde".into_maybe_owned(), Owned("abcde".to_string()));
2175 assert_eq!(("abcde".to_string()).into_maybe_owned(), Owned("abcde".to_string()));
2182 use self::test::Bencher;
// Benchmark: inside `b.iter`, counts the characters of a mixed
// Thai/CJK/Latin string with `s.chars().len()` and asserts the count equals
// `s.char_len()`, which is computed once before the closure.
2187 fn char_iterator(b: &mut Bencher) {
2188 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2189 let len = s.char_len();
2191 b.iter(|| assert_eq!(s.chars().len(), len));
// Benchmark: same measurement as counting via `s.chars().len()` against
// `s.char_len()`, but on a multi-line string literal made only of ASCII
// text.
2195 fn char_iterator_ascii(b: &mut Bencher) {
2196 let s = "Mary had a little lamb, Little lamb
2197 Mary had a little lamb, Little lamb
2198 Mary had a little lamb, Little lamb
2199 Mary had a little lamb, Little lamb
2200 Mary had a little lamb, Little lamb
2201 Mary had a little lamb, Little lamb";
2202 let len = s.char_len();
2204 b.iter(|| assert_eq!(s.chars().len(), len));
// Benchmark: inside `b.iter`, counts characters through the reversed
// iterator `s.chars().rev()` and asserts the count equals `s.char_len()`.
2208 fn char_iterator_rev(b: &mut Bencher) {
2209 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2210 let len = s.char_len();
2212 b.iter(|| assert_eq!(s.chars().rev().len(), len));
// Benchmark: inside `b.iter`, counts the items of `s.char_indices()` and
// asserts the count equals `s.char_len()`.
2216 fn char_indicesator(b: &mut Bencher) {
2217 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2218 let len = s.char_len();
2220 b.iter(|| assert_eq!(s.char_indices().len(), len));
// Benchmark: inside `b.iter`, counts the items of the reversed iterator
// `s.char_indices().rev()` and asserts the count equals `s.char_len()`.
2224 fn char_indicesator_rev(b: &mut Bencher) {
2225 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2226 let len = s.char_len();
2228 b.iter(|| assert_eq!(s.char_indices().rev().len(), len));
// Benchmark: splits a mostly non-ASCII string on the ASCII character 'V'
// inside `b.iter`. 'V' occurs twice in the text, so three pieces are
// asserted.
2232 fn split_unicode_ascii(b: &mut Bencher) {
2233 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2235 b.iter(|| assert_eq!(s.split('V').len(), 3));
2239 fn split_unicode_not_ascii(b: &mut Bencher) {
2240 struct NotAscii(char);
2241 impl CharEq for NotAscii {
2242 fn matches(&mut self, c: char) -> bool {
2243 let NotAscii(cc) = *self;
2246 fn only_ascii(&self) -> bool { false }
2248 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2250 b.iter(|| assert_eq!(s.split(NotAscii('V')).len(), 3));
// Benchmark: splits an ASCII sentence on ' ' inside `b.iter` and asserts
// the number of pieces equals the count obtained once before the closure.
2255 fn split_ascii(b: &mut Bencher) {
2256 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2257 let len = s.split(' ').len();
2259 b.iter(|| assert_eq!(s.split(' ').len(), len));
2263 fn split_not_ascii(b: &mut Bencher) {
2264 struct NotAscii(char);
2265 impl CharEq for NotAscii {
2267 fn matches(&mut self, c: char) -> bool {
2268 let NotAscii(cc) = *self;
2271 fn only_ascii(&self) -> bool { false }
2273 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2274 let len = s.split(' ').len();
2276 b.iter(|| assert_eq!(s.split(NotAscii(' ')).len(), len));
// Benchmark: splits an ASCII sentence using the named function `pred`
// (true for ' ') as the separator test, and asserts the number of pieces
// equals the count obtained by splitting on the character ' ' directly.
2280 fn split_extern_fn(b: &mut Bencher) {
2281 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2282 let len = s.split(' ').len();
2283 fn pred(c: char) -> bool { c == ' ' }
2285 b.iter(|| assert_eq!(s.split(pred).len(), len));
// Benchmark: splits an ASCII sentence using a closure (true for ' ') as
// the separator test, and asserts the number of pieces equals the count
// obtained by splitting on the character ' ' directly.
2289 fn split_closure(b: &mut Bencher) {
2290 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2291 let len = s.split(' ').len();
2293 b.iter(|| assert_eq!(s.split(|c: char| c == ' ').len(), len));
// Benchmark: splits an ASCII sentence using a one-element slice of
// characters (`&[' ']`) as the separator, and asserts the number of pieces
// equals the count obtained by splitting on the character ' ' directly.
2297 fn split_slice(b: &mut Bencher) {
2298 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2299 let len = s.split(' ').len();
2301 b.iter(|| assert_eq!(s.split(&[' ']).len(), len));
2305 fn is_utf8_100_ascii(b: &mut Bencher) {
2307 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2308 Lorem ipsum dolor sit amet, consectetur. ");
2310 assert_eq!(100, s.len());
2317 fn is_utf8_100_multibyte(b: &mut Bencher) {
2318 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2319 assert_eq!(100, s.len());
2326 fn from_utf8_lossy_100_ascii(b: &mut Bencher) {
2327 let s = bytes!("Hello there, the quick brown fox jumped over the lazy dog! \
2328 Lorem ipsum dolor sit amet, consectetur. ");
2330 assert_eq!(100, s.len());
2332 let _ = from_utf8_lossy(s);
2337 fn from_utf8_lossy_100_multibyte(b: &mut Bencher) {
2338 let s = bytes!("𐌀𐌖𐌋𐌄𐌑𐌉ปรدولة الكويتทศไทย中华𐍅𐌿𐌻𐍆𐌹𐌻𐌰");
2339 assert_eq!(100, s.len());
2341 let _ = from_utf8_lossy(s);
2346 fn from_utf8_lossy_invalid(b: &mut Bencher) {
2347 let s = bytes!("Hello", 0xC0, 0x80, " There", 0xE6, 0x83, " Goodbye");
2349 let _ = from_utf8_lossy(s);
2354 fn from_utf8_lossy_100_invalid(b: &mut Bencher) {
2355 let s = Vec::from_elem(100, 0xF5u8);
2357 let _ = from_utf8_lossy(s.as_slice());
2362 fn bench_connect(b: &mut Bencher) {
2363 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2365 let v = [s, s, s, s, s, s, s, s, s, s];
2367 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
2372 fn bench_contains_short_short(b: &mut Bencher) {
2373 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2377 assert!(haystack.contains(needle));
2382 fn bench_contains_short_long(b: &mut Bencher) {
2384 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2385 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2386 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2387 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2388 tempus vel, gravida nec quam.
2390 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2391 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2392 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2393 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2394 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2395 interdum. Curabitur ut nisi justo.
2397 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2398 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2399 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2400 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2401 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2402 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2403 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2404 Aliquam sit amet placerat lorem.
2406 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
2407 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
2408 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
2409 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
2410 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
2413 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
2414 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
2415 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
2416 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
2417 malesuada sollicitudin quam eu fermentum.";
2418 let needle = "english";
2421 assert!(!haystack.contains(needle));
2426 fn bench_contains_bad_naive(b: &mut Bencher) {
2427 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
2428 let needle = "aaaaaaaab";
2431 assert!(!haystack.contains(needle));
2436 fn bench_contains_equal(b: &mut Bencher) {
2437 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2438 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2441 assert!(haystack.contains(needle));