1 // Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT
2 // file at the top-level directory of this distribution and at
3 // http://rust-lang.org/COPYRIGHT.
5 // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6 // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7 // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8 // option. This file may not be copied, modified, or distributed
9 // except according to those terms.
11 // ignore-lexer-test FIXME #15679
13 //! Unicode string manipulation (`str` type)
17 //! Rust's string type is one of the core primitive types of the language. While
18 //! represented by the name `str`, the name `str` is not actually a valid type in
19 //! Rust. Each string must also be decorated with a pointer. `String` is used
20 //! for an owned string, so there is only one commonly-used `str` type in Rust:
23 //! `&str` is the borrowed string type. This type of string can only be created
24 //! from other strings, unless it is a static string (see below). As the word
25 //! "borrowed" implies, this type of string is owned elsewhere, and this string
26 //! cannot be moved out of.
28 //! As an example, here's some code that uses a string.
32 //! let borrowed_string = "This string is borrowed with the 'static lifetime";
36 //! From the example above, you can guess that Rust's string literals have the
37 //! `'static` lifetime. This is akin to C's concept of a static string.
38 //! More precisely, string literals are immutable views with a 'static lifetime
39 //! (otherwise known as the lifetime of the entire program), and thus have the
40 //! type `&'static str`.
44 //! Rust's string type, `str`, is a sequence of Unicode scalar values encoded as a
45 //! stream of UTF-8 bytes. All [strings](../../reference.html#literals) are
46 //! guaranteed to be validly encoded UTF-8 sequences. Additionally, strings are
47 //! not null-terminated and can thus contain null bytes.
49 //! The actual representation of strings has direct mappings to slices: `&str`
50 //! is the same as `&[u8]`.
52 #![doc(primitive = "str")]
53 #![stable(feature = "rust1", since = "1.0.0")]
55 use self::RecompositionState::*;
56 use self::DecompositionType::*;
58 use core::borrow::{BorrowFrom, ToOwned};
59 use core::char::CharExt;
60 use core::clone::Clone;
61 use core::iter::AdditiveIterator;
62 use core::iter::{Iterator, IteratorExt};
64 use core::ops::RangeFull;
65 use core::option::Option::{self, Some, None};
66 use core::result::Result;
67 use core::slice::AsSlice;
68 use core::str as core_str;
69 use unicode::str::{UnicodeStr, Utf16Encoder};
71 use ring_buf::RingBuf;
76 use slice::SliceConcatExt;
78 pub use core::str::{FromStr, Utf8Error, Str};
79 pub use core::str::{Lines, LinesAny, MatchIndices, SplitStr, CharRange};
80 pub use core::str::{Split, SplitTerminator};
81 pub use core::str::{SplitN, RSplitN};
82 pub use core::str::{from_utf8, CharEq, Chars, CharIndices, Bytes};
83 pub use core::str::{from_utf8_unchecked, from_c_str};
84 pub use unicode::str::{Words, Graphemes, GraphemeIndices};
87 Section: Creating a string
90 impl<S: Str> SliceConcatExt<str, String> for [S] {
91 fn concat(&self) -> String {
92 let s = self.as_slice();
98 // `len` calculation may overflow but push_str will check boundaries
99 let len = s.iter().map(|s| s.as_slice().len()).sum();
100 let mut result = String::with_capacity(len);
103 result.push_str(s.as_slice())
109 fn connect(&self, sep: &str) -> String {
110 let s = self.as_slice();
113 return String::new();
121 // this is wrong without the guarantee that `self` is non-empty
122 // `len` calculation may overflow but push_str will check boundaries
123 let len = sep.len() * (s.len() - 1)
124 + s.iter().map(|s| s.as_slice().len()).sum();
125 let mut result = String::with_capacity(len);
126 let mut first = true;
132 result.push_str(sep);
134 result.push_str(s.as_slice());
144 // Helper functions used for Unicode normalization
145 fn canonical_sort(comb: &mut [(char, u8)]) {
146 let len = comb.len();
148 let mut swapped = false;
150 let class_a = comb[j-1].1;
151 let class_b = comb[j].1;
152 if class_a != 0 && class_b != 0 && class_a > class_b {
157 if !swapped { break; }
162 enum DecompositionType {
167 /// External iterator for a string's decomposition's characters.
168 /// Use with the `std::iter` module.
170 #[unstable(feature = "collections")]
171 pub struct Decompositions<'a> {
172 kind: DecompositionType,
174 buffer: Vec<(char, u8)>,
178 #[stable(feature = "rust1", since = "1.0.0")]
179 impl<'a> Iterator for Decompositions<'a> {
183 fn next(&mut self) -> Option<char> {
184 match self.buffer.first() {
187 self.buffer.remove(0);
190 Some(&(c, _)) if self.sorted => {
191 self.buffer.remove(0);
194 _ => self.sorted = false
198 for ch in self.iter.by_ref() {
199 let buffer = &mut self.buffer;
200 let sorted = &mut self.sorted;
202 let callback = |&mut: d| {
204 unicode::char::canonical_combining_class(d);
205 if class == 0 && !*sorted {
206 canonical_sort(buffer.as_mut_slice());
209 buffer.push((d, class));
213 unicode::char::decompose_canonical(ch, callback)
216 unicode::char::decompose_compatible(ch, callback)
227 canonical_sort(self.buffer.as_mut_slice());
231 if self.buffer.is_empty() {
234 match self.buffer.remove(0) {
244 fn size_hint(&self) -> (uint, Option<uint>) {
245 let (lower, _) = self.iter.size_hint();
251 enum RecompositionState {
257 /// External iterator for a string's recomposition's characters.
258 /// Use with the `std::iter` module.
260 #[unstable(feature = "collections")]
261 pub struct Recompositions<'a> {
262 iter: Decompositions<'a>,
263 state: RecompositionState,
264 buffer: RingBuf<char>,
265 composee: Option<char>,
269 #[stable(feature = "rust1", since = "1.0.0")]
270 impl<'a> Iterator for Recompositions<'a> {
274 fn next(&mut self) -> Option<char> {
278 for ch in self.iter.by_ref() {
279 let ch_class = unicode::char::canonical_combining_class(ch);
280 if self.composee.is_none() {
284 self.composee = Some(ch);
287 let k = self.composee.clone().unwrap();
289 match self.last_ccc {
291 match unicode::char::compose(k, ch) {
293 self.composee = Some(r);
298 self.composee = Some(ch);
301 self.buffer.push_back(ch);
302 self.last_ccc = Some(ch_class);
307 if l_class >= ch_class {
308 // `ch` is blocked from `composee`
310 self.composee = Some(ch);
311 self.last_ccc = None;
312 self.state = Purging;
315 self.buffer.push_back(ch);
316 self.last_ccc = Some(ch_class);
319 match unicode::char::compose(k, ch) {
321 self.composee = Some(r);
325 self.buffer.push_back(ch);
326 self.last_ccc = Some(ch_class);
332 self.state = Finished;
333 if self.composee.is_some() {
334 return self.composee.take();
338 match self.buffer.pop_front() {
339 None => self.state = Composing,
344 match self.buffer.pop_front() {
345 None => return self.composee.take(),
354 /// External iterator for a string's UTF16 codeunits.
355 /// Use with the `std::iter` module.
357 #[unstable(feature = "collections")]
358 pub struct Utf16Units<'a> {
359 encoder: Utf16Encoder<Chars<'a>>
362 #[stable(feature = "rust1", since = "1.0.0")]
363 impl<'a> Iterator for Utf16Units<'a> {
// Advances the iterator by delegating to the wrapped `Utf16Encoder`:
// whatever `encoder.next()` yields (a `u16` code unit or `None`) is returned as-is.
367 fn next(&mut self) -> Option<u16> { self.encoder.next() }
// Forwards the wrapped encoder's size hint unchanged; no adjustment is made here.
370 fn size_hint(&self) -> (uint, Option<uint>) { self.encoder.size_hint() }
377 // Return the initial codepoint accumulator for the first byte.
378 // The first byte is special, only want bottom 5 bits for width 2, 4 bits
379 // for width 3, and 3 bits for width 4
380 macro_rules! utf8_first_byte {
381 ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
384 // return the value of $ch updated with continuation byte $byte
385 macro_rules! utf8_acc_cont_byte {
386 ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
389 #[unstable(feature = "collections", reason = "trait is unstable")]
390 impl BorrowFrom<String> for str {
// Borrows an owned `String` as a `str` by slicing it with the empty-bracket form `[]`.
// NOTE(review): presumably `[]` is the full-range slice (see the `RangeFull` import) — confirm.
391 fn borrow_from(owned: &String) -> &str { &owned[] }
394 #[unstable(feature = "collections", reason = "trait is unstable")]
395 impl ToOwned<String> for str {
396 fn to_owned(&self) -> String {
398 String::from_utf8_unchecked(self.as_bytes().to_owned())
408 Section: Trait implementations
411 /// Any string that can be represented as a slice.
412 #[stable(feature = "rust1", since = "1.0.0")]
413 pub trait StrExt: Index<RangeFull, Output = str> {
414 /// Escapes each char in `s` with `char::escape_default`.
415 #[unstable(feature = "collections",
416 reason = "return type may change to be an iterator")]
417 fn escape_default(&self) -> String {
418 self.chars().flat_map(|c| c.escape_default()).collect()
421 /// Escapes each char in `s` with `char::escape_unicode`.
422 #[unstable(feature = "collections",
423 reason = "return type may change to be an iterator")]
424 fn escape_unicode(&self) -> String {
425 self.chars().flat_map(|c| c.escape_unicode()).collect()
428 /// Replaces all occurrences of one string with another.
432 /// * `from` - The string to replace
433 /// * `to` - The replacement string
437 /// The original string with all occurrences of `from` replaced with `to`.
442 /// let s = "this is old";
444 /// assert_eq!(s.replace("old", "new"), "this is new");
446 /// // not found, so no change.
447 /// assert_eq!(s.replace("cookie monster", "little lamb"), s);
449 #[stable(feature = "rust1", since = "1.0.0")]
450 fn replace(&self, from: &str, to: &str) -> String {
451 let mut result = String::new();
452 let mut last_end = 0;
453 for (start, end) in self.match_indices(from) {
454 result.push_str(unsafe { self.slice_unchecked(last_end, start) });
458 result.push_str(unsafe { self.slice_unchecked(last_end, self.len()) });
462 /// Returns an iterator over the string in Unicode Normalization Form D
463 /// (canonical decomposition).
465 #[unstable(feature = "collections",
466 reason = "this functionality may be moved to libunicode")]
467 fn nfd_chars<'a>(&'a self) -> Decompositions<'a> {
469 iter: self[].chars(),
476 /// Returns an iterator over the string in Unicode Normalization Form KD
477 /// (compatibility decomposition).
479 #[unstable(feature = "collections",
480 reason = "this functionality may be moved to libunicode")]
481 fn nfkd_chars<'a>(&'a self) -> Decompositions<'a> {
483 iter: self[].chars(),
490 /// An Iterator over the string in Unicode Normalization Form C
491 /// (canonical decomposition followed by canonical composition).
493 #[unstable(feature = "collections",
494 reason = "this functionality may be moved to libunicode")]
495 fn nfc_chars<'a>(&'a self) -> Recompositions<'a> {
497 iter: self.nfd_chars(),
499 buffer: RingBuf::new(),
505 /// An Iterator over the string in Unicode Normalization Form KC
506 /// (compatibility decomposition followed by canonical composition).
508 #[unstable(feature = "collections",
509 reason = "this functionality may be moved to libunicode")]
510 fn nfkc_chars<'a>(&'a self) -> Recompositions<'a> {
512 iter: self.nfkd_chars(),
514 buffer: RingBuf::new(),
520 /// Returns true if a string contains a string pattern.
524 /// - pat - The string pattern to look for
529 /// assert!("bananas".contains("nana"));
531 #[stable(feature = "rust1", since = "1.0.0")]
532 fn contains(&self, pat: &str) -> bool {
533 core_str::StrExt::contains(&self[], pat)
536 /// Returns true if a string contains a char pattern.
540 /// - pat - The char pattern to look for
545 /// assert!("hello".contains_char('e'));
547 #[unstable(feature = "collections",
548 reason = "might get removed in favour of a more generic contains()")]
549 fn contains_char<P: CharEq>(&self, pat: P) -> bool {
550 core_str::StrExt::contains_char(&self[], pat)
553 /// An iterator over the characters of `self`. Note, this iterates
554 /// over Unicode code-points, not Unicode graphemes.
559 /// let v: Vec<char> = "abc åäö".chars().collect();
560 /// assert_eq!(v, vec!['a', 'b', 'c', ' ', 'å', 'ä', 'ö']);
562 #[stable(feature = "rust1", since = "1.0.0")]
563 fn chars(&self) -> Chars {
564 core_str::StrExt::chars(&self[])
567 /// An iterator over the bytes of `self`
572 /// let v: Vec<u8> = "bors".bytes().collect();
573 /// assert_eq!(v, b"bors".to_vec());
575 #[stable(feature = "rust1", since = "1.0.0")]
576 fn bytes(&self) -> Bytes {
577 core_str::StrExt::bytes(&self[])
580 /// An iterator over the characters of `self` and their byte offsets.
581 #[stable(feature = "rust1", since = "1.0.0")]
582 fn char_indices(&self) -> CharIndices {
583 core_str::StrExt::char_indices(&self[])
586 /// An iterator over substrings of `self`, separated by characters
587 /// matched by the pattern `pat`.
592 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').collect();
593 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
595 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).collect();
596 /// assert_eq!(v, vec!["abc", "def", "ghi"]);
598 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').collect();
599 /// assert_eq!(v, vec!["lion", "", "tiger", "leopard"]);
601 /// let v: Vec<&str> = "".split('X').collect();
602 /// assert_eq!(v, vec![""]);
604 #[stable(feature = "rust1", since = "1.0.0")]
605 fn split<P: CharEq>(&self, pat: P) -> Split<P> {
606 core_str::StrExt::split(&self[], pat)
609 /// An iterator over substrings of `self`, separated by characters
610 /// matched by the pattern `pat`, restricted to splitting at most `count`
616 /// let v: Vec<&str> = "Mary had a little lambda".splitn(2, ' ').collect();
617 /// assert_eq!(v, vec!["Mary", "had", "a little lambda"]);
619 /// let v: Vec<&str> = "abc1def2ghi".splitn(1, |&: c: char| c.is_numeric()).collect();
620 /// assert_eq!(v, vec!["abc", "def2ghi"]);
622 /// let v: Vec<&str> = "lionXXtigerXleopard".splitn(2, 'X').collect();
623 /// assert_eq!(v, vec!["lion", "", "tigerXleopard"]);
625 /// let v: Vec<&str> = "abcXdef".splitn(0, 'X').collect();
626 /// assert_eq!(v, vec!["abcXdef"]);
628 /// let v: Vec<&str> = "".splitn(1, 'X').collect();
629 /// assert_eq!(v, vec![""]);
631 #[stable(feature = "rust1", since = "1.0.0")]
632 fn splitn<P: CharEq>(&self, count: uint, pat: P) -> SplitN<P> {
633 core_str::StrExt::splitn(&self[], count, pat)
636 /// An iterator over substrings of `self`, separated by characters
637 /// matched by the pattern `pat`.
639 /// Equivalent to `split`, except that the trailing substring
640 /// is skipped if empty (terminator semantics).
645 /// let v: Vec<&str> = "A.B.".split_terminator('.').collect();
646 /// assert_eq!(v, vec!["A", "B"]);
648 /// let v: Vec<&str> = "A..B..".split_terminator('.').collect();
649 /// assert_eq!(v, vec!["A", "", "B", ""]);
651 /// let v: Vec<&str> = "Mary had a little lamb".split(' ').rev().collect();
652 /// assert_eq!(v, vec!["lamb", "little", "a", "had", "Mary"]);
654 /// let v: Vec<&str> = "abc1def2ghi".split(|&: c: char| c.is_numeric()).rev().collect();
655 /// assert_eq!(v, vec!["ghi", "def", "abc"]);
657 /// let v: Vec<&str> = "lionXXtigerXleopard".split('X').rev().collect();
658 /// assert_eq!(v, vec!["leopard", "tiger", "", "lion"]);
660 #[unstable(feature = "collections", reason = "might get removed")]
661 fn split_terminator<P: CharEq>(&self, pat: P) -> SplitTerminator<P> {
662 core_str::StrExt::split_terminator(&self[], pat)
665 /// An iterator over substrings of `self`, separated by characters
666 /// matched by the pattern `pat`, starting from the end of the string.
667 /// Restricted to splitting at most `count` times.
672 /// let v: Vec<&str> = "Mary had a little lamb".rsplitn(2, ' ').collect();
673 /// assert_eq!(v, vec!["lamb", "little", "Mary had a"]);
675 /// let v: Vec<&str> = "abc1def2ghi".rsplitn(1, |&: c: char| c.is_numeric()).collect();
676 /// assert_eq!(v, vec!["ghi", "abc1def"]);
678 /// let v: Vec<&str> = "lionXXtigerXleopard".rsplitn(2, 'X').collect();
679 /// assert_eq!(v, vec!["leopard", "tiger", "lionX"]);
681 #[stable(feature = "rust1", since = "1.0.0")]
682 fn rsplitn<P: CharEq>(&self, count: uint, pat: P) -> RSplitN<P> {
683 core_str::StrExt::rsplitn(&self[], count, pat)
686 /// An iterator over the start and end indices of the disjoint
687 /// matches of the pattern `pat` within `self`.
689 /// That is, each returned value `(start, end)` satisfies
690 /// `self.slice(start, end) == pat`. For matches of `pat` within
691 /// `self` that overlap, only the indices corresponding to the
692 /// first match are returned.
697 /// let v: Vec<(uint, uint)> = "abcXXXabcYYYabc".match_indices("abc").collect();
698 /// assert_eq!(v, vec![(0,3), (6,9), (12,15)]);
700 /// let v: Vec<(uint, uint)> = "1abcabc2".match_indices("abc").collect();
701 /// assert_eq!(v, vec![(1,4), (4,7)]);
703 /// let v: Vec<(uint, uint)> = "ababa".match_indices("aba").collect();
704 /// assert_eq!(v, vec![(0, 3)]); // only the first `aba`
706 #[unstable(feature = "collections",
707 reason = "might have its iterator type changed")]
708 fn match_indices<'a>(&'a self, pat: &'a str) -> MatchIndices<'a> {
709 core_str::StrExt::match_indices(&self[], pat)
712 /// An iterator over the substrings of `self` separated by the pattern `pat`.
717 /// let v: Vec<&str> = "abcXXXabcYYYabc".split_str("abc").collect();
718 /// assert_eq!(v, vec!["", "XXX", "YYY", ""]);
720 /// let v: Vec<&str> = "1abcabc2".split_str("abc").collect();
721 /// assert_eq!(v, vec!["1", "", "2"]);
723 #[unstable(feature = "collections",
724 reason = "might get removed in the future in favor of a more generic split()")]
725 fn split_str<'a>(&'a self, pat: &'a str) -> SplitStr<'a> {
726 core_str::StrExt::split_str(&self[], pat)
729 /// An iterator over the lines of a string (subsequences separated
730 /// by `\n`). This does not include the empty string after a
736 /// let four_lines = "foo\nbar\n\nbaz\n";
737 /// let v: Vec<&str> = four_lines.lines().collect();
738 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
740 #[stable(feature = "rust1", since = "1.0.0")]
741 fn lines(&self) -> Lines {
742 core_str::StrExt::lines(&self[])
745 /// An iterator over the lines of a string, separated by either
746 /// `\n` or `\r\n`. As with `.lines()`, this does not include an
747 /// empty trailing line.
752 /// let four_lines = "foo\r\nbar\n\r\nbaz\n";
753 /// let v: Vec<&str> = four_lines.lines_any().collect();
754 /// assert_eq!(v, vec!["foo", "bar", "", "baz"]);
756 #[stable(feature = "rust1", since = "1.0.0")]
757 fn lines_any(&self) -> LinesAny {
758 core_str::StrExt::lines_any(&self[])
761 /// Deprecated: use `s[a .. b]` instead.
762 #[unstable(feature = "collections",
763 reason = "use slice notation [a..b] instead")]
764 #[deprecated(since = "1.0.0", reason = "use slice notation [a..b] instead")]
765 fn slice(&self, begin: uint, end: uint) -> &str;
767 /// Deprecated: use `s[a..]` instead.
768 #[unstable(feature = "collections",
769 reason = "use slice notation [a..b] instead")]
770 #[deprecated(since = "1.0.0", reason = "use slice notation [a..] instead")]
771 fn slice_from(&self, begin: uint) -> &str;
773 /// Deprecated: use `s[..a]` instead.
774 #[unstable(feature = "collections",
775 reason = "use slice notation [a..b] instead")]
776 #[deprecated(since = "1.0.0", reason = "use slice notation [..a] instead")]
777 fn slice_to(&self, end: uint) -> &str;
779 /// Returns a slice of the string from the character range
780 /// [`begin`..`end`).
782 /// That is, start at the `begin`-th code point of the string and
783 /// continue to the `end`-th code point. This does not detect or
784 /// handle edge cases such as leaving a combining character as the
785 /// first code point of the string.
787 /// Due to the design of UTF-8, this operation is `O(end)`.
788 /// See `slice`, `slice_to` and `slice_from` for `O(1)`
789 /// variants that use byte indices rather than code point
792 /// Panics if `begin` > `end` or if either `begin` or `end` is
793 /// beyond the last character of the string.
798 /// let s = "Löwe 老虎 Léopard";
799 /// assert_eq!(s.slice_chars(0, 4), "Löwe");
800 /// assert_eq!(s.slice_chars(5, 7), "老虎");
802 #[unstable(feature = "collections",
803 reason = "may have yet to prove its worth")]
804 fn slice_chars(&self, begin: uint, end: uint) -> &str {
805 core_str::StrExt::slice_chars(&self[], begin, end)
808 /// Takes a bytewise (not UTF-8) slice from a string.
810 /// Returns the substring from [`begin`..`end`).
812 /// Caller must check both UTF-8 character boundaries and the boundaries of
813 /// the entire slice as well.
814 #[stable(feature = "rust1", since = "1.0.0")]
815 unsafe fn slice_unchecked(&self, begin: uint, end: uint) -> &str {
816 core_str::StrExt::slice_unchecked(&self[], begin, end)
819 /// Returns true if the pattern `pat` is a prefix of the string.
824 /// assert!("banana".starts_with("ba"));
826 #[stable(feature = "rust1", since = "1.0.0")]
827 fn starts_with(&self, pat: &str) -> bool {
828 core_str::StrExt::starts_with(&self[], pat)
831 /// Returns true if the pattern `pat` is a suffix of the string.
836 /// assert!("banana".ends_with("nana"));
838 #[stable(feature = "rust1", since = "1.0.0")]
839 fn ends_with(&self, pat: &str) -> bool {
840 core_str::StrExt::ends_with(&self[], pat)
843 /// Returns a string with all pre- and suffixes that match
844 /// the pattern `pat` repeatedly removed.
848 /// * pat - a string pattern
853 /// assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
854 /// let x: &[_] = &['1', '2'];
855 /// assert_eq!("12foo1bar12".trim_matches(x), "foo1bar");
856 /// assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
858 #[stable(feature = "rust1", since = "1.0.0")]
859 fn trim_matches<P: CharEq>(&self, pat: P) -> &str {
860 core_str::StrExt::trim_matches(&self[], pat)
863 /// Returns a string with all prefixes that match
864 /// the pattern `pat` repeatedly removed.
868 /// * pat - a string pattern
873 /// assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
874 /// let x: &[_] = &['1', '2'];
875 /// assert_eq!("12foo1bar12".trim_left_matches(x), "foo1bar12");
876 /// assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
878 #[stable(feature = "rust1", since = "1.0.0")]
879 fn trim_left_matches<P: CharEq>(&self, pat: P) -> &str {
880 core_str::StrExt::trim_left_matches(&self[], pat)
883 /// Returns a string with all suffixes that match
884 /// the pattern `pat` repeatedly removed.
888 /// * pat - a string pattern
893 /// assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
894 /// let x: &[_] = &['1', '2'];
895 /// assert_eq!("12foo1bar12".trim_right_matches(x), "12foo1bar");
896 /// assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
898 #[stable(feature = "rust1", since = "1.0.0")]
899 fn trim_right_matches<P: CharEq>(&self, pat: P) -> &str {
900 core_str::StrExt::trim_right_matches(&self[], pat)
903 /// Check that `index`-th byte lies at the start and/or end of a
904 /// UTF-8 code point sequence.
906 /// The start and end of the string (when `index == self.len()`)
907 /// are considered to be boundaries.
909 /// Panics if `index` is greater than `self.len()`.
914 /// let s = "Löwe 老虎 Léopard";
915 /// assert!(s.is_char_boundary(0));
917 /// assert!(s.is_char_boundary(6));
918 /// assert!(s.is_char_boundary(s.len()));
920 /// // second byte of `ö`
921 /// assert!(!s.is_char_boundary(2));
923 /// // third byte of `老`
924 /// assert!(!s.is_char_boundary(8));
926 #[unstable(feature = "collections",
927 reason = "naming is uncertain with container conventions")]
928 fn is_char_boundary(&self, index: uint) -> bool {
929 core_str::StrExt::is_char_boundary(&self[], index)
932 /// Pluck a character out of a string and return the index of the next
935 /// This function can be used to iterate over the Unicode characters of a
940 /// This example manually iterates through the characters of a
941 /// string; this should normally be done by `.chars()` or
945 /// use std::str::CharRange;
947 /// let s = "中华Việt Nam";
949 /// while i < s.len() {
950 /// let CharRange {ch, next} = s.char_range_at(i);
951 /// println!("{}: {}", i, ch);
974 /// * start - The byte offset of the char to extract
978 /// A record {ch: char, next: uint} containing the char value and the byte
979 /// index of the next Unicode character.
983 /// If `start` is greater than or equal to the length of the string.
984 /// If `start` is not the index of the beginning of a valid UTF-8 character.
985 #[unstable(feature = "collections",
986 reason = "naming is uncertain with container conventions")]
987 fn char_range_at(&self, start: uint) -> CharRange {
988 core_str::StrExt::char_range_at(&self[], start)
991 /// Given a byte position and a str, return the previous char and its position.
993 /// This function can be used to iterate over a Unicode string in reverse.
995 /// Returns 0 for next index if called on start index 0.
999 /// If `start` is greater than the length of the string.
1000 /// If `start` is not an index following a valid UTF-8 character.
1001 #[unstable(feature = "collections",
1002 reason = "naming is uncertain with container conventions")]
1003 fn char_range_at_reverse(&self, start: uint) -> CharRange {
1004 core_str::StrExt::char_range_at_reverse(&self[], start)
1007 /// Plucks the character starting at the `i`th byte of a string.
1013 /// assert_eq!(s.char_at(1), 'b');
1014 /// assert_eq!(s.char_at(2), 'π');
1015 /// assert_eq!(s.char_at(4), 'c');
1020 /// If `i` is greater than or equal to the length of the string.
1021 /// If `i` is not the index of the beginning of a valid UTF-8 character.
1022 #[unstable(feature = "collections",
1023 reason = "naming is uncertain with container conventions")]
1024 fn char_at(&self, i: uint) -> char {
1025 core_str::StrExt::char_at(&self[], i)
1028 /// Plucks the character ending at the `i`th byte of a string.
1032 /// If `i` is greater than the length of the string.
1033 /// If `i` is not an index following a valid UTF-8 character.
1034 #[unstable(feature = "collections",
1035 reason = "naming is uncertain with container conventions")]
1036 fn char_at_reverse(&self, i: uint) -> char {
1037 core_str::StrExt::char_at_reverse(&self[], i)
1040 /// Work with the byte buffer of a string as a byte slice.
1045 /// assert_eq!("bors".as_bytes(), b"bors");
1047 #[stable(feature = "rust1", since = "1.0.0")]
1048 fn as_bytes(&self) -> &[u8] {
1049 core_str::StrExt::as_bytes(&self[])
1052 /// Returns the byte index of the first character of `self` that
1053 /// matches the pattern `pat`.
1057 /// `Some` containing the byte index of the first matching character
1058 /// or `None` if there is no match
1063 /// let s = "Löwe 老虎 Léopard";
1065 /// assert_eq!(s.find('L'), Some(0));
1066 /// assert_eq!(s.find('é'), Some(14));
1068 /// // the first space
1069 /// assert_eq!(s.find(|&: c: char| c.is_whitespace()), Some(5));
1071 /// // neither are found
1072 /// let x: &[_] = &['1', '2'];
1073 /// assert_eq!(s.find(x), None);
1075 #[stable(feature = "rust1", since = "1.0.0")]
1076 fn find<P: CharEq>(&self, pat: P) -> Option<uint> {
1077 core_str::StrExt::find(&self[], pat)
1080 /// Returns the byte index of the last character of `self` that
1081 /// matches the pattern `pat`.
1085 /// `Some` containing the byte index of the last matching character
1086 /// or `None` if there is no match.
1091 /// let s = "Löwe 老虎 Léopard";
1093 /// assert_eq!(s.rfind('L'), Some(13));
1094 /// assert_eq!(s.rfind('é'), Some(14));
1096 /// // the second space
1097 /// assert_eq!(s.rfind(|&: c: char| c.is_whitespace()), Some(12));
1099 /// // searches for an occurrence of either `1` or `2`, but neither are found
1100 /// let x: &[_] = &['1', '2'];
1101 /// assert_eq!(s.rfind(x), None);
1103 #[stable(feature = "rust1", since = "1.0.0")]
1104 fn rfind<P: CharEq>(&self, pat: P) -> Option<uint> {
1105 core_str::StrExt::rfind(&self[], pat)
1108 /// Returns the byte index of the first matching substring
1112 /// * `needle` - The string to search for
1116 /// `Some` containing the byte index of the first matching substring
1117 /// or `None` if there is no match.
1122 /// let s = "Löwe 老虎 Léopard";
1124 /// assert_eq!(s.find_str("老虎 L"), Some(6));
1125 /// assert_eq!(s.find_str("muffin man"), None);
1127 #[unstable(feature = "collections",
1128 reason = "might get removed in favor of a more generic find in the future")]
1129 fn find_str(&self, needle: &str) -> Option<uint> {
1130 core_str::StrExt::find_str(&self[], needle)
1133 /// Retrieves the first character from a string slice and returns
1134 /// it. This does not allocate a new string; instead, it returns a
1135 /// slice that points one character beyond the character that was
1136 /// shifted. If the string does not contain any characters,
1137 /// None is returned instead.
1142 /// let s = "Löwe 老虎 Léopard";
1143 /// let (c, s1) = s.slice_shift_char().unwrap();
1144 /// assert_eq!(c, 'L');
1145 /// assert_eq!(s1, "öwe 老虎 Léopard");
1147 /// let (c, s2) = s1.slice_shift_char().unwrap();
1148 /// assert_eq!(c, 'ö');
1149 /// assert_eq!(s2, "we 老虎 Léopard");
1151 #[unstable(feature = "collections",
1152 reason = "awaiting conventions about shifting and slices")]
1153 fn slice_shift_char(&self) -> Option<(char, &str)> {
1154 core_str::StrExt::slice_shift_char(&self[])
1157 /// Returns the byte offset of an inner slice relative to an enclosing outer slice.
1159 /// Panics if `inner` is not a direct slice contained within self.
1164 /// let string = "a\nb\nc";
1165 /// let lines: Vec<&str> = string.lines().collect();
1167 /// assert!(string.subslice_offset(lines[0]) == 0); // &"a"
1168 /// assert!(string.subslice_offset(lines[1]) == 2); // &"b"
1169 /// assert!(string.subslice_offset(lines[2]) == 4); // &"c"
1171 #[unstable(feature = "collections",
1172 reason = "awaiting convention about comparability of arbitrary slices")]
1173 fn subslice_offset(&self, inner: &str) -> uint {
1174 core_str::StrExt::subslice_offset(&self[], inner)
1177 /// Return an unsafe pointer to the string's buffer.
1179 /// The caller must ensure that the string outlives this pointer,
1180 /// and that it is not reallocated (e.g. by pushing to the
1182 #[stable(feature = "rust1", since = "1.0.0")]
1184 fn as_ptr(&self) -> *const u8 {
1185 core_str::StrExt::as_ptr(&self[])
1188 /// Return an iterator of `u16` over the string encoded as UTF-16.
1189 #[unstable(feature = "collections",
1190 reason = "this functionality may only be provided by libunicode")]
1191 fn utf16_units(&self) -> Utf16Units {
1192 Utf16Units { encoder: Utf16Encoder::new(self[].chars()) }
1195 /// Return the number of bytes in this string
1200 /// assert_eq!("foo".len(), 3);
1201 /// assert_eq!("ƒoo".len(), 4);
1203 #[stable(feature = "rust1", since = "1.0.0")]
1205 fn len(&self) -> uint {
1206 core_str::StrExt::len(&self[])
1209 /// Returns true if this slice contains no bytes
1214 /// assert!("".is_empty());
1217 #[stable(feature = "rust1", since = "1.0.0")]
1218 fn is_empty(&self) -> bool {
1219 core_str::StrExt::is_empty(&self[])
1222 /// Parse this string into the specified type.
1227 /// assert_eq!("4".parse::<u32>(), Ok(4));
1228 /// assert!("j".parse::<u32>().is_err());
1231 #[stable(feature = "rust1", since = "1.0.0")]
1232 fn parse<F: FromStr>(&self) -> Result<F, F::Err> {
1233 core_str::StrExt::parse(&self[])
1236 /// Returns an iterator over the
1237 /// [grapheme clusters](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1240 /// If `is_extended` is true, the iterator is over the *extended grapheme clusters*;
1241 /// otherwise, the iterator is over the *legacy grapheme clusters*.
1242 /// [UAX#29](http://www.unicode.org/reports/tr29/#Grapheme_Cluster_Boundaries)
1243 /// recommends extended grapheme cluster boundaries for general processing.
1248 /// let gr1 = "a\u{310}e\u{301}o\u{308}\u{332}".graphemes(true).collect::<Vec<&str>>();
1249 /// let b: &[_] = &["a\u{310}", "e\u{301}", "o\u{308}\u{332}"];
1250 /// assert_eq!(gr1.as_slice(), b);
1251 /// let gr2 = "a\r\nb🇷🇺🇸🇹".graphemes(true).collect::<Vec<&str>>();
1252 /// let b: &[_] = &["a", "\r\n", "b", "🇷🇺🇸🇹"];
1253 /// assert_eq!(gr2.as_slice(), b);
1255 #[unstable(feature = "collections",
1256 reason = "this functionality may only be provided by libunicode")]
1257 fn graphemes(&self, is_extended: bool) -> Graphemes {
1258 UnicodeStr::graphemes(&self[], is_extended)
1261 /// Returns an iterator over the grapheme clusters of self and their byte offsets.
1262 /// See `graphemes()` method for more information.
1267 /// let gr_inds = "a̐éö̲\r\n".grapheme_indices(true).collect::<Vec<(uint, &str)>>();
1268 /// let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
1269 /// assert_eq!(gr_inds.as_slice(), b);
1271 #[unstable(feature = "collections",
1272 reason = "this functionality may only be provided by libunicode")]
1273 fn grapheme_indices(&self, is_extended: bool) -> GraphemeIndices {
1274 UnicodeStr::grapheme_indices(&self[], is_extended)
1277 /// An iterator over the words of a string (subsequences separated
1278 /// by any sequence of whitespace). Sequences of whitespace are
1279 /// collapsed, so empty "words" are not included.
1284 /// let some_words = " Mary had\ta little \n\t lamb";
1285 /// let v: Vec<&str> = some_words.words().collect();
1286 /// assert_eq!(v, vec!["Mary", "had", "a", "little", "lamb"]);
1288 #[stable(feature = "rust1", since = "1.0.0")]
1289 fn words(&self) -> Words {
1290 UnicodeStr::words(&self[])
1293 /// Returns a string's displayed width in columns, treating control
1294 /// characters as zero-width.
1296 /// `is_cjk` determines behavior for characters in the Ambiguous category:
1297 /// if `is_cjk` is `true`, these are 2 columns wide; otherwise, they are 1.
1298 /// In CJK locales, `is_cjk` should be `true`, else it should be `false`.
1299 /// [Unicode Standard Annex #11](http://www.unicode.org/reports/tr11/)
1300 /// recommends that these characters be treated as 1 column (i.e.,
1301 /// `is_cjk` = `false`) if the locale is unknown.
1302 #[unstable(feature = "collections",
1303 reason = "this functionality may only be provided by libunicode")]
1304 fn width(&self, is_cjk: bool) -> uint {
1305 UnicodeStr::width(&self[], is_cjk)
1308 /// Returns a string with leading and trailing whitespace removed.
1309 #[stable(feature = "rust1", since = "1.0.0")]
1310 fn trim(&self) -> &str {
1311 UnicodeStr::trim(&self[])
1314 /// Returns a string with leading whitespace removed.
1315 #[stable(feature = "rust1", since = "1.0.0")]
1316 fn trim_left(&self) -> &str {
1317 UnicodeStr::trim_left(&self[])
1320 /// Returns a string with trailing whitespace removed.
1321 #[stable(feature = "rust1", since = "1.0.0")]
1322 fn trim_right(&self) -> &str {
1323 UnicodeStr::trim_right(&self[])
1327 #[stable(feature = "rust1", since = "1.0.0")]
1328 impl StrExt for str {
1329 fn slice(&self, begin: uint, end: uint) -> &str {
1333 fn slice_from(&self, begin: uint) -> &str {
1337 fn slice_to(&self, end: uint) -> &str {
1346 use core::iter::AdditiveIterator;
1347 use super::from_utf8;
1348 use super::Utf8Error;
1353 assert!("" <= "foo");
1354 assert!("foo" <= "foo");
1355 assert!("foo" != "bar");
1360 assert_eq!("".len(), 0u);
1361 assert_eq!("hello world".len(), 11u);
1362 assert_eq!("\x63".len(), 1u);
1363 assert_eq!("\u{a2}".len(), 2u);
1364 assert_eq!("\u{3c0}".len(), 2u);
1365 assert_eq!("\u{2620}".len(), 3u);
1366 assert_eq!("\u{1d11e}".len(), 4u);
1368 assert_eq!("".chars().count(), 0u);
1369 assert_eq!("hello world".chars().count(), 11u);
1370 assert_eq!("\x63".chars().count(), 1u);
1371 assert_eq!("\u{a2}".chars().count(), 1u);
1372 assert_eq!("\u{3c0}".chars().count(), 1u);
1373 assert_eq!("\u{2620}".chars().count(), 1u);
1374 assert_eq!("\u{1d11e}".chars().count(), 1u);
1375 assert_eq!("ประเทศไทย中华Việt Nam".chars().count(), 19u);
1377 assert_eq!("hello".width(false), 10u);
1378 assert_eq!("hello".width(true), 10u);
1379 assert_eq!("\0\0\0\0\0".width(false), 0u);
1380 assert_eq!("\0\0\0\0\0".width(true), 0u);
1381 assert_eq!("".width(false), 0u);
1382 assert_eq!("".width(true), 0u);
1383 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(false), 4u);
1384 assert_eq!("\u{2081}\u{2082}\u{2083}\u{2084}".width(true), 8u);
1389 assert_eq!("hello".find('l'), Some(2u));
1390 assert_eq!("hello".find(|&: c:char| c == 'o'), Some(4u));
1391 assert!("hello".find('x').is_none());
1392 assert!("hello".find(|&: c:char| c == 'x').is_none());
1393 assert_eq!("ประเทศไทย中华Việt Nam".find('华'), Some(30u));
1394 assert_eq!("ประเทศไทย中华Việt Nam".find(|&: c: char| c == '华'), Some(30u));
1399 assert_eq!("hello".rfind('l'), Some(3u));
1400 assert_eq!("hello".rfind(|&: c:char| c == 'o'), Some(4u));
1401 assert!("hello".rfind('x').is_none());
1402 assert!("hello".rfind(|&: c:char| c == 'x').is_none());
1403 assert_eq!("ประเทศไทย中华Việt Nam".rfind('华'), Some(30u));
1404 assert_eq!("ประเทศไทย中华Việt Nam".rfind(|&: c: char| c == '华'), Some(30u));
1409 let empty = String::from_str("");
1410 let s: String = empty.chars().collect();
1411 assert_eq!(empty, s);
1412 let data = String::from_str("ประเทศไทย中");
1413 let s: String = data.chars().collect();
1414 assert_eq!(data, s);
1418 fn test_into_bytes() {
1419 let data = String::from_str("asdf");
1420 let buf = data.into_bytes();
1421 assert_eq!(b"asdf", buf);
1425 fn test_find_str() {
1427 assert_eq!("".find_str(""), Some(0u));
1428 assert!("banana".find_str("apple pie").is_none());
1430 let data = "abcabc";
1431 assert_eq!(data[0u..6u].find_str("ab"), Some(0u));
1432 assert_eq!(data[2u..6u].find_str("ab"), Some(3u - 2u));
1433 assert!(data[2u..4u].find_str("ab").is_none());
1435 let string = "ประเทศไทย中华Việt Nam";
1436 let mut data = String::from_str(string);
1437 data.push_str(string);
1438 assert!(data.find_str("ไท华").is_none());
1439 assert_eq!(data[0u..43u].find_str(""), Some(0u));
1440 assert_eq!(data[6u..43u].find_str(""), Some(6u - 6u));
1442 assert_eq!(data[0u..43u].find_str("ประ"), Some( 0u));
1443 assert_eq!(data[0u..43u].find_str("ทศไ"), Some(12u));
1444 assert_eq!(data[0u..43u].find_str("ย中"), Some(24u));
1445 assert_eq!(data[0u..43u].find_str("iệt"), Some(34u));
1446 assert_eq!(data[0u..43u].find_str("Nam"), Some(40u));
1448 assert_eq!(data[43u..86u].find_str("ประ"), Some(43u - 43u));
1449 assert_eq!(data[43u..86u].find_str("ทศไ"), Some(55u - 43u));
1450 assert_eq!(data[43u..86u].find_str("ย中"), Some(67u - 43u));
1451 assert_eq!(data[43u..86u].find_str("iệt"), Some(77u - 43u));
1452 assert_eq!(data[43u..86u].find_str("Nam"), Some(83u - 43u));
1456 fn test_slice_chars() {
1457 fn t(a: &str, b: &str, start: uint) {
1458 assert_eq!(a.slice_chars(start, start + b.chars().count()), b);
1461 t("hello", "llo", 2);
1462 t("hello", "el", 1);
1465 assert_eq!("ะเทศไท", "ประเทศไทย中华Việt Nam".slice_chars(2, 8));
/// Test helper: turns a borrowed string slice into an owned `String`.
fn s(x: &str) -> String {
    let owned: String = x.to_string();
    owned
}
// Concatenates `$string` with `.concat()` and asserts that the resulting
// `String` equals `$expected`.
macro_rules! test_concat {
    ($expected: expr, $string: expr) => {
        {
            let joined: String = $string.concat();
            assert_eq!($expected, joined);
        }
    }
}
1480 fn test_concat_for_different_types() {
1481 test_concat!("ab", vec![s("a"), s("b")]);
1482 test_concat!("ab", vec!["a", "b"]);
1483 test_concat!("ab", vec!["a", "b"].as_slice());
1484 test_concat!("ab", vec![s("a"), s("b")]);
1488 fn test_concat_for_different_lengths() {
1489 let empty: &[&str] = &[];
1490 test_concat!("", empty);
1491 test_concat!("a", ["a"]);
1492 test_concat!("ab", ["a", "b"]);
1493 test_concat!("abc", ["", "a", "bc"]);
// Joins `$string` with `.connect($delim)` and asserts that the result equals
// `$expected`.
macro_rules! test_connect {
    ($expected: expr, $string: expr, $delim: expr) => {
        {
            let joined = $string.connect($delim);
            assert_eq!($expected, joined);
        }
    }
}
1506 fn test_connect_for_different_types() {
1507 test_connect!("a-b", ["a", "b"], "-");
1508 let hyphen = "-".to_string();
1509 test_connect!("a-b", [s("a"), s("b")], hyphen.as_slice());
1510 test_connect!("a-b", vec!["a", "b"], hyphen.as_slice());
1511 test_connect!("a-b", vec!["a", "b"].as_slice(), "-");
1512 test_connect!("a-b", vec![s("a"), s("b")], "-");
1516 fn test_connect_for_different_lengths() {
1517 let empty: &[&str] = &[];
1518 test_connect!("", empty, "-");
1519 test_connect!("a", ["a"], "-");
1520 test_connect!("a-b", ["a", "b"], "-");
1521 test_connect!("-a-bc", ["", "a", "bc"], "-");
1525 fn test_unsafe_slice() {
1526 assert_eq!("ab", unsafe {"abc".slice_unchecked(0, 2)});
1527 assert_eq!("bc", unsafe {"abc".slice_unchecked(1, 3)});
1528 assert_eq!("", unsafe {"abc".slice_unchecked(1, 1)});
1529 fn a_million_letter_a() -> String {
1531 let mut rs = String::new();
1533 rs.push_str("aaaaaaaaaa");
1538 fn half_a_million_letter_a() -> String {
1540 let mut rs = String::new();
1542 rs.push_str("aaaaa");
1547 let letters = a_million_letter_a();
1548 assert!(half_a_million_letter_a() ==
1549 unsafe {String::from_str(letters.slice_unchecked(
/// `starts_with` on empty, ASCII and multi-byte inputs.
fn test_starts_with() {
    // (text, prefix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "a", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ödd", "-", false),
        ("ödd", "öd", true),
    ];
    for &(text, prefix, expected) in cases.iter() {
        assert!(text.starts_with(prefix) == expected);
    }
}
/// `ends_with` on empty, ASCII and multi-byte inputs.
fn test_ends_with() {
    // (text, suffix, expected answer)
    let cases = [
        ("", "", true),
        ("abc", "", true),
        ("abc", "c", true),
        ("a", "abc", false),
        ("", "abc", false),
        ("ddö", "-", false),
        ("ddö", "dö", true),
    ];
    for &(text, suffix, expected) in cases.iter() {
        assert!(text.ends_with(suffix) == expected);
    }
}
/// Only the zero-length string is empty.
fn test_is_empty() {
    let blank = "";
    let single = "a";
    assert!(blank.is_empty());
    assert!(!single.is_empty());
}
1585 assert_eq!("".replace(a, "b"), String::from_str(""));
1586 assert_eq!("a".replace(a, "b"), String::from_str("b"));
1587 assert_eq!("ab".replace(a, "b"), String::from_str("bb"));
1589 assert!(" test test ".replace(test, "toast") ==
1590 String::from_str(" toast toast "));
1591 assert_eq!(" test test ".replace(test, ""), String::from_str(" "));
1595 fn test_replace_2a() {
1596 let data = "ประเทศไทย中华";
1597 let repl = "دولة الكويت";
1600 let a2 = "دولة الكويتทศไทย中华";
1601 assert_eq!(data.replace(a, repl), a2);
1605 fn test_replace_2b() {
1606 let data = "ประเทศไทย中华";
1607 let repl = "دولة الكويت";
1610 let b2 = "ปรدولة الكويتทศไทย中华";
1611 assert_eq!(data.replace(b, repl), b2);
1615 fn test_replace_2c() {
1616 let data = "ประเทศไทย中华";
1617 let repl = "دولة الكويت";
1620 let c2 = "ประเทศไทยدولة الكويت";
1621 assert_eq!(data.replace(c, repl), c2);
1625 fn test_replace_2d() {
1626 let data = "ประเทศไทย中华";
1627 let repl = "دولة الكويت";
1630 assert_eq!(data.replace(d, repl), data);
1635 assert_eq!("ab", "abc".slice(0, 2));
1636 assert_eq!("bc", "abc".slice(1, 3));
1637 assert_eq!("", "abc".slice(1, 1));
1638 assert_eq!("\u{65e5}", "\u{65e5}\u{672c}".slice(0, 3));
1640 let data = "ประเทศไทย中华";
1641 assert_eq!("ป", data.slice(0, 3));
1642 assert_eq!("ร", data.slice(3, 6));
1643 assert_eq!("", data.slice(3, 3));
1644 assert_eq!("华", data.slice(30, 33));
1646 fn a_million_letter_x() -> String {
1648 let mut rs = String::new();
1650 rs.push_str("华华华华华华华华华华");
1655 fn half_a_million_letter_x() -> String {
1657 let mut rs = String::new();
1659 rs.push_str("华华华华华");
1664 let letters = a_million_letter_x();
1665 assert!(half_a_million_letter_x() ==
1666 String::from_str(letters.slice(0u, 3u * 500000u)));
1671 let ss = "中华Việt Nam";
1673 assert_eq!("华", ss.slice(3u, 6u));
1674 assert_eq!("Việt Nam", ss.slice(6u, 16u));
1676 assert_eq!("ab", "abc".slice(0u, 2u));
1677 assert_eq!("bc", "abc".slice(1u, 3u));
1678 assert_eq!("", "abc".slice(1u, 1u));
1680 assert_eq!("中", ss.slice(0u, 3u));
1681 assert_eq!("华V", ss.slice(3u, 7u));
1682 assert_eq!("", ss.slice(3u, 3u));
1697 fn test_slice_fail() {
1698 "中华Việt Nam".slice(0u, 2u);
1702 fn test_slice_from() {
1703 assert_eq!("abcd".slice_from(0), "abcd");
1704 assert_eq!("abcd".slice_from(2), "cd");
1705 assert_eq!("abcd".slice_from(4), "");
1708 fn test_slice_to() {
1709 assert_eq!("abcd".slice_to(0), "");
1710 assert_eq!("abcd".slice_to(2), "ab");
1711 assert_eq!("abcd".slice_to(4), "abcd");
1715 fn test_trim_left_matches() {
1716 let v: &[char] = &[];
1717 assert_eq!(" *** foo *** ".trim_left_matches(v), " *** foo *** ");
1718 let chars: &[char] = &['*', ' '];
1719 assert_eq!(" *** foo *** ".trim_left_matches(chars), "foo *** ");
1720 assert_eq!(" *** *** ".trim_left_matches(chars), "");
1721 assert_eq!("foo *** ".trim_left_matches(chars), "foo *** ");
1723 assert_eq!("11foo1bar11".trim_left_matches('1'), "foo1bar11");
1724 let chars: &[char] = &['1', '2'];
1725 assert_eq!("12foo1bar12".trim_left_matches(chars), "foo1bar12");
1726 assert_eq!("123foo1bar123".trim_left_matches(|&: c: char| c.is_numeric()), "foo1bar123");
1730 fn test_trim_right_matches() {
1731 let v: &[char] = &[];
1732 assert_eq!(" *** foo *** ".trim_right_matches(v), " *** foo *** ");
1733 let chars: &[char] = &['*', ' '];
1734 assert_eq!(" *** foo *** ".trim_right_matches(chars), " *** foo");
1735 assert_eq!(" *** *** ".trim_right_matches(chars), "");
1736 assert_eq!(" *** foo".trim_right_matches(chars), " *** foo");
1738 assert_eq!("11foo1bar11".trim_right_matches('1'), "11foo1bar");
1739 let chars: &[char] = &['1', '2'];
1740 assert_eq!("12foo1bar12".trim_right_matches(chars), "12foo1bar");
1741 assert_eq!("123foo1bar123".trim_right_matches(|&: c: char| c.is_numeric()), "123foo1bar");
1745 fn test_trim_matches() {
1746 let v: &[char] = &[];
1747 assert_eq!(" *** foo *** ".trim_matches(v), " *** foo *** ");
1748 let chars: &[char] = &['*', ' '];
1749 assert_eq!(" *** foo *** ".trim_matches(chars), "foo");
1750 assert_eq!(" *** *** ".trim_matches(chars), "");
1751 assert_eq!("foo".trim_matches(chars), "foo");
1753 assert_eq!("11foo1bar11".trim_matches('1'), "foo1bar");
1754 let chars: &[char] = &['1', '2'];
1755 assert_eq!("12foo1bar12".trim_matches(chars), "foo1bar");
1756 assert_eq!("123foo1bar123".trim_matches(|&: c: char| c.is_numeric()), "foo1bar");
/// `trim_left` removes leading whitespace (including U+3000) and leaves
/// trailing whitespace alone.
fn test_trim_left() {
    // (input, expected result)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        (" blah", "blah"),
        (" \u{3000} wut", "wut"),
        ("hey ", "hey "),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_left(), expected);
    }
}
/// `trim_right` removes trailing whitespace (including U+3000) and leaves
/// leading whitespace alone.
fn test_trim_right() {
    // (input, expected result)
    let cases = [
        ("", ""),
        ("a", "a"),
        (" ", ""),
        ("blah ", "blah"),
        ("wut \u{3000} ", "wut"),
        (" hey", " hey"),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(input.trim_right(), expected);
    }
}
1781 assert_eq!("".trim(), "");
1782 assert_eq!("a".trim(), "a");
1783 assert_eq!(" ".trim(), "");
1784 assert_eq!(" blah ".trim(), "blah");
1785 assert_eq!("\nwut \u{3000} ".trim(), "wut");
1786 assert_eq!(" hey dude ".trim(), "hey dude");
/// Strings made only of whitespace (including none at all) pass the
/// all-whitespace check; one non-whitespace character fails it.
fn test_is_whitespace() {
    fn all_whitespace(text: &str) -> bool {
        text.chars().all(|c| c.is_whitespace())
    }

    assert!(all_whitespace(""));
    assert!(all_whitespace(" "));
    assert!(all_whitespace("\u{2009}")); // Thin space
    assert!(all_whitespace(" \n\t "));
    assert!(!all_whitespace(" _ "));
}
1799 fn test_slice_shift_char() {
1800 let data = "ประเทศไทย中";
1801 assert_eq!(data.slice_shift_char(), Some(('ป', "ระเทศไทย中")));
1805 fn test_slice_shift_char_2() {
1807 assert_eq!(empty.slice_shift_char(), None);
1812 // deny overlong encodings
1813 assert!(from_utf8(&[0xc0, 0x80]).is_err());
1814 assert!(from_utf8(&[0xc0, 0xae]).is_err());
1815 assert!(from_utf8(&[0xe0, 0x80, 0x80]).is_err());
1816 assert!(from_utf8(&[0xe0, 0x80, 0xaf]).is_err());
1817 assert!(from_utf8(&[0xe0, 0x81, 0x81]).is_err());
1818 assert!(from_utf8(&[0xf0, 0x82, 0x82, 0xac]).is_err());
1819 assert!(from_utf8(&[0xf4, 0x90, 0x80, 0x80]).is_err());
1822 assert!(from_utf8(&[0xED, 0xA0, 0x80]).is_err());
1823 assert!(from_utf8(&[0xED, 0xBF, 0xBF]).is_err());
1825 assert!(from_utf8(&[0xC2, 0x80]).is_ok());
1826 assert!(from_utf8(&[0xDF, 0xBF]).is_ok());
1827 assert!(from_utf8(&[0xE0, 0xA0, 0x80]).is_ok());
1828 assert!(from_utf8(&[0xED, 0x9F, 0xBF]).is_ok());
1829 assert!(from_utf8(&[0xEE, 0x80, 0x80]).is_ok());
1830 assert!(from_utf8(&[0xEF, 0xBF, 0xBF]).is_ok());
1831 assert!(from_utf8(&[0xF0, 0x90, 0x80, 0x80]).is_ok());
1832 assert!(from_utf8(&[0xF4, 0x8F, 0xBF, 0xBF]).is_ok());
1836 fn test_is_utf16() {
1837 use unicode::str::is_utf16;
1839 ($($e:expr),*) => { { $(assert!(is_utf16($e));)* } }
1848 // surrogate pairs (randomly generated with Python 3's
1849 // .encode('utf-16be'))
1850 pos!(&[0xdb54, 0xdf16, 0xd880, 0xdee0, 0xdb6a, 0xdd45],
1851 &[0xd91f, 0xdeb1, 0xdb31, 0xdd84, 0xd8e2, 0xde14],
1852 &[0xdb9f, 0xdc26, 0xdb6f, 0xde58, 0xd850, 0xdfae]);
1854 // mixtures (also random)
1855 pos!(&[0xd921, 0xdcc2, 0x002d, 0x004d, 0xdb32, 0xdf65],
1856 &[0xdb45, 0xdd2d, 0x006a, 0xdacd, 0xddfe, 0x0006],
1857 &[0x0067, 0xd8ff, 0xddb7, 0x000f, 0xd900, 0xdc80]);
1861 ($($e:expr),*) => { { $(assert!(!is_utf16($e));)* } }
1865 // surrogate + regular unit
1867 // surrogate + lead surrogate
1869 // unterminated surrogate
1871 // trail surrogate without a lead
1874 // random byte sequences that Python 3's .decode('utf-16be')
1876 neg!(&[0x5b3d, 0x0141, 0xde9e, 0x8fdc, 0xc6e7],
1877 &[0xdf5a, 0x82a5, 0x62b9, 0xb447, 0x92f3],
1878 &[0xda4e, 0x42bc, 0x4462, 0xee98, 0xc2ca],
1879 &[0xbe00, 0xb04a, 0x6ecb, 0xdd89, 0xe278],
1880 &[0x0465, 0xab56, 0xdbb6, 0xa893, 0x665e],
1881 &[0x6b7f, 0x0a19, 0x40f4, 0xa657, 0xdcc5],
1882 &[0x9b50, 0xda5e, 0x24ec, 0x03ad, 0x6dee],
1883 &[0x8d17, 0xcaa7, 0xf4ae, 0xdf6e, 0xbed7],
1884 &[0xdaee, 0x2584, 0x7d30, 0xa626, 0x121a],
1885 &[0xd956, 0x4b43, 0x7570, 0xccd6, 0x4f4a],
1886 &[0x9dcf, 0x1b49, 0x4ba5, 0xfce9, 0xdffe],
1887 &[0x6572, 0xce53, 0xb05a, 0xf6af, 0xdacf],
1888 &[0x1b90, 0x728c, 0x9906, 0xdb68, 0xf46e],
1889 &[0x1606, 0xbeca, 0xbe76, 0x860f, 0xdfa5],
1890 &[0x8b4f, 0xde7a, 0xd220, 0x9fac, 0x2b6f],
1891 &[0xb8fe, 0xebbe, 0xda32, 0x1a5f, 0x8b8b],
1892 &[0x934b, 0x8956, 0xc434, 0x1881, 0xddf7],
1893 &[0x5a95, 0x13fc, 0xf116, 0xd89b, 0x93f9],
1894 &[0xd640, 0x71f1, 0xdd7d, 0x77eb, 0x1cd8],
1895 &[0x348b, 0xaef0, 0xdb2c, 0xebf1, 0x1282],
1896 &[0x50d7, 0xd824, 0x5010, 0xb369, 0x22ea]);
1900 fn test_as_bytes() {
1903 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
1904 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
1908 assert_eq!("".as_bytes(), b);
1909 assert_eq!("abc".as_bytes(), b"abc");
1910 assert_eq!("ศไทย中华Việt Nam".as_bytes(), v);
1915 fn test_as_bytes_fail() {
1916 // Don't double free. (I'm not sure if this exercises the
1917 // original problem code path anymore.)
1918 let s = String::from_str("");
1919 let _bytes = s.as_bytes();
1925 let buf = "hello".as_ptr();
1927 assert_eq!(*buf.offset(0), b'h');
1928 assert_eq!(*buf.offset(1), b'e');
1929 assert_eq!(*buf.offset(2), b'l');
1930 assert_eq!(*buf.offset(3), b'l');
1931 assert_eq!(*buf.offset(4), b'o');
1936 fn test_subslice_offset() {
1937 let a = "kernelsprite";
1938 let b = &a[7..a.len()];
1939 let c = &a[0..a.len() - 6];
1940 assert_eq!(a.subslice_offset(b), 7);
1941 assert_eq!(a.subslice_offset(c), 0);
1943 let string = "a\nb\nc";
1944 let lines: Vec<&str> = string.lines().collect();
1945 assert_eq!(string.subslice_offset(lines[0]), 0);
1946 assert_eq!(string.subslice_offset(lines[1]), 2);
1947 assert_eq!(string.subslice_offset(lines[2]), 4);
1952 fn test_subslice_offset_2() {
1953 let a = "alchemiter";
1954 let b = "cruxtruder";
1955 a.subslice_offset(b);
1959 fn vec_str_conversions() {
1960 let s1: String = String::from_str("All mimsy were the borogoves");
1962 let v: Vec<u8> = s1.as_bytes().to_vec();
1963 let s2: String = String::from_str(from_utf8(v.as_slice()).unwrap());
1964 let mut i: uint = 0u;
1965 let n1: uint = s1.len();
1966 let n2: uint = v.len();
1969 let a: u8 = s1.as_bytes()[i];
1970 let b: u8 = s2.as_bytes()[i];
/// Substring search on ASCII, empty and multi-byte strings.
fn test_contains() {
    // (haystack, needle, expected answer)
    let ascii_cases = [
        ("abcde", "bcd", true),
        ("abcde", "abcd", true),
        ("abcde", "bcde", true),
        ("abcde", "", true),
        ("", "", true),
        ("abcde", "def", false),
        ("", "a", false),
    ];
    for &(haystack, needle, expected) in ascii_cases.iter() {
        assert!(haystack.contains(needle) == expected);
    }

    let mixed = "ประเทศไทย中华Việt Nam";
    let mixed_cases = [("ประเ", true), ("ะเ", true), ("中华", true), ("ไท华", false)];
    for &(needle, expected) in mixed_cases.iter() {
        assert!(mixed.contains(needle) == expected);
    }
}
1996 fn test_contains_char() {
1997 assert!("abc".contains_char('b'));
1998 assert!("a".contains_char('a'));
1999 assert!(!"abc".contains_char('d'));
2000 assert!(!"".contains_char('a'));
2005 let s = "ศไทย中华Việt Nam";
2006 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2009 assert!(s.char_at(pos) == *ch);
2010 pos += ch.to_string().len();
2015 fn test_char_at_reverse() {
2016 let s = "ศไทย中华Việt Nam";
2017 let v = vec!['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2018 let mut pos = s.len();
2019 for ch in v.iter().rev() {
2020 assert!(s.char_at_reverse(pos) == *ch);
2021 pos -= ch.to_string().len();
2026 fn test_escape_unicode() {
2027 assert_eq!("abc".escape_unicode(),
2028 String::from_str("\\u{61}\\u{62}\\u{63}"));
2029 assert_eq!("a c".escape_unicode(),
2030 String::from_str("\\u{61}\\u{20}\\u{63}"));
2031 assert_eq!("\r\n\t".escape_unicode(),
2032 String::from_str("\\u{d}\\u{a}\\u{9}"));
2033 assert_eq!("'\"\\".escape_unicode(),
2034 String::from_str("\\u{27}\\u{22}\\u{5c}"));
2035 assert_eq!("\x00\x01\u{fe}\u{ff}".escape_unicode(),
2036 String::from_str("\\u{0}\\u{1}\\u{fe}\\u{ff}"));
2037 assert_eq!("\u{100}\u{ffff}".escape_unicode(),
2038 String::from_str("\\u{100}\\u{ffff}"));
2039 assert_eq!("\u{10000}\u{10ffff}".escape_unicode(),
2040 String::from_str("\\u{10000}\\u{10ffff}"));
2041 assert_eq!("ab\u{fb00}".escape_unicode(),
2042 String::from_str("\\u{61}\\u{62}\\u{fb00}"));
2043 assert_eq!("\u{1d4ea}\r".escape_unicode(),
2044 String::from_str("\\u{1d4ea}\\u{d}"));
2048 fn test_escape_default() {
2049 assert_eq!("abc".escape_default(), String::from_str("abc"));
2050 assert_eq!("a c".escape_default(), String::from_str("a c"));
2051 assert_eq!("\r\n\t".escape_default(), String::from_str("\\r\\n\\t"));
2052 assert_eq!("'\"\\".escape_default(), String::from_str("\\'\\\"\\\\"));
2053 assert_eq!("\u{100}\u{ffff}".escape_default(),
2054 String::from_str("\\u{100}\\u{ffff}"));
2055 assert_eq!("\u{10000}\u{10ffff}".escape_default(),
2056 String::from_str("\\u{10000}\\u{10ffff}"));
2057 assert_eq!("ab\u{fb00}".escape_default(),
2058 String::from_str("ab\\u{fb00}"));
2059 assert_eq!("\u{1d4ea}\r".escape_default(),
2060 String::from_str("\\u{1d4ea}\\r"));
/// Checks the total (byte-wise lexicographic) ordering of strings via
/// `cmp`: a longer string sorts after its own prefix, and the first
/// differing byte decides otherwise.
///
/// Each comparison is wrapped in `assert_eq!`. A bare `a.cmp(b) == X;`
/// statement only computes a boolean and throws it away, so it could never
/// make the test fail.
fn test_total_ord() {
    assert_eq!("1234".cmp("123"), Greater);
    assert_eq!("123".cmp("1234"), Less);
    assert_eq!("1234".cmp("1234"), Equal);
    // Decided at the sixth byte: '5' < '6'.
    assert_eq!("12345555".cmp("123456"), Less);
    // Decided at the first byte: '2' > '1'.
    assert_eq!("22".cmp("1234"), Greater);
}
2073 fn test_char_range_at() {
2074 let data = "b¢€𤭢𤭢€¢b";
2075 assert_eq!('b', data.char_range_at(0).ch);
2076 assert_eq!('¢', data.char_range_at(1).ch);
2077 assert_eq!('€', data.char_range_at(3).ch);
2078 assert_eq!('𤭢', data.char_range_at(6).ch);
2079 assert_eq!('𤭢', data.char_range_at(10).ch);
2080 assert_eq!('€', data.char_range_at(14).ch);
2081 assert_eq!('¢', data.char_range_at(17).ch);
2082 assert_eq!('b', data.char_range_at(19).ch);
2086 fn test_char_range_at_reverse_underflow() {
2087 assert_eq!("abc".char_range_at_reverse(0).next, 0);
2091 fn test_iterator() {
2092 let s = "ศไทย中华Việt Nam";
2093 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2096 let mut it = s.chars();
2099 assert_eq!(c, v[pos]);
2102 assert_eq!(pos, v.len());
2106 fn test_rev_iterator() {
2107 let s = "ศไทย中华Việt Nam";
2108 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2111 let mut it = s.chars().rev();
2114 assert_eq!(c, v[pos]);
2117 assert_eq!(pos, v.len());
2121 fn test_chars_decoding() {
2122 let mut bytes = [0u8; 4];
2123 for c in (0u32..0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2124 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2125 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2126 if Some(c) != s.chars().next() {
2127 panic!("character {:x}={} does not decode correctly", c as u32, c);
2133 fn test_chars_rev_decoding() {
2134 let mut bytes = [0u8; 4];
2135 for c in (0u32..0x110000).filter_map(|c| ::core::char::from_u32(c)) {
2136 let len = c.encode_utf8(&mut bytes).unwrap_or(0);
2137 let s = ::core::str::from_utf8(&bytes[..len]).unwrap();
2138 if Some(c) != s.chars().rev().next() {
2139 panic!("character {:x}={} does not decode correctly", c as u32, c);
2145 fn test_iterator_clone() {
2146 let s = "ศไทย中华Việt Nam";
2147 let mut it = s.chars();
2149 assert!(it.clone().zip(it).all(|(x,y)| x == y));
2153 fn test_bytesator() {
2154 let s = "ศไทย中华Việt Nam";
2156 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2157 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2162 for b in s.bytes() {
2163 assert_eq!(b, v[pos]);
2169 fn test_bytes_revator() {
2170 let s = "ศไทย中华Việt Nam";
2172 224, 184, 168, 224, 185, 132, 224, 184, 151, 224, 184, 162, 228,
2173 184, 173, 229, 141, 142, 86, 105, 225, 187, 135, 116, 32, 78, 97,
2176 let mut pos = v.len();
2178 for b in s.bytes().rev() {
2180 assert_eq!(b, v[pos]);
2185 fn test_char_indicesator() {
2186 let s = "ศไทย中华Việt Nam";
2187 let p = [0, 3, 6, 9, 12, 15, 18, 19, 20, 23, 24, 25, 26, 27];
2188 let v = ['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a','m'];
2191 let mut it = s.char_indices();
2194 assert_eq!(c, (p[pos], v[pos]));
2197 assert_eq!(pos, v.len());
2198 assert_eq!(pos, p.len());
2202 fn test_char_indices_revator() {
2203 let s = "ศไทย中华Việt Nam";
2204 let p = [27, 26, 25, 24, 23, 20, 19, 18, 15, 12, 9, 6, 3, 0];
2205 let v = ['m', 'a', 'N', ' ', 't', 'ệ','i','V','华','中','ย','ท','ไ','ศ'];
2208 let mut it = s.char_indices().rev();
2211 assert_eq!(c, (p[pos], v[pos]));
2214 assert_eq!(pos, v.len());
2215 assert_eq!(pos, p.len());
2219 fn test_splitn_char_iterator() {
2220 let data = "\nMäry häd ä little lämb\nLittle lämb\n";
2222 let split: Vec<&str> = data.splitn(3, ' ').collect();
2223 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2225 let split: Vec<&str> = data.splitn(3, |&: c: char| c == ' ').collect();
2226 assert_eq!(split, vec!["\nMäry", "häd", "ä", "little lämb\nLittle lämb\n"]);
2229 let split: Vec<&str> = data.splitn(3, 'ä').collect();
2230 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
2232 let split: Vec<&str> = data.splitn(3, |&: c: char| c == 'ä').collect();
2233 assert_eq!(split, vec!["\nM", "ry h", "d ", " little lämb\nLittle lämb\n"]);
/// `split` keeps the empty piece after a trailing separator, whereas
/// `split_terminator` drops it.
fn test_split_char_iterator_no_trailing() {
    let text = "\nMäry häd ä little lämb\nLittle lämb\n";

    let with_trailing: Vec<&str> = text.split('\n').collect();
    assert_eq!(with_trailing, vec!["", "Märy häd ä little lämb", "Little lämb", ""]);

    let without_trailing: Vec<&str> = text.split_terminator('\n').collect();
    assert_eq!(without_trailing, vec!["", "Märy häd ä little lämb", "Little lämb"]);
}
2249 let data = "\n \tMäry häd\tä little lämb\nLittle lämb\n";
2250 let words: Vec<&str> = data.words().collect();
2251 assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
2255 fn test_nfd_chars() {
2257 ($input: expr, $expected: expr) => {
2258 assert_eq!($input.nfd_chars().collect::<String>(), $expected);
2262 t!("\u{1e0b}\u{1c4}", "d\u{307}\u{1c4}");
2263 t!("\u{2026}", "\u{2026}");
2264 t!("\u{2126}", "\u{3a9}");
2265 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2266 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2267 t!("a\u{301}", "a\u{301}");
2268 t!("\u{301}a", "\u{301}a");
2269 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2270 t!("\u{ac1c}", "\u{1100}\u{1162}");
2274 fn test_nfkd_chars() {
2276 ($input: expr, $expected: expr) => {
2277 assert_eq!($input.nfkd_chars().collect::<String>(), $expected);
2281 t!("\u{1e0b}\u{1c4}", "d\u{307}DZ\u{30c}");
2282 t!("\u{2026}", "...");
2283 t!("\u{2126}", "\u{3a9}");
2284 t!("\u{1e0b}\u{323}", "d\u{323}\u{307}");
2285 t!("\u{1e0d}\u{307}", "d\u{323}\u{307}");
2286 t!("a\u{301}", "a\u{301}");
2287 t!("\u{301}a", "\u{301}a");
2288 t!("\u{d4db}", "\u{1111}\u{1171}\u{11b6}");
2289 t!("\u{ac1c}", "\u{1100}\u{1162}");
2293 fn test_nfc_chars() {
2295 ($input: expr, $expected: expr) => {
2296 assert_eq!($input.nfc_chars().collect::<String>(), $expected);
2300 t!("\u{1e0b}\u{1c4}", "\u{1e0b}\u{1c4}");
2301 t!("\u{2026}", "\u{2026}");
2302 t!("\u{2126}", "\u{3a9}");
2303 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2304 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2305 t!("a\u{301}", "\u{e1}");
2306 t!("\u{301}a", "\u{301}a");
2307 t!("\u{d4db}", "\u{d4db}");
2308 t!("\u{ac1c}", "\u{ac1c}");
2309 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2313 fn test_nfkc_chars() {
2315 ($input: expr, $expected: expr) => {
2316 assert_eq!($input.nfkc_chars().collect::<String>(), $expected);
2320 t!("\u{1e0b}\u{1c4}", "\u{1e0b}D\u{17d}");
2321 t!("\u{2026}", "...");
2322 t!("\u{2126}", "\u{3a9}");
2323 t!("\u{1e0b}\u{323}", "\u{1e0d}\u{307}");
2324 t!("\u{1e0d}\u{307}", "\u{1e0d}\u{307}");
2325 t!("a\u{301}", "\u{e1}");
2326 t!("\u{301}a", "\u{301}a");
2327 t!("\u{d4db}", "\u{d4db}");
2328 t!("\u{ac1c}", "\u{ac1c}");
2329 t!("a\u{300}\u{305}\u{315}\u{5ae}b", "\u{e0}\u{5ae}\u{305}\u{315}b");
2334 let data = "\nMäry häd ä little lämb\n\nLittle lämb\n";
2335 let lines: Vec<&str> = data.lines().collect();
2336 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2338 let data = "\nMäry häd ä little lämb\n\nLittle lämb"; // no trailing \n
2339 let lines: Vec<&str> = data.lines().collect();
2340 assert_eq!(lines, vec!["", "Märy häd ä little lämb", "", "Little lämb"]);
2344 fn test_graphemes() {
2345 use core::iter::order;
2346 // official Unicode test data
2347 // from http://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakTest.txt
2348 let test_same: [(_, &[_]); 325] = [
2349 ("\u{20}\u{20}", &["\u{20}", "\u{20}"]),
2350 ("\u{20}\u{308}\u{20}", &["\u{20}\u{308}", "\u{20}"]),
2351 ("\u{20}\u{D}", &["\u{20}", "\u{D}"]),
2352 ("\u{20}\u{308}\u{D}", &["\u{20}\u{308}", "\u{D}"]),
2353 ("\u{20}\u{A}", &["\u{20}", "\u{A}"]),
2354 ("\u{20}\u{308}\u{A}", &["\u{20}\u{308}", "\u{A}"]),
2355 ("\u{20}\u{1}", &["\u{20}", "\u{1}"]),
2356 ("\u{20}\u{308}\u{1}", &["\u{20}\u{308}", "\u{1}"]),
2357 ("\u{20}\u{300}", &["\u{20}\u{300}"]),
2358 ("\u{20}\u{308}\u{300}", &["\u{20}\u{308}\u{300}"]),
2359 ("\u{20}\u{1100}", &["\u{20}", "\u{1100}"]),
2360 ("\u{20}\u{308}\u{1100}", &["\u{20}\u{308}", "\u{1100}"]),
2361 ("\u{20}\u{1160}", &["\u{20}", "\u{1160}"]),
2362 ("\u{20}\u{308}\u{1160}", &["\u{20}\u{308}", "\u{1160}"]),
2363 ("\u{20}\u{11A8}", &["\u{20}", "\u{11A8}"]),
2364 ("\u{20}\u{308}\u{11A8}", &["\u{20}\u{308}", "\u{11A8}"]),
2365 ("\u{20}\u{AC00}", &["\u{20}", "\u{AC00}"]),
2366 ("\u{20}\u{308}\u{AC00}", &["\u{20}\u{308}", "\u{AC00}"]),
2367 ("\u{20}\u{AC01}", &["\u{20}", "\u{AC01}"]),
2368 ("\u{20}\u{308}\u{AC01}", &["\u{20}\u{308}", "\u{AC01}"]),
2369 ("\u{20}\u{1F1E6}", &["\u{20}", "\u{1F1E6}"]),
2370 ("\u{20}\u{308}\u{1F1E6}", &["\u{20}\u{308}", "\u{1F1E6}"]),
2371 ("\u{20}\u{378}", &["\u{20}", "\u{378}"]),
2372 ("\u{20}\u{308}\u{378}", &["\u{20}\u{308}", "\u{378}"]),
2373 ("\u{D}\u{20}", &["\u{D}", "\u{20}"]),
2374 ("\u{D}\u{308}\u{20}", &["\u{D}", "\u{308}", "\u{20}"]),
2375 ("\u{D}\u{D}", &["\u{D}", "\u{D}"]),
2376 ("\u{D}\u{308}\u{D}", &["\u{D}", "\u{308}", "\u{D}"]),
2377 ("\u{D}\u{A}", &["\u{D}\u{A}"]),
2378 ("\u{D}\u{308}\u{A}", &["\u{D}", "\u{308}", "\u{A}"]),
2379 ("\u{D}\u{1}", &["\u{D}", "\u{1}"]),
2380 ("\u{D}\u{308}\u{1}", &["\u{D}", "\u{308}", "\u{1}"]),
2381 ("\u{D}\u{300}", &["\u{D}", "\u{300}"]),
2382 ("\u{D}\u{308}\u{300}", &["\u{D}", "\u{308}\u{300}"]),
2383 ("\u{D}\u{903}", &["\u{D}", "\u{903}"]),
2384 ("\u{D}\u{1100}", &["\u{D}", "\u{1100}"]),
2385 ("\u{D}\u{308}\u{1100}", &["\u{D}", "\u{308}", "\u{1100}"]),
2386 ("\u{D}\u{1160}", &["\u{D}", "\u{1160}"]),
2387 ("\u{D}\u{308}\u{1160}", &["\u{D}", "\u{308}", "\u{1160}"]),
2388 ("\u{D}\u{11A8}", &["\u{D}", "\u{11A8}"]),
2389 ("\u{D}\u{308}\u{11A8}", &["\u{D}", "\u{308}", "\u{11A8}"]),
2390 ("\u{D}\u{AC00}", &["\u{D}", "\u{AC00}"]),
2391 ("\u{D}\u{308}\u{AC00}", &["\u{D}", "\u{308}", "\u{AC00}"]),
2392 ("\u{D}\u{AC01}", &["\u{D}", "\u{AC01}"]),
2393 ("\u{D}\u{308}\u{AC01}", &["\u{D}", "\u{308}", "\u{AC01}"]),
2394 ("\u{D}\u{1F1E6}", &["\u{D}", "\u{1F1E6}"]),
2395 ("\u{D}\u{308}\u{1F1E6}", &["\u{D}", "\u{308}", "\u{1F1E6}"]),
2396 ("\u{D}\u{378}", &["\u{D}", "\u{378}"]),
2397 ("\u{D}\u{308}\u{378}", &["\u{D}", "\u{308}", "\u{378}"]),
2398 ("\u{A}\u{20}", &["\u{A}", "\u{20}"]),
2399 ("\u{A}\u{308}\u{20}", &["\u{A}", "\u{308}", "\u{20}"]),
2400 ("\u{A}\u{D}", &["\u{A}", "\u{D}"]),
2401 ("\u{A}\u{308}\u{D}", &["\u{A}", "\u{308}", "\u{D}"]),
2402 ("\u{A}\u{A}", &["\u{A}", "\u{A}"]),
2403 ("\u{A}\u{308}\u{A}", &["\u{A}", "\u{308}", "\u{A}"]),
2404 ("\u{A}\u{1}", &["\u{A}", "\u{1}"]),
2405 ("\u{A}\u{308}\u{1}", &["\u{A}", "\u{308}", "\u{1}"]),
2406 ("\u{A}\u{300}", &["\u{A}", "\u{300}"]),
2407 ("\u{A}\u{308}\u{300}", &["\u{A}", "\u{308}\u{300}"]),
2408 ("\u{A}\u{903}", &["\u{A}", "\u{903}"]),
2409 ("\u{A}\u{1100}", &["\u{A}", "\u{1100}"]),
2410 ("\u{A}\u{308}\u{1100}", &["\u{A}", "\u{308}", "\u{1100}"]),
2411 ("\u{A}\u{1160}", &["\u{A}", "\u{1160}"]),
2412 ("\u{A}\u{308}\u{1160}", &["\u{A}", "\u{308}", "\u{1160}"]),
2413 ("\u{A}\u{11A8}", &["\u{A}", "\u{11A8}"]),
2414 ("\u{A}\u{308}\u{11A8}", &["\u{A}", "\u{308}", "\u{11A8}"]),
2415 ("\u{A}\u{AC00}", &["\u{A}", "\u{AC00}"]),
2416 ("\u{A}\u{308}\u{AC00}", &["\u{A}", "\u{308}", "\u{AC00}"]),
2417 ("\u{A}\u{AC01}", &["\u{A}", "\u{AC01}"]),
2418 ("\u{A}\u{308}\u{AC01}", &["\u{A}", "\u{308}", "\u{AC01}"]),
2419 ("\u{A}\u{1F1E6}", &["\u{A}", "\u{1F1E6}"]),
2420 ("\u{A}\u{308}\u{1F1E6}", &["\u{A}", "\u{308}", "\u{1F1E6}"]),
2421 ("\u{A}\u{378}", &["\u{A}", "\u{378}"]),
2422 ("\u{A}\u{308}\u{378}", &["\u{A}", "\u{308}", "\u{378}"]),
2423 ("\u{1}\u{20}", &["\u{1}", "\u{20}"]),
2424 ("\u{1}\u{308}\u{20}", &["\u{1}", "\u{308}", "\u{20}"]),
2425 ("\u{1}\u{D}", &["\u{1}", "\u{D}"]),
2426 ("\u{1}\u{308}\u{D}", &["\u{1}", "\u{308}", "\u{D}"]),
2427 ("\u{1}\u{A}", &["\u{1}", "\u{A}"]),
2428 ("\u{1}\u{308}\u{A}", &["\u{1}", "\u{308}", "\u{A}"]),
2429 ("\u{1}\u{1}", &["\u{1}", "\u{1}"]),
2430 ("\u{1}\u{308}\u{1}", &["\u{1}", "\u{308}", "\u{1}"]),
2431 ("\u{1}\u{300}", &["\u{1}", "\u{300}"]),
2432 ("\u{1}\u{308}\u{300}", &["\u{1}", "\u{308}\u{300}"]),
2433 ("\u{1}\u{903}", &["\u{1}", "\u{903}"]),
2434 ("\u{1}\u{1100}", &["\u{1}", "\u{1100}"]),
2435 ("\u{1}\u{308}\u{1100}", &["\u{1}", "\u{308}", "\u{1100}"]),
2436 ("\u{1}\u{1160}", &["\u{1}", "\u{1160}"]),
2437 ("\u{1}\u{308}\u{1160}", &["\u{1}", "\u{308}", "\u{1160}"]),
2438 ("\u{1}\u{11A8}", &["\u{1}", "\u{11A8}"]),
2439 ("\u{1}\u{308}\u{11A8}", &["\u{1}", "\u{308}", "\u{11A8}"]),
2440 ("\u{1}\u{AC00}", &["\u{1}", "\u{AC00}"]),
2441 ("\u{1}\u{308}\u{AC00}", &["\u{1}", "\u{308}", "\u{AC00}"]),
2442 ("\u{1}\u{AC01}", &["\u{1}", "\u{AC01}"]),
2443 ("\u{1}\u{308}\u{AC01}", &["\u{1}", "\u{308}", "\u{AC01}"]),
2444 ("\u{1}\u{1F1E6}", &["\u{1}", "\u{1F1E6}"]),
2445 ("\u{1}\u{308}\u{1F1E6}", &["\u{1}", "\u{308}", "\u{1F1E6}"]),
2446 ("\u{1}\u{378}", &["\u{1}", "\u{378}"]),
2447 ("\u{1}\u{308}\u{378}", &["\u{1}", "\u{308}", "\u{378}"]),
2448 ("\u{300}\u{20}", &["\u{300}", "\u{20}"]),
2449 ("\u{300}\u{308}\u{20}", &["\u{300}\u{308}", "\u{20}"]),
2450 ("\u{300}\u{D}", &["\u{300}", "\u{D}"]),
2451 ("\u{300}\u{308}\u{D}", &["\u{300}\u{308}", "\u{D}"]),
2452 ("\u{300}\u{A}", &["\u{300}", "\u{A}"]),
2453 ("\u{300}\u{308}\u{A}", &["\u{300}\u{308}", "\u{A}"]),
2454 ("\u{300}\u{1}", &["\u{300}", "\u{1}"]),
2455 ("\u{300}\u{308}\u{1}", &["\u{300}\u{308}", "\u{1}"]),
2456 ("\u{300}\u{300}", &["\u{300}\u{300}"]),
2457 ("\u{300}\u{308}\u{300}", &["\u{300}\u{308}\u{300}"]),
2458 ("\u{300}\u{1100}", &["\u{300}", "\u{1100}"]),
2459 ("\u{300}\u{308}\u{1100}", &["\u{300}\u{308}", "\u{1100}"]),
2460 ("\u{300}\u{1160}", &["\u{300}", "\u{1160}"]),
2461 ("\u{300}\u{308}\u{1160}", &["\u{300}\u{308}", "\u{1160}"]),
2462 ("\u{300}\u{11A8}", &["\u{300}", "\u{11A8}"]),
2463 ("\u{300}\u{308}\u{11A8}", &["\u{300}\u{308}", "\u{11A8}"]),
2464 ("\u{300}\u{AC00}", &["\u{300}", "\u{AC00}"]),
2465 ("\u{300}\u{308}\u{AC00}", &["\u{300}\u{308}", "\u{AC00}"]),
2466 ("\u{300}\u{AC01}", &["\u{300}", "\u{AC01}"]),
2467 ("\u{300}\u{308}\u{AC01}", &["\u{300}\u{308}", "\u{AC01}"]),
2468 ("\u{300}\u{1F1E6}", &["\u{300}", "\u{1F1E6}"]),
2469 ("\u{300}\u{308}\u{1F1E6}", &["\u{300}\u{308}", "\u{1F1E6}"]),
2470 ("\u{300}\u{378}", &["\u{300}", "\u{378}"]),
2471 ("\u{300}\u{308}\u{378}", &["\u{300}\u{308}", "\u{378}"]),
2472 ("\u{903}\u{20}", &["\u{903}", "\u{20}"]),
2473 ("\u{903}\u{308}\u{20}", &["\u{903}\u{308}", "\u{20}"]),
2474 ("\u{903}\u{D}", &["\u{903}", "\u{D}"]),
2475 ("\u{903}\u{308}\u{D}", &["\u{903}\u{308}", "\u{D}"]),
2476 ("\u{903}\u{A}", &["\u{903}", "\u{A}"]),
2477 ("\u{903}\u{308}\u{A}", &["\u{903}\u{308}", "\u{A}"]),
2478 ("\u{903}\u{1}", &["\u{903}", "\u{1}"]),
2479 ("\u{903}\u{308}\u{1}", &["\u{903}\u{308}", "\u{1}"]),
2480 ("\u{903}\u{300}", &["\u{903}\u{300}"]),
2481 ("\u{903}\u{308}\u{300}", &["\u{903}\u{308}\u{300}"]),
2482 ("\u{903}\u{1100}", &["\u{903}", "\u{1100}"]),
2483 ("\u{903}\u{308}\u{1100}", &["\u{903}\u{308}", "\u{1100}"]),
2484 ("\u{903}\u{1160}", &["\u{903}", "\u{1160}"]),
2485 ("\u{903}\u{308}\u{1160}", &["\u{903}\u{308}", "\u{1160}"]),
2486 ("\u{903}\u{11A8}", &["\u{903}", "\u{11A8}"]),
2487 ("\u{903}\u{308}\u{11A8}", &["\u{903}\u{308}", "\u{11A8}"]),
2488 ("\u{903}\u{AC00}", &["\u{903}", "\u{AC00}"]),
2489 ("\u{903}\u{308}\u{AC00}", &["\u{903}\u{308}", "\u{AC00}"]),
2490 ("\u{903}\u{AC01}", &["\u{903}", "\u{AC01}"]),
2491 ("\u{903}\u{308}\u{AC01}", &["\u{903}\u{308}", "\u{AC01}"]),
2492 ("\u{903}\u{1F1E6}", &["\u{903}", "\u{1F1E6}"]),
2493 ("\u{903}\u{308}\u{1F1E6}", &["\u{903}\u{308}", "\u{1F1E6}"]),
2494 ("\u{903}\u{378}", &["\u{903}", "\u{378}"]),
2495 ("\u{903}\u{308}\u{378}", &["\u{903}\u{308}", "\u{378}"]),
2496 ("\u{1100}\u{20}", &["\u{1100}", "\u{20}"]),
2497 ("\u{1100}\u{308}\u{20}", &["\u{1100}\u{308}", "\u{20}"]),
2498 ("\u{1100}\u{D}", &["\u{1100}", "\u{D}"]),
2499 ("\u{1100}\u{308}\u{D}", &["\u{1100}\u{308}", "\u{D}"]),
2500 ("\u{1100}\u{A}", &["\u{1100}", "\u{A}"]),
2501 ("\u{1100}\u{308}\u{A}", &["\u{1100}\u{308}", "\u{A}"]),
2502 ("\u{1100}\u{1}", &["\u{1100}", "\u{1}"]),
2503 ("\u{1100}\u{308}\u{1}", &["\u{1100}\u{308}", "\u{1}"]),
2504 ("\u{1100}\u{300}", &["\u{1100}\u{300}"]),
2505 ("\u{1100}\u{308}\u{300}", &["\u{1100}\u{308}\u{300}"]),
2506 ("\u{1100}\u{1100}", &["\u{1100}\u{1100}"]),
2507 ("\u{1100}\u{308}\u{1100}", &["\u{1100}\u{308}", "\u{1100}"]),
2508 ("\u{1100}\u{1160}", &["\u{1100}\u{1160}"]),
2509 ("\u{1100}\u{308}\u{1160}", &["\u{1100}\u{308}", "\u{1160}"]),
2510 ("\u{1100}\u{11A8}", &["\u{1100}", "\u{11A8}"]),
2511 ("\u{1100}\u{308}\u{11A8}", &["\u{1100}\u{308}", "\u{11A8}"]),
2512 ("\u{1100}\u{AC00}", &["\u{1100}\u{AC00}"]),
2513 ("\u{1100}\u{308}\u{AC00}", &["\u{1100}\u{308}", "\u{AC00}"]),
2514 ("\u{1100}\u{AC01}", &["\u{1100}\u{AC01}"]),
2515 ("\u{1100}\u{308}\u{AC01}", &["\u{1100}\u{308}", "\u{AC01}"]),
2516 ("\u{1100}\u{1F1E6}", &["\u{1100}", "\u{1F1E6}"]),
2517 ("\u{1100}\u{308}\u{1F1E6}", &["\u{1100}\u{308}", "\u{1F1E6}"]),
2518 ("\u{1100}\u{378}", &["\u{1100}", "\u{378}"]),
2519 ("\u{1100}\u{308}\u{378}", &["\u{1100}\u{308}", "\u{378}"]),
2520 ("\u{1160}\u{20}", &["\u{1160}", "\u{20}"]),
2521 ("\u{1160}\u{308}\u{20}", &["\u{1160}\u{308}", "\u{20}"]),
2522 ("\u{1160}\u{D}", &["\u{1160}", "\u{D}"]),
2523 ("\u{1160}\u{308}\u{D}", &["\u{1160}\u{308}", "\u{D}"]),
2524 ("\u{1160}\u{A}", &["\u{1160}", "\u{A}"]),
2525 ("\u{1160}\u{308}\u{A}", &["\u{1160}\u{308}", "\u{A}"]),
2526 ("\u{1160}\u{1}", &["\u{1160}", "\u{1}"]),
2527 ("\u{1160}\u{308}\u{1}", &["\u{1160}\u{308}", "\u{1}"]),
2528 ("\u{1160}\u{300}", &["\u{1160}\u{300}"]),
2529 ("\u{1160}\u{308}\u{300}", &["\u{1160}\u{308}\u{300}"]),
2530 ("\u{1160}\u{1100}", &["\u{1160}", "\u{1100}"]),
2531 ("\u{1160}\u{308}\u{1100}", &["\u{1160}\u{308}", "\u{1100}"]),
2532 ("\u{1160}\u{1160}", &["\u{1160}\u{1160}"]),
2533 ("\u{1160}\u{308}\u{1160}", &["\u{1160}\u{308}", "\u{1160}"]),
2534 ("\u{1160}\u{11A8}", &["\u{1160}\u{11A8}"]),
2535 ("\u{1160}\u{308}\u{11A8}", &["\u{1160}\u{308}", "\u{11A8}"]),
2536 ("\u{1160}\u{AC00}", &["\u{1160}", "\u{AC00}"]),
2537 ("\u{1160}\u{308}\u{AC00}", &["\u{1160}\u{308}", "\u{AC00}"]),
2538 ("\u{1160}\u{AC01}", &["\u{1160}", "\u{AC01}"]),
2539 ("\u{1160}\u{308}\u{AC01}", &["\u{1160}\u{308}", "\u{AC01}"]),
2540 ("\u{1160}\u{1F1E6}", &["\u{1160}", "\u{1F1E6}"]),
2541 ("\u{1160}\u{308}\u{1F1E6}", &["\u{1160}\u{308}", "\u{1F1E6}"]),
2542 ("\u{1160}\u{378}", &["\u{1160}", "\u{378}"]),
2543 ("\u{1160}\u{308}\u{378}", &["\u{1160}\u{308}", "\u{378}"]),
2544 ("\u{11A8}\u{20}", &["\u{11A8}", "\u{20}"]),
2545 ("\u{11A8}\u{308}\u{20}", &["\u{11A8}\u{308}", "\u{20}"]),
2546 ("\u{11A8}\u{D}", &["\u{11A8}", "\u{D}"]),
2547 ("\u{11A8}\u{308}\u{D}", &["\u{11A8}\u{308}", "\u{D}"]),
2548 ("\u{11A8}\u{A}", &["\u{11A8}", "\u{A}"]),
2549 ("\u{11A8}\u{308}\u{A}", &["\u{11A8}\u{308}", "\u{A}"]),
2550 ("\u{11A8}\u{1}", &["\u{11A8}", "\u{1}"]),
2551 ("\u{11A8}\u{308}\u{1}", &["\u{11A8}\u{308}", "\u{1}"]),
2552 ("\u{11A8}\u{300}", &["\u{11A8}\u{300}"]),
2553 ("\u{11A8}\u{308}\u{300}", &["\u{11A8}\u{308}\u{300}"]),
2554 ("\u{11A8}\u{1100}", &["\u{11A8}", "\u{1100}"]),
2555 ("\u{11A8}\u{308}\u{1100}", &["\u{11A8}\u{308}", "\u{1100}"]),
2556 ("\u{11A8}\u{1160}", &["\u{11A8}", "\u{1160}"]),
2557 ("\u{11A8}\u{308}\u{1160}", &["\u{11A8}\u{308}", "\u{1160}"]),
2558 ("\u{11A8}\u{11A8}", &["\u{11A8}\u{11A8}"]),
2559 ("\u{11A8}\u{308}\u{11A8}", &["\u{11A8}\u{308}", "\u{11A8}"]),
2560 ("\u{11A8}\u{AC00}", &["\u{11A8}", "\u{AC00}"]),
2561 ("\u{11A8}\u{308}\u{AC00}", &["\u{11A8}\u{308}", "\u{AC00}"]),
2562 ("\u{11A8}\u{AC01}", &["\u{11A8}", "\u{AC01}"]),
2563 ("\u{11A8}\u{308}\u{AC01}", &["\u{11A8}\u{308}", "\u{AC01}"]),
2564 ("\u{11A8}\u{1F1E6}", &["\u{11A8}", "\u{1F1E6}"]),
2565 ("\u{11A8}\u{308}\u{1F1E6}", &["\u{11A8}\u{308}", "\u{1F1E6}"]),
2566 ("\u{11A8}\u{378}", &["\u{11A8}", "\u{378}"]),
2567 ("\u{11A8}\u{308}\u{378}", &["\u{11A8}\u{308}", "\u{378}"]),
2568 ("\u{AC00}\u{20}", &["\u{AC00}", "\u{20}"]),
2569 ("\u{AC00}\u{308}\u{20}", &["\u{AC00}\u{308}", "\u{20}"]),
2570 ("\u{AC00}\u{D}", &["\u{AC00}", "\u{D}"]),
2571 ("\u{AC00}\u{308}\u{D}", &["\u{AC00}\u{308}", "\u{D}"]),
2572 ("\u{AC00}\u{A}", &["\u{AC00}", "\u{A}"]),
2573 ("\u{AC00}\u{308}\u{A}", &["\u{AC00}\u{308}", "\u{A}"]),
2574 ("\u{AC00}\u{1}", &["\u{AC00}", "\u{1}"]),
2575 ("\u{AC00}\u{308}\u{1}", &["\u{AC00}\u{308}", "\u{1}"]),
2576 ("\u{AC00}\u{300}", &["\u{AC00}\u{300}"]),
2577 ("\u{AC00}\u{308}\u{300}", &["\u{AC00}\u{308}\u{300}"]),
2578 ("\u{AC00}\u{1100}", &["\u{AC00}", "\u{1100}"]),
2579 ("\u{AC00}\u{308}\u{1100}", &["\u{AC00}\u{308}", "\u{1100}"]),
2580 ("\u{AC00}\u{1160}", &["\u{AC00}\u{1160}"]),
2581 ("\u{AC00}\u{308}\u{1160}", &["\u{AC00}\u{308}", "\u{1160}"]),
2582 ("\u{AC00}\u{11A8}", &["\u{AC00}\u{11A8}"]),
2583 ("\u{AC00}\u{308}\u{11A8}", &["\u{AC00}\u{308}", "\u{11A8}"]),
2584 ("\u{AC00}\u{AC00}", &["\u{AC00}", "\u{AC00}"]),
2585 ("\u{AC00}\u{308}\u{AC00}", &["\u{AC00}\u{308}", "\u{AC00}"]),
2586 ("\u{AC00}\u{AC01}", &["\u{AC00}", "\u{AC01}"]),
2587 ("\u{AC00}\u{308}\u{AC01}", &["\u{AC00}\u{308}", "\u{AC01}"]),
2588 ("\u{AC00}\u{1F1E6}", &["\u{AC00}", "\u{1F1E6}"]),
2589 ("\u{AC00}\u{308}\u{1F1E6}", &["\u{AC00}\u{308}", "\u{1F1E6}"]),
2590 ("\u{AC00}\u{378}", &["\u{AC00}", "\u{378}"]),
2591 ("\u{AC00}\u{308}\u{378}", &["\u{AC00}\u{308}", "\u{378}"]),
2592 ("\u{AC01}\u{20}", &["\u{AC01}", "\u{20}"]),
2593 ("\u{AC01}\u{308}\u{20}", &["\u{AC01}\u{308}", "\u{20}"]),
2594 ("\u{AC01}\u{D}", &["\u{AC01}", "\u{D}"]),
2595 ("\u{AC01}\u{308}\u{D}", &["\u{AC01}\u{308}", "\u{D}"]),
2596 ("\u{AC01}\u{A}", &["\u{AC01}", "\u{A}"]),
2597 ("\u{AC01}\u{308}\u{A}", &["\u{AC01}\u{308}", "\u{A}"]),
2598 ("\u{AC01}\u{1}", &["\u{AC01}", "\u{1}"]),
2599 ("\u{AC01}\u{308}\u{1}", &["\u{AC01}\u{308}", "\u{1}"]),
2600 ("\u{AC01}\u{300}", &["\u{AC01}\u{300}"]),
2601 ("\u{AC01}\u{308}\u{300}", &["\u{AC01}\u{308}\u{300}"]),
2602 ("\u{AC01}\u{1100}", &["\u{AC01}", "\u{1100}"]),
2603 ("\u{AC01}\u{308}\u{1100}", &["\u{AC01}\u{308}", "\u{1100}"]),
2604 ("\u{AC01}\u{1160}", &["\u{AC01}", "\u{1160}"]),
2605 ("\u{AC01}\u{308}\u{1160}", &["\u{AC01}\u{308}", "\u{1160}"]),
2606 ("\u{AC01}\u{11A8}", &["\u{AC01}\u{11A8}"]),
2607 ("\u{AC01}\u{308}\u{11A8}", &["\u{AC01}\u{308}", "\u{11A8}"]),
2608 ("\u{AC01}\u{AC00}", &["\u{AC01}", "\u{AC00}"]),
2609 ("\u{AC01}\u{308}\u{AC00}", &["\u{AC01}\u{308}", "\u{AC00}"]),
2610 ("\u{AC01}\u{AC01}", &["\u{AC01}", "\u{AC01}"]),
2611 ("\u{AC01}\u{308}\u{AC01}", &["\u{AC01}\u{308}", "\u{AC01}"]),
2612 ("\u{AC01}\u{1F1E6}", &["\u{AC01}", "\u{1F1E6}"]),
2613 ("\u{AC01}\u{308}\u{1F1E6}", &["\u{AC01}\u{308}", "\u{1F1E6}"]),
2614 ("\u{AC01}\u{378}", &["\u{AC01}", "\u{378}"]),
2615 ("\u{AC01}\u{308}\u{378}", &["\u{AC01}\u{308}", "\u{378}"]),
2616 ("\u{1F1E6}\u{20}", &["\u{1F1E6}", "\u{20}"]),
2617 ("\u{1F1E6}\u{308}\u{20}", &["\u{1F1E6}\u{308}", "\u{20}"]),
2618 ("\u{1F1E6}\u{D}", &["\u{1F1E6}", "\u{D}"]),
2619 ("\u{1F1E6}\u{308}\u{D}", &["\u{1F1E6}\u{308}", "\u{D}"]),
2620 ("\u{1F1E6}\u{A}", &["\u{1F1E6}", "\u{A}"]),
2621 ("\u{1F1E6}\u{308}\u{A}", &["\u{1F1E6}\u{308}", "\u{A}"]),
2622 ("\u{1F1E6}\u{1}", &["\u{1F1E6}", "\u{1}"]),
2623 ("\u{1F1E6}\u{308}\u{1}", &["\u{1F1E6}\u{308}", "\u{1}"]),
2624 ("\u{1F1E6}\u{300}", &["\u{1F1E6}\u{300}"]),
2625 ("\u{1F1E6}\u{308}\u{300}", &["\u{1F1E6}\u{308}\u{300}"]),
2626 ("\u{1F1E6}\u{1100}", &["\u{1F1E6}", "\u{1100}"]),
2627 ("\u{1F1E6}\u{308}\u{1100}", &["\u{1F1E6}\u{308}", "\u{1100}"]),
2628 ("\u{1F1E6}\u{1160}", &["\u{1F1E6}", "\u{1160}"]),
2629 ("\u{1F1E6}\u{308}\u{1160}", &["\u{1F1E6}\u{308}", "\u{1160}"]),
2630 ("\u{1F1E6}\u{11A8}", &["\u{1F1E6}", "\u{11A8}"]),
2631 ("\u{1F1E6}\u{308}\u{11A8}", &["\u{1F1E6}\u{308}", "\u{11A8}"]),
2632 ("\u{1F1E6}\u{AC00}", &["\u{1F1E6}", "\u{AC00}"]),
2633 ("\u{1F1E6}\u{308}\u{AC00}", &["\u{1F1E6}\u{308}", "\u{AC00}"]),
2634 ("\u{1F1E6}\u{AC01}", &["\u{1F1E6}", "\u{AC01}"]),
2635 ("\u{1F1E6}\u{308}\u{AC01}", &["\u{1F1E6}\u{308}", "\u{AC01}"]),
2636 ("\u{1F1E6}\u{1F1E6}", &["\u{1F1E6}\u{1F1E6}"]),
2637 ("\u{1F1E6}\u{308}\u{1F1E6}", &["\u{1F1E6}\u{308}", "\u{1F1E6}"]),
2638 ("\u{1F1E6}\u{378}", &["\u{1F1E6}", "\u{378}"]),
2639 ("\u{1F1E6}\u{308}\u{378}", &["\u{1F1E6}\u{308}", "\u{378}"]),
2640 ("\u{378}\u{20}", &["\u{378}", "\u{20}"]),
2641 ("\u{378}\u{308}\u{20}", &["\u{378}\u{308}", "\u{20}"]),
2642 ("\u{378}\u{D}", &["\u{378}", "\u{D}"]),
2643 ("\u{378}\u{308}\u{D}", &["\u{378}\u{308}", "\u{D}"]),
2644 ("\u{378}\u{A}", &["\u{378}", "\u{A}"]),
2645 ("\u{378}\u{308}\u{A}", &["\u{378}\u{308}", "\u{A}"]),
2646 ("\u{378}\u{1}", &["\u{378}", "\u{1}"]),
2647 ("\u{378}\u{308}\u{1}", &["\u{378}\u{308}", "\u{1}"]),
2648 ("\u{378}\u{300}", &["\u{378}\u{300}"]),
2649 ("\u{378}\u{308}\u{300}", &["\u{378}\u{308}\u{300}"]),
2650 ("\u{378}\u{1100}", &["\u{378}", "\u{1100}"]),
2651 ("\u{378}\u{308}\u{1100}", &["\u{378}\u{308}", "\u{1100}"]),
2652 ("\u{378}\u{1160}", &["\u{378}", "\u{1160}"]),
2653 ("\u{378}\u{308}\u{1160}", &["\u{378}\u{308}", "\u{1160}"]),
2654 ("\u{378}\u{11A8}", &["\u{378}", "\u{11A8}"]),
2655 ("\u{378}\u{308}\u{11A8}", &["\u{378}\u{308}", "\u{11A8}"]),
2656 ("\u{378}\u{AC00}", &["\u{378}", "\u{AC00}"]),
2657 ("\u{378}\u{308}\u{AC00}", &["\u{378}\u{308}", "\u{AC00}"]),
2658 ("\u{378}\u{AC01}", &["\u{378}", "\u{AC01}"]),
2659 ("\u{378}\u{308}\u{AC01}", &["\u{378}\u{308}", "\u{AC01}"]),
2660 ("\u{378}\u{1F1E6}", &["\u{378}", "\u{1F1E6}"]),
2661 ("\u{378}\u{308}\u{1F1E6}", &["\u{378}\u{308}", "\u{1F1E6}"]),
2662 ("\u{378}\u{378}", &["\u{378}", "\u{378}"]),
2663 ("\u{378}\u{308}\u{378}", &["\u{378}\u{308}", "\u{378}"]),
2664 ("\u{61}\u{1F1E6}\u{62}", &["\u{61}", "\u{1F1E6}", "\u{62}"]),
2665 ("\u{1F1F7}\u{1F1FA}", &["\u{1F1F7}\u{1F1FA}"]),
2666 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}", &["\u{1F1F7}\u{1F1FA}\u{1F1F8}"]),
2667 ("\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}",
2668 &["\u{1F1F7}\u{1F1FA}\u{1F1F8}\u{1F1EA}"]),
2669 ("\u{1F1F7}\u{1F1FA}\u{200B}\u{1F1F8}\u{1F1EA}",
2670 &["\u{1F1F7}\u{1F1FA}", "\u{200B}", "\u{1F1F8}\u{1F1EA}"]),
2671 ("\u{1F1E6}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{1F1E7}\u{1F1E8}"]),
2672 ("\u{1F1E6}\u{200D}\u{1F1E7}\u{1F1E8}", &["\u{1F1E6}\u{200D}",
2673 "\u{1F1E7}\u{1F1E8}"]),
2674 ("\u{1F1E6}\u{1F1E7}\u{200D}\u{1F1E8}",
2675 &["\u{1F1E6}\u{1F1E7}\u{200D}", "\u{1F1E8}"]),
2676 ("\u{20}\u{200D}\u{646}", &["\u{20}\u{200D}", "\u{646}"]),
2677 ("\u{646}\u{200D}\u{20}", &["\u{646}\u{200D}", "\u{20}"]),
2680 let test_diff: [(_, &[_], &[_]); 23] = [
2681 ("\u{20}\u{903}", &["\u{20}\u{903}"], &["\u{20}", "\u{903}"]), ("\u{20}\u{308}\u{903}",
2682 &["\u{20}\u{308}\u{903}"], &["\u{20}\u{308}", "\u{903}"]), ("\u{D}\u{308}\u{903}",
2683 &["\u{D}", "\u{308}\u{903}"], &["\u{D}", "\u{308}", "\u{903}"]), ("\u{A}\u{308}\u{903}",
2684 &["\u{A}", "\u{308}\u{903}"], &["\u{A}", "\u{308}", "\u{903}"]), ("\u{1}\u{308}\u{903}",
2685 &["\u{1}", "\u{308}\u{903}"], &["\u{1}", "\u{308}", "\u{903}"]), ("\u{300}\u{903}",
2686 &["\u{300}\u{903}"], &["\u{300}", "\u{903}"]), ("\u{300}\u{308}\u{903}",
2687 &["\u{300}\u{308}\u{903}"], &["\u{300}\u{308}", "\u{903}"]), ("\u{903}\u{903}",
2688 &["\u{903}\u{903}"], &["\u{903}", "\u{903}"]), ("\u{903}\u{308}\u{903}",
2689 &["\u{903}\u{308}\u{903}"], &["\u{903}\u{308}", "\u{903}"]), ("\u{1100}\u{903}",
2690 &["\u{1100}\u{903}"], &["\u{1100}", "\u{903}"]), ("\u{1100}\u{308}\u{903}",
2691 &["\u{1100}\u{308}\u{903}"], &["\u{1100}\u{308}", "\u{903}"]), ("\u{1160}\u{903}",
2692 &["\u{1160}\u{903}"], &["\u{1160}", "\u{903}"]), ("\u{1160}\u{308}\u{903}",
2693 &["\u{1160}\u{308}\u{903}"], &["\u{1160}\u{308}", "\u{903}"]), ("\u{11A8}\u{903}",
2694 &["\u{11A8}\u{903}"], &["\u{11A8}", "\u{903}"]), ("\u{11A8}\u{308}\u{903}",
2695 &["\u{11A8}\u{308}\u{903}"], &["\u{11A8}\u{308}", "\u{903}"]), ("\u{AC00}\u{903}",
2696 &["\u{AC00}\u{903}"], &["\u{AC00}", "\u{903}"]), ("\u{AC00}\u{308}\u{903}",
2697 &["\u{AC00}\u{308}\u{903}"], &["\u{AC00}\u{308}", "\u{903}"]), ("\u{AC01}\u{903}",
2698 &["\u{AC01}\u{903}"], &["\u{AC01}", "\u{903}"]), ("\u{AC01}\u{308}\u{903}",
2699 &["\u{AC01}\u{308}\u{903}"], &["\u{AC01}\u{308}", "\u{903}"]), ("\u{1F1E6}\u{903}",
2700 &["\u{1F1E6}\u{903}"], &["\u{1F1E6}", "\u{903}"]), ("\u{1F1E6}\u{308}\u{903}",
2701 &["\u{1F1E6}\u{308}\u{903}"], &["\u{1F1E6}\u{308}", "\u{903}"]), ("\u{378}\u{903}",
2702 &["\u{378}\u{903}"], &["\u{378}", "\u{903}"]), ("\u{378}\u{308}\u{903}",
2703 &["\u{378}\u{308}\u{903}"], &["\u{378}\u{308}", "\u{903}"]),
2706 for &(s, g) in &test_same[] {
2707 // test forward iterator
2708 assert!(order::equals(s.graphemes(true), g.iter().map(|&x| x)));
2709 assert!(order::equals(s.graphemes(false), g.iter().map(|&x| x)));
2711 // test reverse iterator
2712 assert!(order::equals(s.graphemes(true).rev(), g.iter().rev().map(|&x| x)));
2713 assert!(order::equals(s.graphemes(false).rev(), g.iter().rev().map(|&x| x)));
2716 for &(s, gt, gf) in &test_diff {
2717 // test forward iterator
2718 assert!(order::equals(s.graphemes(true), gt.iter().map(|&x| x)));
2719 assert!(order::equals(s.graphemes(false), gf.iter().map(|&x| x)));
2721 // test reverse iterator
2722 assert!(order::equals(s.graphemes(true).rev(), gt.iter().rev().map(|&x| x)));
2723 assert!(order::equals(s.graphemes(false).rev(), gf.iter().rev().map(|&x| x)));
2726 // test the indices iterators
2727 let s = "a̐éö̲\r\n";
2728 let gr_inds = s.grapheme_indices(true).collect::<Vec<(uint, &str)>>();
2729 let b: &[_] = &[(0u, "a̐"), (3, "é"), (6, "ö̲"), (11, "\r\n")];
2730 assert_eq!(gr_inds, b);
2731 let gr_inds = s.grapheme_indices(true).rev().collect::<Vec<(uint, &str)>>();
2732 let b: &[_] = &[(11, "\r\n"), (6, "ö̲"), (3, "é"), (0u, "a̐")];
2733 assert_eq!(gr_inds, b);
2734 let mut gr_inds_iter = s.grapheme_indices(true);
2736 let gr_inds = gr_inds_iter.by_ref();
2737 let e1 = gr_inds.size_hint();
2738 assert_eq!(e1, (1, Some(13)));
2739 let c = gr_inds.count();
2742 let e2 = gr_inds_iter.size_hint();
2743 assert_eq!(e2, (0, Some(0)));
2745 // make sure the reverse iterator does the right thing with "\n" at beginning of string
2747 let gr = s.graphemes(true).rev().collect::<Vec<&str>>();
2748 let b: &[_] = &["\r", "\r\n", "\n"];
// Exercises `split_str` through the local helper `t`, which splits `s` on the
// string separator `sep` and collects the pieces into a `Vec<&str>`.
// NOTE(review): the third argument `u` is presumably the expected list of
// pieces; the line that compares it with `v` is not visible in this excerpt.
2753 fn test_split_strator() {
2754 fn t(s: &str, sep: &str, u: &[&str]) {
2755 let v: Vec<&str> = s.split_str(sep).collect();
// Separator that does not occur in the input: the listed result is the whole input.
2758 t("--1233345--", "12345", &["--1233345--"]);
2759 t("abc::hello::there", "::", &["abc", "hello", "there"]);
// Separators at the start and/or end of the input are listed with empty
// pieces at the corresponding end.
2760 t("::hello::there", "::", &["", "hello", "there"]);
2761 t("hello::there::", "::", &["hello", "there", ""]);
2762 t("::hello::there::", "::", &["", "hello", "there", ""]);
// Multi-byte separator inside a multi-byte string.
2763 t("ประเทศไทย中华Việt Nam", "中华", &["ประเทศไทย", "Việt Nam"]);
2764 t("zzXXXzzYYYzz", "zz", &["", "XXX", "YYY", ""]);
2765 t("zzXXXzYYYz", "XXX", &["zz", "zYYYz"]);
2766 t(".XXX.YYY.", ".", &["", "XXX", "YYY", ""]);
// Inputs made only of (possibly overlapping) separator characters.
2768 t("zz", "zz", &["",""]);
2769 t("ok", "z", &["ok"]);
2770 t("zzz", "zz", &["","z"]);
2771 t("zzzzz", "zz", &["","","z"]);
// Checks the `Default` value of string types: the generic helper `t` builds
// `Default::default()` for any `S: Default + Str` and asserts that its slice
// is the empty string.
// NOTE(review): the concrete types `t` is instantiated with are not visible
// in this excerpt.
2775 fn test_str_default() {
2776 use core::default::Default;
2777 fn t<S: Default + Str>() {
2778 let s: S = Default::default();
2779 assert_eq!(s.as_slice(), "");
// Checks that `len()` can be summed over a slice of `&str`: the local helper
// `sum_len` adds up `x.len()` for every element, and each assertion expects a
// total of 5.
2787 fn test_str_container() {
2788 fn sum_len(v: &[&str]) -> uint {
2789 v.iter().map(|x| x.len()).sum()
2792 let s = String::from_str("01234");
// String literals, including an empty one.
2793 assert_eq!(5, sum_len(&["012", "", "34"]));
// Slices borrowed from temporary `String` values.
2794 assert_eq!(5, sum_len(&[String::from_str("01").as_slice(),
2795 String::from_str("2").as_slice(),
2796 String::from_str("34").as_slice(),
2797 String::from_str("").as_slice()]));
// A single slice borrowed from the named `String` `s`.
2798 assert_eq!(5, sum_len(&[s.as_slice()]));
// Checks `from_utf8` on byte slices: the first two assertions expect `Ok`
// with the decoded text, the last one expects an error for input that ends
// in the byte 0xFF.
2802 fn test_str_from_utf8() {
// NOTE(review): `xs` for this assertion is bound on a line that is not
// visible in this excerpt.
2804 assert_eq!(from_utf8(xs), Ok("hello"));
// Round trip: the bytes of a multi-byte string decode back to the same text.
2806 let xs = "ศไทย中华Việt Nam".as_bytes();
2807 assert_eq!(from_utf8(xs), Ok("ศไทย中华Việt Nam"));
// ASCII text followed by 0xFF is expected to be rejected with `TooShort`.
2809 let xs = b"hello\xFF";
2810 assert_eq!(from_utf8(xs), Err(Utf8Error::TooShort));
2817 use prelude::{SliceExt, IteratorExt, SliceConcatExt};
2819 use test::black_box;
// Benchmark: counts the `char`s of a string that mixes multi-byte and ASCII
// characters, calling `chars().count()` inside `b.iter`.
2822 fn char_iterator(b: &mut Bencher) {
2823 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2825 b.iter(|| s.chars().count());
// Benchmark: walks `s.chars()` with an explicit `for` loop and hands every
// `char` to `black_box`.
// NOTE(review): the `b.iter` call that presumably wraps the loop is not
// visible in this excerpt.
2829 fn char_iterator_for(b: &mut Bencher) {
2830 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2833 for ch in s.chars() { black_box(ch); }
// Benchmark: `chars().count()` inside `b.iter` over a multi-line string
// literal whose visible text is ASCII only.
2838 fn char_iterator_ascii(b: &mut Bencher) {
2839 let s = "Mary had a little lamb, Little lamb
2840 Mary had a little lamb, Little lamb
2841 Mary had a little lamb, Little lamb
2842 Mary had a little lamb, Little lamb
2843 Mary had a little lamb, Little lamb
2844 Mary had a little lamb, Little lamb";
2846 b.iter(|| s.chars().count());
// Benchmark: counts the `char`s of a mixed multi-byte/ASCII string while
// iterating from the back, via `chars().rev().count()` inside `b.iter`.
2850 fn char_iterator_rev(b: &mut Bencher) {
2851 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2853 b.iter(|| s.chars().rev().count());
// Benchmark: walks `s.chars().rev()` with an explicit `for` loop and hands
// every `char` to `black_box`.
// NOTE(review): the `b.iter` call that presumably wraps the loop is not
// visible in this excerpt.
2857 fn char_iterator_rev_for(b: &mut Bencher) {
2858 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2861 for ch in s.chars().rev() { black_box(ch); }
// Benchmark: `char_indices().count()` on a mixed multi-byte/ASCII string.
// The expected count `len` is computed once with `chars().count()` before
// `b.iter`; the benchmarked closure asserts that both counts agree.
2866 fn char_indicesator(b: &mut Bencher) {
2867 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2868 let len = s.chars().count();
2870 b.iter(|| assert_eq!(s.char_indices().count(), len));
// Benchmark: `char_indices().rev().count()` on a mixed multi-byte/ASCII
// string. The expected count `len` is computed once with `chars().count()`
// before `b.iter`; the benchmarked closure asserts that both counts agree.
2874 fn char_indicesator_rev(b: &mut Bencher) {
2875 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2876 let len = s.chars().count();
2878 b.iter(|| assert_eq!(s.char_indices().rev().count(), len));
// Benchmark: splits a mostly multi-byte string on the ASCII char 'V'.
// The literal contains 'V' twice, so the closure asserts 3 pieces.
2882 fn split_unicode_ascii(b: &mut Bencher) {
2883 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2885 b.iter(|| assert_eq!(s.split('V').count(), 3));
// Benchmark: splits a mostly multi-byte string with a custom `CharEq`
// matcher, `NotAscii`, and asserts 3 pieces.
2889 fn split_unicode_not_ascii(b: &mut Bencher) {
// Wrapper around a single `char` used as the split pattern.
2890 struct NotAscii(char);
2891 impl CharEq for NotAscii {
// NOTE(review): the comparison between `cc` and `c` is on a line that is not
// visible in this excerpt.
2892 fn matches(&mut self, c: char) -> bool {
2893 let NotAscii(cc) = *self;
// Always reports `false`, even though the wrapped char here ('V') is ASCII;
// presumably this steers `split` away from an ASCII-only path — TODO confirm.
2896 fn only_ascii(&self) -> bool { false }
2898 let s = "ประเทศไทย中华Việt Namประเทศไทย中华Việt Nam";
2900 b.iter(|| assert_eq!(s.split(NotAscii('V')).count(), 3));
// Benchmark: splits an ASCII sentence on the space character. The expected
// piece count `len` is computed once before `b.iter`, and the benchmarked
// closure asserts that the same split yields that count.
2905 fn split_ascii(b: &mut Bencher) {
2906 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2907 let len = s.split(' ').count();
2909 b.iter(|| assert_eq!(s.split(' ').count(), len));
// Benchmark: splits an ASCII sentence with a custom `CharEq` matcher,
// `NotAscii(' ')`, and asserts the piece count equals that of a plain
// `split(' ')` computed before `b.iter`.
2913 fn split_not_ascii(b: &mut Bencher) {
// Wrapper around a single `char` used as the split pattern.
2914 struct NotAscii(char);
2915 impl CharEq for NotAscii {
// NOTE(review): the comparison between `cc` and `c` is on a line that is not
// visible in this excerpt.
2917 fn matches(&mut self, c: char) -> bool {
2918 let NotAscii(cc) = *self;
// Always reports `false`, even though the wrapped char here (' ') is ASCII;
// presumably this steers `split` away from an ASCII-only path — TODO confirm.
2921 fn only_ascii(&self) -> bool { false }
2923 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2924 let len = s.split(' ').count();
2926 b.iter(|| assert_eq!(s.split(NotAscii(' ')).count(), len));
// Benchmark: splits an ASCII sentence using a named function as the
// predicate. `pred` returns true for the space character; the closure asserts
// the piece count equals that of `split(' ')` computed before `b.iter`.
2930 fn split_extern_fn(b: &mut Bencher) {
2931 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2932 let len = s.split(' ').count();
2933 fn pred(c: char) -> bool { c == ' ' }
2935 b.iter(|| assert_eq!(s.split(pred).count(), len));
// Benchmark: splits an ASCII sentence using an inline closure that matches
// the space character; the benchmarked closure asserts the piece count equals
// that of `split(' ')` computed before `b.iter`.
2939 fn split_closure(b: &mut Bencher) {
2940 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2941 let len = s.split(' ').count();
2943 b.iter(|| assert_eq!(s.split(|&: c: char| c == ' ').count(), len));
// Benchmark: splits an ASCII sentence using a `&[char]` slice as the pattern.
// The slice holds only the space character; the closure asserts the piece
// count equals that of `split(' ')` computed before `b.iter`.
2947 fn split_slice(b: &mut Bencher) {
2948 let s = "Mary had a little lamb, Little lamb, little-lamb.";
2949 let len = s.split(' ').count();
2951 let c: &[char] = &[' '];
2952 b.iter(|| assert_eq!(s.split(c).count(), len));
// Benchmark: joins ten copies of `s` with `connect(sep)` and asserts that the
// result length is ten times `s.len()` plus nine times `sep.len()`.
// NOTE(review): `sep` is bound on a line that is not visible in this excerpt,
// as is the `b.iter` call that presumably wraps the assertion.
2956 fn bench_connect(b: &mut Bencher) {
2957 let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
2959 let v = vec![s, s, s, s, s, s, s, s, s, s];
2961 assert_eq!(v.connect(sep).len(), s.len() * 10 + sep.len() * 9);
// Benchmark: `contains` on a one-sentence haystack; asserts the needle is
// found.
// NOTE(review): `needle` is bound on a line that is not visible in this
// excerpt, as is the `b.iter` call that presumably wraps the assertion.
2966 fn bench_contains_short_short(b: &mut Bencher) {
2967 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
2971 assert!(haystack.contains(needle));
// Benchmark: `contains` with the short needle "english" against a haystack of
// several paragraphs of filler text; asserts the needle is NOT found.
// NOTE(review): the line that opens the `haystack` literal and the `b.iter`
// call that presumably wraps the assertion are not visible in this excerpt.
2976 fn bench_contains_short_long(b: &mut Bencher) {
2978 Lorem ipsum dolor sit amet, consectetur adipiscing elit. Suspendisse quis lorem sit amet dolor \
2979 ultricies condimentum. Praesent iaculis purus elit, ac malesuada quam malesuada in. Duis sed orci \
2980 eros. Suspendisse sit amet magna mollis, mollis nunc luctus, imperdiet mi. Integer fringilla non \
2981 sem ut lacinia. Fusce varius tortor a risus porttitor hendrerit. Morbi mauris dui, ultricies nec \
2982 tempus vel, gravida nec quam.
2984 In est dui, tincidunt sed tempus interdum, adipiscing laoreet ante. Etiam tempor, tellus quis \
2985 sagittis interdum, nulla purus mattis sem, quis auctor erat odio ac tellus. In nec nunc sit amet \
2986 diam volutpat molestie at sed ipsum. Vestibulum laoreet consequat vulputate. Integer accumsan \
2987 lorem ac dignissim placerat. Suspendisse convallis faucibus lorem. Aliquam erat volutpat. In vel \
2988 eleifend felis. Sed suscipit nulla lorem, sed mollis est sollicitudin et. Nam fermentum egestas \
2989 interdum. Curabitur ut nisi justo.
2991 Sed sollicitudin ipsum tellus, ut condimentum leo eleifend nec. Cras ut velit ante. Phasellus nec \
2992 mollis odio. Mauris molestie erat in arcu mattis, at aliquet dolor vehicula. Quisque malesuada \
2993 lectus sit amet nisi pretium, a condimentum ipsum porta. Morbi at dapibus diam. Praesent egestas \
2994 est sed risus elementum, eu rutrum metus ultrices. Etiam fermentum consectetur magna, id rutrum \
2995 felis accumsan a. Aliquam ut pellentesque libero. Sed mi nulla, lobortis eu tortor id, suscipit \
2996 ultricies neque. Morbi iaculis sit amet risus at iaculis. Praesent eget ligula quis turpis \
2997 feugiat suscipit vel non arcu. Interdum et malesuada fames ac ante ipsum primis in faucibus. \
2998 Aliquam sit amet placerat lorem.
3000 Cras a lacus vel ante posuere elementum. Nunc est leo, bibendum ut facilisis vel, bibendum at \
3001 mauris. Nullam adipiscing diam vel odio ornare, luctus adipiscing mi luctus. Nulla facilisi. \
3002 Mauris adipiscing bibendum neque, quis adipiscing lectus tempus et. Sed feugiat erat et nisl \
3003 lobortis pharetra. Donec vitae erat enim. Nullam sit amet felis et quam lacinia tincidunt. Aliquam \
3004 suscipit dapibus urna. Sed volutpat urna in magna pulvinar volutpat. Phasellus nec tellus ac diam \
3007 Nam lectus enim, dapibus non nisi tempor, consectetur convallis massa. Maecenas eleifend dictum \
3008 feugiat. Etiam quis mauris vel risus luctus mattis a a nunc. Nullam orci quam, imperdiet id \
3009 vehicula in, porttitor ut nibh. Duis sagittis adipiscing nisl vitae congue. Donec mollis risus eu \
3010 leo suscipit, varius porttitor nulla porta. Pellentesque ut sem nec nisi euismod vehicula. Nulla \
3011 malesuada sollicitudin quam eu fermentum.";
3012 let needle = "english";
3015 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the haystack is a long run of 'a' and the
// needle is a run of 'a' ending in a single 'b'; asserts the needle is NOT
// found. Judging by the name, this is presumably meant as a bad case for a
// naive substring search — TODO confirm.
// NOTE(review): the `b.iter` call that presumably wraps the assertion is not
// visible in this excerpt.
3020 fn bench_contains_bad_naive(b: &mut Bencher) {
3021 let haystack = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
3022 let needle = "aaaaaaaab";
3025 assert!(!haystack.contains(needle));
// Benchmark: `contains` where the needle is the same text as the haystack;
// asserts the needle is found.
// NOTE(review): the `b.iter` call that presumably wraps the assertion is not
// visible in this excerpt.
3030 fn bench_contains_equal(b: &mut Bencher) {
3031 let haystack = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3032 let needle = "Lorem ipsum dolor sit amet, consectetur adipiscing elit.";
3035 assert!(haystack.contains(needle));